scanner: add Scanner struct + tokens! macro

The scanner is the engine that will drive the underlying byte stream,
converting it to tokens. This struct will not attempt to impose semantic
restrictions on token ordering; that will be left to higher-level data
structures.
This commit is contained in:
Paul Stemmet 2021-06-01 19:07:47 +00:00
parent f4608a8588
commit bdc70312f1
Signed by: Paul Stemmet
GPG Key ID: EDEA539F594E7E75
3 changed files with 122 additions and 0 deletions

View File

@ -7,3 +7,6 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
[dev-dependencies]
pretty_assertions = "0.7"

83
src/scanner/mod.rs Normal file
View File

@ -0,0 +1,83 @@
use crate::token::{ScalarStyle, Slice, StreamEncoding, Token};
/// Token scanner over a borrowed input string.
///
/// Tokens are produced through this type's `Iterator` implementation; the
/// scanner itself does not validate token ordering.
#[derive(Debug)]
struct Scanner<'a> {
/// Input text handed to `Scanner::new`; borrowed for the scanner's lifetime.
buffer: &'a str,
/// Where the scanner is in the stream lifecycle (see `StreamState`).
state: StreamState,
}
impl<'a> Scanner<'a> {
    /// Creates a scanner over `data`, positioned before the start of the stream.
    pub fn new(data: &'a str) -> Self {
        let state = StreamState::Start;

        Self {
            buffer: data,
            state,
        }
    }

    /// Emits the stream start token the first time it is called.
    ///
    /// Returns `None` on every call made after the scanner has left the
    /// `Start` state.
    fn start_stream(&mut self) -> Option<Token<'a>> {
        if !matches!(self.state, StreamState::Start) {
            return None;
        }

        self.state = StreamState::Stream;

        Some(Token::StreamStart(StreamEncoding::UTF8))
    }

    /// Emits the stream end token once the buffer is empty.
    ///
    /// Returns `None` while the buffer still holds data, or when the end
    /// token has already been produced.
    fn stream_end(&mut self) -> Option<Token<'a>> {
        let already_done = matches!(self.state, StreamState::Done);

        if already_done || !self.buffer.is_empty() {
            return None;
        }

        self.state = StreamState::Done;

        Some(Token::StreamEnd)
    }
}
impl<'a> Iterator for Scanner<'a> {
    type Item = Token<'a>;

    /// Produces the next token, trying each producer in order: the stream
    /// start first, then the stream end. Yields `None` when neither applies.
    fn next(&mut self) -> Option<Self::Item> {
        self.start_stream().or_else(|| self.stream_end())
    }
}
/// Lifecycle of the token stream, used so the stream start and end tokens
/// are each emitted at most once.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum StreamState {
/// Initial state: the stream start token has not been emitted yet.
Start,
/// The stream start token has been emitted; the end token has not.
Stream,
/// The stream end token has been emitted.
Done,
}
#[cfg(test)]
mod tests {
    // `macro_rules!` macros are textually scoped, so the module providing
    // `tokens!` has to be declared before the tests that use it.
    #[macro_use]
    mod macros;

    use super::*;
    use pretty_assertions::assert_eq;

    /// An empty input produces only the stream start and end markers, after
    /// which the scanner is exhausted.
    #[test]
    fn empty() {
        let mut scanner = Scanner::new("");

        tokens!(scanner =>
            | Token::StreamStart(StreamEncoding::UTF8) => "expected start of stream",
            | Token::StreamEnd => "expected end of stream",
            @ None => "expected stream to be finished"
        );
    }
}

View File

@ -0,0 +1,36 @@
/// Macro for asserting token streams
///
/// ```text
/// Used as: tokens!(Scanner => <sigil> <expected> [=> <message>] [, ..])
/// Where:
///     <sigil>     '|' for a Token, or '@' for an Option<Token>
///     <expected>  Either Token or Option<Token>
///     <message>   A message to print on failure
/// ```
///
/// Each entry calls `.next()` on the scanner expression once, in order, and
/// compares the result with `assert_eq!`. A `|` entry panics if the scanner
/// is already exhausted. Any sigil other than `|` is treated like `@`.
///
/// The scanner expression is re-evaluated for every entry, so pass a
/// variable rather than an expression with side effects. A trailing comma
/// after the last entry is not accepted.
macro_rules! tokens {
    ($scanner:expr => $($id:tt $expected:expr $(=> $msg:tt)?),+ ) => {
        $( tokens!(@unwrap $id $scanner => $expected $(=> $msg)? ) );+
    };

    // <-- PRIVATE VARIANTS -->

    // Forward to the @token variant, with/without a message
    (@unwrap | $scanner:expr => $expected:expr $(=> $msg:tt)? ) => {
        tokens!(@token $scanner => $expected $(, $msg)? )
    };
    // Variant for option assert
    (@unwrap @ $scanner:expr => $expected:expr $(=> $msg:tt)? ) => {
        assert_eq!($scanner.next(), $expected $(, $msg)? )
    };
    // Forward any unknown sigil to the option assert.
    //
    // The captured fragments are re-emitted bare: writing `$scanner:expr` in
    // the transcriber would emit literal `: expr` tokens and the forwarded
    // call would match no rule.
    (@unwrap $any:tt $scanner:expr => $expected:expr $(=> $msg:tt)? ) => {
        tokens!(@unwrap @ $scanner => $expected $(=> $msg)? )
    };
    // Variant for token assert, with or without a message.
    //
    // Kept as a single expression so the expansion is valid in both
    // statement and expression position.
    (@token $scanner:expr => $expected:expr $(, $msg:tt)? ) => {
        assert_eq!(
            $scanner.next().expect("Unexpected end of events"),
            $expected
            $(, $msg)?
        )
    };
}