Feature/scanner/anchor alias #9

Merged
bazaah merged 2 commits from feature/scanner/anchor-alias into master 2021-06-06 09:54:28 +00:00
2 changed files with 165 additions and 0 deletions

View file

@ -27,6 +27,9 @@ pub enum ScanError
/// or more spaces /// or more spaces
InvalidTagPrefix, InvalidTagPrefix,
/// Either an anchor (&) or alias (*)'s name was invalid
InvalidAnchorName,
/// Got end of stream while parsing a token /// Got end of stream while parsing a token
UnexpectedEOF, UnexpectedEOF,
} }

View file

@ -48,6 +48,11 @@ impl<'a> Scanner<'a>
return Ok(directive); return Ok(directive);
} }
if let anchor @ Some(_) = self.anchor()?
{
return Ok(anchor);
}
Ok(None) Ok(None)
} }
@ -274,6 +279,60 @@ impl<'a> Scanner<'a>
Ok(Some(token)) Ok(Some(token))
} }
/// Try to scan an anchor (`&name`) or alias (`*name`) from the front of
/// the scanner's buffer.
///
/// Returns `Ok(None)` without touching `self.buffer` when the buffer
/// does not begin with `*` or `&`. Returns
/// `Err(ScanError::InvalidAnchorName)` when no ASCII alphanumeric
/// characters follow the sigil, or when the `check!` on the byte after
/// the name fails. On success `self.buffer` is advanced by the number
/// of bytes consumed from the working copy and the matching
/// `Token::Anchor` / `Token::Alias` is returned.
fn anchor(&mut self) -> Result<Option<Token<'a>>>
{
    // Work on a copy so that self.buffer is only moved forward once
    // the whole token has been accepted
    let mut rest = self.buffer;

    // *anchor 'rest of the line'
    // ^ the sigil selects between alias (*) and anchor (&)
    let kind = match rest.as_bytes().first()
    {
        Some(sigil) if matches!(sigil, b'*' | b'&') =>
        {
            AnchorKind::new(sigil).expect("we only bind * or & so this cannot fail")
        },
        _ => return Ok(None),
    };
    advance!(rest, 1);

    // *anchor 'rest of the line'
    //  ^^^^^^ the name is the leading run of ASCII alphanumerics
    let name_len = take_while(rest.as_bytes(), u8::is_ascii_alphanumeric).len();
    let name = advance!(<- rest, name_len);

    // A sigil on its own is not a valid anchor or alias, the name
    // needs at least one character
    if name.is_empty()
    {
        return Err(ScanError::InvalidAnchorName);
    }

    // *anchor 'rest of the line'
    //        ^
    // Whitespace is not strictly required after the name, so a set
    // of other bytes that may directly follow it is accepted as well
    check!(rest.as_bytes(),
        is b' ' | b'\n' | b'?' | b',' | b']' | b'}' | b'%' | b'@' | b'`',
        else ScanError::InvalidAnchorName
    )?;

    let token = match kind
    {
        AnchorKind::Anchor => Token::Anchor(cow!(name)),
        AnchorKind::Alias => Token::Alias(cow!(name)),
    };

    // *anchor 'rest of the line'
    //        ^^^^^^^^^^^^^^^^^^^ rest.len
    // ^^^^^^^ self.buffer.len - rest.len
    let consumed = self.buffer.len() - rest.len();
    advance!(self.buffer, consumed);

    Ok(Some(token))
}
} }
impl<'a> Iterator for Scanner<'a> impl<'a> Iterator for Scanner<'a>
@ -323,6 +382,28 @@ impl DirectiveKind
} }
} }
/// Distinguishes the two node reference tokens the scanner can produce
/// from a leading sigil byte.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum AnchorKind
{
    /// Introduced by `&`
    Anchor,
    /// Introduced by `*`
    Alias,
}

impl AnchorKind
{
    /// Map a sigil byte to its kind: `&` is an anchor, `*` is an
    /// alias, and any other byte yields `None`.
    pub fn new(b: &u8) -> Option<Self>
    {
        match *b
        {
            b'&' => Some(Self::Anchor),
            b'*' => Some(Self::Alias),
            _ => None,
        }
    }
}
#[derive(Debug, PartialEq, Eq, Clone, Copy)] #[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum StreamState enum StreamState
{ {
@ -398,6 +479,22 @@ mod tests
); );
} }
// Scans a stream made up solely of document start markers and checks
// that each marker produces its own token.
#[test]
fn multi_document_empty()
{
// Three `---` markers separated only by newlines, no document content
let data = "---\n---\n---";
let mut s = Scanner::new(data);
// Expected sequence: stream start, one DocumentStart per marker, stream
// end. The final `@ None` entry states what is expected from the scanner
// after StreamEnd (presumably that it yields nothing further).
tokens!(s =>
| Token::StreamStart(StreamEncoding::UTF8),
| Token::DocumentStart,
| Token::DocumentStart,
| Token::DocumentStart,
| Token::StreamEnd,
@ None
);
}
#[test] #[test]
fn document_markers() fn document_markers()
{ {
@ -535,6 +632,71 @@ mod tests
); );
} }
// Scans a lone alias and checks that the token carries the name
// without the leading `*`.
#[test]
fn anchor_alias()
{
// `*` sigil, the name, then a newline directly after the name
let data = "*alias\n";
let mut s = Scanner::new(data);
// Each expectation is paired with the message to report if it is not met
tokens!(s =>
| Token::StreamStart(StreamEncoding::UTF8) => "expected start of stream",
| Token::Alias(cow!("alias")) => "expected an alias named 'alias'",
| Token::StreamEnd => "expected end of stream",
@ None => "expected stream to be finished"
);
}
// Scans a lone anchor and checks that the token carries the name
// without the leading `&` or the surrounding spaces.
#[test]
fn anchor()
{
// The anchor is preceded by a space and followed by a space and a
// newline; none of that whitespace appears in the expected token
let data = " &anchor \n";
let mut s = Scanner::new(data);
// Each expectation is paired with the message to report if it is not met
tokens!(s =>
| Token::StreamStart(StreamEncoding::UTF8) => "expected start of stream",
| Token::Anchor(cow!("anchor")) => "expected an anchor named 'anchor'",
| Token::StreamEnd => "expected end of stream",
@ None => "expected stream to be finished"
);
}
// Scans a larger input combining document markers, directives (with
// trailing comments), an anchor and an alias, and checks the full token
// sequence. As the name says, the input contains no maps, sequences or
// scalars.
#[test]
fn complex_no_map_sequence_scalar()
{
// Raw string so the directive lines can be written verbatim; note the
// input begins with a newline before the first `---`
let data = r##"
---
%YAML 1.2 # our document's version.
%TAG ! primary:namespace # our doc's primary tag
%TAG !! secondary/namespace: # our doc's secondary tag
%TAG !named0! named0: # A named tag
&ref
*ref
...
"##;
let mut s = Scanner::new(data);
// One expected token per significant input line, in input order; the
// `# ...` comments in the input have no corresponding token
tokens!(s =>
| Token::StreamStart(StreamEncoding::UTF8),
| Token::DocumentStart,
| Token::VersionDirective(1, 2),
| Token::TagDirective(cow!("!"), cow!("primary:namespace")),
| Token::TagDirective(cow!("!!"), cow!("secondary/namespace:")),
| Token::TagDirective(cow!("!named0!"), cow!("named0:")),
| Token::Anchor(cow!("ref")),
| Token::Alias(cow!("ref")),
| Token::DocumentEnd,
| Token::StreamEnd,
@ None
);
}
#[test] #[test]
fn eat_whitespace() fn eat_whitespace()
{ {