Feature/scanner/keys #16
|
@ -16,6 +16,9 @@ pub enum ScanError
|
|||
/// ^
|
||||
MissingMinor,
|
||||
|
||||
/// A value was expected, but not found
|
||||
MissingValue,
|
||||
|
||||
/// A directive major or minor digit was not 0..=9
|
||||
InvalidVersion,
|
||||
|
||||
|
|
|
@ -0,0 +1,84 @@
|
|||
#[derive(Debug, Clone)]
|
||||
pub(in crate::scanner) struct Key
|
||||
{
|
||||
possible: Option<KeyPossible>,
|
||||
}
|
||||
|
||||
impl Key
|
||||
{
|
||||
pub fn new() -> Self
|
||||
{
|
||||
Self { possible: None }
|
||||
}
|
||||
|
||||
/// A key is possible / .required at the current stream
|
||||
/// position
|
||||
pub fn possible(&mut self, required: bool)
|
||||
{
|
||||
self.possible = match required
|
||||
{
|
||||
true => KeyPossible::Required,
|
||||
false => KeyPossible::Yes,
|
||||
}
|
||||
.into();
|
||||
}
|
||||
|
||||
/// A key is impossible / illegal at the current stream
|
||||
/// position
|
||||
pub fn impossible(&mut self)
|
||||
{
|
||||
self.possible = Some(KeyPossible::No)
|
||||
}
|
||||
|
||||
/// Is a key allowed at the current position?
|
||||
pub fn allowed(&self) -> bool
|
||||
{
|
||||
self.possible.as_ref().map(|s| s.allowed()).unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Is a key required at the current position?
|
||||
pub fn required(&self) -> bool
|
||||
{
|
||||
self.possible
|
||||
.as_ref()
|
||||
.map(|s| s.required())
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Key
|
||||
{
|
||||
fn default() -> Self
|
||||
{
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub(in crate::scanner) enum KeyPossible
|
||||
{
|
||||
No,
|
||||
Yes,
|
||||
Required,
|
||||
}
|
||||
|
||||
impl KeyPossible
|
||||
{
|
||||
fn allowed(&self) -> bool
|
||||
{
|
||||
matches!(self, Self::Yes | Self::Required)
|
||||
}
|
||||
|
||||
fn required(&self) -> bool
|
||||
{
|
||||
matches!(self, Self::Required)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for KeyPossible
|
||||
{
|
||||
fn default() -> Self
|
||||
{
|
||||
Self::No
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -421,7 +421,7 @@ mod tests
|
|||
.map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;
|
||||
|
||||
assert_eq!(
|
||||
&*scratch, e,
|
||||
scratch, e,
|
||||
"on iteration {}, expected byte sequence {:?}, got {:?}",
|
||||
i, e, &*scratch
|
||||
);
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
use std::ops::Range;
|
||||
|
||||
use crate::{
|
||||
scanner::{
|
||||
error::{ScanError, ScanResult as Result},
|
||||
scalar::escape::flow_unescape,
|
||||
MStats,
|
||||
},
|
||||
token::{Ref, ScalarStyle, Token},
|
||||
token::{ScalarStyle, Token},
|
||||
};
|
||||
|
||||
/// Scans a single or double quoted (flow) scalar returning
|
||||
|
@ -12,15 +14,15 @@ use crate::{
|
|||
/// from .base. This function will attempt to borrow from
|
||||
/// the underlying .base, however it may be required to copy
|
||||
/// into .scratch and borrow from that lifetime.
|
||||
pub(in crate::scanner) fn scan_flow_scalar<'b, 'c>(
|
||||
base: &'b str,
|
||||
pub(in crate::scanner) fn scan_flow_scalar(
|
||||
base: &str,
|
||||
stats: &mut MStats,
|
||||
scratch: &'c mut Vec<u8>,
|
||||
single: bool,
|
||||
) -> Result<(Ref<'b, 'c>, usize)>
|
||||
) -> Result<(ScalarRange, usize)>
|
||||
{
|
||||
use ScalarStyle::{DoubleQuote, SingleQuote};
|
||||
|
||||
let mut scratch = Vec::new();
|
||||
let mut buffer = base;
|
||||
let mut can_borrow = true;
|
||||
let mut escaped_break;
|
||||
|
@ -62,7 +64,7 @@ pub(in crate::scanner) fn scan_flow_scalar<'b, 'c>(
|
|||
// from .base, we must unescape the quote into .scratch
|
||||
if kind == SingleQuote && check!(~buffer => [SINGLE, SINGLE, ..])
|
||||
{
|
||||
set_no_borrow(&mut can_borrow, base, buffer, scratch);
|
||||
set_no_borrow(&mut can_borrow, base, buffer, &mut scratch);
|
||||
|
||||
scratch.push(SINGLE);
|
||||
advance!(buffer, :stats, 2);
|
||||
|
@ -78,7 +80,7 @@ pub(in crate::scanner) fn scan_flow_scalar<'b, 'c>(
|
|||
&& check!(~buffer => [BACKSLASH, ..])
|
||||
&& isBreak!(~buffer, 1)
|
||||
{
|
||||
set_no_borrow(&mut can_borrow, base, buffer, scratch);
|
||||
set_no_borrow(&mut can_borrow, base, buffer, &mut scratch);
|
||||
|
||||
escaped_break = Some(EscapeState::Start);
|
||||
advance!(buffer, :stats, 1);
|
||||
|
@ -86,9 +88,9 @@ pub(in crate::scanner) fn scan_flow_scalar<'b, 'c>(
|
|||
// We've hit an escape sequence, parse it
|
||||
else if kind == DoubleQuote && check!(~buffer => [BACKSLASH, ..])
|
||||
{
|
||||
set_no_borrow(&mut can_borrow, base, buffer, scratch);
|
||||
set_no_borrow(&mut can_borrow, base, buffer, &mut scratch);
|
||||
|
||||
let read = flow_unescape(buffer, scratch)?;
|
||||
let read = flow_unescape(buffer, &mut scratch)?;
|
||||
advance!(buffer, :stats, read);
|
||||
}
|
||||
// Its a non blank character, add it
|
||||
|
@ -135,15 +137,15 @@ pub(in crate::scanner) fn scan_flow_scalar<'b, 'c>(
|
|||
{
|
||||
if !can_borrow
|
||||
{
|
||||
whitespace += 1;
|
||||
scratch.push(buffer.as_bytes()[0]);
|
||||
}
|
||||
whitespace += 1;
|
||||
advance!(buffer, :stats, 1);
|
||||
},
|
||||
// Handle line breaks
|
||||
(false, _) =>
|
||||
{
|
||||
set_no_borrow(&mut can_borrow, base, buffer, scratch);
|
||||
set_no_borrow(&mut can_borrow, base, buffer, &mut scratch);
|
||||
|
||||
if let Some(EscapeState::Start) = escaped_break
|
||||
{
|
||||
|
@ -169,7 +171,7 @@ pub(in crate::scanner) fn scan_flow_scalar<'b, 'c>(
|
|||
// whitespace, therefore join via a space
|
||||
1 =>
|
||||
{
|
||||
set_no_borrow(&mut can_borrow, base, buffer, scratch);
|
||||
set_no_borrow(&mut can_borrow, base, buffer, &mut scratch);
|
||||
|
||||
scratch.truncate(scratch.len() - whitespace);
|
||||
|
||||
|
@ -181,7 +183,7 @@ pub(in crate::scanner) fn scan_flow_scalar<'b, 'c>(
|
|||
// Else we need to append (n - 1) newlines, as we skip the origin line's break
|
||||
n =>
|
||||
{
|
||||
set_no_borrow(&mut can_borrow, base, buffer, scratch);
|
||||
set_no_borrow(&mut can_borrow, base, buffer, &mut scratch);
|
||||
|
||||
scratch.truncate(scratch.len() - whitespace);
|
||||
|
||||
|
@ -194,31 +196,15 @@ pub(in crate::scanner) fn scan_flow_scalar<'b, 'c>(
|
|||
}
|
||||
}
|
||||
|
||||
// Retrieve the token slice, either from the .base slice, or
|
||||
// if we couldn't borrow, the .scratch space
|
||||
let token = if can_borrow
|
||||
let token: ScalarRange = match can_borrow
|
||||
{
|
||||
// Safety: we must be on a code point boundary, as the only
|
||||
// way can get to this section is:
|
||||
//
|
||||
// 1. .base->0 must be a quote
|
||||
// 2. .base->.base.len() - .buffer.len() must be a quote
|
||||
// 3. .base must be valid UTF8 (its a str)
|
||||
let fragment = &base[1..base.len() - buffer.len()];
|
||||
let token = Token::Scalar(cow!(fragment), kind);
|
||||
true => (1..base.len() - buffer.len(), kind).into(),
|
||||
false =>
|
||||
{
|
||||
let utf8 = String::from_utf8(scratch).unwrap();
|
||||
|
||||
Ref::Borrow(token)
|
||||
}
|
||||
else
|
||||
{
|
||||
// Safety: characters added to scratch are either:
|
||||
//
|
||||
// A. added from a str (.base)
|
||||
// B. Unescaped into valid UTF8
|
||||
let fragment = std::str::from_utf8(scratch).unwrap();
|
||||
let token = Token::Scalar(cow!(fragment), kind);
|
||||
|
||||
Ref::Copy(token)
|
||||
(utf8, kind).into()
|
||||
},
|
||||
};
|
||||
|
||||
// Eat the right quote
|
||||
|
@ -242,6 +228,56 @@ fn set_no_borrow(can_borrow: &mut bool, base: &str, buffer: &str, scratch: &mut
|
|||
*can_borrow = false
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(in crate::scanner) struct ScalarRange
|
||||
{
|
||||
inner: ScalarRangeInner,
|
||||
style: ScalarStyle,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
enum ScalarRangeInner
|
||||
{
|
||||
Borrow(Range<usize>),
|
||||
Owned(String),
|
||||
}
|
||||
|
||||
impl ScalarRange
|
||||
{
|
||||
pub fn into_token(self, base: &str) -> Result<Token<'_>>
|
||||
{
|
||||
use ScalarRangeInner::*;
|
||||
|
||||
match self.inner
|
||||
{
|
||||
Borrow(range) => Ok(Token::Scalar(cow!(&base[range]), self.style.clone())),
|
||||
Owned(s) => Ok(Token::Scalar(cow!(s), self.style.clone())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<(Range<usize>, ScalarStyle)> for ScalarRange
|
||||
{
|
||||
fn from((range, style): (Range<usize>, ScalarStyle)) -> Self
|
||||
{
|
||||
Self {
|
||||
inner: ScalarRangeInner::Borrow(range),
|
||||
style,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<(String, ScalarStyle)> for ScalarRange
|
||||
{
|
||||
fn from((s, style): (String, ScalarStyle)) -> Self
|
||||
{
|
||||
Self {
|
||||
inner: ScalarRangeInner::Owned(s),
|
||||
style,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy)]
|
||||
enum EscapeState
|
||||
{
|
||||
|
@ -271,11 +307,11 @@ mod tests
|
|||
fn flow_single_empty() -> TestResult
|
||||
{
|
||||
let data = "''";
|
||||
let scratch = &mut Vec::new();
|
||||
let stats = &mut MStats::new();
|
||||
let expected = Ref::Borrow(Token::Scalar(cow!(""), ScalarStyle::SingleQuote));
|
||||
let expected = Token::Scalar(cow!(""), ScalarStyle::SingleQuote);
|
||||
|
||||
let (scalar, read) = scan_flow_scalar(data, stats, scratch, true)?;
|
||||
let (range, read) = scan_flow_scalar(data, stats, true)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
assert_eq!(read, 2);
|
||||
|
||||
|
@ -291,11 +327,11 @@ mod tests
|
|||
fn flow_single_simple() -> TestResult
|
||||
{
|
||||
let data = "'hello world'";
|
||||
let scratch = &mut Vec::new();
|
||||
let stats = &mut MStats::new();
|
||||
let expected = Ref::Borrow(Token::Scalar(cow!("hello world"), ScalarStyle::SingleQuote));
|
||||
let expected = Token::Scalar(cow!("hello world"), ScalarStyle::SingleQuote);
|
||||
|
||||
let (scalar, read) = scan_flow_scalar(data, stats, scratch, true)?;
|
||||
let (range, read) = scan_flow_scalar(data, stats, true)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
assert_eq!(read, 13);
|
||||
|
||||
|
@ -314,12 +350,32 @@ mod tests
|
|||
second
|
||||
third
|
||||
fourth'"#;
|
||||
let scratch = &mut Vec::new();
|
||||
let stats = &mut MStats::new();
|
||||
let cmp = "first second third fourth";
|
||||
let expected = Ref::Copy(Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote));
|
||||
let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote);
|
||||
|
||||
let (scalar, _read) = scan_flow_scalar(data, stats, scratch, true)?;
|
||||
let (range, _read) = scan_flow_scalar(data, stats, true)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
{
|
||||
bail!("\nexpected: {:?}\nbut got: {:?}", expected, &scalar)
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flow_single_trim_whitespace() -> TestResult
|
||||
{
|
||||
let data = r#"'first
|
||||
second'"#;
|
||||
let stats = &mut MStats::new();
|
||||
let cmp = "first second";
|
||||
let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote);
|
||||
|
||||
let (range, _read) = scan_flow_scalar(data, stats, true)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
{
|
||||
|
@ -337,12 +393,12 @@ fourth'"#;
|
|||
third
|
||||
|
||||
fourth'"#;
|
||||
let scratch = &mut Vec::new();
|
||||
let stats = &mut MStats::new();
|
||||
let cmp = "first second third\nfourth";
|
||||
let expected = Ref::Copy(Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote));
|
||||
let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote);
|
||||
|
||||
let (scalar, _read) = scan_flow_scalar(data, stats, scratch, true)?;
|
||||
let (range, _read) = scan_flow_scalar(data, stats, true)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
{
|
||||
|
@ -356,7 +412,6 @@ fourth'"#;
|
|||
fn flow_single_reject_document()
|
||||
{
|
||||
let data = ["'\n--- '", "'\n---\n'"];
|
||||
let scratch = &mut Vec::new();
|
||||
let expected = ScanError::InvalidFlowScalar;
|
||||
let mut stats;
|
||||
|
||||
|
@ -364,14 +419,14 @@ fourth'"#;
|
|||
{
|
||||
stats = MStats::new();
|
||||
|
||||
match scan_flow_scalar(t, &mut stats, scratch, true)
|
||||
match scan_flow_scalar(t, &mut stats, true)
|
||||
{
|
||||
Err(e) => assert_eq!(
|
||||
e, expected,
|
||||
"on iteration {}, expected error {}, got {}",
|
||||
i, expected, e
|
||||
),
|
||||
Ok((unexpected, _)) => panic!(
|
||||
Ok(unexpected) => panic!(
|
||||
"on iteration {}, expected error {}, got unexpected value {:?}",
|
||||
i, expected, unexpected
|
||||
),
|
||||
|
@ -383,7 +438,6 @@ fourth'"#;
|
|||
fn flow_single_reject_eof()
|
||||
{
|
||||
let data = ["'end space ", "'", "'end word"];
|
||||
let scratch = &mut Vec::new();
|
||||
let expected = ScanError::UnexpectedEOF;
|
||||
let mut stats;
|
||||
|
||||
|
@ -391,14 +445,14 @@ fourth'"#;
|
|||
{
|
||||
stats = MStats::new();
|
||||
|
||||
match scan_flow_scalar(t, &mut stats, scratch, true)
|
||||
match scan_flow_scalar(t, &mut stats, true)
|
||||
{
|
||||
Err(e) => assert_eq!(
|
||||
e, expected,
|
||||
"on iteration {}, expected error {}, got {}",
|
||||
i, expected, e
|
||||
),
|
||||
Ok((unexpected, _)) => panic!(
|
||||
Ok(unexpected) => panic!(
|
||||
"on iteration {}, expected error {}, got unexpected value {:?}",
|
||||
i, expected, unexpected
|
||||
),
|
||||
|
@ -412,11 +466,11 @@ fourth'"#;
|
|||
fn flow_double_empty() -> TestResult
|
||||
{
|
||||
let data = r#""""#;
|
||||
let scratch = &mut Vec::new();
|
||||
let stats = &mut MStats::new();
|
||||
let expected = Ref::Borrow(Token::Scalar(cow!(""), ScalarStyle::DoubleQuote));
|
||||
let expected = Token::Scalar(cow!(""), ScalarStyle::DoubleQuote);
|
||||
|
||||
let (scalar, read) = scan_flow_scalar(data, stats, scratch, false)?;
|
||||
let (range, read) = scan_flow_scalar(data, stats, false)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
assert_eq!(read, 2);
|
||||
|
||||
|
@ -432,11 +486,11 @@ fourth'"#;
|
|||
fn flow_double_simple() -> TestResult
|
||||
{
|
||||
let data = r#""hello world""#;
|
||||
let scratch = &mut Vec::new();
|
||||
let stats = &mut MStats::new();
|
||||
let expected = Ref::Borrow(Token::Scalar(cow!("hello world"), ScalarStyle::DoubleQuote));
|
||||
let expected = Token::Scalar(cow!("hello world"), ScalarStyle::DoubleQuote);
|
||||
|
||||
let (scalar, read) = scan_flow_scalar(data, stats, scratch, false)?;
|
||||
let (range, read) = scan_flow_scalar(data, stats, false)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
assert_eq!(read, 13);
|
||||
|
||||
|
@ -452,14 +506,11 @@ fourth'"#;
|
|||
fn flow_double_unicode_escape() -> TestResult
|
||||
{
|
||||
let data = r#""hello \U000003B1 \u03A9 \u30C3""#;
|
||||
let scratch = &mut Vec::new();
|
||||
let stats = &mut MStats::new();
|
||||
let expected = Ref::Copy(Token::Scalar(
|
||||
cow!("hello α Ω ッ"),
|
||||
ScalarStyle::DoubleQuote,
|
||||
));
|
||||
let expected = Token::Scalar(cow!("hello α Ω ッ"), ScalarStyle::DoubleQuote);
|
||||
|
||||
let (scalar, read) = scan_flow_scalar(data, stats, scratch, false)?;
|
||||
let (range, read) = scan_flow_scalar(data, stats, false)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
{
|
||||
|
@ -484,12 +535,12 @@ fourth'"#;
|
|||
second
|
||||
third
|
||||
fourth""#;
|
||||
let scratch = &mut Vec::new();
|
||||
let stats = &mut MStats::new();
|
||||
let cmp = "first second third fourth";
|
||||
let expected = Ref::Copy(Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote));
|
||||
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
|
||||
|
||||
let (scalar, _read) = scan_flow_scalar(data, stats, scratch, false)?;
|
||||
let (range, _read) = scan_flow_scalar(data, stats, false)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
{
|
||||
|
@ -507,12 +558,32 @@ fourth""#;
|
|||
third
|
||||
|
||||
fourth""#;
|
||||
let scratch = &mut Vec::new();
|
||||
let stats = &mut MStats::new();
|
||||
let cmp = "first second third\nfourth";
|
||||
let expected = Ref::Copy(Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote));
|
||||
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
|
||||
|
||||
let (scalar, _read) = scan_flow_scalar(data, stats, scratch, false)?;
|
||||
let (range, _read) = scan_flow_scalar(data, stats, false)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
{
|
||||
bail!("\nexpected: {:?}\nbut got: {:?}", expected, &scalar)
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flow_double_trim_whitespace() -> TestResult
|
||||
{
|
||||
let data = r#""first
|
||||
second""#;
|
||||
let stats = &mut MStats::new();
|
||||
let cmp = "first second";
|
||||
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
|
||||
|
||||
let (range, _read) = scan_flow_scalar(data, stats, false)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
{
|
||||
|
@ -531,12 +602,12 @@ rst \
|
|||
third
|
||||
|
||||
fourth""#;
|
||||
let scratch = &mut Vec::new();
|
||||
let stats = &mut MStats::new();
|
||||
let cmp = "first second third\nfourth";
|
||||
let expected = Ref::Copy(Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote));
|
||||
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
|
||||
|
||||
let (scalar, _read) = scan_flow_scalar(data, stats, scratch, false)?;
|
||||
let (range, _read) = scan_flow_scalar(data, stats, false)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
{
|
||||
|
|
|
@ -62,8 +62,6 @@
|
|||
//! same character ('!') to mean three different things
|
||||
//! depending on the context. What a massive headache.
|
||||
|
||||
use std::ops::Range;
|
||||
|
||||
use crate::{
|
||||
scanner::{
|
||||
eat_whitespace,
|
||||
|
@ -71,7 +69,7 @@ use crate::{
|
|||
scalar::escape::tag_uri_unescape,
|
||||
MStats,
|
||||
},
|
||||
token::{Ref, Token},
|
||||
token::{Slice, Token},
|
||||
};
|
||||
|
||||
/// Scan a tag directive from .base returning a tag
|
||||
|
@ -80,11 +78,10 @@ use crate::{
|
|||
/// This function will attempt to borrow from .base where
|
||||
/// possible, but may also copy the directive's handle and
|
||||
/// prefix into .scratch if borrowing is not possible.
|
||||
pub(in crate::scanner) fn scan_tag_directive<'b, 'c>(
|
||||
base: &'b str,
|
||||
pub(in crate::scanner) fn scan_tag_directive<'de>(
|
||||
base: &'de str,
|
||||
stats: &mut MStats,
|
||||
scratch: &'c mut Vec<u8>,
|
||||
) -> Result<(Ref<'b, 'c>, usize)>
|
||||
) -> Result<(Token<'de>, usize)>
|
||||
{
|
||||
let mut buffer = base;
|
||||
let mut can_borrow = true;
|
||||
|
@ -109,7 +106,7 @@ pub(in crate::scanner) fn scan_tag_directive<'b, 'c>(
|
|||
|
||||
// %TAG !named! :tag:prefix # a comment\n
|
||||
// ^^^^^^^^^^^
|
||||
let (prefix, amt) = scan_tag_uri(buffer, stats, scratch, &mut can_borrow, false)?;
|
||||
let (prefix, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, false)?;
|
||||
|
||||
// %TAG !named! tag-prefix # a comment\n
|
||||
// ^
|
||||
|
@ -120,20 +117,14 @@ pub(in crate::scanner) fn scan_tag_directive<'b, 'c>(
|
|||
// .buffer
|
||||
let token = if can_borrow
|
||||
{
|
||||
Token::TagDirective(cow!(handle), cow!(&buffer[prefix])).borrowed()
|
||||
Token::TagDirective(cow!(handle), prefix)
|
||||
}
|
||||
// Otherwise, we'll need to copy both the handle and prefix, to unify our
|
||||
// lifetimes. Note that this isn't strictly necessary, but requiring Token to
|
||||
// contain two unrelated lifetimes is just asking for pain and suffering.
|
||||
else
|
||||
{
|
||||
let start = scratch.len();
|
||||
scratch.extend_from_slice(handle.as_bytes());
|
||||
|
||||
let handle = std::str::from_utf8(&scratch[start..]).unwrap();
|
||||
let prefix = std::str::from_utf8(&scratch[prefix]).unwrap();
|
||||
|
||||
Token::TagDirective(cow!(handle), cow!(prefix)).copied()
|
||||
Token::TagDirective(cow!(handle), prefix).into_owned()
|
||||
};
|
||||
|
||||
advance!(buffer, amt);
|
||||
|
@ -154,11 +145,10 @@ pub(in crate::scanner) fn scan_tag_directive<'b, 'c>(
|
|||
/// ("", suffix) => A verbatim tag
|
||||
/// ("!", "") => A non resolving tag
|
||||
/// (handle, suffix) => A primary, secondary or named tag
|
||||
pub(in crate::scanner) fn scan_node_tag<'b, 'c>(
|
||||
base: &'b str,
|
||||
pub(in crate::scanner) fn scan_node_tag<'de>(
|
||||
base: &'de str,
|
||||
stats: &mut MStats,
|
||||
scratch: &'c mut Vec<u8>,
|
||||
) -> Result<(Ref<'b, 'c>, usize)>
|
||||
) -> Result<(Token<'de>, usize)>
|
||||
{
|
||||
let mut buffer = base;
|
||||
let mut can_borrow = true;
|
||||
|
@ -182,13 +172,13 @@ pub(in crate::scanner) fn scan_node_tag<'b, 'c>(
|
|||
|
||||
// !<global:verbatim:tag:> "node"
|
||||
// ^^^^^^^^^^^^^^^^^^^^
|
||||
let (verbatim, amt) = scan_tag_uri(buffer, stats, scratch, &mut can_borrow, true)?;
|
||||
let (verbatim, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, true)?;
|
||||
|
||||
// !<global:verbatim:tag:> "node"
|
||||
// ^
|
||||
check!(~buffer, amt + 1 => b'>', else ScanError::InvalidTagSuffix)?;
|
||||
|
||||
let token = assemble_tag(buffer, scratch, &buffer[0..0], verbatim, can_borrow);
|
||||
let token = assemble_tag(&buffer[0..0], verbatim, can_borrow);
|
||||
|
||||
(token, amt + 1)
|
||||
}
|
||||
|
@ -200,10 +190,7 @@ pub(in crate::scanner) fn scan_node_tag<'b, 'c>(
|
|||
// ! "node"
|
||||
// ^
|
||||
// Single ! without a suffix disables tag resolution
|
||||
Some((TagHandle::Primary(h), amt)) =>
|
||||
{
|
||||
(Token::Tag(cow!(h), cow!(&buffer[0..0])).borrowed(), amt)
|
||||
},
|
||||
Some((TagHandle::Primary(h), amt)) => (Token::Tag(cow!(h), cow!(&buffer[0..0])), amt),
|
||||
// !!global "node" OR !named!global "node"
|
||||
// ^^ ^^^^^^^
|
||||
// Got a secondary or named tag, scan the suffix now
|
||||
|
@ -213,9 +200,9 @@ pub(in crate::scanner) fn scan_node_tag<'b, 'c>(
|
|||
|
||||
// !!global "node" OR !named!global "node"
|
||||
// ^^^^^^ ^^^^^^
|
||||
let (suffix, amt) = scan_tag_uri(buffer, stats, scratch, &mut can_borrow, false)?;
|
||||
let (suffix, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, false)?;
|
||||
|
||||
let token = assemble_tag(buffer, scratch, h, suffix, can_borrow);
|
||||
let token = assemble_tag(h, suffix, can_borrow);
|
||||
|
||||
(token, amt)
|
||||
},
|
||||
|
@ -230,9 +217,9 @@ pub(in crate::scanner) fn scan_node_tag<'b, 'c>(
|
|||
|
||||
// !local "node"
|
||||
// ^^^^^
|
||||
let (suffix, amt) = scan_tag_uri(buffer, stats, scratch, &mut can_borrow, false)?;
|
||||
let (suffix, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, false)?;
|
||||
|
||||
let token = assemble_tag(buffer, scratch, handle, suffix, can_borrow);
|
||||
let token = assemble_tag(handle, suffix, can_borrow);
|
||||
|
||||
(token, amt)
|
||||
},
|
||||
|
@ -252,16 +239,15 @@ pub(in crate::scanner) fn scan_node_tag<'b, 'c>(
|
|||
/// into .base or .scratch.
|
||||
///
|
||||
/// [Link]: https://yaml.org/spec/1.2/spec.html#ns-global-tag-prefix
|
||||
pub(in crate::scanner) fn scan_tag_uri(
|
||||
base: &str,
|
||||
pub(in crate::scanner) fn scan_tag_uri<'de>(
|
||||
base: &'de str,
|
||||
stats: &mut MStats,
|
||||
scratch: &mut Vec<u8>,
|
||||
can_borrow: &mut bool,
|
||||
verbatim: bool,
|
||||
) -> Result<(Range<usize>, usize)>
|
||||
) -> Result<(Slice<'de>, usize)>
|
||||
{
|
||||
let mut buffer = base;
|
||||
let start = scratch.len();
|
||||
let mut scratch = Vec::new();
|
||||
|
||||
loop
|
||||
{
|
||||
|
@ -307,7 +293,7 @@ pub(in crate::scanner) fn scan_tag_uri(
|
|||
|
||||
*can_borrow = false;
|
||||
}
|
||||
let amt = tag_uri_unescape(buffer, scratch, true)?;
|
||||
let amt = tag_uri_unescape(buffer, &mut scratch, true)?;
|
||||
advance!(buffer, :stats, amt);
|
||||
},
|
||||
// EOF before loop end is an error
|
||||
|
@ -321,11 +307,13 @@ pub(in crate::scanner) fn scan_tag_uri(
|
|||
|
||||
if *can_borrow
|
||||
{
|
||||
Ok((0..advance, advance))
|
||||
Ok((cow!(&base[0..advance]), advance))
|
||||
}
|
||||
else
|
||||
{
|
||||
Ok((start..scratch.len(), advance))
|
||||
let utf8 = String::from_utf8(scratch).unwrap();
|
||||
|
||||
Ok((cow!(utf8), advance))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -421,26 +409,14 @@ where
|
|||
|
||||
/// Helper function for constructing
|
||||
/// [Ref][Ref]<[Token::Tag][Token]>s
|
||||
fn assemble_tag<'b, 'c>(
|
||||
buffer: &'b str,
|
||||
scratch: &'c mut Vec<u8>,
|
||||
handle: &'b str,
|
||||
suffix: Range<usize>,
|
||||
can_borrow: bool,
|
||||
) -> Ref<'b, 'c>
|
||||
fn assemble_tag<'de>(handle: &'de str, suffix: Slice<'de>, can_borrow: bool) -> Token<'de>
|
||||
{
|
||||
if can_borrow
|
||||
{
|
||||
Token::Tag(cow!(handle), cow!(&buffer[suffix])).borrowed()
|
||||
Token::Tag(cow!(handle), suffix)
|
||||
}
|
||||
else
|
||||
{
|
||||
let start = scratch.len();
|
||||
scratch.extend_from_slice(handle.as_bytes());
|
||||
|
||||
let h = std::str::from_utf8(&scratch[start..]).unwrap();
|
||||
let t = std::str::from_utf8(&scratch[suffix]).unwrap();
|
||||
|
||||
Token::Tag(cow!(h), cow!(t)).copied()
|
||||
Token::Tag(cow!(handle), suffix).into_owned()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,18 +7,15 @@
|
|||
/// failure
|
||||
macro_rules! tokens {
|
||||
($scanner:expr => $($id:tt $expected:expr $(=> $msg:tt)?),+ ) => {
|
||||
fn __tokens<'b: 'a, 'a>(s: &'a mut crate::scanner::Scanner<'b>) {
|
||||
let mut scratch = Vec::new();
|
||||
let iter = crate::scanner::ScanIter::new(s, &mut scratch);
|
||||
|
||||
let f = move |mut i: crate::scanner::ScanIter| -> std::result::Result<(), ::anyhow::Error> {
|
||||
fn __tokens<'de>(s: &mut crate::scanner::ScanIter<'de>) {
|
||||
let f = move |i: &mut crate::scanner::ScanIter| -> std::result::Result<(), ::anyhow::Error> {
|
||||
|
||||
$( tokens!(@unwrap $id i => $expected $(=> $msg)? ); )+
|
||||
|
||||
Ok(())
|
||||
};
|
||||
|
||||
if let Err(e) = f(iter) {
|
||||
if let Err(e) = f(s) {
|
||||
panic!("tokens! error: {}", e)
|
||||
}
|
||||
}
|
||||
|
@ -39,10 +36,9 @@ macro_rules! tokens {
|
|||
(@unwrap > $scanner:expr => $expected:expr $(=> $msg:tt)? ) => {
|
||||
let event = match $scanner
|
||||
.next()
|
||||
.map(|res| res.map(|r| r.into_inner()))
|
||||
{
|
||||
Some(r) => r,
|
||||
None => anyhow::bail!("Unexpected end of tokens, was expecting: {:?} ~{}", $expected, $scanner.inner.buffer),
|
||||
None => anyhow::bail!("Unexpected end of tokens, was expecting: {:?} ~{}", $expected, $scanner.data),
|
||||
|
||||
};
|
||||
assert_eq!(event, $expected $(, $msg)? )
|
||||
|
@ -58,9 +54,9 @@ macro_rules! tokens {
|
|||
Some(r) => match r
|
||||
{
|
||||
Ok(r) => r,
|
||||
Err(e) => anyhow::bail!("{} ~{}", e, $scanner.inner.buffer),
|
||||
Err(e) => anyhow::bail!("{} ~{}", e, $scanner.data),
|
||||
}
|
||||
None => anyhow::bail!("Unexpected end of tokens, was expecting: {:?} ~{}", $expected, $scanner.inner.buffer)
|
||||
None => anyhow::bail!("Unexpected end of tokens, was expecting: {:?} ~{}", $expected, $scanner.data)
|
||||
};
|
||||
|
||||
assert_eq!(event, $expected)
|
||||
|
@ -72,9 +68,9 @@ macro_rules! tokens {
|
|||
Some(r) => match r
|
||||
{
|
||||
Ok(r) => r,
|
||||
Err(e) => anyhow::bail!("{} ~{}", e, $scanner.inner.buffer),
|
||||
Err(e) => anyhow::bail!("{} ~{}", e, $scanner.data),
|
||||
},
|
||||
None => anyhow::bail!("Unexpected end of tokens, {}: {:?} ~{}", $msg, $expected, $scanner.inner.buffer)
|
||||
None => anyhow::bail!("Unexpected end of tokens, {}: {:?} ~{}", $msg, $expected, $scanner.data)
|
||||
};
|
||||
|
||||
assert_eq!(event, $expected, $msg)
|
||||
|
|
135
src/token.rs
135
src/token.rs
|
@ -87,71 +87,110 @@ impl<'a> Token<'a>
|
|||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn borrowed<'c>(self) -> Ref<'a, 'c>
|
||||
{
|
||||
Ref::Borrow(self)
|
||||
}
|
||||
|
||||
pub fn copied<'b>(self) -> Ref<'b, 'a>
|
||||
{
|
||||
Ref::Copy(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// This allows us to discriminate between a Token with
|
||||
/// different lifetimes, specifically either a lifetime
|
||||
/// 'borrow-ed from the underlying data or 'copy-ied from
|
||||
/// some scratch space provided.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Ref<'borrow, 'copy>
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Marker
|
||||
{
|
||||
Borrow(Token<'borrow>),
|
||||
Copy(Token<'copy>),
|
||||
/// The stream's start, with the byte (encoding)
|
||||
/// [virtual]
|
||||
StreamStart,
|
||||
/// The stream's end [virtual]
|
||||
StreamEnd,
|
||||
/// The %YAML directive, (major,minor)
|
||||
VersionDirective,
|
||||
/// The %TAG directive
|
||||
TagDirective,
|
||||
/// A ---
|
||||
DocumentStart,
|
||||
/// A ...
|
||||
DocumentEnd,
|
||||
/// Indentation increase for a block (sequence)
|
||||
BlockSequenceStart,
|
||||
/// Indentation increase for a block (map)
|
||||
BlockMappingStart,
|
||||
/// Indentation decrease for a block
|
||||
BlockEnd,
|
||||
/// A '['
|
||||
FlowSequenceStart,
|
||||
/// A ']'
|
||||
FlowSequenceEnd,
|
||||
/// A '{'
|
||||
FlowMappingStart,
|
||||
/// A '}'
|
||||
FlowMappingEnd,
|
||||
/// A '-'
|
||||
BlockEntry,
|
||||
/// A ','
|
||||
FlowEntry,
|
||||
/// Either a '?' or nothing
|
||||
Key,
|
||||
/// A ':'
|
||||
Value,
|
||||
/// An alias (*anchor)
|
||||
Alias,
|
||||
/// An anchor (&anchor)
|
||||
Anchor,
|
||||
/// A tag (!handle, !suffix)
|
||||
Tag,
|
||||
/// A scalar (value, style)
|
||||
Scalar,
|
||||
}
|
||||
|
||||
impl<'b, 'c> Ref<'b, 'c>
|
||||
impl Marker
|
||||
{
|
||||
/// Unifies the lifetimes of the underlying Token,
|
||||
/// returning one that lives at least as long as
|
||||
/// 'borrow. Note that this _will_ allocate if a copy
|
||||
/// needs to be made.
|
||||
pub fn into_inner(self) -> Token<'b>
|
||||
fn from_token(t: &Token<'_>) -> Self
|
||||
{
|
||||
match self
|
||||
use Token::*;
|
||||
|
||||
match t
|
||||
{
|
||||
Self::Borrow(t) => t,
|
||||
Self::Copy(t) => t.into_owned(),
|
||||
StreamStart(_) => Self::StreamStart,
|
||||
StreamEnd => Self::StreamEnd,
|
||||
VersionDirective(_, _) => Self::VersionDirective,
|
||||
TagDirective(_, _) => Self::TagDirective,
|
||||
DocumentStart => Self::DocumentStart,
|
||||
DocumentEnd => Self::DocumentEnd,
|
||||
BlockSequenceStart => Self::BlockSequenceStart,
|
||||
BlockMappingStart => Self::BlockMappingStart,
|
||||
BlockEnd => Self::BlockEnd,
|
||||
FlowSequenceStart => Self::FlowSequenceStart,
|
||||
FlowSequenceEnd => Self::FlowSequenceEnd,
|
||||
FlowMappingStart => Self::FlowMappingStart,
|
||||
FlowMappingEnd => Self::FlowMappingEnd,
|
||||
BlockEntry => Self::BlockEntry,
|
||||
FlowEntry => Self::FlowEntry,
|
||||
Key => Self::Key,
|
||||
Value => Self::Value,
|
||||
Alias(_) => Self::Alias,
|
||||
Anchor(_) => Self::Anchor,
|
||||
Tag(_, _) => Self::Tag,
|
||||
Scalar(_, _) => Self::Scalar,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Short hand check if the Ref contains a borrowed
|
||||
/// Token
|
||||
pub fn is_borrowed(&self) -> bool
|
||||
impl Default for Marker
|
||||
{
|
||||
fn default() -> Self
|
||||
{
|
||||
match self
|
||||
{
|
||||
Self::Borrow(_) => true,
|
||||
Self::Copy(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Short hand check if the Ref contains a copied Token
|
||||
pub fn is_copied(&self) -> bool
|
||||
{
|
||||
!self.is_borrowed()
|
||||
Self::StreamStart
|
||||
}
|
||||
}
|
||||
|
||||
impl<'b, 'c> PartialEq<Token<'_>> for Ref<'b, 'c>
|
||||
impl From<&'_ Token<'_>> for Marker
|
||||
{
|
||||
fn from(t: &'_ Token<'_>) -> Self
|
||||
{
|
||||
Self::from_token(t)
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq<Token<'_>> for Marker
|
||||
{
|
||||
fn eq(&self, other: &Token<'_>) -> bool
|
||||
{
|
||||
match self
|
||||
{
|
||||
Self::Borrow(t) => t.eq(other),
|
||||
Self::Copy(t) => t.eq(other),
|
||||
}
|
||||
self == &Self::from(other)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -161,7 +200,7 @@ pub enum StreamEncoding
|
|||
UTF8,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum ScalarStyle
|
||||
{
|
||||
Plain,
|
||||
|
|
Loading…
Reference in New Issue