Feature/scanner/option #27
|
@ -23,6 +23,9 @@ jobs:
|
|||
- name: "Unit Tests"
|
||||
cmd: test
|
||||
args: --lib --bins
|
||||
- name: "Unit Tests: feature.test_buffer_small"
|
||||
cmd: test
|
||||
args: --lib --features=test_buffer_small
|
||||
include:
|
||||
- os: ubuntu-latest
|
||||
sccache-path: /home/runner/.cache/sccache
|
||||
|
|
|
@ -30,6 +30,18 @@ version = "1.0.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "ctor"
|
||||
version = "0.1.20"
|
||||
|
@ -139,5 +151,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"anyhow",
|
||||
"atoi",
|
||||
"bitflags",
|
||||
"cfg-if",
|
||||
"pretty_assertions",
|
||||
]
|
||||
|
|
|
@ -6,9 +6,18 @@ edition = "2018"
|
|||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[features]
|
||||
# PRIVATE! FOR USE IN TEST ONLY!
|
||||
test_buffer = []
|
||||
test_buffer_large = ["test_buffer"]
|
||||
test_buffer_medium = ["test_buffer"]
|
||||
test_buffer_small = ["test_buffer"]
|
||||
|
||||
[dependencies]
|
||||
atoi = "0.4"
|
||||
bitflags = "1"
|
||||
|
||||
[dev-dependencies]
|
||||
anyhow = "1"
|
||||
pretty_assertions = "0.7"
|
||||
cfg-if = "1"
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
use super::{
|
||||
error::{ScanError, ScanResult as Result},
|
||||
flag::Flags,
|
||||
stats::MStats,
|
||||
ALIAS, ANCHOR,
|
||||
};
|
||||
|
@ -8,6 +9,7 @@ use crate::token::Token;
|
|||
/// Scan an anchor or alias from the underlying .buffer
|
||||
/// returning the relevant Token
|
||||
pub(in crate::scanner) fn scan_anchor<'de>(
|
||||
opts: Flags,
|
||||
buffer: &mut &'de str,
|
||||
stats: &mut MStats,
|
||||
kind: &AnchorKind,
|
||||
|
@ -17,7 +19,7 @@ pub(in crate::scanner) fn scan_anchor<'de>(
|
|||
|
||||
// *anchor 'rest of the line'
|
||||
// ^^^^^^
|
||||
let anchor = take_while(buffer.as_bytes(), u8::is_ascii_alphanumeric);
|
||||
let anchor = take_while(opts, buffer.as_bytes(), u8::is_ascii_alphanumeric)?;
|
||||
|
||||
let anchor = advance!(<- *buffer, :stats, anchor.len());
|
||||
|
||||
|
@ -33,6 +35,7 @@ pub(in crate::scanner) fn scan_anchor<'de>(
|
|||
// There does not necessarily need to be a whitespace so we
|
||||
// also check against a list of valid starting
|
||||
// tokens
|
||||
cache!(~buffer, 1, opts)?;
|
||||
check!(~buffer
|
||||
=> b' ' | b'\n' | b'?' | b',' | b']' | b'}' | b'%' | b'@' | b'`',
|
||||
else ScanError::InvalidAnchorName
|
||||
|
@ -62,10 +65,10 @@ impl AnchorKind
|
|||
/// starts from the given .byte
|
||||
pub fn new(byte: &u8) -> Option<Self>
|
||||
{
|
||||
let s = match byte
|
||||
let s = match *byte
|
||||
{
|
||||
&ALIAS => Self::Alias,
|
||||
&ANCHOR => Self::Anchor,
|
||||
ALIAS => Self::Alias,
|
||||
ANCHOR => Self::Anchor,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
|
@ -73,7 +76,7 @@ impl AnchorKind
|
|||
}
|
||||
}
|
||||
|
||||
fn take_while<F>(b: &[u8], f: F) -> &[u8]
|
||||
fn take_while<F>(opts: Flags, base: &[u8], f: F) -> Result<&[u8]>
|
||||
where
|
||||
F: Fn(&u8) -> bool,
|
||||
{
|
||||
|
@ -81,10 +84,12 @@ where
|
|||
|
||||
loop
|
||||
{
|
||||
match b.get(index)
|
||||
let i = cache!(base, @index, 1, opts)?;
|
||||
|
||||
match base.get(index)
|
||||
{
|
||||
Some(b) if f(b) => index += 1,
|
||||
_ => return &b[..index],
|
||||
Some(b) if f(b) => index += i,
|
||||
_ => return Ok(&base[..index]),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,13 +5,14 @@ use super::{
|
|||
stats::MStats,
|
||||
};
|
||||
use crate::{
|
||||
scanner::{eat_whitespace, tag::scan_tag_directive, COMMENTS},
|
||||
scanner::{eat_whitespace, flag::Flags, tag::scan_tag_directive, COMMENTS},
|
||||
token::Token,
|
||||
};
|
||||
|
||||
/// Scans a version or tag directive from .buffer, based on
|
||||
/// the .kind of directive, returning the relevant Token.
|
||||
pub(in crate::scanner) fn scan_directive<'de>(
|
||||
opts: Flags,
|
||||
buffer: &mut &'de str,
|
||||
mut stats: &mut MStats,
|
||||
kind: &DirectiveKind,
|
||||
|
@ -22,21 +23,25 @@ pub(in crate::scanner) fn scan_directive<'de>(
|
|||
DirectiveKind::Version =>
|
||||
{
|
||||
// Chomp any preceding whitespace
|
||||
advance!(*buffer, eat_whitespace(buffer, &mut stats, !COMMENTS));
|
||||
advance!(
|
||||
*buffer,
|
||||
eat_whitespace(opts, buffer, &mut stats, !COMMENTS)?
|
||||
);
|
||||
|
||||
// %YAML 1.1
|
||||
// ^
|
||||
let (major, skip) = scan_directive_version(buffer)?;
|
||||
let (major, skip) = scan_directive_version(opts, buffer)?;
|
||||
advance!(*buffer, :stats, skip);
|
||||
|
||||
// %YAML 1.1
|
||||
// ^
|
||||
cache!(~buffer, 1, opts)?;
|
||||
check!(~buffer => b'.', else ScanError::InvalidVersion)?;
|
||||
advance!(*buffer, :stats, 1);
|
||||
|
||||
// %YAML 1.1
|
||||
// ^
|
||||
let (minor, skip) = scan_directive_version(buffer)?;
|
||||
let (minor, skip) = scan_directive_version(opts, buffer)?;
|
||||
advance!(*buffer, :stats, skip);
|
||||
|
||||
Ok(Token::VersionDirective(major, minor))
|
||||
|
@ -44,10 +49,13 @@ pub(in crate::scanner) fn scan_directive<'de>(
|
|||
DirectiveKind::Tag =>
|
||||
{
|
||||
// Chomp any spaces up to the handle
|
||||
advance!(*buffer, eat_whitespace(buffer, &mut stats, !COMMENTS));
|
||||
advance!(
|
||||
*buffer,
|
||||
eat_whitespace(opts, buffer, &mut stats, !COMMENTS)?
|
||||
);
|
||||
|
||||
// Scan the directive, copying if necessary
|
||||
let (token, amt) = scan_tag_directive(buffer, &mut stats)?;
|
||||
let (token, amt) = scan_tag_directive(opts, buffer, &mut stats)?;
|
||||
advance!(*buffer, amt);
|
||||
|
||||
Ok(token)
|
||||
|
@ -97,15 +105,15 @@ impl DirectiveKind
|
|||
}
|
||||
}
|
||||
|
||||
fn scan_directive_version(b: &str) -> Result<(u8, usize)>
|
||||
fn scan_directive_version(opts: Flags, b: &str) -> Result<(u8, usize)>
|
||||
{
|
||||
let v_slice = take_while(b.as_bytes(), u8::is_ascii_digit);
|
||||
let v_slice = take_while(opts, b.as_bytes(), u8::is_ascii_digit)?;
|
||||
let v = atoi(v_slice).ok_or(ScanError::InvalidVersion)?;
|
||||
|
||||
Ok((v, v_slice.len()))
|
||||
}
|
||||
|
||||
fn take_while<F>(b: &[u8], f: F) -> &[u8]
|
||||
fn take_while<F>(opts: Flags, base: &[u8], f: F) -> Result<&[u8]>
|
||||
where
|
||||
F: Fn(&u8) -> bool,
|
||||
{
|
||||
|
@ -113,10 +121,12 @@ where
|
|||
|
||||
loop
|
||||
{
|
||||
match b.get(index)
|
||||
let i = cache!(base, @index, 1, opts)?;
|
||||
|
||||
match base.get(index)
|
||||
{
|
||||
Some(b) if f(b) => index += 1,
|
||||
_ => return &b[..index],
|
||||
Some(b) if f(b) => index += i,
|
||||
_ => return Ok(&base[..index]),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -72,6 +72,10 @@ pub enum ScanError
|
|||
|
||||
/// An integer overflowed
|
||||
IntOverflow,
|
||||
|
||||
/// The underlying buffer should be extended before
|
||||
/// calling the Scanner again
|
||||
Extend,
|
||||
}
|
||||
|
||||
impl fmt::Display for ScanError
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
use bitflags::bitflags;
|
||||
|
||||
/// An empty, zeroed flag set. This is the default set, with
|
||||
/// all other flags disabled.
|
||||
pub const O_ZEROED: Flags = Flags::empty();
|
||||
/// Hints to the Scanner if the given byte slice can be
|
||||
/// extended. Typically used when processing data in chunks,
|
||||
/// or in circumstances when there may be more data in the
|
||||
/// future.
|
||||
///
|
||||
/// If this flag is set the Scanner will return a
|
||||
/// ScanError::Extend if the byte stream terminates before a
|
||||
/// token can be scanned.
|
||||
pub const O_EXTENDABLE: Flags = Flags::EXTENDABLE;
|
||||
/// Sets the Scanner to lazily process the underlying byte
|
||||
/// stream.
|
||||
///
|
||||
/// In particular, the Scanner will not fully process
|
||||
/// scalars, only locating the start and end markers in the
|
||||
/// stream. This means that any allocations, escape parsing
|
||||
/// or line joins will be deferred until the caller
|
||||
/// explicitly requests the token. This _also applies to
|
||||
/// errors_ in the scalar itself, which will not be caught
|
||||
/// until the caller requests the token!
|
||||
pub const O_LAZY: Flags = Flags::LAZY;
|
||||
|
||||
bitflags! {
|
||||
/// Directives controlling various behaviors of the Scanner,
|
||||
/// see each O_ variant for an explanation of how each works
|
||||
#[derive(Default)]
|
||||
pub struct Flags: u32 {
|
||||
const EXTENDABLE = 0b00000001;
|
||||
const LAZY = 0b00000010;
|
||||
}
|
||||
}
|
|
@ -87,6 +87,60 @@ macro_rules! cow {
|
|||
};
|
||||
}
|
||||
|
||||
/// Count how many _bytes_ the next .codepoints UTF8
/// codepoints of .buffer occupy, starting at .offset.
///
/// Counting stops early as soon as widthOf! reports a width
/// of 0 for the current position. When that happens and
/// O_EXTENDABLE is set in .opts the result is
/// Err(ScanError::Extend), otherwise the bytes counted so
/// far are returned as Ok.
///
/// Modifiers
///     ~ .buffer := .buffer.as_bytes()
///
/// Variants
///     /1 .buffer, .codepoints
///         := /4 .buffer, @0, .codepoints, O_ZEROED
///     /2 .buffer, @.offset, .codepoints
///         := /4 .buffer, @.offset, .codepoints, O_ZEROED
///     /3 .buffer, .codepoints, .opts
///         := /4 .buffer, @0, .codepoints, .opts
///     /4 .buffer, @.offset, .codepoints, .opts
macro_rules! cache {
    // Modifier: view the .buffer as bytes, then start over
    (~$buffer:expr $(, @$offset:expr )?, $codepoints:expr $(, $opts:expr )?) => {
        cache!($buffer.as_bytes(), $( @$offset, )? $codepoints $(, $opts )?)
    };
    // Public entry: append the default offset (@0) and default
    // options (O_ZEROED) behind whatever the caller supplied
    ($buffer:expr $(, @$offset:expr )?, $codepoints:expr $(, $opts:expr )?) => {
        cache!(@inner $buffer, $( @$offset, )? @0, $codepoints $(, $opts )?, $crate::scanner::flag::O_ZEROED)
    };
    // Keep the first offset / options seen, swallowing the
    // appended defaults if the caller provided their own
    (@inner $buffer:expr, @$offset:expr, $( @$_default_offset:expr, )? $codepoints:expr, $opts:expr $(, $_default_opts:expr )?) => {
        cache!(@priv $buffer, $offset, $codepoints, $opts.contains($crate::scanner::flag::O_EXTENDABLE))
    };
    (@priv $buffer:expr, $offset:expr, $codepoints:expr, $extend:expr) => {{
        let mut cursor = $offset;
        let mut read = 0;
        let mut outcome = Ok(());

        for _ in 0..$codepoints
        {
            let width = widthOf!($buffer, cursor);

            // Nothing more to measure at the cursor, only an error
            // if the caller said the buffer can be extended
            if width == 0
            {
                if $extend
                {
                    outcome = Err($crate::scanner::error::ScanError::Extend);
                }

                break;
            }

            cursor += width;
            read += width;
        }

        outcome.map(|()| read)
    }};
}
|
||||
|
||||
/// Check the .buffer (@ .offset) matches the given
|
||||
/// .pattern, optionally returning an .error.
|
||||
///
|
||||
|
|
|
@ -8,6 +8,7 @@ mod context;
|
|||
mod directive;
|
||||
mod entry;
|
||||
mod error;
|
||||
mod flag;
|
||||
mod key;
|
||||
mod scalar;
|
||||
mod stats;
|
||||
|
@ -21,6 +22,7 @@ use crate::{
|
|||
directive::{scan_directive, DirectiveKind},
|
||||
entry::TokenEntry,
|
||||
error::{ScanError, ScanResult as Result},
|
||||
flag::*,
|
||||
key::{Key, KeyPossible},
|
||||
scalar::{block::scan_block_scalar, flow::scan_flow_scalar, plain::scan_plain_scalar},
|
||||
stats::MStats,
|
||||
|
@ -66,7 +68,12 @@ impl Scanner
|
|||
|
||||
/// Scan some tokens from the given .base into .tokens
|
||||
/// returning the number added.
|
||||
pub fn scan_tokens<'de>(&mut self, base: &'de str, tokens: &mut Tokens<'de>) -> Result<usize>
|
||||
pub fn scan_tokens<'de>(
|
||||
&mut self,
|
||||
opts: Flags,
|
||||
base: &'de str,
|
||||
tokens: &mut Tokens<'de>,
|
||||
) -> Result<usize>
|
||||
{
|
||||
let mut num_tokens = 0;
|
||||
let starting_tokens = tokens.len();
|
||||
|
@ -76,9 +83,14 @@ impl Scanner
|
|||
{
|
||||
if let Some(mut buffer) = base.get(self.offset..)
|
||||
{
|
||||
self.scan_next_token(&mut buffer, tokens)?;
|
||||
let run = self.scan_next_token(opts, &mut buffer, tokens);
|
||||
|
||||
self.offset = base.len() - buffer.len();
|
||||
if matches!(run, Err(ScanError::Extend) | Ok(_))
|
||||
{
|
||||
self.offset = base.len() - buffer.len();
|
||||
}
|
||||
|
||||
run?;
|
||||
|
||||
num_tokens = tokens.len() - starting_tokens;
|
||||
}
|
||||
|
@ -87,8 +99,12 @@ impl Scanner
|
|||
Ok(num_tokens)
|
||||
}
|
||||
|
||||
fn scan_next_token<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>)
|
||||
-> Result<()>
|
||||
fn scan_next_token<'de>(
|
||||
&mut self,
|
||||
opts: Flags,
|
||||
base: &mut &'de str,
|
||||
tokens: &mut Tokens<'de>,
|
||||
) -> Result<()>
|
||||
{
|
||||
// Is it the beginning of the stream?
|
||||
if self.state == StreamState::Start
|
||||
|
@ -98,7 +114,7 @@ impl Scanner
|
|||
}
|
||||
|
||||
// Eat whitespace to the next delimiter
|
||||
self.eat_whitespace(base, COMMENTS);
|
||||
self.eat_whitespace(opts, base, COMMENTS)?;
|
||||
|
||||
// Remove any saved key positions that cannot contain keys
|
||||
// anymore
|
||||
|
@ -114,11 +130,17 @@ impl Scanner
|
|||
return self.fetch_stream_end(*base, tokens);
|
||||
}
|
||||
|
||||
// 4 characters is the longest token we can encounter, one
|
||||
// of:
|
||||
// - '--- '
|
||||
// - '... '
|
||||
cache!(~base, 4, opts)?;
|
||||
|
||||
// Fetch the next token(s)
|
||||
match base.as_bytes()
|
||||
{
|
||||
// Is it a directive?
|
||||
[DIRECTIVE, ..] if self.stats.column == 0 => self.fetch_directive(base, tokens),
|
||||
[DIRECTIVE, ..] if self.stats.column == 0 => self.fetch_directive(opts, base, tokens),
|
||||
|
||||
// Is it a document marker?
|
||||
[b @ b'-', b'-', b'-', ..] | [b @ b'.', b'.', b'.', ..]
|
||||
|
@ -161,25 +183,25 @@ impl Scanner
|
|||
},
|
||||
|
||||
// Is it an anchor or alias?
|
||||
[ANCHOR, ..] | [ALIAS, ..] => self.fetch_anchor(base, tokens),
|
||||
[ANCHOR, ..] | [ALIAS, ..] => self.fetch_anchor(opts, base, tokens),
|
||||
|
||||
// Is it a tag?
|
||||
[TAG, ..] => self.fetch_tag(base, tokens),
|
||||
[TAG, ..] => self.fetch_tag(opts, base, tokens),
|
||||
|
||||
// Is it a block scalar?
|
||||
[c @ LITERAL, ..] | [c @ FOLDED, ..] if self.context.is_block() =>
|
||||
{
|
||||
self.fetch_block_scalar(base, tokens, *c == FOLDED)
|
||||
self.fetch_block_scalar(opts, base, tokens, *c == FOLDED)
|
||||
},
|
||||
|
||||
// Is it a flow scalar?
|
||||
[SINGLE, ..] | [DOUBLE, ..] => self.fetch_flow_scalar(base, tokens),
|
||||
[SINGLE, ..] | [DOUBLE, ..] => self.fetch_flow_scalar(opts, base, tokens),
|
||||
|
||||
// Is it a plain scalar?
|
||||
_ if self.is_plain_scalar(*base) => self.fetch_plain_scalar(base, tokens),
|
||||
_ if self.is_plain_scalar(*base) => self.fetch_plain_scalar(opts, base, tokens),
|
||||
|
||||
// Otherwise it's an error
|
||||
_ => return Err(ScanError::UnknownDelimiter),
|
||||
_ => Err(ScanError::UnknownDelimiter),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -253,8 +275,12 @@ impl Scanner
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn fetch_directive<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>)
|
||||
-> Result<()>
|
||||
fn fetch_directive<'de>(
|
||||
&mut self,
|
||||
opts: Flags,
|
||||
base: &mut &'de str,
|
||||
tokens: &mut Tokens<'de>,
|
||||
) -> Result<()>
|
||||
{
|
||||
let mut buffer = *base;
|
||||
let mut stats = MStats::new();
|
||||
|
@ -264,11 +290,8 @@ impl Scanner
|
|||
return Ok(());
|
||||
}
|
||||
|
||||
// Reset indent to starting level
|
||||
self.unroll_indent(tokens, STARTING_INDENT)?;
|
||||
|
||||
// Reset saved key
|
||||
self.remove_saved_key()?;
|
||||
// Ensure we can read the 'YAML' or 'TAG' identifiers
|
||||
cache!(~buffer, @1, 4, opts)?;
|
||||
|
||||
// Safety: we check above that we have len >= 1 (e.g a '%')
|
||||
//
|
||||
|
@ -282,7 +305,13 @@ impl Scanner
|
|||
advance!(buffer, :stats, 1 + kind.len());
|
||||
|
||||
// Scan the directive token from the .buffer
|
||||
let token = scan_directive(&mut buffer, &mut stats, &kind)?;
|
||||
let token = scan_directive(opts, &mut buffer, &mut stats, &kind)?;
|
||||
|
||||
// Reset indent to starting level
|
||||
self.unroll_indent(tokens, STARTING_INDENT)?;
|
||||
|
||||
// Reset saved key
|
||||
self.remove_saved_key()?;
|
||||
|
||||
// A key cannot follow a directive (a newline is required)
|
||||
self.simple_key_allowed = false;
|
||||
|
@ -298,7 +327,12 @@ impl Scanner
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn fetch_tag<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>) -> Result<()>
|
||||
fn fetch_tag<'de>(
|
||||
&mut self,
|
||||
opts: Flags,
|
||||
base: &mut &'de str,
|
||||
tokens: &mut Tokens<'de>,
|
||||
) -> Result<()>
|
||||
{
|
||||
let mut buffer = *base;
|
||||
let mut stats = MStats::new();
|
||||
|
@ -308,11 +342,11 @@ impl Scanner
|
|||
return Ok(());
|
||||
}
|
||||
|
||||
self.save_key(!REQUIRED)?;
|
||||
|
||||
let (token, amt) = scan_node_tag(buffer, &mut stats)?;
|
||||
let (token, amt) = scan_node_tag(opts, buffer, &mut stats)?;
|
||||
advance!(buffer, amt);
|
||||
|
||||
self.save_key(!REQUIRED)?;
|
||||
|
||||
// A key may not start after a tag (only before)
|
||||
self.simple_key_allowed = false;
|
||||
|
||||
|
@ -327,7 +361,12 @@ impl Scanner
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn fetch_anchor<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>) -> Result<()>
|
||||
fn fetch_anchor<'de>(
|
||||
&mut self,
|
||||
opts: Flags,
|
||||
base: &mut &'de str,
|
||||
tokens: &mut Tokens<'de>,
|
||||
) -> Result<()>
|
||||
{
|
||||
let mut buffer = *base;
|
||||
let mut stats = MStats::new();
|
||||
|
@ -343,12 +382,12 @@ impl Scanner
|
|||
_ => return Ok(()),
|
||||
};
|
||||
|
||||
// Scan the token from the .buffer
|
||||
let token = scan_anchor(opts, &mut buffer, &mut stats, &kind)?;
|
||||
|
||||
// An anchor / alias may start a simple key
|
||||
self.save_key(!REQUIRED)?;
|
||||
|
||||
// Scan the token from the .buffer
|
||||
let token = scan_anchor(&mut buffer, &mut stats, &kind)?;
|
||||
|
||||
// A key may not start after an anchor (only before)
|
||||
self.simple_key_allowed = false;
|
||||
|
||||
|
@ -365,6 +404,7 @@ impl Scanner
|
|||
|
||||
fn fetch_flow_scalar<'de>(
|
||||
&mut self,
|
||||
opts: Flags,
|
||||
base: &mut &'de str,
|
||||
tokens: &mut Tokens<'de>,
|
||||
) -> Result<()>
|
||||
|
@ -378,11 +418,11 @@ impl Scanner
|
|||
return Ok(());
|
||||
}
|
||||
|
||||
self.save_key(!REQUIRED)?;
|
||||
|
||||
let (range, amt) = scan_flow_scalar(buffer, &mut stats, single)?;
|
||||
let (range, amt) = scan_flow_scalar(opts, buffer, &mut stats, single)?;
|
||||
let token = range.into_token(buffer)?;
|
||||
|
||||
self.save_key(!REQUIRED)?;
|
||||
|
||||
// A key cannot follow a flow scalar, as we're either
|
||||
// currently in a key (which should be followed by a
|
||||
// value), or a value which needs a separator (e.g line
|
||||
|
@ -399,6 +439,7 @@ impl Scanner
|
|||
|
||||
fn fetch_plain_scalar<'de>(
|
||||
&mut self,
|
||||
opts: Flags,
|
||||
base: &mut &'de str,
|
||||
tokens: &mut Tokens<'de>,
|
||||
) -> Result<()>
|
||||
|
@ -406,9 +447,9 @@ impl Scanner
|
|||
let buffer = *base;
|
||||
let mut stats = self.stats.clone();
|
||||
|
||||
self.save_key(!REQUIRED)?;
|
||||
let (token, amt) = scan_plain_scalar(opts, buffer, &mut stats, &self.context)?;
|
||||
|
||||
let (token, amt) = scan_plain_scalar(buffer, &mut stats, &self.context)?;
|
||||
self.save_key(!REQUIRED)?;
|
||||
|
||||
// A simple key cannot follow a plain scalar, there must be
|
||||
// an indicator or new line before a key is valid
|
||||
|
@ -425,6 +466,7 @@ impl Scanner
|
|||
|
||||
fn fetch_block_scalar<'de>(
|
||||
&mut self,
|
||||
opts: Flags,
|
||||
base: &mut &'de str,
|
||||
tokens: &mut Tokens<'de>,
|
||||
fold: bool,
|
||||
|
@ -440,7 +482,7 @@ impl Scanner
|
|||
// always follow a block scalar.
|
||||
self.simple_key_allowed = true;
|
||||
|
||||
let (token, amt) = scan_block_scalar(buffer, &mut stats, &self.context, fold)?;
|
||||
let (token, amt) = scan_block_scalar(opts, buffer, &mut stats, &self.context, fold)?;
|
||||
|
||||
advance!(*base, amt);
|
||||
self.stats = stats;
|
||||
|
@ -869,11 +911,11 @@ impl Scanner
|
|||
/// Chomp whitespace and optionally comments until we
|
||||
/// reach the next token, updating buffer[0] to the
|
||||
/// beginning of the new token
|
||||
fn eat_whitespace(&mut self, buffer: &mut &str, comments: bool) -> usize
|
||||
fn eat_whitespace(&mut self, opts: Flags, buffer: &mut &str, comments: bool) -> Result<usize>
|
||||
{
|
||||
let mut stats = MStats::new();
|
||||
|
||||
let amt = eat_whitespace(*buffer, &mut stats, comments);
|
||||
let amt = eat_whitespace(opts, *buffer, &mut stats, comments)?;
|
||||
|
||||
// A new line may start a key in the block context
|
||||
//
|
||||
|
@ -887,7 +929,7 @@ impl Scanner
|
|||
advance!(*buffer, amt);
|
||||
self.stats += stats;
|
||||
|
||||
amt
|
||||
Ok(amt)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -902,7 +944,7 @@ enum StreamState
|
|||
/// Chomp whitespace and .comments if allowed until a non
|
||||
/// whitespace character is encountered, returning the
|
||||
/// amount chomped
|
||||
fn eat_whitespace(base: &str, stats: &mut MStats, comments: bool) -> usize
|
||||
fn eat_whitespace(opts: Flags, base: &str, stats: &mut MStats, comments: bool) -> Result<usize>
|
||||
{
|
||||
let mut buffer = base;
|
||||
let mut chomp_line = false;
|
||||
|
@ -910,6 +952,8 @@ fn eat_whitespace(base: &str, stats: &mut MStats, comments: bool) -> usize
|
|||
|
||||
loop
|
||||
{
|
||||
cache!(~buffer, 1, opts)?;
|
||||
|
||||
let (blank, brk) = (isBlank!(~buffer), isBreak!(~buffer));
|
||||
|
||||
match (blank, brk)
|
||||
|
@ -943,7 +987,7 @@ fn eat_whitespace(base: &str, stats: &mut MStats, comments: bool) -> usize
|
|||
}
|
||||
}
|
||||
|
||||
base.len() - buffer.len()
|
||||
Ok(base.len() - buffer.len())
|
||||
}
|
||||
|
||||
/// Roll the indentation level and push a block collection
|
||||
|
@ -1077,12 +1121,22 @@ mod tests
|
|||
mod tag;
|
||||
mod whitespace;
|
||||
|
||||
#[cfg(feature = "test_buffer")]
|
||||
mod str_reader;
|
||||
|
||||
use super::*;
|
||||
use crate::token::{ScalarStyle::*, Token::*};
|
||||
|
||||
struct ScanIter<'de>
|
||||
{
|
||||
data: &'de str,
|
||||
#[cfg(feature = "test_buffer")]
|
||||
data: str_reader::StrReader<'de>,
|
||||
#[cfg(feature = "test_buffer")]
|
||||
opts: Flags,
|
||||
|
||||
#[cfg(not(feature = "test_buffer"))]
|
||||
data: &'de str,
|
||||
|
||||
scan: Scanner,
|
||||
tokens: Tokens<'de>,
|
||||
|
||||
|
@ -1094,7 +1148,14 @@ mod tests
|
|||
pub fn new(data: &'de str) -> Self
|
||||
{
|
||||
Self {
|
||||
#[cfg(feature = "test_buffer")]
|
||||
data: str_reader::StrReader::new(data, str_reader::StrReader::BUF_SIZE),
|
||||
#[cfg(feature = "test_buffer")]
|
||||
opts: O_ZEROED | O_EXTENDABLE,
|
||||
|
||||
#[cfg(not(feature = "test_buffer"))]
|
||||
data,
|
||||
|
||||
scan: Scanner::new(),
|
||||
tokens: Tokens::new(),
|
||||
done: false,
|
||||
|
@ -1105,10 +1166,7 @@ mod tests
|
|||
{
|
||||
if (!self.done) && self.tokens.is_empty()
|
||||
{
|
||||
if let 0 = self.scan.scan_tokens(self.data, &mut self.tokens)?
|
||||
{
|
||||
self.done = true
|
||||
}
|
||||
self.get_next_token()?;
|
||||
}
|
||||
|
||||
if !self.done
|
||||
|
@ -1120,6 +1178,52 @@ mod tests
|
|||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
/// Scan the next batch of tokens out of the chunked test
/// reader, enlarging the readable window and retrying each
/// time the Scanner answers with ScanError::Extend.
///
/// Once the reader reports it can no longer expand,
/// O_EXTENDABLE is removed from .opts before the retry.
/// Sets .done when a scan adds no tokens; any error other
/// than Extend is returned to the caller.
#[cfg(feature = "test_buffer")]
fn get_next_token(&mut self) -> Result<()>
{
    loop
    {
        let scanned = self
            .scan
            .scan_tokens(self.opts, self.data.read(), &mut self.tokens);

        match scanned
        {
            // Nothing was added, the stream is finished
            Ok(0) =>
            {
                self.done = true;

                return Ok(());
            },
            Ok(_) => return Ok(()),
            // Scanner ran out of data, widen the window and go again
            Err(ScanError::Extend) =>
            {
                self.data.expand(str_reader::StrReader::BUF_EXTEND);

                if !self.data.expandable()
                {
                    self.opts.remove(O_EXTENDABLE);
                }
            },
            Err(e) => return Err(e),
        }
    }
}
|
||||
|
||||
#[cfg(not(feature = "test_buffer"))]
|
||||
fn get_next_token(&mut self) -> Result<()>
|
||||
{
|
||||
if let 0 = self
|
||||
.scan
|
||||
.scan_tokens(O_ZEROED, self.data, &mut self.tokens)?
|
||||
{
|
||||
self.done = true
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Iterator for ScanIter<'de>
|
||||
|
|
|
@ -26,6 +26,7 @@ use crate::{
|
|||
scanner::{
|
||||
context::Context,
|
||||
error::{ScanError, ScanResult as Result},
|
||||
flag::Flags,
|
||||
stats::MStats,
|
||||
},
|
||||
token::{ScalarStyle, Slice, Token},
|
||||
|
@ -40,6 +41,7 @@ use crate::{
|
|||
/// YAML 1.2: Section 8.1
|
||||
/// yaml.org/spec/1.2/#c-b-block-header(m,t)
|
||||
pub(in crate::scanner) fn scan_block_scalar<'de>(
|
||||
opts: Flags,
|
||||
base: &'de str,
|
||||
stats: &mut MStats,
|
||||
cxt: &Context,
|
||||
|
@ -78,14 +80,16 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
|
|||
};
|
||||
|
||||
// Eat the '|' or '>'
|
||||
cache!(~buffer, 1, opts)?;
|
||||
advance!(buffer, :local_stats, 1);
|
||||
|
||||
// Calculate any headers this scalar may have
|
||||
let (chomp, explicit) = scan_headers(&mut buffer, &mut local_stats)?;
|
||||
let (chomp, explicit) = scan_headers(opts, &mut buffer, &mut local_stats)?;
|
||||
|
||||
// The header line must contain nothing after the headers
|
||||
// excluding a comment until the line ending
|
||||
skip_blanks(&mut buffer, &mut local_stats, COMMENTS)?;
|
||||
skip_blanks(opts, &mut buffer, &mut local_stats, COMMENTS)?;
|
||||
cache!(~buffer, 1, opts)?;
|
||||
if !isWhiteSpaceZ!(~buffer)
|
||||
{
|
||||
return Err(ScanError::InvalidBlockScalar);
|
||||
|
@ -102,6 +106,7 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
|
|||
None =>
|
||||
{
|
||||
indent = detect_indent_level(
|
||||
opts,
|
||||
&mut buffer,
|
||||
&mut local_stats,
|
||||
cxt,
|
||||
|
@ -201,8 +206,11 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
|
|||
}
|
||||
|
||||
// Eat the line's content until the line break (or EOF)
|
||||
cache!(~buffer, 1, opts)?;
|
||||
while !isBreakZ!(~buffer)
|
||||
{
|
||||
cache!(~buffer, 1, opts)?;
|
||||
|
||||
if !can_borrow
|
||||
{
|
||||
scratch.push(buffer.as_bytes()[0])
|
||||
|
@ -218,6 +226,7 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
|
|||
}
|
||||
|
||||
// Eat the line break (if not EOF)
|
||||
cache!(~buffer, 1, opts)?;
|
||||
if isBreak!(~buffer)
|
||||
{
|
||||
advance!(buffer, :local_stats, @line);
|
||||
|
@ -226,6 +235,7 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
|
|||
|
||||
// Chomp indentation until the next indented line
|
||||
scan_indent(
|
||||
opts,
|
||||
&mut buffer,
|
||||
&mut local_stats,
|
||||
&mut lines,
|
||||
|
@ -246,12 +256,18 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
|
|||
}
|
||||
|
||||
/// Retrieve a block scalar's headers
|
||||
fn scan_headers(buffer: &mut &str, stats: &mut MStats) -> Result<(ChompStyle, IndentHeader)>
|
||||
fn scan_headers(
|
||||
opts: Flags,
|
||||
buffer: &mut &str,
|
||||
stats: &mut MStats,
|
||||
) -> Result<(ChompStyle, IndentHeader)>
|
||||
{
|
||||
let mut skip = 0;
|
||||
let mut indent = None;
|
||||
let mut chomp = ChompStyle::Clip;
|
||||
|
||||
cache!(~buffer, 2, opts)?;
|
||||
|
||||
// Set the explicit indent if it exists.
|
||||
//
|
||||
// Note that we silently eat an invalid indent (0) rather
|
||||
|
@ -291,6 +307,7 @@ fn scan_headers(buffer: &mut &str, stats: &mut MStats) -> Result<(ChompStyle, In
|
|||
|
||||
/// Chomp the indentation spaces of a block scalar
|
||||
fn scan_indent(
|
||||
opts: Flags,
|
||||
buffer: &mut &str,
|
||||
stats: &mut MStats,
|
||||
lines: &mut usize,
|
||||
|
@ -303,6 +320,8 @@ fn scan_indent(
|
|||
return Ok(false);
|
||||
}
|
||||
|
||||
cache!(~buffer, 1, opts)?;
|
||||
|
||||
while stats.column < indent && isWhiteSpace!(~buffer)
|
||||
{
|
||||
// Indentation space, chomp
|
||||
|
@ -321,6 +340,8 @@ fn scan_indent(
|
|||
*lines += 1;
|
||||
advance!(*buffer, :stats, @line);
|
||||
}
|
||||
|
||||
cache!(~buffer, 1, opts)?;
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
|
@ -433,6 +454,7 @@ fn scan_chomp<'de>(
|
|||
/// Auto-detect the indentation level from the first non
|
||||
/// header line of a block scalar
|
||||
fn detect_indent_level(
|
||||
opts: Flags,
|
||||
buffer: &mut &str,
|
||||
stats: &mut MStats,
|
||||
cxt: &Context,
|
||||
|
@ -444,9 +466,13 @@ fn detect_indent_level(
|
|||
|
||||
loop
|
||||
{
|
||||
cache!(~buffer, 1, opts)?;
|
||||
|
||||
// Chomp indentation spaces, erroring on a tab
|
||||
while isBlank!(~buffer)
|
||||
{
|
||||
cache!(~buffer, 1, opts)?;
|
||||
|
||||
if check!(~buffer => b'\t')
|
||||
{
|
||||
return Err(ScanError::InvalidTab);
|
||||
|
@ -467,6 +493,7 @@ fn detect_indent_level(
|
|||
}
|
||||
|
||||
// If it's not a line break we're done, exit the loop
|
||||
cache!(~buffer, 1, opts)?;
|
||||
if !isBreak!(~buffer)
|
||||
{
|
||||
break;
|
||||
|
@ -489,10 +516,13 @@ fn detect_indent_level(
|
|||
|
||||
/// Skip any blanks (and .comments) until we reach a line
|
||||
/// ending or non blank character
|
||||
fn skip_blanks(buffer: &mut &str, stats: &mut MStats, comments: bool) -> Result<()>
|
||||
fn skip_blanks(opts: Flags, buffer: &mut &str, stats: &mut MStats, comments: bool) -> Result<()>
|
||||
{
|
||||
cache!(~buffer, 1, opts)?;
|
||||
|
||||
while isBlank!(~buffer)
|
||||
{
|
||||
cache!(~buffer, 1, opts)?;
|
||||
advance!(*buffer, :stats, 1);
|
||||
}
|
||||
|
||||
|
@ -500,6 +530,7 @@ fn skip_blanks(buffer: &mut &str, stats: &mut MStats, comments: bool) -> Result<
|
|||
{
|
||||
while !isBreakZ!(~buffer)
|
||||
{
|
||||
cache!(~buffer, 1, opts)?;
|
||||
advance!(*buffer, :stats, 1);
|
||||
}
|
||||
}
|
||||
|
@ -577,6 +608,7 @@ mod tests
|
|||
use ScalarStyle::{Folded, Literal};
|
||||
|
||||
use super::*;
|
||||
use crate::scanner::flag::O_ZEROED;
|
||||
|
||||
type TestResult = anyhow::Result<()>;
|
||||
|
||||
|
@ -615,7 +647,7 @@ mod tests
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("this is a simple block scalar"), Literal);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -630,7 +662,7 @@ mod tests
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("trailing lines...\n"), Literal);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -645,7 +677,7 @@ mod tests
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("trailing lines..."), Literal);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -660,7 +692,7 @@ mod tests
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("trailing lines...\n\n\n"), Literal);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -679,7 +711,7 @@ mod tests
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("some folded\nlines\nhere\n"), Literal);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -700,7 +732,7 @@ mod tests
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("\n\nsome folded\nlines\nhere"), Literal);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -721,7 +753,7 @@ mod tests
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("some folded\nlines\nhere\n\n\n"), Literal);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -742,7 +774,7 @@ some.other.key: value";
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("some folded\nlines\nhere\n\n\n"), Literal);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -763,7 +795,7 @@ some.other.key: value";
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("this\n\nhas\n\nbreaks"), Literal);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -778,7 +810,7 @@ some.other.key: value";
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("simple block scalar"), Literal);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -795,7 +827,7 @@ some.other.key: value";
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("this is a simple block scalar"), Folded);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -810,7 +842,7 @@ some.other.key: value";
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("trailing lines...\n"), Folded);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -825,7 +857,7 @@ some.other.key: value";
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("trailing lines..."), Folded);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -840,7 +872,7 @@ some.other.key: value";
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("trailing lines...\n\n\n"), Folded);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -859,7 +891,7 @@ some.other.key: value";
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("some folded lines here\n"), Folded);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -880,7 +912,7 @@ some.other.key: value";
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("\n\nsome folded lines here"), Folded);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -901,7 +933,7 @@ some.other.key: value";
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("some folded lines here\n\n\n"), Folded);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -922,7 +954,7 @@ some.other.key: value";
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("some folded lines here\n\n\n"), Folded);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -943,7 +975,7 @@ some.other.key: value";
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("this\nhas\nbreaks"), Folded);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -958,7 +990,7 @@ some.other.key: value";
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("simple block scalar"), Folded);
|
||||
|
||||
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
|
||||
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
//! This module exports function(s) for handling scalar
|
||||
//! escapes in YAML documents.
|
||||
|
||||
use crate::scanner::error::{ScanError, ScanResult as Result};
|
||||
use crate::scanner::{
|
||||
error::{ScanError, ScanResult as Result},
|
||||
flag::Flags,
|
||||
};
|
||||
|
||||
/// Unescape a given YAML escape sequence as defined in
|
||||
/// [Section 5.7][Link]. Specifically, YAML defines 18
|
||||
|
@ -15,12 +18,17 @@ use crate::scanner::error::{ScanError, ScanResult as Result};
|
|||
/// escape sequence.
|
||||
///
|
||||
/// [Link]: https://yaml.org/spec/1.2/spec.html#c-escape
|
||||
pub(in crate::scanner) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Result<usize>
|
||||
pub(in crate::scanner) fn flow_unescape(
|
||||
opts: Flags,
|
||||
base: &str,
|
||||
scratch: &mut Vec<u8>,
|
||||
) -> Result<usize>
|
||||
{
|
||||
let mut buffer = base;
|
||||
let mut escape_len: Option<u8> = None;
|
||||
|
||||
// Not an escape sequence, early exit
|
||||
cache!(~buffer, 1, opts)?;
|
||||
if !check!(~buffer => b'\\')
|
||||
{
|
||||
return Ok(0);
|
||||
|
@ -30,6 +38,7 @@ pub(in crate::scanner) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Re
|
|||
|
||||
// See 5.7: Escaped Characters
|
||||
// yaml.org/spec/1.2/spec.html#id2776092
|
||||
cache!(~buffer, 1, opts)?;
|
||||
match buffer.as_bytes()
|
||||
{
|
||||
[b'0', ..] => scratch.push(b'\0'),
|
||||
|
@ -60,6 +69,9 @@ pub(in crate::scanner) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Re
|
|||
|
||||
if let Some(sequence) = escape_len
|
||||
{
|
||||
// Note that we cache the _entire_ escape sequence before
|
||||
// calling write_unicode_point
|
||||
cache!(~buffer, sequence, opts)?;
|
||||
let amt = write_unicode_point(buffer, scratch, sequence)?;
|
||||
advance!(buffer, amt);
|
||||
}
|
||||
|
@ -73,6 +85,7 @@ pub(in crate::scanner) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Re
|
|||
///
|
||||
/// [Link]: https://yaml.org/spec/1.2/spec.html#ns-uri-char
|
||||
pub(in crate::scanner) fn tag_uri_unescape(
|
||||
opts: Flags,
|
||||
base: &str,
|
||||
scratch: &mut Vec<u8>,
|
||||
_directive: bool,
|
||||
|
@ -82,6 +95,8 @@ pub(in crate::scanner) fn tag_uri_unescape(
|
|||
let mut codepoint_len: i8 = 0;
|
||||
|
||||
while {
|
||||
cache!(~buffer, 3, opts)?;
|
||||
|
||||
if buffer.len() < 3
|
||||
{
|
||||
return Err(ScanError::UnexpectedEOF);
|
||||
|
@ -224,6 +239,7 @@ mod tests
|
|||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::*;
|
||||
use crate::scanner::flag::O_ZEROED;
|
||||
|
||||
type TestResult = anyhow::Result<()>;
|
||||
|
||||
|
@ -268,7 +284,7 @@ mod tests
|
|||
for (i, (&t, &ex)) in data.into_iter().zip(expected).enumerate()
|
||||
{
|
||||
scratch.clear();
|
||||
flow_unescape(t, scratch)
|
||||
flow_unescape(O_ZEROED, t, scratch)
|
||||
.map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;
|
||||
|
||||
assert_eq!(scratch, ex, "on iteration {}", i)
|
||||
|
@ -329,7 +345,7 @@ mod tests
|
|||
let mut c: [u8; 4] = [0; 4];
|
||||
scratch.clear();
|
||||
|
||||
flow_unescape(t, scratch)
|
||||
flow_unescape(O_ZEROED, t, scratch)
|
||||
.map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;
|
||||
|
||||
assert_eq!(
|
||||
|
@ -370,7 +386,7 @@ mod tests
|
|||
{
|
||||
scratch.clear();
|
||||
|
||||
let consumed = flow_unescape(t, scratch)
|
||||
let consumed = flow_unescape(O_ZEROED, t, scratch)
|
||||
.map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;
|
||||
|
||||
assert_eq!(
|
||||
|
@ -417,7 +433,7 @@ mod tests
|
|||
{
|
||||
scratch.clear();
|
||||
|
||||
let consumed = tag_uri_unescape(t, scratch, true)
|
||||
let consumed = tag_uri_unescape(O_ZEROED, t, scratch, true)
|
||||
.map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;
|
||||
|
||||
assert_eq!(
|
||||
|
@ -446,7 +462,7 @@ mod tests
|
|||
let scratch = &mut Vec::new();
|
||||
let expected = ScanError::UnexpectedEOF;
|
||||
|
||||
match tag_uri_unescape(data, scratch, true)
|
||||
match tag_uri_unescape(O_ZEROED, data, scratch, true)
|
||||
{
|
||||
Err(e) if e == expected => Ok(()),
|
||||
|
||||
|
@ -466,7 +482,7 @@ mod tests
|
|||
let scratch = &mut Vec::new();
|
||||
let expected = ScanError::UnknownEscape;
|
||||
|
||||
match tag_uri_unescape(data, scratch, true)
|
||||
match tag_uri_unescape(O_ZEROED, data, scratch, true)
|
||||
{
|
||||
Err(e) if e == expected => Ok(()),
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@ use std::ops::Range;
|
|||
use crate::{
|
||||
scanner::{
|
||||
error::{ScanError, ScanResult as Result},
|
||||
flag::Flags,
|
||||
scalar::escape::flow_unescape,
|
||||
stats::MStats,
|
||||
},
|
||||
|
@ -15,6 +16,7 @@ use crate::{
|
|||
/// the underlying .base, however it may be required to copy
|
||||
/// into .scratch and borrow from that lifetime.
|
||||
pub(in crate::scanner) fn scan_flow_scalar(
|
||||
opts: Flags,
|
||||
base: &str,
|
||||
stats: &mut MStats,
|
||||
single: bool,
|
||||
|
@ -35,6 +37,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
|
|||
};
|
||||
|
||||
// Eat left quote
|
||||
cache!(~buffer, 1, opts)?;
|
||||
advance!(buffer, :stats, 1);
|
||||
|
||||
'scalar: loop
|
||||
|
@ -44,6 +47,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
|
|||
// Even in a scalar context, YAML prohibits starting a line
|
||||
// with document stream tokens followed by a blank
|
||||
// character
|
||||
cache!(~buffer, 4, opts)?;
|
||||
if isDocumentIndicator!(~buffer, :stats)
|
||||
{
|
||||
return Err(ScanError::InvalidFlowScalar);
|
||||
|
@ -55,9 +59,14 @@ pub(in crate::scanner) fn scan_flow_scalar(
|
|||
return Err(ScanError::UnexpectedEOF);
|
||||
}
|
||||
|
||||
cache!(~buffer, 1, opts)?;
|
||||
|
||||
// Consume non whitespace characters
|
||||
while !isWhiteSpaceZ!(~buffer)
|
||||
{
|
||||
// Longest sequence we can hit is 2 characters ('')
|
||||
cache!(~buffer, 2, opts)?;
|
||||
|
||||
// if we encounter an escaped quote we can no longer borrow
|
||||
// from .base, we must unescape the quote into .scratch
|
||||
if kind == SingleQuote && check!(~buffer => [SINGLE, SINGLE, ..])
|
||||
|
@ -88,7 +97,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
|
|||
{
|
||||
set_no_borrow(&mut can_borrow, base, buffer, &mut scratch);
|
||||
|
||||
let read = flow_unescape(buffer, &mut scratch)?;
|
||||
let read = flow_unescape(opts, buffer, &mut scratch)?;
|
||||
advance!(buffer, :stats, read);
|
||||
}
|
||||
// It's a non-blank character, add it
|
||||
|
@ -126,6 +135,8 @@ pub(in crate::scanner) fn scan_flow_scalar(
|
|||
// Consume whitespace
|
||||
loop
|
||||
{
|
||||
cache!(~buffer, 1, opts)?;
|
||||
|
||||
match (isBlank!(~buffer), isBreak!(~buffer))
|
||||
{
|
||||
// No more whitespace, exit loop
|
||||
|
@ -206,6 +217,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
|
|||
};
|
||||
|
||||
// Eat the right quote
|
||||
cache!(~buffer, 1, opts)?;
|
||||
advance!(buffer, :stats, 1);
|
||||
|
||||
let advance = base.len() - buffer.len();
|
||||
|
@ -296,6 +308,7 @@ mod tests
|
|||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::*;
|
||||
use crate::scanner::flag::O_ZEROED;
|
||||
|
||||
type TestResult = anyhow::Result<()>;
|
||||
|
||||
|
@ -308,7 +321,7 @@ mod tests
|
|||
let stats = &mut MStats::new();
|
||||
let expected = Token::Scalar(cow!(""), ScalarStyle::SingleQuote);
|
||||
|
||||
let (range, read) = scan_flow_scalar(data, stats, true)?;
|
||||
let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
assert_eq!(read, 2);
|
||||
|
@ -328,7 +341,7 @@ mod tests
|
|||
let stats = &mut MStats::new();
|
||||
let expected = Token::Scalar(cow!("hello world"), ScalarStyle::SingleQuote);
|
||||
|
||||
let (range, read) = scan_flow_scalar(data, stats, true)?;
|
||||
let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
assert_eq!(read, 13);
|
||||
|
@ -352,7 +365,7 @@ fourth'"#;
|
|||
let cmp = "first second third fourth";
|
||||
let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote);
|
||||
|
||||
let (range, _read) = scan_flow_scalar(data, stats, true)?;
|
||||
let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
|
@ -372,7 +385,7 @@ fourth'"#;
|
|||
let cmp = "first second";
|
||||
let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote);
|
||||
|
||||
let (range, _read) = scan_flow_scalar(data, stats, true)?;
|
||||
let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
|
@ -395,7 +408,7 @@ fourth'"#;
|
|||
let cmp = "first second third\nfourth";
|
||||
let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote);
|
||||
|
||||
let (range, _read) = scan_flow_scalar(data, stats, true)?;
|
||||
let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
|
@ -417,7 +430,7 @@ fourth'"#;
|
|||
{
|
||||
stats = MStats::new();
|
||||
|
||||
match scan_flow_scalar(t, &mut stats, true)
|
||||
match scan_flow_scalar(O_ZEROED, t, &mut stats, true)
|
||||
{
|
||||
Err(e) => assert_eq!(
|
||||
e, expected,
|
||||
|
@ -443,7 +456,7 @@ fourth'"#;
|
|||
{
|
||||
stats = MStats::new();
|
||||
|
||||
match scan_flow_scalar(t, &mut stats, true)
|
||||
match scan_flow_scalar(O_ZEROED, t, &mut stats, true)
|
||||
{
|
||||
Err(e) => assert_eq!(
|
||||
e, expected,
|
||||
|
@ -467,7 +480,7 @@ fourth'"#;
|
|||
let stats = &mut MStats::new();
|
||||
let expected = Token::Scalar(cow!(""), ScalarStyle::DoubleQuote);
|
||||
|
||||
let (range, read) = scan_flow_scalar(data, stats, false)?;
|
||||
let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
assert_eq!(read, 2);
|
||||
|
@ -487,7 +500,7 @@ fourth'"#;
|
|||
let stats = &mut MStats::new();
|
||||
let expected = Token::Scalar(cow!("hello world"), ScalarStyle::DoubleQuote);
|
||||
|
||||
let (range, read) = scan_flow_scalar(data, stats, false)?;
|
||||
let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
assert_eq!(read, 13);
|
||||
|
@ -507,7 +520,7 @@ fourth'"#;
|
|||
let stats = &mut MStats::new();
|
||||
let expected = Token::Scalar(cow!("hello α Ω ッ"), ScalarStyle::DoubleQuote);
|
||||
|
||||
let (range, read) = scan_flow_scalar(data, stats, false)?;
|
||||
let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
|
@ -537,7 +550,7 @@ fourth""#;
|
|||
let cmp = "first second third fourth";
|
||||
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
|
||||
|
||||
let (range, _read) = scan_flow_scalar(data, stats, false)?;
|
||||
let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
|
@ -560,7 +573,7 @@ fourth""#;
|
|||
let cmp = "first second third\nfourth";
|
||||
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
|
||||
|
||||
let (range, _read) = scan_flow_scalar(data, stats, false)?;
|
||||
let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
|
@ -580,7 +593,7 @@ fourth""#;
|
|||
let cmp = "first second";
|
||||
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
|
||||
|
||||
let (range, _read) = scan_flow_scalar(data, stats, false)?;
|
||||
let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
|
@ -604,7 +617,7 @@ rst \
|
|||
let cmp = "first second third\nfourth";
|
||||
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
|
||||
|
||||
let (range, _read) = scan_flow_scalar(data, stats, false)?;
|
||||
let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
|
||||
let scalar = range.into_token(data)?;
|
||||
|
||||
if !(scalar == expected)
|
||||
|
|
|
@ -2,6 +2,7 @@ use crate::{
|
|||
scanner::{
|
||||
context::Context,
|
||||
error::{ScanError, ScanResult as Result},
|
||||
flag::Flags,
|
||||
stats::MStats,
|
||||
},
|
||||
token::{ScalarStyle, Token},
|
||||
|
@ -17,6 +18,7 @@ use crate::{
|
|||
/// YAML 1.2: Section 7.3.3
|
||||
/// yaml.org/spec/1.2/spec.html#ns-plain-first(c)
|
||||
pub(in crate::scanner) fn scan_plain_scalar<'de>(
|
||||
opts: Flags,
|
||||
base: &'de str,
|
||||
stats: &mut MStats,
|
||||
cxt: &Context,
|
||||
|
@ -53,6 +55,7 @@ pub(in crate::scanner) fn scan_plain_scalar<'de>(
|
|||
|
||||
// Inside flow contexts you *may not* start a plain scalar
|
||||
// with a ':', '?', or '-' followed by a flow indicator
|
||||
cache!(~buffer, 2, opts)?;
|
||||
if flow_context && check!(~buffer => b':' | b'?' | b'-') && flow_indicator(buffer, 1)
|
||||
{
|
||||
return Err(ScanError::InvalidPlainScalar);
|
||||
|
@ -60,6 +63,10 @@ pub(in crate::scanner) fn scan_plain_scalar<'de>(
|
|||
|
||||
'scalar: loop
|
||||
{
|
||||
// 4 is the largest character sequence we can encounter
|
||||
// (document indicators)
|
||||
cache!(~buffer, 4, opts)?;
|
||||
|
||||
if buffer.is_empty()
|
||||
{
|
||||
break 'scalar;
|
||||
|
@ -110,6 +117,8 @@ pub(in crate::scanner) fn scan_plain_scalar<'de>(
|
|||
// Handle non whitespace characters
|
||||
while !isWhiteSpaceZ!(~buffer)
|
||||
{
|
||||
cache!(~buffer, 2, opts)?;
|
||||
|
||||
if (check!(~buffer => b':') && isWhiteSpaceZ!(~buffer, 1))
|
||||
|| flow_context && flow_indicator(buffer, 0)
|
||||
{
|
||||
|
@ -128,6 +137,8 @@ pub(in crate::scanner) fn scan_plain_scalar<'de>(
|
|||
// Handle whitespace characters
|
||||
loop
|
||||
{
|
||||
cache!(~buffer, 1, opts)?;
|
||||
|
||||
match (isBlank!(~buffer), isBreak!(~buffer))
|
||||
{
|
||||
// No more whitespace, exit loop
|
||||
|
@ -242,6 +253,7 @@ mod tests
|
|||
use ScalarStyle::Plain;
|
||||
|
||||
use super::*;
|
||||
use crate::scanner::flag::O_ZEROED;
|
||||
|
||||
type TestResult = anyhow::Result<()>;
|
||||
|
||||
|
@ -280,7 +292,7 @@ mod tests
|
|||
|
||||
for (i, &data) in tests.iter().enumerate()
|
||||
{
|
||||
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)
|
||||
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)
|
||||
.map_err(|e| anyhow!("iteration {}: {}", i, e))?;
|
||||
|
||||
assert_eq!(token, expected, "on iteration {}", i);
|
||||
|
@ -301,7 +313,7 @@ mod tests
|
|||
|
||||
for (i, &data) in tests.iter().enumerate()
|
||||
{
|
||||
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)
|
||||
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)
|
||||
.map_err(|e| anyhow!("iteration {}: {}", i, e))?;
|
||||
|
||||
assert_eq!(token, expected, "on iteration {}", i);
|
||||
|
@ -320,7 +332,7 @@ mod tests
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!(""), Plain);
|
||||
|
||||
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
|
||||
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -339,7 +351,7 @@ mod tests
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("hello"), Plain);
|
||||
|
||||
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
|
||||
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -356,7 +368,7 @@ mod tests
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("hello, world!"), Plain);
|
||||
|
||||
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
|
||||
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -379,7 +391,7 @@ mod tests
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("hello this is a multi-line scalar"), Plain);
|
||||
|
||||
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
|
||||
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -406,7 +418,7 @@ mod tests
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("this is\n\na scalar\nwith line#breaks"), Plain);
|
||||
|
||||
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
|
||||
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -423,7 +435,7 @@ mod tests
|
|||
let cxt = cxt!(block -> [0]);
|
||||
let expected = Token::Scalar(cow!("hello"), Plain);
|
||||
|
||||
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
|
||||
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -442,7 +454,7 @@ mod tests
|
|||
let cxt = cxt!(flow -> 1);
|
||||
let expected = Token::Scalar(cow!("hello"), Plain);
|
||||
|
||||
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
|
||||
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -461,7 +473,7 @@ mod tests
|
|||
|
||||
for (i, &data) in tests.iter().enumerate()
|
||||
{
|
||||
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)
|
||||
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)
|
||||
.map_err(|e| anyhow!("iteration {}: {}", i, e))?;
|
||||
|
||||
assert_eq!(token, expected, "on iteration {}", i);
|
||||
|
@ -485,7 +497,7 @@ string!";
|
|||
let cxt = cxt!(flow -> 1);
|
||||
let expected = Token::Scalar(cow!("hello this is a multi-line string!"), Plain);
|
||||
|
||||
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
|
||||
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -514,7 +526,7 @@ breaks
|
|||
let cxt = cxt!(flow -> 1);
|
||||
let expected = Token::Scalar(cow!("hello this\nbig\nstring\nhas\nline\nbreaks"), Plain);
|
||||
|
||||
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
|
||||
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -531,7 +543,7 @@ breaks
|
|||
let cxt = cxt!(flow -> 1);
|
||||
let expected = Token::Scalar(cow!("hello"), Plain);
|
||||
|
||||
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
|
||||
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -548,7 +560,7 @@ breaks
|
|||
let cxt = cxt!(flow -> 1);
|
||||
let expected = Token::Scalar(cow!("hello"), Plain);
|
||||
|
||||
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
|
||||
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
@ -569,7 +581,7 @@ breaks
|
|||
let cxt = cxt!(flow -> 1);
|
||||
let expected = Token::Scalar(cow!("hello"), Plain);
|
||||
|
||||
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
|
||||
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
|
||||
|
||||
assert_eq!(token, expected);
|
||||
|
||||
|
|
|
@ -66,6 +66,7 @@ use crate::{
|
|||
scanner::{
|
||||
eat_whitespace,
|
||||
error::{ScanError, ScanResult as Result},
|
||||
flag::Flags,
|
||||
scalar::escape::tag_uri_unescape,
|
||||
stats::MStats,
|
||||
},
|
||||
|
@ -79,6 +80,7 @@ use crate::{
|
|||
/// possible, but may also copy the directive's handle and
|
||||
/// prefix into .scratch if borrowing is not possible.
|
||||
pub(in crate::scanner) fn scan_tag_directive<'de>(
|
||||
opts: Flags,
|
||||
base: &'de str,
|
||||
stats: &mut MStats,
|
||||
) -> Result<(Token<'de>, usize)>
|
||||
|
@ -88,7 +90,7 @@ pub(in crate::scanner) fn scan_tag_directive<'de>(
|
|||
|
||||
// %TAG !named! :tag:prefix # a comment\n
|
||||
// ^^^^^^^
|
||||
let (handle, amt) = match scan_tag_handle(buffer, stats)?
|
||||
let (handle, amt) = match scan_tag_handle(opts, buffer, stats)?
|
||||
{
|
||||
Some((handle, amt)) => (handle.into_inner(), amt),
|
||||
None => return Err(ScanError::InvalidTagHandle),
|
||||
|
@ -99,14 +101,15 @@ pub(in crate::scanner) fn scan_tag_directive<'de>(
|
|||
// ^
|
||||
// Check that there is >= 1 whitespace between handle and
|
||||
// prefix
|
||||
cache!(~buffer, 1, opts)?;
|
||||
isBlank!(~buffer, else ScanError::InvalidTagPrefix)?;
|
||||
|
||||
// Chomp whitespace to prefix
|
||||
advance!(buffer, eat_whitespace(buffer, stats, false));
|
||||
advance!(buffer, eat_whitespace(opts, buffer, stats, false)?);
|
||||
|
||||
// %TAG !named! :tag:prefix # a comment\n
|
||||
// ^^^^^^^^^^^
|
||||
let (prefix, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, false)?;
|
||||
let (prefix, amt) = scan_tag_uri(opts, buffer, stats, &mut can_borrow, false)?;
|
||||
|
||||
// %TAG !named! tag-prefix # a comment\n
|
||||
// ^
|
||||
|
@ -146,6 +149,7 @@ pub(in crate::scanner) fn scan_tag_directive<'de>(
|
|||
/// ("!", "") => A non resolving tag
|
||||
/// (handle, suffix) => A primary, secondary or named tag
|
||||
pub(in crate::scanner) fn scan_node_tag<'de>(
|
||||
opts: Flags,
|
||||
base: &'de str,
|
||||
stats: &mut MStats,
|
||||
) -> Result<(Token<'de>, usize)>
|
||||
|
@ -163,6 +167,8 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
|
|||
* a zero length sub-slice out.
|
||||
*/
|
||||
|
||||
cache!(~buffer, 2, opts)?;
|
||||
|
||||
// !<global:verbatim:tag:> "node"
|
||||
// ^^
|
||||
// If it's a verbatim tag, scan it
|
||||
|
@ -172,10 +178,11 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
|
|||
|
||||
// !<global:verbatim:tag:> "node"
|
||||
// ^^^^^^^^^^^^^^^^^^^^
|
||||
let (verbatim, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, true)?;
|
||||
let (verbatim, amt) = scan_tag_uri(opts, buffer, stats, &mut can_borrow, true)?;
|
||||
|
||||
// !<global:verbatim:tag:> "node"
|
||||
// ^
|
||||
cache!(~buffer, @amt + 1, 1, opts)?;
|
||||
check!(~buffer, amt + 1 => b'>', else ScanError::InvalidTagSuffix)?;
|
||||
|
||||
let token = assemble_tag(&buffer[0..0], verbatim, can_borrow);
|
||||
|
@ -185,7 +192,7 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
|
|||
// Otherwise scan it as a normal tag
|
||||
else
|
||||
{
|
||||
match scan_tag_handle(buffer, stats)?
|
||||
match scan_tag_handle(opts, buffer, stats)?
|
||||
{
|
||||
// ! "node"
|
||||
// ^
|
||||
|
@ -200,7 +207,7 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
|
|||
|
||||
// !!global "node" OR !named!global "node"
|
||||
// ^^^^^^ ^^^^^^
|
||||
let (suffix, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, false)?;
|
||||
let (suffix, amt) = scan_tag_uri(opts, buffer, stats, &mut can_borrow, false)?;
|
||||
|
||||
let token = assemble_tag(h, suffix, can_borrow);
|
||||
|
||||
|
@ -210,6 +217,8 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
|
|||
// Handle scan couldn't find a closing !, meaning this is a local tag
|
||||
None =>
|
||||
{
|
||||
cache!(~buffer, 1, opts)?;
|
||||
|
||||
// !local "node"
|
||||
// ^
|
||||
let handle = &buffer[..1];
|
||||
|
@ -217,7 +226,7 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
|
|||
|
||||
// !local "node"
|
||||
// ^^^^^
|
||||
let (suffix, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, false)?;
|
||||
let (suffix, amt) = scan_tag_uri(opts, buffer, stats, &mut can_borrow, false)?;
|
||||
|
||||
let token = assemble_tag(handle, suffix, can_borrow);
|
||||
|
||||
|
@ -240,6 +249,7 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
|
|||
///
|
||||
/// [Link]: https://yaml.org/spec/1.2/spec.html#ns-global-tag-prefix
|
||||
pub(in crate::scanner) fn scan_tag_uri<'de>(
|
||||
opts: Flags,
|
||||
base: &'de str,
|
||||
stats: &mut MStats,
|
||||
can_borrow: &mut bool,
|
||||
|
@ -251,6 +261,8 @@ pub(in crate::scanner) fn scan_tag_uri<'de>(
|
|||
|
||||
loop
|
||||
{
|
||||
cache!(~buffer, 1, opts)?;
|
||||
|
||||
match buffer.as_bytes()
|
||||
{
|
||||
// If it's a normal allowed character, add it
|
||||
|
@ -293,7 +305,7 @@ pub(in crate::scanner) fn scan_tag_uri<'de>(
|
|||
|
||||
*can_borrow = false;
|
||||
}
|
||||
let amt = tag_uri_unescape(buffer, &mut scratch, true)?;
|
||||
let amt = tag_uri_unescape(opts, buffer, &mut scratch, true)?;
|
||||
advance!(buffer, :stats, amt);
|
||||
},
|
||||
// EOF before loop end is an error
|
||||
|
@ -320,6 +332,7 @@ pub(in crate::scanner) fn scan_tag_uri<'de>(
|
|||
/// Scans a tag handle from .base, attempting to return the
|
||||
/// fragment if the handle is unambiguous.
|
||||
pub(in crate::scanner) fn scan_tag_handle<'b>(
|
||||
opts: Flags,
|
||||
base: &'b str,
|
||||
stats: &mut MStats,
|
||||
) -> Result<Option<(TagHandle<'b>, usize)>>
|
||||
|
@ -331,6 +344,7 @@ pub(in crate::scanner) fn scan_tag_handle<'b>(
|
|||
// !!tag
|
||||
// ^
|
||||
// Check that we are indeed starting a handle
|
||||
cache!(~buffer, 1, opts)?;
|
||||
check!(~buffer => b'!', else ScanError::InvalidTagHandle)?;
|
||||
|
||||
// %TAG !handle! tag-prefix # a comment \n
|
||||
|
@ -338,9 +352,10 @@ pub(in crate::scanner) fn scan_tag_handle<'b>(
|
|||
// !handle!tag
|
||||
// ^^^^^^
|
||||
// Safety: we just proved above we have >= 1 byte ('!')
|
||||
let name = take_while(buffer[1..].as_bytes(), u8::is_ascii_alphanumeric);
|
||||
let name = take_while(opts, buffer[1..].as_bytes(), u8::is_ascii_alphanumeric)?;
|
||||
let mut offset = 1 + name.len();
|
||||
|
||||
cache!(~buffer, @offset, 1, opts)?;
|
||||
match buffer.as_bytes().get(offset)
|
||||
{
|
||||
// If we find a closing '!', then it must either be a secondary or named handle
|
||||
|
@ -391,7 +406,7 @@ impl<'a> TagHandle<'a>
|
|||
}
|
||||
}
|
||||
|
||||
fn take_while<F>(b: &[u8], f: F) -> &[u8]
|
||||
fn take_while<F>(opts: Flags, base: &[u8], f: F) -> Result<&[u8]>
|
||||
where
|
||||
F: Fn(&u8) -> bool,
|
||||
{
|
||||
|
@ -399,10 +414,12 @@ where
|
|||
|
||||
loop
|
||||
{
|
||||
match b.get(index)
|
||||
let i = cache!(base, @index, 1, opts)?;
|
||||
|
||||
match base.get(index)
|
||||
{
|
||||
Some(b) if f(b) => index += 1,
|
||||
_ => return &b[..index],
|
||||
Some(b) if f(b) => index += i,
|
||||
_ => return Ok(&base[..index]),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
use cfg_if::cfg_if;
|
||||
|
||||
/// A view over a borrowed string that exposes only a leading window of it.
///
/// The window is what `read` returns; `expand` widens it and `expandable`
/// reports whether any of the string is still outside it.
#[derive(Debug, Clone)]
|
||||
pub(super) struct StrReader<'de>
|
||||
{
|
||||
// The complete underlying string.
s: &'de str,
|
||||
// Length in bytes of the window currently exposed; never more than s.len().
size: usize,
|
||||
}
|
||||
|
||||
impl<'de> StrReader<'de>
|
||||
{
|
||||
cfg_if! {
|
||||
if #[cfg(feature = "test_buffer_large")]
|
||||
{
|
||||
pub const BUF_SIZE: usize = 4 * 1024;
|
||||
pub const BUF_EXTEND: usize = 64;
|
||||
}
|
||||
else if #[cfg(feature = "test_buffer_medium")]
|
||||
{
|
||||
pub const BUF_SIZE: usize = 8;
|
||||
pub const BUF_EXTEND: usize = 8;
|
||||
}
|
||||
else if #[cfg(feature = "test_buffer_small")]
|
||||
{
|
||||
pub const BUF_SIZE: usize = 1;
|
||||
pub const BUF_EXTEND: usize = 1;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(s: &'de str, size: usize) -> Self
|
||||
{
|
||||
let size = std::cmp::min(s.len(), size);
|
||||
|
||||
Self { s, size }
|
||||
}
|
||||
|
||||
pub fn read(&self) -> &'de str
|
||||
{
|
||||
&self.s[..self.size]
|
||||
}
|
||||
|
||||
pub fn expand(&mut self, size: usize)
|
||||
{
|
||||
let new = self.size + size;
|
||||
|
||||
match self.s.len() > new
|
||||
{
|
||||
true => self.size = new,
|
||||
false => self.size = self.s.len(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expandable(&self) -> bool
|
||||
{
|
||||
self.size < self.s.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for StrReader<'_>
|
||||
{
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
|
||||
{
|
||||
self.s.fmt(f)
|
||||
}
|
||||
}
|
|
@ -27,7 +27,7 @@ fn eat()
|
|||
let mut buffer = data;
|
||||
let mut s = Scanner::new();
|
||||
|
||||
s.eat_whitespace(&mut buffer, false);
|
||||
s.eat_whitespace(O_ZEROED, &mut buffer, false).unwrap();
|
||||
|
||||
assert_eq!(buffer, "abc");
|
||||
assert_eq!(s.stats, (3, 0, 3))
|
||||
|
@ -40,7 +40,7 @@ fn eat_none()
|
|||
let mut buffer = data;
|
||||
let mut s = Scanner::new();
|
||||
|
||||
s.eat_whitespace(&mut buffer, false);
|
||||
s.eat_whitespace(O_ZEROED, &mut buffer, false).unwrap();
|
||||
|
||||
assert_eq!(buffer, "abc");
|
||||
assert_eq!(s.stats, (0, 0, 0))
|
||||
|
|
Loading…
Reference in New Issue