Feature/scanner/option #27

Merged
bazaah merged 19 commits from feature/scanner/option into master 2021-09-09 19:29:29 +00:00
16 changed files with 535 additions and 142 deletions

View File

@ -23,6 +23,9 @@ jobs:
- name: "Unit Tests"
cmd: test
args: --lib --bins
- name: "Unit Tests: feature.test_buffer_small"
cmd: test
args: --lib --features=test_buffer_small
include:
- os: ubuntu-latest
sccache-path: /home/runner/.cache/sccache

14
Cargo.lock generated
View File

@ -30,6 +30,18 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "ctor"
version = "0.1.20"
@ -139,5 +151,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"atoi",
"bitflags",
"cfg-if",
"pretty_assertions",
]

View File

@ -6,9 +6,18 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
# PRIVATE! FOR USE IN TEST ONLY!
test_buffer = []
test_buffer_large = ["test_buffer"]
test_buffer_medium = ["test_buffer"]
test_buffer_small = ["test_buffer"]
[dependencies]
atoi = "0.4"
bitflags = "1"
[dev-dependencies]
anyhow = "1"
pretty_assertions = "0.7"
cfg-if = "1"

View File

@ -1,5 +1,6 @@
use super::{
error::{ScanError, ScanResult as Result},
flag::Flags,
stats::MStats,
ALIAS, ANCHOR,
};
@ -8,6 +9,7 @@ use crate::token::Token;
/// Scan an anchor or alias from the underlying .buffer
/// returning the relevant Token
pub(in crate::scanner) fn scan_anchor<'de>(
opts: Flags,
buffer: &mut &'de str,
stats: &mut MStats,
kind: &AnchorKind,
@ -17,7 +19,7 @@ pub(in crate::scanner) fn scan_anchor<'de>(
// *anchor 'rest of the line'
// ^^^^^^
let anchor = take_while(buffer.as_bytes(), u8::is_ascii_alphanumeric);
let anchor = take_while(opts, buffer.as_bytes(), u8::is_ascii_alphanumeric)?;
let anchor = advance!(<- *buffer, :stats, anchor.len());
@ -33,6 +35,7 @@ pub(in crate::scanner) fn scan_anchor<'de>(
// There does not necessarily need to be a whitespace so we
// also check against a list of valid starting
// tokens
cache!(~buffer, 1, opts)?;
check!(~buffer
=> b' ' | b'\n' | b'?' | b',' | b']' | b'}' | b'%' | b'@' | b'`',
else ScanError::InvalidAnchorName
@ -62,10 +65,10 @@ impl AnchorKind
/// starts from the given .byte
pub fn new(byte: &u8) -> Option<Self>
{
let s = match byte
let s = match *byte
{
&ALIAS => Self::Alias,
&ANCHOR => Self::Anchor,
ALIAS => Self::Alias,
ANCHOR => Self::Anchor,
_ => return None,
};
@ -73,7 +76,7 @@ impl AnchorKind
}
}
fn take_while<F>(b: &[u8], f: F) -> &[u8]
fn take_while<F>(opts: Flags, base: &[u8], f: F) -> Result<&[u8]>
where
F: Fn(&u8) -> bool,
{
@ -81,10 +84,12 @@ where
loop
{
match b.get(index)
let i = cache!(base, @index, 1, opts)?;
match base.get(index)
{
Some(b) if f(b) => index += 1,
_ => return &b[..index],
Some(b) if f(b) => index += i,
_ => return Ok(&base[..index]),
}
}
}

View File

@ -5,13 +5,14 @@ use super::{
stats::MStats,
};
use crate::{
scanner::{eat_whitespace, tag::scan_tag_directive, COMMENTS},
scanner::{eat_whitespace, flag::Flags, tag::scan_tag_directive, COMMENTS},
token::Token,
};
/// Scans a version or tag directive from .buffer, based on
/// the .kind of directive, returning the relevant Token.
pub(in crate::scanner) fn scan_directive<'de>(
opts: Flags,
buffer: &mut &'de str,
mut stats: &mut MStats,
kind: &DirectiveKind,
@ -22,21 +23,25 @@ pub(in crate::scanner) fn scan_directive<'de>(
DirectiveKind::Version =>
{
// Chomp any preceding whitespace
advance!(*buffer, eat_whitespace(buffer, &mut stats, !COMMENTS));
advance!(
*buffer,
eat_whitespace(opts, buffer, &mut stats, !COMMENTS)?
);
// %YAML 1.1
// ^
let (major, skip) = scan_directive_version(buffer)?;
let (major, skip) = scan_directive_version(opts, buffer)?;
advance!(*buffer, :stats, skip);
// %YAML 1.1
// ^
cache!(~buffer, 1, opts)?;
check!(~buffer => b'.', else ScanError::InvalidVersion)?;
advance!(*buffer, :stats, 1);
// %YAML 1.1
// ^
let (minor, skip) = scan_directive_version(buffer)?;
let (minor, skip) = scan_directive_version(opts, buffer)?;
advance!(*buffer, :stats, skip);
Ok(Token::VersionDirective(major, minor))
@ -44,10 +49,13 @@ pub(in crate::scanner) fn scan_directive<'de>(
DirectiveKind::Tag =>
{
// Chomp any spaces up to the handle
advance!(*buffer, eat_whitespace(buffer, &mut stats, !COMMENTS));
advance!(
*buffer,
eat_whitespace(opts, buffer, &mut stats, !COMMENTS)?
);
// Scan the directive, copying if necessary
let (token, amt) = scan_tag_directive(buffer, &mut stats)?;
let (token, amt) = scan_tag_directive(opts, buffer, &mut stats)?;
advance!(*buffer, amt);
Ok(token)
@ -97,15 +105,15 @@ impl DirectiveKind
}
}
fn scan_directive_version(b: &str) -> Result<(u8, usize)>
fn scan_directive_version(opts: Flags, b: &str) -> Result<(u8, usize)>
{
let v_slice = take_while(b.as_bytes(), u8::is_ascii_digit);
let v_slice = take_while(opts, b.as_bytes(), u8::is_ascii_digit)?;
let v = atoi(v_slice).ok_or(ScanError::InvalidVersion)?;
Ok((v, v_slice.len()))
}
fn take_while<F>(b: &[u8], f: F) -> &[u8]
fn take_while<F>(opts: Flags, base: &[u8], f: F) -> Result<&[u8]>
where
F: Fn(&u8) -> bool,
{
@ -113,10 +121,12 @@ where
loop
{
match b.get(index)
let i = cache!(base, @index, 1, opts)?;
match base.get(index)
{
Some(b) if f(b) => index += 1,
_ => return &b[..index],
Some(b) if f(b) => index += i,
_ => return Ok(&base[..index]),
}
}
}

View File

@ -72,6 +72,10 @@ pub enum ScanError
/// An integer overflowed
IntOverflow,
/// The underlying buffer should be extended before
/// calling the Scanner again
Extend,
}
impl fmt::Display for ScanError

35
src/scanner/flag.rs Normal file
View File

@ -0,0 +1,35 @@
use bitflags::bitflags;
bitflags! {
    /// Option bits that steer how the Scanner behaves. Every
    /// bit is re-exported below as a standalone O_ constant,
    /// which is where the meaning of each flag is described.
    #[derive(Default)]
    pub struct Flags: u32 {
        const EXTENDABLE = 1 << 0;
        const LAZY = 1 << 1;
    }
}

/// The empty flag set, with no option enabled. This is also
/// what Flags::default() produces.
pub const O_ZEROED: Flags = Flags::empty();

/// Tells the Scanner that the byte slice it was handed may
/// still grow, as is typical when data is processed in
/// chunks or when more data may arrive later.
///
/// With this flag set, the Scanner returns
/// ScanError::Extend when the byte stream ends before a
/// token could be scanned.
pub const O_EXTENDABLE: Flags = Flags::EXTENDABLE;

/// Asks the Scanner to process the underlying byte stream
/// lazily.
///
/// Scalars are not fully processed; only their start and
/// end markers are located in the stream. Allocations,
/// escape parsing and line joins are therefore deferred
/// until the caller explicitly requests the token. The same
/// is true of _errors_ inside the scalar: they are not
/// reported until the caller requests the token!
pub const O_LAZY: Flags = Flags::LAZY;

View File

@ -87,6 +87,60 @@ macro_rules! cow {
};
}
/// Check that the underlying .buffer has at least the given
/// number of UTF8 .codepoints available, starting from the
/// byte .offset (0 if omitted). Evaluates to a Result
/// holding the number of _bytes_ read.
///
/// If the .buffer runs out early and O_EXTENDABLE is set in
/// .opts, the result is Err(ScanError::Extend). Without the
/// flag the result is Ok with the bytes counted up to that
/// point, which may cover fewer than the requested
/// .codepoints.
///
/// Modifiers
/// ~ .buffer := .buffer.as_bytes()
///
/// Variants
/// /1 .buffer, .codepoints
/// := /4 .buffer, @0, .codepoints, O_ZEROED
/// /2 .buffer, @.offset, .codepoints
/// := /4 .buffer, @.offset, .codepoints, O_ZEROED
/// /3 .buffer, .codepoints, .opts
/// := /4 .buffer, @0, .codepoints, .opts
/// /4 .buffer, @.offset, .codepoints, .opts
macro_rules! cache {
// Modifier arm: strip the '~', take the .buffer's bytes and
// dispatch again with the remaining arguments untouched
(~$buffer:expr $(, @$offset:expr )?, $codepoints:expr $(, $opts:expr )?) => {
cache!($buffer.as_bytes(), $( @$offset, )? $codepoints $(, $opts )?)
};
// Public entry arm: forward to @inner, always appending a
// fallback offset (@0) after any caller supplied .offset and
// a fallback O_ZEROED after any caller supplied .opts
($buffer:expr $(, @$offset:expr )?, $codepoints:expr $(, $opts:expr )?) => {
cache!(@inner $buffer, $( @$offset, )? @0, $codepoints $(, $opts )?, $crate::scanner::flag::O_ZEROED)
};
// Defaulting arm: keep the first offset and the first opts
// seen, discarding the fallbacks appended above when the
// caller provided its own, then reduce .opts to a single
// "is O_EXTENDABLE set" expression
(@inner $buffer:expr, @$offset:expr, $( @$_:expr, )? $codepoints:expr, $opts:expr $(, $__:expr )?) => {
cache!(@priv $buffer, $offset, $codepoints, $opts.contains($crate::scanner::flag::O_EXTENDABLE))
};
// Implementation arm: walk the requested number of
// codepoints, summing their byte widths
(@priv $buffer:expr, $offset:expr, $codepoints:expr, $extend:expr) => {{
// Bytes counted so far; does not include the starting .offset
let mut ret = Ok(0);
// Absolute byte position of the next codepoint to measure
let mut bytes = $offset;
for _ in 0..$codepoints
{
match widthOf!($buffer, bytes)
{
// A width of 0 ends the walk early (presumably widthOf!
// reports 0 when no codepoint is available at this
// position -- defined elsewhere, confirm there)
0 =>
{
// Only an extendable buffer turns the shortfall into an
// error, otherwise the partial count is returned as is
if $extend
{
ret = Err($crate::scanner::error::ScanError::Extend);
}
break;
},
// Step over the codepoint and add its width to the count
n =>
{
bytes += n;
ret = ret.map(|r| r + n);
},
}
}
ret
}};
}
/// Check the .buffer (@ .offset) matches the given
/// .pattern, optionally returning an .error.
///

View File

@ -8,6 +8,7 @@ mod context;
mod directive;
mod entry;
mod error;
mod flag;
mod key;
mod scalar;
mod stats;
@ -21,6 +22,7 @@ use crate::{
directive::{scan_directive, DirectiveKind},
entry::TokenEntry,
error::{ScanError, ScanResult as Result},
flag::*,
key::{Key, KeyPossible},
scalar::{block::scan_block_scalar, flow::scan_flow_scalar, plain::scan_plain_scalar},
stats::MStats,
@ -66,7 +68,12 @@ impl Scanner
/// Scan some tokens from the given .base into .tokens
/// returning the number added.
pub fn scan_tokens<'de>(&mut self, base: &'de str, tokens: &mut Tokens<'de>) -> Result<usize>
pub fn scan_tokens<'de>(
&mut self,
opts: Flags,
base: &'de str,
tokens: &mut Tokens<'de>,
) -> Result<usize>
{
let mut num_tokens = 0;
let starting_tokens = tokens.len();
@ -76,9 +83,14 @@ impl Scanner
{
if let Some(mut buffer) = base.get(self.offset..)
{
self.scan_next_token(&mut buffer, tokens)?;
let run = self.scan_next_token(opts, &mut buffer, tokens);
self.offset = base.len() - buffer.len();
if matches!(run, Err(ScanError::Extend) | Ok(_))
{
self.offset = base.len() - buffer.len();
}
run?;
num_tokens = tokens.len() - starting_tokens;
}
@ -87,8 +99,12 @@ impl Scanner
Ok(num_tokens)
}
fn scan_next_token<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>)
-> Result<()>
fn scan_next_token<'de>(
&mut self,
opts: Flags,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
{
// Is it the beginning of the stream?
if self.state == StreamState::Start
@ -98,7 +114,7 @@ impl Scanner
}
// Eat whitespace to the next delimiter
self.eat_whitespace(base, COMMENTS);
self.eat_whitespace(opts, base, COMMENTS)?;
// Remove any saved key positions that cannot contain keys
// anymore
@ -114,11 +130,17 @@ impl Scanner
return self.fetch_stream_end(*base, tokens);
}
// 4 characters is the longest token we can encounter, one
// of:
// - '--- '
// - '... '
cache!(~base, 4, opts)?;
// Fetch the next token(s)
match base.as_bytes()
{
// Is it a directive?
[DIRECTIVE, ..] if self.stats.column == 0 => self.fetch_directive(base, tokens),
[DIRECTIVE, ..] if self.stats.column == 0 => self.fetch_directive(opts, base, tokens),
// Is it a document marker?
[b @ b'-', b'-', b'-', ..] | [b @ b'.', b'.', b'.', ..]
@ -161,25 +183,25 @@ impl Scanner
},
// Is it an anchor or alias?
[ANCHOR, ..] | [ALIAS, ..] => self.fetch_anchor(base, tokens),
[ANCHOR, ..] | [ALIAS, ..] => self.fetch_anchor(opts, base, tokens),
// Is it a tag?
[TAG, ..] => self.fetch_tag(base, tokens),
[TAG, ..] => self.fetch_tag(opts, base, tokens),
// Is it a block scalar?
[c @ LITERAL, ..] | [c @ FOLDED, ..] if self.context.is_block() =>
{
self.fetch_block_scalar(base, tokens, *c == FOLDED)
self.fetch_block_scalar(opts, base, tokens, *c == FOLDED)
},
// Is it a flow scalar?
[SINGLE, ..] | [DOUBLE, ..] => self.fetch_flow_scalar(base, tokens),
[SINGLE, ..] | [DOUBLE, ..] => self.fetch_flow_scalar(opts, base, tokens),
// Is it a plain scalar?
_ if self.is_plain_scalar(*base) => self.fetch_plain_scalar(base, tokens),
_ if self.is_plain_scalar(*base) => self.fetch_plain_scalar(opts, base, tokens),
// Otherwise it's an error
_ => return Err(ScanError::UnknownDelimiter),
_ => Err(ScanError::UnknownDelimiter),
}
}
@ -253,8 +275,12 @@ impl Scanner
Ok(())
}
fn fetch_directive<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>)
-> Result<()>
fn fetch_directive<'de>(
&mut self,
opts: Flags,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
{
let mut buffer = *base;
let mut stats = MStats::new();
@ -264,11 +290,8 @@ impl Scanner
return Ok(());
}
// Reset indent to starting level
self.unroll_indent(tokens, STARTING_INDENT)?;
// Reset saved key
self.remove_saved_key()?;
// Ensure we can read the 'YAML' or 'TAG' identifiers
cache!(~buffer, @1, 4, opts)?;
// Safety: we check above that we have len >= 1 (e.g a '%')
//
@ -282,7 +305,13 @@ impl Scanner
advance!(buffer, :stats, 1 + kind.len());
// Scan the directive token from the .buffer
let token = scan_directive(&mut buffer, &mut stats, &kind)?;
let token = scan_directive(opts, &mut buffer, &mut stats, &kind)?;
// Reset indent to starting level
self.unroll_indent(tokens, STARTING_INDENT)?;
// Reset saved key
self.remove_saved_key()?;
// A key cannot follow a directive (a newline is required)
self.simple_key_allowed = false;
@ -298,7 +327,12 @@ impl Scanner
Ok(())
}
fn fetch_tag<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>) -> Result<()>
fn fetch_tag<'de>(
&mut self,
opts: Flags,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
{
let mut buffer = *base;
let mut stats = MStats::new();
@ -308,11 +342,11 @@ impl Scanner
return Ok(());
}
self.save_key(!REQUIRED)?;
let (token, amt) = scan_node_tag(buffer, &mut stats)?;
let (token, amt) = scan_node_tag(opts, buffer, &mut stats)?;
advance!(buffer, amt);
self.save_key(!REQUIRED)?;
// A key may not start after a tag (only before)
self.simple_key_allowed = false;
@ -327,7 +361,12 @@ impl Scanner
Ok(())
}
fn fetch_anchor<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>) -> Result<()>
fn fetch_anchor<'de>(
&mut self,
opts: Flags,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
{
let mut buffer = *base;
let mut stats = MStats::new();
@ -343,12 +382,12 @@ impl Scanner
_ => return Ok(()),
};
// Scan the token from the .buffer
let token = scan_anchor(opts, &mut buffer, &mut stats, &kind)?;
// An anchor / alias may start a simple key
self.save_key(!REQUIRED)?;
// Scan the token from the .buffer
let token = scan_anchor(&mut buffer, &mut stats, &kind)?;
// A key may not start after an anchor (only before)
self.simple_key_allowed = false;
@ -365,6 +404,7 @@ impl Scanner
fn fetch_flow_scalar<'de>(
&mut self,
opts: Flags,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
@ -378,11 +418,11 @@ impl Scanner
return Ok(());
}
self.save_key(!REQUIRED)?;
let (range, amt) = scan_flow_scalar(buffer, &mut stats, single)?;
let (range, amt) = scan_flow_scalar(opts, buffer, &mut stats, single)?;
let token = range.into_token(buffer)?;
self.save_key(!REQUIRED)?;
// A key cannot follow a flow scalar, as we're either
// currently in a key (which should be followed by a
// value), or a value which needs a separator (e.g line
@ -399,6 +439,7 @@ impl Scanner
fn fetch_plain_scalar<'de>(
&mut self,
opts: Flags,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
@ -406,9 +447,9 @@ impl Scanner
let buffer = *base;
let mut stats = self.stats.clone();
self.save_key(!REQUIRED)?;
let (token, amt) = scan_plain_scalar(opts, buffer, &mut stats, &self.context)?;
let (token, amt) = scan_plain_scalar(buffer, &mut stats, &self.context)?;
self.save_key(!REQUIRED)?;
// A simple key cannot follow a plain scalar, there must be
// an indicator or new line before a key is valid
@ -425,6 +466,7 @@ impl Scanner
fn fetch_block_scalar<'de>(
&mut self,
opts: Flags,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
fold: bool,
@ -440,7 +482,7 @@ impl Scanner
// always follow a block scalar.
self.simple_key_allowed = true;
let (token, amt) = scan_block_scalar(buffer, &mut stats, &self.context, fold)?;
let (token, amt) = scan_block_scalar(opts, buffer, &mut stats, &self.context, fold)?;
advance!(*base, amt);
self.stats = stats;
@ -869,11 +911,11 @@ impl Scanner
/// Chomp whitespace and optionally comments until we
/// reach the next token, updating buffer[0] to the
/// beginning of the new token
fn eat_whitespace(&mut self, buffer: &mut &str, comments: bool) -> usize
fn eat_whitespace(&mut self, opts: Flags, buffer: &mut &str, comments: bool) -> Result<usize>
{
let mut stats = MStats::new();
let amt = eat_whitespace(*buffer, &mut stats, comments);
let amt = eat_whitespace(opts, *buffer, &mut stats, comments)?;
// A new line may start a key in the block context
//
@ -887,7 +929,7 @@ impl Scanner
advance!(*buffer, amt);
self.stats += stats;
amt
Ok(amt)
}
}
@ -902,7 +944,7 @@ enum StreamState
/// Chomp whitespace and .comments if allowed until a non
/// whitespace character is encountered, returning the
/// amount chomped
fn eat_whitespace(base: &str, stats: &mut MStats, comments: bool) -> usize
fn eat_whitespace(opts: Flags, base: &str, stats: &mut MStats, comments: bool) -> Result<usize>
{
let mut buffer = base;
let mut chomp_line = false;
@ -910,6 +952,8 @@ fn eat_whitespace(base: &str, stats: &mut MStats, comments: bool) -> usize
loop
{
cache!(~buffer, 1, opts)?;
let (blank, brk) = (isBlank!(~buffer), isBreak!(~buffer));
match (blank, brk)
@ -943,7 +987,7 @@ fn eat_whitespace(base: &str, stats: &mut MStats, comments: bool) -> usize
}
}
base.len() - buffer.len()
Ok(base.len() - buffer.len())
}
/// Roll the indentation level and push a block collection
@ -1077,12 +1121,22 @@ mod tests
mod tag;
mod whitespace;
#[cfg(feature = "test_buffer")]
mod str_reader;
use super::*;
use crate::token::{ScalarStyle::*, Token::*};
struct ScanIter<'de>
{
data: &'de str,
#[cfg(feature = "test_buffer")]
data: str_reader::StrReader<'de>,
#[cfg(feature = "test_buffer")]
opts: Flags,
#[cfg(not(feature = "test_buffer"))]
data: &'de str,
scan: Scanner,
tokens: Tokens<'de>,
@ -1094,7 +1148,14 @@ mod tests
pub fn new(data: &'de str) -> Self
{
Self {
#[cfg(feature = "test_buffer")]
data: str_reader::StrReader::new(data, str_reader::StrReader::BUF_SIZE),
#[cfg(feature = "test_buffer")]
opts: O_ZEROED | O_EXTENDABLE,
#[cfg(not(feature = "test_buffer"))]
data,
scan: Scanner::new(),
tokens: Tokens::new(),
done: false,
@ -1105,10 +1166,7 @@ mod tests
{
if (!self.done) && self.tokens.is_empty()
{
if let 0 = self.scan.scan_tokens(self.data, &mut self.tokens)?
{
self.done = true
}
self.get_next_token()?;
}
if !self.done
@ -1120,6 +1178,52 @@ mod tests
Ok(None)
}
}
/// Run the Scanner over the reader's current data until a
/// scan completes, expanding the reader each time the
/// Scanner answers with ScanError::Extend. Once the reader
/// reports it is no longer expandable, O_EXTENDABLE is
/// dropped from .opts before the next attempt. Any other
/// error is returned to the caller.
///
/// Marks the iterator as done when a scan adds no tokens.
#[cfg(feature = "test_buffer")]
fn get_next_token(&mut self) -> Result<()>
{
    let count = loop
    {
        let outcome = self
            .scan
            .scan_tokens(self.opts, self.data.read(), &mut self.tokens);

        match outcome
        {
            Ok(scanned) => break scanned,
            // The Scanner wants more data: expand the reader and
            // go around again
            Err(ScanError::Extend) =>
            {
                self.data.expand(str_reader::StrReader::BUF_EXTEND);

                if !self.data.expandable()
                {
                    self.opts.remove(O_EXTENDABLE);
                }
            },
            Err(fatal) => return Err(fatal),
        }
    };

    if count == 0
    {
        self.done = true;
    }

    Ok(())
}
/// Scan the next batch of tokens straight from .data with
/// no options set, marking the iterator as done when the
/// scan adds no tokens.
#[cfg(not(feature = "test_buffer"))]
fn get_next_token(&mut self) -> Result<()>
{
    let count = self.scan.scan_tokens(O_ZEROED, self.data, &mut self.tokens)?;

    if count == 0
    {
        self.done = true;
    }

    Ok(())
}
}
impl<'de> Iterator for ScanIter<'de>

View File

@ -26,6 +26,7 @@ use crate::{
scanner::{
context::Context,
error::{ScanError, ScanResult as Result},
flag::Flags,
stats::MStats,
},
token::{ScalarStyle, Slice, Token},
@ -40,6 +41,7 @@ use crate::{
/// YAML 1.2: Section 8.1
/// yaml.org/spec/1.2/#c-b-block-header(m,t)
pub(in crate::scanner) fn scan_block_scalar<'de>(
opts: Flags,
base: &'de str,
stats: &mut MStats,
cxt: &Context,
@ -78,14 +80,16 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
};
// Eat the '|' or '>'
cache!(~buffer, 1, opts)?;
advance!(buffer, :local_stats, 1);
// Calculate any headers this scalar may have
let (chomp, explicit) = scan_headers(&mut buffer, &mut local_stats)?;
let (chomp, explicit) = scan_headers(opts, &mut buffer, &mut local_stats)?;
// The header line must contain nothing after the headers
// excluding a comment until the line ending
skip_blanks(&mut buffer, &mut local_stats, COMMENTS)?;
skip_blanks(opts, &mut buffer, &mut local_stats, COMMENTS)?;
cache!(~buffer, 1, opts)?;
if !isWhiteSpaceZ!(~buffer)
{
return Err(ScanError::InvalidBlockScalar);
@ -102,6 +106,7 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
None =>
{
indent = detect_indent_level(
opts,
&mut buffer,
&mut local_stats,
cxt,
@ -201,8 +206,11 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
}
// Eat the line's content until the line break (or EOF)
cache!(~buffer, 1, opts)?;
while !isBreakZ!(~buffer)
{
cache!(~buffer, 1, opts)?;
if !can_borrow
{
scratch.push(buffer.as_bytes()[0])
@ -218,6 +226,7 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
}
// Eat the line break (if not EOF)
cache!(~buffer, 1, opts)?;
if isBreak!(~buffer)
{
advance!(buffer, :local_stats, @line);
@ -226,6 +235,7 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
// Chomp indentation until the next indented line
scan_indent(
opts,
&mut buffer,
&mut local_stats,
&mut lines,
@ -246,12 +256,18 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
}
/// Retrieve a block scalar's headers
fn scan_headers(buffer: &mut &str, stats: &mut MStats) -> Result<(ChompStyle, IndentHeader)>
fn scan_headers(
opts: Flags,
buffer: &mut &str,
stats: &mut MStats,
) -> Result<(ChompStyle, IndentHeader)>
{
let mut skip = 0;
let mut indent = None;
let mut chomp = ChompStyle::Clip;
cache!(~buffer, 2, opts)?;
// Set the explicit indent if it exists.
//
// Note that we silently eat an invalid indent (0) rather
@ -291,6 +307,7 @@ fn scan_headers(buffer: &mut &str, stats: &mut MStats) -> Result<(ChompStyle, In
/// Chomp the indentation spaces of a block scalar
fn scan_indent(
opts: Flags,
buffer: &mut &str,
stats: &mut MStats,
lines: &mut usize,
@ -303,6 +320,8 @@ fn scan_indent(
return Ok(false);
}
cache!(~buffer, 1, opts)?;
while stats.column < indent && isWhiteSpace!(~buffer)
{
// Indentation space, chomp
@ -321,6 +340,8 @@ fn scan_indent(
*lines += 1;
advance!(*buffer, :stats, @line);
}
cache!(~buffer, 1, opts)?;
}
Ok(true)
@ -433,6 +454,7 @@ fn scan_chomp<'de>(
/// Auto-detect the indentation level from the first non
/// header line of a block scalar
fn detect_indent_level(
opts: Flags,
buffer: &mut &str,
stats: &mut MStats,
cxt: &Context,
@ -444,9 +466,13 @@ fn detect_indent_level(
loop
{
cache!(~buffer, 1, opts)?;
// Chomp indentation spaces, erroring on a tab
while isBlank!(~buffer)
{
cache!(~buffer, 1, opts)?;
if check!(~buffer => b'\t')
{
return Err(ScanError::InvalidTab);
@ -467,6 +493,7 @@ fn detect_indent_level(
}
// If it's not a line break we're done, exit the loop
cache!(~buffer, 1, opts)?;
if !isBreak!(~buffer)
{
break;
@ -489,10 +516,13 @@ fn detect_indent_level(
/// Skip any blanks (and .comments) until we reach a line
/// ending or non blank character
fn skip_blanks(buffer: &mut &str, stats: &mut MStats, comments: bool) -> Result<()>
fn skip_blanks(opts: Flags, buffer: &mut &str, stats: &mut MStats, comments: bool) -> Result<()>
{
cache!(~buffer, 1, opts)?;
while isBlank!(~buffer)
{
cache!(~buffer, 1, opts)?;
advance!(*buffer, :stats, 1);
}
@ -500,6 +530,7 @@ fn skip_blanks(buffer: &mut &str, stats: &mut MStats, comments: bool) -> Result<
{
while !isBreakZ!(~buffer)
{
cache!(~buffer, 1, opts)?;
advance!(*buffer, :stats, 1);
}
}
@ -577,6 +608,7 @@ mod tests
use ScalarStyle::{Folded, Literal};
use super::*;
use crate::scanner::flag::O_ZEROED;
type TestResult = anyhow::Result<()>;
@ -615,7 +647,7 @@ mod tests
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("this is a simple block scalar"), Literal);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
assert_eq!(token, expected);
@ -630,7 +662,7 @@ mod tests
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("trailing lines...\n"), Literal);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
assert_eq!(token, expected);
@ -645,7 +677,7 @@ mod tests
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("trailing lines..."), Literal);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
assert_eq!(token, expected);
@ -660,7 +692,7 @@ mod tests
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("trailing lines...\n\n\n"), Literal);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
assert_eq!(token, expected);
@ -679,7 +711,7 @@ mod tests
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("some folded\nlines\nhere\n"), Literal);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
assert_eq!(token, expected);
@ -700,7 +732,7 @@ mod tests
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("\n\nsome folded\nlines\nhere"), Literal);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
assert_eq!(token, expected);
@ -721,7 +753,7 @@ mod tests
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("some folded\nlines\nhere\n\n\n"), Literal);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
assert_eq!(token, expected);
@ -742,7 +774,7 @@ some.other.key: value";
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("some folded\nlines\nhere\n\n\n"), Literal);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
assert_eq!(token, expected);
@ -763,7 +795,7 @@ some.other.key: value";
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("this\n\nhas\n\nbreaks"), Literal);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
assert_eq!(token, expected);
@ -778,7 +810,7 @@ some.other.key: value";
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("simple block scalar"), Literal);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;
assert_eq!(token, expected);
@ -795,7 +827,7 @@ some.other.key: value";
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("this is a simple block scalar"), Folded);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
assert_eq!(token, expected);
@ -810,7 +842,7 @@ some.other.key: value";
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("trailing lines...\n"), Folded);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
assert_eq!(token, expected);
@ -825,7 +857,7 @@ some.other.key: value";
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("trailing lines..."), Folded);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
assert_eq!(token, expected);
@ -840,7 +872,7 @@ some.other.key: value";
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("trailing lines...\n\n\n"), Folded);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
assert_eq!(token, expected);
@ -859,7 +891,7 @@ some.other.key: value";
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("some folded lines here\n"), Folded);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
assert_eq!(token, expected);
@ -880,7 +912,7 @@ some.other.key: value";
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("\n\nsome folded lines here"), Folded);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
assert_eq!(token, expected);
@ -901,7 +933,7 @@ some.other.key: value";
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("some folded lines here\n\n\n"), Folded);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
assert_eq!(token, expected);
@ -922,7 +954,7 @@ some.other.key: value";
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("some folded lines here\n\n\n"), Folded);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
assert_eq!(token, expected);
@ -943,7 +975,7 @@ some.other.key: value";
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("this\nhas\nbreaks"), Folded);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
assert_eq!(token, expected);
@ -958,7 +990,7 @@ some.other.key: value";
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("simple block scalar"), Folded);
let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;
assert_eq!(token, expected);

View File

@ -1,7 +1,10 @@
//! This module exports function(s) for handling scalar
//! escapes in YAML documents.
use crate::scanner::error::{ScanError, ScanResult as Result};
use crate::scanner::{
error::{ScanError, ScanResult as Result},
flag::Flags,
};
/// Unescape a given YAML escape sequence as defined in
/// [Section 5.7][Link]. Specifically, YAML defines 18
@ -15,12 +18,17 @@ use crate::scanner::error::{ScanError, ScanResult as Result};
/// escape sequence.
///
/// [Link]: https://yaml.org/spec/1.2/spec.html#c-escape
pub(in crate::scanner) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Result<usize>
pub(in crate::scanner) fn flow_unescape(
opts: Flags,
base: &str,
scratch: &mut Vec<u8>,
) -> Result<usize>
{
let mut buffer = base;
let mut escape_len: Option<u8> = None;
// Not an escape sequence, early exit
cache!(~buffer, 1, opts)?;
if !check!(~buffer => b'\\')
{
return Ok(0);
@ -30,6 +38,7 @@ pub(in crate::scanner) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Re
// See 5.7: Escaped Characters
// yaml.org/spec/1.2/spec.html#id2776092
cache!(~buffer, 1, opts)?;
match buffer.as_bytes()
{
[b'0', ..] => scratch.push(b'\0'),
@ -60,6 +69,9 @@ pub(in crate::scanner) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Re
if let Some(sequence) = escape_len
{
// Note that we cache the _entire_ escape sequence before
// calling write_unicode_point
cache!(~buffer, sequence, opts)?;
let amt = write_unicode_point(buffer, scratch, sequence)?;
advance!(buffer, amt);
}
@ -73,6 +85,7 @@ pub(in crate::scanner) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Re
///
/// [Link]: https://yaml.org/spec/1.2/spec.html#ns-uri-char
pub(in crate::scanner) fn tag_uri_unescape(
opts: Flags,
base: &str,
scratch: &mut Vec<u8>,
_directive: bool,
@ -82,6 +95,8 @@ pub(in crate::scanner) fn tag_uri_unescape(
let mut codepoint_len: i8 = 0;
while {
cache!(~buffer, 3, opts)?;
if buffer.len() < 3
{
return Err(ScanError::UnexpectedEOF);
@ -224,6 +239,7 @@ mod tests
use pretty_assertions::assert_eq;
use super::*;
use crate::scanner::flag::O_ZEROED;
type TestResult = anyhow::Result<()>;
@ -268,7 +284,7 @@ mod tests
for (i, (&t, &ex)) in data.into_iter().zip(expected).enumerate()
{
scratch.clear();
flow_unescape(t, scratch)
flow_unescape(O_ZEROED, t, scratch)
.map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;
assert_eq!(scratch, ex, "on iteration {}", i)
@ -329,7 +345,7 @@ mod tests
let mut c: [u8; 4] = [0; 4];
scratch.clear();
flow_unescape(t, scratch)
flow_unescape(O_ZEROED, t, scratch)
.map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;
assert_eq!(
@ -370,7 +386,7 @@ mod tests
{
scratch.clear();
let consumed = flow_unescape(t, scratch)
let consumed = flow_unescape(O_ZEROED, t, scratch)
.map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;
assert_eq!(
@ -417,7 +433,7 @@ mod tests
{
scratch.clear();
let consumed = tag_uri_unescape(t, scratch, true)
let consumed = tag_uri_unescape(O_ZEROED, t, scratch, true)
.map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;
assert_eq!(
@ -446,7 +462,7 @@ mod tests
let scratch = &mut Vec::new();
let expected = ScanError::UnexpectedEOF;
match tag_uri_unescape(data, scratch, true)
match tag_uri_unescape(O_ZEROED, data, scratch, true)
{
Err(e) if e == expected => Ok(()),
@ -466,7 +482,7 @@ mod tests
let scratch = &mut Vec::new();
let expected = ScanError::UnknownEscape;
match tag_uri_unescape(data, scratch, true)
match tag_uri_unescape(O_ZEROED, data, scratch, true)
{
Err(e) if e == expected => Ok(()),

View File

@ -3,6 +3,7 @@ use std::ops::Range;
use crate::{
scanner::{
error::{ScanError, ScanResult as Result},
flag::Flags,
scalar::escape::flow_unescape,
stats::MStats,
},
@ -15,6 +16,7 @@ use crate::{
/// the underlying .base, however it may be required to copy
/// into .scratch and borrow from that lifetime.
pub(in crate::scanner) fn scan_flow_scalar(
opts: Flags,
base: &str,
stats: &mut MStats,
single: bool,
@ -35,6 +37,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
};
// Eat left quote
cache!(~buffer, 1, opts)?;
advance!(buffer, :stats, 1);
'scalar: loop
@ -44,6 +47,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
// Even in a scalar context, YAML prohibits starting a line
// with document stream tokens followed by a blank
// character
cache!(~buffer, 4, opts)?;
if isDocumentIndicator!(~buffer, :stats)
{
return Err(ScanError::InvalidFlowScalar);
@ -55,9 +59,14 @@ pub(in crate::scanner) fn scan_flow_scalar(
return Err(ScanError::UnexpectedEOF);
}
cache!(~buffer, 1, opts)?;
// Consume non whitespace characters
while !isWhiteSpaceZ!(~buffer)
{
// Longest sequence we can hit is 2 characters ('')
cache!(~buffer, 2, opts)?;
// if we encounter an escaped quote we can no longer borrow
// from .base, we must unescape the quote into .scratch
if kind == SingleQuote && check!(~buffer => [SINGLE, SINGLE, ..])
@ -88,7 +97,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
{
set_no_borrow(&mut can_borrow, base, buffer, &mut scratch);
let read = flow_unescape(buffer, &mut scratch)?;
let read = flow_unescape(opts, buffer, &mut scratch)?;
advance!(buffer, :stats, read);
}
// It's a non-blank character, add it
@ -126,6 +135,8 @@ pub(in crate::scanner) fn scan_flow_scalar(
// Consume whitespace
loop
{
cache!(~buffer, 1, opts)?;
match (isBlank!(~buffer), isBreak!(~buffer))
{
// No more whitespace, exit loop
@ -206,6 +217,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
};
// Eat the right quote
cache!(~buffer, 1, opts)?;
advance!(buffer, :stats, 1);
let advance = base.len() - buffer.len();
@ -296,6 +308,7 @@ mod tests
use pretty_assertions::assert_eq;
use super::*;
use crate::scanner::flag::O_ZEROED;
type TestResult = anyhow::Result<()>;
@ -308,7 +321,7 @@ mod tests
let stats = &mut MStats::new();
let expected = Token::Scalar(cow!(""), ScalarStyle::SingleQuote);
let (range, read) = scan_flow_scalar(data, stats, true)?;
let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
let scalar = range.into_token(data)?;
assert_eq!(read, 2);
@ -328,7 +341,7 @@ mod tests
let stats = &mut MStats::new();
let expected = Token::Scalar(cow!("hello world"), ScalarStyle::SingleQuote);
let (range, read) = scan_flow_scalar(data, stats, true)?;
let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
let scalar = range.into_token(data)?;
assert_eq!(read, 13);
@ -352,7 +365,7 @@ fourth'"#;
let cmp = "first second third fourth";
let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote);
let (range, _read) = scan_flow_scalar(data, stats, true)?;
let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
let scalar = range.into_token(data)?;
if !(scalar == expected)
@ -372,7 +385,7 @@ fourth'"#;
let cmp = "first second";
let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote);
let (range, _read) = scan_flow_scalar(data, stats, true)?;
let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
let scalar = range.into_token(data)?;
if !(scalar == expected)
@ -395,7 +408,7 @@ fourth'"#;
let cmp = "first second third\nfourth";
let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote);
let (range, _read) = scan_flow_scalar(data, stats, true)?;
let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
let scalar = range.into_token(data)?;
if !(scalar == expected)
@ -417,7 +430,7 @@ fourth'"#;
{
stats = MStats::new();
match scan_flow_scalar(t, &mut stats, true)
match scan_flow_scalar(O_ZEROED, t, &mut stats, true)
{
Err(e) => assert_eq!(
e, expected,
@ -443,7 +456,7 @@ fourth'"#;
{
stats = MStats::new();
match scan_flow_scalar(t, &mut stats, true)
match scan_flow_scalar(O_ZEROED, t, &mut stats, true)
{
Err(e) => assert_eq!(
e, expected,
@ -467,7 +480,7 @@ fourth'"#;
let stats = &mut MStats::new();
let expected = Token::Scalar(cow!(""), ScalarStyle::DoubleQuote);
let (range, read) = scan_flow_scalar(data, stats, false)?;
let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
let scalar = range.into_token(data)?;
assert_eq!(read, 2);
@ -487,7 +500,7 @@ fourth'"#;
let stats = &mut MStats::new();
let expected = Token::Scalar(cow!("hello world"), ScalarStyle::DoubleQuote);
let (range, read) = scan_flow_scalar(data, stats, false)?;
let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
let scalar = range.into_token(data)?;
assert_eq!(read, 13);
@ -507,7 +520,7 @@ fourth'"#;
let stats = &mut MStats::new();
let expected = Token::Scalar(cow!("hello α Ω ッ"), ScalarStyle::DoubleQuote);
let (range, read) = scan_flow_scalar(data, stats, false)?;
let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
let scalar = range.into_token(data)?;
if !(scalar == expected)
@ -537,7 +550,7 @@ fourth""#;
let cmp = "first second third fourth";
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
let (range, _read) = scan_flow_scalar(data, stats, false)?;
let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
let scalar = range.into_token(data)?;
if !(scalar == expected)
@ -560,7 +573,7 @@ fourth""#;
let cmp = "first second third\nfourth";
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
let (range, _read) = scan_flow_scalar(data, stats, false)?;
let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
let scalar = range.into_token(data)?;
if !(scalar == expected)
@ -580,7 +593,7 @@ fourth""#;
let cmp = "first second";
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
let (range, _read) = scan_flow_scalar(data, stats, false)?;
let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
let scalar = range.into_token(data)?;
if !(scalar == expected)
@ -604,7 +617,7 @@ rst \
let cmp = "first second third\nfourth";
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
let (range, _read) = scan_flow_scalar(data, stats, false)?;
let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
let scalar = range.into_token(data)?;
if !(scalar == expected)

View File

@ -2,6 +2,7 @@ use crate::{
scanner::{
context::Context,
error::{ScanError, ScanResult as Result},
flag::Flags,
stats::MStats,
},
token::{ScalarStyle, Token},
@ -17,6 +18,7 @@ use crate::{
/// YAML 1.2: Section 7.3.3
/// yaml.org/spec/1.2/spec.html#ns-plain-first(c)
pub(in crate::scanner) fn scan_plain_scalar<'de>(
opts: Flags,
base: &'de str,
stats: &mut MStats,
cxt: &Context,
@ -53,6 +55,7 @@ pub(in crate::scanner) fn scan_plain_scalar<'de>(
// Inside flow contexts you *may not* start a plain scalar
// with a ':', '?', or '-' followed by a flow indicator
cache!(~buffer, 2, opts)?;
if flow_context && check!(~buffer => b':' | b'?' | b'-') && flow_indicator(buffer, 1)
{
return Err(ScanError::InvalidPlainScalar);
@ -60,6 +63,10 @@ pub(in crate::scanner) fn scan_plain_scalar<'de>(
'scalar: loop
{
// 4 is the largest character sequence we can encounter
// (document indicators)
cache!(~buffer, 4, opts)?;
if buffer.is_empty()
{
break 'scalar;
@ -110,6 +117,8 @@ pub(in crate::scanner) fn scan_plain_scalar<'de>(
// Handle non whitespace characters
while !isWhiteSpaceZ!(~buffer)
{
cache!(~buffer, 2, opts)?;
if (check!(~buffer => b':') && isWhiteSpaceZ!(~buffer, 1))
|| flow_context && flow_indicator(buffer, 0)
{
@ -128,6 +137,8 @@ pub(in crate::scanner) fn scan_plain_scalar<'de>(
// Handle whitespace characters
loop
{
cache!(~buffer, 1, opts)?;
match (isBlank!(~buffer), isBreak!(~buffer))
{
// No more whitespace, exit loop
@ -242,6 +253,7 @@ mod tests
use ScalarStyle::Plain;
use super::*;
use crate::scanner::flag::O_ZEROED;
type TestResult = anyhow::Result<()>;
@ -280,7 +292,7 @@ mod tests
for (i, &data) in tests.iter().enumerate()
{
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)
.map_err(|e| anyhow!("iteration {}: {}", i, e))?;
assert_eq!(token, expected, "on iteration {}", i);
@ -301,7 +313,7 @@ mod tests
for (i, &data) in tests.iter().enumerate()
{
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)
.map_err(|e| anyhow!("iteration {}: {}", i, e))?;
assert_eq!(token, expected, "on iteration {}", i);
@ -320,7 +332,7 @@ mod tests
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!(""), Plain);
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
assert_eq!(token, expected);
@ -339,7 +351,7 @@ mod tests
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("hello"), Plain);
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
assert_eq!(token, expected);
@ -356,7 +368,7 @@ mod tests
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("hello, world!"), Plain);
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
assert_eq!(token, expected);
@ -379,7 +391,7 @@ mod tests
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("hello this is a multi-line scalar"), Plain);
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
assert_eq!(token, expected);
@ -406,7 +418,7 @@ mod tests
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("this is\n\na scalar\nwith line#breaks"), Plain);
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
assert_eq!(token, expected);
@ -423,7 +435,7 @@ mod tests
let cxt = cxt!(block -> [0]);
let expected = Token::Scalar(cow!("hello"), Plain);
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
assert_eq!(token, expected);
@ -442,7 +454,7 @@ mod tests
let cxt = cxt!(flow -> 1);
let expected = Token::Scalar(cow!("hello"), Plain);
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
assert_eq!(token, expected);
@ -461,7 +473,7 @@ mod tests
for (i, &data) in tests.iter().enumerate()
{
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)
.map_err(|e| anyhow!("iteration {}: {}", i, e))?;
assert_eq!(token, expected, "on iteration {}", i);
@ -485,7 +497,7 @@ string!";
let cxt = cxt!(flow -> 1);
let expected = Token::Scalar(cow!("hello this is a multi-line string!"), Plain);
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
assert_eq!(token, expected);
@ -514,7 +526,7 @@ breaks
let cxt = cxt!(flow -> 1);
let expected = Token::Scalar(cow!("hello this\nbig\nstring\nhas\nline\nbreaks"), Plain);
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
assert_eq!(token, expected);
@ -531,7 +543,7 @@ breaks
let cxt = cxt!(flow -> 1);
let expected = Token::Scalar(cow!("hello"), Plain);
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
assert_eq!(token, expected);
@ -548,7 +560,7 @@ breaks
let cxt = cxt!(flow -> 1);
let expected = Token::Scalar(cow!("hello"), Plain);
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
assert_eq!(token, expected);
@ -569,7 +581,7 @@ breaks
let cxt = cxt!(flow -> 1);
let expected = Token::Scalar(cow!("hello"), Plain);
let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;
assert_eq!(token, expected);

View File

@ -66,6 +66,7 @@ use crate::{
scanner::{
eat_whitespace,
error::{ScanError, ScanResult as Result},
flag::Flags,
scalar::escape::tag_uri_unescape,
stats::MStats,
},
@ -79,6 +80,7 @@ use crate::{
/// possible, but may also copy the directive's handle and
/// prefix into .scratch if borrowing is not possible.
pub(in crate::scanner) fn scan_tag_directive<'de>(
opts: Flags,
base: &'de str,
stats: &mut MStats,
) -> Result<(Token<'de>, usize)>
@ -88,7 +90,7 @@ pub(in crate::scanner) fn scan_tag_directive<'de>(
// %TAG !named! :tag:prefix # a comment\n
// ^^^^^^^
let (handle, amt) = match scan_tag_handle(buffer, stats)?
let (handle, amt) = match scan_tag_handle(opts, buffer, stats)?
{
Some((handle, amt)) => (handle.into_inner(), amt),
None => return Err(ScanError::InvalidTagHandle),
@ -99,14 +101,15 @@ pub(in crate::scanner) fn scan_tag_directive<'de>(
// ^
// Check that there is >= 1 whitespace between handle and
// prefix
cache!(~buffer, 1, opts)?;
isBlank!(~buffer, else ScanError::InvalidTagPrefix)?;
// Chomp whitespace to prefix
advance!(buffer, eat_whitespace(buffer, stats, false));
advance!(buffer, eat_whitespace(opts, buffer, stats, false)?);
// %TAG !named! :tag:prefix # a comment\n
// ^^^^^^^^^^^
let (prefix, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, false)?;
let (prefix, amt) = scan_tag_uri(opts, buffer, stats, &mut can_borrow, false)?;
// %TAG !named! tag-prefix # a comment\n
// ^
@ -146,6 +149,7 @@ pub(in crate::scanner) fn scan_tag_directive<'de>(
/// ("!", "") => A non resolving tag
/// (handle, suffix) => A primary, secondary or named tag
pub(in crate::scanner) fn scan_node_tag<'de>(
opts: Flags,
base: &'de str,
stats: &mut MStats,
) -> Result<(Token<'de>, usize)>
@ -163,6 +167,8 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
* a zero length sub-slice out.
*/
cache!(~buffer, 2, opts)?;
// !<global:verbatim:tag:> "node"
// ^^
// If it's a verbatim tag, scan it
@ -172,10 +178,11 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
// !<global:verbatim:tag:> "node"
// ^^^^^^^^^^^^^^^^^^^^
let (verbatim, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, true)?;
let (verbatim, amt) = scan_tag_uri(opts, buffer, stats, &mut can_borrow, true)?;
// !<global:verbatim:tag:> "node"
// ^
cache!(~buffer, @amt + 1, 1, opts)?;
check!(~buffer, amt + 1 => b'>', else ScanError::InvalidTagSuffix)?;
let token = assemble_tag(&buffer[0..0], verbatim, can_borrow);
@ -185,7 +192,7 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
// Otherwise scan it as a normal tag
else
{
match scan_tag_handle(buffer, stats)?
match scan_tag_handle(opts, buffer, stats)?
{
// ! "node"
// ^
@ -200,7 +207,7 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
// !!global "node" OR !named!global "node"
// ^^^^^^ ^^^^^^
let (suffix, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, false)?;
let (suffix, amt) = scan_tag_uri(opts, buffer, stats, &mut can_borrow, false)?;
let token = assemble_tag(h, suffix, can_borrow);
@ -210,6 +217,8 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
// Handle scan couldn't find a closing !, meaning this is a local tag
None =>
{
cache!(~buffer, 1, opts)?;
// !local "node"
// ^
let handle = &buffer[..1];
@ -217,7 +226,7 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
// !local "node"
// ^^^^^
let (suffix, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, false)?;
let (suffix, amt) = scan_tag_uri(opts, buffer, stats, &mut can_borrow, false)?;
let token = assemble_tag(handle, suffix, can_borrow);
@ -240,6 +249,7 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
///
/// [Link]: https://yaml.org/spec/1.2/spec.html#ns-global-tag-prefix
pub(in crate::scanner) fn scan_tag_uri<'de>(
opts: Flags,
base: &'de str,
stats: &mut MStats,
can_borrow: &mut bool,
@ -251,6 +261,8 @@ pub(in crate::scanner) fn scan_tag_uri<'de>(
loop
{
cache!(~buffer, 1, opts)?;
match buffer.as_bytes()
{
// If it's a normal allowed character, add it
@ -293,7 +305,7 @@ pub(in crate::scanner) fn scan_tag_uri<'de>(
*can_borrow = false;
}
let amt = tag_uri_unescape(buffer, &mut scratch, true)?;
let amt = tag_uri_unescape(opts, buffer, &mut scratch, true)?;
advance!(buffer, :stats, amt);
},
// EOF before loop end is an error
@ -320,6 +332,7 @@ pub(in crate::scanner) fn scan_tag_uri<'de>(
/// Scans a tag handle from .base, attempting to return the
/// fragment if the handle is unambiguous.
pub(in crate::scanner) fn scan_tag_handle<'b>(
opts: Flags,
base: &'b str,
stats: &mut MStats,
) -> Result<Option<(TagHandle<'b>, usize)>>
@ -331,6 +344,7 @@ pub(in crate::scanner) fn scan_tag_handle<'b>(
// !!tag
// ^
// Check that we are indeed starting a handle
cache!(~buffer, 1, opts)?;
check!(~buffer => b'!', else ScanError::InvalidTagHandle)?;
// %TAG !handle! tag-prefix # a comment \n
@ -338,9 +352,10 @@ pub(in crate::scanner) fn scan_tag_handle<'b>(
// !handle!tag
// ^^^^^^
// Safety: we just proved above we have >= 1 byte ('!')
let name = take_while(buffer[1..].as_bytes(), u8::is_ascii_alphanumeric);
let name = take_while(opts, buffer[1..].as_bytes(), u8::is_ascii_alphanumeric)?;
let mut offset = 1 + name.len();
cache!(~buffer, @offset, 1, opts)?;
match buffer.as_bytes().get(offset)
{
// If we find a closing '!', then it must either be a secondary or named handle
@ -391,7 +406,7 @@ impl<'a> TagHandle<'a>
}
}
fn take_while<F>(b: &[u8], f: F) -> &[u8]
fn take_while<F>(opts: Flags, base: &[u8], f: F) -> Result<&[u8]>
where
F: Fn(&u8) -> bool,
{
@ -399,10 +414,12 @@ where
loop
{
match b.get(index)
let i = cache!(base, @index, 1, opts)?;
match base.get(index)
{
Some(b) if f(b) => index += 1,
_ => return &b[..index],
Some(b) if f(b) => index += i,
_ => return Ok(&base[..index]),
}
}
}

View File

@ -0,0 +1,65 @@
use cfg_if::cfg_if;
/// A reader over an in-memory `&str` which only exposes a
/// prefix of the underlying data, growing that prefix on
/// request via .expand().
///
/// The exposed prefix always ends on a UTF-8 character
/// boundary, so .read() cannot panic when the underlying data
/// contains multi-byte characters.
#[derive(Debug, Clone)]
pub(super) struct StrReader<'de>
{
    /// The complete underlying data
    s:    &'de str,
    /// Length, in bytes, of the currently visible prefix of .s
    size: usize,
}

impl<'de> StrReader<'de>
{
    // Buffer sizing constants, selected by the test_buffer_*
    // features. Note that neither constant is defined when
    // none of these features are enabled.
    cfg_if! {
        if #[cfg(feature = "test_buffer_large")]
        {
            pub const BUF_SIZE: usize = 4 * 1024;
            pub const BUF_EXTEND: usize = 64;
        }
        else if #[cfg(feature = "test_buffer_medium")]
        {
            pub const BUF_SIZE: usize = 8;
            pub const BUF_EXTEND: usize = 8;
        }
        else if #[cfg(feature = "test_buffer_small")]
        {
            pub const BUF_SIZE: usize = 1;
            pub const BUF_EXTEND: usize = 1;
        }
    }

    /// Create a new reader over .s, initially exposing (at
    /// least) the first .size bytes, capped to the length of
    /// .s. If .size falls inside a multi-byte character the
    /// whole character is exposed.
    pub fn new(s: &'de str, size: usize) -> Self
    {
        let size = Self::visible_len(s, size);

        Self { s, size }
    }

    /// Returns the currently visible prefix of the underlying
    /// data.
    pub fn read(&self) -> &'de str
    {
        // .size is only ever set through visible_len(), so it
        // is <= .s.len() and lies on a character boundary
        &self.s[..self.size]
    }

    /// Grow the visible prefix by (at least) .size bytes,
    /// capped to the length of the underlying data.
    pub fn expand(&mut self, size: usize)
    {
        // Saturate rather than overflow on absurd requests; the
        // result is capped to .s.len() anyway
        let wanted = self.size.saturating_add(size);

        self.size = Self::visible_len(self.s, wanted);
    }

    /// Returns true if there is still data which has not yet
    /// been made visible.
    pub fn expandable(&self) -> bool
    {
        self.size < self.s.len()
    }

    /// Cap .wanted to the length of .s, then round it up to
    /// the next UTF-8 character boundary.
    fn visible_len(s: &str, wanted: usize) -> usize
    {
        let mut len = std::cmp::min(s.len(), wanted);

        // Rounding up (rather than down) guarantees forward
        // progress on every non zero expansion. This always
        // terminates as s.len() is itself a boundary.
        while !s.is_char_boundary(len)
        {
            len += 1;
        }

        len
    }
}
/// Formats the reader as its complete underlying string,
/// regardless of how much of it is currently visible through
/// .read(). Formatter options (width, padding, precision) are
/// honoured exactly as they would be for the `&str` itself.
impl std::fmt::Display for StrReader<'_>
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
    {
        // Delegate straight to str's Display implementation
        std::fmt::Display::fmt(self.s, f)
    }
}

View File

@ -27,7 +27,7 @@ fn eat()
let mut buffer = data;
let mut s = Scanner::new();
s.eat_whitespace(&mut buffer, false);
s.eat_whitespace(O_ZEROED, &mut buffer, false).unwrap();
assert_eq!(buffer, "abc");
assert_eq!(s.stats, (3, 0, 3))
@ -40,7 +40,7 @@ fn eat_none()
let mut buffer = data;
let mut s = Scanner::new();
s.eat_whitespace(&mut buffer, false);
s.eat_whitespace(O_ZEROED, &mut buffer, false).unwrap();
assert_eq!(buffer, "abc");
assert_eq!(s.stats, (0, 0, 0))