Feature/scanner/option #27

Merged
bazaah merged 19 commits from feature/scanner/option into master 2021-09-09 19:29:29 +00:00
3 changed files with 41 additions and 18 deletions
Showing only changes of commit 8b68d2f8e4 - Show all commits

View file

@ -190,7 +190,7 @@ impl Scanner
}, },
// Is it a flow scalar? // Is it a flow scalar?
[SINGLE, ..] | [DOUBLE, ..] => self.fetch_flow_scalar(base, tokens), [SINGLE, ..] | [DOUBLE, ..] => self.fetch_flow_scalar(opts, base, tokens),
// Is it a plain scalar? // Is it a plain scalar?
_ if self.is_plain_scalar(*base) => self.fetch_plain_scalar(base, tokens), _ if self.is_plain_scalar(*base) => self.fetch_plain_scalar(base, tokens),
@ -391,6 +391,7 @@ impl Scanner
fn fetch_flow_scalar<'de>( fn fetch_flow_scalar<'de>(
&mut self, &mut self,
opts: Flags,
base: &mut &'de str, base: &mut &'de str,
tokens: &mut Tokens<'de>, tokens: &mut Tokens<'de>,
) -> Result<()> ) -> Result<()>
@ -406,7 +407,7 @@ impl Scanner
self.save_key(!REQUIRED)?; self.save_key(!REQUIRED)?;
let (range, amt) = scan_flow_scalar(buffer, &mut stats, single)?; let (range, amt) = scan_flow_scalar(opts, buffer, &mut stats, single)?;
let token = range.into_token(buffer)?; let token = range.into_token(buffer)?;
// A key cannot follow a flow scalar, as we're either // A key cannot follow a flow scalar, as we're either

View file

@ -18,12 +18,17 @@ use crate::scanner::{
/// escape sequence. /// escape sequence.
/// ///
/// [Link]: https://yaml.org/spec/1.2/spec.html#c-escape /// [Link]: https://yaml.org/spec/1.2/spec.html#c-escape
pub(in crate::scanner) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Result<usize> pub(in crate::scanner) fn flow_unescape(
opts: Flags,
base: &str,
scratch: &mut Vec<u8>,
) -> Result<usize>
{ {
let mut buffer = base; let mut buffer = base;
let mut escape_len: Option<u8> = None; let mut escape_len: Option<u8> = None;
// Not an escape sequence, early exit // Not an escape sequence, early exit
cache!(~buffer, 1, opts)?;
if !check!(~buffer => b'\\') if !check!(~buffer => b'\\')
{ {
return Ok(0); return Ok(0);
@ -33,6 +38,7 @@ pub(in crate::scanner) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Re
// See 5.7: Escaped Characters // See 5.7: Escaped Characters
// yaml.org/spec/1.2/spec.html#id2776092 // yaml.org/spec/1.2/spec.html#id2776092
cache!(~buffer, 1, opts)?;
match buffer.as_bytes() match buffer.as_bytes()
{ {
[b'0', ..] => scratch.push(b'\0'), [b'0', ..] => scratch.push(b'\0'),
@ -63,6 +69,9 @@ pub(in crate::scanner) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Re
if let Some(sequence) = escape_len if let Some(sequence) = escape_len
{ {
// Note that we cache the _entire_ escape sequence before
// calling write_unicode_point
cache!(~buffer, sequence, opts)?;
let amt = write_unicode_point(buffer, scratch, sequence)?; let amt = write_unicode_point(buffer, scratch, sequence)?;
advance!(buffer, amt); advance!(buffer, amt);
} }

View file

@ -3,6 +3,7 @@ use std::ops::Range;
use crate::{ use crate::{
scanner::{ scanner::{
error::{ScanError, ScanResult as Result}, error::{ScanError, ScanResult as Result},
flag::Flags,
scalar::escape::flow_unescape, scalar::escape::flow_unescape,
stats::MStats, stats::MStats,
}, },
@ -15,6 +16,7 @@ use crate::{
/// the underlying .base, however it may be required to copy /// the underlying .base, however it may be required to copy
/// into .scratch and borrow from that lifetime. /// into .scratch and borrow from that lifetime.
pub(in crate::scanner) fn scan_flow_scalar( pub(in crate::scanner) fn scan_flow_scalar(
opts: Flags,
base: &str, base: &str,
stats: &mut MStats, stats: &mut MStats,
single: bool, single: bool,
@ -35,6 +37,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
}; };
// Eat left quote // Eat left quote
cache!(~buffer, 1, opts)?;
advance!(buffer, :stats, 1); advance!(buffer, :stats, 1);
'scalar: loop 'scalar: loop
@ -44,6 +47,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
// Even in a scalar context, YAML prohibits starting a line // Even in a scalar context, YAML prohibits starting a line
// with document stream tokens followed by a blank // with document stream tokens followed by a blank
// character // character
cache!(~buffer, 4, opts)?;
if isDocumentIndicator!(~buffer, :stats) if isDocumentIndicator!(~buffer, :stats)
{ {
return Err(ScanError::InvalidFlowScalar); return Err(ScanError::InvalidFlowScalar);
@ -55,9 +59,14 @@ pub(in crate::scanner) fn scan_flow_scalar(
return Err(ScanError::UnexpectedEOF); return Err(ScanError::UnexpectedEOF);
} }
cache!(~buffer, 1, opts)?;
// Consume non whitespace characters // Consume non whitespace characters
while !isWhiteSpaceZ!(~buffer) while !isWhiteSpaceZ!(~buffer)
{ {
// Longest sequence we can hit is 2 characters ('')
cache!(~buffer, 2, opts)?;
// if we encounter an escaped quote we can no longer borrow // if we encounter an escaped quote we can no longer borrow
// from .base, we must unescape the quote into .scratch // from .base, we must unescape the quote into .scratch
if kind == SingleQuote && check!(~buffer => [SINGLE, SINGLE, ..]) if kind == SingleQuote && check!(~buffer => [SINGLE, SINGLE, ..])
@ -88,7 +97,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
{ {
set_no_borrow(&mut can_borrow, base, buffer, &mut scratch); set_no_borrow(&mut can_borrow, base, buffer, &mut scratch);
let read = flow_unescape(buffer, &mut scratch)?; let read = flow_unescape(opts, buffer, &mut scratch)?;
advance!(buffer, :stats, read); advance!(buffer, :stats, read);
} }
// It's a non blank character, add it // It's a non blank character, add it
@ -126,6 +135,8 @@ pub(in crate::scanner) fn scan_flow_scalar(
// Consume whitespace // Consume whitespace
loop loop
{ {
cache!(~buffer, 1, opts)?;
match (isBlank!(~buffer), isBreak!(~buffer)) match (isBlank!(~buffer), isBreak!(~buffer))
{ {
// No more whitespace, exit loop // No more whitespace, exit loop
@ -206,6 +217,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
}; };
// Eat the right quote // Eat the right quote
cache!(~buffer, 1, opts)?;
advance!(buffer, :stats, 1); advance!(buffer, :stats, 1);
let advance = base.len() - buffer.len(); let advance = base.len() - buffer.len();
@ -296,6 +308,7 @@ mod tests
use pretty_assertions::assert_eq; use pretty_assertions::assert_eq;
use super::*; use super::*;
use crate::scanner::flag::O_ZEROED;
type TestResult = anyhow::Result<()>; type TestResult = anyhow::Result<()>;
@ -308,7 +321,7 @@ mod tests
let stats = &mut MStats::new(); let stats = &mut MStats::new();
let expected = Token::Scalar(cow!(""), ScalarStyle::SingleQuote); let expected = Token::Scalar(cow!(""), ScalarStyle::SingleQuote);
let (range, read) = scan_flow_scalar(data, stats, true)?; let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
let scalar = range.into_token(data)?; let scalar = range.into_token(data)?;
assert_eq!(read, 2); assert_eq!(read, 2);
@ -328,7 +341,7 @@ mod tests
let stats = &mut MStats::new(); let stats = &mut MStats::new();
let expected = Token::Scalar(cow!("hello world"), ScalarStyle::SingleQuote); let expected = Token::Scalar(cow!("hello world"), ScalarStyle::SingleQuote);
let (range, read) = scan_flow_scalar(data, stats, true)?; let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
let scalar = range.into_token(data)?; let scalar = range.into_token(data)?;
assert_eq!(read, 13); assert_eq!(read, 13);
@ -352,7 +365,7 @@ fourth'"#;
let cmp = "first second third fourth"; let cmp = "first second third fourth";
let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote); let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote);
let (range, _read) = scan_flow_scalar(data, stats, true)?; let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
let scalar = range.into_token(data)?; let scalar = range.into_token(data)?;
if !(scalar == expected) if !(scalar == expected)
@ -372,7 +385,7 @@ fourth'"#;
let cmp = "first second"; let cmp = "first second";
let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote); let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote);
let (range, _read) = scan_flow_scalar(data, stats, true)?; let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
let scalar = range.into_token(data)?; let scalar = range.into_token(data)?;
if !(scalar == expected) if !(scalar == expected)
@ -395,7 +408,7 @@ fourth'"#;
let cmp = "first second third\nfourth"; let cmp = "first second third\nfourth";
let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote); let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote);
let (range, _read) = scan_flow_scalar(data, stats, true)?; let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
let scalar = range.into_token(data)?; let scalar = range.into_token(data)?;
if !(scalar == expected) if !(scalar == expected)
@ -417,7 +430,7 @@ fourth'"#;
{ {
stats = MStats::new(); stats = MStats::new();
match scan_flow_scalar(t, &mut stats, true) match scan_flow_scalar(O_ZEROED, t, &mut stats, true)
{ {
Err(e) => assert_eq!( Err(e) => assert_eq!(
e, expected, e, expected,
@ -443,7 +456,7 @@ fourth'"#;
{ {
stats = MStats::new(); stats = MStats::new();
match scan_flow_scalar(t, &mut stats, true) match scan_flow_scalar(O_ZEROED, t, &mut stats, true)
{ {
Err(e) => assert_eq!( Err(e) => assert_eq!(
e, expected, e, expected,
@ -467,7 +480,7 @@ fourth'"#;
let stats = &mut MStats::new(); let stats = &mut MStats::new();
let expected = Token::Scalar(cow!(""), ScalarStyle::DoubleQuote); let expected = Token::Scalar(cow!(""), ScalarStyle::DoubleQuote);
let (range, read) = scan_flow_scalar(data, stats, false)?; let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
let scalar = range.into_token(data)?; let scalar = range.into_token(data)?;
assert_eq!(read, 2); assert_eq!(read, 2);
@ -487,7 +500,7 @@ fourth'"#;
let stats = &mut MStats::new(); let stats = &mut MStats::new();
let expected = Token::Scalar(cow!("hello world"), ScalarStyle::DoubleQuote); let expected = Token::Scalar(cow!("hello world"), ScalarStyle::DoubleQuote);
let (range, read) = scan_flow_scalar(data, stats, false)?; let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
let scalar = range.into_token(data)?; let scalar = range.into_token(data)?;
assert_eq!(read, 13); assert_eq!(read, 13);
@ -507,7 +520,7 @@ fourth'"#;
let stats = &mut MStats::new(); let stats = &mut MStats::new();
let expected = Token::Scalar(cow!("hello α Ω ッ"), ScalarStyle::DoubleQuote); let expected = Token::Scalar(cow!("hello α Ω ッ"), ScalarStyle::DoubleQuote);
let (range, read) = scan_flow_scalar(data, stats, false)?; let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
let scalar = range.into_token(data)?; let scalar = range.into_token(data)?;
if !(scalar == expected) if !(scalar == expected)
@ -537,7 +550,7 @@ fourth""#;
let cmp = "first second third fourth"; let cmp = "first second third fourth";
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote); let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
let (range, _read) = scan_flow_scalar(data, stats, false)?; let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
let scalar = range.into_token(data)?; let scalar = range.into_token(data)?;
if !(scalar == expected) if !(scalar == expected)
@ -560,7 +573,7 @@ fourth""#;
let cmp = "first second third\nfourth"; let cmp = "first second third\nfourth";
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote); let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
let (range, _read) = scan_flow_scalar(data, stats, false)?; let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
let scalar = range.into_token(data)?; let scalar = range.into_token(data)?;
if !(scalar == expected) if !(scalar == expected)
@ -580,7 +593,7 @@ fourth""#;
let cmp = "first second"; let cmp = "first second";
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote); let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
let (range, _read) = scan_flow_scalar(data, stats, false)?; let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
let scalar = range.into_token(data)?; let scalar = range.into_token(data)?;
if !(scalar == expected) if !(scalar == expected)
@ -604,7 +617,7 @@ rst \
let cmp = "first second third\nfourth"; let cmp = "first second third\nfourth";
let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote); let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);
let (range, _read) = scan_flow_scalar(data, stats, false)?; let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
let scalar = range.into_token(data)?; let scalar = range.into_token(data)?;
if !(scalar == expected) if !(scalar == expected)