Feature/scalar/plain #22

@@ -247,6 +247,18 @@ impl PartialEq<usize> for Indent
     }
 }
 
+impl PartialEq<Indent> for usize
+{
+    fn eq(&self, other: &Indent) -> bool
+    {
+        match other.0
+        {
+            Some(indent) => *self == indent,
+            None => false,
+        }
+    }
+}
+
 impl PartialOrd<usize> for Indent
 {
     fn partial_cmp(&self, other: &usize) -> Option<std::cmp::Ordering>
@@ -259,6 +271,18 @@ impl PartialOrd<usize> for Indent
     }
 }
 
+impl PartialOrd<Indent> for usize
+{
+    fn partial_cmp(&self, other: &Indent) -> Option<std::cmp::Ordering>
+    {
+        match other.0
+        {
+            Some(ref indent) => self.partial_cmp(indent),
+            None => Some(std::cmp::Ordering::Greater),
+        }
+    }
+}
+
 impl Add<usize> for Indent
 {
     type Output = usize;
@@ -39,6 +39,10 @@ pub enum ScanError
     /// A flow scalar was invalid for some reason
     InvalidFlowScalar,
 
+    /// A plain scalar contained a character sequence that
+    /// is not permitted
+    InvalidPlainScalar,
+
     /// A block entry was not expected or allowed
     InvalidBlockEntry,
 
@@ -287,6 +287,35 @@ macro_rules! isWhiteSpaceZ {
     };
 }
 
+/// Check if a YAML document indicator ('---', '...') exists
+/// @.offset in the given .buffer.
+///
+/// You must provide the current .buffer .column (or .stats
+/// object)
+///
+/// Modifiers:
+///     ~ .buffer := .buffer.as_bytes()
+///
+/// Variants
+///     /1 .buffer, .column
+///     /2 .buffer, :.stats
+macro_rules! isDocumentIndicator {
+    (~ $buffer:expr, :$stats:expr) => {
+        isDocumentIndicator!($buffer.as_bytes(), $stats.column)
+    };
+    ($buffer:expr, :$stats:expr) => {
+        isDocumentIndicator!($buffer, $stats.column)
+    };
+    (~ $buffer:expr, $column:expr) => {
+        isDocumentIndicator!($buffer.as_bytes(), $column)
+    };
+    ($buffer:expr, $column:expr) => {
+        $column == 0
+            && check!($buffer => [b'-', b'-', b'-', ..] | [b'.', b'.', b'.', ..])
+            && isWhiteSpaceZ!($buffer, 3)
+    };
+}
+
 /// Checks if byte (@ .offset) in .buffer is hexadecimal
 ///
 /// Modifiers:
@@ -19,6 +19,7 @@ use self::{
     entry::TokenEntry,
     error::{ScanError, ScanResult as Result},
     key::{Key, KeyPossible},
+    scalar::plain::scan_plain_scalar,
 };
 use crate::{
     queue::Queue,
@@ -167,7 +168,7 @@ impl Scanner
             [SINGLE, ..] | [DOUBLE, ..] => self.flow_scalar(base, tokens),
 
             // Is it a plain scalar?
-            // TODO
+            _ if self.is_plain_scalar(*base) => self.plain_scalar(base, tokens),
 
             // Otherwise its an error
             // TODO
@@ -469,6 +470,28 @@ impl Scanner
         Ok(())
     }
 
+    fn plain_scalar<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>) -> Result<()>
+    {
+        let buffer = *base;
+        let mut stats = self.stats.clone();
+
+        self.save_key(!REQUIRED)?;
+
+        let (token, amt) = scan_plain_scalar(buffer, &mut stats, &self.context)?;
+
+        // A simple key cannot follow a plain scalar, there must be
+        // an indicator or new line before a key is valid
+        // again.
+        self.simple_key_allowed = false;
+
+        advance!(*base, amt);
+        self.stats = stats;
+
+        enqueue!(token, :self.stats => tokens);
+
+        Ok(())
+    }
+
 fn explicit_key<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>) -> Result<()>
 {
     let block_context = self.context.is_block();
@@ -835,6 +858,51 @@ impl Scanner
 
         Ok(())
     }
+
+    /// Checks if .base starts with a character that could
+    /// be a plain scalar
+    fn is_plain_scalar(&self, base: &str) -> bool
+    {
+        if isWhiteSpaceZ!(~base)
+        {
+            return false;
+        }
+
+        /*
+         * Per the YAML spec, a plain scalar cannot start with
+         * any YAML indicators, excluding ':' '?' '-' in
+         * certain circumstances.
+         *
+         * See:
+         *   YAML 1.2: Section 7.3.3
+         *   yaml.org/spec/1.2/spec.html#ns-plain-first(c)
+         */
+        match base.as_bytes()
+        {
+            [DIRECTIVE, ..]
+            | [ANCHOR, ..]
+            | [ALIAS, ..]
+            | [TAG, ..]
+            | [SINGLE, ..]
+            | [DOUBLE, ..]
+            | [FLOW_MAPPING_START, ..]
+            | [FLOW_SEQUENCE_START, ..]
+            | [FLOW_MAPPING_END, ..]
+            | [FLOW_SEQUENCE_END]
+            | [FLOW_ENTRY, ..]
+            | [b'|', ..]
+            | [b'<', ..]
+            | [b'#', ..]
+            | [b'@', ..]
+            | [b'`', ..] => false,
+            [VALUE, ..] | [EXPLICIT_KEY, ..] | [BLOCK_ENTRY, ..]
+                if !is_plain_safe_c(base, 1, self.context.is_block()) =>
+            {
+                false
+            },
+            _ => true,
+        }
+    }
 }
 
 struct ScanIter<'de>
@@ -1108,6 +1176,18 @@ where
         Ok(())
     }
 
+/// Checks if the character at .offset is "safe" to start a
+/// plain scalar with, as defined in
+///
+/// yaml.org/spec/1.2/spec.html#ns-plain-safe(c)
+fn is_plain_safe_c(base: &str, offset: usize, block_context: bool) -> bool
+{
+    let flow_context = !block_context;
+    let not_flow_indicator = !check!(~base, offset => b',' | b'[' | b']' | b'{' | b'}');
+
+    block_context || (flow_context && not_flow_indicator)
+}
+
 /// Vessel for tracking various stats about the underlying
 /// buffer that are required for correct parsing of certain
 /// elements, and when contextualizing an error.
@@ -1346,6 +1426,56 @@ mod tests
         );
     }
 
+    #[test]
+    fn flow_collection_sequence_plain()
+    {
+        use ScalarStyle::Plain;
+        let data = "[a key: a value,another key: another value]";
+        let mut s = ScanIter::new(data);
+
+        tokens!(s =>
+            | Token::StreamStart(StreamEncoding::UTF8) => "expected start of stream",
+            | Token::FlowSequenceStart => "expected a flow sequence start '['",
+            | Token::Key => "expected a key",
+            | Token::Scalar(cow!("a key"), Plain) => "expected a scalar key: 'a key'",
+            | Token::Value => "expected a value",
+            | Token::Scalar(cow!("a value"), Plain) => "expected a scalar value: 'a value'",
+            | Token::FlowEntry => "expected a flow entry: ','",
+            | Token::Key => "expected a key",
+            | Token::Scalar(cow!("another key"), Plain) => "expected a scalar key: 'another key'",
+            | Token::Value => "expected a value",
+            | Token::Scalar(cow!("another value"), Plain) => "expected a scalar value: 'another value'",
+            | Token::FlowSequenceEnd => "expected a flow sequence end ']'",
+            | Token::StreamEnd => "expected end of stream",
+            @ None => "expected stream to be finished"
+        );
+    }
+
+    #[test]
+    fn flow_collection_sequence_plain_abnormal()
+    {
+        use ScalarStyle::Plain;
+        let data = "[-: -123,not# a comment : (-%!&*@`|>+-)]";
+        let mut s = ScanIter::new(data);
+
+        tokens!(s =>
+            | Token::StreamStart(StreamEncoding::UTF8) => "expected start of stream",
+            | Token::FlowSequenceStart => "expected a flow sequence start '['",
+            | Token::Key => "expected a key",
+            | Token::Scalar(cow!("-"), Plain) => "expected a scalar key: '-'",
+            | Token::Value => "expected a value",
+            | Token::Scalar(cow!("-123"), Plain) => "expected a scalar value: '-123'",
+            | Token::FlowEntry => "expected a flow entry: ','",
+            | Token::Key => "expected a key",
+            | Token::Scalar(cow!("not# a comment"), Plain) => "expected a scalar key: 'not# a comment'",
+            | Token::Value => "expected a value",
+            | Token::Scalar(cow!("(-%!&*@`|>+-)"), Plain) => "expected a scalar value: '(-%!&*@`|>+-)'",
+            | Token::FlowSequenceEnd => "expected a flow sequence end ']'",
+            | Token::StreamEnd => "expected end of stream",
+            @ None => "expected stream to be finished"
+        );
+    }
+
     #[test]
     fn flow_collection_nested()
     {
@@ -1567,6 +1697,8 @@ mod tests
         let data = "
 'one':
   - 'two'
+
+
   - 'three'
 ";
         let mut s = ScanIter::new(data);
@@ -1987,6 +2119,51 @@ mod tests
         );
     }
 
+    #[test]
+    fn plain_scalar_simple()
+    {
+        use ScalarStyle::Plain;
+
+        let data = "hello from a plain scalar!";
+        let mut s = ScanIter::new(data);
+
+        tokens!(s =>
+            | Token::StreamStart(StreamEncoding::UTF8) => "expected start of stream",
+            | Token::Scalar(cow!("hello from a plain scalar!"), Plain) => "expected a flow scalar (single)",
+            | Token::StreamEnd => "expected end of stream",
+            @ None => "expected stream to be finished"
+        );
+
+        assert_eq!(s.scan.stats, stats_of(data));
+    }
+
+    #[test]
+    fn plain_scalar_starting_indicator()
+    {
+        use ScalarStyle::Plain;
+
+        let data = "-a key-: ?value\n:: :value";
+        let mut s = ScanIter::new(data);
+
+        tokens!(s =>
+            | Token::StreamStart(StreamEncoding::UTF8) => "expected start of stream",
+            | Token::BlockMappingStart => "expected the start of a block mapping",
+            | Token::Key => "expected an explicit key",
+            | Token::Scalar(cow!("-a key-"), Plain) => "expected a plain scalar",
+            | Token::Value => "expected a value",
+            | Token::Scalar(cow!("?value"), Plain) => "expected a plain scalar",
+            | Token::Key => "expected an explicit key",
+            | Token::Scalar(cow!(":"), Plain) => "expected a plain scalar",
+            | Token::Value => "expected a value",
+            | Token::Scalar(cow!(":value"), Plain) => "expected a plain scalar",
+            | Token::BlockEnd => "expected the end of a block mapping",
+            | Token::StreamEnd => "expected end of stream",
+            @ None => "expected stream to be finished"
+        );
+
+        assert_eq!(s.scan.stats, stats_of(data));
+    }
+
     #[test]
     fn flow_scalar_single_simple()
     {
@@ -2182,6 +2359,94 @@ mod tests
         );
     }
 
+    #[test]
+    fn complex_plain()
+    {
+        use ScalarStyle::Plain;
+
+        let data = r##"
+
+---
+- [
+    key: value,
+      indented: value,
+    {an object: inside a sequence},
+    [sequence inception!]
+  ]
+- lets do it: &val as block,
+  can we :
+    build it:
+      higher?: *val
+  yes: we
+  can: baby
+
+"##;
+
+        let mut s = ScanIter::new(data);
+
+        tokens!(s =>
+            | Token::StreamStart(StreamEncoding::UTF8),
+            | Token::DocumentStart,
+            | Token::BlockSequenceStart,
+            | Token::BlockEntry,
+            | Token::FlowSequenceStart,
+            | Token::Key,
+            | Token::Scalar(cow!("key"), Plain),
+            | Token::Value,
+            | Token::Scalar(cow!("value"), Plain),
+            | Token::FlowEntry,
+            | Token::Key,
+            | Token::Scalar(cow!("indented"), Plain),
+            | Token::Value,
+            | Token::Scalar(cow!("value"), Plain),
+            | Token::FlowEntry,
+            | Token::FlowMappingStart,
+            | Token::Key,
+            | Token::Scalar(cow!("an object"), Plain),
+            | Token::Value,
+            | Token::Scalar(cow!("inside a sequence"), Plain),
+            | Token::FlowMappingEnd,
+            | Token::FlowEntry,
+            | Token::FlowSequenceStart,
+            | Token::Scalar(cow!("sequence inception!"), Plain),
+            | Token::FlowSequenceEnd,
+            | Token::FlowSequenceEnd,
+            | Token::BlockEntry,
+            | Token::BlockMappingStart,
+            | Token::Key,
+            | Token::Scalar(cow!("lets do it"), Plain),
+            | Token::Value,
+            | Token::Anchor(cow!("val")),
+            | Token::Scalar(cow!("as block,"), Plain),
+            | Token::Key,
+            | Token::Scalar(cow!("can we"), Plain),
+            | Token::Value,
+            | Token::BlockMappingStart,
+            | Token::Key,
+            | Token::Scalar(cow!("build it"), Plain),
+            | Token::Value,
+            | Token::BlockMappingStart,
+            | Token::Key,
+            | Token::Scalar(cow!("higher?"), Plain),
+            | Token::Value,
+            | Token::Alias(cow!("val")),
+            | Token::BlockEnd,
+            | Token::BlockEnd,
+            | Token::Key,
+            | Token::Scalar(cow!("yes"), Plain),
+            | Token::Value,
+            | Token::Scalar(cow!("we"), Plain),
+            | Token::Key,
+            | Token::Scalar(cow!("can"), Plain),
+            | Token::Value,
+            | Token::Scalar(cow!("baby"), Plain),
+            | Token::BlockEnd,
+            | Token::BlockEnd,
+            | Token::StreamEnd,
+            @ None
+        );
+    }
+
     #[test]
     fn stale_required_key_oversized()
     {
@@ -44,9 +44,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
     // Even in a scalar context, YAML prohibits starting a line
     // with document stream tokens followed by a blank
     // character
-    if stats.column == 0
-        && check!(~buffer => [b'-', b'-', b'-', ..] | [b'.', b'.', b'.', ..])
-        && isWhiteSpaceZ!(~buffer, 3)
+    if isDocumentIndicator!(~buffer, :stats)
     {
         return Err(ScanError::InvalidFlowScalar);
     }
@@ -1,2 +1,3 @@
 pub mod escape;
 pub mod flow;
+pub mod plain;
@@ -0,0 +1,580 @@
+use crate::{
+    scanner::{
+        context::Context,
+        error::{ScanError, ScanResult as Result},
+        MStats,
+    },
+    token::{ScalarStyle, Token},
+};
+
+/// Scans a plain scalar, returning a Token, and the amount
+/// read from .base. This function will attempt to borrow
+/// from .base, however it may be required to copy into a
+/// new allocation if line joining is required in the
+/// scalar.
+///
+/// See:
+///   YAML 1.2: Section 7.3.3
+///   yaml.org/spec/1.2/spec.html#ns-plain-first(c)
+pub(in crate::scanner) fn scan_plain_scalar<'de>(
+    base: &'de str,
+    stats: &mut MStats,
+    cxt: &Context,
+) -> Result<(Token<'de>, usize)>
+{
+    let mut buffer = base;
+    let mut scratch = Vec::new();
+    let indent = cxt.indent() + 1;
+
+    // Local copies of the given stats
+    let mut local_stats = stats.clone();
+    let mut scalar_stats = stats.clone();
+
+    // Do we need to normalize and therefore allocate?
+    let mut can_borrow = true;
+    // Have we hit a lower indentation to our starting indent?
+    let mut outdent = false;
+
+    // Track whitespace, line breaks accumulated, these have two
+    // uses:
+    //
+    // 1. In loop, for handling line joins
+    // 2. Post loop for truncating trailing space
+    let mut whitespace: usize = 0;
+    let mut lines: usize = 0;
+
+    // Are we in block/flow context?
+    let block_context = cxt.is_block();
+    let flow_context = !block_context;
+
+    // Have we hit a flow context scalar end indicator?
+    let flow_indicator =
+        |buffer: &str, at: usize| check!(~buffer, at => b',' | b'[' | b']' | b'{' | b'}');
+
+    // Inside flow contexts you *may not* start a plain scalar
+    // with a ':', '?', or '-' followed by a flow indicator
+    if flow_context && check!(~buffer => b':' | b'?' | b'-') && flow_indicator(buffer, 1)
+    {
+        return Err(ScanError::InvalidPlainScalar);
+    }
+
+    'scalar: loop
+    {
+        if buffer.is_empty()
+        {
+            break 'scalar;
+        }
+
+        if outdent
+        {
+            break 'scalar;
+        }
+
+        // A YAML document indicator or ' #' terminates a plain
+        // scalar
+        //
+        // Note that due to how this function is setup, the _only_
+        // times we will hit this guard is if:
+        //
+        // 1. We've just started the function, and thus we were
+        //    called on a non whitespace character
+        //
+        // 2. We've gone through the loop, exhausting any
+        //    whitespace, thus hitting this guard again
+        //
+        // Therefore just checking for '#' is okay
+        if isDocumentIndicator!(~buffer, :local_stats) || check!(~buffer => b'#')
+        {
+            break 'scalar;
+        }
+
+        // Check for character sequences which end a plain scalar,
+        // namely:
+        //
+        // ': '                            -> anywhere
+        // ',' | '[' | ']' | '{' | '}'     -> flow context
+        if (check!(~buffer => b':') && isWhiteSpaceZ!(~buffer, 1))
+            || flow_context && flow_indicator(buffer, 0)
+        {
+            break 'scalar;
+        }
+
+        // Reset whitespace counters for next char / whitespace
+        // sequence. We do this here after all possible terminations
+        // that could leave trailing whitespace, so we can
+        // accurately truncate the trailing whitespace post
+        // loop.
+        whitespace = 0;
+        lines = 0;
+
+        // Handle non whitespace characters
+        while !isWhiteSpaceZ!(~buffer)
+        {
+            if (check!(~buffer => b':') && isWhiteSpaceZ!(~buffer, 1))
+                || flow_context && flow_indicator(buffer, 0)
+            {
+                break;
+            }
+
+            if !can_borrow
+            {
+                scratch.push(buffer.as_bytes()[0])
+            }
+            advance!(buffer, :local_stats, 1);
+        }
+        // Save last non whitespace character position
+        scalar_stats = local_stats.clone();
+
+        // Handle whitespace characters
+        loop
+        {
+            match (isBlank!(~buffer), isBreak!(~buffer))
+            {
+                // No more whitespace, exit loop
+                (false, false) => break,
+                // Handle non break space
+                (true, _) =>
+                {
+                    if !can_borrow
+                    {
+                        scratch.push(buffer.as_bytes()[0])
+                    }
+                    whitespace += 1;
+                    advance!(buffer, :local_stats, 1);
+                },
+                // Handle line breaks
+                (false, _) =>
+                {
+                    set_no_borrow(&mut can_borrow, base, buffer, &mut scratch);
+
+                    lines += 1;
+                    advance!(buffer, :local_stats, @line);
+                },
+            }
+        }
+
+        // If the whitespace ended at a lower indent, then we're
+        // done, and should exit on the next loop
+        outdent = block_context && local_stats.column < indent;
+
+        // Handle line joins as needed
+        match lines
+        {
+            // No join needed, we're done
+            0 =>
+            {},
+            // If a single line was recorded, we _cannot_ have seen a line wholly made of
+            // whitespace, therefore join via a space
+            1 =>
+            {
+                // Note that we reset whitespace to zero here, so that the
+                // post loop truncate doesn't remove characters we've
+                // already removed here
+                scratch.truncate(scratch.len() - whitespace);
+                whitespace = 0;
+
+                scratch.push(SPACE);
+            },
+            // Else we need to append (n - 1) newlines, as we skip the origin line's break
+            _ =>
+            {
+                // Similarly, we reset whitespace here, but we _also_ set
+                // lines to the amount of lines we actually add to the
+                // scratch space.
+                scratch.truncate(scratch.len() - whitespace);
+                whitespace = 0;
+                lines -= 1;
+
+                // Safety: we can only reach this branch if lines > 1
+                for _ in 0..lines
+                {
+                    scratch.push(NEWLINE)
+                }
+            },
+        }
+    }
+
+    // Trim any trailing whitespace that might be left after
+    // exiting the loop
+    if !can_borrow
+    {
+        scratch.truncate(scratch.len() - (whitespace + lines));
+    }
+    // Note we use the stats which point at the last word read
+    let advance = scalar_stats.read - stats.read;
+
+    let slice = match can_borrow
+    {
+        true => cow!(&base[..advance]),
+        false =>
+        {
+            let utf8 = String::from_utf8(scratch).unwrap();
+
+            cow!(utf8)
+        },
+    };
+
+    let token = Token::Scalar(slice, ScalarStyle::Plain);
+    *stats = scalar_stats;
+
+    Ok((token, advance))
+}
+
+/// Handles the trap door from borrowing to copying
+fn set_no_borrow(can_borrow: &mut bool, base: &str, buffer: &str, scratch: &mut Vec<u8>)
+{
+    if *can_borrow
+    {
+        scratch.extend_from_slice(base[0..base.len() - buffer.len()].as_bytes());
+    }
+
+    *can_borrow = false
+}
+
+const SPACE: u8 = b' ';
+const NEWLINE: u8 = b'\n';
+
+#[cfg(test)]
+mod tests
+{
+    use anyhow::anyhow;
+    use pretty_assertions::assert_eq;
+    use ScalarStyle::Plain;
+
+    use super::*;
+
+    type TestResult = anyhow::Result<()>;
+
+    macro_rules! cxt {
+        (flow -> $level:expr) => {
+            {
+                let mut c = Context::new();
+
+                for _ in 0..$level {
+                    c.flow_increment().unwrap();
+                }
+
+                c
+            }
+        };
+        (block -> [ $($indent:expr),+ ]) => {
+            {
+                let mut c = Context::new();
+                $( cxt!(@blk &mut c, $indent) )+;
+
+                c
+            }
+        };
+        (@blk $cxt:expr, $indent:expr) => {
+            $cxt.indent_increment($indent, 0, true).unwrap()
+        }
+    }
+
+    #[test]
+    fn end_on_doc() -> TestResult
+    {
+        let tests = ["hello\n---\n", "hello\n... "];
+        let mut stats = MStats::new();
+        let cxt = cxt!(block -> [0]);
+        let expected = Token::Scalar(cow!("hello"), Plain);
+
+        for (i, &data) in tests.iter().enumerate()
+        {
+            let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)
+                .map_err(|e| anyhow!("iteration {}: {}", i, e))?;
+
+            assert_eq!(token, expected, "on iteration {}", i);
+
+            assert_eq!(amt, 5, "on iteration {}", i);
+        }
+
+        Ok(())
+    }
+
+    #[test]
+    fn end_on_comment() -> TestResult
+    {
+        let tests = ["hello #", "hello\n#"];
+        let mut stats = MStats::new();
+        let cxt = cxt!(block -> [0]);
+        let expected = Token::Scalar(cow!("hello"), Plain);
+
+        for (i, &data) in tests.iter().enumerate()
+        {
+            let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)
+                .map_err(|e| anyhow!("iteration {}: {}", i, e))?;
+
+            assert_eq!(token, expected, "on iteration {}", i);
+
+            assert_eq!(amt, 5, "on iteration {}", i);
+        }
+
+        Ok(())
+    }
+
+    #[test]
+    fn empty() -> TestResult
+    {
+        let data = "# a comment";
+        let mut stats = MStats::new();
+        let cxt = cxt!(block -> [0]);
+        let expected = Token::Scalar(cow!(""), Plain);
+
+        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+
+        assert_eq!(token, expected);
+
+        assert_eq!(amt, 0);
+
+        Ok(())
+    }
+
+    /* === BLOCK CONTEXT === */
+
+    #[test]
+    fn block_simple() -> TestResult
+    {
+        let data = "hello";
+        let mut stats = MStats::new();
+        let cxt = cxt!(block -> [0]);
+        let expected = Token::Scalar(cow!("hello"), Plain);
+
+        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+
+        assert_eq!(token, expected);
+
+        assert_eq!(amt, data.len());
+
+        Ok(())
+    }
+
+    #[test]
+    fn block_simple_key() -> TestResult
+    {
+        let data = "hello, world!: ";
+        let mut stats = MStats::new();
+        let cxt = cxt!(block -> [0]);
+        let expected = Token::Scalar(cow!("hello, world!"), Plain);
+
+        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+
+        assert_eq!(token, expected);
+
+        assert_eq!(amt, 13);
+
+        Ok(())
+    }
+
+    #[test]
+    fn block_multi_line() -> TestResult
+    {
+        let data = "hello
+    this
+    is
+    a
+    multi-line
+    scalar
+";
+        let mut stats = MStats::new();
+        let cxt = cxt!(block -> [0]);
+        let expected = Token::Scalar(cow!("hello this is a multi-line scalar"), Plain);
+
+        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+
+        assert_eq!(token, expected);
+
+        assert_eq!(amt, data.trim_end().len());
+
+        Ok(())
+    }
+
+    #[test]
+    fn block_multi_line_breaks() -> TestResult
+    {
+        let data = "this
+    is
+
+
+    a
+    scalar
+
+    with
+    line#breaks
+
+";
+        let mut stats = MStats::new();
+        let cxt = cxt!(block -> [0]);
+        let expected = Token::Scalar(cow!("this is\n\na scalar\nwith line#breaks"), Plain);
+
+        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+
+        assert_eq!(token, expected);
+
+        assert_eq!(amt, data.trim_end().len());
+
+        Ok(())
+    }
+
+    #[test]
+    fn block_trailing_whitespace() -> TestResult
+    {
+        let data = "hello ";
+        let mut stats = MStats::new();
+        let cxt = cxt!(block -> [0]);
+        let expected = Token::Scalar(cow!("hello"), Plain);
+
+        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+
+        assert_eq!(token, expected);
+
+        assert_eq!(amt, 5);
+
+        Ok(())
+    }
+
+    /* === FLOW CONTEXT === */
+
+    #[test]
+    fn flow_simple() -> TestResult
+    {
+        let data = "hello";
+        let mut stats = MStats::new();
+        let cxt = cxt!(flow -> 1);
+        let expected = Token::Scalar(cow!("hello"), Plain);
+
+        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+
+        assert_eq!(token, expected);
+
+        assert_eq!(amt, data.len());
+
+        Ok(())
+    }
+
+    #[test]
+    fn flow_end_on_indicator() -> TestResult
+    {
+        let tests = ["hello: ", "hello,", "hello[", "hello]", "hello{", "hello}"];
+        let mut stats = MStats::new();
+        let cxt = cxt!(flow -> 1);
+        let expected = Token::Scalar(cow!("hello"), Plain);
+
+        for (i, &data) in tests.iter().enumerate()
+        {
+            let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)
+                .map_err(|e| anyhow!("iteration {}: {}", i, e))?;
+
+            assert_eq!(token, expected, "on iteration {}", i);
+
+            assert_eq!(amt, 5, "on iteration {}", i);
+        }
+
+        Ok(())
+    }
+
+    #[test]
+    fn flow_multi_line() -> TestResult
+    {
+        let data = "hello
+    this
+    is
+    a
+    multi-line
+    string!";
+        let mut stats = MStats::new();
+        let cxt = cxt!(flow -> 1);
+        let expected = Token::Scalar(cow!("hello this is a multi-line string!"), Plain);
+
+        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+
+        assert_eq!(token, expected);
+
+        assert_eq!(amt, data.len());
+
+        Ok(())
+    }
+
+    #[test]
+    fn flow_multi_line_breaks() -> TestResult
+    {
+        let data = "hello
+    this
+
+    big
+
+    string
+
+    has
+
+    line
+
+    breaks
+";
+        let mut stats = MStats::new();
+        let cxt = cxt!(flow -> 1);
+        let expected = Token::Scalar(cow!("hello this\nbig\nstring\nhas\nline\nbreaks"), Plain);
+
+        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+
+        assert_eq!(token, expected);
+
+        assert_eq!(amt, 66);
+
+        Ok(())
+    }
+
+    #[test]
+    fn flow_trailing_whitespace_key() -> TestResult
+    {
+        let data = "hello : ";
+        let mut stats = MStats::new();
+        let cxt = cxt!(flow -> 1);
+        let expected = Token::Scalar(cow!("hello"), Plain);
+
+        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+
+        assert_eq!(token, expected);
+
+        assert_eq!(amt, 5);
+
+        Ok(())
+    }
+
+    #[test]
+    fn flow_trailing_whitespace() -> TestResult
+    {
+        let data = "hello ";
+        let mut stats = MStats::new();
+        let cxt = cxt!(flow -> 1);
+        let expected = Token::Scalar(cow!("hello"), Plain);
+
+        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+
+        assert_eq!(token, expected);
+
+        assert_eq!(amt, 5);
+
+        Ok(())
+    }
+
+    #[test]
+    fn flow_trailing_breaks() -> TestResult
+    {
+        let data = "hello
+
+
+
+";
+        let mut stats = MStats::new();
+        let cxt = cxt!(flow -> 1);
+        let expected = Token::Scalar(cow!("hello"), Plain);
+
+        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+
+        assert_eq!(token, expected);
+
+        assert_eq!(amt, 5);
+
+        Ok(())
+    }
+}