diff --git a/src/scanner/context.rs b/src/scanner/context.rs index 2443282..1ce5b4b 100644 --- a/src/scanner/context.rs +++ b/src/scanner/context.rs @@ -1,6 +1,9 @@ use std::ops::Add; -use crate::scanner::error::{ScanError, ScanResult as Result}; +use crate::{ + scanner::error::{ScanError, ScanResult as Result}, + token::Marker, +}; pub(in crate::scanner) const STARTING_INDENT: Indent = Indent(None); @@ -20,7 +23,7 @@ pub(in crate::scanner) struct Context // Block context fields indent: usize, - indents: Vec, + indents: Vec, started: bool, } @@ -87,6 +90,16 @@ impl Context } } + pub fn indents(&self) -> &[IndentEntry] + { + &self.indents + } + + pub fn indents_mut(&mut self) -> &mut Vec + { + &mut self.indents + } + /// Check if we are currently in the block context pub fn is_block(&self) -> bool { @@ -97,15 +110,12 @@ impl Context /// flow context, returning the current level. Note /// that this function will only increment the indent if /// .column > current_indent and .is_block returns true - pub fn indent_increment(&mut self, column: usize) -> Result + pub fn indent_increment(&mut self, column: usize, line: usize, map: bool) -> Result { - if self.is_block() && self.indent() < column - { - self.started = true; - self.indents.push(self.indent); + self.started = true; + self.indents.push(IndentEntry::new(self.indent, line, map)); - self.indent = column; - } + self.indent = column; Ok(self.indent.into()) } @@ -125,21 +135,79 @@ impl Context { while Indent(Some(self.indent)) > column { - match self.indents.pop() + if !self.pop_indent(&mut f)? { - Some(indent) => - { - self.indent = indent; - - f(self.indent)?; - }, - None => break, + break; } } } Ok(old - self.indents.len()) } + + pub fn pop_indent(&mut self, mut f: F) -> Result + where + F: FnMut(usize) -> Result<()>, + { + match self.indents.pop() + { + Some(entry) => + { + self.indent = entry.indent; + + f(self.indent)?; + + Ok(true) + }, + None => Ok(false), + } + } +} + +/// Stack entry for tracking indentation levels, and +/// associated metadata +#[derive(Debug, Clone, Copy)] +pub(in crate::scanner) struct IndentEntry +{ + indent: usize, + + /// The type of indentation started, either + /// BlockSequenceStart or BlockMappingStart + pub kind: Marker, + + /// Line the indentation was set on. Note that this may + /// be different from the original line _if_ + /// .zero_indented is true, as it is used for record + /// keeping + pub line: usize, + + /// Flag for checking if the associated indent for a + /// zero indented sequence + pub zero_indented: bool, +} + +impl IndentEntry +{ + pub fn new(indent: usize, line: usize, map: bool) -> Self + { + let kind = match map + { + true => Marker::BlockMappingStart, + false => Marker::BlockSequenceStart, + }; + + Self { + indent, + kind, + line, + zero_indented: false, + } + } + + pub fn indent(&self) -> Indent + { + self.indent.into() + } } /// A wrapper around usize, that allows it us to express the diff --git a/src/scanner/error.rs b/src/scanner/error.rs index f802484..29f15c8 100644 --- a/src/scanner/error.rs +++ b/src/scanner/error.rs @@ -42,6 +42,9 @@ pub enum ScanError /// A block entry was not expected or allowed InvalidBlockEntry, + /// A mapping key was not expected or allowed + InvalidKey, + /// A mapping value was not expected or allowed InvalidValue, diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index f985fc2..ece70f8 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -27,7 +27,7 @@ use crate::{ scalar::flow::scan_flow_scalar, tag::{scan_node_tag, scan_tag_directive}, }, - token::{StreamEncoding, Token}, + token::{Marker, StreamEncoding, Token}, }; type Tokens<'de> = Queue>; @@ -101,6 +101,7 @@ impl Scanner self.expire_stale_saved_key()?; + self.pop_zero_indent_sequence(*base, tokens)?; self.unroll_indent(tokens, self.stats.column)?; if base.is_empty() || self.state == StreamState::Done @@ -142,7 +143,10 @@ impl Scanner }, // Is it an explicit key? - // TODO + [EXPLICIT_KEY, ..] if self.context.is_flow() || isWhiteSpaceZ!(~base, 1) => + { + self.explicit_key(base, tokens) + }, // Is it a value? [VALUE, ..] if isWhiteSpaceZ!(~base, 1) || self.context.is_flow() => @@ -465,6 +469,69 @@ impl Scanner Ok(()) } + fn explicit_key<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>) -> Result<()> + { + let block_context = self.context.is_block(); + /* + * If in the block context we may need to add indentation + * tokens to the stream, and we need an additional + * check that keys are currently legal. + * + * This can occur, for example if you have the following + * YAML: + * + * !!str ? 'whoops, tag is': 'in the wrong place' + * ^^^^^^^ + * Invalid token sequence + * + * As node decorators (tags, anchors, aliases) must be + * directly preceding the node + */ + if block_context + { + // Ensure that keys are legal + if !self.simple_key_allowed + { + return Err(ScanError::InvalidKey); + } + + // Increase the indentation level, and push a + // BlockMappingStart token to the queue, if + // required + roll_indent( + &mut self.context, + tokens, + self.stats.read, + self.stats.lines, + self.stats.column, + BLOCK_MAP, + )?; + } + + // Remove any saved implicit key + self.remove_saved_key()?; + + /* Another key may follow an explicit key in the block + * context, typically when this explicit key is a + * mapping node, and the mapping starts inline with the + * explicit key. E.g: + * + * ? my key: value + * : value + * + * is equivalent to + * + * ? { my key: value }: value + */ + self.simple_key_allowed = block_context; + + advance!(*base, :self.stats, 1); + + enqueue!(Token::Key, :self.stats => tokens); + + Ok(()) + } + /// Fetch a value token (':') from .base, adding to /// .tokens. Also handles unwinding any saved /// keys and indentation increases, as needed @@ -481,8 +548,9 @@ impl Scanner // block mapping start token roll_indent( &mut self.context, - key_stats.read, tokens, + key_stats.read, + key_stats.lines, key_stats.column, BLOCK_MAP, )?; @@ -511,8 +579,9 @@ impl Scanner // block mapping start token roll_indent( &mut self.context, - self.stats.read, tokens, + self.stats.read, + self.stats.lines, self.stats.column, BLOCK_MAP, )?; @@ -618,14 +687,33 @@ impl Scanner { true => roll_indent( &mut self.context, - self.stats.read, tokens, + self.stats.read, + self.stats.lines, self.stats.column, !BLOCK_MAP, ), false => Err(ScanError::InvalidBlockEntry), }?; + // Check if the current block context is zero + // indented + let is_zero_indented = self.context.indents().last().map_or(false, |entry| { + entry.indent() == self.stats.column && entry.line < self.stats.lines + }); + + // If it is, we need to update the line to the + // current, to disarm pop_zero_indent_sequence + if is_zero_indented + { + let current = self.stats.lines; + + if let Some(entry) = self.context.indents_mut().last_mut() + { + entry.line = current; + } + } + // Reset saved key self.remove_saved_key()?; @@ -682,6 +770,40 @@ impl Scanner Ok(()) } + /// Manages the decrement of zero indented block + /// sequences + fn pop_zero_indent_sequence<'de>( + &mut self, + base: &'de str, + tokens: &mut Tokens<'de>, + ) -> Result<()> + { + if let Some(entry) = self.context.indents().last() + { + /* + * Pop an indentation level if, and only if: + * 1. Current line != entry's line + * 2. Current indentation is for a sequence + * 3. The next byte sequence is not a block entry + * 4. The entry was flagged zero_indented + */ + if entry.line < self.stats.lines + && entry.zero_indented + && entry.kind == Marker::BlockSequenceStart + && (!check!(~base => b'-')) + { + let read = self.stats.read; + + self.context.pop_indent(|_| { + enqueue!(Token::BlockEnd, read => tokens); + Ok(()) + })?; + } + } + + Ok(()) + } + /// Save a position in the buffer as a potential simple /// key location, if a simple key is possible fn save_key(&mut self, required: bool) -> Result<()> @@ -911,23 +1033,49 @@ fn eat_whitespace(base: &str, stats: &mut MStats, comments: bool) -> usize /// indent token to the indent stack if required fn roll_indent<'de>( context: &mut Context, - mark: usize, tokens: &mut Tokens<'de>, + mark: usize, + line: usize, column: usize, map: bool, ) -> Result<()> { - if context.is_block() && context.indent() < column + let token = match map { - context.indent_increment(column)?; + true => Token::BlockMappingStart, + false => Token::BlockSequenceStart, + }; - let token = match map + if context.is_block() + { + // If the indent is greater, we don't need to worry about + // same level sequences + if context.indent() < column { - true => Token::BlockMappingStart, - false => Token::BlockSequenceStart, - }; + context.indent_increment(column, line, map)?; - enqueue!(token, mark => tokens); + enqueue!(token, mark => tokens); + } + // Otherwise we need to check if this is: + // 1. A sequence + // 2. At the same indentation level + // 3. Is the first element of this sequence + else if (!map) && context.indent() == column + { + let add_token = context + .indents() + .last() + .map_or(false, |entry| entry.kind == Marker::BlockMappingStart); + + if add_token + { + context.indent_increment(column, line, map)?; + + context.indents_mut().last_mut().unwrap().zero_indented = true; + + enqueue!(token, mark => tokens); + } + } } Ok(()) @@ -1044,6 +1192,7 @@ const FLOW_SEQUENCE_START: u8 = b'['; const FLOW_SEQUENCE_END: u8 = b']'; const FLOW_ENTRY: u8 = b','; const BLOCK_ENTRY: u8 = b'-'; +const EXPLICIT_KEY: u8 = b'?'; const COMMENTS: bool = true; const REQUIRED: bool = true; @@ -1304,7 +1453,8 @@ mod tests use ScalarStyle::SingleQuote; let data = " - - 'a' - - 'nested' + - + 'nested' - 'block' - 'sequence' "; @@ -1409,6 +1559,87 @@ mod tests ); } + #[test] + fn block_collection_sequence_no_indent() + { + use ScalarStyle::SingleQuote; + + let data = " +'one': +- 'two' +- 'three' +"; + let mut s = ScanIter::new(data); + + tokens!(s => + | Token::StreamStart(StreamEncoding::UTF8) => "expected start of stream", + | Token::BlockMappingStart => "expected start of block mapping", + | Token::Key => "expected an implicit key", + | Token::Scalar(cow!("one"), SingleQuote) => "expected a flow scalar", + | Token::Value => "expected a value", + | Token::BlockSequenceStart => "expected start of block sequence", + | Token::BlockEntry => "expected a sequence entry", + | Token::Scalar(cow!("two"), SingleQuote) => "expected a flow scalar", + | Token::BlockEntry => "expected a sequence entry", + | Token::Scalar(cow!("three"), SingleQuote) => "expected a flow scalar", + | Token::BlockEnd => "expected end of nested mapping", + | Token::BlockEnd => "expected end of block mapping", + | Token::StreamEnd => "expected end of stream", + @ None => "expected stream to be finished" + ); + } + + #[test] + fn block_collection_sequence_no_indent_nested() + { + use ScalarStyle::SingleQuote; + + let data = " +'one': + 'two': + - 'three' + 'four': + - 'five' +'six': +- 'seven' +"; + let mut s = ScanIter::new(data); + + tokens!(s => + | Token::StreamStart(StreamEncoding::UTF8) => "expected start of stream", + | Token::BlockMappingStart => "expected start of block mapping", + | Token::Key => "expected an implicit key", + | Token::Scalar(cow!("one"), SingleQuote) => "expected a flow scalar", + | Token::Value => "expected a value", + | Token::BlockMappingStart => "expected start of nested mapping", + | Token::Key => "expected an implicit key", + | Token::Scalar(cow!("two"), SingleQuote) => "expected a flow scalar", + | Token::Value => "expected a value", + | Token::BlockSequenceStart => "expected start of zero indented sequence", + | Token::BlockEntry => "expected a sequence entry", + | Token::Scalar(cow!("three"), SingleQuote) => "expected a flow scalar", + | Token::BlockEnd => "expected end of zero indented sequence", + | Token::Key => "expected an implicit key", + | Token::Scalar(cow!("four"), SingleQuote) => "expected a flow scalar", + | Token::Value => "expected a value", + | Token::BlockSequenceStart => "expected start of zero indented sequence", + | Token::BlockEntry => "expected a sequence entry", + | Token::Scalar(cow!("five"), SingleQuote) => "expected a flow scalar", + | Token::BlockEnd => "expected end of zero indented sequence", + | Token::BlockEnd => "expected end of nested mapping", + | Token::Key => "expected an implicit key", + | Token::Scalar(cow!("six"), SingleQuote) => "expected a flow scalar", + | Token::Value => "expected a value", + | Token::BlockSequenceStart => "expected start of zero indented sequence", + | Token::BlockEntry => "expected a sequence entry", + | Token::Scalar(cow!("seven"), SingleQuote) => "expected a flow scalar", + | Token::BlockEnd => "expected end of zero indented sequence", + | Token::BlockEnd => "expected end of block mapping", + | Token::StreamEnd => "expected end of stream", + @ None => "expected stream to be finished" + ); + } + #[test] fn chomp_comments() { @@ -1677,6 +1908,85 @@ mod tests assert_eq!(s.scan.stats, (2, 0, 2)); } + #[test] + fn explicit_key_simple() + { + let data = " +? 'an explicit key' +: 'a value' +"; + let mut s = ScanIter::new(data); + + tokens!(s => + | Token::StreamStart(StreamEncoding::UTF8) => "expected start of stream", + | Token::BlockMappingStart => "expected the start of a block mapping", + | Token::Key => "expected an explicit key", + | Token::Scalar(cow!("an explicit key"), ScalarStyle::SingleQuote) => "expected a scalar", + | Token::Value => "expected a value", + | Token::Scalar(cow!("a value"), ScalarStyle::SingleQuote) => "expected a scalar", + | Token::BlockEnd => "expected the end of a block mapping", + | Token::StreamEnd => "expected end of stream", + @ None => "expected stream to be finished" + ); + } + + #[test] + fn explicit_key_mapping_missing_value() + { + // A value is implied by the explicit key, and can be + // omitted from the document, while still being + // valid YAML + let data = "? 'sub mapping key': 'sub mapping value'"; + let mut s = ScanIter::new(data); + + tokens!(s => + | Token::StreamStart(StreamEncoding::UTF8) => "expected start of stream", + | Token::BlockMappingStart => "expected the start of a block mapping", + | Token::Key => "expected an explicit key", + | Token::BlockMappingStart => "expected the start of a block mapping", + | Token::Key => "expected an explicit key", + | Token::Scalar(cow!("sub mapping key"), ScalarStyle::SingleQuote) => "expected a scalar", + | Token::Value => "expected a value", + | Token::Scalar(cow!("sub mapping value"), ScalarStyle::SingleQuote)=> "expected a scalar", + | Token::BlockEnd => "expected the end of a block mapping", + | Token::BlockEnd => "expected the end of a block mapping", + | Token::StreamEnd => "expected end of stream", + @ None => "expected stream to be finished" + ); + } + + #[test] + fn explicit_key_mapping() + { + let data = " +? 'key mapping': 'value' + 'another': 'value' +: 'bar' +"; + let mut s = ScanIter::new(data); + + tokens!(s => + | Token::StreamStart(StreamEncoding::UTF8) => "expected start of stream", + | Token::BlockMappingStart => "expected the start of a block mapping", + | Token::Key => "expected an explicit key", + | Token::BlockMappingStart => "expected the start of a block mapping", + | Token::Key => "expected an explicit key", + | Token::Scalar(cow!("key mapping"), ScalarStyle::SingleQuote) => "expected a scalar", + | Token::Value => "expected a value", + | Token::Scalar(cow!("value"), ScalarStyle::SingleQuote) => "expected a scalar", + | Token::Key => "expected an explicit key", + | Token::Scalar(cow!("another"), ScalarStyle::SingleQuote) => "expected a scalar", + | Token::Value => "expected a value", + | Token::Scalar(cow!("value"), ScalarStyle::SingleQuote) => "expected a scalar", + | Token::BlockEnd => "expected the end of a block mapping", + | Token::Value => "expected a value", + | Token::Scalar(cow!("bar"), ScalarStyle::SingleQuote) => "expected a scalar", + | Token::BlockEnd => "expected the end of a block mapping", + | Token::StreamEnd => "expected end of stream", + @ None => "expected stream to be finished" + ); + } + #[test] fn flow_scalar_single_simple() {