Feature/scanner/explicit key #21

Merged
bazaah merged 7 commits from feature/scanner/explict-key into master 2021-08-08 09:59:11 +00:00
3 changed files with 412 additions and 31 deletions

View file

@ -1,6 +1,9 @@
use std::ops::Add; use std::ops::Add;
use crate::scanner::error::{ScanError, ScanResult as Result}; use crate::{
scanner::error::{ScanError, ScanResult as Result},
token::Marker,
};
pub(in crate::scanner) const STARTING_INDENT: Indent = Indent(None); pub(in crate::scanner) const STARTING_INDENT: Indent = Indent(None);
@ -20,7 +23,7 @@ pub(in crate::scanner) struct Context
// Block context fields // Block context fields
indent: usize, indent: usize,
indents: Vec<usize>, indents: Vec<IndentEntry>,
started: bool, started: bool,
} }
@ -87,6 +90,16 @@ impl Context
} }
} }
/// Borrow the stack of indentation entries as a read only
/// slice, with the most recently pushed entry last
pub fn indents(&self) -> &[IndentEntry]
{
    self.indents.as_slice()
}
/// Mutably borrow the stack of indentation entries,
/// allowing callers to adjust the stored entries in place
pub fn indents_mut(&mut self) -> &mut Vec<IndentEntry>
{
&mut self.indents
}
/// Check if we are currently in the block context /// Check if we are currently in the block context
pub fn is_block(&self) -> bool pub fn is_block(&self) -> bool
{ {
@ -97,15 +110,12 @@ impl Context
/// flow context, returning the current level. Note /// flow context, returning the current level. Note
/// that this function will only increment the indent if /// that this function will only increment the indent if
/// .column > current_indent and .is_block returns true /// .column > current_indent and .is_block returns true
pub fn indent_increment(&mut self, column: usize) -> Result<Indent> pub fn indent_increment(&mut self, column: usize, line: usize, map: bool) -> Result<Indent>
{ {
if self.is_block() && self.indent() < column self.started = true;
{ self.indents.push(IndentEntry::new(self.indent, line, map));
self.started = true;
self.indents.push(self.indent);
self.indent = column; self.indent = column;
}
Ok(self.indent.into()) Ok(self.indent.into())
} }
@ -125,21 +135,79 @@ impl Context
{ {
while Indent(Some(self.indent)) > column while Indent(Some(self.indent)) > column
{ {
match self.indents.pop() if !self.pop_indent(&mut f)?
{ {
Some(indent) => break;
{
self.indent = indent;
f(self.indent)?;
},
None => break,
} }
} }
} }
Ok(old - self.indents.len()) Ok(old - self.indents.len())
} }
/// Pop the most recent entry off the indent stack, making
/// the indent it stored the current one, and then calling
/// .f with that restored indent.
///
/// Returns true if an entry was popped, and false if the
/// stack was already empty, in which case .f is not
/// called. An error from .f is propagated, though the
/// entry has already been removed by then
pub fn pop_indent<F>(&mut self, mut f: F) -> Result<bool>
where
    F: FnMut(usize) -> Result<()>,
{
    if let Some(entry) = self.indents.pop()
    {
        self.indent = entry.indent;
        f(self.indent)?;

        return Ok(true);
    }

    Ok(false)
}
}
/// Stack entry for tracking indentation levels, and
/// associated metadata
#[derive(Debug, Clone, Copy)]
pub(in crate::scanner) struct IndentEntry
{
/// The indent level saved in this entry, which becomes
/// the Context's current indent again once the entry is
/// popped
indent: usize,
/// The type of indentation started, either
/// BlockSequenceStart or BlockMappingStart
pub kind: Marker,
/// Line the indentation was set on. Note that this may
/// be different from the original line _if_
/// .zero_indented is true, as it is used for record
/// keeping
pub line: usize,
/// Flag for checking if the associated indent is for a
/// zero indented sequence, that is a sequence sitting at
/// the same indentation level as its parent mapping
pub zero_indented: bool,
}
impl IndentEntry
{
/// Create a new entry storing .indent, set on .line. If
/// .map is true the entry is marked as a block mapping,
/// otherwise as a block sequence. New entries are never
/// flagged as .zero_indented
pub fn new(indent: usize, line: usize, map: bool) -> Self
{
    let kind = if map
    {
        Marker::BlockMappingStart
    }
    else
    {
        Marker::BlockSequenceStart
    };

    Self {
        zero_indented: false,
        indent,
        kind,
        line,
    }
}
/// The indent level stored in this entry, converted into
/// an Indent
pub fn indent(&self) -> Indent
{
self.indent.into()
}
} }
/// A wrapper around usize, that allows it us to express the /// A wrapper around usize, that allows it us to express the

View file

@ -42,6 +42,9 @@ pub enum ScanError
/// A block entry was not expected or allowed /// A block entry was not expected or allowed
InvalidBlockEntry, InvalidBlockEntry,
/// A mapping key was not expected or allowed
InvalidKey,
/// A mapping value was not expected or allowed /// A mapping value was not expected or allowed
InvalidValue, InvalidValue,

View file

@ -27,7 +27,7 @@ use crate::{
scalar::flow::scan_flow_scalar, scalar::flow::scan_flow_scalar,
tag::{scan_node_tag, scan_tag_directive}, tag::{scan_node_tag, scan_tag_directive},
}, },
token::{StreamEncoding, Token}, token::{Marker, StreamEncoding, Token},
}; };
type Tokens<'de> = Queue<TokenEntry<'de>>; type Tokens<'de> = Queue<TokenEntry<'de>>;
@ -101,6 +101,7 @@ impl Scanner
self.expire_stale_saved_key()?; self.expire_stale_saved_key()?;
self.pop_zero_indent_sequence(*base, tokens)?;
self.unroll_indent(tokens, self.stats.column)?; self.unroll_indent(tokens, self.stats.column)?;
if base.is_empty() || self.state == StreamState::Done if base.is_empty() || self.state == StreamState::Done
@ -142,7 +143,10 @@ impl Scanner
}, },
// Is it an explicit key? // Is it an explicit key?
// TODO [EXPLICIT_KEY, ..] if self.context.is_flow() || isWhiteSpaceZ!(~base, 1) =>
{
self.explicit_key(base, tokens)
},
// Is it a value? // Is it a value?
[VALUE, ..] if isWhiteSpaceZ!(~base, 1) || self.context.is_flow() => [VALUE, ..] if isWhiteSpaceZ!(~base, 1) || self.context.is_flow() =>
@ -465,6 +469,69 @@ impl Scanner
Ok(()) Ok(())
} }
/// Fetch an explicit key token ('?') from .base, adding it
/// to .tokens. In the block context this also checks that
/// a key is currently allowed, and rolls the indentation
/// forward, enqueuing a BlockMappingStart if required
fn explicit_key<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>) -> Result<()>
{
    let in_block = self.context.is_block();

    // The block context needs an additional check that keys
    // are currently legal, which guards against YAML like:
    //
    //  !!str ? 'whoops, tag is': 'in the wrong place'
    //  ^^^^^^^
    //  Invalid token sequence
    //
    // as node decorators (tags, anchors, aliases) must
    // directly precede the node
    if in_block && !self.simple_key_allowed
    {
        return Err(ScanError::InvalidKey);
    }

    // It may also need indentation tokens added to the
    // stream: increase the indentation level, pushing a
    // BlockMappingStart token to the queue if required
    if in_block
    {
        roll_indent(
            &mut self.context,
            tokens,
            self.stats.read,
            self.stats.lines,
            self.stats.column,
            BLOCK_MAP,
        )?;
    }

    // An explicit key supersedes any saved implicit key
    self.remove_saved_key()?;

    // In the block context another key may directly follow
    // an explicit key, typically when the explicit key is
    // itself a mapping that starts inline with it. E.g:
    //
    //  ? my key: value
    //  : value
    //
    // which is equivalent to
    //
    //  ? { my key: value }: value
    self.simple_key_allowed = in_block;

    advance!(*base, :self.stats, 1);

    enqueue!(Token::Key, :self.stats => tokens);

    Ok(())
}
/// Fetch a value token (':') from .base, adding to /// Fetch a value token (':') from .base, adding to
/// .tokens. Also handles unwinding any saved /// .tokens. Also handles unwinding any saved
/// keys and indentation increases, as needed /// keys and indentation increases, as needed
@ -481,8 +548,9 @@ impl Scanner
// block mapping start token // block mapping start token
roll_indent( roll_indent(
&mut self.context, &mut self.context,
key_stats.read,
tokens, tokens,
key_stats.read,
key_stats.lines,
key_stats.column, key_stats.column,
BLOCK_MAP, BLOCK_MAP,
)?; )?;
@ -511,8 +579,9 @@ impl Scanner
// block mapping start token // block mapping start token
roll_indent( roll_indent(
&mut self.context, &mut self.context,
self.stats.read,
tokens, tokens,
self.stats.read,
self.stats.lines,
self.stats.column, self.stats.column,
BLOCK_MAP, BLOCK_MAP,
)?; )?;
@ -618,14 +687,33 @@ impl Scanner
{ {
true => roll_indent( true => roll_indent(
&mut self.context, &mut self.context,
self.stats.read,
tokens, tokens,
self.stats.read,
self.stats.lines,
self.stats.column, self.stats.column,
!BLOCK_MAP, !BLOCK_MAP,
), ),
false => Err(ScanError::InvalidBlockEntry), false => Err(ScanError::InvalidBlockEntry),
}?; }?;
// Check if the current block context is zero
// indented
let is_zero_indented = self.context.indents().last().map_or(false, |entry| {
entry.indent() == self.stats.column && entry.line < self.stats.lines
});
// If it is, we need to update the line to the
// current, to disarm pop_zero_indent_sequence
if is_zero_indented
{
let current = self.stats.lines;
if let Some(entry) = self.context.indents_mut().last_mut()
{
entry.line = current;
}
}
// Reset saved key // Reset saved key
self.remove_saved_key()?; self.remove_saved_key()?;
@ -682,6 +770,40 @@ impl Scanner
Ok(()) Ok(())
} }
/// Manages the decrement of zero indented block
/// sequences, popping the sequence's indentation level and
/// enqueuing its BlockEnd once the scanner has moved on
/// from it
fn pop_zero_indent_sequence<'de>(
    &mut self,
    base: &'de str,
    tokens: &mut Tokens<'de>,
) -> Result<()>
{
    // Without an active indentation entry there is nothing
    // to pop
    let entry = match self.context.indents().last()
    {
        Some(entry) => entry,
        None => return Ok(()),
    };

    // An indentation level is popped if, and only if:
    //  1. The current line is past the entry's line
    //  2. The entry was flagged zero_indented
    //  3. The entry's indentation is for a sequence
    //  4. The next byte sequence is not a block entry
    //
    // NOTE(review): 4. appears to look only for a leading
    // '-', not for the whitespace which must follow a block
    // entry -- confirm that is intended
    let moved_on = entry.line < self.stats.lines;
    let zero_sequence = entry.zero_indented && entry.kind == Marker::BlockSequenceStart;

    if !(moved_on && zero_sequence) || check!(~base => b'-')
    {
        return Ok(());
    }

    let read = self.stats.read;

    self.context.pop_indent(|_| {
        enqueue!(Token::BlockEnd, read => tokens);
        Ok(())
    })?;

    Ok(())
}
/// Save a position in the buffer as a potential simple /// Save a position in the buffer as a potential simple
/// key location, if a simple key is possible /// key location, if a simple key is possible
fn save_key(&mut self, required: bool) -> Result<()> fn save_key(&mut self, required: bool) -> Result<()>
@ -911,23 +1033,49 @@ fn eat_whitespace(base: &str, stats: &mut MStats, comments: bool) -> usize
/// indent token to the indent stack if required /// indent token to the indent stack if required
fn roll_indent<'de>( fn roll_indent<'de>(
context: &mut Context, context: &mut Context,
mark: usize,
tokens: &mut Tokens<'de>, tokens: &mut Tokens<'de>,
mark: usize,
line: usize,
column: usize, column: usize,
map: bool, map: bool,
) -> Result<()> ) -> Result<()>
{ {
if context.is_block() && context.indent() < column let token = match map
{ {
context.indent_increment(column)?; true => Token::BlockMappingStart,
false => Token::BlockSequenceStart,
};
let token = match map if context.is_block()
{
// If the indent is greater, we don't need to worry about
// same level sequences
if context.indent() < column
{ {
true => Token::BlockMappingStart, context.indent_increment(column, line, map)?;
false => Token::BlockSequenceStart,
};
enqueue!(token, mark => tokens); enqueue!(token, mark => tokens);
}
// Otherwise we need to check if this is:
// 1. A sequence
// 2. At the same indentation level
// 3. Is the first element of this sequence
else if (!map) && context.indent() == column
{
let add_token = context
.indents()
.last()
.map_or(false, |entry| entry.kind == Marker::BlockMappingStart);
if add_token
{
context.indent_increment(column, line, map)?;
context.indents_mut().last_mut().unwrap().zero_indented = true;
enqueue!(token, mark => tokens);
}
}
} }
Ok(()) Ok(())
@ -1044,6 +1192,7 @@ const FLOW_SEQUENCE_START: u8 = b'[';
const FLOW_SEQUENCE_END: u8 = b']'; const FLOW_SEQUENCE_END: u8 = b']';
const FLOW_ENTRY: u8 = b','; const FLOW_ENTRY: u8 = b',';
const BLOCK_ENTRY: u8 = b'-'; const BLOCK_ENTRY: u8 = b'-';
const EXPLICIT_KEY: u8 = b'?';
const COMMENTS: bool = true; const COMMENTS: bool = true;
const REQUIRED: bool = true; const REQUIRED: bool = true;
@ -1304,7 +1453,8 @@ mod tests
use ScalarStyle::SingleQuote; use ScalarStyle::SingleQuote;
let data = " let data = "
- - 'a' - - 'a'
- 'nested' -
'nested'
- 'block' - 'block'
- 'sequence' - 'sequence'
"; ";
@ -1409,6 +1559,87 @@ mod tests
); );
} }
/// A block sequence may be the value of a mapping key
/// without being indented further than the key itself
#[test]
fn block_collection_sequence_no_indent()
{
    use ScalarStyle::SingleQuote;

    // The sequence entries sit in the same column as the
    // key they belong to
    let data = "
'one':
- 'two'
- 'three'
";
    let mut s = ScanIter::new(data);

    tokens!(s =>
        | Token::StreamStart(StreamEncoding::UTF8)      => "expected start of stream",
        | Token::BlockMappingStart                      => "expected start of block mapping",
        | Token::Key                                    => "expected an implicit key",
        | Token::Scalar(cow!("one"), SingleQuote)       => "expected a flow scalar",
        | Token::Value                                  => "expected a value",
        | Token::BlockSequenceStart                     => "expected start of block sequence",
        | Token::BlockEntry                             => "expected a sequence entry",
        | Token::Scalar(cow!("two"), SingleQuote)       => "expected a flow scalar",
        | Token::BlockEntry                             => "expected a sequence entry",
        | Token::Scalar(cow!("three"), SingleQuote)     => "expected a flow scalar",
        | Token::BlockEnd                               => "expected end of zero indented sequence",
        | Token::BlockEnd                               => "expected end of block mapping",
        | Token::StreamEnd                              => "expected end of stream",
        @ None                                          => "expected stream to be finished"
    );
}
/// Zero indented sequences must be closed correctly both
/// inside a nested mapping and at the top level
#[test]
fn block_collection_sequence_no_indent_nested()
{
    use ScalarStyle::SingleQuote;

    // 'two' and 'four' form a mapping nested under 'one', so
    // they must be indented further than it, with each of
    // their sequences in the same column as its key. 'six'
    // returns to the top level mapping
    let data = "
'one':
  'two':
  - 'three'
  'four':
  - 'five'
'six':
- 'seven'
";
    let mut s = ScanIter::new(data);

    tokens!(s =>
        | Token::StreamStart(StreamEncoding::UTF8)      => "expected start of stream",
        | Token::BlockMappingStart                      => "expected start of block mapping",
        | Token::Key                                    => "expected an implicit key",
        | Token::Scalar(cow!("one"), SingleQuote)       => "expected a flow scalar",
        | Token::Value                                  => "expected a value",
        | Token::BlockMappingStart                      => "expected start of nested mapping",
        | Token::Key                                    => "expected an implicit key",
        | Token::Scalar(cow!("two"), SingleQuote)       => "expected a flow scalar",
        | Token::Value                                  => "expected a value",
        | Token::BlockSequenceStart                     => "expected start of zero indented sequence",
        | Token::BlockEntry                             => "expected a sequence entry",
        | Token::Scalar(cow!("three"), SingleQuote)     => "expected a flow scalar",
        | Token::BlockEnd                               => "expected end of zero indented sequence",
        | Token::Key                                    => "expected an implicit key",
        | Token::Scalar(cow!("four"), SingleQuote)      => "expected a flow scalar",
        | Token::Value                                  => "expected a value",
        | Token::BlockSequenceStart                     => "expected start of zero indented sequence",
        | Token::BlockEntry                             => "expected a sequence entry",
        | Token::Scalar(cow!("five"), SingleQuote)      => "expected a flow scalar",
        | Token::BlockEnd                               => "expected end of zero indented sequence",
        | Token::BlockEnd                               => "expected end of nested mapping",
        | Token::Key                                    => "expected an implicit key",
        | Token::Scalar(cow!("six"), SingleQuote)       => "expected a flow scalar",
        | Token::Value                                  => "expected a value",
        | Token::BlockSequenceStart                     => "expected start of zero indented sequence",
        | Token::BlockEntry                             => "expected a sequence entry",
        | Token::Scalar(cow!("seven"), SingleQuote)     => "expected a flow scalar",
        | Token::BlockEnd                               => "expected end of zero indented sequence",
        | Token::BlockEnd                               => "expected end of block mapping",
        | Token::StreamEnd                              => "expected end of stream",
        @ None                                          => "expected stream to be finished"
    );
}
#[test] #[test]
fn chomp_comments() fn chomp_comments()
{ {
@ -1677,6 +1908,85 @@ mod tests
assert_eq!(s.scan.stats, (2, 0, 2)); assert_eq!(s.scan.stats, (2, 0, 2));
} }
/// An explicit key ('?') with its value (':') on the
/// following line
#[test]
fn explicit_key_simple()
{
    use ScalarStyle::SingleQuote;

    let data = "
? 'an explicit key'
: 'a value'
";
    let mut s = ScanIter::new(data);

    tokens!(s =>
        | Token::StreamStart(StreamEncoding::UTF8)              => "expected start of stream",
        | Token::BlockMappingStart                              => "expected the start of a block mapping",
        | Token::Key                                            => "expected an explicit key",
        | Token::Scalar(cow!("an explicit key"), SingleQuote)   => "expected a scalar",
        | Token::Value                                          => "expected a value",
        | Token::Scalar(cow!("a value"), SingleQuote)           => "expected a scalar",
        | Token::BlockEnd                                       => "expected the end of a block mapping",
        | Token::StreamEnd                                      => "expected end of stream",
        @ None                                                  => "expected stream to be finished"
    );
}
/// An explicit key whose node is an inline block mapping,
/// and which has no value of its own
#[test]
fn explicit_key_mapping_missing_value()
{
    // A value is implied by the explicit key, and can be
    // omitted from the document, while still being
    // valid YAML
    let data = "? 'sub mapping key': 'sub mapping value'";
    let mut s = ScanIter::new(data);

    tokens!(s =>
        | Token::StreamStart(StreamEncoding::UTF8)                              => "expected start of stream",
        | Token::BlockMappingStart                                              => "expected the start of a block mapping",
        | Token::Key                                                            => "expected an explicit key",
        | Token::BlockMappingStart                                              => "expected the start of the key's block mapping",
        | Token::Key                                                            => "expected an implicit key",
        | Token::Scalar(cow!("sub mapping key"), ScalarStyle::SingleQuote)      => "expected a scalar",
        | Token::Value                                                          => "expected a value",
        | Token::Scalar(cow!("sub mapping value"), ScalarStyle::SingleQuote)    => "expected a scalar",
        | Token::BlockEnd                                                       => "expected the end of the key's block mapping",
        | Token::BlockEnd                                                       => "expected the end of a block mapping",
        | Token::StreamEnd                                                      => "expected end of stream",
        @ None                                                                  => "expected stream to be finished"
    );
}
/// An explicit key whose node is a multi line block
/// mapping, followed by the explicit key's value
#[test]
fn explicit_key_mapping()
{
    // 'another' belongs to the mapping that starts inline
    // with the explicit key, so it must line up with
    // 'key mapping', two columns in
    let data = "
? 'key mapping': 'value'
  'another': 'value'
: 'bar'
";
    let mut s = ScanIter::new(data);

    tokens!(s =>
        | Token::StreamStart(StreamEncoding::UTF8)                      => "expected start of stream",
        | Token::BlockMappingStart                                      => "expected the start of a block mapping",
        | Token::Key                                                    => "expected an explicit key",
        | Token::BlockMappingStart                                      => "expected the start of the key's block mapping",
        | Token::Key                                                    => "expected an implicit key",
        | Token::Scalar(cow!("key mapping"), ScalarStyle::SingleQuote)  => "expected a scalar",
        | Token::Value                                                  => "expected a value",
        | Token::Scalar(cow!("value"), ScalarStyle::SingleQuote)        => "expected a scalar",
        | Token::Key                                                    => "expected an implicit key",
        | Token::Scalar(cow!("another"), ScalarStyle::SingleQuote)      => "expected a scalar",
        | Token::Value                                                  => "expected a value",
        | Token::Scalar(cow!("value"), ScalarStyle::SingleQuote)        => "expected a scalar",
        | Token::BlockEnd                                               => "expected the end of the key's block mapping",
        | Token::Value                                                  => "expected a value",
        | Token::Scalar(cow!("bar"), ScalarStyle::SingleQuote)          => "expected a scalar",
        | Token::BlockEnd                                               => "expected the end of a block mapping",
        | Token::StreamEnd                                              => "expected end of stream",
        @ None                                                          => "expected stream to be finished"
    );
}
#[test] #[test]
fn flow_scalar_single_simple() fn flow_scalar_single_simple()
{ {