2021-09-09 19:29:29 +00:00
16 changed files with 535 additions and 142 deletions
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@ -23,6 +23,9 @@ jobs:
          - name: "Unit Tests"
            cmd: test
            args: --lib --bins
+          - name: "Unit Tests: feature.test_buffer_small"
+            cmd: test
+            args: --lib --features=test_buffer_small
        include:
          - os: ubuntu-latest
            sccache-path: /home/runner/.cache/sccache
--- a/Cargo.lock
+++ b/Cargo.lock
@ -30,6 +30,18 @@ version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"

+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
 [[package]]
 name = "ctor"
 version = "0.1.20"
@ -139,5 +151,7 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "atoi",
+ "bitflags",
+ "cfg-if",
 "pretty_assertions",
 ]
--- a/Cargo.toml
+++ b/Cargo.toml
@ -6,9 +6,18 @@ edition = "2018"

 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

+[features]
+# PRIVATE! FOR USE IN TEST ONLY!
+test_buffer = []
+test_buffer_large = ["test_buffer"]
+test_buffer_medium = ["test_buffer"]
+test_buffer_small = ["test_buffer"]
+
 [dependencies]
 atoi = "0.4"
+bitflags = "1"

 [dev-dependencies]
 anyhow = "1"
 pretty_assertions = "0.7"
+cfg-if = "1"
--- a/src/scanner/anchor.rs
+++ b/src/scanner/anchor.rs
@ -1,5 +1,6 @@
 use super::{
    error::{ScanError, ScanResult as Result},
+    flag::Flags,
    stats::MStats,
    ALIAS, ANCHOR,
 };
@ -8,6 +9,7 @@ use crate::token::Token;
 /// Scan an anchor or alias from the underlying .buffer
 /// returning the relevant Token
 pub(in crate::scanner) fn scan_anchor<'de>(
+    opts: Flags,
    buffer: &mut &'de str,
    stats: &mut MStats,
    kind: &AnchorKind,
@ -17,7 +19,7 @@ pub(in crate::scanner) fn scan_anchor<'de>(

    // *anchor 'rest of the line'
    //  ^^^^^^
-    let anchor = take_while(buffer.as_bytes(), u8::is_ascii_alphanumeric);
+    let anchor = take_while(opts, buffer.as_bytes(), u8::is_ascii_alphanumeric)?;

    let anchor = advance!(<- *buffer, :stats, anchor.len());

@ -33,6 +35,7 @@ pub(in crate::scanner) fn scan_anchor<'de>(
    // There does not necessarily need to be a whitespace so we
    // also check against a list of valid starting
    // tokens
+    cache!(~buffer, 1, opts)?;
    check!(~buffer
        => b' ' | b'\n' | b'?' | b',' | b']' | b'}' | b'%' | b'@' | b'`',
        else ScanError::InvalidAnchorName
@ -62,10 +65,10 @@ impl AnchorKind
    /// starts from the given .byte
    pub fn new(byte: &u8) -> Option<Self>
    {
-        let s = match byte
+        let s = match *byte
        {
-            &ALIAS => Self::Alias,
-            &ANCHOR => Self::Anchor,
+            ALIAS => Self::Alias,
+            ANCHOR => Self::Anchor,
            _ => return None,
        };

@ -73,7 +76,7 @@ impl AnchorKind
    }
 }

-fn take_while<F>(b: &[u8], f: F) -> &[u8]
+fn take_while<F>(opts: Flags, base: &[u8], f: F) -> Result<&[u8]>
 where
    F: Fn(&u8) -> bool,
 {
@ -81,10 +84,12 @@ where

    loop
    {
-        match b.get(index)
+        let i = cache!(base, @index, 1, opts)?;
+
+        match base.get(index)
        {
-            Some(b) if f(b) => index += 1,
-            _ => return &b[..index],
+            Some(b) if f(b) => index += i,
+            _ => return Ok(&base[..index]),
        }
    }
 }
--- a/src/scanner/directive.rs
+++ b/src/scanner/directive.rs
@ -5,13 +5,14 @@ use super::{
    stats::MStats,
 };
 use crate::{
-    scanner::{eat_whitespace, tag::scan_tag_directive, COMMENTS},
+    scanner::{eat_whitespace, flag::Flags, tag::scan_tag_directive, COMMENTS},
    token::Token,
 };

 /// Scans a version or tag directive from .buffer, based on
 /// the .kind of directive, returning the relevant Token.
 pub(in crate::scanner) fn scan_directive<'de>(
+    opts: Flags,
    buffer: &mut &'de str,
    mut stats: &mut MStats,
    kind: &DirectiveKind,
@ -22,21 +23,25 @@ pub(in crate::scanner) fn scan_directive<'de>(
        DirectiveKind::Version =>
        {
            // Chomp any preceding whitespace
-            advance!(*buffer, eat_whitespace(buffer, &mut stats, !COMMENTS));
+            advance!(
+                *buffer,
+                eat_whitespace(opts, buffer, &mut stats, !COMMENTS)?
+            );

            // %YAML 1.1
            //       ^
-            let (major, skip) = scan_directive_version(buffer)?;
+            let (major, skip) = scan_directive_version(opts, buffer)?;
            advance!(*buffer, :stats, skip);

            // %YAML 1.1
            //        ^
+            cache!(~buffer, 1, opts)?;
            check!(~buffer => b'.', else ScanError::InvalidVersion)?;
            advance!(*buffer, :stats, 1);

            // %YAML 1.1
            //         ^
-            let (minor, skip) = scan_directive_version(buffer)?;
+            let (minor, skip) = scan_directive_version(opts, buffer)?;
            advance!(*buffer, :stats, skip);

            Ok(Token::VersionDirective(major, minor))
@ -44,10 +49,13 @@ pub(in crate::scanner) fn scan_directive<'de>(
        DirectiveKind::Tag =>
        {
            // Chomp any spaces up to the handle
-            advance!(*buffer, eat_whitespace(buffer, &mut stats, !COMMENTS));
+            advance!(
+                *buffer,
+                eat_whitespace(opts, buffer, &mut stats, !COMMENTS)?
+            );

            // Scan the directive, copying if necessary
-            let (token, amt) = scan_tag_directive(buffer, &mut stats)?;
+            let (token, amt) = scan_tag_directive(opts, buffer, &mut stats)?;
            advance!(*buffer, amt);

            Ok(token)
@ -97,15 +105,15 @@ impl DirectiveKind
    }
 }

-fn scan_directive_version(b: &str) -> Result<(u8, usize)>
+fn scan_directive_version(opts: Flags, b: &str) -> Result<(u8, usize)>
 {
-    let v_slice = take_while(b.as_bytes(), u8::is_ascii_digit);
+    let v_slice = take_while(opts, b.as_bytes(), u8::is_ascii_digit)?;
    let v = atoi(v_slice).ok_or(ScanError::InvalidVersion)?;

    Ok((v, v_slice.len()))
 }

-fn take_while<F>(b: &[u8], f: F) -> &[u8]
+fn take_while<F>(opts: Flags, base: &[u8], f: F) -> Result<&[u8]>
 where
    F: Fn(&u8) -> bool,
 {
@ -113,10 +121,12 @@ where

    loop
    {
-        match b.get(index)
+        let i = cache!(base, @index, 1, opts)?;
+
+        match base.get(index)
        {
-            Some(b) if f(b) => index += 1,
-            _ => return &b[..index],
+            Some(b) if f(b) => index += i,
+            _ => return Ok(&base[..index]),
        }
    }
 }
--- a/src/scanner/error.rs
+++ b/src/scanner/error.rs
@ -72,6 +72,10 @@ pub enum ScanError

    /// An integer overflowed
    IntOverflow,
+
+    /// The underlying buffer should be extended before
+    /// calling the Scanner again
+    Extend,
 }

 impl fmt::Display for ScanError
--- a/src/scanner/flag.rs
+++ b/src/scanner/flag.rs
@ -0,0 +1,35 @@
+use bitflags::bitflags;
+
+/// An empty, zeroed flag set, i.e. Flags::empty(). This is
+/// the default set, with all other flags disabled.
+pub const O_ZEROED: Flags = Flags::empty();
+/// Hints to the Scanner that the given byte slice may be
+/// extended. Typically used when processing data in chunks,
+/// or in circumstances when there may be more data in the
+/// future.
+///
+/// If this flag is set the Scanner will return a
+/// ScanError::Extend if the byte stream terminates before a
+/// token can be scanned.
+pub const O_EXTENDABLE: Flags = Flags::EXTENDABLE;
+/// Sets the Scanner to lazily process the underlying byte
+/// stream.
+///
+/// In particular, the Scanner will not fully process
+/// scalars, only locating the start and end markers in the
+/// stream. This means that any allocations, escape parsing
+/// or line joins will be deferred until the caller
+/// explicitly requests the token. This _also applies to
+/// errors_ in the scalar itself, which will not be caught
+/// until the caller requests the token!
+pub const O_LAZY: Flags = Flags::LAZY;
+
+bitflags! {
+    /// Option bits controlling various behaviors of the Scanner;
+    /// see the matching O_ constant for what each flag does
+    #[derive(Default)]
+    pub struct Flags: u32 {
+        const EXTENDABLE    = 0b00000001; // exposed as O_EXTENDABLE
+        const LAZY          = 0b00000010; // exposed as O_LAZY
+    }
+}
--- a/src/scanner/macros.rs
+++ b/src/scanner/macros.rs
@ -87,6 +87,60 @@ macro_rules! cow {
    };
 }

+/// Count the _bytes_ spanned by up to .codepoints UTF8
+/// codepoints of .buffer, starting at .offset. If fewer are
+/// available: Err(ScanError::Extend) when O_EXTENDABLE is
+/// set in .opts, else Ok with the bytes that were found.
+///
+/// Modifiers
+///     ~ .buffer := .buffer.as_bytes()
+///
+/// Variants
+///     /1 .buffer, .codepoints
+///         := /4 .buffer, @0, .codepoints, O_ZEROED
+///     /2 .buffer, @.offset, .codepoints
+///         := /4 .buffer, @.offset, .codepoints, O_ZEROED
+///     /3 .buffer, .codepoints, .opts
+///         := /4 .buffer, @0, .codepoints, .opts
+///     /4 .buffer, @.offset, .codepoints, .opts
+macro_rules! cache {
+    (~$buffer:expr $(, @$offset:expr )?, $codepoints:expr $(, $opts:expr )?) => { // ~ form: call .as_bytes() on the buffer, then re-dispatch
+        cache!($buffer.as_bytes(), $( @$offset, )? $codepoints $(, $opts )?)
+    };
+    ($buffer:expr $(, @$offset:expr )?, $codepoints:expr $(, $opts:expr )?) => { // append the defaults (@0, O_ZEROED) after any caller supplied values
+        cache!(@inner $buffer, $( @$offset, )? @0, $codepoints $(, $opts )?, $crate::scanner::flag::O_ZEROED)
+    };
+    (@inner $buffer:expr, @$offset:expr, $( @$_:expr, )? $codepoints:expr, $opts:expr $(, $__:expr )?) => { // bind the first offset and opts; any unused trailing default is dropped
+        cache!(@priv $buffer, $offset, $codepoints, $opts.contains($crate::scanner::flag::O_EXTENDABLE))
+    };
+    (@priv $buffer:expr, $offset:expr, $codepoints:expr, $extend:expr) => {{
+        let mut ret = Ok(0); // bytes counted so far (does not include the offset)
+        let mut bytes = $offset; // byte position of the next codepoint to measure
+        for _ in 0..$codepoints
+        {
+            match widthOf!($buffer, bytes) // NOTE(review): presumably the byte width here, 0 if none; confirm in widthOf!
+            {
+                0 =>
+                {
+                    if $extend // the caller may be able to supply more data, so ask for it
+                    {
+                        ret = Err($crate::scanner::error::ScanError::Extend);
+                    }
+
+                    break; // without O_EXTENDABLE the partial count is returned as Ok
+                },
+                n =>
+                {
+                    bytes += n;
+                    ret = ret.map(|r| r + n);
+                },
+            }
+        }
+
+        ret
+    }};
+}
+
 /// Check the .buffer (@ .offset) matches the given
 /// .pattern, optionally returning an .error.
 ///
--- a/src/scanner/mod.rs
+++ b/src/scanner/mod.rs
@ -8,6 +8,7 @@ mod context;
 mod directive;
 mod entry;
 mod error;
+mod flag;
 mod key;
 mod scalar;
 mod stats;
@ -21,6 +22,7 @@ use crate::{
        directive::{scan_directive, DirectiveKind},
        entry::TokenEntry,
        error::{ScanError, ScanResult as Result},
+        flag::*,
        key::{Key, KeyPossible},
        scalar::{block::scan_block_scalar, flow::scan_flow_scalar, plain::scan_plain_scalar},
        stats::MStats,
@ -66,7 +68,12 @@ impl Scanner

    /// Scan some tokens from the given .base into .tokens
    /// returning the number added.
-    pub fn scan_tokens<'de>(&mut self, base: &'de str, tokens: &mut Tokens<'de>) -> Result<usize>
+    pub fn scan_tokens<'de>(
+        &mut self,
+        opts: Flags,
+        base: &'de str,
+        tokens: &mut Tokens<'de>,
+    ) -> Result<usize>
    {
        let mut num_tokens = 0;
        let starting_tokens = tokens.len();
@ -76,9 +83,14 @@ impl Scanner
        {
            if let Some(mut buffer) = base.get(self.offset..)
            {
-                self.scan_next_token(&mut buffer, tokens)?;
+                let run = self.scan_next_token(opts, &mut buffer, tokens);

-                self.offset = base.len() - buffer.len();
+                if matches!(run, Err(ScanError::Extend) | Ok(_))
+                {
+                    self.offset = base.len() - buffer.len();
+                }
+
+                run?;

                num_tokens = tokens.len() - starting_tokens;
            }
@ -87,8 +99,12 @@ impl Scanner
        Ok(num_tokens)
    }

-    fn scan_next_token<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>)
-        -> Result<()>
+    fn scan_next_token<'de>(
+        &mut self,
+        opts: Flags,
+        base: &mut &'de str,
+        tokens: &mut Tokens<'de>,
+    ) -> Result<()>
    {
        // Is it the beginning of the stream?
        if self.state == StreamState::Start
@ -98,7 +114,7 @@ impl Scanner
        }

        // Eat whitespace to the next delimiter
-        self.eat_whitespace(base, COMMENTS);
+        self.eat_whitespace(opts, base, COMMENTS)?;

        // Remove any saved key positions that cannot contain keys
        // anymore
@ -114,11 +130,17 @@ impl Scanner
            return self.fetch_stream_end(*base, tokens);
        }

+        // 4 characters is the longest token we can encounter, one
+        // of:
+        //  - '--- '
+        //  - '... '
+        cache!(~base, 4, opts)?;
+
        // Fetch the next token(s)
        match base.as_bytes()
        {
            // Is it a directive?
-            [DIRECTIVE, ..] if self.stats.column == 0 => self.fetch_directive(base, tokens),
+            [DIRECTIVE, ..] if self.stats.column == 0 => self.fetch_directive(opts, base, tokens),

            // Is it a document marker?
            [b @ b'-', b'-', b'-', ..] | [b @ b'.', b'.', b'.', ..]
@ -161,25 +183,25 @@ impl Scanner
            },

            // Is it an anchor or alias?
-            [ANCHOR, ..] | [ALIAS, ..] => self.fetch_anchor(base, tokens),
+            [ANCHOR, ..] | [ALIAS, ..] => self.fetch_anchor(opts, base, tokens),

            // Is it a tag?
-            [TAG, ..] => self.fetch_tag(base, tokens),
+            [TAG, ..] => self.fetch_tag(opts, base, tokens),

            // Is it a block scalar?
            [c @ LITERAL, ..] | [c @ FOLDED, ..] if self.context.is_block() =>
            {
-                self.fetch_block_scalar(base, tokens, *c == FOLDED)
+                self.fetch_block_scalar(opts, base, tokens, *c == FOLDED)
            },

            // Is it a flow scalar?
-            [SINGLE, ..] | [DOUBLE, ..] => self.fetch_flow_scalar(base, tokens),
+            [SINGLE, ..] | [DOUBLE, ..] => self.fetch_flow_scalar(opts, base, tokens),

            // Is it a plain scalar?
-            _ if self.is_plain_scalar(*base) => self.fetch_plain_scalar(base, tokens),
+            _ if self.is_plain_scalar(*base) => self.fetch_plain_scalar(opts, base, tokens),

            // Otherwise its an error
-            _ => return Err(ScanError::UnknownDelimiter),
+            _ => Err(ScanError::UnknownDelimiter),
        }
    }

@ -253,8 +275,12 @@ impl Scanner
        Ok(())
    }

-    fn fetch_directive<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>)
-        -> Result<()>
+    fn fetch_directive<'de>(
+        &mut self,
+        opts: Flags,
+        base: &mut &'de str,
+        tokens: &mut Tokens<'de>,
+    ) -> Result<()>
    {
        let mut buffer = *base;
        let mut stats = MStats::new();
@ -264,11 +290,8 @@ impl Scanner
            return Ok(());
        }

-        // Reset indent to starting level
-        self.unroll_indent(tokens, STARTING_INDENT)?;
-
-        // Reset saved key
-        self.remove_saved_key()?;
+        // Ensure we can read the 'YAML' or 'TAG' identifiers
+        cache!(~buffer, @1, 4, opts)?;

        // Safety: we check above that we have len >= 1 (e.g a '%')
        //
@ -282,7 +305,13 @@ impl Scanner
        advance!(buffer, :stats, 1 + kind.len());

        // Scan the directive token from the .buffer
-        let token = scan_directive(&mut buffer, &mut stats, &kind)?;
+        let token = scan_directive(opts, &mut buffer, &mut stats, &kind)?;
+
+        // Reset indent to starting level
+        self.unroll_indent(tokens, STARTING_INDENT)?;
+
+        // Reset saved key
+        self.remove_saved_key()?;

        // A key cannot follow a directive (a newline is required)
        self.simple_key_allowed = false;
@ -298,7 +327,12 @@ impl Scanner
        Ok(())
    }

-    fn fetch_tag<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>) -> Result<()>
+    fn fetch_tag<'de>(
+        &mut self,
+        opts: Flags,
+        base: &mut &'de str,
+        tokens: &mut Tokens<'de>,
+    ) -> Result<()>
    {
        let mut buffer = *base;
        let mut stats = MStats::new();
@ -308,11 +342,11 @@ impl Scanner
            return Ok(());
        }

-        self.save_key(!REQUIRED)?;
-
-        let (token, amt) = scan_node_tag(buffer, &mut stats)?;
+        let (token, amt) = scan_node_tag(opts, buffer, &mut stats)?;
        advance!(buffer, amt);

+        self.save_key(!REQUIRED)?;
+
        // A key may not start after a tag (only before)
        self.simple_key_allowed = false;

@ -327,7 +361,12 @@ impl Scanner
        Ok(())
    }

-    fn fetch_anchor<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>) -> Result<()>
+    fn fetch_anchor<'de>(
+        &mut self,
+        opts: Flags,
+        base: &mut &'de str,
+        tokens: &mut Tokens<'de>,
+    ) -> Result<()>
    {
        let mut buffer = *base;
        let mut stats = MStats::new();
@ -343,12 +382,12 @@ impl Scanner
            _ => return Ok(()),
        };

+        // Scan the token from the .buffer
+        let token = scan_anchor(opts, &mut buffer, &mut stats, &kind)?;
+
        // An anchor / alias may start a simple key
        self.save_key(!REQUIRED)?;

-        // Scan the token from the .buffer
-        let token = scan_anchor(&mut buffer, &mut stats, &kind)?;
-
        // A key may not start after an anchor (only before)
        self.simple_key_allowed = false;

@ -365,6 +404,7 @@ impl Scanner

    fn fetch_flow_scalar<'de>(
        &mut self,
+        opts: Flags,
        base: &mut &'de str,
        tokens: &mut Tokens<'de>,
    ) -> Result<()>
@ -378,11 +418,11 @@ impl Scanner
            return Ok(());
        }

-        self.save_key(!REQUIRED)?;
-
-        let (range, amt) = scan_flow_scalar(buffer, &mut stats, single)?;
+        let (range, amt) = scan_flow_scalar(opts, buffer, &mut stats, single)?;
        let token = range.into_token(buffer)?;

+        self.save_key(!REQUIRED)?;
+
        // A key cannot follow a flow scalar, as we're either
        // currently in a key (which should be followed by a
        // value), or a value which needs a separator (e.g line
@ -399,6 +439,7 @@ impl Scanner

    fn fetch_plain_scalar<'de>(
        &mut self,
+        opts: Flags,
        base: &mut &'de str,
        tokens: &mut Tokens<'de>,
    ) -> Result<()>
@ -406,9 +447,9 @@ impl Scanner
        let buffer = *base;
        let mut stats = self.stats.clone();

-        self.save_key(!REQUIRED)?;
+        let (token, amt) = scan_plain_scalar(opts, buffer, &mut stats, &self.context)?;

-        let (token, amt) = scan_plain_scalar(buffer, &mut stats, &self.context)?;
+        self.save_key(!REQUIRED)?;

        // A simple key cannot follow a plain scalar, there must be
        // an indicator or new line before a key is valid
@ -425,6 +466,7 @@ impl Scanner

    fn fetch_block_scalar<'de>(
        &mut self,
+        opts: Flags,
        base: &mut &'de str,
        tokens: &mut Tokens<'de>,
        fold: bool,
@ -440,7 +482,7 @@ impl Scanner
        // always follow a block scalar.
        self.simple_key_allowed = true;

-        let (token, amt) = scan_block_scalar(buffer, &mut stats, &self.context, fold)?;
+        let (token, amt) = scan_block_scalar(opts, buffer, &mut stats, &self.context, fold)?;

        advance!(*base, amt);
        self.stats = stats;
@ -869,11 +911,11 @@ impl Scanner
    /// Chomp whitespace and optionally comments until we
    /// reach the next token, updating buffer[0] to the
    /// beginning of the new token
-    fn eat_whitespace(&mut self, buffer: &mut &str, comments: bool) -> usize
+    fn eat_whitespace(&mut self, opts: Flags, buffer: &mut &str, comments: bool) -> Result<usize>
    {
        let mut stats = MStats::new();

-        let amt = eat_whitespace(*buffer, &mut stats, comments);
+        let amt = eat_whitespace(opts, *buffer, &mut stats, comments)?;

        // A new line may start a key in the block context
        //
@ -887,7 +929,7 @@ impl Scanner
        advance!(*buffer, amt);
        self.stats += stats;

-        amt
+        Ok(amt)
    }
 }

@ -902,7 +944,7 @@ enum StreamState
 /// Chomp whitespace and .comments if allowed until a non
 /// whitespace character is encountered, returning the
 /// amount chomped
-fn eat_whitespace(base: &str, stats: &mut MStats, comments: bool) -> usize
+fn eat_whitespace(opts: Flags, base: &str, stats: &mut MStats, comments: bool) -> Result<usize>
 {
    let mut buffer = base;
    let mut chomp_line = false;
@ -910,6 +952,8 @@ fn eat_whitespace(base: &str, stats: &mut MStats, comments: bool) -> usize

    loop
    {
+        cache!(~buffer, 1, opts)?;
+
        let (blank, brk) = (isBlank!(~buffer), isBreak!(~buffer));

        match (blank, brk)
@ -943,7 +987,7 @@ fn eat_whitespace(base: &str, stats: &mut MStats, comments: bool) -> usize
        }
    }

-    base.len() - buffer.len()
+    Ok(base.len() - buffer.len())
 }

 /// Roll the indentation level and push a block collection
@ -1077,12 +1121,22 @@ mod tests
    mod tag;
    mod whitespace;

+    #[cfg(feature = "test_buffer")]
+    mod str_reader;
+
    use super::*;
    use crate::token::{ScalarStyle::*, Token::*};

    struct ScanIter<'de>
    {
-        data:   &'de str,
+        #[cfg(feature = "test_buffer")]
+        data: str_reader::StrReader<'de>,
+        #[cfg(feature = "test_buffer")]
+        opts: Flags,
+
+        #[cfg(not(feature = "test_buffer"))]
+        data: &'de str,
+
        scan:   Scanner,
        tokens: Tokens<'de>,

@ -1094,7 +1148,14 @@ mod tests
        pub fn new(data: &'de str) -> Self
        {
            Self {
+                #[cfg(feature = "test_buffer")]
+                data: str_reader::StrReader::new(data, str_reader::StrReader::BUF_SIZE),
+                #[cfg(feature = "test_buffer")]
+                opts: O_ZEROED | O_EXTENDABLE,
+
+                #[cfg(not(feature = "test_buffer"))]
                data,
+
                scan: Scanner::new(),
                tokens: Tokens::new(),
                done: false,
@ -1105,10 +1166,7 @@ mod tests
        {
            if (!self.done) && self.tokens.is_empty()
            {
-                if let 0 = self.scan.scan_tokens(self.data, &mut self.tokens)?
-                {
-                    self.done = true
-                }
+                self.get_next_token()?;
            }

            if !self.done
@ -1120,6 +1178,52 @@ mod tests
                Ok(None)
            }
        }
+
+        #[cfg(feature = "test_buffer")] // chunked variant: rescans with O_EXTENDABLE until the Scanner stops asking for more
+        fn get_next_token(&mut self) -> Result<()>
+        {
+            let count = loop
+            {
+                match self
+                    .scan
+                    .scan_tokens(self.opts, self.data.read(), &mut self.tokens)
+                {
+                    Ok(count) => break count,
+                    Err(e) if e == ScanError::Extend => // the Scanner wants the buffer extended before it is called again
+                    {
+                        self.data.expand(str_reader::StrReader::BUF_EXTEND);
+                        // NOTE(review): presumably false once the reader has exposed all of its input; confirm in str_reader
+                        if !self.data.expandable()
+                        {
+                            self.opts.remove(O_EXTENDABLE) // later scans then stop returning ScanError::Extend
+                        }
+
+                        continue; // retry the scan with the updated reader and flags
+                    },
+                    Err(e) => return Err(e),
+                };
+            };
+
+            if count == 0 // no tokens were added by the scan: mark the iterator as done
+            {
+                self.done = true
+            }
+
+            Ok(())
+        }
+
+        #[cfg(not(feature = "test_buffer"))] // plain variant: scans the stored &str directly with no flags set
+        fn get_next_token(&mut self) -> Result<()>
+        {
+            if let 0 = self
+                .scan
+                .scan_tokens(O_ZEROED, self.data, &mut self.tokens)?
+            {
+                self.done = true // no tokens were added by the scan: mark the iterator as done
+            }
+
+            Ok(())
+        }
    }

    impl<'de> Iterator for ScanIter<'de>
--- a/src/scanner/scalar/block.rs
+++ b/src/scanner/scalar/block.rs
@ -26,6 +26,7 @@ use crate::{
    scanner::{
        context::Context,
        error::{ScanError, ScanResult as Result},
+        flag::Flags,
        stats::MStats,
    },
    token::{ScalarStyle, Slice, Token},
@ -40,6 +41,7 @@ use crate::{
 ///     YAML 1.2: Section 8.1
 ///     yaml.org/spec/1.2/#c-b-block-header(m,t)
 pub(in crate::scanner) fn scan_block_scalar<'de>(
+    opts: Flags,
    base: &'de str,
    stats: &mut MStats,
    cxt: &Context,
@ -78,14 +80,16 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
    };

    // Eat the '|' or '>'
+    cache!(~buffer, 1, opts)?;
    advance!(buffer, :local_stats, 1);

    // Calculate any headers this scalar may have
-    let (chomp, explicit) = scan_headers(&mut buffer, &mut local_stats)?;
+    let (chomp, explicit) = scan_headers(opts, &mut buffer, &mut local_stats)?;

    // The header line must contain nothing after the headers
    // excluding a comment until the line ending
-    skip_blanks(&mut buffer, &mut local_stats, COMMENTS)?;
+    skip_blanks(opts, &mut buffer, &mut local_stats, COMMENTS)?;
+    cache!(~buffer, 1, opts)?;
    if !isWhiteSpaceZ!(~buffer)
    {
        return Err(ScanError::InvalidBlockScalar);
@ -102,6 +106,7 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
        None =>
        {
            indent = detect_indent_level(
+                opts,
                &mut buffer,
                &mut local_stats,
                cxt,
@ -201,8 +206,11 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
        }

        // Eat the line's content until the line break (or EOF)
+        cache!(~buffer, 1, opts)?;
        while !isBreakZ!(~buffer)
        {
+            cache!(~buffer, 1, opts)?;
+
            if !can_borrow
            {
                scratch.push(buffer.as_bytes()[0])
@ -218,6 +226,7 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
        }

        // Eat the line break (if not EOF)
+        cache!(~buffer, 1, opts)?;
        if isBreak!(~buffer)
        {
            advance!(buffer, :local_stats, @line);
@ -226,6 +235,7 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(

        // Chomp indentation until the next indented line
        scan_indent(
+            opts,
            &mut buffer,
            &mut local_stats,
            &mut lines,
@ -246,12 +256,18 @@ pub(in crate::scanner) fn scan_block_scalar<'de>(
 }

 /// Retrieve a block scalar's headers
-fn scan_headers(buffer: &mut &str, stats: &mut MStats) -> Result<(ChompStyle, IndentHeader)>
+fn scan_headers(
+    opts: Flags,
+    buffer: &mut &str,
+    stats: &mut MStats,
+) -> Result<(ChompStyle, IndentHeader)>
 {
    let mut skip = 0;
    let mut indent = None;
    let mut chomp = ChompStyle::Clip;

+    cache!(~buffer, 2, opts)?;
+
    // Set the explicit indent if it exists.
    //
    // Note that we silently eat an invalid indent (0) rather
@ -291,6 +307,7 @@ fn scan_headers(buffer: &mut &str, stats: &mut MStats) -> Result<(ChompStyle, In

 /// Chomp the indentation spaces of a block scalar
 fn scan_indent(
+    opts: Flags,
    buffer: &mut &str,
    stats: &mut MStats,
    lines: &mut usize,
@ -303,6 +320,8 @@ fn scan_indent(
        return Ok(false);
    }

+    cache!(~buffer, 1, opts)?;
+
    while stats.column < indent && isWhiteSpace!(~buffer)
    {
        // Indentation space, chomp
@ -321,6 +340,8 @@ fn scan_indent(
            *lines += 1;
            advance!(*buffer, :stats, @line);
        }
+
+        cache!(~buffer, 1, opts)?;
    }

    Ok(true)
@ -433,6 +454,7 @@ fn scan_chomp<'de>(
 /// Auto-detect the indentation level from the first non
 /// header line of a block scalar
 fn detect_indent_level(
+    opts: Flags,
    buffer: &mut &str,
    stats: &mut MStats,
    cxt: &Context,
@ -444,9 +466,13 @@ fn detect_indent_level(

    loop
    {
+        cache!(~buffer, 1, opts)?;
+
        // Chomp indentation spaces, erroring on a tab
        while isBlank!(~buffer)
        {
+            cache!(~buffer, 1, opts)?;
+
            if check!(~buffer => b'\t')
            {
                return Err(ScanError::InvalidTab);
@ -467,6 +493,7 @@ fn detect_indent_level(
        }

        // If its not a line break we're done, exit the loop
+        cache!(~buffer, 1, opts)?;
        if !isBreak!(~buffer)
        {
            break;
@ -489,10 +516,13 @@ fn detect_indent_level(

 /// Skip any blanks (and .comments) until we reach a line
 /// ending or non blank character
-fn skip_blanks(buffer: &mut &str, stats: &mut MStats, comments: bool) -> Result<()>
+fn skip_blanks(opts: Flags, buffer: &mut &str, stats: &mut MStats, comments: bool) -> Result<()>
 {
+    cache!(~buffer, 1, opts)?;
+
    while isBlank!(~buffer)
    {
+        cache!(~buffer, 1, opts)?;
        advance!(*buffer, :stats, 1);
    }

@ -500,6 +530,7 @@ fn skip_blanks(buffer: &mut &str, stats: &mut MStats, comments: bool) -> Result<
    {
        while !isBreakZ!(~buffer)
        {
+            cache!(~buffer, 1, opts)?;
            advance!(*buffer, :stats, 1);
        }
    }
@ -577,6 +608,7 @@ mod tests
    use ScalarStyle::{Folded, Literal};

    use super::*;
+    use crate::scanner::flag::O_ZEROED;

    type TestResult = anyhow::Result<()>;

@ -615,7 +647,7 @@ mod tests
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("this is a simple block scalar"), Literal);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;

        assert_eq!(token, expected);

@ -630,7 +662,7 @@ mod tests
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("trailing lines...\n"), Literal);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;

        assert_eq!(token, expected);

@ -645,7 +677,7 @@ mod tests
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("trailing lines..."), Literal);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;

        assert_eq!(token, expected);

@ -660,7 +692,7 @@ mod tests
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("trailing lines...\n\n\n"), Literal);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;

        assert_eq!(token, expected);

@ -679,7 +711,7 @@ mod tests
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("some folded\nlines\nhere\n"), Literal);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;

        assert_eq!(token, expected);

@ -700,7 +732,7 @@ mod tests
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("\n\nsome folded\nlines\nhere"), Literal);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;

        assert_eq!(token, expected);

@ -721,7 +753,7 @@ mod tests
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("some folded\nlines\nhere\n\n\n"), Literal);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;

        assert_eq!(token, expected);

@ -742,7 +774,7 @@ some.other.key: value";
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("some folded\nlines\nhere\n\n\n"), Literal);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;

        assert_eq!(token, expected);

@ -763,7 +795,7 @@ some.other.key: value";
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("this\n\nhas\n\nbreaks"), Literal);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;

        assert_eq!(token, expected);

@ -778,7 +810,7 @@ some.other.key: value";
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("simple block scalar"), Literal);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, LITERAL)?;

        assert_eq!(token, expected);

@ -795,7 +827,7 @@ some.other.key: value";
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("this is a simple block scalar"), Folded);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;

        assert_eq!(token, expected);

@ -810,7 +842,7 @@ some.other.key: value";
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("trailing lines...\n"), Folded);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;

        assert_eq!(token, expected);

@ -825,7 +857,7 @@ some.other.key: value";
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("trailing lines..."), Folded);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;

        assert_eq!(token, expected);

@ -840,7 +872,7 @@ some.other.key: value";
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("trailing lines...\n\n\n"), Folded);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;

        assert_eq!(token, expected);

@ -859,7 +891,7 @@ some.other.key: value";
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("some folded lines here\n"), Folded);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;

        assert_eq!(token, expected);

@ -880,7 +912,7 @@ some.other.key: value";
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("\n\nsome folded lines here"), Folded);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;

        assert_eq!(token, expected);

@ -901,7 +933,7 @@ some.other.key: value";
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("some folded lines here\n\n\n"), Folded);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;

        assert_eq!(token, expected);

@ -922,7 +954,7 @@ some.other.key: value";
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("some folded lines here\n\n\n"), Folded);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;

        assert_eq!(token, expected);

@ -943,7 +975,7 @@ some.other.key: value";
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("this\nhas\nbreaks"), Folded);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;

        assert_eq!(token, expected);

@ -958,7 +990,7 @@ some.other.key: value";
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("simple block scalar"), Folded);

-        let (token, _amt) = scan_block_scalar(data, &mut stats, &cxt, !LITERAL)?;
+        let (token, _amt) = scan_block_scalar(O_ZEROED, data, &mut stats, &cxt, !LITERAL)?;

        assert_eq!(token, expected);

--- a/src/scanner/scalar/escape.rs
+++ b/src/scanner/scalar/escape.rs
@ -1,7 +1,10 @@
 //! This module exports function(s) for handling scalar
 //! escapes in YAML documents.

-use crate::scanner::error::{ScanError, ScanResult as Result};
+use crate::scanner::{
+    error::{ScanError, ScanResult as Result},
+    flag::Flags,
+};

 /// Unescape a given YAML escape sequence as defined in
 /// [Section 5.7][Link]. Specifically, YAML defines 18
@ -15,12 +18,17 @@ use crate::scanner::error::{ScanError, ScanResult as Result};
 /// escape sequence.
 ///
 /// [Link]: https://yaml.org/spec/1.2/spec.html#c-escape
-pub(in crate::scanner) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Result<usize>
+pub(in crate::scanner) fn flow_unescape(
+    opts: Flags,
+    base: &str,
+    scratch: &mut Vec<u8>,
+) -> Result<usize>
 {
    let mut buffer = base;
    let mut escape_len: Option<u8> = None;

    // Not an escape sequence, early exit
+    cache!(~buffer, 1, opts)?;
    if !check!(~buffer => b'\\')
    {
        return Ok(0);
@ -30,6 +38,7 @@ pub(in crate::scanner) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Re

    // See 5.7: Escaped Characters
    // yaml.org/spec/1.2/spec.html#id2776092
+    cache!(~buffer, 1, opts)?;
    match buffer.as_bytes()
    {
        [b'0', ..] => scratch.push(b'\0'),
@ -60,6 +69,9 @@ pub(in crate::scanner) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Re

    if let Some(sequence) = escape_len
    {
+        // Note that we cache the _entire_ escape sequence before
+        // calling write_unicode_point
+        cache!(~buffer, sequence, opts)?;
        let amt = write_unicode_point(buffer, scratch, sequence)?;
        advance!(buffer, amt);
    }
@ -73,6 +85,7 @@ pub(in crate::scanner) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Re
 ///
 /// [Link]: https://yaml.org/spec/1.2/spec.html#ns-uri-char
 pub(in crate::scanner) fn tag_uri_unescape(
+    opts: Flags,
    base: &str,
    scratch: &mut Vec<u8>,
    _directive: bool,
@ -82,6 +95,8 @@ pub(in crate::scanner) fn tag_uri_unescape(
    let mut codepoint_len: i8 = 0;

    while {
+        cache!(~buffer, 3, opts)?;
+
        if buffer.len() < 3
        {
            return Err(ScanError::UnexpectedEOF);
@ -224,6 +239,7 @@ mod tests
    use pretty_assertions::assert_eq;

    use super::*;
+    use crate::scanner::flag::O_ZEROED;

    type TestResult = anyhow::Result<()>;

@ -268,7 +284,7 @@ mod tests
        for (i, (&t, &ex)) in data.into_iter().zip(expected).enumerate()
        {
            scratch.clear();
-            flow_unescape(t, scratch)
+            flow_unescape(O_ZEROED, t, scratch)
                .map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;

            assert_eq!(scratch, ex, "on iteration {}", i)
@ -329,7 +345,7 @@ mod tests
            let mut c: [u8; 4] = [0; 4];
            scratch.clear();

-            flow_unescape(t, scratch)
+            flow_unescape(O_ZEROED, t, scratch)
                .map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;

            assert_eq!(
@ -370,7 +386,7 @@ mod tests
        {
            scratch.clear();

-            let consumed = flow_unescape(t, scratch)
+            let consumed = flow_unescape(O_ZEROED, t, scratch)
                .map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;

            assert_eq!(
@ -417,7 +433,7 @@ mod tests
        {
            scratch.clear();

-            let consumed = tag_uri_unescape(t, scratch, true)
+            let consumed = tag_uri_unescape(O_ZEROED, t, scratch, true)
                .map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;

            assert_eq!(
@ -446,7 +462,7 @@ mod tests
        let scratch = &mut Vec::new();
        let expected = ScanError::UnexpectedEOF;

-        match tag_uri_unescape(data, scratch, true)
+        match tag_uri_unescape(O_ZEROED, data, scratch, true)
        {
            Err(e) if e == expected => Ok(()),

@ -466,7 +482,7 @@ mod tests
        let scratch = &mut Vec::new();
        let expected = ScanError::UnknownEscape;

-        match tag_uri_unescape(data, scratch, true)
+        match tag_uri_unescape(O_ZEROED, data, scratch, true)
        {
            Err(e) if e == expected => Ok(()),

--- a/src/scanner/scalar/flow.rs
+++ b/src/scanner/scalar/flow.rs
@ -3,6 +3,7 @@ use std::ops::Range;
 use crate::{
    scanner::{
        error::{ScanError, ScanResult as Result},
+        flag::Flags,
        scalar::escape::flow_unescape,
        stats::MStats,
    },
@ -15,6 +16,7 @@ use crate::{
 /// the underlying .base, however it may be required to copy
 /// into .scratch and borrow from that lifetime.
 pub(in crate::scanner) fn scan_flow_scalar(
+    opts: Flags,
    base: &str,
    stats: &mut MStats,
    single: bool,
@ -35,6 +37,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
    };

    // Eat left quote
+    cache!(~buffer, 1, opts)?;
    advance!(buffer, :stats, 1);

    'scalar: loop
@ -44,6 +47,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
        // Even in a scalar context, YAML prohibits starting a line
        // with document stream tokens followed by a blank
        // character
+        cache!(~buffer, 4, opts)?;
        if isDocumentIndicator!(~buffer, :stats)
        {
            return Err(ScanError::InvalidFlowScalar);
@ -55,9 +59,14 @@ pub(in crate::scanner) fn scan_flow_scalar(
            return Err(ScanError::UnexpectedEOF);
        }

+        cache!(~buffer, 1, opts)?;
+
        // Consume non whitespace characters
        while !isWhiteSpaceZ!(~buffer)
        {
+            // Longest sequence we can hit is 2 characters ('')
+            cache!(~buffer, 2, opts)?;
+
            // if we encounter an escaped quote we can no longer borrow
            // from .base, we must unescape the quote into .scratch
            if kind == SingleQuote && check!(~buffer => [SINGLE, SINGLE, ..])
@ -88,7 +97,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
            {
                set_no_borrow(&mut can_borrow, base, buffer, &mut scratch);

-                let read = flow_unescape(buffer, &mut scratch)?;
+                let read = flow_unescape(opts, buffer, &mut scratch)?;
                advance!(buffer, :stats, read);
            }
            // Its a non blank character, add it
@ -126,6 +135,8 @@ pub(in crate::scanner) fn scan_flow_scalar(
        // Consume whitespace
        loop
        {
+            cache!(~buffer, 1, opts)?;
+
            match (isBlank!(~buffer), isBreak!(~buffer))
            {
                // No more whitespace, exit loop
@ -206,6 +217,7 @@ pub(in crate::scanner) fn scan_flow_scalar(
    };

    // Eat the right quote
+    cache!(~buffer, 1, opts)?;
    advance!(buffer, :stats, 1);

    let advance = base.len() - buffer.len();
@ -296,6 +308,7 @@ mod tests
    use pretty_assertions::assert_eq;

    use super::*;
+    use crate::scanner::flag::O_ZEROED;

    type TestResult = anyhow::Result<()>;

@ -308,7 +321,7 @@ mod tests
        let stats = &mut MStats::new();
        let expected = Token::Scalar(cow!(""), ScalarStyle::SingleQuote);

-        let (range, read) = scan_flow_scalar(data, stats, true)?;
+        let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
        let scalar = range.into_token(data)?;

        assert_eq!(read, 2);
@ -328,7 +341,7 @@ mod tests
        let stats = &mut MStats::new();
        let expected = Token::Scalar(cow!("hello world"), ScalarStyle::SingleQuote);

-        let (range, read) = scan_flow_scalar(data, stats, true)?;
+        let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
        let scalar = range.into_token(data)?;

        assert_eq!(read, 13);
@ -352,7 +365,7 @@ fourth'"#;
        let cmp = "first second third fourth";
        let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote);

-        let (range, _read) = scan_flow_scalar(data, stats, true)?;
+        let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
        let scalar = range.into_token(data)?;

        if !(scalar == expected)
@ -372,7 +385,7 @@ fourth'"#;
        let cmp = "first second";
        let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote);

-        let (range, _read) = scan_flow_scalar(data, stats, true)?;
+        let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
        let scalar = range.into_token(data)?;

        if !(scalar == expected)
@ -395,7 +408,7 @@ fourth'"#;
        let cmp = "first second third\nfourth";
        let expected = Token::Scalar(cow!(cmp), ScalarStyle::SingleQuote);

-        let (range, _read) = scan_flow_scalar(data, stats, true)?;
+        let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, true)?;
        let scalar = range.into_token(data)?;

        if !(scalar == expected)
@ -417,7 +430,7 @@ fourth'"#;
        {
            stats = MStats::new();

-            match scan_flow_scalar(t, &mut stats, true)
+            match scan_flow_scalar(O_ZEROED, t, &mut stats, true)
            {
                Err(e) => assert_eq!(
                    e, expected,
@ -443,7 +456,7 @@ fourth'"#;
        {
            stats = MStats::new();

-            match scan_flow_scalar(t, &mut stats, true)
+            match scan_flow_scalar(O_ZEROED, t, &mut stats, true)
            {
                Err(e) => assert_eq!(
                    e, expected,
@ -467,7 +480,7 @@ fourth'"#;
        let stats = &mut MStats::new();
        let expected = Token::Scalar(cow!(""), ScalarStyle::DoubleQuote);

-        let (range, read) = scan_flow_scalar(data, stats, false)?;
+        let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
        let scalar = range.into_token(data)?;

        assert_eq!(read, 2);
@ -487,7 +500,7 @@ fourth'"#;
        let stats = &mut MStats::new();
        let expected = Token::Scalar(cow!("hello world"), ScalarStyle::DoubleQuote);

-        let (range, read) = scan_flow_scalar(data, stats, false)?;
+        let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
        let scalar = range.into_token(data)?;

        assert_eq!(read, 13);
@ -507,7 +520,7 @@ fourth'"#;
        let stats = &mut MStats::new();
        let expected = Token::Scalar(cow!("hello α Ω ッ"), ScalarStyle::DoubleQuote);

-        let (range, read) = scan_flow_scalar(data, stats, false)?;
+        let (range, read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
        let scalar = range.into_token(data)?;

        if !(scalar == expected)
@ -537,7 +550,7 @@ fourth""#;
        let cmp = "first second third fourth";
        let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);

-        let (range, _read) = scan_flow_scalar(data, stats, false)?;
+        let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
        let scalar = range.into_token(data)?;

        if !(scalar == expected)
@ -560,7 +573,7 @@ fourth""#;
        let cmp = "first second third\nfourth";
        let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);

-        let (range, _read) = scan_flow_scalar(data, stats, false)?;
+        let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
        let scalar = range.into_token(data)?;

        if !(scalar == expected)
@ -580,7 +593,7 @@ fourth""#;
        let cmp = "first second";
        let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);

-        let (range, _read) = scan_flow_scalar(data, stats, false)?;
+        let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
        let scalar = range.into_token(data)?;

        if !(scalar == expected)
@ -604,7 +617,7 @@ rst  \
        let cmp = "first  second third\nfourth";
        let expected = Token::Scalar(cow!(cmp), ScalarStyle::DoubleQuote);

-        let (range, _read) = scan_flow_scalar(data, stats, false)?;
+        let (range, _read) = scan_flow_scalar(O_ZEROED, data, stats, false)?;
        let scalar = range.into_token(data)?;

        if !(scalar == expected)
--- a/src/scanner/scalar/plain.rs
+++ b/src/scanner/scalar/plain.rs
@ -2,6 +2,7 @@ use crate::{
    scanner::{
        context::Context,
        error::{ScanError, ScanResult as Result},
+        flag::Flags,
        stats::MStats,
    },
    token::{ScalarStyle, Token},
@ -17,6 +18,7 @@ use crate::{
 ///     YAML 1.2: Section 7.3.3
 ///     yaml.org/spec/1.2/spec.html#ns-plain-first(c)
 pub(in crate::scanner) fn scan_plain_scalar<'de>(
+    opts: Flags,
    base: &'de str,
    stats: &mut MStats,
    cxt: &Context,
@ -53,6 +55,7 @@ pub(in crate::scanner) fn scan_plain_scalar<'de>(

    // Inside flow contexts you *may not* start a plain scalar
    // with a ':', '?', or '-' followed by a flow indicator
+    cache!(~buffer, 2, opts)?;
    if flow_context && check!(~buffer => b':' | b'?' | b'-') && flow_indicator(buffer, 1)
    {
        return Err(ScanError::InvalidPlainScalar);
@ -60,6 +63,10 @@ pub(in crate::scanner) fn scan_plain_scalar<'de>(

    'scalar: loop
    {
+        // 4 is the largest character sequence we can encounter
+        // (document indicators)
+        cache!(~buffer, 4, opts)?;
+
        if buffer.is_empty()
        {
            break 'scalar;
@ -110,6 +117,8 @@ pub(in crate::scanner) fn scan_plain_scalar<'de>(
        // Handle non whitespace characters
        while !isWhiteSpaceZ!(~buffer)
        {
+            cache!(~buffer, 2, opts)?;
+
            if (check!(~buffer => b':') && isWhiteSpaceZ!(~buffer, 1))
                || flow_context && flow_indicator(buffer, 0)
            {
@ -128,6 +137,8 @@ pub(in crate::scanner) fn scan_plain_scalar<'de>(
        // Handle whitespace characters
        loop
        {
+            cache!(~buffer, 1, opts)?;
+
            match (isBlank!(~buffer), isBreak!(~buffer))
            {
                // No more whitespace, exit loop
@ -242,6 +253,7 @@ mod tests
    use ScalarStyle::Plain;

    use super::*;
+    use crate::scanner::flag::O_ZEROED;

    type TestResult = anyhow::Result<()>;

@ -280,7 +292,7 @@ mod tests

        for (i, &data) in tests.iter().enumerate()
        {
-            let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)
+            let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)
                .map_err(|e| anyhow!("iteration {}: {}", i, e))?;

            assert_eq!(token, expected, "on iteration {}", i);
@ -301,7 +313,7 @@ mod tests

        for (i, &data) in tests.iter().enumerate()
        {
-            let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)
+            let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)
                .map_err(|e| anyhow!("iteration {}: {}", i, e))?;

            assert_eq!(token, expected, "on iteration {}", i);
@ -320,7 +332,7 @@ mod tests
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!(""), Plain);

-        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+        let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;

        assert_eq!(token, expected);

@ -339,7 +351,7 @@ mod tests
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("hello"), Plain);

-        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+        let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;

        assert_eq!(token, expected);

@ -356,7 +368,7 @@ mod tests
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("hello, world!"), Plain);

-        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+        let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;

        assert_eq!(token, expected);

@ -379,7 +391,7 @@ mod tests
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("hello this is a multi-line scalar"), Plain);

-        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+        let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;

        assert_eq!(token, expected);

@ -406,7 +418,7 @@ mod tests
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("this is\n\na scalar\nwith line#breaks"), Plain);

-        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+        let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;

        assert_eq!(token, expected);

@ -423,7 +435,7 @@ mod tests
        let cxt = cxt!(block -> [0]);
        let expected = Token::Scalar(cow!("hello"), Plain);

-        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+        let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;

        assert_eq!(token, expected);

@ -442,7 +454,7 @@ mod tests
        let cxt = cxt!(flow -> 1);
        let expected = Token::Scalar(cow!("hello"), Plain);

-        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+        let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;

        assert_eq!(token, expected);

@ -461,7 +473,7 @@ mod tests

        for (i, &data) in tests.iter().enumerate()
        {
-            let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)
+            let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)
                .map_err(|e| anyhow!("iteration {}: {}", i, e))?;

            assert_eq!(token, expected, "on iteration {}", i);
@ -485,7 +497,7 @@ string!";
        let cxt = cxt!(flow -> 1);
        let expected = Token::Scalar(cow!("hello this is a multi-line string!"), Plain);

-        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+        let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;

        assert_eq!(token, expected);

@ -514,7 +526,7 @@ breaks
        let cxt = cxt!(flow -> 1);
        let expected = Token::Scalar(cow!("hello this\nbig\nstring\nhas\nline\nbreaks"), Plain);

-        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+        let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;

        assert_eq!(token, expected);

@ -531,7 +543,7 @@ breaks
        let cxt = cxt!(flow -> 1);
        let expected = Token::Scalar(cow!("hello"), Plain);

-        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+        let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;

        assert_eq!(token, expected);

@ -548,7 +560,7 @@ breaks
        let cxt = cxt!(flow -> 1);
        let expected = Token::Scalar(cow!("hello"), Plain);

-        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+        let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;

        assert_eq!(token, expected);

@ -569,7 +581,7 @@ breaks
        let cxt = cxt!(flow -> 1);
        let expected = Token::Scalar(cow!("hello"), Plain);

-        let (token, amt) = scan_plain_scalar(data, &mut stats, &cxt)?;
+        let (token, amt) = scan_plain_scalar(O_ZEROED, data, &mut stats, &cxt)?;

        assert_eq!(token, expected);

--- a/src/scanner/tag.rs
+++ b/src/scanner/tag.rs
@ -66,6 +66,7 @@ use crate::{
    scanner::{
        eat_whitespace,
        error::{ScanError, ScanResult as Result},
+        flag::Flags,
        scalar::escape::tag_uri_unescape,
        stats::MStats,
    },
@ -79,6 +80,7 @@ use crate::{
 /// possible, but may also copy the directive's handle and
 /// prefix into .scratch if borrowing is not possible.
 pub(in crate::scanner) fn scan_tag_directive<'de>(
+    opts: Flags,
    base: &'de str,
    stats: &mut MStats,
 ) -> Result<(Token<'de>, usize)>
@ -88,7 +90,7 @@ pub(in crate::scanner) fn scan_tag_directive<'de>(

    // %TAG !named! :tag:prefix # a comment\n
    //      ^^^^^^^
-    let (handle, amt) = match scan_tag_handle(buffer, stats)?
+    let (handle, amt) = match scan_tag_handle(opts, buffer, stats)?
    {
        Some((handle, amt)) => (handle.into_inner(), amt),
        None => return Err(ScanError::InvalidTagHandle),
@ -99,14 +101,15 @@ pub(in crate::scanner) fn scan_tag_directive<'de>(
    //             ^
    // Check that there is >= 1 whitespace between handle and
    // prefix
+    cache!(~buffer, 1, opts)?;
    isBlank!(~buffer, else ScanError::InvalidTagPrefix)?;

    // Chomp whitespace to prefix
-    advance!(buffer, eat_whitespace(buffer, stats, false));
+    advance!(buffer, eat_whitespace(opts, buffer, stats, false)?);

    // %TAG !named! :tag:prefix # a comment\n
    //              ^^^^^^^^^^^
-    let (prefix, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, false)?;
+    let (prefix, amt) = scan_tag_uri(opts, buffer, stats, &mut can_borrow, false)?;

    // %TAG !named! tag-prefix # a comment\n
    //                        ^
@ -146,6 +149,7 @@ pub(in crate::scanner) fn scan_tag_directive<'de>(
 /// ("!", "") => A non resolving tag
 /// (handle, suffix) => A primary, secondary or named tag
 pub(in crate::scanner) fn scan_node_tag<'de>(
+    opts: Flags,
    base: &'de str,
    stats: &mut MStats,
 ) -> Result<(Token<'de>, usize)>
@ -163,6 +167,8 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
     * a zero length sub-slice out.
     */

+    cache!(~buffer, 2, opts)?;
+
    // !<global:verbatim:tag:> "node"
    // ^^
    // If its a verbatim tag scan it
@ -172,10 +178,11 @@ pub(in crate::scanner) fn scan_node_tag<'de>(

        // !<global:verbatim:tag:> "node"
        //   ^^^^^^^^^^^^^^^^^^^^
-        let (verbatim, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, true)?;
+        let (verbatim, amt) = scan_tag_uri(opts, buffer, stats, &mut can_borrow, true)?;

        // !<global:verbatim:tag:> "node"
        //                       ^
+        cache!(~buffer, @amt + 1, 1, opts)?;
        check!(~buffer, amt + 1 => b'>', else ScanError::InvalidTagSuffix)?;

        let token = assemble_tag(&buffer[0..0], verbatim, can_borrow);
@ -185,7 +192,7 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
    // Otherwise scan it as a normal tag
    else
    {
-        match scan_tag_handle(buffer, stats)?
+        match scan_tag_handle(opts, buffer, stats)?
        {
            // ! "node"
            // ^
@ -200,7 +207,7 @@ pub(in crate::scanner) fn scan_node_tag<'de>(

                // !!global "node" OR !named!global "node"
                //   ^^^^^^                  ^^^^^^
-                let (suffix, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, false)?;
+                let (suffix, amt) = scan_tag_uri(opts, buffer, stats, &mut can_borrow, false)?;

                let token = assemble_tag(h, suffix, can_borrow);

@ -210,6 +217,8 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
            // Handle scan couldn't find a closing !, meaning this is a local tag
            None =>
            {
+                cache!(~buffer, 1, opts)?;
+
                // !local "node"
                // ^
                let handle = &buffer[..1];
@ -217,7 +226,7 @@ pub(in crate::scanner) fn scan_node_tag<'de>(

                // !local "node"
                //  ^^^^^
-                let (suffix, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, false)?;
+                let (suffix, amt) = scan_tag_uri(opts, buffer, stats, &mut can_borrow, false)?;

                let token = assemble_tag(handle, suffix, can_borrow);

@ -240,6 +249,7 @@ pub(in crate::scanner) fn scan_node_tag<'de>(
 ///
 /// [Link]: https://yaml.org/spec/1.2/spec.html#ns-global-tag-prefix
 pub(in crate::scanner) fn scan_tag_uri<'de>(
+    opts: Flags,
    base: &'de str,
    stats: &mut MStats,
    can_borrow: &mut bool,
@ -251,6 +261,8 @@ pub(in crate::scanner) fn scan_tag_uri<'de>(

    loop
    {
+        cache!(~buffer, 1, opts)?;
+
        match buffer.as_bytes()
        {
            // If its a normal allowed character, add it
@ -293,7 +305,7 @@ pub(in crate::scanner) fn scan_tag_uri<'de>(

                    *can_borrow = false;
                }
-                let amt = tag_uri_unescape(buffer, &mut scratch, true)?;
+                let amt = tag_uri_unescape(opts, buffer, &mut scratch, true)?;
                advance!(buffer, :stats, amt);
            },
            // EOF before loop end is an error
@ -320,6 +332,7 @@ pub(in crate::scanner) fn scan_tag_uri<'de>(
 /// Scans a tag handle from .base, attempting to return the
 /// fragment if the handle is unambiguous.
 pub(in crate::scanner) fn scan_tag_handle<'b>(
+    opts: Flags,
    base: &'b str,
    stats: &mut MStats,
 ) -> Result<Option<(TagHandle<'b>, usize)>>
@ -331,6 +344,7 @@ pub(in crate::scanner) fn scan_tag_handle<'b>(
    // !!tag
    // ^
    // Check that we are indeed starting a handle
+    cache!(~buffer, 1, opts)?;
    check!(~buffer => b'!', else ScanError::InvalidTagHandle)?;

    // %TAG !handle! tag-prefix # a comment \n
@ -338,9 +352,10 @@ pub(in crate::scanner) fn scan_tag_handle<'b>(
    // !handle!tag
    //  ^^^^^^
    // Safety: we just proved above we have >= 1 byte ('!')
-    let name = take_while(buffer[1..].as_bytes(), u8::is_ascii_alphanumeric);
+    let name = take_while(opts, buffer[1..].as_bytes(), u8::is_ascii_alphanumeric)?;
    let mut offset = 1 + name.len();

+    cache!(~buffer, @offset, 1, opts)?;
    match buffer.as_bytes().get(offset)
    {
        // If we find a closing '!', then it must either be a secondary or named handle
@ -391,7 +406,7 @@ impl<'a> TagHandle<'a>
    }
 }

-fn take_while<F>(b: &[u8], f: F) -> &[u8]
+fn take_while<F>(opts: Flags, base: &[u8], f: F) -> Result<&[u8]>
 where
    F: Fn(&u8) -> bool,
 {
@ -399,10 +414,12 @@ where

    loop
    {
-        match b.get(index)
+        let i = cache!(base, @index, 1, opts)?;
+
+        match base.get(index)
        {
-            Some(b) if f(b) => index += 1,
-            _ => return &b[..index],
+            Some(b) if f(b) => index += i,
+            _ => return Ok(&base[..index]),
        }
    }
 }
--- a/src/scanner/tests/str_reader.rs
+++ b/src/scanner/tests/str_reader.rs
@ -0,0 +1,97 @
+use cfg_if::cfg_if;
+
+/// Test-only reader that exposes a growing prefix of a fixed `&str`.
+///
+/// `size` is the byte length of the prefix returned by [`StrReader::read`]. It
+/// never exceeds `s.len()` and always sits on a UTF-8 character boundary.
+#[derive(Debug, Clone)]
+pub(super) struct StrReader<'de>
+{
+    /// The complete backing text.
+    s:    &'de str,
+    /// Byte length of the visible prefix of `s`.
+    size: usize,
+}
+
+impl<'de> StrReader<'de>
+{
+    // Initial window size (BUF_SIZE) and growth step (BUF_EXTEND), selected by
+    // the enabled `test_buffer_*` feature. Neither constant is defined when
+    // none of the three features is enabled.
+    cfg_if! {
+        if #[cfg(feature = "test_buffer_large")]
+        {
+            pub const BUF_SIZE: usize = 4 * 1024;
+            pub const BUF_EXTEND: usize = 64;
+        }
+        else if #[cfg(feature = "test_buffer_medium")]
+        {
+            pub const BUF_SIZE: usize = 8;
+            pub const BUF_EXTEND: usize = 8;
+        }
+        else if #[cfg(feature = "test_buffer_small")]
+        {
+            pub const BUF_SIZE: usize = 1;
+            pub const BUF_EXTEND: usize = 1;
+        }
+    }
+
+    /// Creates a reader over `s` whose visible prefix is roughly `size` bytes.
+    ///
+    /// The prefix is capped at `s.len()` and, if `size` falls inside a
+    /// multi-byte character, widened to the end of that character.
+    pub fn new(s: &'de str, size: usize) -> Self
+    {
+        let size = Self::ceil_char_boundary(s, size);
+
+        Self { s, size }
+    }
+
+    /// Returns the currently visible prefix of the backing text.
+    pub fn read(&self) -> &'de str
+    {
+        // .size is always a char boundary <= s.len(), so this cannot panic
+        &self.s[..self.size]
+    }
+
+    /// Grows the visible prefix by at least `size` bytes, stopping at the
+    /// end of the backing text.
+    pub fn expand(&mut self, size: usize)
+    {
+        // Saturate so that a huge request means "everything" instead of
+        // overflowing
+        let wanted = self.size.saturating_add(size);
+
+        self.size = Self::ceil_char_boundary(self.s, wanted);
+    }
+
+    /// Reports whether part of the backing text is still hidden.
+    pub fn expandable(&self) -> bool
+    {
+        self.size < self.s.len()
+    }
+
+    /// Clamps `at` to `s.len()` and rounds it up to the nearest UTF-8
+    /// character boundary.
+    fn ceil_char_boundary(s: &str, at: usize) -> usize
+    {
+        let mut at = std::cmp::min(s.len(), at);
+
+        // s.len() is always a boundary, so this terminates
+        while !s.is_char_boundary(at)
+        {
+            at += 1;
+        }
+
+        at
+    }
+}
+
+/// Formats the whole backing text `s`, not just the visible prefix.
+impl std::fmt::Display for StrReader<'_>
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
+    {
+        self.s.fmt(f)
+    }
+}
--- a/src/scanner/tests/whitespace.rs
+++ b/src/scanner/tests/whitespace.rs
@ -27,7 +27,7 @@ fn eat()
    let mut buffer = data;
    let mut s = Scanner::new();

-    s.eat_whitespace(&mut buffer, false);
+    s.eat_whitespace(O_ZEROED, &mut buffer, false).unwrap();

    assert_eq!(buffer, "abc");
    assert_eq!(s.stats, (3, 0, 3))
@ -40,7 +40,7 @@ fn eat_none()
    let mut buffer = data;
    let mut s = Scanner::new();

-    s.eat_whitespace(&mut buffer, false);
+    s.eat_whitespace(O_ZEROED, &mut buffer, false).unwrap();

    assert_eq!(buffer, "abc");
    assert_eq!(s.stats, (0, 0, 0))