From 8986f36f003480cdd43de2e4e18b2617dc5a0ea0 Mon Sep 17 00:00:00 2001 From: Bazaah Date: Sat, 24 Jul 2021 15:16:31 +0000 Subject: [PATCH] scanner/tag: remove ref, return owned Slice variants This is part of a set of API changes I'll be making, which will allow allocations in the scanner code. This change is being made for a few reasons. 1. Allows me to make the Scanner API nicer, as callers will only need to pass in the underlying data being scanned, and will not be tied to a mutable lifetime which limits them to scanning tokens one at a time. 2. Makes the code simpler, as I no longer need to ensure the mutable 'owned' lifetime is honored throughout the call stack. 3. I'll need to allocate anyway for the indentation stack, so avoiding allocation in other places where allocating is sensible matters less. --- src/scanner/tag.rs | 74 ++++++++++++++++------------------------------ 1 file changed, 25 insertions(+), 49 deletions(-) diff --git a/src/scanner/tag.rs b/src/scanner/tag.rs index 3f88ac0..9134551 100644 --- a/src/scanner/tag.rs +++ b/src/scanner/tag.rs @@ -62,8 +62,6 @@ //! same character ('!') to mean three different things //! depending on the context. What a massive headache. -use std::ops::Range; - use crate::{ scanner::{ eat_whitespace, @@ -71,7 +69,7 @@ use crate::{ scalar::escape::tag_uri_unescape, MStats, }, - token::{Ref, Token}, + token::{Slice, Token}, }; /// Scan a tag directive from .base returning a tag @@ -83,8 +81,7 @@ use crate::{ pub(in crate::scanner) fn scan_tag_directive<'b, 'c>( base: &'b str, stats: &mut MStats, - scratch: &'c mut Vec, -) -> Result<(Ref<'b, 'c>, usize)> +) -> Result<(Token<'b>, usize)> { let mut buffer = base; let mut can_borrow = true; @@ -109,7 +106,7 @@ pub(in crate::scanner) fn scan_tag_directive<'b, 'c>( // %TAG !named! 
:tag:prefix # a comment\n // ^^^^^^^^^^^ - let (prefix, amt) = scan_tag_uri(buffer, stats, scratch, &mut can_borrow, false)?; + let (prefix, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, false)?; // %TAG !named! tag-prefix # a comment\n // ^ @@ -120,20 +117,14 @@ pub(in crate::scanner) fn scan_tag_directive<'b, 'c>( // .buffer let token = if can_borrow { - Token::TagDirective(cow!(handle), cow!(&buffer[prefix])).borrowed() + Token::TagDirective(cow!(handle), prefix) } // Otherwise, we'll need to copy both the handle and prefix, to unify our // lifetimes. Note that this isn't strictly necessary, but requiring Token to // contain two unrelated lifetimes is just asking for pain and suffering. else { - let start = scratch.len(); - scratch.extend_from_slice(handle.as_bytes()); - - let handle = std::str::from_utf8(&scratch[start..]).unwrap(); - let prefix = std::str::from_utf8(&scratch[prefix]).unwrap(); - - Token::TagDirective(cow!(handle), cow!(prefix)).copied() + Token::TagDirective(cow!(handle), prefix).into_owned() }; advance!(buffer, amt); @@ -157,8 +148,7 @@ pub(in crate::scanner) fn scan_tag_directive<'b, 'c>( pub(in crate::scanner) fn scan_node_tag<'b, 'c>( base: &'b str, stats: &mut MStats, - scratch: &'c mut Vec, -) -> Result<(Ref<'b, 'c>, usize)> +) -> Result<(Token<'b>, usize)> { let mut buffer = base; let mut can_borrow = true; @@ -182,13 +172,13 @@ pub(in crate::scanner) fn scan_node_tag<'b, 'c>( // ! "node" // ^^^^^^^^^^^^^^^^^^^^ - let (verbatim, amt) = scan_tag_uri(buffer, stats, scratch, &mut can_borrow, true)?; + let (verbatim, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, true)?; // ! "node" // ^ check!(~buffer, amt + 1 => b'>', else ScanError::InvalidTagSuffix)?; - let token = assemble_tag(buffer, scratch, &buffer[0..0], verbatim, can_borrow); + let token = assemble_tag(&buffer[0..0], verbatim, can_borrow); (token, amt + 1) } @@ -200,10 +190,7 @@ pub(in crate::scanner) fn scan_node_tag<'b, 'c>( // ! "node" // ^ // Single ! 
without a suffix disables tag resolution - Some((TagHandle::Primary(h), amt)) => - { - (Token::Tag(cow!(h), cow!(&buffer[0..0])).borrowed(), amt) - }, + Some((TagHandle::Primary(h), amt)) => (Token::Tag(cow!(h), cow!(&buffer[0..0])), amt), // !!global "node" OR !named!global "node" // ^^ ^^^^^^^ // Got a secondary or named tag, scan the suffix now @@ -213,9 +200,9 @@ pub(in crate::scanner) fn scan_node_tag<'b, 'c>( // !!global "node" OR !named!global "node" // ^^^^^^ ^^^^^^ - let (suffix, amt) = scan_tag_uri(buffer, stats, scratch, &mut can_borrow, false)?; + let (suffix, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, false)?; - let token = assemble_tag(buffer, scratch, h, suffix, can_borrow); + let token = assemble_tag(h, suffix, can_borrow); (token, amt) }, @@ -230,9 +217,9 @@ pub(in crate::scanner) fn scan_node_tag<'b, 'c>( // !local "node" // ^^^^^ - let (suffix, amt) = scan_tag_uri(buffer, stats, scratch, &mut can_borrow, false)?; + let (suffix, amt) = scan_tag_uri(buffer, stats, &mut can_borrow, false)?; - let token = assemble_tag(buffer, scratch, handle, suffix, can_borrow); + let token = assemble_tag(handle, suffix, can_borrow); (token, amt) }, @@ -252,16 +239,15 @@ pub(in crate::scanner) fn scan_node_tag<'b, 'c>( /// into .base or .scratch. 
/// /// [Link]: https://yaml.org/spec/1.2/spec.html#ns-global-tag-prefix -pub(in crate::scanner) fn scan_tag_uri( - base: &str, +pub(in crate::scanner) fn scan_tag_uri<'de>( + base: &'de str, stats: &mut MStats, - scratch: &mut Vec, can_borrow: &mut bool, verbatim: bool, -) -> Result<(Range, usize)> +) -> Result<(Slice<'de>, usize)> { let mut buffer = base; - let start = scratch.len(); + let mut scratch = Vec::new(); loop { @@ -307,7 +293,7 @@ pub(in crate::scanner) fn scan_tag_uri( *can_borrow = false; } - let amt = tag_uri_unescape(buffer, scratch, true)?; + let amt = tag_uri_unescape(buffer, &mut scratch, true)?; advance!(buffer, :stats, amt); }, // EOF before loop end is an error @@ -321,11 +307,13 @@ pub(in crate::scanner) fn scan_tag_uri( if *can_borrow { - Ok((0..advance, advance)) + Ok((cow!(&base[0..advance]), advance)) } else { - Ok((start..scratch.len(), advance)) + let utf8 = String::from_utf8(scratch).unwrap(); + + Ok((cow!(utf8), advance)) } } @@ -421,26 +409,14 @@ where /// Helper function for constructing /// [Ref][Ref]<[Token::Tag][Token]>s -fn assemble_tag<'b, 'c>( - buffer: &'b str, - scratch: &'c mut Vec, - handle: &'b str, - suffix: Range, - can_borrow: bool, -) -> Ref<'b, 'c> +fn assemble_tag<'de>(handle: &'de str, suffix: Slice<'de>, can_borrow: bool) -> Token<'de> { if can_borrow { - Token::Tag(cow!(handle), cow!(&buffer[suffix])).borrowed() + Token::Tag(cow!(handle), suffix) } else { - let start = scratch.len(); - scratch.extend_from_slice(handle.as_bytes()); - - let h = std::str::from_utf8(&scratch[start..]).unwrap(); - let t = std::str::from_utf8(&scratch[suffix]).unwrap(); - - Token::Tag(cow!(h), cow!(t)).copied() + Token::Tag(cow!(handle), suffix).into_owned() } }