Feature/tag/urlencode #11
|
@ -70,7 +70,7 @@ macro_rules! check {
|
||||||
(@priv $buffer:expr, $offset:expr => $( $match:tt )|+) => {
|
(@priv $buffer:expr, $offset:expr => $( $match:tt )|+) => {
|
||||||
match $buffer.get($offset..) {
|
match $buffer.get($offset..) {
|
||||||
Some(buffer) => check!(@priv buffer => $( $match )|+),
|
Some(buffer) => check!(@priv buffer => $( $match )|+),
|
||||||
None => check!(@eofck $( $match )|+ )
|
None => check!(@eofck $( $match )|+ ),
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
(@priv $buffer:expr => $( $match:tt )|+) => {
|
(@priv $buffer:expr => $( $match:tt )|+) => {
|
||||||
|
@ -81,16 +81,16 @@ macro_rules! check {
|
||||||
};
|
};
|
||||||
(@priv $buffer:expr, $offset:expr => $( $match:tt )|+, else $error:expr) => {
|
(@priv $buffer:expr, $offset:expr => $( $match:tt )|+, else $error:expr) => {
|
||||||
match $buffer.get($offset..) {
|
match $buffer.get($offset..) {
|
||||||
Some(buffer) => check!(@priv buffer => $( $match )|+ else $error),
|
Some(buffer) => check!(@priv buffer => $( $match )|+, else $error),
|
||||||
None if check!(@eofck $( $match )|+ ) => Ok(())
|
None if check!(@eofck $( $match )|+ ) => Ok(()),
|
||||||
_ => Err($crate::scanner::error::ScanError::UnexpectedEOF)
|
_ => Err($crate::scanner::error::ScanError::UnexpectedEOF),
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
(@priv $buffer:expr => $( $match:tt )|+, else $error:expr) => {
|
(@priv $buffer:expr => $( $match:tt )|+, else $error:expr) => {
|
||||||
match $buffer {
|
match $buffer {
|
||||||
$( check!(@ptn $match) )|+ => Ok(()),
|
$( check!(@ptn $match) )|+ => Ok(()),
|
||||||
[] => Err($crate::scanner::error::ScanError::UnexpectedEOF),
|
[] => Err($crate::scanner::error::ScanError::UnexpectedEOF),
|
||||||
_ => Err($error)
|
_ => Err($error),
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -186,6 +186,29 @@ macro_rules! isBlankZ {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tests whether the byte at .offset in .buffer is an ASCII
/// hexadecimal digit, that is one of 0-9, A-F or a-f
///
/// All the matching work is delegated to check!, so the
/// value produced is whatever check! yields for the chosen
/// variant.
///
/// Modifiers:
///     ~ .buffer := .buffer.as_bytes()
///
/// Variants:
///     /1 .buffer
///         := /2 .buffer, 0
///     /2 .buffer, .offset
///     /3 .buffer, else .error
///         := /4 .buffer, 0, else .error
///     /4 .buffer, .offset, else .error
macro_rules! isHex {
    // '~' modifier: view the buffer as bytes, then re-enter through the arm below
    (~ $buf:expr $(, $at:expr )? $(, else $err:expr )? ) => {
        isHex!($buf.as_bytes() $(, $at)? $(, else $err)? )
    };
    // Forward the optional offset and error untouched, supplying the digit patterns
    ($buf:expr $(, $at:expr )? $(, else $err:expr)? ) => {
        check!($buf $(, $at)? =>
            [b'0'..=b'9', ..] | [b'A'..=b'F', ..] | [b'a'..=b'f', ..]
            $(, else $err )?
        )
    };
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests
|
mod tests
|
||||||
{
|
{
|
||||||
|
|
|
@ -9,8 +9,8 @@ use crate::scanner::error::{ScanError, ScanResult as Result};
|
||||||
/// 32 bit unicode points.
|
/// 32 bit unicode points.
|
||||||
///
|
///
|
||||||
/// It writes the unescaped character to .scratch, returning
|
/// It writes the unescaped character to .scratch, returning
|
||||||
/// the length of .buffer advanced, or an error if the
|
/// the length of .base advanced, or an error if the
|
||||||
/// escape sequence is invalid. It expects .buffer->0 is a
|
/// escape sequence is invalid. It expects .base->0 is a
|
||||||
/// backslash (\\), as this is the only valid start of an
|
/// backslash (\\), as this is the only valid start of an
|
||||||
/// escape sequence.
|
/// escape sequence.
|
||||||
///
|
///
|
||||||
|
@ -67,6 +67,67 @@ pub(super) fn flow_unescape(base: &str, scratch: &mut Vec<u8>) -> Result<usize>
|
||||||
Ok(base.len() - buffer.len())
|
Ok(base.len() - buffer.len())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Unescape a percent encoded UTF8 tag escape sequence as
|
||||||
|
/// defined in [Section 5.6][Link], writing the code point
|
||||||
|
/// to the scratch, returning the length of .base consumed.
|
||||||
|
///
|
||||||
|
/// [Link]: https://yaml.org/spec/1.2/spec.html#ns-uri-char
|
||||||
|
pub(super) fn tag_uri_unescape(base: &str, scratch: &mut Vec<u8>, _directive: bool)
|
||||||
|
-> Result<usize>
|
||||||
|
{
|
||||||
|
let mut buffer = base;
|
||||||
|
let mut codepoint_len: i8 = 0;
|
||||||
|
|
||||||
|
while {
|
||||||
|
if buffer.len() < 3
|
||||||
|
{
|
||||||
|
return Err(ScanError::UnexpectedEOF);
|
||||||
|
}
|
||||||
|
|
||||||
|
if !(check!(~buffer => b'%') && isHex!(~buffer, 1) && isHex!(~buffer, 2))
|
||||||
|
{
|
||||||
|
return Err(ScanError::UnknownEscape);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Safety: we just checked that there are at least three
|
||||||
|
// bytes in the buffer
|
||||||
|
let octet: u8 = (as_hex(buffer.as_bytes()[1]) << 4) + as_hex(buffer.as_bytes()[2]);
|
||||||
|
|
||||||
|
match codepoint_len
|
||||||
|
{
|
||||||
|
// First time through, determine how many octets this codepoint has
|
||||||
|
0 =>
|
||||||
|
{
|
||||||
|
codepoint_len = match octet
|
||||||
|
{
|
||||||
|
o if (o & 0x80) == 0x00 => 1,
|
||||||
|
o if (o & 0xE0) == 0xC0 => 2,
|
||||||
|
o if (o & 0xF0) == 0xE0 => 3,
|
||||||
|
o if (o & 0xF8) == 0xF0 => 4,
|
||||||
|
_ => return Err(ScanError::UnknownEscape),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// Else ensure that the trailing octet is valid
|
||||||
|
_ =>
|
||||||
|
{
|
||||||
|
if (octet & 0xC0) != 0x80
|
||||||
|
{
|
||||||
|
return Err(ScanError::UnknownEscape);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
scratch.push(octet);
|
||||||
|
codepoint_len -= 1;
|
||||||
|
advance!(buffer, 3);
|
||||||
|
|
||||||
|
codepoint_len > 0
|
||||||
|
}
|
||||||
|
{}
|
||||||
|
|
||||||
|
Ok(base.len() - buffer.len())
|
||||||
|
}
|
||||||
|
|
||||||
/// Writes a UTF8 codepoint to the scratch space
|
/// Writes a UTF8 codepoint to the scratch space
|
||||||
fn write_unicode_point(base: &str, scratch: &mut Vec<u8>, codepoint_len: u8) -> Result<usize>
|
fn write_unicode_point(base: &str, scratch: &mut Vec<u8>, codepoint_len: u8) -> Result<usize>
|
||||||
{
|
{
|
||||||
|
@ -86,7 +147,7 @@ fn write_unicode_point(base: &str, scratch: &mut Vec<u8>, codepoint_len: u8) ->
|
||||||
None => return Err(ScanError::UnexpectedEOF),
|
None => return Err(ScanError::UnexpectedEOF),
|
||||||
Some(c) if !c.is_ascii_hexdigit() => return Err(ScanError::UnknownEscape),
|
Some(c) if !c.is_ascii_hexdigit() => return Err(ScanError::UnknownEscape),
|
||||||
|
|
||||||
Some(b) => value = (value << 4) + as_hex(*b),
|
Some(b) => value = (value << 4) + as_hex(*b) as u32,
|
||||||
}
|
}
|
||||||
advance!(buffer, 1, i);
|
advance!(buffer, 1, i);
|
||||||
}
|
}
|
||||||
|
@ -128,9 +189,9 @@ fn write_unicode_point(base: &str, scratch: &mut Vec<u8>, codepoint_len: u8) ->
|
||||||
*/
|
*/
|
||||||
#[allow(clippy::manual_range_contains)]
|
#[allow(clippy::manual_range_contains)]
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_hex(b: u8) -> u32
|
fn as_hex(b: u8) -> u8
|
||||||
{
|
{
|
||||||
let ret = if b >= b'A' && b <= b'F'
|
if b >= b'A' && b <= b'F'
|
||||||
{
|
{
|
||||||
b - b'A' + 10
|
b - b'A' + 10
|
||||||
}
|
}
|
||||||
|
@ -141,9 +202,7 @@ fn as_hex(b: u8) -> u32
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
b - b'0'
|
b - b'0'
|
||||||
};
|
}
|
||||||
|
|
||||||
ret as u32
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <Next Line> (U+0085)
|
/// <Next Line> (U+0085)
|
||||||
|
@ -158,7 +217,7 @@ const PS: [u8; 3] = [b'\xE2', b'\x80', b'\xA9'];
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests
|
mod tests
|
||||||
{
|
{
|
||||||
use anyhow::anyhow;
|
use anyhow::{anyhow, bail};
|
||||||
use pretty_assertions::assert_eq;
|
use pretty_assertions::assert_eq;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
@ -323,4 +382,97 @@ mod tests
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Valid two, three and four octet escapes, in both upper and
/// lower case hex, must decode to the matching raw octets and
/// consume the entire input.
#[test]
fn tag_uri_unescape_codepoint() -> TestResult
{
    // Inputs and their expected octets are paired by position
    let inputs = &[
        r#"%C2%85"#,
        r#"%c5%b4"#,
        r#"%E2%B1%bf"#,
        r#"%E2%B8%BF"#,
        r#"%f0%90%8f%95"#,
        r#"%F0%90%AD%81"#,
    ];
    let outputs: &[&[u8]] = &[
        &[0xC2, 0x85],
        &[0xC5, 0xB4],
        &[0xE2, 0xB1, 0xBF],
        &[0xE2, 0xB8, 0xBF],
        &[0xF0, 0x90, 0x8F, 0x95],
        &[0xF0, 0x90, 0xAD, 0x81],
    ];
    let mut scratch: Vec<u8> = Vec::new();

    assert_eq!(
        inputs.len(),
        outputs.len(),
        "test data and expected data are not the same length"
    );

    for (idx, (input, want)) in inputs.iter().zip(outputs.iter()).enumerate()
    {
        let (input, want): (&str, &[u8]) = (*input, *want);

        // Start each case from an empty scratch space
        scratch.clear();

        let consumed = match tag_uri_unescape(input, &mut scratch, true)
        {
            Ok(amount) => amount,
            Err(err) =>
            {
                return Err(anyhow!("on iteration {}, test errored with {}", idx, err).into())
            },
        };

        assert_eq!(
            scratch.as_slice(),
            want,
            "on iteration {}, expected byte sequence {:?}, got {:?}",
            idx,
            want,
            scratch
        );

        assert_eq!(
            consumed,
            input.len(),
            "on iteration {}, expected to consume {}, got {}",
            idx,
            input.len(),
            consumed
        );
    }

    Ok(())
}
|
||||||
|
|
||||||
|
/// An escape that is cut short before its second hex digit
/// must be reported as UnexpectedEOF.
#[test]
fn tag_uri_unescape_eof() -> TestResult
{
    // The second escape is missing its final hex digit
    let data = r#"%C2%8"#;
    let mut scratch = Vec::new();
    let expected = ScanError::UnexpectedEOF;

    match tag_uri_unescape(data, &mut scratch, true)
    {
        Ok(amt) => bail!(
            "expected error: {}, got unexpected value: {}",
            expected,
            amt
        ),

        Err(e) if e != expected =>
        {
            bail!("expected error: {}, got different error: {}", expected, e)
        },

        Err(_) => Ok(()),
    }
}
|
||||||
|
|
||||||
|
/// Input that does not begin with a percent sign must be
/// rejected as UnknownEscape.
#[test]
fn tag_uri_unescape_invalid() -> TestResult
{
    // Long enough to pass the length check, but starts with a backslash
    let data = r#"\xC285"#;
    let mut scratch = Vec::new();
    let expected = ScanError::UnknownEscape;

    match tag_uri_unescape(data, &mut scratch, true)
    {
        Ok(amt) => bail!(
            "expected error: {}, got unexpected value: {}",
            expected,
            amt
        ),

        Err(e) if e != expected =>
        {
            bail!("expected error: {}, got different error: {}", expected, e)
        },

        Err(_) => Ok(()),
    }
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue