Optimize escape_ascii
This commit is contained in:
parent
68e4d9654e
commit
6524acf04b
2 changed files with 109 additions and 25 deletions
|
@ -18,28 +18,12 @@ const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u
|
||||||
(output, 0..2)
|
(output, 0..2)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Escapes an ASCII character.
|
#[inline]
|
||||||
///
|
const fn hex_escape<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
|
||||||
/// Returns a buffer and the length of the escaped representation.
|
|
||||||
const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
|
|
||||||
const { assert!(N >= 4) };
|
const { assert!(N >= 4) };
|
||||||
|
|
||||||
match byte {
|
|
||||||
b'\t' => backslash(ascii::Char::SmallT),
|
|
||||||
b'\r' => backslash(ascii::Char::SmallR),
|
|
||||||
b'\n' => backslash(ascii::Char::SmallN),
|
|
||||||
b'\\' => backslash(ascii::Char::ReverseSolidus),
|
|
||||||
b'\'' => backslash(ascii::Char::Apostrophe),
|
|
||||||
b'\"' => backslash(ascii::Char::QuotationMark),
|
|
||||||
byte => {
|
|
||||||
let mut output = [ascii::Char::Null; N];
|
let mut output = [ascii::Char::Null; N];
|
||||||
|
|
||||||
if let Some(c) = byte.as_ascii()
|
|
||||||
&& !byte.is_ascii_control()
|
|
||||||
{
|
|
||||||
output[0] = c;
|
|
||||||
(output, 0..1)
|
|
||||||
} else {
|
|
||||||
let hi = HEX_DIGITS[(byte >> 4) as usize];
|
let hi = HEX_DIGITS[(byte >> 4) as usize];
|
||||||
let lo = HEX_DIGITS[(byte & 0xf) as usize];
|
let lo = HEX_DIGITS[(byte & 0xf) as usize];
|
||||||
|
|
||||||
|
@ -50,6 +34,90 @@ const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>)
|
||||||
|
|
||||||
(output, 0..4)
|
(output, 0..4)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
const fn verbatim<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {
|
||||||
|
const { assert!(N >= 1) };
|
||||||
|
|
||||||
|
let mut output = [ascii::Char::Null; N];
|
||||||
|
|
||||||
|
output[0] = a;
|
||||||
|
|
||||||
|
(output, 0..1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Escapes an ASCII character.
|
||||||
|
///
|
||||||
|
/// Returns a buffer and the range within it that holds the escaped representation.
|
||||||
|
const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
|
||||||
|
const { assert!(N >= 4) };
|
||||||
|
|
||||||
|
#[cfg(feature = "optimize_for_size")]
|
||||||
|
{
|
||||||
|
match byte {
|
||||||
|
b'\t' => backslash(ascii::Char::SmallT),
|
||||||
|
b'\r' => backslash(ascii::Char::SmallR),
|
||||||
|
b'\n' => backslash(ascii::Char::SmallN),
|
||||||
|
b'\\' => backslash(ascii::Char::ReverseSolidus),
|
||||||
|
b'\'' => backslash(ascii::Char::Apostrophe),
|
||||||
|
b'"' => backslash(ascii::Char::QuotationMark),
|
||||||
|
0x00..=0x1F | 0x7F => hex_escape(byte),
|
||||||
|
_ => match ascii::Char::from_u8(byte) {
|
||||||
|
Some(a) => verbatim(a),
|
||||||
|
None => hex_escape(byte),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(feature = "optimize_for_size"))]
|
||||||
|
{
|
||||||
|
/// Lookup table that helps us determine how to display each character.
|
||||||
|
///
|
||||||
|
/// Since ASCII characters are always 7 bits wide, we can exploit this and use the 8th bit to
|
||||||
|
/// indicate whether the result is escaped or unescaped.
|
||||||
|
///
|
||||||
|
/// We additionally use 0x80 (the escaped NUL character) to mark hex-escaped bytes, since NUL
|
||||||
|
/// itself is always hex-escaped and so a backslash-escaped NUL can never occur.
|
||||||
|
const LOOKUP: [u8; 256] = {
|
||||||
|
let mut arr = [0; 256];
|
||||||
|
let mut idx = 0;
|
||||||
|
while idx <= 255 {
|
||||||
|
arr[idx] = match idx as u8 {
|
||||||
|
// use 8th bit to indicate escaped
|
||||||
|
b'\t' => 0x80 | b't',
|
||||||
|
b'\r' => 0x80 | b'r',
|
||||||
|
b'\n' => 0x80 | b'n',
|
||||||
|
b'\\' => 0x80 | b'\\',
|
||||||
|
b'\'' => 0x80 | b'\'',
|
||||||
|
b'"' => 0x80 | b'"',
|
||||||
|
|
||||||
|
// use NUL to indicate hex-escaped
|
||||||
|
0x00..=0x1F | 0x7F..=0xFF => 0x80 | b'\0',
|
||||||
|
|
||||||
|
idx => idx,
|
||||||
|
};
|
||||||
|
idx += 1;
|
||||||
|
}
|
||||||
|
arr
|
||||||
|
};
|
||||||
|
|
||||||
|
let lookup = LOOKUP[byte as usize];
|
||||||
|
|
||||||
|
// 8th bit indicates escape
|
||||||
|
let lookup_escaped = lookup & 0x80 != 0;
|
||||||
|
|
||||||
|
// SAFETY: We explicitly mask out the eighth bit to get a 7-bit ASCII character.
|
||||||
|
let lookup_ascii = unsafe { ascii::Char::from_u8_unchecked(lookup & 0x7F) };
|
||||||
|
|
||||||
|
if lookup_escaped {
|
||||||
|
// NUL indicates hex-escaped
|
||||||
|
if matches!(lookup_ascii, ascii::Char::Null) {
|
||||||
|
hex_escape(byte)
|
||||||
|
} else {
|
||||||
|
backslash(lookup_ascii)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
verbatim(lookup_ascii)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -481,9 +481,25 @@ fn ascii_ctype_const() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_ascii_display() {
|
fn test_escape_ascii() {
|
||||||
assert_eq!(b"foo'bar".escape_ascii().to_string(), r#"foo\'bar"#);
|
let mut buf = [0u8; 0x1F + 7]; // 0..=0x1F plus two quotes, backslash, \x7F, \x80, \xFF
|
||||||
assert_eq!(b"\0\xff".escape_ascii().to_string(), r#"\x00\xff"#);
|
for idx in 0..=0x1F {
|
||||||
|
buf[idx] = idx as u8;
|
||||||
|
}
|
||||||
|
buf[0x20] = b'\'';
|
||||||
|
buf[0x21] = b'"';
|
||||||
|
buf[0x22] = b'\\';
|
||||||
|
buf[0x23] = 0x7F;
|
||||||
|
buf[0x24] = 0x80;
|
||||||
|
buf[0x25] = 0xff;
|
||||||
|
assert_eq!(
|
||||||
|
buf.escape_ascii().to_string(),
|
||||||
|
r#"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\'\"\\\x7f\x80\xff"#
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_escape_ascii_iter() {
|
||||||
let mut it = b"\0fastpath\xffremainder\xff".escape_ascii();
|
let mut it = b"\0fastpath\xffremainder\xff".escape_ascii();
|
||||||
let _ = it.advance_by(4);
|
let _ = it.advance_by(4);
|
||||||
let _ = it.advance_back_by(4);
|
let _ = it.advance_back_by(4);
|
||||||
|
|
Loading…
Add table
Reference in a new issue