mirror of https://github.com/fafhrd91/actix-web
rework Quoter
This commit is contained in:
parent
b15a7d3c0d
commit
8d7afb8cfe
|
@ -32,6 +32,7 @@ criterion = { version = "0.3", features = ["html_reports"] }
|
||||||
firestorm = { version = "0.5", features = ["enable_system_time"] }
|
firestorm = { version = "0.5", features = ["enable_system_time"] }
|
||||||
http = "0.2.5"
|
http = "0.2.5"
|
||||||
serde = { version = "1", features = ["derive"] }
|
serde = { version = "1", features = ["derive"] }
|
||||||
|
percent-encoding = "2.1"
|
||||||
|
|
||||||
[[bench]]
|
[[bench]]
|
||||||
name = "router"
|
name = "router"
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
|
|
||||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||||
|
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
macro_rules! register {
|
macro_rules! register {
|
||||||
(colon) => {{
|
(colon) => {{
|
||||||
register!(finish => ":p1", ":p2", ":p3", ":p4")
|
register!(finish => ":p1", ":p2", ":p3", ":p4")
|
||||||
|
@ -162,6 +164,49 @@ fn call() -> impl Iterator<Item = &'static str> {
|
||||||
IntoIterator::into_iter(arr)
|
IntoIterator::into_iter(arr)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn compare_quoters(c: &mut Criterion) {
|
||||||
|
let mut group = c.benchmark_group("Compare Quoters");
|
||||||
|
|
||||||
|
let quoter = actix_router::Quoter::new(b"", b"");
|
||||||
|
let path = (0..=127).map(|c| format!("%{:02X}", c)).collect::<String>();
|
||||||
|
|
||||||
|
group.bench_function("quoter_x", |b| {
|
||||||
|
b.iter(|| {
|
||||||
|
for route in call() {
|
||||||
|
black_box(quoter.requote(route.as_bytes()));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
group.bench_function("percent_encode", |b| {
|
||||||
|
b.iter(|| {
|
||||||
|
for route in call() {
|
||||||
|
let decode = percent_encoding::percent_decode(route.as_bytes());
|
||||||
|
black_box(Into::<Cow<'_, [u8]>>::into(decode));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
group.bench_function("quoter_x_stat", |b| {
|
||||||
|
b.iter(|| {
|
||||||
|
for _ in 0..10 {
|
||||||
|
black_box(quoter.requote(path.as_bytes()));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
group.bench_function("percent_encode_stat", |b| {
|
||||||
|
b.iter(|| {
|
||||||
|
for _ in 0..10 {
|
||||||
|
let decode = percent_encoding::percent_decode(path.as_bytes());
|
||||||
|
black_box(Into::<Cow<'_, [u8]>>::into(decode));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
group.finish();
|
||||||
|
}
|
||||||
|
|
||||||
fn compare_routers(c: &mut Criterion) {
|
fn compare_routers(c: &mut Criterion) {
|
||||||
let mut group = c.benchmark_group("Compare Routers");
|
let mut group = c.benchmark_group("Compare Routers");
|
||||||
|
|
||||||
|
@ -191,5 +236,5 @@ fn compare_routers(c: &mut Criterion) {
|
||||||
group.finish();
|
group.finish();
|
||||||
}
|
}
|
||||||
|
|
||||||
criterion_group!(benches, compare_routers);
|
criterion_group!(benches, compare_quoters);
|
||||||
criterion_main!(benches);
|
criterion_main!(benches);
|
||||||
|
|
|
@ -1,132 +1,87 @@
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
const GEN_DELIMS: &[u8] = b":/?#[]@";
|
mod charsets {
|
||||||
|
pub const GEN_DELIMS: &[u8] = b":/?#[]@";
|
||||||
#[allow(dead_code)]
|
pub const SUB_DELIMS_WITHOUT_QS: &[u8] = b"!$'()*,";
|
||||||
const SUB_DELIMS_WITHOUT_QS: &[u8] = b"!$'()*,";
|
pub const SUB_DELIMS: &[u8] = b"!$'()*,+?=;";
|
||||||
|
pub const RESERVED: &[u8] = b":/?#[]@!$'()*,+?=;";
|
||||||
#[allow(dead_code)]
|
pub const UNRESERVED: &[u8] = b"abcdefghijklmnopqrstuvwxyz
|
||||||
const SUB_DELIMS: &[u8] = b"!$'()*,+?=;";
|
|
||||||
|
|
||||||
#[allow(dead_code)]
|
|
||||||
const RESERVED: &[u8] = b":/?#[]@!$'()*,+?=;";
|
|
||||||
|
|
||||||
#[allow(dead_code)]
|
|
||||||
const UNRESERVED: &[u8] = b"abcdefghijklmnopqrstuvwxyz
|
|
||||||
ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||||
1234567890
|
1234567890
|
||||||
-._~";
|
-._~";
|
||||||
|
pub const ALLOWED: &[u8] = b"abcdefghijklmnopqrstuvwxyz
|
||||||
const ALLOWED: &[u8] = b"abcdefghijklmnopqrstuvwxyz
|
|
||||||
ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||||
1234567890
|
1234567890
|
||||||
-._~
|
-._~
|
||||||
!$'()*,";
|
!$'()*,";
|
||||||
|
pub const QS: &[u8] = b"+&=;b";
|
||||||
|
}
|
||||||
|
|
||||||
const QS: &[u8] = b"+&=;b";
|
/// Partial Percent-decoding.
|
||||||
|
|
||||||
/// A quoter
|
|
||||||
pub struct Quoter {
|
pub struct Quoter {
|
||||||
/// Simple bit-map of safe values in the 0-127 ASCII range.
|
|
||||||
safe_table: [u8; 16],
|
|
||||||
|
|
||||||
/// Simple bit-map of protected values in the 0-127 ASCII range.
|
/// Simple bit-map of protected values in the 0-127 ASCII range.
|
||||||
protected_table: [u8; 16],
|
protected_table: [u8; 16],
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Quoter {
|
impl Quoter {
|
||||||
pub fn new(safe: &[u8], protected: &[u8]) -> Quoter {
|
/// Constructs a new `Quoter` instance given a set of protected ASCII bytes.
|
||||||
let mut quoter = Quoter {
|
///
|
||||||
safe_table: [0; 16],
|
/// The first argument is ignored and is kept for backward compatibility only.
|
||||||
protected_table: [0; 16],
|
///
|
||||||
};
|
/// # Panic
|
||||||
|
/// Panics if any of the `protected` bytes is not in the 0-127 ASCII range.
|
||||||
// prepare safe table
|
pub fn new(_: &[u8], protected: &[u8]) -> Quoter {
|
||||||
for ch in 0..128 {
|
let mut protected_table = [0; 16];
|
||||||
if ALLOWED.contains(&ch) {
|
|
||||||
set_bit(&mut quoter.safe_table, ch);
|
|
||||||
}
|
|
||||||
|
|
||||||
if QS.contains(&ch) {
|
|
||||||
set_bit(&mut quoter.safe_table, ch);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for &ch in safe {
|
|
||||||
set_bit(&mut quoter.safe_table, ch)
|
|
||||||
}
|
|
||||||
|
|
||||||
// prepare protected table
|
// prepare protected table
|
||||||
for &ch in protected {
|
for &ch in protected {
|
||||||
set_bit(&mut quoter.safe_table, ch);
|
set_bit(&mut protected_table, ch);
|
||||||
set_bit(&mut quoter.protected_table, ch);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
quoter
|
Quoter { protected_table }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Decodes safe percent-encoded sequences from `val`.
|
/// Decodes the next escape sequence, if any, and advances `val`.
|
||||||
|
#[inline(always)]
|
||||||
|
fn decode_next<'a>(&self, val: &mut &'a [u8]) -> Option<(&'a [u8], u8)> {
|
||||||
|
for i in 0..val.len() {
|
||||||
|
if let (prev, [b'%', p1, p2, rem @ ..]) = val.split_at(i) {
|
||||||
|
if let Some(ch) = hex_pair_to_char(*p1, *p2)
|
||||||
|
// ingore protected ascii bytes
|
||||||
|
.filter(|&ch| !(ch < 128 && bit_at(&self.protected_table, ch)))
|
||||||
|
{
|
||||||
|
*val = rem;
|
||||||
|
return Some((prev, ch));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Partially percent-decodes the given bytes.
|
||||||
|
/// Escape sequences of the protected set are *not* decoded.
|
||||||
///
|
///
|
||||||
/// Returns `None` when no modification to the original byte string was required.
|
/// Returns `None` when no modification to the original bytes was required.
|
||||||
///
|
///
|
||||||
/// Non-ASCII bytes are accepted as valid input.
|
/// Invalid/incomplete percent-encoding sequences are passed unmodified.
|
||||||
///
|
|
||||||
/// Behavior for invalid/incomplete percent-encoding sequences is unspecified and may include
|
|
||||||
/// removing the invalid sequence from the output or passing it as-is.
|
|
||||||
pub fn requote(&self, val: &[u8]) -> Option<Vec<u8>> {
|
pub fn requote(&self, val: &[u8]) -> Option<Vec<u8>> {
|
||||||
let mut has_pct = 0;
|
let mut remaining = val;
|
||||||
let mut pct = [b'%', 0, 0];
|
|
||||||
let mut idx = 0;
|
|
||||||
let mut cloned: Option<Vec<u8>> = None;
|
|
||||||
|
|
||||||
let len = val.len();
|
let (prev, ch) = self.decode_next(&mut remaining)?;
|
||||||
|
let mut buf = Vec::<u8>::with_capacity(val.len());
|
||||||
while idx < len {
|
buf.extend_from_slice(prev);
|
||||||
let ch = val[idx];
|
|
||||||
|
|
||||||
if has_pct != 0 {
|
|
||||||
pct[has_pct] = val[idx];
|
|
||||||
has_pct += 1;
|
|
||||||
|
|
||||||
if has_pct == 3 {
|
|
||||||
has_pct = 0;
|
|
||||||
let buf = cloned.as_mut().unwrap();
|
|
||||||
|
|
||||||
if let Some(ch) = hex_pair_to_char(pct[1], pct[2]) {
|
|
||||||
if ch < 128 {
|
|
||||||
if bit_at(&self.protected_table, ch) {
|
|
||||||
buf.extend_from_slice(&pct);
|
|
||||||
idx += 1;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if bit_at(&self.safe_table, ch) {
|
|
||||||
buf.push(ch);
|
buf.push(ch);
|
||||||
idx += 1;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
while let Some((prev, ch)) = self.decode_next(&mut remaining) {
|
||||||
|
// this ugly conditional achieves +50% perf in cases where this is a tight loop.
|
||||||
|
if prev.len() != 0 {
|
||||||
|
buf.extend_from_slice(prev);
|
||||||
|
}
|
||||||
buf.push(ch);
|
buf.push(ch);
|
||||||
} else {
|
|
||||||
buf.extend_from_slice(&pct[..]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if ch == b'%' {
|
|
||||||
has_pct = 1;
|
|
||||||
|
|
||||||
if cloned.is_none() {
|
|
||||||
let mut c = Vec::with_capacity(len);
|
|
||||||
c.extend_from_slice(&val[..idx]);
|
|
||||||
cloned = Some(c);
|
|
||||||
}
|
|
||||||
} else if let Some(ref mut cloned) = cloned {
|
|
||||||
cloned.push(ch)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
idx += 1;
|
buf.extend_from_slice(remaining);
|
||||||
}
|
Some(buf)
|
||||||
|
|
||||||
cloned
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn requote_str_lossy(&self, val: &str) -> Option<String> {
|
pub(crate) fn requote_str_lossy(&self, val: &str) -> Option<String> {
|
||||||
|
@ -135,24 +90,6 @@ impl Quoter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Converts an ASCII character in the hex-encoded set (`0-9`, `A-F`, `a-f`) to its integer
|
|
||||||
/// representation from `0x0`–`0xF`.
|
|
||||||
///
|
|
||||||
/// - `0x30 ('0') => 0x0`
|
|
||||||
/// - `0x39 ('9') => 0x9`
|
|
||||||
/// - `0x41 ('a') => 0xA`
|
|
||||||
/// - `0x61 ('A') => 0xA`
|
|
||||||
/// - `0x46 ('f') => 0xF`
|
|
||||||
/// - `0x66 ('F') => 0xF`
|
|
||||||
fn from_ascii_hex(v: u8) -> Option<u8> {
|
|
||||||
match v {
|
|
||||||
b'0'..=b'9' => Some(v - 0x30), // ord('0') == 0x30
|
|
||||||
b'A'..=b'F' => Some(v - 0x41 + 10), // ord('A') == 0x41
|
|
||||||
b'a'..=b'f' => Some(v - 0x61 + 10), // ord('a') == 0x61
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Decode a ASCII hex-encoded pair to an integer.
|
/// Decode a ASCII hex-encoded pair to an integer.
|
||||||
///
|
///
|
||||||
/// Returns `None` if either portion of the decoded pair does not evaluate to a valid hex value.
|
/// Returns `None` if either portion of the decoded pair does not evaluate to a valid hex value.
|
||||||
|
@ -160,11 +97,13 @@ fn from_ascii_hex(v: u8) -> Option<u8> {
|
||||||
/// - `0x33 ('3'), 0x30 ('0') => 0x30 ('0')`
|
/// - `0x33 ('3'), 0x30 ('0') => 0x30 ('0')`
|
||||||
/// - `0x34 ('4'), 0x31 ('1') => 0x41 ('A')`
|
/// - `0x34 ('4'), 0x31 ('1') => 0x41 ('A')`
|
||||||
/// - `0x36 ('6'), 0x31 ('1') => 0x61 ('a')`
|
/// - `0x36 ('6'), 0x31 ('1') => 0x61 ('a')`
|
||||||
|
#[inline(always)]
|
||||||
fn hex_pair_to_char(d1: u8, d2: u8) -> Option<u8> {
|
fn hex_pair_to_char(d1: u8, d2: u8) -> Option<u8> {
|
||||||
let (d_high, d_low) = (from_ascii_hex(d1)?, from_ascii_hex(d2)?);
|
let d_high = char::from(d1).to_digit(16)?;
|
||||||
|
let d_low = char::from(d2).to_digit(16)?;
|
||||||
|
|
||||||
// left shift high nibble by 4 bits
|
// left shift high nibble by 4 bits
|
||||||
Some(d_high << 4 | d_low)
|
Some((d_high as u8) << 4 | (d_low as u8))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sets bit in given bit-map to 1=true.
|
/// Sets bit in given bit-map to 1=true.
|
||||||
|
@ -179,7 +118,7 @@ fn set_bit(array: &mut [u8], ch: u8) {
|
||||||
///
|
///
|
||||||
/// # Panics
|
/// # Panics
|
||||||
/// Panics if `ch` index is out of bounds.
|
/// Panics if `ch` index is out of bounds.
|
||||||
fn bit_at(array: &[u8], ch: u8) -> bool {
|
fn bit_at(array: &[u8; 16], ch: u8) -> bool {
|
||||||
array[(ch >> 3) as usize] & (0b1 << (ch & 0b111)) != 0
|
array[(ch >> 3) as usize] & (0b1 << (ch & 0b111)) != 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -187,37 +126,16 @@ fn bit_at(array: &[u8], ch: u8) -> bool {
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn hex_encoding() {
|
|
||||||
let hex = b"0123456789abcdefABCDEF";
|
|
||||||
|
|
||||||
for i in 0..256 {
|
|
||||||
let c = i as u8;
|
|
||||||
if hex.contains(&c) {
|
|
||||||
assert!(from_ascii_hex(c).is_some())
|
|
||||||
} else {
|
|
||||||
assert!(from_ascii_hex(c).is_none())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let expected = [
|
|
||||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 10, 11, 12, 13, 14, 15,
|
|
||||||
];
|
|
||||||
for i in 0..hex.len() {
|
|
||||||
assert_eq!(from_ascii_hex(hex[i]).unwrap(), expected[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn custom_quoter() {
|
fn custom_quoter() {
|
||||||
let q = Quoter::new(b"", b"+");
|
let q = Quoter::new(b"", b"+");
|
||||||
assert_eq!(q.requote(b"/a%25c").unwrap(), b"/a%c");
|
assert_eq!(q.requote(b"/a%25c").unwrap(), b"/a%c");
|
||||||
assert_eq!(q.requote(b"/a%2Bc").unwrap(), b"/a%2Bc");
|
assert_eq!(q.requote(b"/a%2Bc"), None);
|
||||||
|
|
||||||
let q = Quoter::new(b"%+", b"/");
|
let q = Quoter::new(b"%+", b"/");
|
||||||
assert_eq!(q.requote(b"/a%25b%2Bc").unwrap(), b"/a%b+c");
|
assert_eq!(q.requote(b"/a%25b%2Bc").unwrap(), b"/a%b+c");
|
||||||
assert_eq!(q.requote(b"/a%2fb").unwrap(), b"/a%2fb");
|
assert_eq!(q.requote(b"/a%2fb"), None);
|
||||||
assert_eq!(q.requote(b"/a%2Fb").unwrap(), b"/a%2Fb");
|
assert_eq!(q.requote(b"/a%2Fb"), None);
|
||||||
assert_eq!(q.requote(b"/a%0Ab").unwrap(), b"/a\nb");
|
assert_eq!(q.requote(b"/a%0Ab").unwrap(), b"/a\nb");
|
||||||
assert_eq!(q.requote(b"/a%FE\xffb").unwrap(), b"/a\xfe\xffb");
|
assert_eq!(q.requote(b"/a%FE\xffb").unwrap(), b"/a\xfe\xffb");
|
||||||
assert_eq!(q.requote(b"/a\xfe\xffb"), None);
|
assert_eq!(q.requote(b"/a\xfe\xffb"), None);
|
||||||
|
@ -233,7 +151,8 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn invalid_sequences() {
|
fn invalid_sequences() {
|
||||||
let q = Quoter::new(b"%+", b"/");
|
let q = Quoter::new(b"%+", b"/");
|
||||||
assert_eq!(q.requote(b"/a%2x%2X%%").unwrap(), b"/a%2x%2X");
|
assert_eq!(q.requote(b"/a%2x%2X%%"), None);
|
||||||
|
assert_eq!(q.requote(b"/a%20%2X%%").unwrap(), b"/a %2X%%");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -241,16 +160,4 @@ mod tests {
|
||||||
let q = Quoter::new(b"", b"");
|
let q = Quoter::new(b"", b"");
|
||||||
assert_eq!(q.requote(b"/abc/../efg"), None);
|
assert_eq!(q.requote(b"/abc/../efg"), None);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn pointless_safe_table() {
|
|
||||||
for safe_ch in 0..=127 {
|
|
||||||
let q = Quoter::new(&[safe_ch], b"");
|
|
||||||
|
|
||||||
for i in 0..=255 {
|
|
||||||
let test = format!("/%{:02X}/", i);
|
|
||||||
assert_eq!(q.requote(test.as_bytes()).unwrap(), &[b'/', i, b'/']);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue