diff --git a/actix-router/Cargo.toml b/actix-router/Cargo.toml index 6fcef125d..8ce4bdb43 100644 --- a/actix-router/Cargo.toml +++ b/actix-router/Cargo.toml @@ -32,6 +32,7 @@ criterion = { version = "0.3", features = ["html_reports"] } firestorm = { version = "0.5", features = ["enable_system_time"] } http = "0.2.5" serde = { version = "1", features = ["derive"] } +percent-encoding = "2.1" [[bench]] name = "router" diff --git a/actix-router/benches/router.rs b/actix-router/benches/router.rs index 6f6b67b48..e0f49378e 100644 --- a/actix-router/benches/router.rs +++ b/actix-router/benches/router.rs @@ -2,6 +2,8 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use std::borrow::Cow; + macro_rules! register { (colon) => {{ register!(finish => ":p1", ":p2", ":p3", ":p4") @@ -162,6 +164,49 @@ fn call() -> impl Iterator { IntoIterator::into_iter(arr) } +fn compare_quoters(c: &mut Criterion) { + let mut group = c.benchmark_group("Compare Quoters"); + + let quoter = actix_router::Quoter::new(b"", b""); + let path = (0..=127).map(|c| format!("%{:02X}", c)).collect::(); + + group.bench_function("quoter_x", |b| { + b.iter(|| { + for route in call() { + black_box(quoter.requote(route.as_bytes())); + } + }); + }); + + group.bench_function("percent_encode", |b| { + b.iter(|| { + for route in call() { + let decode = percent_encoding::percent_decode(route.as_bytes()); + black_box(Into::>::into(decode)); + } + }); + }); + + group.bench_function("quoter_x_stat", |b| { + b.iter(|| { + for _ in 0..10 { + black_box(quoter.requote(path.as_bytes())); + } + }); + }); + + group.bench_function("percent_encode_stat", |b| { + b.iter(|| { + for _ in 0..10 { + let decode = percent_encoding::percent_decode(path.as_bytes()); + black_box(Into::>::into(decode)); + } + }); + }); + + group.finish(); +} + fn compare_routers(c: &mut Criterion) { let mut group = c.benchmark_group("Compare Routers"); @@ -191,5 +236,5 @@ fn compare_routers(c: &mut Criterion) { group.finish(); } -criterion_group!(benches, compare_routers); +criterion_group!(benches, compare_quoters); criterion_main!(benches); diff --git a/actix-router/src/quoter.rs b/actix-router/src/quoter.rs index 5058cf652..bdfb0f993 100644 --- a/actix-router/src/quoter.rs +++ b/actix-router/src/quoter.rs @@ -1,132 +1,87 @@ #[allow(dead_code)] -const GEN_DELIMS: &[u8] = b":/?#[]@"; - -#[allow(dead_code)] -const SUB_DELIMS_WITHOUT_QS: &[u8] = b"!$'()*,"; - -#[allow(dead_code)] -const SUB_DELIMS: &[u8] = b"!$'()*,+?=;"; - -#[allow(dead_code)] -const RESERVED: &[u8] = b":/?#[]@!$'()*,+?=;"; - -#[allow(dead_code)] -const UNRESERVED: &[u8] = b"abcdefghijklmnopqrstuvwxyz +mod charsets { + pub const GEN_DELIMS: &[u8] = b":/?#[]@"; + pub const SUB_DELIMS_WITHOUT_QS: &[u8] = b"!$'()*,"; + pub const SUB_DELIMS: &[u8] = b"!$'()*,+?=;"; + pub const RESERVED: &[u8] = b":/?#[]@!$'()*,+?=;"; + pub const UNRESERVED: &[u8] = b"abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 -._~"; - -const ALLOWED: &[u8] = b"abcdefghijklmnopqrstuvwxyz + pub const ALLOWED: &[u8] = b"abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 -._~ !$'()*,"; + pub const QS: &[u8] = b"+&=;b"; +} -const QS: &[u8] = b"+&=;b"; - -/// A quoter +/// Partial Percent-decoding. pub struct Quoter { - /// Simple bit-map of safe values in the 0-127 ASCII range. - safe_table: [u8; 16], - /// Simple bit-map of protected values in the 0-127 ASCII range. protected_table: [u8; 16], } impl Quoter { - pub fn new(safe: &[u8], protected: &[u8]) -> Quoter { - let mut quoter = Quoter { - safe_table: [0; 16], - protected_table: [0; 16], - }; - - // prepare safe table - for ch in 0..128 { - if ALLOWED.contains(&ch) { - set_bit(&mut quoter.safe_table, ch); - } - - if QS.contains(&ch) { - set_bit(&mut quoter.safe_table, ch); - } - } - - for &ch in safe { - set_bit(&mut quoter.safe_table, ch) - } + /// Constructs a new `Quoter` instance given a set of protected ASCII bytes. + /// + /// The first argument is ignored and is kept for backward compatibility only. + /// + /// # Panic + /// Panics if any of the `protected` bytes is not in the 0-127 ASCII range. + pub fn new(_: &[u8], protected: &[u8]) -> Quoter { + let mut protected_table = [0; 16]; // prepare protected table for &ch in protected { - set_bit(&mut quoter.safe_table, ch); - set_bit(&mut quoter.protected_table, ch); + set_bit(&mut protected_table, ch); } - quoter + Quoter { protected_table } } - /// Decodes safe percent-encoded sequences from `val`. - /// - /// Returns `None` when no modification to the original byte string was required. - /// - /// Non-ASCII bytes are accepted as valid input. - /// - /// Behavior for invalid/incomplete percent-encoding sequences is unspecified and may include - /// removing the invalid sequence from the output or passing it as-is. - pub fn requote(&self, val: &[u8]) -> Option> { - let mut has_pct = 0; - let mut pct = [b'%', 0, 0]; - let mut idx = 0; - let mut cloned: Option> = None; - - let len = val.len(); - - while idx < len { - let ch = val[idx]; - - if has_pct != 0 { - pct[has_pct] = val[idx]; - has_pct += 1; - - if has_pct == 3 { - has_pct = 0; - let buf = cloned.as_mut().unwrap(); - - if let Some(ch) = hex_pair_to_char(pct[1], pct[2]) { - if ch < 128 { - if bit_at(&self.protected_table, ch) { - buf.extend_from_slice(&pct); - idx += 1; - continue; - } - - if bit_at(&self.safe_table, ch) { - buf.push(ch); - idx += 1; - continue; - } - } - - buf.push(ch); - } else { - buf.extend_from_slice(&pct[..]); - } + /// Decodes the next escape sequence, if any, and advances `val`. + #[inline(always)] + fn decode_next<'a>(&self, val: &mut &'a [u8]) -> Option<(&'a [u8], u8)> { + for i in 0..val.len() { + if let (prev, [b'%', p1, p2, rem @ ..]) = val.split_at(i) { + if let Some(ch) = hex_pair_to_char(*p1, *p2) + // ingore protected ascii bytes + .filter(|&ch| !(ch < 128 && bit_at(&self.protected_table, ch))) + { + *val = rem; + return Some((prev, ch)); } - } else if ch == b'%' { - has_pct = 1; - - if cloned.is_none() { - let mut c = Vec::with_capacity(len); - c.extend_from_slice(&val[..idx]); - cloned = Some(c); - } - } else if let Some(ref mut cloned) = cloned { - cloned.push(ch) } - - idx += 1; } - cloned + None + } + + /// Partially percent-decodes the given bytes. + /// Escape sequences of the protected set are *not* decoded. + /// + /// Returns `None` when no modification to the original bytes was required. + /// + /// Invalid/incomplete percent-encoding sequences are passed unmodified. + pub fn requote(&self, val: &[u8]) -> Option> { + let mut remaining = val; + + let (prev, ch) = self.decode_next(&mut remaining)?; + let mut buf = Vec::::with_capacity(val.len()); + buf.extend_from_slice(prev); + buf.push(ch); + + while let Some((prev, ch)) = self.decode_next(&mut remaining) { + // this ugly conditional achieves +50% perf in cases where this is a tight loop. + if prev.len() != 0 { + buf.extend_from_slice(prev); + } + buf.push(ch); + } + + buf.extend_from_slice(remaining); + Some(buf) } pub(crate) fn requote_str_lossy(&self, val: &str) -> Option { @@ -135,24 +90,6 @@ impl Quoter { } } -/// Converts an ASCII character in the hex-encoded set (`0-9`, `A-F`, `a-f`) to its integer -/// representation from `0x0`–`0xF`. -/// -/// - `0x30 ('0') => 0x0` -/// - `0x39 ('9') => 0x9` -/// - `0x41 ('a') => 0xA` -/// - `0x61 ('A') => 0xA` -/// - `0x46 ('f') => 0xF` -/// - `0x66 ('F') => 0xF` -fn from_ascii_hex(v: u8) -> Option { - match v { - b'0'..=b'9' => Some(v - 0x30), // ord('0') == 0x30 - b'A'..=b'F' => Some(v - 0x41 + 10), // ord('A') == 0x41 - b'a'..=b'f' => Some(v - 0x61 + 10), // ord('a') == 0x61 - _ => None, - } -} - /// Decode a ASCII hex-encoded pair to an integer. /// /// Returns `None` if either portion of the decoded pair does not evaluate to a valid hex value. @@ -160,11 +97,13 @@ fn from_ascii_hex(v: u8) -> Option { /// - `0x33 ('3'), 0x30 ('0') => 0x30 ('0')` /// - `0x34 ('4'), 0x31 ('1') => 0x41 ('A')` /// - `0x36 ('6'), 0x31 ('1') => 0x61 ('a')` +#[inline(always)] fn hex_pair_to_char(d1: u8, d2: u8) -> Option { - let (d_high, d_low) = (from_ascii_hex(d1)?, from_ascii_hex(d2)?); + let d_high = char::from(d1).to_digit(16)?; + let d_low = char::from(d2).to_digit(16)?; // left shift high nibble by 4 bits - Some(d_high << 4 | d_low) + Some((d_high as u8) << 4 | (d_low as u8)) } /// Sets bit in given bit-map to 1=true. @@ -179,7 +118,7 @@ fn set_bit(array: &mut [u8], ch: u8) { /// /// # Panics /// Panics if `ch` index is out of bounds. -fn bit_at(array: &[u8], ch: u8) -> bool { +fn bit_at(array: &[u8; 16], ch: u8) -> bool { array[(ch >> 3) as usize] & (0b1 << (ch & 0b111)) != 0 } @@ -187,37 +126,16 @@ fn bit_at(array: &[u8], ch: u8) -> bool { mod tests { use super::*; - #[test] - fn hex_encoding() { - let hex = b"0123456789abcdefABCDEF"; - - for i in 0..256 { - let c = i as u8; - if hex.contains(&c) { - assert!(from_ascii_hex(c).is_some()) - } else { - assert!(from_ascii_hex(c).is_none()) - } - } - - let expected = [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 10, 11, 12, 13, 14, 15, - ]; - for i in 0..hex.len() { - assert_eq!(from_ascii_hex(hex[i]).unwrap(), expected[i]); - } - } - #[test] fn custom_quoter() { let q = Quoter::new(b"", b"+"); assert_eq!(q.requote(b"/a%25c").unwrap(), b"/a%c"); - assert_eq!(q.requote(b"/a%2Bc").unwrap(), b"/a%2Bc"); + assert_eq!(q.requote(b"/a%2Bc"), None); let q = Quoter::new(b"%+", b"/"); assert_eq!(q.requote(b"/a%25b%2Bc").unwrap(), b"/a%b+c"); - assert_eq!(q.requote(b"/a%2fb").unwrap(), b"/a%2fb"); - assert_eq!(q.requote(b"/a%2Fb").unwrap(), b"/a%2Fb"); + assert_eq!(q.requote(b"/a%2fb"), None); + assert_eq!(q.requote(b"/a%2Fb"), None); assert_eq!(q.requote(b"/a%0Ab").unwrap(), b"/a\nb"); assert_eq!(q.requote(b"/a%FE\xffb").unwrap(), b"/a\xfe\xffb"); assert_eq!(q.requote(b"/a\xfe\xffb"), None); @@ -233,7 +151,8 @@ mod tests { #[test] fn invalid_sequences() { let q = Quoter::new(b"%+", b"/"); - assert_eq!(q.requote(b"/a%2x%2X%%").unwrap(), b"/a%2x%2X"); + assert_eq!(q.requote(b"/a%2x%2X%%"), None); + assert_eq!(q.requote(b"/a%20%2X%%").unwrap(), b"/a %2X%%"); } #[test] @@ -241,16 +160,4 @@ mod tests { let q = Quoter::new(b"", b""); assert_eq!(q.requote(b"/abc/../efg"), None); } - - #[test] - fn pointless_safe_table() { - for safe_ch in 0..=127 { - let q = Quoter::new(&[safe_ch], b""); - - for i in 0..=255 { - let test = format!("/%{:02X}/", i); - assert_eq!(q.requote(test.as_bytes()).unwrap(), &[b'/', i, b'/']); - } - } - } }