diff --git a/actix-router/Cargo.toml b/actix-router/Cargo.toml index bd0da7589..79218f364 100644 --- a/actix-router/Cargo.toml +++ b/actix-router/Cargo.toml @@ -22,6 +22,7 @@ unicode = ["dep:regex"] [dependencies] bytestring = ">=0.1.5, <2" +cfg-if = "1" http = { version = "0.2.7", optional = true } regex = { version = "1.5", optional = true } regex-lite = "0.1" @@ -37,6 +38,7 @@ percent-encoding = "2.1" [[bench]] name = "router" harness = false +required-features = ["unicode"] [[bench]] name = "quoter" diff --git a/actix-router/src/lib.rs b/actix-router/src/lib.rs index f10093436..c4d0d2c87 100644 --- a/actix-router/src/lib.rs +++ b/actix-router/src/lib.rs @@ -10,6 +10,7 @@ mod de; mod path; mod pattern; mod quoter; +mod regex_set; mod resource; mod resource_path; mod router; diff --git a/actix-router/src/regex_set.rs b/actix-router/src/regex_set.rs new file mode 100644 index 000000000..fe93c1d20 --- /dev/null +++ b/actix-router/src/regex_set.rs @@ -0,0 +1,64 @@ +//! Abstraction over `regex` and `regex-lite` depending on whether we have `unicode` crate feature +//! enabled. + +use cfg_if::cfg_if; +#[cfg(feature = "unicode")] +pub(crate) use regex::{escape, Regex}; +#[cfg(not(feature = "unicode"))] +pub(crate) use regex_lite::{escape, Regex}; + +#[cfg(feature = "unicode")] +#[derive(Debug, Clone)] +pub(crate) struct RegexSet(regex::RegexSet); + +#[cfg(not(feature = "unicode"))] +#[derive(Debug, Clone)] +pub(crate) struct RegexSet(Vec<Regex>); + +impl RegexSet { + /// Create a new regex set. + /// + /// # Panics + /// + /// Panics if any path patterns are malformed. + pub(crate) fn new(re_set: Vec<String>) -> Self { + cfg_if! { + if #[cfg(feature = "unicode")] { + Self(regex::RegexSet::new(re_set).unwrap()) + } else { + Self(re_set.iter().map(|re| Regex::new(re).unwrap()).collect()) + } + } + } + + /// Create a new empty regex set. + pub(crate) fn empty() -> Self { + cfg_if!
{ + if #[cfg(feature = "unicode")] { + Self(regex::RegexSet::empty()) + } else { + Self(Vec::new()) + } + } + } + + pub(crate) fn is_match(&self, path: &str) -> bool { + cfg_if! { + if #[cfg(feature = "unicode")] { + self.0.is_match(path) + } else { + self.0.iter().any(|re| re.is_match(path)) + } + } + } + + pub(crate) fn first_match_idx(&self, path: &str) -> Option<usize> { + cfg_if! { + if #[cfg(feature = "unicode")] { + self.0.matches(path).into_iter().next() + } else { + Some(self.0.iter().enumerate().find(|(_, re)| re.is_match(path))?.0) + } + } + } +} diff --git a/actix-router/src/resource.rs b/actix-router/src/resource.rs index 797b0eb20..3a102945b 100644 --- a/actix-router/src/resource.rs +++ b/actix-router/src/resource.rs @@ -5,13 +5,13 @@ use std::{ mem, }; -#[cfg(feature = "unicode")] -use regex::{escape, Regex}; -#[cfg(not(feature = "unicode"))] -use regex_lite::{escape, Regex}; use tracing::error; -use crate::{path::PathItem, IntoPatterns, Patterns, Resource, ResourcePath}; +use crate::{ + path::PathItem, + regex_set::{escape, Regex, RegexSet}, + IntoPatterns, Patterns, Resource, ResourcePath, +}; const MAX_DYNAMIC_SEGMENTS: usize = 16; @@ -236,7 +236,7 @@ enum PatternSegment { Var(String), } -#[derive(Clone, Debug)] +#[derive(Debug, Clone)] #[allow(clippy::large_enum_variant)] enum PatternType { /// Single constant/literal segment. @@ -246,7 +246,7 @@ enum PatternType { Dynamic(Regex, Vec<&'static str>), /// Regular expression set and list of component expressions plus dynamic segment names.
- DynamicSet(Vec<Regex>, Vec<(Regex, Vec<&'static str>)>), + DynamicSet(RegexSet, Vec<(Regex, Vec<&'static str>)>), } impl ResourceDef { @@ -560,7 +560,7 @@ impl ResourceDef { match &self.pat_type { PatternType::Static(pattern) => self.static_match(pattern, path).is_some(), PatternType::Dynamic(re, _) => re.is_match(path), - PatternType::DynamicSet(re, _) => re.iter().any(|re| re.is_match(path)), + PatternType::DynamicSet(re, _) => re.is_match(path), } } @@ -606,7 +606,7 @@ impl ResourceDef { PatternType::Dynamic(re, _) => Some(re.captures(path)?[1].len()), PatternType::DynamicSet(re, params) => { - let idx = re.iter().enumerate().find(|(_, re)| re.is_match(path))?.0; + let idx = re.first_match_idx(path)?; let (ref pattern, _) = params[idx]; Some(pattern.captures(path)?[1].len()) } @@ -709,9 +709,8 @@ impl ResourceDef { PatternType::DynamicSet(re, params) => { let path = path.unprocessed(); - let (pattern, names) = match re.iter().enumerate().find(|(_, re)| re.is_match(path)) - { - Some((idx, _)) => &params[idx], + let (pattern, names) = match re.first_match_idx(path) { + Some(idx) => &params[idx], _ => return false, }; @@ -853,9 +852,10 @@ impl ResourceDef { // since zero length pattern sets are possible // just return a useless `ResourceDef` - Patterns::List(patterns) if patterns.is_empty() => { - (PatternType::DynamicSet(Vec::new(), Vec::new()), Vec::new()) - } + Patterns::List(patterns) if patterns.is_empty() => ( + PatternType::DynamicSet(RegexSet::empty(), Vec::new()), + Vec::new(), + ), Patterns::List(patterns) => { let mut re_set = Vec::with_capacity(patterns.len()); @@ -873,7 +873,7 @@ impl ResourceDef { } } - let pattern_re_set = re_set.iter().map(|re| Regex::new(re).unwrap()).collect(); + let pattern_re_set = RegexSet::new(re_set); let segments = segments.unwrap_or_default(); (