From a9738b24c5b4c8c890f4b0482ecdd2eff9fb64ee Mon Sep 17 00:00:00 2001 From: LoveSy Date: Fri, 16 Feb 2024 00:02:08 +0800 Subject: [PATCH] - Add `unicode` feature to switch between `regex` and `regex-lite` as a trade-off between full unicode support and binary size. --- actix-router/CHANGES.md | 1 + actix-router/Cargo.toml | 6 +++-- actix-router/src/resource.rs | 25 +++++++++++-------- actix-web/CHANGES.md | 1 + actix-web/Cargo.toml | 10 +++++--- .../src/http/header/content_disposition.rs | 3 +++ actix-web/src/middleware/logger.rs | 22 +++++++++------- actix-web/src/middleware/normalize.rs | 3 +++ 8 files changed, 46 insertions(+), 25 deletions(-) diff --git a/actix-router/CHANGES.md b/actix-router/CHANGES.md index a80b15e69..6b473a9a6 100644 --- a/actix-router/CHANGES.md +++ b/actix-router/CHANGES.md @@ -2,6 +2,7 @@ ## Unreleased +- Add `unicode` feature to switch between `regex` and `regex-lite` as a trade-off between full unicode support and binary size. - Minimum supported Rust version (MSRV) is now 1.72. 
## 0.5.2 diff --git a/actix-router/Cargo.toml b/actix-router/Cargo.toml index de39944cc..bd0da7589 100644 --- a/actix-router/Cargo.toml +++ b/actix-router/Cargo.toml @@ -17,12 +17,14 @@ name = "actix_router" path = "src/lib.rs" [features] -default = ["http"] +default = ["http", "unicode"] +unicode = ["dep:regex"] [dependencies] bytestring = ">=0.1.5, <2" http = { version = "0.2.7", optional = true } -regex = "1.5" +regex = { version = "1.5", optional = true } +regex-lite = "0.1" serde = "1" tracing = { version = "0.1.30", default-features = false, features = ["log"] } diff --git a/actix-router/src/resource.rs b/actix-router/src/resource.rs index abd132211..797b0eb20 100644 --- a/actix-router/src/resource.rs +++ b/actix-router/src/resource.rs @@ -5,7 +5,10 @@ use std::{ mem, }; -use regex::{escape, Regex, RegexSet}; +#[cfg(feature = "unicode")] +use regex::{escape, Regex}; +#[cfg(not(feature = "unicode"))] +use regex_lite::{escape, Regex}; use tracing::error; use crate::{path::PathItem, IntoPatterns, Patterns, Resource, ResourcePath}; @@ -243,7 +246,7 @@ enum PatternType { Dynamic(Regex, Vec<&'static str>), /// Regular expression set and list of component expressions plus dynamic segment names. 
- DynamicSet(RegexSet, Vec<(Regex, Vec<&'static str>)>), + DynamicSet(Vec<Regex>, Vec<(Regex, Vec<&'static str>)>), } impl ResourceDef { @@ -557,7 +560,7 @@ impl ResourceDef { match &self.pat_type { PatternType::Static(pattern) => self.static_match(pattern, path).is_some(), PatternType::Dynamic(re, _) => re.is_match(path), - PatternType::DynamicSet(re, _) => re.is_match(path), + PatternType::DynamicSet(re, _) => re.iter().any(|re| re.is_match(path)), } } @@ -603,7 +606,7 @@ impl ResourceDef { PatternType::Dynamic(re, _) => Some(re.captures(path)?[1].len()), PatternType::DynamicSet(re, params) => { - let idx = re.matches(path).into_iter().next()?; + let idx = re.iter().enumerate().find(|(_, re)| re.is_match(path))?.0; let (ref pattern, _) = params[idx]; Some(pattern.captures(path)?[1].len()) } @@ -706,8 +709,9 @@ impl ResourceDef { PatternType::DynamicSet(re, params) => { let path = path.unprocessed(); - let (pattern, names) = match re.matches(path).into_iter().next() { - Some(idx) => &params[idx], + let (pattern, names) = match re.iter().enumerate().find(|(_, re)| re.is_match(path)) + { + Some((idx, _)) => &params[idx], _ => return false, }; @@ -849,10 +853,9 @@ impl ResourceDef { // since zero length pattern sets are possible // just return a useless `ResourceDef` - Patterns::List(patterns) if patterns.is_empty() => ( - PatternType::DynamicSet(RegexSet::empty(), Vec::new()), - Vec::new(), - ), + Patterns::List(patterns) if patterns.is_empty() => { + (PatternType::DynamicSet(Vec::new(), Vec::new()), Vec::new()) + } Patterns::List(patterns) => { let mut re_set = Vec::with_capacity(patterns.len()); @@ -870,7 +873,7 @@ impl ResourceDef { } } - let pattern_re_set = RegexSet::new(re_set).unwrap(); + let pattern_re_set = re_set.iter().map(|re| Regex::new(re).unwrap()).collect(); let segments = segments.unwrap_or_default(); ( diff --git a/actix-web/CHANGES.md b/actix-web/CHANGES.md index 88215293a..aacaacc3a 100644 --- a/actix-web/CHANGES.md +++ b/actix-web/CHANGES.md @@ -4,6 +4,7 @@ 
### Changed +- Add `unicode` feature to switch between `regex` and `regex-lite` as a trade-off between full unicode support and binary size. - Minimum supported Rust version (MSRV) is now 1.72. ## 4.5.1 diff --git a/actix-web/Cargo.toml b/actix-web/Cargo.toml index b045589bd..c67146917 100644 --- a/actix-web/Cargo.toml +++ b/actix-web/Cargo.toml @@ -40,7 +40,7 @@ name = "actix_web" path = "src/lib.rs" [features] -default = ["macros", "compress-brotli", "compress-gzip", "compress-zstd", "cookies", "http2"] +default = ["macros", "compress-brotli", "compress-gzip", "compress-zstd", "cookies", "http2", "unicode"] # Brotli algorithm content-encoding support compress-brotli = ["actix-http/compress-brotli", "__compress"] @@ -72,6 +72,9 @@ rustls-0_21 = ["http2", "actix-http/rustls-0_21", "actix-tls/accept", "actix-tls # TLS via Rustls v0.22 rustls-0_22 = ["http2", "actix-http/rustls-0_22", "actix-tls/accept", "actix-tls/rustls-0_22"] +# Full unicode support +unicode = ["dep:regex", "actix-router/unicode"] + # Internal (PRIVATE!) features used to aid testing and checking feature status. # Don't rely on these whatsoever. They may disappear at anytime. 
__compress = [] @@ -89,7 +92,7 @@ actix-utils = "3" actix-tls = { version = "3.3", default-features = false, optional = true } actix-http = { version = "3.6", features = ["ws"] } -actix-router = "0.5" +actix-router = { path = "../actix-router", default-features = false, features = ["http"] } actix-web-codegen = { version = "4.2", optional = true } ahash = "0.8" @@ -107,7 +110,8 @@ log = "0.4" mime = "0.3" once_cell = "1.5" pin-project-lite = "0.2.7" -regex = "1.5.5" +regex = { version = "1.5.5", optional = true } +regex-lite = "0.1" serde = "1.0" serde_json = "1.0" serde_urlencoded = "0.7" diff --git a/actix-web/src/http/header/content_disposition.rs b/actix-web/src/http/header/content_disposition.rs index 0606f5aef..9725cd19b 100644 --- a/actix-web/src/http/header/content_disposition.rs +++ b/actix-web/src/http/header/content_disposition.rs @@ -13,7 +13,10 @@ use std::fmt::{self, Write}; use once_cell::sync::Lazy; +#[cfg(feature = "unicode")] use regex::Regex; +#[cfg(not(feature = "unicode"))] +use regex_lite::Regex; use super::{ExtendedValue, Header, TryIntoHeaderValue, Writer}; use crate::http::header; diff --git a/actix-web/src/middleware/logger.rs b/actix-web/src/middleware/logger.rs index ce42c3af1..dc1b02399 100644 --- a/actix-web/src/middleware/logger.rs +++ b/actix-web/src/middleware/logger.rs @@ -18,7 +18,10 @@ use bytes::Bytes; use futures_core::ready; use log::{debug, warn}; use pin_project_lite::pin_project; -use regex::{Regex, RegexSet}; +#[cfg(feature = "unicode")] +use regex::Regex; +#[cfg(not(feature = "unicode"))] +use regex_lite::Regex; use time::{format_description::well_known::Rfc3339, OffsetDateTime}; use crate::{ @@ -87,7 +90,7 @@ pub struct Logger(Rc<Inner>); struct Inner { format: Format, exclude: HashSet<String>, - exclude_regex: RegexSet, + exclude_regex: Vec<Regex>, log_target: Cow<'static, str>, } @@ -97,7 +100,7 @@ impl Logger { Logger(Rc::new(Inner { format: Format::new(format), exclude: HashSet::new(), - exclude_regex: RegexSet::empty(), + exclude_regex: 
Vec::new(), log_target: Cow::Borrowed(module_path!()), })) } @@ -114,10 +117,7 @@ impl Logger { /// Ignore and do not log access info for paths that match regex. pub fn exclude_regex<T: Into<String>>(mut self, path: T) -> Self { let inner = Rc::get_mut(&mut self.0).unwrap(); - let mut patterns = inner.exclude_regex.patterns().to_vec(); - patterns.push(path.into()); - let regex_set = RegexSet::new(patterns).unwrap(); - inner.exclude_regex = regex_set; + inner.exclude_regex.push(Regex::new(&path.into()).unwrap()); self } @@ -240,7 +240,7 @@ impl Default for Logger { Logger(Rc::new(Inner { format: Format::default(), exclude: HashSet::new(), - exclude_regex: RegexSet::empty(), + exclude_regex: Vec::new(), log_target: Cow::Borrowed(module_path!()), })) } @@ -300,7 +300,11 @@ where fn call(&self, req: ServiceRequest) -> Self::Future { let excluded = self.inner.exclude.contains(req.path()) - || self.inner.exclude_regex.is_match(req.path()); + || self + .inner + .exclude_regex + .iter() + .any(|r| r.is_match(req.path())); if excluded { LoggerResponse { diff --git a/actix-web/src/middleware/normalize.rs b/actix-web/src/middleware/normalize.rs index 3f20431c0..482107ecb 100644 --- a/actix-web/src/middleware/normalize.rs +++ b/actix-web/src/middleware/normalize.rs @@ -4,7 +4,10 @@ use actix_http::uri::{PathAndQuery, Uri}; use actix_service::{Service, Transform}; use actix_utils::future::{ready, Ready}; use bytes::Bytes; +#[cfg(feature = "unicode")] use regex::Regex; +#[cfg(not(feature = "unicode"))] +use regex_lite::Regex; use crate::{ service::{ServiceRequest, ServiceResponse},