From e0a2dd84b69d85b0436b0caea5126557dab0b7f2 Mon Sep 17 00:00:00 2001 From: Ali MJ Al-Nasrawy Date: Wed, 21 Jul 2021 16:13:40 +0300 Subject: [PATCH] redefine ResourceDef behavior rigorously --- actix-router/src/resource.rs | 162 +++++++++++++++++++++++++---------- 1 file changed, 118 insertions(+), 44 deletions(-) diff --git a/actix-router/src/resource.rs b/actix-router/src/resource.rs index 58016bb3..623bf7b2 100644 --- a/actix-router/src/resource.rs +++ b/actix-router/src/resource.rs @@ -28,9 +28,27 @@ const REGEX_FLAGS: &str = "(?s-m)"; /// regex engine. /// /// +/// # Pattern Format and Matching Behavior +/// +/// Resource pattern is defined as a string of zero or more _segments_ where each segment is +/// preceeded by a slash `/`. +/// +/// This means that pattern string *must* either be empty or begin with a slash (`/`). +/// This also implies that a trailing slash in pattern defines an empty segment. +/// For example, the pattern `"/user/"` have two segment: `["user", ""]` +/// +/// A key point to undertand is that `ResourceDef` matches segments, not string. +/// It matches segments individually. +/// For exmaple, the pattern `/user/` is not considered a prefix for the path `/user/123/456`, +/// because the second segment doesn't match: `["user", ""]` vs `["user", "123", "456"]`. +/// +/// This definition is consistent with the definition of absolute URL path in +/// [RFC 3986 (section 3.3)](https://datatracker.ietf.org/doc/html/rfc3986#section-3.3) +/// +/// /// # Static Resources -/// A static resource is the most basic type of definition. Pass a regular string to -/// [new][Self::new]. Conforming paths must match the string exactly. +/// A static resource is the most basic type of definition. Pass a pattern to +/// [new][Self::new]. Conforming paths must match the pattern exactly. /// /// ## Examples /// ``` @@ -39,6 +57,7 @@ const REGEX_FLAGS: &str = "(?s-m)"; /// /// assert!(resource.is_match("/home")); /// +/// assert!(!resource.is_match("/home/")); /// assert!(!resource.is_match("/home/new")); /// assert!(!resource.is_match("/homes")); /// assert!(!resource.is_match("/search")); @@ -87,10 +106,11 @@ const REGEX_FLAGS: &str = "(?s-m)"; /// # Prefix Resources /// A prefix resource is defined as pattern that can match just the start of a path. /// -/// This library chooses to restrict that definition slightly. In particular, when matching, the -/// prefix must be separated from the remaining part of the path by a `/` character, either at the -/// end of the prefix pattern or at the start of the the remaining slice. In practice, this is not -/// much of a limitation. +/// Prefix patterns with a trailing slash may have a weird, though correct, behavior. +/// It basically defines and requires an empty segment to match. +/// Examples are given below.. +/// +/// Empty pattern matches any path as a prefix. /// /// Prefix resources can contain dynamic segments. /// @@ -102,9 +122,13 @@ const REGEX_FLAGS: &str = "(?s-m)"; /// assert!(resource.is_match("/home/new")); /// assert!(!resource.is_match("/homes")); /// +/// // prefix pattern with a trailing slash /// let resource = ResourceDef::prefix("/user/{id}/"); +/// eprintln!("{:?}", resource); /// assert!(resource.is_match("/user/123/")); -/// assert!(resource.is_match("/user/123/stars")); +/// assert!(resource.is_match("/user/123//stars")); +/// assert!(!resource.is_match("/user/123/stars")); +/// assert!(!resource.is_match("/user/123")); /// ``` /// /// @@ -117,6 +141,10 @@ const REGEX_FLAGS: &str = "(?s-m)"; /// `{name:regex}`. For example, `/user/{id:\d+}` will only match paths where the user ID /// is numeric. /// +/// The regex could pontentially match multiple segments. If this is not wanted, then care must be +/// taken to avoid matching a slash `/`. It is guaranteed, however, that the match ends in a +/// segment boundary; the pattern `r"(/|$)` is always appended to the regex. +/// /// By default, dynamic segments use this regex: `[^/]+`. This shows why it is the case, as shown in /// the earlier section, that segments capture a slice of the path up to the next `/` character. /// @@ -237,7 +265,9 @@ impl ResourceDef { /// Multi-pattern resources can be constructed by providing a slice (or vec) of patterns. /// /// # Panics - /// Panics if path pattern is malformed. + /// Panics if the pattern is neither empty nor starts with `/`, + /// if it has more than 16 dynamic segments + /// and when custom regex fails to compile. /// /// # Examples /// ``` @@ -298,7 +328,7 @@ impl ResourceDef { } } - /// Constructs a new resource definition using a string pattern that performs prefix matching. + /// Constructs a new resource definition using a pattern that performs prefix matching. /// /// More specifically, the regular expressions generated for matching are different when using /// this method vs using `new`; they will not be appended with the `$` meta-character that @@ -308,7 +338,7 @@ impl ResourceDef { /// resource definition with a tail segment; use [`new`][Self::new] in this case. /// /// # Panics - /// Panics if path regex pattern is malformed. + /// Panics if pattern is malformed; same as with [`new`][Self::new]. /// /// # Examples /// ``` @@ -568,10 +598,7 @@ impl ResourceDef { match self.pat_type { PatternType::Static(ref s) => s == path, - - PatternType::Prefix(ref prefix) if prefix == path => true, - PatternType::Prefix(ref prefix) => is_strict_prefix(prefix, path), - + PatternType::Prefix(ref prefix) => is_prefix(prefix, path), PatternType::Dynamic(ref re, _) => re.is_match(path), PatternType::DynamicSet(ref re, _) => re.is_match(path), } @@ -624,16 +651,15 @@ impl ResourceDef { } } - PatternType::Prefix(prefix) if path == prefix => Some(prefix.len()), - PatternType::Prefix(prefix) if is_strict_prefix(prefix, path) => Some(prefix.len()), + PatternType::Prefix(prefix) if is_prefix(prefix, path) => Some(prefix.len()), PatternType::Prefix(_) => None, - PatternType::Dynamic(re, _) => re.find(path).map(|m| m.end()), + PatternType::Dynamic(re, _) => Some(re.captures(path)?[1].len()), PatternType::DynamicSet(re, params) => { let idx = re.matches(path).into_iter().next()?; let (ref pattern, _) = params[idx]; - pattern.find(path).map(|m| m.end()) + Some(pattern.captures(path)?[1].len()) } } } @@ -756,7 +782,7 @@ impl ResourceDef { } }; - (captures[0].len(), Some(names)) + (captures[1].len(), Some(names)) } PatternType::DynamicSet(re, params) => { @@ -782,7 +808,7 @@ impl ResourceDef { } } - (captures[0].len(), Some(names)) + (captures[1].len(), Some(names)) } }; @@ -984,6 +1010,10 @@ impl ResourceDef { ) -> (PatternType, Vec) { profile_method!(parse); + if !pattern.is_empty() && !pattern.starts_with('/') { + panic!("Resource pattern must either be empty or begins with '/'",); + } + let mut unprocessed = pattern; if !force_dynamic && unprocessed.find('{').is_none() && !unprocessed.ends_with('*') { @@ -1045,8 +1075,14 @@ impl ResourceDef { ); } + // Store the pattern at capture group #1 to have context info outside it + let mut re = format!("({})", re); + + // Ensure the match ends in a new segment boundary if !is_prefix && !has_tail_segment { re.push('$'); + } else if is_prefix && !has_tail_segment { + re.push_str(r"(/|$)"); } let re = match Regex::new(&re) { @@ -1118,10 +1154,12 @@ pub(crate) fn insert_slash(path: &str) -> Cow<'_, str> { } /// Returns true if `prefix` acts as a proper prefix (i.e., separated by a slash) in `path`. -/// -/// The `strict` refers to the fact that this will return `false` if `prefix == path`. -fn is_strict_prefix(prefix: &str, path: &str) -> bool { - path.starts_with(prefix) && (prefix.ends_with('/') || path[prefix.len()..].starts_with('/')) +fn is_prefix(prefix: &str, path: &str) -> bool { + match path.strip_prefix(prefix) { + // Ensure the match ends at segment boundary + Some(rem) if rem.is_empty() || rem.starts_with('/') => true, + _ => false, + } } #[cfg(test)] @@ -1435,58 +1473,70 @@ mod tests { let re = ResourceDef::prefix("/name/"); assert!(re.is_match("/name/")); - assert!(re.is_match("/name/gs")); + assert!(re.is_match("/name//gs")); + assert!(!re.is_match("/name/gs")); assert!(!re.is_match("/name")); let mut path = Path::new("/name/gs"); + assert!(!re.capture_match_info(&mut path)); + + let mut path = Path::new("/name//gs"); assert!(re.capture_match_info(&mut path)); - assert_eq!(path.unprocessed(), "gs"); + assert_eq!(path.unprocessed(), "/gs"); let re = ResourceDef::root_prefix("name/"); assert!(re.is_match("/name/")); - assert!(re.is_match("/name/gs")); + assert!(re.is_match("/name//gs")); + assert!(!re.is_match("/name/gs")); assert!(!re.is_match("/name")); let mut path = Path::new("/name/gs"); - assert!(re.capture_match_info(&mut path)); - assert_eq!(path.unprocessed(), "gs"); + assert!(!re.capture_match_info(&mut path)); } #[test] fn prefix_dynamic() { - let re = ResourceDef::prefix("/{name}/"); + let re = ResourceDef::prefix("/{name}"); assert!(re.is_prefix()); assert!(re.is_match("/name/")); assert!(re.is_match("/name/gs")); - assert!(!re.is_match("/name")); + assert!(re.is_match("/name")); - assert_eq!(re.find_match("/name/"), Some(6)); - assert_eq!(re.find_match("/name/gs"), Some(6)); - assert_eq!(re.find_match("/name"), None); + assert_eq!(re.find_match("/name/"), Some(5)); + assert_eq!(re.find_match("/name/gs"), Some(5)); + assert_eq!(re.find_match("/name"), Some(5)); + assert_eq!(re.find_match(""), None); let mut path = Path::new("/test2/"); assert!(re.capture_match_info(&mut path)); assert_eq!(&path["name"], "test2"); assert_eq!(&path[0], "test2"); - assert_eq!(path.unprocessed(), ""); + assert_eq!(path.unprocessed(), "/"); let mut path = Path::new("/test2/subpath1/subpath2/index.html"); assert!(re.capture_match_info(&mut path)); assert_eq!(&path["name"], "test2"); assert_eq!(&path[0], "test2"); - assert_eq!(path.unprocessed(), "subpath1/subpath2/index.html"); - - let resource = ResourceDef::prefix(r"/id/{id:\d{3}}"); - assert!(resource.is_match("/id/1234")); - assert_eq!(resource.find_match("/id/1234"), Some(7)); + assert_eq!(path.unprocessed(), "/subpath1/subpath2/index.html"); let resource = ResourceDef::prefix("/user"); // input string shorter than prefix assert!(resource.find_match("/foo").is_none()); } + #[test] + fn prefix_empty() { + let re = ResourceDef::prefix(""); + + assert!(re.is_prefix()); + + assert!(re.is_match("")); + assert!(re.is_match("/")); + assert!(re.is_match("/name/test/test")); + } + #[test] fn build_path_list() { let mut s = String::new(); @@ -1554,6 +1604,21 @@ mod tests { assert!(path.get("uid").is_some()); } + #[test] + fn dynamic_prefix_proper_segmentation() { + let resource = ResourceDef::prefix(r"/id/{id:\d{3}}"); + + assert!(resource.is_match("/id/123")); + assert!(resource.is_match("/id/123/foo")); + assert!(!resource.is_match("/id/1234")); + assert!(!resource.is_match("/id/123a")); + + assert_eq!(resource.find_match("/id/123"), Some(7)); + assert_eq!(resource.find_match("/id/123/foo"), Some(7)); + assert_eq!(resource.find_match("/id/1234"), None); + assert_eq!(resource.find_match("/id/123a"), None); + } + #[test] fn build_path_map() { let resource = ResourceDef::new("/user/{item1}/{item2}/"); @@ -1590,14 +1655,17 @@ mod tests { } #[test] - fn consistent_match_length() { - let result = Some(5); + fn prefix_trailing_slash() { + // The prefix "/abc/" matches two segments: ["user", ""] + // These are not prefixes let re = ResourceDef::prefix("/abc/"); - assert_eq!(re.find_match("/abc/def"), result); + assert_eq!(re.find_match("/abc/def"), None); + assert_eq!(re.find_match("/abc//def"), Some(5)); let re = ResourceDef::prefix("/{id}/"); - assert_eq!(re.find_match("/abc/def"), result); + assert_eq!(re.find_match("/abc/def"), None); + assert_eq!(re.find_match("/abc//def"), Some(5)); } #[test] @@ -1637,6 +1705,12 @@ mod tests { match_methods_agree!(prefix r"/id/{id:\d{3}}" => "/id/123", "/id/1234"); } + #[test] + #[should_panic] + fn no_leading_slash() { + ResourceDef::new("user/"); + } + #[test] #[should_panic] fn invalid_dynamic_segment_delimiter() {