redefine ResourceDef behavior rigorously

This commit is contained in:
Ali MJ Al-Nasrawy 2021-07-21 16:13:40 +03:00
parent f8f1ac94bc
commit e0a2dd84b6
1 changed files with 118 additions and 44 deletions

View File

@ -28,9 +28,27 @@ const REGEX_FLAGS: &str = "(?s-m)";
/// regex engine.
///
///
/// # Pattern Format and Matching Behavior
///
/// A resource pattern is defined as a string of zero or more _segments_, where each segment is
/// preceded by a slash `/`.
///
/// This means that the pattern string *must* either be empty or begin with a slash (`/`).
/// This also implies that a trailing slash in a pattern defines an empty segment.
/// For example, the pattern `"/user/"` has two segments: `["user", ""]`.
///
/// A key point to understand is that `ResourceDef` matches segments, not strings.
/// It matches segments individually.
/// For example, the pattern `/user/` is not considered a prefix for the path `/user/123/456`,
/// because the second segment doesn't match: `["user", ""]` vs `["user", "123", "456"]`.
///
/// This definition is consistent with the definition of absolute URL path in
/// [RFC 3986 (section 3.3)](https://datatracker.ietf.org/doc/html/rfc3986#section-3.3)
///
///
/// # Static Resources
/// A static resource is the most basic type of definition. Pass a regular string to
/// [new][Self::new]. Conforming paths must match the string exactly.
/// A static resource is the most basic type of definition. Pass a pattern to
/// [new][Self::new]. Conforming paths must match the pattern exactly.
///
/// ## Examples
/// ```
@ -39,6 +57,7 @@ const REGEX_FLAGS: &str = "(?s-m)";
///
/// assert!(resource.is_match("/home"));
///
/// assert!(!resource.is_match("/home/"));
/// assert!(!resource.is_match("/home/new"));
/// assert!(!resource.is_match("/homes"));
/// assert!(!resource.is_match("/search"));
@ -87,10 +106,11 @@ const REGEX_FLAGS: &str = "(?s-m)";
/// # Prefix Resources
/// A prefix resource is defined as a pattern that can match just the start of a path.
///
/// This library chooses to restrict that definition slightly. In particular, when matching, the
/// prefix must be separated from the remaining part of the path by a `/` character, either at the
/// end of the prefix pattern or at the start of the the remaining slice. In practice, this is not
/// much of a limitation.
/// A prefix pattern with a trailing slash may behave in a surprising, though correct, way:
/// the trailing slash defines an empty segment, and that empty segment is required to match.
/// Examples are given below.
///
/// An empty pattern matches any path as a prefix.
///
/// Prefix resources can contain dynamic segments.
///
@ -102,9 +122,13 @@ const REGEX_FLAGS: &str = "(?s-m)";
/// assert!(resource.is_match("/home/new"));
/// assert!(!resource.is_match("/homes"));
///
/// // prefix pattern with a trailing slash
/// let resource = ResourceDef::prefix("/user/{id}/");
/// eprintln!("{:?}", resource);
/// assert!(resource.is_match("/user/123/"));
/// assert!(resource.is_match("/user/123/stars"));
/// assert!(resource.is_match("/user/123//stars"));
/// assert!(!resource.is_match("/user/123/stars"));
/// assert!(!resource.is_match("/user/123"));
/// ```
///
///
@ -117,6 +141,10 @@ const REGEX_FLAGS: &str = "(?s-m)";
/// `{name:regex}`. For example, `/user/{id:\d+}` will only match paths where the user ID
/// is numeric.
///
/// The regex could potentially match multiple segments. If this is not wanted, then care must be
/// taken to avoid matching a slash `/`. It is guaranteed, however, that the match ends at a
/// segment boundary; the pattern `(/|$)` is always appended to the regex.
///
/// By default, dynamic segments use this regex: `[^/]+`. This shows why it is the case, as shown in
/// the earlier section, that segments capture a slice of the path up to the next `/` character.
///
@ -237,7 +265,9 @@ impl ResourceDef {
/// Multi-pattern resources can be constructed by providing a slice (or vec) of patterns.
///
/// # Panics
/// Panics if path pattern is malformed.
/// Panics if the pattern is non-empty and does not start with `/`,
/// if it has more than 16 dynamic segments,
/// or if a custom regex fails to compile.
///
/// # Examples
/// ```
@ -298,7 +328,7 @@ impl ResourceDef {
}
}
/// Constructs a new resource definition using a string pattern that performs prefix matching.
/// Constructs a new resource definition using a pattern that performs prefix matching.
///
/// More specifically, the regular expressions generated for matching are different when using
/// this method vs using `new`; they will not be appended with the `$` meta-character that
@ -308,7 +338,7 @@ impl ResourceDef {
/// resource definition with a tail segment; use [`new`][Self::new] in this case.
///
/// # Panics
/// Panics if path regex pattern is malformed.
/// Panics if the pattern is malformed; same as with [`new`][Self::new].
///
/// # Examples
/// ```
@ -568,10 +598,7 @@ impl ResourceDef {
match self.pat_type {
PatternType::Static(ref s) => s == path,
PatternType::Prefix(ref prefix) if prefix == path => true,
PatternType::Prefix(ref prefix) => is_strict_prefix(prefix, path),
PatternType::Prefix(ref prefix) => is_prefix(prefix, path),
PatternType::Dynamic(ref re, _) => re.is_match(path),
PatternType::DynamicSet(ref re, _) => re.is_match(path),
}
@ -624,16 +651,15 @@ impl ResourceDef {
}
}
PatternType::Prefix(prefix) if path == prefix => Some(prefix.len()),
PatternType::Prefix(prefix) if is_strict_prefix(prefix, path) => Some(prefix.len()),
PatternType::Prefix(prefix) if is_prefix(prefix, path) => Some(prefix.len()),
PatternType::Prefix(_) => None,
PatternType::Dynamic(re, _) => re.find(path).map(|m| m.end()),
PatternType::Dynamic(re, _) => Some(re.captures(path)?[1].len()),
PatternType::DynamicSet(re, params) => {
let idx = re.matches(path).into_iter().next()?;
let (ref pattern, _) = params[idx];
pattern.find(path).map(|m| m.end())
Some(pattern.captures(path)?[1].len())
}
}
}
@ -756,7 +782,7 @@ impl ResourceDef {
}
};
(captures[0].len(), Some(names))
(captures[1].len(), Some(names))
}
PatternType::DynamicSet(re, params) => {
@ -782,7 +808,7 @@ impl ResourceDef {
}
}
(captures[0].len(), Some(names))
(captures[1].len(), Some(names))
}
};
@ -984,6 +1010,10 @@ impl ResourceDef {
) -> (PatternType, Vec<PatternSegment>) {
profile_method!(parse);
if !pattern.is_empty() && !pattern.starts_with('/') {
panic!("Resource pattern must either be empty or begins with '/'",);
}
let mut unprocessed = pattern;
if !force_dynamic && unprocessed.find('{').is_none() && !unprocessed.ends_with('*') {
@ -1045,8 +1075,14 @@ impl ResourceDef {
);
}
// Store the pattern at capture group #1 to have context info outside it
let mut re = format!("({})", re);
// Ensure the match ends in a new segment boundary
if !is_prefix && !has_tail_segment {
re.push('$');
} else if is_prefix && !has_tail_segment {
re.push_str(r"(/|$)");
}
let re = match Regex::new(&re) {
@ -1118,10 +1154,12 @@ pub(crate) fn insert_slash(path: &str) -> Cow<'_, str> {
}
/// Returns true if `prefix` acts as a proper prefix (i.e., separated by a slash) in `path`.
///
/// The `strict` refers to the fact that this will return `false` if `prefix == path`.
fn is_strict_prefix(prefix: &str, path: &str) -> bool {
path.starts_with(prefix) && (prefix.ends_with('/') || path[prefix.len()..].starts_with('/'))
/// Reports whether `prefix` is a prefix of `path` that stops on a segment boundary.
///
/// After removing `prefix` from the front of `path`, the remainder must either be empty
/// (the two strings are equal) or begin with `/` (the next segment starts right there).
/// Returns `false` when `path` does not start with `prefix` at all.
fn is_prefix(prefix: &str, path: &str) -> bool {
    path.strip_prefix(prefix)
        // Only a remainder that is empty or opens a new segment counts as a match.
        .map_or(false, |rest| rest.is_empty() || rest.starts_with('/'))
}
#[cfg(test)]
@ -1435,58 +1473,70 @@ mod tests {
let re = ResourceDef::prefix("/name/");
assert!(re.is_match("/name/"));
assert!(re.is_match("/name/gs"));
assert!(re.is_match("/name//gs"));
assert!(!re.is_match("/name/gs"));
assert!(!re.is_match("/name"));
let mut path = Path::new("/name/gs");
assert!(!re.capture_match_info(&mut path));
let mut path = Path::new("/name//gs");
assert!(re.capture_match_info(&mut path));
assert_eq!(path.unprocessed(), "gs");
assert_eq!(path.unprocessed(), "/gs");
let re = ResourceDef::root_prefix("name/");
assert!(re.is_match("/name/"));
assert!(re.is_match("/name/gs"));
assert!(re.is_match("/name//gs"));
assert!(!re.is_match("/name/gs"));
assert!(!re.is_match("/name"));
let mut path = Path::new("/name/gs");
assert!(re.capture_match_info(&mut path));
assert_eq!(path.unprocessed(), "gs");
assert!(!re.capture_match_info(&mut path));
}
#[test]
fn prefix_dynamic() {
let re = ResourceDef::prefix("/{name}/");
let re = ResourceDef::prefix("/{name}");
assert!(re.is_prefix());
assert!(re.is_match("/name/"));
assert!(re.is_match("/name/gs"));
assert!(!re.is_match("/name"));
assert!(re.is_match("/name"));
assert_eq!(re.find_match("/name/"), Some(6));
assert_eq!(re.find_match("/name/gs"), Some(6));
assert_eq!(re.find_match("/name"), None);
assert_eq!(re.find_match("/name/"), Some(5));
assert_eq!(re.find_match("/name/gs"), Some(5));
assert_eq!(re.find_match("/name"), Some(5));
assert_eq!(re.find_match(""), None);
let mut path = Path::new("/test2/");
assert!(re.capture_match_info(&mut path));
assert_eq!(&path["name"], "test2");
assert_eq!(&path[0], "test2");
assert_eq!(path.unprocessed(), "");
assert_eq!(path.unprocessed(), "/");
let mut path = Path::new("/test2/subpath1/subpath2/index.html");
assert!(re.capture_match_info(&mut path));
assert_eq!(&path["name"], "test2");
assert_eq!(&path[0], "test2");
assert_eq!(path.unprocessed(), "subpath1/subpath2/index.html");
let resource = ResourceDef::prefix(r"/id/{id:\d{3}}");
assert!(resource.is_match("/id/1234"));
assert_eq!(resource.find_match("/id/1234"), Some(7));
assert_eq!(path.unprocessed(), "/subpath1/subpath2/index.html");
let resource = ResourceDef::prefix("/user");
// input string shorter than prefix
assert!(resource.find_match("/foo").is_none());
}
// An empty prefix pattern is expected to match every path, including the empty string.
#[test]
fn prefix_empty() {
let re = ResourceDef::prefix("");
// The definition must report itself as a prefix resource.
assert!(re.is_prefix());
// Empty, root, and multi-segment paths are all expected to match.
assert!(re.is_match(""));
assert!(re.is_match("/"));
assert!(re.is_match("/name/test/test"));
}
#[test]
fn build_path_list() {
let mut s = String::new();
@ -1554,6 +1604,21 @@ mod tests {
assert!(path.get("uid").is_some());
}
// A prefix pattern ending in a dynamic segment with the custom regex `\d{3}` is expected to
// match only when those three digits make up the whole segment.
#[test]
fn dynamic_prefix_proper_segmentation() {
let resource = ResourceDef::prefix(r"/id/{id:\d{3}}");
// The exact path and a path that continues with another segment both match.
assert!(resource.is_match("/id/123"));
assert!(resource.is_match("/id/123/foo"));
// Extra characters inside the same segment must prevent a match.
assert!(!resource.is_match("/id/1234"));
assert!(!resource.is_match("/id/123a"));
// The reported match length is 7, the length of "/id/123"; it excludes any following segment.
assert_eq!(resource.find_match("/id/123"), Some(7));
assert_eq!(resource.find_match("/id/123/foo"), Some(7));
assert_eq!(resource.find_match("/id/1234"), None);
assert_eq!(resource.find_match("/id/123a"), None);
}
#[test]
fn build_path_map() {
let resource = ResourceDef::new("/user/{item1}/{item2}/");
@ -1590,14 +1655,17 @@ mod tests {
}
#[test]
fn consistent_match_length() {
let result = Some(5);
fn prefix_trailing_slash() {
// The prefix "/abc/" matches two segments: ["abc", ""]
// These are not prefixes
let re = ResourceDef::prefix("/abc/");
assert_eq!(re.find_match("/abc/def"), result);
assert_eq!(re.find_match("/abc/def"), None);
assert_eq!(re.find_match("/abc//def"), Some(5));
let re = ResourceDef::prefix("/{id}/");
assert_eq!(re.find_match("/abc/def"), result);
assert_eq!(re.find_match("/abc/def"), None);
assert_eq!(re.find_match("/abc//def"), Some(5));
}
#[test]
@ -1637,6 +1705,12 @@ mod tests {
match_methods_agree!(prefix r"/id/{id:\d{3}}" => "/id/123", "/id/1234");
}
// Constructing a definition from a non-empty pattern without a leading `/` is expected to panic.
#[test]
#[should_panic]
fn no_leading_slash() {
ResourceDef::new("user/");
}
#[test]
#[should_panic]
fn invalid_dynamic_segment_delimiter() {