From d9204f996c480aee0a79ba201ce5c3f551747f12 Mon Sep 17 00:00:00 2001 From: Rob Ede Date: Sat, 1 Jan 2022 08:59:19 +0000 Subject: [PATCH] fix accept encoding header --- CHANGES.md | 4 + src/http/header/accept.rs | 2 +- src/http/header/accept_encoding.rs | 354 +++++++++++++++++++++++------ src/http/header/accept_language.rs | 10 +- src/http/header/encoding.rs | 4 +- 5 files changed, 293 insertions(+), 81 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index b6d3b103d..bd1e0550c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,18 +4,22 @@ ### Added - `guard::GuardContext` for use with the `Guard` trait. [#2552] - `ServiceRequest::guard_ctx` for obtaining a guard context. [#2552] +- `impl Hash` for `http::header::Encoding`. [#2501] +- `AcceptEncoding::negotiate`. [#2501] ### Changed - `Guard` trait now receives a `&GuardContext`. [#2552] - `guard::fn_guard` functions now receives a `&GuardContext`. [#2552] - Some guards now return `impl Guard` and their concrete types are made private: `guard::{Header}` and all the method guards. [#2552] - The `Not` guard is now generic over the type of guard it wraps. [#2552] +- `AcceptEncoding::preference` now returns `Option<Preference<Encoding>>`. [#2501] ### Fixed - Rename `ConnectionInfo::{remote_addr => peer_addr}`, deprecating the old name. [#2554] - `ConnectionInfo::peer_addr` will not return the port number. [#2554] - `ConnectionInfo::realip_remote_addr` will not return the port number if sourcing the IP from the peer's socket address. 
[#2554] +[#2501]: https://github.com/actix/actix-web/pull/2501 [#2552]: https://github.com/actix/actix-web/pull/2552 [#2554]: https://github.com/actix/actix-web/pull/2554 diff --git a/src/http/header/accept.rs b/src/http/header/accept.rs index 6e43ecc06..368a05bb2 100644 --- a/src/http/header/accept.rs +++ b/src/http/header/accept.rs @@ -163,7 +163,7 @@ impl Accept { use actix_http::header::Quality; let mut max_item = None; - let mut max_pref = Quality::MIN; + let mut max_pref = Quality::ZERO; // uses manual max lookup loop since we want the first occurrence in the case of same // preference but `Iterator::max_by_key` would give us the last occurrence diff --git a/src/http/header/accept_encoding.rs b/src/http/header/accept_encoding.rs index 0774944c6..6d658767f 100644 --- a/src/http/header/accept_encoding.rs +++ b/src/http/header/accept_encoding.rs @@ -1,17 +1,15 @@ -use actix_http::header::QualityItem; +use std::collections::HashSet; -use super::{common_header, Encoding, Preference, Quality}; +use super::{common_header, Encoding, Preference, Quality, QualityItem}; use crate::http::header; common_header! { /// `Accept-Encoding` header, defined /// in [RFC 7231](https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.4) /// - /// The `Accept-Encoding` header field can be used by user agents to - /// indicate what response content-codings are - /// acceptable in the response. An `identity` token is used as a synonym - /// for "no encoding" in order to communicate when no encoding is - /// preferred. + /// The `Accept-Encoding` header field can be used by user agents to indicate what response + /// content-codings are acceptable in the response. An `identity` token is used as a synonym + /// for "no encoding" in order to communicate when no encoding is preferred. /// /// # ABNF /// ```plain @@ -60,7 +58,7 @@ common_header! 
{ /// AcceptEncoding(vec![ /// QualityItem::max(Encoding::Chunked), /// QualityItem::new(Encoding::Gzip, q(0.60)), - /// QualityItem::min(Encoding::EncodingExt("*".to_owned())), + /// QualityItem::zero(Encoding::EncodingExt("*".to_owned())), /// ]) /// ); /// ``` @@ -95,27 +93,99 @@ common_header! { vec![b"gzip, *; q=0"], Some(AcceptEncoding(vec![ QualityItem::max(Preference::Specific(Encoding::Gzip)), - QualityItem::min(Preference::Any), + QualityItem::zero(Preference::Any), ])) ); } } impl AcceptEncoding { - // TODO: method for getting best content encoding based on q-factors, available from server side - // and if none are acceptable return None + /// Selects the most acceptable encoding according to client preference and supported types. + /// + /// The "identity" encoding is not assumed and should be included in the `supported` iterator + /// if a non-encoded representation can be selected. + /// + /// If `None` is returned, this indicates that none of the supported encodings are acceptable to + /// the client. The caller should generate a 406 Not Acceptable response (unencoded) that + /// includes the server's supported encodings in the body plus a [`Vary`] header. + /// + /// [`Vary`]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Vary + pub fn negotiate<'a>( + &self, + supported: impl Iterator, + ) -> Option { + // 1. If no Accept-Encoding field is in the request, any content-coding is considered + // acceptable by the user agent. + + let supported_set = supported.collect::>(); + + if supported_set.is_empty() { + return None; + } + + if self.0.is_empty() { + // though it is not recommended to encode in this case, return identity encoding + return Some(Encoding::Identity); + } + + // 2. If the representation has no content-coding, then it is acceptable by default unless + // specifically excluded by the Accept-Encoding field stating either "identity;q=0" or + // "*;q=0" without a more specific entry for "identity". 
+ + let acceptable_items = self.ranked_items().collect::>(); + + let identity_acceptable = is_identity_acceptable(&acceptable_items); + let identity_supported = supported_set.contains(&Encoding::Identity); + + if identity_acceptable && identity_supported && supported_set.len() == 1 { + return Some(Encoding::Identity); + } + + // 3. If the representation's content-coding is one of the content-codings listed in the + // Accept-Encoding field, then it is acceptable unless it is accompanied by a qvalue of 0. + + // 4. If multiple content-codings are acceptable, then the acceptable content-coding with + // the highest non-zero qvalue is preferred. + + let matched = acceptable_items + .into_iter() + .filter(|q| q.quality > Quality::ZERO) + // search relies on item list being in descending order of quality + .find(|q| { + let enc = &q.item; + matches!(enc, Preference::Specific(enc) if supported_set.contains(enc)) + }) + .map(|q| q.item); + + match matched { + Some(Preference::Specific(enc)) => Some(enc), + + _ if identity_acceptable => Some(Encoding::Identity), + + _ => None, + } + } /// Extracts the most preferable encoding, accounting for [q-factor weighting]. /// /// If no q-factors are provided, the first encoding is chosen. Note that items without /// q-factors are given the maximum preference value. /// - /// As per the spec, returns [`Preference::Any`] if contained list is empty. + /// As per the spec, returns [`Preference::Any`] if acceptable list is empty. Though, if this is + /// returned, it is recommended to use an un-encoded representation. + /// + /// If `None` is returned, it means that the client has signalled that no representations + /// are acceptable. This should never occur for a well behaved user-agent. 
/// /// [q-factor weighting]: https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.2 - pub fn preference(&self) -> Preference { + pub fn preference(&self) -> Option> { + // empty header indicates no preference + if self.0.is_empty() { + return Some(Preference::Any); + } + let mut max_item = None; - let mut max_pref = Quality::MIN; + let mut max_pref = Quality::ZERO; // uses manual max lookup loop since we want the first occurrence in the case of same // preference but `Iterator::max_by_key` would give us the last occurrence @@ -129,7 +199,23 @@ impl AcceptEncoding { } } - max_item.unwrap_or(Preference::Any) + // Return max_item if any items were above 0 quality... + max_item.or_else(|| { + // ...or else check for "*" or "identity". We can elide quality checks since + // entering this block means all items had "q=0". + match self.0.iter().find(|pref| { + matches!( + pref.item, + Preference::Any | Preference::Specific(Encoding::Identity) + ) + }) { + // "identity" or "*" found so no representation is acceptable + Some(_) => None, + + // implicit "identity" is acceptable + None => Some(Preference::Specific(Encoding::Identity)), + } + }) } /// Returns a sorted list of encodings from highest to lowest precedence, accounting @@ -137,8 +223,12 @@ impl AcceptEncoding { /// /// [q-factor weighting]: https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.2 pub fn ranked(&self) -> Vec> { + self.ranked_items().map(|q| q.item).collect() + } + + fn ranked_items(&self) -> impl Iterator>> { if self.0.is_empty() { - return vec![]; + return vec![].into_iter(); } let mut types = self.0.clone(); @@ -149,84 +239,202 @@ impl AcceptEncoding { b.quality.cmp(&a.quality) }); - types.into_iter().map(|qitem| qitem.item).collect() + types.into_iter() } } +/// Returns true if "identity" is an acceptable encoding. +/// +/// Internal algorithm relies on item list being in descending order of quality. 
+fn is_identity_acceptable(items: &'_ [QualityItem>]) -> bool { + if items.is_empty() { + return true; + } + + // Loop algorithm depends on items being sorted in descending order of quality. As such, it + // is sufficient to return (q > 0) when reaching either an "identity" or "*" item. + for q in items { + match (q.quality, &q.item) { + // occurrence of "identity;q=n"; return true if quality is non-zero + (q, Preference::Specific(Encoding::Identity)) => return q > Quality::ZERO, + + // occurrence of "*;q=n"; return true if quality is non-zero + (q, Preference::Any) => return q > Quality::ZERO, + + _ => {} + } + } + + // implicit acceptable identity + true +} + #[cfg(test)] mod tests { use super::*; use crate::http::header::*; + macro_rules! accept_encoding { + () => { AcceptEncoding(vec![]) }; + ($($q:expr),+ $(,)?) => { AcceptEncoding(vec![$($q.parse().unwrap()),+]) }; + } + + /// Parses an encoding string. + fn enc(enc: &str) -> Preference { + enc.parse().unwrap() + } + + #[test] + fn detect_identity_acceptable() { + macro_rules! accept_encoding_ranked { + () => { accept_encoding!().ranked_items().collect::>() }; + ($($q:expr),+ $(,)?) 
=> { accept_encoding!($($q),+).ranked_items().collect::>() }; + } + + let test = accept_encoding_ranked!(); + assert!(is_identity_acceptable(&test)); + let test = accept_encoding_ranked!("gzip"); + assert!(is_identity_acceptable(&test)); + let test = accept_encoding_ranked!("gzip", "br"); + assert!(is_identity_acceptable(&test)); + let test = accept_encoding_ranked!("gzip", "*;q=0.1"); + assert!(is_identity_acceptable(&test)); + let test = accept_encoding_ranked!("gzip", "identity;q=0.1"); + assert!(is_identity_acceptable(&test)); + let test = accept_encoding_ranked!("gzip", "identity;q=0.1", "*;q=0"); + assert!(is_identity_acceptable(&test)); + let test = accept_encoding_ranked!("gzip", "*;q=0", "identity;q=0.1"); + assert!(is_identity_acceptable(&test)); + + let test = accept_encoding_ranked!("gzip", "*;q=0"); + assert!(!is_identity_acceptable(&test)); + let test = accept_encoding_ranked!("gzip", "identity;q=0"); + assert!(!is_identity_acceptable(&test)); + let test = accept_encoding_ranked!("gzip", "identity;q=0", "*;q=0"); + assert!(!is_identity_acceptable(&test)); + let test = accept_encoding_ranked!("gzip", "*;q=0", "identity;q=0"); + assert!(!is_identity_acceptable(&test)); + } + + #[test] + fn encoding_negotiation() { + // no preference + let test = accept_encoding!(); + assert_eq!(test.negotiate([].iter()), None); + + let test = accept_encoding!(); + assert_eq!( + test.negotiate([Encoding::Identity].iter()), + Some(Encoding::Identity), + ); + + let test = accept_encoding!("identity;q=0"); + assert_eq!(test.negotiate([Encoding::Identity].iter()), None); + + let test = accept_encoding!("*;q=0"); + assert_eq!(test.negotiate([Encoding::Identity].iter()), None); + + let test = accept_encoding!(); + assert_eq!( + test.negotiate([Encoding::Gzip, Encoding::Identity].iter()), + Some(Encoding::Identity), + ); + + let test = accept_encoding!("gzip"); + assert_eq!( + test.negotiate([Encoding::Gzip, Encoding::Identity].iter()), + Some(Encoding::Gzip), + ); + 
assert_eq!( + test.negotiate([Encoding::Brotli, Encoding::Identity].iter()), + Some(Encoding::Identity), + ); + assert_eq!( + test.negotiate([Encoding::Brotli, Encoding::Gzip, Encoding::Identity].iter()), + Some(Encoding::Gzip), + ); + + let test = accept_encoding!("gzip", "identity;q=0"); + assert_eq!( + test.negotiate([Encoding::Gzip, Encoding::Identity].iter()), + Some(Encoding::Gzip), + ); + assert_eq!( + test.negotiate([Encoding::Brotli, Encoding::Identity].iter()), + None + ); + + let test = accept_encoding!("gzip", "*;q=0"); + assert_eq!( + test.negotiate([Encoding::Gzip, Encoding::Identity].iter()), + Some(Encoding::Gzip), + ); + assert_eq!( + test.negotiate([Encoding::Brotli, Encoding::Identity].iter()), + None + ); + + let test = accept_encoding!("gzip", "deflate", "br"); + assert_eq!( + test.negotiate([Encoding::Gzip, Encoding::Identity].iter()), + Some(Encoding::Gzip), + ); + assert_eq!( + test.negotiate([Encoding::Brotli, Encoding::Identity].iter()), + Some(Encoding::Brotli) + ); + assert_eq!( + test.negotiate([Encoding::Deflate, Encoding::Identity].iter()), + Some(Encoding::Deflate) + ); + assert_eq!( + test.negotiate([Encoding::Gzip, Encoding::Deflate, Encoding::Identity].iter()), + Some(Encoding::Gzip) + ); + assert_eq!( + test.negotiate([Encoding::Gzip, Encoding::Brotli, Encoding::Identity].iter()), + Some(Encoding::Gzip) + ); + assert_eq!( + test.negotiate([Encoding::Brotli, Encoding::Gzip, Encoding::Identity].iter()), + Some(Encoding::Gzip) + ); + } + #[test] fn ranking_precedence() { - let test = AcceptLanguage(vec![]); + let test = accept_encoding!(); assert!(test.ranked().is_empty()); - let test = AcceptLanguage(vec![QualityItem::max("gzip".parse().unwrap())]); - assert_eq!(test.ranked(), vec!["gzip".parse().unwrap()]); + let test = accept_encoding!("gzip"); + assert_eq!(test.ranked(), vec![enc("gzip")]); - let test = AcceptLanguage(vec![ - QualityItem::new("gzip".parse().unwrap(), q(0.900)), - QualityItem::new("*".parse().unwrap(), q(0.700)), 
- QualityItem::new("br".parse().unwrap(), q(1.0)), - ]); - assert_eq!( - test.ranked(), - vec![ - "br".parse().unwrap(), - "gzip".parse().unwrap(), - "*".parse().unwrap(), - ] - ); + let test = accept_encoding!("gzip;q=0.900", "*;q=0.700", "br;q=1.0"); + assert_eq!(test.ranked(), vec![enc("br"), enc("gzip"), enc("*")]); - let test = AcceptLanguage(vec![ - QualityItem::max("br".parse().unwrap()), - QualityItem::max("gzip".parse().unwrap()), - QualityItem::max("*".parse().unwrap()), - ]); - assert_eq!( - test.ranked(), - vec![ - "br".parse().unwrap(), - "gzip".parse().unwrap(), - "*".parse().unwrap(), - ] - ); + let test = accept_encoding!("br", "gzip", "*"); + assert_eq!(test.ranked(), vec![enc("br"), enc("gzip"), enc("*")]); } #[test] fn preference_selection() { - assert_eq!(AcceptLanguage(vec![]).preference(), Preference::Any); + assert_eq!(accept_encoding!().preference(), Some(Preference::Any)); - assert_eq!( - AcceptLanguage(vec!["compress;q=0; *;q=0".parse().unwrap()]).preference(), - Preference::Any - ); + assert_eq!(accept_encoding!("identity;q=0").preference(), None); + assert_eq!(accept_encoding!("*;q=0").preference(), None); + assert_eq!(accept_encoding!("compress;q=0", "*;q=0").preference(), None); + assert_eq!(accept_encoding!("identity;q=0", "*;q=0").preference(), None); - assert_eq!( - AcceptLanguage(vec!["identity;q=0; *;q=0".parse().unwrap()]).preference(), - Preference::Any - ); + let test = accept_encoding!("*;q=0.5"); + assert_eq!(test.preference().unwrap(), enc("*")); - let test = AcceptLanguage(vec![ - QualityItem::new("br".parse().unwrap(), q(0.900)), - QualityItem::new("gzip".parse().unwrap(), q(1.0)), - QualityItem::new("*".parse().unwrap(), q(0.500)), - ]); - assert_eq!( - test.preference(), - Preference::Specific("gzip".parse().unwrap()) - ); + let test = accept_encoding!("br;q=0"); + assert_eq!(test.preference().unwrap(), enc("identity")); - let test = AcceptLanguage(vec![ - QualityItem::max("br".parse().unwrap()), - 
QualityItem::max("gzip".parse().unwrap()), - QualityItem::max("*".parse().unwrap()), - ]); - assert_eq!( - test.preference(), - Preference::Specific("br".parse().unwrap()) - ); + let test = accept_encoding!("br;q=0.900", "gzip;q=1.0", "*;q=0.500"); + assert_eq!(test.preference().unwrap(), enc("gzip")); + + let test = accept_encoding!("br", "gzip", "*"); + assert_eq!(test.preference().unwrap(), enc("br")); } } diff --git a/src/http/header/accept_language.rs b/src/http/header/accept_language.rs index 4aaaeafc4..9943e121f 100644 --- a/src/http/header/accept_language.rs +++ b/src/http/header/accept_language.rs @@ -37,7 +37,7 @@ common_header! { /// let mut builder = HttpResponse::Ok(); /// builder.insert_header( /// AcceptLanguage(vec![ - /// QualityItem::max("en-US".parse().unwrap()) + /// "en-US".parse().unwrap(), /// ]) /// ); /// ``` @@ -49,9 +49,9 @@ common_header! { /// let mut builder = HttpResponse::Ok(); /// builder.insert_header( /// AcceptLanguage(vec![ - /// QualityItem::max("da".parse().unwrap()), - /// QualityItem::new("en-GB".parse().unwrap(), q(0.8)), - /// QualityItem::new("en".parse().unwrap(), q(0.7)), + /// "da".parse().unwrap(), + /// "en-GB;q=0.8".parse().unwrap(), + /// "en;q=0.7".parse().unwrap(), /// ]) /// ); /// ``` @@ -103,7 +103,7 @@ impl AcceptLanguage { /// [q-factor weighting]: https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.2 pub fn preference(&self) -> Preference { let mut max_item = None; - let mut max_pref = Quality::MIN; + let mut max_pref = Quality::ZERO; // uses manual max lookup loop since we want the first occurrence in the case of same // preference but `Iterator::max_by_key` would give us the last occurrence diff --git a/src/http/header/encoding.rs b/src/http/header/encoding.rs index a61edda67..214144ac6 100644 --- a/src/http/header/encoding.rs +++ b/src/http/header/encoding.rs @@ -5,7 +5,7 @@ pub use self::Encoding::{ }; /// A value to represent an encoding used in `Transfer-Encoding` or `Accept-Encoding` header. 
-#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Encoding { /// The `chunked` encoding. Chunked, @@ -22,7 +22,7 @@ pub enum Encoding { /// The `compress` encoding. Compress, - /// The `identity` encoding. + /// The `identity` encoding. Does not affect content. Identity, /// The `trailers` encoding.