From edcde6707657d3e8bdd1df21533fe80aa235f0dd Mon Sep 17 00:00:00 2001
From: Hung-I Wang <Gowee@users.noreply.github.com>
Date: Wed, 6 Nov 2019 22:08:37 +0800
Subject: [PATCH] Fix escaping/encoding problems in Content-Disposition header
 (#1151)

* Fix filename encoding in Content-Disposition of acitx_files::NamedFile

* Add more comments on how to use Content-Disposition header properly & Fix some trivial problems

* Improve Content-Disposition filename(*) parameters of actix_files::NamedFile

* Tweak Content-Disposition parse to accept empty param value in quoted-string

* Fix typos in comments in .../content_disposition.rs (pointed out by @JohnTitor)

* Update CHANGES.md

* Update CHANGES.md again
---
 CHANGES.md                                    |  1 +
 actix-files/src/lib.rs                        | 25 ++++++
 actix-files/src/named.rs                      | 13 ++-
 .../src/header/common/content_disposition.rs  | 90 +++++++++++++++++--
 4 files changed, 119 insertions(+), 10 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 689ab13d..dcb57630 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -5,6 +5,7 @@
 ### Added
 
 * Add `Payload::into_inner` method and make stored `def::Payload` public. (#1110)
+* Add an additional `filename*` param in the `Content-Disposition` header of `actix_files::NamedFile` to be more compatible. (#1151)
 
 ### Changed
 
diff --git a/actix-files/src/lib.rs b/actix-files/src/lib.rs
index 61674ca3..16f40a20 100644
--- a/actix-files/src/lib.rs
+++ b/actix-files/src/lib.rs
@@ -733,6 +733,31 @@ mod tests {
         assert!(resp.headers().get(header::CONTENT_DISPOSITION).is_none());
     }
 
+    #[test]
+    fn test_named_file_non_ascii_file_name() {
+        let mut file =
+            NamedFile::from_file(File::open("Cargo.toml").unwrap(), "貨物.toml")
+                .unwrap();
+        {
+            file.file();
+            let _f: &File = &file;
+        }
+        {
+            let _f: &mut File = &mut file;
+        }
+
+        let req = TestRequest::default().to_http_request();
+        let resp = file.respond_to(&req).unwrap();
+        assert_eq!(
+            resp.headers().get(header::CONTENT_TYPE).unwrap(),
+            "text/x-toml"
+        );
+        assert_eq!(
+            resp.headers().get(header::CONTENT_DISPOSITION).unwrap(),
+            "inline; filename=\"貨物.toml\"; filename*=UTF-8''%E8%B2%A8%E7%89%A9.toml"
+        );
+    }
+
     #[test]
     fn test_named_file_set_content_type() {
         let mut file = NamedFile::open("Cargo.toml")
diff --git a/actix-files/src/named.rs b/actix-files/src/named.rs
index ca1a909a..955982ca 100644
--- a/actix-files/src/named.rs
+++ b/actix-files/src/named.rs
@@ -13,7 +13,7 @@ use mime_guess::from_path;
 
 use actix_http::body::SizedStream;
 use actix_web::http::header::{
-    self, ContentDisposition, DispositionParam, DispositionType,
+    self, Charset, ContentDisposition, DispositionParam, DispositionType, ExtendedValue,
 };
 use actix_web::http::{ContentEncoding, StatusCode};
 use actix_web::middleware::BodyEncoding;
@@ -93,9 +93,18 @@ impl NamedFile {
                 mime::IMAGE | mime::TEXT | mime::VIDEO => DispositionType::Inline,
                 _ => DispositionType::Attachment,
             };
+            let mut parameters =
+                vec![DispositionParam::Filename(String::from(filename.as_ref()))];
+            if !filename.is_ascii() {
+                parameters.push(DispositionParam::FilenameExt(ExtendedValue {
+                    charset: Charset::Ext(String::from("UTF-8")),
+                    language_tag: None,
+                    value: filename.into_owned().into_bytes(),
+                }))
+            }
             let cd = ContentDisposition {
                 disposition: disposition_type,
-                parameters: vec![DispositionParam::Filename(filename.into_owned())],
+                parameters: parameters,
             };
             (ct, cd)
         };
diff --git a/actix-http/src/header/common/content_disposition.rs b/actix-http/src/header/common/content_disposition.rs
index 14fcc351..b2b6f34d 100644
--- a/actix-http/src/header/common/content_disposition.rs
+++ b/actix-http/src/header/common/content_disposition.rs
@@ -76,6 +76,11 @@ pub enum DispositionParam {
     /// the form.
     Name(String),
     /// A plain file name.
+    ///
+    /// It is [not supposed](https://tools.ietf.org/html/rfc6266#appendix-D) to contain any
+    /// non-ASCII characters when used in a *Content-Disposition* HTTP response header, where
+    /// [`FilenameExt`](DispositionParam::FilenameExt) with charset UTF-8 may be used instead
+    /// in case there are Unicode characters in file names.
     Filename(String),
     /// An extended file name. It must not exist for `ContentType::Formdata` according to
     /// [RFC7578 Section 4.2](https://tools.ietf.org/html/rfc7578#section-4.2).
@@ -220,7 +225,16 @@ impl DispositionParam {
 /// ext-token           = <the characters in token, followed by "*">
 /// ```
 ///
-/// **Note**: filename* [must not](https://tools.ietf.org/html/rfc7578#section-4.2) be used within
+/// # Note
+///
+/// filename is [not supposed](https://tools.ietf.org/html/rfc6266#appendix-D) to contain any
+/// non-ASCII characters when used in a *Content-Disposition* HTTP response header, where
+/// filename* with charset UTF-8 may be used instead in case there are Unicode characters in file
+/// names.
+/// filename is [acceptable](https://tools.ietf.org/html/rfc7578#section-4.2) to be UTF-8 encoded
+/// directly in a *Content-Disposition* header for *multipart/form-data*, though.
+///
+/// filename* [must not](https://tools.ietf.org/html/rfc7578#section-4.2) be used within
 /// *multipart/form-data*.
 ///
 /// # Example
@@ -251,6 +265,22 @@ impl DispositionParam {
 /// };
 /// assert_eq!(cd2.get_name(), Some("file")); // field name
 /// assert_eq!(cd2.get_filename(), Some("bill.odt"));
+///
+/// // HTTP response header with Unicode characters in file names
+/// let cd3 = ContentDisposition {
+///     disposition: DispositionType::Attachment,
+///     parameters: vec![
+///         DispositionParam::FilenameExt(ExtendedValue {
+///             charset: Charset::Ext(String::from("UTF-8")),
+///             language_tag: None,
+///             value: String::from("\u{1f600}.svg").into_bytes(),
+///         }),
+///         // fallback for better compatibility
+///         DispositionParam::Filename(String::from("Grinning-Face-Emoji.svg"))
+///     ],
+/// };
+/// assert_eq!(cd3.get_filename_ext().map(|ev| ev.value.as_ref()),
+///            Some("\u{1f600}.svg".as_bytes()));
 /// ```
 ///
 /// # WARN
@@ -333,15 +363,17 @@ impl ContentDisposition {
                     // token: won't contains semicolon according to RFC 2616 Section 2.2
                     let (token, new_left) = split_once_and_trim(left, ';');
                     left = new_left;
+                    if token.is_empty() {
+                        // quoted-string can be empty, but token cannot be empty
+                        return Err(crate::error::ParseError::Header);
+                    }
                     token.to_owned()
                 };
-                if value.is_empty() {
-                    return Err(crate::error::ParseError::Header);
-                }
 
                 let param = if param_name.eq_ignore_ascii_case("name") {
                     DispositionParam::Name(value)
                 } else if param_name.eq_ignore_ascii_case("filename") {
+                    // See also comments in test_from_raw_uncessary_percent_decode.
                     DispositionParam::Filename(value)
                 } else {
                     DispositionParam::Unknown(param_name.to_owned(), value)
@@ -466,11 +498,40 @@ impl fmt::Display for DispositionType {
 
 impl fmt::Display for DispositionParam {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        // All ASCII control charaters (0-30, 127) excepting horizontal tab, double quote, and
+        // All ASCII control characters (0-30, 127) including horizontal tab, double quote, and
         // backslash should be escaped in quoted-string (i.e. "foobar").
-        // Ref: RFC6266 S4.1 -> RFC2616 S2.2; RFC 7578 S4.2 -> RFC2183 S2 -> ... .
+        // Ref: RFC6266 S4.1 -> RFC2616 S3.6
+        // filename-parm  = "filename" "=" value
+        // value          = token | quoted-string
+        // quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
+        // qdtext         = <any TEXT except <">>
+        // quoted-pair    = "\" CHAR
+        // TEXT           = <any OCTET except CTLs,
+        //                  but including LWS>
+        // LWS            = [CRLF] 1*( SP | HT )
+        // OCTET          = <any 8-bit sequence of data>
+        // CHAR           = <any US-ASCII character (octets 0 - 127)>
+        // CTL            = <any US-ASCII control character
+        //                  (octets 0 - 31) and DEL (127)>
+        //
+        // Ref: RFC7578 S4.2 -> RFC2183 S2 -> RFC2045 S5.1
+        // parameter := attribute "=" value
+        // attribute := token
+        //              ; Matching of attributes
+        //              ; is ALWAYS case-insensitive.
+        // value := token / quoted-string
+        // token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
+        //             or tspecials>
+        // tspecials :=  "(" / ")" / "<" / ">" / "@" /
+        //               "," / ";" / ":" / "\" / <">
+        //               "/" / "[" / "]" / "?" / "="
+        //               ; Must be in quoted-string,
+        //               ; to use within parameter values
+        //
+        //
+        // See also comments in test_from_raw_uncessary_percent_decode.
         lazy_static! {
-            static ref RE: Regex = Regex::new("[\x01-\x08\x10\x1F\x7F\"\\\\]").unwrap();
+            static ref RE: Regex = Regex::new("[\x00-\x08\x10-\x1F\x7F\"\\\\]").unwrap();
         }
         match self {
             DispositionParam::Name(ref value) => write!(f, "name={}", value),
@@ -774,8 +835,18 @@ mod tests {
 
     #[test]
     fn test_from_raw_uncessary_percent_decode() {
+        // In fact, RFC7578 (multipart/form-data) Section 2 and 4.2 suggests that filename with
+        // non-ASCII characters MAY be percent-encoded.
+        // On the contrary, RFC6266 or other RFCs related to Content-Disposition response header
+        // do not mention such percent-encoding.
+        // So, it appears to be undecidable whether to percent-decode or not without
+        // knowing the usage scenario (multipart/form-data v.s. HTTP response header) and
+        // inevitable to unnecessarily percent-decode filename with %XX in the former scenario.
+        // Fortunately, it seems that almost all mainstream browsers just send UTF-8 encoded file
+        // names in quoted-string format (tested on Edge, IE11, Chrome and Firefox) without
+        // percent-encoding. So we do not bother to attempt to percent-decode.
         let a = HeaderValue::from_static(
-            "form-data; name=photo; filename=\"%74%65%73%74%2e%70%6e%67\"", // Should not be decoded!
+            "form-data; name=photo; filename=\"%74%65%73%74%2e%70%6e%67\"",
         );
         let a: ContentDisposition = ContentDisposition::from_raw(&a).unwrap();
         let b = ContentDisposition {
@@ -811,6 +882,9 @@ mod tests {
 
         let a = HeaderValue::from_static("inline; filename=  ");
         assert!(ContentDisposition::from_raw(&a).is_err());
+
+        let a = HeaderValue::from_static("inline; filename=\"\"");
+        assert!(ContentDisposition::from_raw(&a).expect("parse cd").get_filename().expect("filename").is_empty());
     }
 
     #[test]