From 4e321595bcf4a450efcfe5655e68b7c917c38fa8 Mon Sep 17 00:00:00 2001 From: Rob Ede Date: Wed, 2 Sep 2020 22:12:07 +0100 Subject: [PATCH 1/2] extract more config types from Data as well (#1641) --- CHANGES.md | 9 +++++-- src/types/form.rs | 38 +++++++++++++++++++++++------ src/types/json.rs | 58 +++++++++++++++++++++++++++++++++----------- src/types/payload.rs | 19 ++++++--------- 4 files changed, 89 insertions(+), 35 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 0d7311748..82c562f5c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,11 +3,16 @@ ## Unreleased - 2020-xx-xx ### Added * `middleware::NormalizePath` now has configurable behaviour for either always having a trailing - slash, or as the new addition, always trimming trailing slashes. + slash, or as the new addition, always trimming trailing slashes. [#1639] ### Changed -* Update actix-codec and actix-utils dependencies. +* Update actix-codec and actix-utils dependencies. [#1634] +* `FormConfig` and `JsonConfig` configurations are now also considered when set + using `App::data`. [#1641] +[#1639]: https://github.com/actix/actix-web/pull/1639 +[#1641]: https://github.com/actix/actix-web/pull/1641 +[#1634]: https://github.com/actix/actix-web/pull/1634 ## 3.0.0-beta.3 - 2020-08-17 ### Changed diff --git a/src/types/form.rs b/src/types/form.rs index ea061d553..de88c2a94 100644 --- a/src/types/form.rs +++ b/src/types/form.rs @@ -23,7 +23,7 @@ use crate::http::{ StatusCode, }; use crate::request::HttpRequest; -use crate::responder::Responder; +use crate::{responder::Responder, web}; /// Form data helper (`application/x-www-form-urlencoded`) /// @@ -121,8 +121,12 @@ where fn from_request(req: &HttpRequest, payload: &mut Payload) -> Self::Future { let req2 = req.clone(); let (limit, err) = req - .app_data::() - .map(|c| (c.limit, c.ehandler.clone())) + .app_data::() + .or_else(|| { + req.app_data::>() + .map(|d| d.as_ref()) + }) + .map(|c| (c.limit, c.err_handler.clone())) .unwrap_or((16384, None)); UrlEncoded::new(req, payload) @@ -200,7 +204,7 @@ impl Responder for Form { #[derive(Clone)] pub struct FormConfig { limit: usize, - ehandler: Option Error>>, + err_handler: Option Error>>, } impl FormConfig { @@ -215,7 +219,7 @@ impl FormConfig { where F: Fn(UrlencodedError, &HttpRequest) -> Error + 'static, { - self.ehandler = Some(Rc::new(f)); + self.err_handler = Some(Rc::new(f)); self } } @@ -223,8 +227,8 @@ impl FormConfig { impl Default for FormConfig { fn default() -> Self { FormConfig { - limit: 16384, - ehandler: None, + limit: 16_384, // 2^14 bytes (~16kB) + err_handler: None, } } } @@ -378,7 +382,7 @@ mod tests { use serde::{Deserialize, Serialize}; use super::*; - use crate::http::header::{HeaderValue, CONTENT_TYPE}; + use crate::http::header::{HeaderValue, CONTENT_LENGTH, CONTENT_TYPE}; use crate::test::TestRequest; #[derive(Deserialize, Serialize, Debug, PartialEq)] @@ -499,4 +503,22 @@ mod tests { use crate::responder::tests::BodyTest; assert_eq!(resp.body().bin_ref(), b"hello=world&counter=123"); } + + #[actix_rt::test] + async fn test_with_config_in_data_wrapper() { + let ctype = HeaderValue::from_static("application/x-www-form-urlencoded"); + + let (req, mut pl) = TestRequest::default() + .header(CONTENT_TYPE, ctype) + .header(CONTENT_LENGTH, HeaderValue::from_static("20")) + .set_payload(Bytes::from_static(b"hello=test&counter=4")) + .app_data(web::Data::new(FormConfig::default().limit(10))) + .to_http_parts(); + + let s = Form::::from_request(&req, &mut pl).await; + assert!(s.is_err()); + + let err_str = s.err().unwrap().to_string(); + assert!(err_str.contains("Urlencoded payload size is bigger")); + } } diff --git a/src/types/json.rs b/src/types/json.rs index 527b4b611..ab7978dff 100644 --- a/src/types/json.rs +++ b/src/types/json.rs @@ -20,7 +20,7 @@ use crate::dev::Decompress; use crate::error::{Error, JsonPayloadError}; use crate::extract::FromRequest; use crate::request::HttpRequest; -use crate::responder::Responder; +use crate::{responder::Responder, web}; /// Json helper /// @@ -179,10 +179,11 @@ where #[inline] fn from_request(req: &HttpRequest, payload: &mut Payload) -> Self::Future { let req2 = req.clone(); - let (limit, err, ctype) = req - .app_data::() - .map(|c| (c.limit, c.ehandler.clone(), c.content_type.clone())) - .unwrap_or((32768, None, None)); + let config = JsonConfig::from_req(req); + + let limit = config.limit; + let ctype = config.content_type.clone(); + let err_handler = config.err_handler.clone(); JsonBody::new(req, payload, ctype) .limit(limit) @@ -193,7 +194,8 @@ where Request path: {}", req2.path() ); - if let Some(err) = err { + + if let Some(err) = err_handler { Err((*err)(e, &req2)) } else { Err(e.into()) @@ -255,7 +257,8 @@ where #[derive(Clone)] pub struct JsonConfig { limit: usize, - ehandler: Option Error + Send + Sync>>, + err_handler: + Option Error + Send + Sync>>, content_type: Option bool + Send + Sync>>, } @@ -271,7 +274,7 @@ impl JsonConfig { where F: Fn(JsonPayloadError, &HttpRequest) -> Error + Send + Sync + 'static, { - self.ehandler = Some(Arc::new(f)); + self.err_handler = Some(Arc::new(f)); self } @@ -283,15 +286,26 @@ impl JsonConfig { self.content_type = Some(Arc::new(predicate)); self } + + /// Extract payload config from app data. Check both `T` and `Data`, in that order, and fall + /// back to the default payload config. + fn from_req(req: &HttpRequest) -> &Self { + req.app_data::() + .or_else(|| req.app_data::>().map(|d| d.as_ref())) + .unwrap_or_else(|| &DEFAULT_CONFIG) + } } +// Allow shared refs to default. +const DEFAULT_CONFIG: JsonConfig = JsonConfig { + limit: 32_768, // 2^15 bytes, (~32kB) + err_handler: None, + content_type: None, +}; + impl Default for JsonConfig { fn default() -> Self { - JsonConfig { - limit: 32768, - ehandler: None, - content_type: None, - } + DEFAULT_CONFIG.clone() } } @@ -422,7 +436,7 @@ mod tests { use super::*; use crate::error::InternalError; - use crate::http::header; + use crate::http::header::{self, HeaderValue, CONTENT_LENGTH, CONTENT_TYPE}; use crate::test::{load_stream, TestRequest}; use crate::HttpResponse; @@ -659,4 +673,20 @@ mod tests { let s = Json::::from_request(&req, &mut pl).await; assert!(s.is_err()) } + + #[actix_rt::test] + async fn test_with_config_in_data_wrapper() { + let (req, mut pl) = TestRequest::default() + .header(CONTENT_TYPE, HeaderValue::from_static("application/json")) + .header(CONTENT_LENGTH, HeaderValue::from_static("16")) + .set_payload(Bytes::from_static(b"{\"name\": \"test\"}")) + .app_data(web::Data::new(JsonConfig::default().limit(10))) + .to_http_parts(); + + let s = Json::::from_request(&req, &mut pl).await; + assert!(s.is_err()); + + let err_str = s.err().unwrap().to_string(); + assert!(err_str.contains("Json payload size is bigger than allowed")); + } } diff --git a/src/types/payload.rs b/src/types/payload.rs index 653abf089..bbdd89525 100644 --- a/src/types/payload.rs +++ b/src/types/payload.rs @@ -279,27 +279,24 @@ impl PayloadConfig { Ok(()) } - /// Allow payload config extraction from app data checking both `T` and `Data`, in that - /// order, and falling back to the default payload config. - fn from_req(req: &HttpRequest) -> &PayloadConfig { - req.app_data::() - .or_else(|| { - req.app_data::>() - .map(|d| d.as_ref()) - }) - .unwrap_or_else(|| &DEFAULT_PAYLOAD_CONFIG) + /// Extract payload config from app data. Check both `T` and `Data`, in that order, and fall + /// back to the default payload config. + fn from_req(req: &HttpRequest) -> &Self { + req.app_data::() + .or_else(|| req.app_data::>().map(|d| d.as_ref())) + .unwrap_or_else(|| &DEFAULT_CONFIG) } } // Allow shared refs to default. -static DEFAULT_PAYLOAD_CONFIG: PayloadConfig = PayloadConfig { +const DEFAULT_CONFIG: PayloadConfig = PayloadConfig { limit: 262_144, // 2^18 bytes (~256kB) mimetype: None, }; impl Default for PayloadConfig { fn default() -> Self { - DEFAULT_PAYLOAD_CONFIG.clone() + DEFAULT_CONFIG.clone() } } From 9a9d4b182eff659894f32ddab360e3060c747e3c Mon Sep 17 00:00:00 2001 From: Rob Ede Date: Thu, 3 Sep 2020 10:00:24 +0100 Subject: [PATCH 2/2] document all remaining unsafe usages (#1642) adds some debug assertions where appropriate --- actix-http/src/config.rs | 3 +- actix-http/src/h1/decoder.rs | 4 +- actix-http/src/h1/dispatcher.rs | 4 ++ actix-http/src/h1/encoder.rs | 69 +++++++++++++++++++++++++++------ actix-http/src/h2/dispatcher.rs | 8 ++-- actix-http/src/macros.rs | 5 ++- actix-http/src/ws/mask.rs | 15 ++++--- 7 files changed, 84 insertions(+), 24 deletions(-) diff --git a/actix-http/src/config.rs b/actix-http/src/config.rs index abf3d8ff9..b314d4c99 100644 --- a/actix-http/src/config.rs +++ b/actix-http/src/config.rs @@ -17,7 +17,7 @@ const DATE_VALUE_LENGTH: usize = 29; pub enum KeepAlive { /// Keep alive in seconds Timeout(usize), - /// Relay on OS to shutdown tcp connection + /// Rely on OS to shutdown tcp connection Os, /// Disabled Disabled, @@ -209,6 +209,7 @@ impl Date { date.update(); date } + fn update(&mut self) { self.pos = 0; write!( diff --git a/actix-http/src/h1/decoder.rs b/actix-http/src/h1/decoder.rs index 8fdd11be5..8e891dc5c 100644 --- a/actix-http/src/h1/decoder.rs +++ b/actix-http/src/h1/decoder.rs @@ -76,12 +76,14 @@ pub(crate) trait MessageType: Sized { let name = HeaderName::from_bytes(&slice[idx.name.0..idx.name.1]).unwrap(); - // SAFETY: httparse checks header value is valid UTF-8 + // SAFETY: httparse already checks header value is only visible ASCII bytes + // from_maybe_shared_unchecked contains debug assertions so they are omitted here let value = unsafe { HeaderValue::from_maybe_shared_unchecked( slice.slice(idx.value.0..idx.value.1), ) }; + match name { header::CONTENT_LENGTH => { if let Ok(s) = value.to_str() { diff --git a/actix-http/src/h1/dispatcher.rs b/actix-http/src/h1/dispatcher.rs index 00b36562e..7c4de9707 100644 --- a/actix-http/src/h1/dispatcher.rs +++ b/actix-http/src/h1/dispatcher.rs @@ -314,11 +314,15 @@ where Poll::Ready(Err(err)) => return Err(DispatchError::Io(err)), } } + if written == write_buf.len() { + // SAFETY: setting length to 0 is safe + // skips one length check vs truncate unsafe { write_buf.set_len(0) } } else { write_buf.advance(written); } + Ok(false) } diff --git a/actix-http/src/h1/encoder.rs b/actix-http/src/h1/encoder.rs index eb8c337dd..e16b4c3dc 100644 --- a/actix-http/src/h1/encoder.rs +++ b/actix-http/src/h1/encoder.rs @@ -129,89 +129,133 @@ pub(crate) trait MessageType: Sized { .chain(extra_headers.inner.iter()); // write headers - let mut pos = 0; + let mut has_date = false; - let mut remaining = dst.capacity() - dst.len(); + let mut buf = dst.bytes_mut().as_mut_ptr() as *mut u8; + let mut remaining = dst.capacity() - dst.len(); + + // tracks bytes written since last buffer resize + // since buf is a raw pointer to a bytes container storage but is written to without the + // container's knowledge, this is used to sync the containers cursor after data is written + let mut pos = 0; + for (key, value) in headers { match *key { CONNECTION => continue, TRANSFER_ENCODING | CONTENT_LENGTH if skip_len => continue, - DATE => { - has_date = true; - } + DATE => has_date = true, _ => (), } + let k = key.as_str().as_bytes(); + let k_len = k.len(); + match value { map::Value::One(ref val) => { let v = val.as_ref(); let v_len = v.len(); - let k_len = k.len(); + + // key length + value length + colon + space + \r\n let len = k_len + v_len + 4; + if len > remaining { + // not enough room in buffer for this header; reserve more space + + // SAFETY: all the bytes written up to position "pos" are initialized + // the written byte count and pointer advancement are kept in sync unsafe { dst.advance_mut(pos); } + pos = 0; dst.reserve(len * 2); remaining = dst.capacity() - dst.len(); + + // re-assign buf raw pointer since it's possible that the buffer was + // reallocated and/or resized buf = dst.bytes_mut().as_mut_ptr() as *mut u8; } - // use upper Camel-Case + + // SAFETY: on each write, it is enough to ensure that the advancement of the + // cursor matches the number of bytes written unsafe { + // use upper Camel-Case if camel_case { write_camel_case(k, from_raw_parts_mut(buf, k_len)) } else { write_data(k, buf, k_len) } + buf = buf.add(k_len); + write_data(b": ", buf, 2); buf = buf.add(2); + write_data(v, buf, v_len); buf = buf.add(v_len); + write_data(b"\r\n", buf, 2); buf = buf.add(2); - pos += len; - remaining -= len; } + + pos += len; + remaining -= len; } + map::Value::Multi(ref vec) => { for val in vec { let v = val.as_ref(); let v_len = v.len(); - let k_len = k.len(); let len = k_len + v_len + 4; + if len > remaining { + // SAFETY: all the bytes written up to position "pos" are initialized + // the written byte count and pointer advancement are kept in sync unsafe { dst.advance_mut(pos); } pos = 0; dst.reserve(len * 2); remaining = dst.capacity() - dst.len(); + + // re-assign buf raw pointer since it's possible that the buffer was + // reallocated and/or resized buf = dst.bytes_mut().as_mut_ptr() as *mut u8; } - // use upper Camel-Case + + // SAFETY: on each write, it is enough to ensure that the advancement of + // the cursor matches the number of bytes written unsafe { if camel_case { write_camel_case(k, from_raw_parts_mut(buf, k_len)); } else { write_data(k, buf, k_len); } + buf = buf.add(k_len); + write_data(b": ", buf, 2); buf = buf.add(2); + write_data(v, buf, v_len); buf = buf.add(v_len); + write_data(b"\r\n", buf, 2); buf = buf.add(2); }; + pos += len; remaining -= len; } } } } + + // final cursor synchronization with the bytes container + // + // SAFETY: all the bytes written up to position "pos" are initialized + // the written byte count and pointer advancement are kept in sync unsafe { dst.advance_mut(pos); } @@ -477,7 +521,10 @@ impl<'a> io::Write for Writer<'a> { } } +/// # Safety +/// Callers must ensure that the given length matches given value length. unsafe fn write_data(value: &[u8], buf: *mut u8, len: usize) { + debug_assert_eq!(value.len(), len); copy_nonoverlapping(value.as_ptr(), buf, len); } diff --git a/actix-http/src/h2/dispatcher.rs b/actix-http/src/h2/dispatcher.rs index 534ce928a..daa651f4d 100644 --- a/actix-http/src/h2/dispatcher.rs +++ b/actix-http/src/h2/dispatcher.rs @@ -227,9 +227,11 @@ where if !has_date { let mut bytes = BytesMut::with_capacity(29); self.config.set_date_header(&mut bytes); - res.headers_mut().insert(DATE, unsafe { - HeaderValue::from_maybe_shared_unchecked(bytes.freeze()) - }); + res.headers_mut().insert( + DATE, + // SAFETY: serialized date-times are known ASCII strings + unsafe { HeaderValue::from_maybe_shared_unchecked(bytes.freeze()) }, + ); } res diff --git a/actix-http/src/macros.rs b/actix-http/src/macros.rs index b970b14f2..e08d62ba6 100644 --- a/actix-http/src/macros.rs +++ b/actix-http/src/macros.rs @@ -38,7 +38,7 @@ macro_rules! downcast { /// Downcasts generic body to a specific type. pub fn downcast_ref(&self) -> Option<&T> { if self.__private_get_type_id__().0 == std::any::TypeId::of::() { - // Safety: external crates cannot override the default + // SAFETY: external crates cannot override the default // implementation of `__private_get_type_id__`, since // it requires returning a private type. We can therefore // rely on the returned `TypeId`, which ensures that this @@ -48,10 +48,11 @@ macro_rules! downcast { None } } + /// Downcasts a generic body to a mutable specific type. pub fn downcast_mut(&mut self) -> Option<&mut T> { if self.__private_get_type_id__().0 == std::any::TypeId::of::() { - // Safety: external crates cannot override the default + // SAFETY: external crates cannot override the default // implementation of `__private_get_type_id__`, since // it requires returning a private type. We can therefore // rely on the returned `TypeId`, which ensures that this diff --git a/actix-http/src/ws/mask.rs b/actix-http/src/ws/mask.rs index 367fb0212..726b1a4a1 100644 --- a/actix-http/src/ws/mask.rs +++ b/actix-http/src/ws/mask.rs @@ -7,6 +7,8 @@ use std::slice; struct ShortSlice<'a>(&'a mut [u8]); impl<'a> ShortSlice<'a> { + /// # Safety + /// Given slice must be shorter than 8 bytes. unsafe fn new(slice: &'a mut [u8]) -> Self { // Sanity check for debug builds debug_assert!(slice.len() < 8); @@ -46,13 +48,13 @@ pub(crate) fn apply_mask(buf: &mut [u8], mask_u32: u32) { } } -#[inline] // TODO: copy_nonoverlapping here compiles to call memcpy. While it is not so // inefficient, it could be done better. The compiler does not understand that // a `ShortSlice` must be smaller than a u64. +#[inline] #[allow(clippy::needless_pass_by_value)] fn xor_short(buf: ShortSlice<'_>, mask: u64) { - // Unsafe: we know that a `ShortSlice` fits in a u64 + // SAFETY: we know that a `ShortSlice` fits in a u64 unsafe { let (ptr, len) = (buf.0.as_mut_ptr(), buf.0.len()); let mut b: u64 = 0; @@ -64,8 +66,9 @@ fn xor_short(buf: ShortSlice<'_>, mask: u64) { } } +/// # Safety +/// Caller must ensure the buffer has the correct size and alignment. #[inline] -// Unsafe: caller must ensure the buffer has the correct size and alignment unsafe fn cast_slice(buf: &mut [u8]) -> &mut [u64] { // Assert correct size and alignment in debug builds debug_assert!(buf.len().trailing_zeros() >= 3); @@ -74,9 +77,9 @@ unsafe fn cast_slice(buf: &mut [u8]) -> &mut [u64] { slice::from_raw_parts_mut(buf.as_mut_ptr() as *mut u64, buf.len() >> 3) } -#[inline] // Splits a slice into three parts: an unaligned short head and tail, plus an aligned // u64 mid section. +#[inline] fn align_buf(buf: &mut [u8]) -> (ShortSlice<'_>, &mut [u64], ShortSlice<'_>) { let start_ptr = buf.as_ptr() as usize; let end_ptr = start_ptr + buf.len(); @@ -91,13 +94,13 @@ fn align_buf(buf: &mut [u8]) -> (ShortSlice<'_>, &mut [u64], ShortSlice<'_>) { let (tmp, tail) = buf.split_at_mut(end_aligned - start_ptr); let (head, mid) = tmp.split_at_mut(start_aligned - start_ptr); - // Unsafe: we know the middle section is correctly aligned, and the outer + // SAFETY: we know the middle section is correctly aligned, and the outer // sections are smaller than 8 bytes unsafe { (ShortSlice::new(head), cast_slice(mid), ShortSlice(tail)) } } else { // We didn't cross even one aligned boundary! - // Unsafe: The outer sections are smaller than 8 bytes + // SAFETY: The outer sections are smaller than 8 bytes unsafe { (ShortSlice::new(buf), &mut [], ShortSlice::new(&mut [])) } } }