This commit is contained in:
Yury Yarashevich 2023-10-23 10:40:22 +09:00 committed by GitHub
commit c3fe22e255
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 259 additions and 63 deletions

View File

@ -2,6 +2,8 @@
## Unreleased ## Unreleased
- Revise compression middleware to perform compression cooperatively, periodically yielding control to other tasks instead of offloading compression to a background thread.
## 3.4.0 ## 3.4.0
### Added ### Added

View File

@ -140,3 +140,8 @@ required-features = ["http2", "rustls-0_21"]
name = "response-body-compression" name = "response-body-compression"
harness = false harness = false
required-features = ["compress-brotli", "compress-gzip", "compress-zstd"] required-features = ["compress-brotli", "compress-gzip", "compress-zstd"]
[[bench]]
name = "compression-chunk-size"
harness = false
required-features = ["compress-brotli", "compress-gzip", "compress-zstd"]

View File

@ -0,0 +1,51 @@
#![allow(clippy::uninlined_format_args)]
use actix_http::{body, encoding::Encoder, ContentEncoding, ResponseHead, StatusCode};
use criterion::{criterion_group, criterion_main, Criterion};
const BODY: &[u8] = include_bytes!("../../Cargo.lock");
const CHUNK_SIZES: [usize; 7] = [512, 1024, 2048, 4096, 8192, 16384, 32768];
const CONTENT_ENCODING: [ContentEncoding; 4] = [
ContentEncoding::Deflate,
ContentEncoding::Gzip,
ContentEncoding::Zstd,
ContentEncoding::Brotli,
];
/// Benchmarks how long each encoding in `CONTENT_ENCODING` takes to compress a single
/// prefix of `BODY`, for every prefix length listed in `CHUNK_SIZES`.
///
/// Each benchmark is registered as `<encoding>-<chunk size>` in the
/// "time to compress chunk" group.
fn compression_responses(c: &mut Criterion) {
    // Compile-time guard: the largest benchmarked prefix must fit inside the fixture body.
    static_assertions::const_assert!(BODY.len() > CHUNK_SIZES[6]);

    let mut group = c.benchmark_group("time to compress chunk");

    for content_encoding in CONTENT_ENCODING {
        for chunk_size in CHUNK_SIZES {
            let bench_id = format!("{}-{}", content_encoding.as_str(), chunk_size);

            group.bench_function(bench_id, |bencher| {
                // One runtime per benchmark; it is reused by every iteration below.
                let runtime = actix_rt::Runtime::new().unwrap();

                bencher.iter(|| {
                    let compress_prefix = async move {
                        // The whole prefix is handed to the compressor as one slice,
                        // because the encode chunk size equals the prefix length.
                        let encoder = Encoder::response(
                            content_encoding,
                            &mut ResponseHead::new(StatusCode::OK),
                            &BODY[..chunk_size],
                        )
                        .with_encode_chunk_size(chunk_size);

                        // 256 bytes of slack on top of the input size for the collected output.
                        body::to_bytes_limited(encoder, chunk_size + 256)
                            .await
                            .unwrap()
                            .unwrap();
                    };

                    runtime.block_on(compress_prefix);
                });
            });
        }
    }

    group.finish();
}
criterion_group!(benches, compression_responses);
criterion_main!(benches);

View File

@ -2,14 +2,12 @@
use std::{ use std::{
error::Error as StdError, error::Error as StdError,
future::Future,
io::{self, Write as _}, io::{self, Write as _},
pin::Pin, pin::Pin,
task::{Context, Poll}, task::{Context, Poll},
}; };
use actix_rt::task::{spawn_blocking, JoinHandle}; use bytes::{Buf, Bytes};
use bytes::Bytes;
use derive_more::Display; use derive_more::Display;
#[cfg(feature = "compress-gzip")] #[cfg(feature = "compress-gzip")]
use flate2::write::{GzEncoder, ZlibEncoder}; use flate2::write::{GzEncoder, ZlibEncoder};
@ -26,14 +24,11 @@ use crate::{
ResponseHead, StatusCode, ResponseHead, StatusCode,
}; };
const MAX_CHUNK_SIZE_ENCODE_IN_PLACE: usize = 1024;
pin_project! { pin_project! {
pub struct Encoder<B> { pub struct Encoder<B> {
#[pin] #[pin]
body: EncoderBody<B>, body: EncoderBody<B>,
encoder: Option<ContentEncoder>, encoder: Option<Box<CooperativeContentEncoder>>,
fut: Option<JoinHandle<Result<ContentEncoder, io::Error>>>,
eof: bool, eof: bool,
} }
} }
@ -45,7 +40,6 @@ impl<B: MessageBody> Encoder<B> {
body: body::None::new(), body: body::None::new(),
}, },
encoder: None, encoder: None,
fut: None,
eof: true, eof: true,
} }
} }
@ -68,13 +62,12 @@ impl<B: MessageBody> Encoder<B> {
if should_encode { if should_encode {
// wrap body only if encoder is feature-enabled // wrap body only if encoder is feature-enabled
if let Some(enc) = ContentEncoder::select(encoding) { if let Some(coop_encoder) = CooperativeContentEncoder::select(encoding) {
update_head(encoding, head); update_head(encoding, head);
return Encoder { return Encoder {
body, body,
encoder: Some(enc), encoder: Some(coop_encoder),
fut: None,
eof: false, eof: false,
}; };
} }
@ -83,10 +76,18 @@ impl<B: MessageBody> Encoder<B> {
Encoder { Encoder {
body, body,
encoder: None, encoder: None,
fut: None,
eof: false, eof: false,
} }
} }
/// Overrides how many uncompressed bytes are handed to the compressor in one step.
///
/// The value is stored as the encoder's `preferred_chunk_size`. A `size` of zero is
/// ignored, and the call is a no-op when this `Encoder` has no content encoder.
pub fn with_encode_chunk_size(mut self, size: usize) -> Self {
    match self.encoder.as_mut() {
        Some(coop_encoder) if size > 0 => coop_encoder.preferred_chunk_size = size,
        // Nothing to configure: either no compression is active or the size is zero.
        _ => {}
    }

    self
}
} }
pin_project! { pin_project! {
@ -169,22 +170,13 @@ where
return Poll::Ready(None); return Poll::Ready(None);
} }
if let Some(ref mut fut) = this.fut { if let Some(cooperative_encoder) = this.encoder.as_deref_mut() {
let mut encoder = ready!(Pin::new(fut).poll(cx)) match ready!(Pin::new(cooperative_encoder).poll_encoded_chunk(cx)) {
.map_err(|_| { Ok(Some(encoded_chunk)) => return Poll::Ready(Some(Ok(encoded_chunk))),
EncoderError::Io(io::Error::new( Ok(None) => {
io::ErrorKind::Other, // Need next chunk from uncompressed body
"Blocking task was cancelled unexpectedly", }
)) Err(err) => return Poll::Ready(Some(Err(err))),
})?
.map_err(EncoderError::Io)?;
let chunk = encoder.take();
*this.encoder = Some(encoder);
this.fut.take();
if !chunk.is_empty() {
return Poll::Ready(Some(Ok(chunk)));
} }
} }
@ -193,30 +185,19 @@ where
match result { match result {
Some(Err(err)) => return Poll::Ready(Some(Err(err))), Some(Err(err)) => return Poll::Ready(Some(Err(err))),
Some(Ok(chunk)) => { Some(Ok(chunk)) => match this.encoder.as_deref_mut() {
if let Some(mut encoder) = this.encoder.take() { None => return Poll::Ready(Some(Ok(chunk))),
if chunk.len() < MAX_CHUNK_SIZE_ENCODE_IN_PLACE { Some(encoder) => {
encoder.write(&chunk).map_err(EncoderError::Io)?; encoder.push_chunk(chunk);
let chunk = encoder.take();
*this.encoder = Some(encoder);
if !chunk.is_empty() {
return Poll::Ready(Some(Ok(chunk)));
}
} else {
*this.fut = Some(spawn_blocking(move || {
encoder.write(&chunk)?;
Ok(encoder)
}));
}
} else {
return Poll::Ready(Some(Ok(chunk)));
} }
} },
None => { None => {
if let Some(encoder) = this.encoder.take() { if let Some(coop_encoder) = this.encoder.take() {
let chunk = encoder.finish().map_err(EncoderError::Io)?; let chunk = coop_encoder
.content_encoder
.finish()
.map_err(EncoderError::Io)?;
if chunk.is_empty() { if chunk.is_empty() {
return Poll::Ready(None); return Poll::Ready(None);
@ -276,37 +257,123 @@ enum ContentEncoder {
Zstd(ZstdEncoder<'static, Writer>), Zstd(ZstdEncoder<'static, Writer>),
} }
impl ContentEncoder { struct CooperativeContentEncoder {
fn select(encoding: ContentEncoding) -> Option<Self> { content_encoder: ContentEncoder,
preferred_chunk_size: usize,
chunk_ready_to_encode: Option<Bytes>,
budget_used: u8,
}
impl CooperativeContentEncoder {
fn select(encoding: ContentEncoding) -> Option<Box<CooperativeContentEncoder>> {
// Chunk size picked as the max chunk size which took less than 50 µs to compress on "cargo bench --bench compression-chunk-size"
// Rust 1.72 linux/arm64 in Docker on Apple M2 Pro: "time to compress chunk/deflate-16384" time: [39.114 µs 39.283 µs 39.457 µs]
const MAX_DEFLATE_CHUNK_SIZE: usize = 16384;
// Rust 1.72 linux/arm64 in Docker on Apple M2 Pro: "time to compress chunk/gzip-16384" time: [40.121 µs 40.340 µs 40.566 µs]
const MAX_GZIP_CHUNK_SIZE: usize = 16384;
// Rust 1.72 linux/arm64 in Docker on Apple M2 Pro: "time to compress chunk/br-8192" time: [46.076 µs 46.208 µs 46.343 µs]
const MAX_BROTLI_CHUNK_SIZE: usize = 8192;
// Rust 1.72 linux/arm64 in Docker on Apple M2 Pro: "time to compress chunk/zstd-16384" time: [32.872 µs 32.967 µs 33.068 µs]
const MAX_ZSTD_CHUNK_SIZE: usize = 16384;
match encoding { match encoding {
#[cfg(feature = "compress-gzip")] #[cfg(feature = "compress-gzip")]
ContentEncoding::Deflate => Some(ContentEncoder::Deflate(ZlibEncoder::new( ContentEncoding::Deflate => Some(Box::new(CooperativeContentEncoder {
Writer::new(), content_encoder: ContentEncoder::Deflate(ZlibEncoder::new(
flate2::Compression::fast(), Writer::new(),
))), flate2::Compression::fast(),
)),
preferred_chunk_size: MAX_DEFLATE_CHUNK_SIZE,
chunk_ready_to_encode: None,
budget_used: 0,
})),
#[cfg(feature = "compress-gzip")] #[cfg(feature = "compress-gzip")]
ContentEncoding::Gzip => Some(ContentEncoder::Gzip(GzEncoder::new( ContentEncoding::Gzip => Some(Box::new(CooperativeContentEncoder {
Writer::new(), content_encoder: ContentEncoder::Gzip(GzEncoder::new(
flate2::Compression::fast(), Writer::new(),
))), flate2::Compression::fast(),
)),
preferred_chunk_size: MAX_GZIP_CHUNK_SIZE,
chunk_ready_to_encode: None,
budget_used: 0,
})),
#[cfg(feature = "compress-brotli")] #[cfg(feature = "compress-brotli")]
ContentEncoding::Brotli => Some(ContentEncoder::Brotli(new_brotli_compressor())), ContentEncoding::Brotli => Some(Box::new(CooperativeContentEncoder {
content_encoder: ContentEncoder::Brotli(new_brotli_compressor()),
preferred_chunk_size: MAX_BROTLI_CHUNK_SIZE,
chunk_ready_to_encode: None,
budget_used: 0,
})),
#[cfg(feature = "compress-zstd")] #[cfg(feature = "compress-zstd")]
ContentEncoding::Zstd => { ContentEncoding::Zstd => {
let encoder = ZstdEncoder::new(Writer::new(), 3).ok()?; let encoder = ZstdEncoder::new(Writer::new(), 3).ok()?;
Some(ContentEncoder::Zstd(encoder)) Some(Box::new(CooperativeContentEncoder {
content_encoder: ContentEncoder::Zstd(encoder),
preferred_chunk_size: MAX_ZSTD_CHUNK_SIZE,
chunk_ready_to_encode: None,
budget_used: 0,
}))
} }
_ => None, _ => None,
} }
} }
/// Stores the next uncompressed chunk so that `poll_encoded_chunk` can compress it.
///
/// The cooperative budget starts over for every pushed chunk. In debug builds this
/// asserts that the previously pushed chunk has already been fully consumed.
fn push_chunk(&mut self, chunk: Bytes) {
    debug_assert!(self.chunk_ready_to_encode.is_none());

    self.budget_used = 0;
    self.chunk_ready_to_encode = Some(chunk);
}
/// Compresses the pending uncompressed chunk in slices of at most
/// `preferred_chunk_size` bytes and returns the compressed output as soon as the
/// underlying encoder produces any.
///
/// Returns:
/// - `Poll::Ready(Ok(Some(bytes)))`: non-empty compressed output is available;
/// - `Poll::Ready(Ok(None))`: no uncompressed data is pending, so the caller should
///   supply the next chunk;
/// - `Poll::Ready(Err(_))`: writing to the underlying encoder failed;
/// - `Poll::Pending`: the computing budget is exhausted. The waker has already been
///   notified, so the task is polled again after other tasks had a chance to run.
fn poll_encoded_chunk(
    self: Pin<&mut Self>,
    cx: &mut Context<'_>,
) -> Poll<Result<Option<Bytes>, EncoderError>> {
    // Upper bound on the number of slices compressed before yielding voluntarily.
    // See inspiration:
    // https://tokio.rs/blog/2020-04-preemption
    // https://ryhl.io/blog/async-what-is-blocking/
    const BUDGET_LIMIT: u8 = 8;

    let this = self.get_mut();

    loop {
        if this.budget_used > BUDGET_LIMIT {
            this.budget_used = 0;
            // Self-wake so that the executor re-polls this task after yielding.
            cx.waker().wake_by_ref();
            return Poll::Pending;
        }

        let mut chunk = match this.chunk_ready_to_encode.take() {
            Some(chunk) => chunk,
            // Everything pushed so far has been compressed.
            None => return Poll::Ready(Ok(None)),
        };

        let encode_len = chunk.len().min(this.preferred_chunk_size);
        this.content_encoder
            .write(&chunk[..encode_len])
            .map_err(EncoderError::Io)?;

        // Charge the budget for every compressed slice, whether or not the encoder
        // emitted output. The CPU time is spent in `write`, and an encoder that buffers
        // internally must not be able to bypass the limit.
        this.budget_used += 1;

        chunk.advance(encode_len);
        if !chunk.is_empty() {
            // Keep the unprocessed tail for the next iteration or the next poll.
            this.chunk_ready_to_encode = Some(chunk);
        }

        let encoded_chunk = this.content_encoder.take();
        if !encoded_chunk.is_empty() {
            return Poll::Ready(Ok(Some(encoded_chunk)));
        }
        // The encoder buffered the slice without emitting anything; keep feeding it.
    }
}
}
impl ContentEncoder {
#[inline] #[inline]
pub(crate) fn take(&mut self) -> Bytes { pub(crate) fn take(&mut self) -> Bytes {
match *self { match self {
#[cfg(feature = "compress-brotli")] #[cfg(feature = "compress-brotli")]
ContentEncoder::Brotli(ref mut encoder) => encoder.get_mut().take(), ContentEncoder::Brotli(ref mut encoder) => encoder.get_mut().take(),
@ -350,7 +417,7 @@ impl ContentEncoder {
} }
fn write(&mut self, data: &[u8]) -> Result<(), io::Error> { fn write(&mut self, data: &[u8]) -> Result<(), io::Error> {
match *self { match self {
#[cfg(feature = "compress-brotli")] #[cfg(feature = "compress-brotli")]
ContentEncoder::Brotli(ref mut encoder) => match encoder.write_all(data) { ContentEncoder::Brotli(ref mut encoder) => match encoder.write_all(data) {
Ok(_) => Ok(()), Ok(_) => Ok(()),
@ -426,3 +493,74 @@ impl From<EncoderError> for crate::Error {
crate::Error::new_encoder().with_cause(err) crate::Error::new_encoder().with_cause(err)
} }
} }
#[cfg(test)]
mod tests {
    use bytes::BytesMut;
    use rand::{seq::SliceRandom, Rng};

    use super::*;

    static EMPTY_BODY: &[u8] = &[];
    static SHORT_BODY: &[u8] = &[1, 2, 3, 4, 6, 7, 8];
    static LONG_BODY: &[u8] = include_bytes!("encoder.rs");
    static BODIES: &[&[u8]] = &[EMPTY_BODY, SHORT_BODY, LONG_BODY];

    /// Checks that compressing `body` through `Encoder` in small slices yields exactly
    /// the same bytes as compressing it with a single `write` followed by `finish`.
    ///
    /// The body is shuffled and the slice size is drawn from `32..128`, so each run
    /// exercises a different input and slicing.
    async fn test_compression_of_content_encoding(encoding: ContentEncoding, body: &[u8]) {
        let mut shuffled = BytesMut::from(body);
        shuffled.shuffle(&mut rand::thread_rng());
        let body_to_compress = shuffled.freeze();

        // Compression under test: the body goes through the encoder slice by slice.
        let encode_chunk_size = rand::thread_rng().gen_range(32..128);
        let mut head = ResponseHead::new(StatusCode::OK);
        let compressed_body = Encoder::response(encoding, &mut head, body_to_compress.clone())
            .with_encode_chunk_size(encode_chunk_size);

        // Reference output: the same encoder configuration fed the whole body at once.
        let mut reference = CooperativeContentEncoder::select(encoding).unwrap();
        reference.content_encoder.write(&body_to_compress).unwrap();
        let reference_compressed_bytes = reference.content_encoder.finish().unwrap();

        let size_limit = 256 + body_to_compress.len();
        let compressed_bytes = body::to_bytes_limited(compressed_body, size_limit)
            .await
            .unwrap()
            .unwrap();

        assert_eq!(reference_compressed_bytes, compressed_bytes);
    }

    /// Gzip: sliced compression must match whole-body compression for every fixture.
    #[actix_rt::test]
    #[cfg(feature = "compress-gzip")]
    async fn test_gzip_compression_in_chunks_is_the_same_as_whole_chunk_compression() {
        for &body in BODIES {
            test_compression_of_content_encoding(ContentEncoding::Gzip, body).await;
        }
    }

    /// Deflate: sliced compression must match whole-body compression for every fixture.
    #[actix_rt::test]
    #[cfg(feature = "compress-gzip")]
    async fn test_deflate_compression_in_chunks_is_the_same_as_whole_chunk_compression() {
        for &body in BODIES {
            test_compression_of_content_encoding(ContentEncoding::Deflate, body).await;
        }
    }

    /// Brotli: sliced compression must match whole-body compression for every fixture.
    #[actix_rt::test]
    #[cfg(feature = "compress-brotli")]
    async fn test_brotli_compression_in_chunks_is_the_same_as_whole_chunk_compression() {
        for &body in BODIES {
            test_compression_of_content_encoding(ContentEncoding::Brotli, body).await;
        }
    }

    /// Zstd: sliced compression must match whole-body compression for every fixture.
    #[actix_rt::test]
    #[cfg(feature = "compress-zstd")]
    async fn test_zstd_compression_in_chunks_is_the_same_as_whole_chunk_compression() {
        for &body in BODIES {
            test_compression_of_content_encoding(ContentEncoding::Zstd, body).await;
        }
    }
}