Merge 7ecbefde38 into 3193b81a3e

2023-10-23 10:40:22 +09:00 · 2023-10-23 10:40:22 +09:00 · c3fe22e255
parent 3193b81a3e 7ecbefde38
commit c3fe22e255
4 changed files with 259 additions and 63 deletions
--- a/actix-http/CHANGES.md
+++ b/actix-http/CHANGES.md
@ -2,6 +2,8 @@
 ## Unreleased
 - Revise compression middleware to perform compression cooperatively, periodically yielding control to other tasks instead of offloading compression to a background thread.
 ## 3.4.0
 ### Added
--- a/actix-http/Cargo.toml
+++ b/actix-http/Cargo.toml
@ -140,3 +140,8 @@ required-features = ["http2", "rustls-0_21"]
 name = "response-body-compression"
 harness = false
 required-features = ["compress-brotli", "compress-gzip", "compress-zstd"]
 [[bench]]
 name = "compression-chunk-size"
 harness = false
 required-features = ["compress-brotli", "compress-gzip", "compress-zstd"]
--- a/actix-http/benches/compression-chunk-size.rs
+++ b/actix-http/benches/compression-chunk-size.rs
@ -0,0 +1,51 @@
 #![allow(clippy::uninlined_format_args)]
 use actix_http::{body, encoding::Encoder, ContentEncoding, ResponseHead, StatusCode};
 use criterion::{criterion_group, criterion_main, Criterion};
 // Benchmark payload: the `Cargo.lock` two directories above this file, embedded at compile time.
 const BODY: &[u8] = include_bytes!("../../Cargo.lock");
 // Input sizes (in bytes) that each encoding is benchmarked with, in ascending order.
 const CHUNK_SIZES: [usize; 7] = [512, 1024, 2048, 4096, 8192, 16384, 32768];
 // Content encodings exercised by the benchmark.
 const CONTENT_ENCODING: [ContentEncoding; 4] = [
    ContentEncoding::Deflate,
    ContentEncoding::Gzip,
    ContentEncoding::Zstd,
    ContentEncoding::Brotli,
 ];
 /// Benchmarks how long it takes to compress a single chunk of `BODY` for every
 /// combination of content encoding and chunk size.
 ///
 /// Each benchmark is named `<encoding>-<chunk_size>` inside the
 /// "time to compress chunk" group.
 fn compression_responses(c: &mut Criterion) {
    // The last (largest) entry of `CHUNK_SIZES` must fit inside the fixture, otherwise
    // `&BODY[..chunk_size]` below would panic. Indexing by length keeps the check valid
    // when sizes are added or removed.
    static_assertions::const_assert!(BODY.len() > CHUNK_SIZES[CHUNK_SIZES.len() - 1]);
    let mut group = c.benchmark_group("time to compress chunk");
    for content_encoding in CONTENT_ENCODING {
        for chunk_size in CHUNK_SIZES {
            group.bench_function(
                format!("{}-{}", content_encoding.as_str(), chunk_size),
                |b| {
                    let rt = actix_rt::Runtime::new().unwrap();
                    b.iter(|| {
                        // Return the compressed bytes so `iter` treats them as the routine's
                        // output instead of them being discarded inside the async block.
                        rt.block_on(async move {
                            let encoder = Encoder::response(
                                content_encoding,
                                &mut ResponseHead::new(StatusCode::OK),
                                &BODY[..chunk_size],
                            )
                            .with_encode_chunk_size(chunk_size);
                            body::to_bytes_limited(encoder, chunk_size + 256)
                                .await
                                .unwrap()
                                .unwrap()
                        })
                    });
                },
            );
        }
    }
    group.finish();
 }
 criterion_group!(benches, compression_responses);
 criterion_main!(benches);
--- a/actix-http/src/encoding/encoder.rs
+++ b/actix-http/src/encoding/encoder.rs
@ -2,14 +2,12 @@
 use std::{
    error::Error as StdError,
    future::Future,
    io::{self, Write as _},
    pin::Pin,
    task::{Context, Poll},
 };
-use actix_rt::task::{spawn_blocking, JoinHandle};
+use bytes::{Buf, Bytes};
 use bytes::Bytes;
 use derive_more::Display;
 #[cfg(feature = "compress-gzip")]
 use flate2::write::{GzEncoder, ZlibEncoder};
@ -26,14 +24,11 @@ use crate::{
    ResponseHead, StatusCode,
 };
 const MAX_CHUNK_SIZE_ENCODE_IN_PLACE: usize = 1024;
 pin_project! {
    pub struct Encoder<B> {
        #[pin]
        body: EncoderBody<B>,
-        encoder: Option<ContentEncoder>,
+        encoder: Option<Box<CooperativeContentEncoder>>,
        fut: Option<JoinHandle<Result<ContentEncoder, io::Error>>>,
        eof: bool,
    }
 }
@ -45,7 +40,6 @@ impl<B: MessageBody> Encoder<B> {
                body: body::None::new(),
            },
            encoder: None,
            fut: None,
            eof: true,
        }
    }
@ -68,13 +62,12 @@ impl<B: MessageBody> Encoder<B> {
        if should_encode {
            // wrap body only if encoder is feature-enabled
-            if let Some(enc) = ContentEncoder::select(encoding) {
+            if let Some(coop_encoder) = CooperativeContentEncoder::select(encoding) {
                update_head(encoding, head);
                return Encoder {
                    body,
-                    encoder: Some(enc),
+                    encoder: Some(coop_encoder),
                    fut: None,
                    eof: false,
                };
            }
@ -83,10 +76,18 @@ impl<B: MessageBody> Encoder<B> {
        Encoder {
            body,
            encoder: None,
            fut: None,
            eof: false,
        }
    }
    /// Overrides the preferred size, in bytes, of the input slices handed to the
    /// content encoder in a single step.
    ///
    /// A `size` of `0` is ignored, and the call has no effect when this `Encoder`
    /// holds no content encoder.
    pub fn with_encode_chunk_size(mut self, size: usize) -> Self {
        match self.encoder.as_mut() {
            Some(cooperative) if size > 0 => cooperative.preferred_chunk_size = size,
            _ => {}
        }
        self
    }
 }
 pin_project! {
@ -169,22 +170,13 @@ where
                return Poll::Ready(None);
            }
-            if let Some(ref mut fut) = this.fut {
+            if let Some(cooperative_encoder) = this.encoder.as_deref_mut() {
-                let mut encoder = ready!(Pin::new(fut).poll(cx))
+                match ready!(Pin::new(cooperative_encoder).poll_encoded_chunk(cx)) {
-                    .map_err(|_| {
+                    Ok(Some(encoded_chunk)) => return Poll::Ready(Some(Ok(encoded_chunk))),
-                        EncoderError::Io(io::Error::new(
+                    Ok(None) => {
-                            io::ErrorKind::Other,
+                        // Need next chunk from uncompressed body
-                            "Blocking task was cancelled unexpectedly",
+                    }
-                        ))
+                    Err(err) => return Poll::Ready(Some(Err(err))),
                    })?
                    .map_err(EncoderError::Io)?;
                let chunk = encoder.take();
                *this.encoder = Some(encoder);
                this.fut.take();
                if !chunk.is_empty() {
                    return Poll::Ready(Some(Ok(chunk)));
                }
            }
@ -193,30 +185,19 @@ where
            match result {
                Some(Err(err)) => return Poll::Ready(Some(Err(err))),
-                Some(Ok(chunk)) => {
+                Some(Ok(chunk)) => match this.encoder.as_deref_mut() {
-                    if let Some(mut encoder) = this.encoder.take() {
+                    None => return Poll::Ready(Some(Ok(chunk))),
-                        if chunk.len() < MAX_CHUNK_SIZE_ENCODE_IN_PLACE {
+                    Some(encoder) => {
-                            encoder.write(&chunk).map_err(EncoderError::Io)?;
+                        encoder.push_chunk(chunk);
                            let chunk = encoder.take();
                            *this.encoder = Some(encoder);
                            if !chunk.is_empty() {
                                return Poll::Ready(Some(Ok(chunk)));
                            }
                        } else {
                            *this.fut = Some(spawn_blocking(move || {
                                encoder.write(&chunk)?;
                                Ok(encoder)
                            }));
                        }
                    } else {
                        return Poll::Ready(Some(Ok(chunk)));
                    }
-                }
+                },
                None => {
-                    if let Some(encoder) = this.encoder.take() {
+                    if let Some(coop_encoder) = this.encoder.take() {
-                        let chunk = encoder.finish().map_err(EncoderError::Io)?;
+                        let chunk = coop_encoder
                            .content_encoder
                            .finish()
                            .map_err(EncoderError::Io)?;
                        if chunk.is_empty() {
                            return Poll::Ready(None);
@ -276,37 +257,123 @@ enum ContentEncoder {
    Zstd(ZstdEncoder<'static, Writer>),
 }
-impl ContentEncoder {
+struct CooperativeContentEncoder {
-    fn select(encoding: ContentEncoding) -> Option<Self> {
+    content_encoder: ContentEncoder,
    preferred_chunk_size: usize,
    chunk_ready_to_encode: Option<Bytes>,
    budget_used: u8,
 }
 impl CooperativeContentEncoder {
    fn select(encoding: ContentEncoding) -> Option<Box<CooperativeContentEncoder>> {
        // Chunk size picked as the max chunk size which took less than 50 µs to compress on "cargo bench --bench compression-chunk-size"
        // Rust 1.72 linux/arm64 in Docker on Apple M2 Pro: "time to compress chunk/deflate-16384"  time: [39.114 µs 39.283 µs 39.457 µs]
        const MAX_DEFLATE_CHUNK_SIZE: usize = 16384;
        // Rust 1.72 linux/arm64 in Docker on Apple M2 Pro: "time to compress chunk/gzip-16384"     time: [40.121 µs 40.340 µs 40.566 µs]
        const MAX_GZIP_CHUNK_SIZE: usize = 16384;
        // Rust 1.72 linux/arm64 in Docker on Apple M2 Pro: "time to compress chunk/br-8192"        time: [46.076 µs 46.208 µs 46.343 µs]
        const MAX_BROTLI_CHUNK_SIZE: usize = 8192;
        // Rust 1.72 linux/arm64 in Docker on Apple M2 Pro: "time to compress chunk/zstd-16384"     time: [32.872 µs 32.967 µs 33.068 µs]
        const MAX_ZSTD_CHUNK_SIZE: usize = 16384;
        match encoding {
            #[cfg(feature = "compress-gzip")]
-            ContentEncoding::Deflate => Some(ContentEncoder::Deflate(ZlibEncoder::new(
+            ContentEncoding::Deflate => Some(Box::new(CooperativeContentEncoder {
-                Writer::new(),
+                content_encoder: ContentEncoder::Deflate(ZlibEncoder::new(
-                flate2::Compression::fast(),
+                    Writer::new(),
-            ))),
+                    flate2::Compression::fast(),
                )),
                preferred_chunk_size: MAX_DEFLATE_CHUNK_SIZE,
                chunk_ready_to_encode: None,
                budget_used: 0,
            })),
            #[cfg(feature = "compress-gzip")]
-            ContentEncoding::Gzip => Some(ContentEncoder::Gzip(GzEncoder::new(
+            ContentEncoding::Gzip => Some(Box::new(CooperativeContentEncoder {
-                Writer::new(),
+                content_encoder: ContentEncoder::Gzip(GzEncoder::new(
-                flate2::Compression::fast(),
+                    Writer::new(),
-            ))),
+                    flate2::Compression::fast(),
                )),
                preferred_chunk_size: MAX_GZIP_CHUNK_SIZE,
                chunk_ready_to_encode: None,
                budget_used: 0,
            })),
            #[cfg(feature = "compress-brotli")]
-            ContentEncoding::Brotli => Some(ContentEncoder::Brotli(new_brotli_compressor())),
+            ContentEncoding::Brotli => Some(Box::new(CooperativeContentEncoder {
                content_encoder: ContentEncoder::Brotli(new_brotli_compressor()),
                preferred_chunk_size: MAX_BROTLI_CHUNK_SIZE,
                chunk_ready_to_encode: None,
                budget_used: 0,
            })),
            #[cfg(feature = "compress-zstd")]
            ContentEncoding::Zstd => {
                let encoder = ZstdEncoder::new(Writer::new(), 3).ok()?;
-                Some(ContentEncoder::Zstd(encoder))
+                Some(Box::new(CooperativeContentEncoder {
                    content_encoder: ContentEncoder::Zstd(encoder),
                    preferred_chunk_size: MAX_ZSTD_CHUNK_SIZE,
                    chunk_ready_to_encode: None,
                    budget_used: 0,
                }))
            }
            _ => None,
        }
    }
    /// Stores `chunk` as the pending input to encode and resets the used budget.
    ///
    /// In debug builds this asserts that no earlier chunk was still pending.
    fn push_chunk(&mut self, chunk: Bytes) {
        let previous = self.chunk_ready_to_encode.replace(chunk);
        debug_assert!(previous.is_none());
        self.budget_used = 0;
    }
    /// Feeds the pending input chunk to the content encoder in slices of at most
    /// `preferred_chunk_size` bytes and returns the first non-empty piece of output.
    ///
    /// Returns:
    /// - `Poll::Ready(Ok(Some(bytes)))` when the encoder produced output;
    /// - `Poll::Ready(Ok(None))` when no input is pending, i.e. the caller should
    ///   provide the next chunk via `push_chunk`;
    /// - `Poll::Ready(Err(_))` when writing to the content encoder failed;
    /// - `Poll::Pending` once the budget is used up; the waker is notified first, so
    ///   the task is polled again.
    fn poll_encoded_chunk(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
    ) -> Poll<Result<Option<Bytes>, EncoderError>> {
        // Maximum number of encode steps performed before voluntarily yielding.
        // See inspiration:
        // https://tokio.rs/blog/2020-04-preemption
        // https://ryhl.io/blog/async-what-is-blocking/
        const BUDGET_LIMIT: u8 = 8;
        let this = self.get_mut();
        loop {
            if this.budget_used >= BUDGET_LIMIT {
                this.budget_used = 0;
                // Ask to be polled again right away; returning `Pending` only serves to
                // give other tasks a chance to run.
                cx.waker().wake_by_ref();
                return Poll::Pending;
            }
            let mut chunk = match this.chunk_ready_to_encode.take() {
                Some(chunk) => chunk,
                None => return Poll::Ready(Ok(None)),
            };
            let encode_len = chunk.len().min(this.preferred_chunk_size);
            this.content_encoder
                .write(&chunk[..encode_len])
                .map_err(EncoderError::Io)?;
            chunk.advance(encode_len);
            if !chunk.is_empty() {
                // Keep the unprocessed remainder for the next step.
                this.chunk_ready_to_encode = Some(chunk);
            }
            // Charge the budget for every encode step, whether or not it produced output:
            // an encoder that buffers internally would otherwise let this loop consume an
            // arbitrarily large chunk without ever yielding.
            this.budget_used += 1;
            let encoded_chunk = this.content_encoder.take();
            if !encoded_chunk.is_empty() {
                return Poll::Ready(Ok(Some(encoded_chunk)));
            }
        }
    }
 }
 impl ContentEncoder {
    #[inline]
    pub(crate) fn take(&mut self) -> Bytes {
-        match *self {
+        match self {
            #[cfg(feature = "compress-brotli")]
            ContentEncoder::Brotli(ref mut encoder) => encoder.get_mut().take(),
@ -350,7 +417,7 @@ impl ContentEncoder {
    }
    fn write(&mut self, data: &[u8]) -> Result<(), io::Error> {
-        match *self {
+        match self {
            #[cfg(feature = "compress-brotli")]
            ContentEncoder::Brotli(ref mut encoder) => match encoder.write_all(data) {
                Ok(_) => Ok(()),
@ -426,3 +493,74 @@ impl From<EncoderError> for crate::Error {
        crate::Error::new_encoder().with_cause(err)
    }
 }
 #[cfg(test)]
 mod tests {
    use bytes::BytesMut;
    use rand::{seq::SliceRandom, Rng};
    use super::*;
    // Fixture bodies: empty, a few bytes, and this source file itself.
    static EMPTY_BODY: &[u8] = &[];
    static SHORT_BODY: &[u8] = &[1, 2, 3, 4, 6, 7, 8];
    static LONG_BODY: &[u8] = include_bytes!("encoder.rs");
    static BODIES: &[&[u8]] = &[EMPTY_BODY, SHORT_BODY, LONG_BODY];
    /// Shuffles `body`, compresses it through `Encoder` with a random encode chunk
    /// size in `32..128`, and asserts that the result equals the output of writing the
    /// whole shuffled body to the content encoder in a single call.
    async fn test_compression_of_content_encoding(encoding: ContentEncoding, body: &[u8]) {
        let mut rng = rand::thread_rng();
        let mut shuffled = BytesMut::from(body);
        shuffled.shuffle(&mut rng);
        let input = shuffled.freeze();
        let mut head = ResponseHead::new(StatusCode::OK);
        let chunked_encoder = Encoder::response(encoding, &mut head, input.clone())
            .with_encode_chunk_size(rng.gen_range(32..128));
        // Reference output: the same bytes written to the encoder in one piece.
        let mut reference = CooperativeContentEncoder::select(encoding).unwrap();
        reference.content_encoder.write(&input).unwrap();
        let expected = reference.content_encoder.finish().unwrap();
        let size_limit = 256 + input.len();
        let actual = body::to_bytes_limited(chunked_encoder, size_limit)
            .await
            .unwrap()
            .unwrap();
        assert_eq!(expected, actual);
    }
    #[actix_rt::test]
    #[cfg(feature = "compress-gzip")]
    async fn test_gzip_compression_in_chunks_is_the_same_as_whole_chunk_compression() {
        for &body in BODIES {
            test_compression_of_content_encoding(ContentEncoding::Gzip, body).await;
        }
    }
    #[actix_rt::test]
    #[cfg(feature = "compress-gzip")]
    async fn test_deflate_compression_in_chunks_is_the_same_as_whole_chunk_compression() {
        for &body in BODIES {
            test_compression_of_content_encoding(ContentEncoding::Deflate, body).await;
        }
    }
    #[actix_rt::test]
    #[cfg(feature = "compress-brotli")]
    async fn test_brotli_compression_in_chunks_is_the_same_as_whole_chunk_compression() {
        for &body in BODIES {
            test_compression_of_content_encoding(ContentEncoding::Brotli, body).await;
        }
    }
    #[actix_rt::test]
    #[cfg(feature = "compress-zstd")]
    async fn test_zstd_compression_in_chunks_is_the_same_as_whole_chunk_compression() {
        for &body in BODIES {
            test_compression_of_content_encoding(ContentEncoding::Zstd, body).await;
        }
    }
 }