Improved encoding and decoding speed of Vec<u8> (#619)

* Improved encoding and decoding speed of Vec<u8>

* Added black_box calls to benches/string.rs
Added a SizeWriter because someone finally has a benchmark to show it's faster

* Improved performance for `impl<T> Encode for [T]`

* Added #[inline] to `impl Encoder for EncoderImpl`

---------

Co-authored-by: Victor Koenders <victor.koenders@qrtech.se>
This commit is contained in:
Trangar 2023-03-30 11:45:47 +02:00 committed by GitHub
parent 3aa269bfea
commit 9880abe499
6 changed files with 145 additions and 9 deletions

View File

@ -44,6 +44,8 @@ rand = "0.8"
uuid = { version = "1.1", features = ["serde"] }
chrono = { version = "0.4", features = ["serde"] }
glam = { version = "0.21", features = ["serde"] }
bincode_1 = { version = "1.3", package = "bincode" }
serde = { version = "1.0", features = ["derive"] }
[[bench]]
name = "varint"
@ -53,9 +55,14 @@ harness = false
name = "inline"
harness = false
[[bench]]
name = "string"
harness = false
[profile.bench]
codegen-units = 1
debug = 1
lto = true
[package.metadata.docs.rs]
all-features = true

73
benches/string.rs Normal file
View File

@ -0,0 +1,73 @@
// https://github.com/bincode-org/bincode/issues/618
use bincode::{Decode, Encode};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use serde::{Deserialize, Serialize};
/// Payload type used by the benchmarks in this file.
///
/// It derives the serde traits as well as bincode's own `Encode`/`Decode`,
/// so the same value can be passed to both `bincode_1` and `bincode`.
#[derive(Serialize, Deserialize, Default, Encode, Decode)]
pub struct MyStruct {
/// List of strings; `build_data` gives each item a different number of entries.
pub v: Vec<String>,
/// A single string field.
pub string: String,
/// A single numeric field.
pub number: usize,
}
impl MyStruct {
#[inline]
pub fn new(v: Vec<String>, string: String, number: usize) -> Self {
Self { v, string, number }
}
}
/// Builds `size` benchmark items.
///
/// The item at position `k` carries `k` strings, where string `j` is the decimal
/// text of `j` repeated 100 times. The `string` and `number` fields of every item
/// are taken from `size` itself, not from the item's position.
fn build_data(size: usize) -> Vec<MyStruct> {
    let mut items = Vec::with_capacity(size);
    for count in 0..size {
        let strings: Vec<String> = (0..count)
            .map(|n| n.to_string().repeat(100))
            .collect();
        items.push(MyStruct::new(strings, size.to_string(), size));
    }
    items
}
// Compares bincode 1 (`bincode_1`) with this crate on the same `Vec<MyStruct>`.
// Despite the name, this measures encoding first and decoding afterwards.
fn index_item_decode(c: &mut Criterion) {
let data = build_data(100);
// Encoding through bincode 1.
c.bench_function("bench v1", |b| {
b.iter(|| {
let _ = black_box(bincode_1::serialize(black_box(&data))).unwrap();
});
});
// Encoding through this crate with the standard config.
let config = bincode::config::standard();
c.bench_function("bench v2 (standard)", |b| {
b.iter(|| {
let _ = black_box(bincode::encode_to_vec(black_box(&data), config)).unwrap();
});
});
// Encoding through this crate with the legacy config. `config` is shadowed here,
// so everything below this line uses the legacy config.
let config = bincode::config::legacy();
c.bench_function("bench v2 (legacy)", |b| {
b.iter(|| {
let _ = black_box(bincode::encode_to_vec(black_box(&data), config)).unwrap();
});
});
// Sanity check: the legacy-config output must match bincode 1 byte for byte,
// otherwise the decode benchmarks below would not be reading the same input.
let encodedv1 = bincode_1::serialize(&data).unwrap();
let encodedv2 = bincode::encode_to_vec(&data, config).unwrap();
assert_eq!(encodedv1, encodedv2);
// Decoding through bincode 1.
c.bench_function("bench v1 decode", |b| {
b.iter(|| {
let _: Vec<MyStruct> =
black_box(bincode_1::deserialize(black_box(&encodedv1))).unwrap();
});
});
// Decoding through this crate (legacy config). It reads `encodedv1`, which the
// assertion above showed to be identical to `encodedv2`.
c.bench_function("bench v2 decode (legacy)", |b| {
b.iter(|| {
let _: (Vec<MyStruct>, _) =
black_box(bincode::decode_from_slice(black_box(&encodedv1), config)).unwrap();
});
});
}
// Register `index_item_decode` in the `benches` group and let criterion provide
// the entry point for this bench target.
criterion_group!(benches, index_item_decode);
criterion_main!(benches);

View File

@ -32,6 +32,7 @@ impl<W: Writer, C: Config> EncoderImpl<W, C> {
}
/// Consume the encoder and return the underlying writer.
///
/// Takes `self` by value, so the encoder cannot be used afterwards.
#[inline]
pub fn into_writer(self) -> W {
self.writer
}
@ -42,10 +43,12 @@ impl<W: Writer, C: Config> Encoder for EncoderImpl<W, C> {
type C = C;
/// Gives mutable access to the writer held by this encoder.
#[inline]
fn writer(&mut self) -> &mut Self::W {
&mut self.writer
}
/// Gives shared access to the config held by this encoder.
#[inline]
fn config(&self) -> &Self::C {
&self.config
}

View File

@ -295,10 +295,17 @@ impl Encode for char {
impl<T> Encode for [T]
where
T: Encode,
T: Encode + 'static,
{
fn encode<E: Encoder>(&self, encoder: &mut E) -> Result<(), EncodeError> {
super::encode_slice_len(encoder, self.len())?;
if core::any::TypeId::of::<T>() == core::any::TypeId::of::<u8>() {
let t: &[u8] = unsafe { core::mem::transmute(self) };
encoder.writer().write(t)?;
return Ok(());
}
for item in self {
item.encode(encoder)?;
}

View File

@ -65,3 +65,18 @@ impl<'storage> Writer for SliceWriter<'storage> {
Ok(())
}
}
/// A writer that counts how many bytes were written. This is useful for e.g.
/// pre-allocating buffers before writing to them.
#[derive(Default)]
pub struct SizeWriter {
/// The number of bytes that were written so far.
pub bytes_written: usize,
}
impl Writer for SizeWriter {
    /// Adds the length of `bytes` to the running total. The bytes themselves are
    /// not stored, and this implementation always returns `Ok(())`.
    #[inline(always)]
    fn write(&mut self, bytes: &[u8]) -> Result<(), EncodeError> {
        let incoming = bytes.len();
        self.bytes_written += incoming;
        Ok(())
    }
}

View File

@ -1,6 +1,10 @@
use crate::{
de::{BorrowDecoder, Decode, Decoder},
enc::{self, Encode, Encoder},
de::{read::Reader, BorrowDecoder, Decode, Decoder},
enc::{
self,
write::{SizeWriter, Writer},
Encode, Encoder,
},
error::{DecodeError, EncodeError},
impl_borrow_decode, BorrowDecode, Config,
};
@ -21,6 +25,12 @@ pub(crate) struct VecWriter {
}
impl VecWriter {
/// Create a new vec writer whose backing buffer is pre-allocated with room
/// for at least `cap` bytes.
pub fn with_capacity(cap: usize) -> Self {
    let inner = Vec::with_capacity(cap);
    Self { inner }
}
// May not be used in all feature combinations
#[allow(dead_code)]
pub(crate) fn collect(self) -> Vec<u8> {
@ -29,6 +39,7 @@ impl VecWriter {
}
impl enc::write::Writer for VecWriter {
#[inline(always)]
fn write(&mut self, bytes: &[u8]) -> Result<(), EncodeError> {
self.inner.extend_from_slice(bytes);
Ok(())
@ -40,7 +51,12 @@ impl enc::write::Writer for VecWriter {
/// [config]: config/index.html
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
pub fn encode_to_vec<E: enc::Encode, C: Config>(val: E, config: C) -> Result<Vec<u8>, EncodeError> {
let writer = VecWriter::default();
let size = {
let mut size_writer = enc::EncoderImpl::<_, C>::new(SizeWriter::default(), config);
val.encode(&mut size_writer)?;
size_writer.into_writer().bytes_written
};
let writer = VecWriter::with_capacity(size);
let mut encoder = enc::EncoderImpl::<_, C>::new(writer, config);
val.encode(&mut encoder)?;
Ok(encoder.into_writer().inner)
@ -262,10 +278,20 @@ where
impl<T> Decode for Vec<T>
where
T: Decode,
T: Decode + 'static,
{
fn decode<D: Decoder>(decoder: &mut D) -> Result<Self, DecodeError> {
let len = crate::de::decode_slice_len(decoder)?;
if core::any::TypeId::of::<T>() == core::any::TypeId::of::<u8>() {
decoder.claim_container_read::<T>(len)?;
// optimize for reading u8 vecs
let mut vec = Vec::new();
vec.resize(len, 0u8);
decoder.reader().read(&mut vec)?;
// Safety: Vec<T> is Vec<u8>
return Ok(unsafe { core::mem::transmute(vec) });
}
decoder.claim_container_read::<T>(len)?;
let mut vec = Vec::with_capacity(len);
@ -300,10 +326,15 @@ where
impl<T> Encode for Vec<T>
where
T: Encode,
T: Encode + 'static,
{
fn encode<E: Encoder>(&self, encoder: &mut E) -> Result<(), EncodeError> {
crate::enc::encode_slice_len(encoder, self.len())?;
if core::any::TypeId::of::<T>() == core::any::TypeId::of::<u8>() {
let slice: &[u8] = unsafe { core::mem::transmute(self.as_slice()) };
encoder.writer().write(slice)?;
return Ok(());
}
for item in self.iter() {
item.encode(encoder)?;
}
@ -364,7 +395,7 @@ where
impl<T> Decode for Box<[T]>
where
T: Decode,
T: Decode + 'static,
{
fn decode<D: Decoder>(decoder: &mut D) -> Result<Self, DecodeError> {
let vec = Vec::decode(decoder)?;
@ -444,7 +475,7 @@ where
impl<T> Decode for Rc<[T]>
where
T: Decode,
T: Decode + 'static,
{
fn decode<D: Decoder>(decoder: &mut D) -> Result<Self, DecodeError> {
let vec = Vec::decode(decoder)?;
@ -513,7 +544,7 @@ where
#[cfg(target_has_atomic = "ptr")]
impl<T> Decode for Arc<[T]>
where
T: Decode,
T: Decode + 'static,
{
fn decode<D: Decoder>(decoder: &mut D) -> Result<Self, DecodeError> {
let vec = Vec::decode(decoder)?;