diff --git a/Cargo.toml b/Cargo.toml index f92aa65..cd220dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,3 +23,13 @@ serde = "1.0.63" [dev-dependencies] serde_bytes = "0.11" serde_derive = "1.0.27" +criterion = "0.3" +rand = "0.8" + +[[bench]] +name = "varint" +harness = false + +[profile.bench] +codegen-units = 1 +debug = 1 diff --git a/benches/varint.rs b/benches/varint.rs new file mode 100644 index 0000000..690f7e9 --- /dev/null +++ b/benches/varint.rs @@ -0,0 +1,153 @@ +use criterion::{criterion_group, criterion_main, Criterion}; + +use bincode::Options; +use rand::distributions::Distribution; + +fn slice_varint_u8(c: &mut Criterion) { + let mut rng = rand::thread_rng(); + let dist = rand::distributions::Uniform::from(0..u8::MAX); + let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) + .take(10_000) + .collect(); + let options = bincode::options(); + let bytes = options.serialize(&input).unwrap(); + + c.bench_function("slice_varint_u8", |b| { + b.iter(|| { + let _: Vec = options.deserialize(&bytes).unwrap(); + }) + }); +} + +fn slice_varint_u16(c: &mut Criterion) { + let mut rng = rand::thread_rng(); + let dist = rand::distributions::Uniform::from(0..u16::MAX); + let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) + .take(10_000) + .collect(); + let options = bincode::options(); + let bytes = options.serialize(&input).unwrap(); + + c.bench_function("slice_varint_u16", |b| { + b.iter(|| { + let _: Vec = options.deserialize(&bytes).unwrap(); + }) + }); +} + +fn slice_varint_u32(c: &mut Criterion) { + let mut rng = rand::thread_rng(); + let dist = rand::distributions::Uniform::from(0..u32::MAX); + let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) + .take(10_000) + .collect(); + let options = bincode::options(); + let bytes = options.serialize(&input).unwrap(); + + c.bench_function("slice_varint_u32", |b| { + b.iter(|| { + let _: Vec = options.deserialize(&bytes).unwrap(); + }) + }); +} + +fn slice_varint_u64(c: &mut Criterion) { + let mut rng = rand::thread_rng(); + let dist = rand::distributions::Uniform::from(0..u64::MAX); + let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) + .take(10_000) + .collect(); + let options = bincode::options(); + let bytes = options.serialize(&input).unwrap(); + + c.bench_function("slice_varint_u64", |b| { + b.iter(|| { + let _: Vec = options.deserialize(&bytes).unwrap(); + }) + }); +} + +fn bufreader_varint_u8(c: &mut Criterion) { + let mut rng = rand::thread_rng(); + let dist = rand::distributions::Uniform::from(0..u8::MAX); + let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) + .take(10_000) + .collect(); + let options = bincode::options(); + let bytes = options.serialize(&input).unwrap(); + + c.bench_function("bufreader_varint_u8", |b| { + b.iter(|| { + let _: Vec = options + .deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..])) + .unwrap(); + }) + }); +} + +fn bufreader_varint_u16(c: &mut Criterion) { + let mut rng = rand::thread_rng(); + let dist = rand::distributions::Uniform::from(0..u16::MAX); + let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) + .take(10_000) + .collect(); + let options = bincode::options(); + let bytes = options.serialize(&input).unwrap(); + + c.bench_function("bufreader_varint_u16", |b| { + b.iter(|| { + let _: Vec = options + .deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..])) + .unwrap(); + }) + }); +} + +fn bufreader_varint_u32(c: &mut Criterion) { + let mut rng = rand::thread_rng(); + let dist = rand::distributions::Uniform::from(0..u32::MAX); + let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) + .take(10_000) + .collect(); + let options = bincode::options(); + let bytes = options.serialize(&input).unwrap(); + + c.bench_function("bufreader_varint_u32", |b| { + b.iter(|| { + let _: Vec = options + .deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..])) + .unwrap(); + }) + }); +} + +fn bufreader_varint_u64(c: &mut Criterion) { + let mut rng = rand::thread_rng(); + let dist = rand::distributions::Uniform::from(0..u64::MAX); + let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) + .take(10_000) + .collect(); + let options = bincode::options(); + let bytes = options.serialize(&input).unwrap(); + + c.bench_function("bufreader_varint_u64", |b| { + b.iter(|| { + let _: Vec = options + .deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..])) + .unwrap(); + }) + }); +} + +criterion_group!( + benches, + slice_varint_u8, + slice_varint_u16, + slice_varint_u32, + slice_varint_u64, + bufreader_varint_u8, + bufreader_varint_u16, + bufreader_varint_u32, + bufreader_varint_u64, +); +criterion_main!(benches); diff --git a/src/config/int.rs b/src/config/int.rs index 729881c..4976b74 100644 --- a/src/config/int.rs +++ b/src/config/int.rs @@ -1,7 +1,7 @@ use std::io::Write; use std::mem::size_of; -use super::Options; +use super::{BincodeByteOrder, Options}; use crate::de::read::BincodeRead; use crate::error::{ErrorKind, Result}; @@ -179,6 +179,36 @@ Byte 255 is treated as an extension point; it should not be encoding anything. Do you have a mismatched bincode version or configuration? "#; +#[inline(never)] +#[cold] +fn deserialize_varint_cold<'a, O, R>(reader: &mut R) -> Result +where + O: byteorder::ByteOrder, + R: BincodeRead<'a>, +{ + use byteorder::ReadBytesExt; + #[allow(ellipsis_inclusive_range_patterns)] + match reader.read_u8()? { + byte @ 0...crate::config::int::SINGLE_BYTE_MAX => Ok(byte as u64), + U16_BYTE => Ok(reader.read_u16::()? as u64), + U32_BYTE => Ok(reader.read_u32::()? as u64), + U64_BYTE => Ok(reader.read_u64::()? as u64), + other => invalid_varint_discriminant(other), + } +} + +#[inline(never)] +#[cold] +fn invalid_varint_discriminant(discriminant: u8) -> Result { + let msg = match discriminant { + U128_BYTE => { + "Invalid value (u128 range): you may have a version or configuration disagreement?" + } + _ => DESERIALIZE_EXTENSION_POINT_ERR, + }; + Err(Box::new(crate::ErrorKind::Custom(msg.to_string()))) +} + impl VarintEncoding { fn varint_size(n: u64) -> u64 { if n <= SINGLE_BYTE_MAX as u64 { @@ -238,22 +268,26 @@ impl VarintEncoding { } } + #[inline] fn deserialize_varint<'de, R: BincodeRead<'de>, O: Options>( de: &mut crate::de::Deserializer, ) -> Result { - #[allow(ellipsis_inclusive_range_patterns)] - match de.deserialize_byte()? { - byte @ 0...SINGLE_BYTE_MAX => Ok(byte as u64), - U16_BYTE => Ok(de.deserialize_literal_u16()? as u64), - U32_BYTE => Ok(de.deserialize_literal_u32()? as u64), - U64_BYTE => de.deserialize_literal_u64(), - U128_BYTE => Err(Box::new(ErrorKind::Custom( - "Invalid value (u128 range): you may have a version or configuration disagreement?" - .to_string(), - ))), - _ => Err(Box::new(ErrorKind::Custom( - DESERIALIZE_EXTENSION_POINT_ERR.to_string(), - ))), + let read_u16 = <::Endian as byteorder::ByteOrder>::read_u16; + let read_u32 = <::Endian as byteorder::ByteOrder>::read_u32; + let read_u64 = <::Endian as byteorder::ByteOrder>::read_u64; + if let Some(bytes) = de.reader.peek_read(9) { + let (discriminant, bytes) = bytes.split_at(1); + let (out, used) = match discriminant[0] { + byte @ 0..=crate::config::int::SINGLE_BYTE_MAX => (byte as u64, 1), + U16_BYTE => (read_u16(&bytes[..2]) as u64, 3), + U32_BYTE => (read_u32(&bytes[..4]) as u64, 5), + U64_BYTE => (read_u64(&bytes[..8]) as u64, 9), + other => return invalid_varint_discriminant(other), + }; + de.reader.consume(used); + Ok(out) + } else { + deserialize_varint_cold::<::Endian, R>(&mut de.reader) } } @@ -637,6 +671,7 @@ fn cast_u64_to_usize(n: u64) -> Result { )))) } } +#[inline] fn cast_u64_to_u32(n: u64) -> Result { if n <= u32::max_value() as u64 { Ok(n as u32) @@ -647,6 +682,8 @@ fn cast_u64_to_u32(n: u64) -> Result { )))) } } + +#[inline] fn cast_u64_to_u16(n: u64) -> Result { if n <= u16::max_value() as u64 { Ok(n as u16) diff --git a/src/de/mod.rs b/src/de/mod.rs index f15f1ec..6b2208a 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -29,14 +29,32 @@ pub struct Deserializer { options: O, } +#[inline(never)] +#[cold] +fn bincode_read_cold(reader: &mut R) -> Result<[u8; N]> +where + R: std::io::Read, +{ + let mut buf = [0u8; N]; + reader.read_exact(&mut buf)?; + Ok(buf) +} + macro_rules! impl_deserialize_literal { ($name:ident : $ty:ty = $read:ident()) => { #[inline] pub(crate) fn $name(&mut self) -> Result<$ty> { + const SIZE: usize = core::mem::size_of::<$ty>(); + let read = <::Endian as byteorder::ByteOrder>::$read; self.read_literal_type::<$ty>()?; - self.reader - .$read::<::Endian>() - .map_err(Into::into) + if let Some(buf) = self.reader.peek_read(SIZE) { + let v = read(buf); + self.reader.consume(SIZE); + Ok(v) + } else { + let bytes = bincode_read_cold::<_, SIZE>(&mut self.reader)?; + Ok(read(&bytes)) + } } }; } @@ -67,9 +85,16 @@ impl<'de, R: BincodeRead<'de>, O: Options> Deserializer { Deserializer { reader: r, options } } + #[inline] pub(crate) fn deserialize_byte(&mut self) -> Result { self.read_literal_type::()?; - self.reader.read_u8().map_err(Into::into) + if let Some(buf) = self.reader.peek_read(1) { + let byte = buf[0]; + self.reader.consume(1); + Ok(byte) + } else { + self.reader.read_u8().map_err(Into::into) + } } impl_deserialize_literal! { deserialize_literal_u16 : u16 = read_u16() } diff --git a/src/de/read.rs b/src/de/read.rs index ba0d8f6..8d6f8d9 100644 --- a/src/de/read.rs +++ b/src/de/read.rs @@ -23,6 +23,51 @@ pub trait BincodeRead<'storage>: io::Read { fn forward_read_bytes(&mut self, length: usize, visitor: V) -> Result where V: serde::de::Visitor<'storage>; + + /// If this reader wraps a buffer of any kind, this function lets callers access contents of + /// the buffer without passing data through a buffer first via the `std::io::Read` interface + #[inline] + fn peek_read(&self, _: usize) -> Option<&[u8]> { + None + } + + /// If an implementation of `peek_read` is provided, an implementation of this function + /// must be provided so that subsequent reads or peek-reads do not return the same bytes + #[inline] + fn consume(&mut self, _: usize) {} +} + +impl<'a, 'storage, T> BincodeRead<'storage> for &'a mut T +where + T: BincodeRead<'storage>, +{ + fn forward_read_str(&mut self, length: usize, visitor: V) -> Result + where + V: serde::de::Visitor<'storage>, + { + (*self).forward_read_str(length, visitor) + } + + fn get_byte_buffer(&mut self, length: usize) -> Result> { + (*self).get_byte_buffer(length) + } + + fn forward_read_bytes(&mut self, length: usize, visitor: V) -> Result + where + V: serde::de::Visitor<'storage>, + { + (*self).forward_read_bytes(length, visitor) + } + + #[inline] + fn peek_read(&self, n: usize) -> Option<&[u8]> { + (**self).peek_read(n) + } + + #[inline] + fn consume(&mut self, n: usize) { + (*self).consume(n) + } } /// A BincodeRead implementation for byte slices @@ -52,6 +97,7 @@ impl<'storage> SliceReader<'storage> { Ok(read_slice) } + #[inline] pub(crate) fn is_finished(&self) -> bool { self.slice.is_empty() } @@ -98,7 +144,8 @@ impl io::Read for IoReader { } impl<'storage> SliceReader<'storage> { - #[inline(always)] + #[inline(never)] + #[cold] fn unexpected_eof() -> Box { Box::new(crate::ErrorKind::Io(io::Error::new( io::ErrorKind::UnexpectedEof, @@ -121,11 +168,6 @@ impl<'storage> BincodeRead<'storage> for SliceReader<'storage> { visitor.visit_borrowed_str(string) } - #[inline(always)] - fn get_byte_buffer(&mut self, length: usize) -> Result> { - self.get_byte_slice(length).map(|x| x.to_vec()) - } - #[inline(always)] fn forward_read_bytes(&mut self, length: usize, visitor: V) -> Result where @@ -133,6 +175,21 @@ impl<'storage> BincodeRead<'storage> for SliceReader<'storage> { { visitor.visit_borrowed_bytes(self.get_byte_slice(length)?) } + + #[inline(always)] + fn get_byte_buffer(&mut self, length: usize) -> Result> { + self.get_byte_slice(length).map(|x| x.to_vec()) + } + + #[inline] + fn peek_read(&self, n: usize) -> Option<&'storage [u8]> { + self.slice.get(..n) + } + + #[inline] + fn consume(&mut self, n: usize) { + self.slice = &self.slice.get(n..).unwrap_or_default(); + } } impl IoReader @@ -180,6 +237,75 @@ where } } +impl<'storage, R> BincodeRead<'storage> for std::io::BufReader +where + R: io::Read, +{ + fn forward_read_str(&mut self, length: usize, visitor: V) -> Result + where + V: serde::de::Visitor<'storage>, + { + let mut consume = false; + let mut temp_buf = Vec::new(); + let buf = if let Some(buf) = self.peek_read(length) { + consume = true; + buf + } else { + temp_buf.resize(length, 0); + ::read_exact(self, &mut temp_buf)?; + &temp_buf + }; + let string = match ::std::str::from_utf8(&buf) { + Ok(s) => s, + Err(e) => return Err(crate::ErrorKind::InvalidUtf8Encoding(e).into()), + }; + + let res = visitor.visit_str::(string); + if consume { + self.consume(length); + } + res + } + + fn forward_read_bytes(&mut self, length: usize, visitor: V) -> Result + where + V: serde::de::Visitor<'storage>, + { + let mut consume = false; + let mut temp_buf = Vec::new(); + let buf = if let Some(buf) = self.peek_read(length) { + consume = true; + buf + } else { + temp_buf.resize(length, 0); + ::read_exact(self, &mut temp_buf)?; + &temp_buf + }; + + let res = visitor.visit_bytes::(buf); + if consume { + self.consume(length); + } + res + } + + fn get_byte_buffer(&mut self, length: usize) -> Result> { + let mut buf = vec![0; length]; + ::read_exact(self, &mut buf)?; + Ok(buf) + } + + #[inline] + fn peek_read(&self, n: usize) -> Option<&[u8]> { + self.buffer().get(..n) + } + + #[inline] + fn consume(&mut self, n: usize) { + ::consume(self, n); + } +} + #[cfg(test)] mod test { use super::IoReader;