Optimize varint parsing (#337)
* Add benchmarks for varint parsing
* Enable more inlining
* Outline error construction
* Add provided functions to BincodeRead to customize reading of literals
* Add #[inline] to deserialize_byte
* Outline SliceReader::unexpected_eof so that deserialize_varint inlines
* Implement BincodeRead for std::io::BufReader
* Reimplement all of BincodeRead in terms of BufRead-like functions
* Move branch into error-creation function to get below inline threshold
This commit is contained in:
parent
ab8866317c
commit
5c541a297a
10
Cargo.toml
10
Cargo.toml
|
|
@ -23,3 +23,13 @@ serde = "1.0.63"
|
|||
[dev-dependencies]
|
||||
serde_bytes = "0.11"
|
||||
serde_derive = "1.0.27"
|
||||
criterion = "0.3"
|
||||
rand = "0.8"
|
||||
|
||||
[[bench]]
|
||||
name = "varint"
|
||||
harness = false
|
||||
|
||||
[profile.bench]
|
||||
codegen-units = 1
|
||||
debug = 1
|
||||
|
|
|
|||
|
|
@ -0,0 +1,153 @@
|
|||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
|
||||
use bincode::Options;
|
||||
use rand::distributions::Distribution;
|
||||
|
||||
fn slice_varint_u8(c: &mut Criterion) {
|
||||
let mut rng = rand::thread_rng();
|
||||
let dist = rand::distributions::Uniform::from(0..u8::MAX);
|
||||
let input: Vec<u8> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
|
||||
.take(10_000)
|
||||
.collect();
|
||||
let options = bincode::options();
|
||||
let bytes = options.serialize(&input).unwrap();
|
||||
|
||||
c.bench_function("slice_varint_u8", |b| {
|
||||
b.iter(|| {
|
||||
let _: Vec<u8> = options.deserialize(&bytes).unwrap();
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn slice_varint_u16(c: &mut Criterion) {
|
||||
let mut rng = rand::thread_rng();
|
||||
let dist = rand::distributions::Uniform::from(0..u16::MAX);
|
||||
let input: Vec<u16> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
|
||||
.take(10_000)
|
||||
.collect();
|
||||
let options = bincode::options();
|
||||
let bytes = options.serialize(&input).unwrap();
|
||||
|
||||
c.bench_function("slice_varint_u16", |b| {
|
||||
b.iter(|| {
|
||||
let _: Vec<u16> = options.deserialize(&bytes).unwrap();
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn slice_varint_u32(c: &mut Criterion) {
|
||||
let mut rng = rand::thread_rng();
|
||||
let dist = rand::distributions::Uniform::from(0..u32::MAX);
|
||||
let input: Vec<u32> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
|
||||
.take(10_000)
|
||||
.collect();
|
||||
let options = bincode::options();
|
||||
let bytes = options.serialize(&input).unwrap();
|
||||
|
||||
c.bench_function("slice_varint_u32", |b| {
|
||||
b.iter(|| {
|
||||
let _: Vec<u32> = options.deserialize(&bytes).unwrap();
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn slice_varint_u64(c: &mut Criterion) {
|
||||
let mut rng = rand::thread_rng();
|
||||
let dist = rand::distributions::Uniform::from(0..u64::MAX);
|
||||
let input: Vec<u64> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
|
||||
.take(10_000)
|
||||
.collect();
|
||||
let options = bincode::options();
|
||||
let bytes = options.serialize(&input).unwrap();
|
||||
|
||||
c.bench_function("slice_varint_u64", |b| {
|
||||
b.iter(|| {
|
||||
let _: Vec<u64> = options.deserialize(&bytes).unwrap();
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bufreader_varint_u8(c: &mut Criterion) {
|
||||
let mut rng = rand::thread_rng();
|
||||
let dist = rand::distributions::Uniform::from(0..u8::MAX);
|
||||
let input: Vec<u8> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
|
||||
.take(10_000)
|
||||
.collect();
|
||||
let options = bincode::options();
|
||||
let bytes = options.serialize(&input).unwrap();
|
||||
|
||||
c.bench_function("bufreader_varint_u8", |b| {
|
||||
b.iter(|| {
|
||||
let _: Vec<u8> = options
|
||||
.deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..]))
|
||||
.unwrap();
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bufreader_varint_u16(c: &mut Criterion) {
|
||||
let mut rng = rand::thread_rng();
|
||||
let dist = rand::distributions::Uniform::from(0..u16::MAX);
|
||||
let input: Vec<u16> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
|
||||
.take(10_000)
|
||||
.collect();
|
||||
let options = bincode::options();
|
||||
let bytes = options.serialize(&input).unwrap();
|
||||
|
||||
c.bench_function("bufreader_varint_u16", |b| {
|
||||
b.iter(|| {
|
||||
let _: Vec<u16> = options
|
||||
.deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..]))
|
||||
.unwrap();
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bufreader_varint_u32(c: &mut Criterion) {
|
||||
let mut rng = rand::thread_rng();
|
||||
let dist = rand::distributions::Uniform::from(0..u32::MAX);
|
||||
let input: Vec<u32> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
|
||||
.take(10_000)
|
||||
.collect();
|
||||
let options = bincode::options();
|
||||
let bytes = options.serialize(&input).unwrap();
|
||||
|
||||
c.bench_function("bufreader_varint_u32", |b| {
|
||||
b.iter(|| {
|
||||
let _: Vec<u32> = options
|
||||
.deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..]))
|
||||
.unwrap();
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bufreader_varint_u64(c: &mut Criterion) {
|
||||
let mut rng = rand::thread_rng();
|
||||
let dist = rand::distributions::Uniform::from(0..u64::MAX);
|
||||
let input: Vec<u64> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
|
||||
.take(10_000)
|
||||
.collect();
|
||||
let options = bincode::options();
|
||||
let bytes = options.serialize(&input).unwrap();
|
||||
|
||||
c.bench_function("bufreader_varint_u64", |b| {
|
||||
b.iter(|| {
|
||||
let _: Vec<u64> = options
|
||||
.deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..]))
|
||||
.unwrap();
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
slice_varint_u8,
|
||||
slice_varint_u16,
|
||||
slice_varint_u32,
|
||||
slice_varint_u64,
|
||||
bufreader_varint_u8,
|
||||
bufreader_varint_u16,
|
||||
bufreader_varint_u32,
|
||||
bufreader_varint_u64,
|
||||
);
|
||||
criterion_main!(benches);
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
use std::io::Write;
|
||||
use std::mem::size_of;
|
||||
|
||||
use super::Options;
|
||||
use super::{BincodeByteOrder, Options};
|
||||
use crate::de::read::BincodeRead;
|
||||
use crate::error::{ErrorKind, Result};
|
||||
|
||||
|
|
@ -179,6 +179,36 @@ Byte 255 is treated as an extension point; it should not be encoding anything.
|
|||
Do you have a mismatched bincode version or configuration?
|
||||
"#;
|
||||
|
||||
#[inline(never)]
|
||||
#[cold]
|
||||
fn deserialize_varint_cold<'a, O, R>(reader: &mut R) -> Result<u64>
|
||||
where
|
||||
O: byteorder::ByteOrder,
|
||||
R: BincodeRead<'a>,
|
||||
{
|
||||
use byteorder::ReadBytesExt;
|
||||
#[allow(ellipsis_inclusive_range_patterns)]
|
||||
match reader.read_u8()? {
|
||||
byte @ 0...crate::config::int::SINGLE_BYTE_MAX => Ok(byte as u64),
|
||||
U16_BYTE => Ok(reader.read_u16::<O>()? as u64),
|
||||
U32_BYTE => Ok(reader.read_u32::<O>()? as u64),
|
||||
U64_BYTE => Ok(reader.read_u64::<O>()? as u64),
|
||||
other => invalid_varint_discriminant(other),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
#[cold]
|
||||
fn invalid_varint_discriminant(discriminant: u8) -> Result<u64> {
|
||||
let msg = match discriminant {
|
||||
U128_BYTE => {
|
||||
"Invalid value (u128 range): you may have a version or configuration disagreement?"
|
||||
}
|
||||
_ => DESERIALIZE_EXTENSION_POINT_ERR,
|
||||
};
|
||||
Err(Box::new(crate::ErrorKind::Custom(msg.to_string())))
|
||||
}
|
||||
|
||||
impl VarintEncoding {
|
||||
fn varint_size(n: u64) -> u64 {
|
||||
if n <= SINGLE_BYTE_MAX as u64 {
|
||||
|
|
@ -238,22 +268,26 @@ impl VarintEncoding {
|
|||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn deserialize_varint<'de, R: BincodeRead<'de>, O: Options>(
|
||||
de: &mut crate::de::Deserializer<R, O>,
|
||||
) -> Result<u64> {
|
||||
#[allow(ellipsis_inclusive_range_patterns)]
|
||||
match de.deserialize_byte()? {
|
||||
byte @ 0...SINGLE_BYTE_MAX => Ok(byte as u64),
|
||||
U16_BYTE => Ok(de.deserialize_literal_u16()? as u64),
|
||||
U32_BYTE => Ok(de.deserialize_literal_u32()? as u64),
|
||||
U64_BYTE => de.deserialize_literal_u64(),
|
||||
U128_BYTE => Err(Box::new(ErrorKind::Custom(
|
||||
"Invalid value (u128 range): you may have a version or configuration disagreement?"
|
||||
.to_string(),
|
||||
))),
|
||||
_ => Err(Box::new(ErrorKind::Custom(
|
||||
DESERIALIZE_EXTENSION_POINT_ERR.to_string(),
|
||||
))),
|
||||
let read_u16 = <<O::Endian as BincodeByteOrder>::Endian as byteorder::ByteOrder>::read_u16;
|
||||
let read_u32 = <<O::Endian as BincodeByteOrder>::Endian as byteorder::ByteOrder>::read_u32;
|
||||
let read_u64 = <<O::Endian as BincodeByteOrder>::Endian as byteorder::ByteOrder>::read_u64;
|
||||
if let Some(bytes) = de.reader.peek_read(9) {
|
||||
let (discriminant, bytes) = bytes.split_at(1);
|
||||
let (out, used) = match discriminant[0] {
|
||||
byte @ 0..=crate::config::int::SINGLE_BYTE_MAX => (byte as u64, 1),
|
||||
U16_BYTE => (read_u16(&bytes[..2]) as u64, 3),
|
||||
U32_BYTE => (read_u32(&bytes[..4]) as u64, 5),
|
||||
U64_BYTE => (read_u64(&bytes[..8]) as u64, 9),
|
||||
other => return invalid_varint_discriminant(other),
|
||||
};
|
||||
de.reader.consume(used);
|
||||
Ok(out)
|
||||
} else {
|
||||
deserialize_varint_cold::<<O::Endian as BincodeByteOrder>::Endian, R>(&mut de.reader)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -637,6 +671,7 @@ fn cast_u64_to_usize(n: u64) -> Result<usize> {
|
|||
))))
|
||||
}
|
||||
}
|
||||
#[inline]
|
||||
fn cast_u64_to_u32(n: u64) -> Result<u32> {
|
||||
if n <= u32::max_value() as u64 {
|
||||
Ok(n as u32)
|
||||
|
|
@ -647,6 +682,8 @@ fn cast_u64_to_u32(n: u64) -> Result<u32> {
|
|||
))))
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn cast_u64_to_u16(n: u64) -> Result<u16> {
|
||||
if n <= u16::max_value() as u64 {
|
||||
Ok(n as u16)
|
||||
|
|
|
|||
|
|
@ -29,14 +29,32 @@ pub struct Deserializer<R, O: Options> {
|
|||
options: O,
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
#[cold]
|
||||
fn bincode_read_cold<R, const N: usize>(reader: &mut R) -> Result<[u8; N]>
|
||||
where
|
||||
R: std::io::Read,
|
||||
{
|
||||
let mut buf = [0u8; N];
|
||||
reader.read_exact(&mut buf)?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
macro_rules! impl_deserialize_literal {
|
||||
($name:ident : $ty:ty = $read:ident()) => {
|
||||
#[inline]
|
||||
pub(crate) fn $name(&mut self) -> Result<$ty> {
|
||||
const SIZE: usize = core::mem::size_of::<$ty>();
|
||||
let read = <<O::Endian as BincodeByteOrder>::Endian as byteorder::ByteOrder>::$read;
|
||||
self.read_literal_type::<$ty>()?;
|
||||
self.reader
|
||||
.$read::<<O::Endian as BincodeByteOrder>::Endian>()
|
||||
.map_err(Into::into)
|
||||
if let Some(buf) = self.reader.peek_read(SIZE) {
|
||||
let v = read(buf);
|
||||
self.reader.consume(SIZE);
|
||||
Ok(v)
|
||||
} else {
|
||||
let bytes = bincode_read_cold::<_, SIZE>(&mut self.reader)?;
|
||||
Ok(read(&bytes))
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
@ -67,9 +85,16 @@ impl<'de, R: BincodeRead<'de>, O: Options> Deserializer<R, O> {
|
|||
Deserializer { reader: r, options }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn deserialize_byte(&mut self) -> Result<u8> {
|
||||
self.read_literal_type::<u8>()?;
|
||||
self.reader.read_u8().map_err(Into::into)
|
||||
if let Some(buf) = self.reader.peek_read(1) {
|
||||
let byte = buf[0];
|
||||
self.reader.consume(1);
|
||||
Ok(byte)
|
||||
} else {
|
||||
self.reader.read_u8().map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl_deserialize_literal! { deserialize_literal_u16 : u16 = read_u16() }
|
||||
|
|
|
|||
138
src/de/read.rs
138
src/de/read.rs
|
|
@ -23,6 +23,51 @@ pub trait BincodeRead<'storage>: io::Read {
|
|||
fn forward_read_bytes<V>(&mut self, length: usize, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: serde::de::Visitor<'storage>;
|
||||
|
||||
/// If this reader wraps a buffer of any kind, this function lets callers access contents of
|
||||
/// the buffer without passing data through a buffer first via the `std::io::Read` interface
|
||||
#[inline]
|
||||
fn peek_read(&self, _: usize) -> Option<&[u8]> {
|
||||
None
|
||||
}
|
||||
|
||||
/// If an implementation of `peek_read` is provided, an implementation of this function
|
||||
/// must be provided so that subsequent reads or peek-reads do not return the same bytes
|
||||
#[inline]
|
||||
fn consume(&mut self, _: usize) {}
|
||||
}
|
||||
|
||||
impl<'a, 'storage, T> BincodeRead<'storage> for &'a mut T
|
||||
where
|
||||
T: BincodeRead<'storage>,
|
||||
{
|
||||
fn forward_read_str<V>(&mut self, length: usize, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: serde::de::Visitor<'storage>,
|
||||
{
|
||||
(*self).forward_read_str(length, visitor)
|
||||
}
|
||||
|
||||
fn get_byte_buffer(&mut self, length: usize) -> Result<Vec<u8>> {
|
||||
(*self).get_byte_buffer(length)
|
||||
}
|
||||
|
||||
fn forward_read_bytes<V>(&mut self, length: usize, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: serde::de::Visitor<'storage>,
|
||||
{
|
||||
(*self).forward_read_bytes(length, visitor)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn peek_read(&self, n: usize) -> Option<&[u8]> {
|
||||
(**self).peek_read(n)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn consume(&mut self, n: usize) {
|
||||
(*self).consume(n)
|
||||
}
|
||||
}
|
||||
|
||||
/// A BincodeRead implementation for byte slices
|
||||
|
|
@ -52,6 +97,7 @@ impl<'storage> SliceReader<'storage> {
|
|||
Ok(read_slice)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn is_finished(&self) -> bool {
|
||||
self.slice.is_empty()
|
||||
}
|
||||
|
|
@ -98,7 +144,8 @@ impl<R: io::Read> io::Read for IoReader<R> {
|
|||
}
|
||||
|
||||
impl<'storage> SliceReader<'storage> {
|
||||
#[inline(always)]
|
||||
#[inline(never)]
|
||||
#[cold]
|
||||
fn unexpected_eof() -> Box<crate::ErrorKind> {
|
||||
Box::new(crate::ErrorKind::Io(io::Error::new(
|
||||
io::ErrorKind::UnexpectedEof,
|
||||
|
|
@ -121,11 +168,6 @@ impl<'storage> BincodeRead<'storage> for SliceReader<'storage> {
|
|||
visitor.visit_borrowed_str(string)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn get_byte_buffer(&mut self, length: usize) -> Result<Vec<u8>> {
|
||||
self.get_byte_slice(length).map(|x| x.to_vec())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn forward_read_bytes<V>(&mut self, length: usize, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
|
|
@ -133,6 +175,21 @@ impl<'storage> BincodeRead<'storage> for SliceReader<'storage> {
|
|||
{
|
||||
visitor.visit_borrowed_bytes(self.get_byte_slice(length)?)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn get_byte_buffer(&mut self, length: usize) -> Result<Vec<u8>> {
|
||||
self.get_byte_slice(length).map(|x| x.to_vec())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn peek_read(&self, n: usize) -> Option<&'storage [u8]> {
|
||||
self.slice.get(..n)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn consume(&mut self, n: usize) {
|
||||
self.slice = &self.slice.get(n..).unwrap_or_default();
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> IoReader<R>
|
||||
|
|
@ -180,6 +237,75 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
impl<'storage, R> BincodeRead<'storage> for std::io::BufReader<R>
|
||||
where
|
||||
R: io::Read,
|
||||
{
|
||||
fn forward_read_str<V>(&mut self, length: usize, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: serde::de::Visitor<'storage>,
|
||||
{
|
||||
let mut consume = false;
|
||||
let mut temp_buf = Vec::new();
|
||||
let buf = if let Some(buf) = self.peek_read(length) {
|
||||
consume = true;
|
||||
buf
|
||||
} else {
|
||||
temp_buf.resize(length, 0);
|
||||
<Self as std::io::Read>::read_exact(self, &mut temp_buf)?;
|
||||
&temp_buf
|
||||
};
|
||||
let string = match ::std::str::from_utf8(&buf) {
|
||||
Ok(s) => s,
|
||||
Err(e) => return Err(crate::ErrorKind::InvalidUtf8Encoding(e).into()),
|
||||
};
|
||||
|
||||
let res = visitor.visit_str::<crate::Error>(string);
|
||||
if consume {
|
||||
self.consume(length);
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
fn forward_read_bytes<V>(&mut self, length: usize, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: serde::de::Visitor<'storage>,
|
||||
{
|
||||
let mut consume = false;
|
||||
let mut temp_buf = Vec::new();
|
||||
let buf = if let Some(buf) = self.peek_read(length) {
|
||||
consume = true;
|
||||
buf
|
||||
} else {
|
||||
temp_buf.resize(length, 0);
|
||||
<Self as std::io::Read>::read_exact(self, &mut temp_buf)?;
|
||||
&temp_buf
|
||||
};
|
||||
|
||||
let res = visitor.visit_bytes::<crate::Error>(buf);
|
||||
if consume {
|
||||
self.consume(length);
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
fn get_byte_buffer(&mut self, length: usize) -> Result<Vec<u8>> {
|
||||
let mut buf = vec![0; length];
|
||||
<Self as std::io::Read>::read_exact(self, &mut buf)?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn peek_read(&self, n: usize) -> Option<&[u8]> {
|
||||
self.buffer().get(..n)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn consume(&mut self, n: usize) {
|
||||
<Self as io::BufRead>::consume(self, n);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::IoReader;
|
||||
|
|
|
|||
Loading…
Reference in New Issue