Optimize varint parsing (#337)

* Add benchmarks for varint parsing

* Enable more inlining

* Outline error construction

* Add provided functions to BincodeRead to customize reading of literals

* Add #[inline] to deserialize_byte

* Outline SliceReader::unexpected_eof so that deserialize_varint inlines

* Implement BincodeRead for std::io::BufReader

* Reimplement all of BincodeRead in terms of BufRead-like functions

* Move branch into error-creation function to get below inline threshold
This commit is contained in:
Ben Kimock 2021-06-19 21:16:13 -04:00 committed by GitHub
parent ab8866317c
commit 5c541a297a
5 changed files with 375 additions and 24 deletions

View File

@ -23,3 +23,13 @@ serde = "1.0.63"
[dev-dependencies]
serde_bytes = "0.11"
serde_derive = "1.0.27"
criterion = "0.3"
rand = "0.8"
[[bench]]
name = "varint"
harness = false
[profile.bench]
codegen-units = 1
debug = 1

153
benches/varint.rs Normal file
View File

@ -0,0 +1,153 @@
use criterion::{criterion_group, criterion_main, Criterion};
use bincode::Options;
use rand::distributions::Distribution;
/// Benchmarks decoding a serialized `Vec<u8>` of 10,000 random values straight from a byte
/// slice with `bincode::options()`.
fn slice_varint_u8(c: &mut Criterion) {
    let mut rng = rand::thread_rng();
    let dist = rand::distributions::Uniform::from(0..u8::MAX);
    let input: Vec<u8> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
        .take(10_000)
        .collect();
    let options = bincode::options();
    // Serialize once up front so only deserialization is measured.
    let bytes = options.serialize(&input).unwrap();
    c.bench_function("slice_varint_u8", |b| {
        // Hand the decoded vector back to criterion, which passes the closure's return value
        // through `black_box`; discarding it in here may let the optimizer drop part of the work.
        b.iter(|| -> Vec<u8> { options.deserialize(&bytes).unwrap() })
    });
}
/// Benchmarks decoding a serialized `Vec<u16>` of 10,000 random values straight from a byte
/// slice with `bincode::options()`.
fn slice_varint_u16(c: &mut Criterion) {
    let mut rng = rand::thread_rng();
    let dist = rand::distributions::Uniform::from(0..u16::MAX);
    let input: Vec<u16> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
        .take(10_000)
        .collect();
    let options = bincode::options();
    // Serialize once up front so only deserialization is measured.
    let bytes = options.serialize(&input).unwrap();
    c.bench_function("slice_varint_u16", |b| {
        // Hand the decoded vector back to criterion, which passes the closure's return value
        // through `black_box`; discarding it in here may let the optimizer drop part of the work.
        b.iter(|| -> Vec<u16> { options.deserialize(&bytes).unwrap() })
    });
}
/// Benchmarks decoding a serialized `Vec<u32>` of 10,000 random values straight from a byte
/// slice with `bincode::options()`.
fn slice_varint_u32(c: &mut Criterion) {
    let mut rng = rand::thread_rng();
    let dist = rand::distributions::Uniform::from(0..u32::MAX);
    let input: Vec<u32> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
        .take(10_000)
        .collect();
    let options = bincode::options();
    // Serialize once up front so only deserialization is measured.
    let bytes = options.serialize(&input).unwrap();
    c.bench_function("slice_varint_u32", |b| {
        // Hand the decoded vector back to criterion, which passes the closure's return value
        // through `black_box`; discarding it in here may let the optimizer drop part of the work.
        b.iter(|| -> Vec<u32> { options.deserialize(&bytes).unwrap() })
    });
}
/// Benchmarks decoding a serialized `Vec<u64>` of 10,000 random values straight from a byte
/// slice with `bincode::options()`.
fn slice_varint_u64(c: &mut Criterion) {
    let mut rng = rand::thread_rng();
    let dist = rand::distributions::Uniform::from(0..u64::MAX);
    let input: Vec<u64> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
        .take(10_000)
        .collect();
    let options = bincode::options();
    // Serialize once up front so only deserialization is measured.
    let bytes = options.serialize(&input).unwrap();
    c.bench_function("slice_varint_u64", |b| {
        // Hand the decoded vector back to criterion, which passes the closure's return value
        // through `black_box`; discarding it in here may let the optimizer drop part of the work.
        b.iter(|| -> Vec<u64> { options.deserialize(&bytes).unwrap() })
    });
}
/// Benchmarks decoding a serialized `Vec<u8>` of 10,000 random values through a fresh
/// `std::io::BufReader` wrapped around the encoded bytes.
fn bufreader_varint_u8(c: &mut Criterion) {
    let mut rng = rand::thread_rng();
    let dist = rand::distributions::Uniform::from(0..u8::MAX);
    let input: Vec<u8> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
        .take(10_000)
        .collect();
    let options = bincode::options();
    // Serialize once up front so only deserialization is measured.
    let bytes = options.serialize(&input).unwrap();
    c.bench_function("bufreader_varint_u8", |b| {
        // Hand the decoded vector back to criterion, which passes the closure's return value
        // through `black_box`; discarding it in here may let the optimizer drop part of the work.
        b.iter(|| -> Vec<u8> {
            options
                .deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..]))
                .unwrap()
        })
    });
}
/// Benchmarks decoding a serialized `Vec<u16>` of 10,000 random values through a fresh
/// `std::io::BufReader` wrapped around the encoded bytes.
fn bufreader_varint_u16(c: &mut Criterion) {
    let mut rng = rand::thread_rng();
    let dist = rand::distributions::Uniform::from(0..u16::MAX);
    let input: Vec<u16> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
        .take(10_000)
        .collect();
    let options = bincode::options();
    // Serialize once up front so only deserialization is measured.
    let bytes = options.serialize(&input).unwrap();
    c.bench_function("bufreader_varint_u16", |b| {
        // Hand the decoded vector back to criterion, which passes the closure's return value
        // through `black_box`; discarding it in here may let the optimizer drop part of the work.
        b.iter(|| -> Vec<u16> {
            options
                .deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..]))
                .unwrap()
        })
    });
}
/// Benchmarks decoding a serialized `Vec<u32>` of 10,000 random values through a fresh
/// `std::io::BufReader` wrapped around the encoded bytes.
fn bufreader_varint_u32(c: &mut Criterion) {
    let mut rng = rand::thread_rng();
    let dist = rand::distributions::Uniform::from(0..u32::MAX);
    let input: Vec<u32> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
        .take(10_000)
        .collect();
    let options = bincode::options();
    // Serialize once up front so only deserialization is measured.
    let bytes = options.serialize(&input).unwrap();
    c.bench_function("bufreader_varint_u32", |b| {
        // Hand the decoded vector back to criterion, which passes the closure's return value
        // through `black_box`; discarding it in here may let the optimizer drop part of the work.
        b.iter(|| -> Vec<u32> {
            options
                .deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..]))
                .unwrap()
        })
    });
}
/// Benchmarks decoding a serialized `Vec<u64>` of 10,000 random values through a fresh
/// `std::io::BufReader` wrapped around the encoded bytes.
fn bufreader_varint_u64(c: &mut Criterion) {
    let mut rng = rand::thread_rng();
    let dist = rand::distributions::Uniform::from(0..u64::MAX);
    let input: Vec<u64> = std::iter::from_fn(|| Some(dist.sample(&mut rng)))
        .take(10_000)
        .collect();
    let options = bincode::options();
    // Serialize once up front so only deserialization is measured.
    let bytes = options.serialize(&input).unwrap();
    c.bench_function("bufreader_varint_u64", |b| {
        // Hand the decoded vector back to criterion, which passes the closure's return value
        // through `black_box`; discarding it in here may let the optimizer drop part of the work.
        b.iter(|| -> Vec<u64> {
            options
                .deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..]))
                .unwrap()
        })
    });
}
// Collect every benchmark function of this file into the `benches` group: first the four
// slice-backed decoders, then the four `BufReader`-backed ones.
criterion_group!(
benches,
slice_varint_u8,
slice_varint_u16,
slice_varint_u32,
slice_varint_u64,
bufreader_varint_u8,
bufreader_varint_u16,
bufreader_varint_u32,
bufreader_varint_u64,
);
// Hand the `benches` group to criterion's entry-point macro.
criterion_main!(benches);

View File

@ -1,7 +1,7 @@
use std::io::Write;
use std::mem::size_of;
use super::Options;
use super::{BincodeByteOrder, Options};
use crate::de::read::BincodeRead;
use crate::error::{ErrorKind, Result};
@ -179,6 +179,36 @@ Byte 255 is treated as an extension point; it should not be encoding anything.
Do you have a mismatched bincode version or configuration?
"#;
/// Slow path of varint decoding: reads the discriminant byte and, when it announces a wider
/// integer, the payload, all through the `byteorder::ReadBytesExt` methods on `reader`.
///
/// `O` selects the byte order used for the multi-byte payloads. Returns the decoded value
/// widened to `u64`, an I/O error if a read fails, or the error produced by
/// `invalid_varint_discriminant` for any other discriminant byte.
///
/// Marked `#[cold]` and never inlined so the caller's fast path stays small.
#[inline(never)]
#[cold]
fn deserialize_varint_cold<'a, O, R>(reader: &mut R) -> Result<u64>
where
    O: byteorder::ByteOrder,
    R: BincodeRead<'a>,
{
    use byteorder::ReadBytesExt;
    match reader.read_u8()? {
        // Small values are stored directly in the discriminant byte.
        byte @ 0..=crate::config::int::SINGLE_BYTE_MAX => Ok(u64::from(byte)),
        U16_BYTE => Ok(u64::from(reader.read_u16::<O>()?)),
        U32_BYTE => Ok(u64::from(reader.read_u32::<O>()?)),
        U64_BYTE => Ok(reader.read_u64::<O>()?),
        other => invalid_varint_discriminant(other),
    }
}
#[inline(never)]
#[cold]
fn invalid_varint_discriminant(discriminant: u8) -> Result<u64> {
let msg = match discriminant {
U128_BYTE => {
"Invalid value (u128 range): you may have a version or configuration disagreement?"
}
_ => DESERIALIZE_EXTENSION_POINT_ERR,
};
Err(Box::new(crate::ErrorKind::Custom(msg.to_string())))
}
impl VarintEncoding {
fn varint_size(n: u64) -> u64 {
if n <= SINGLE_BYTE_MAX as u64 {
@ -238,22 +268,26 @@ impl VarintEncoding {
}
}
#[inline]
fn deserialize_varint<'de, R: BincodeRead<'de>, O: Options>(
de: &mut crate::de::Deserializer<R, O>,
) -> Result<u64> {
#[allow(ellipsis_inclusive_range_patterns)]
match de.deserialize_byte()? {
byte @ 0...SINGLE_BYTE_MAX => Ok(byte as u64),
U16_BYTE => Ok(de.deserialize_literal_u16()? as u64),
U32_BYTE => Ok(de.deserialize_literal_u32()? as u64),
U64_BYTE => de.deserialize_literal_u64(),
U128_BYTE => Err(Box::new(ErrorKind::Custom(
"Invalid value (u128 range): you may have a version or configuration disagreement?"
.to_string(),
))),
_ => Err(Box::new(ErrorKind::Custom(
DESERIALIZE_EXTENSION_POINT_ERR.to_string(),
))),
let read_u16 = <<O::Endian as BincodeByteOrder>::Endian as byteorder::ByteOrder>::read_u16;
let read_u32 = <<O::Endian as BincodeByteOrder>::Endian as byteorder::ByteOrder>::read_u32;
let read_u64 = <<O::Endian as BincodeByteOrder>::Endian as byteorder::ByteOrder>::read_u64;
if let Some(bytes) = de.reader.peek_read(9) {
let (discriminant, bytes) = bytes.split_at(1);
let (out, used) = match discriminant[0] {
byte @ 0..=crate::config::int::SINGLE_BYTE_MAX => (byte as u64, 1),
U16_BYTE => (read_u16(&bytes[..2]) as u64, 3),
U32_BYTE => (read_u32(&bytes[..4]) as u64, 5),
U64_BYTE => (read_u64(&bytes[..8]) as u64, 9),
other => return invalid_varint_discriminant(other),
};
de.reader.consume(used);
Ok(out)
} else {
deserialize_varint_cold::<<O::Endian as BincodeByteOrder>::Endian, R>(&mut de.reader)
}
}
@ -637,6 +671,7 @@ fn cast_u64_to_usize(n: u64) -> Result<usize> {
))))
}
}
#[inline]
fn cast_u64_to_u32(n: u64) -> Result<u32> {
if n <= u32::max_value() as u64 {
Ok(n as u32)
@ -647,6 +682,8 @@ fn cast_u64_to_u32(n: u64) -> Result<u32> {
))))
}
}
#[inline]
fn cast_u64_to_u16(n: u64) -> Result<u16> {
if n <= u16::max_value() as u64 {
Ok(n as u16)

View File

@ -29,14 +29,32 @@ pub struct Deserializer<R, O: Options> {
options: O,
}
/// Fallback for fixed-size reads: fills an `N`-byte array from `reader` with `read_exact`.
///
/// Returns the filled array, or the converted I/O error if `read_exact` fails. Kept out of
/// line and cold because callers only reach it when they cannot read from a peeked buffer.
#[inline(never)]
#[cold]
fn bincode_read_cold<R, const N: usize>(reader: &mut R) -> Result<[u8; N]>
where
    R: std::io::Read,
{
    let mut bytes = [0u8; N];
    match reader.read_exact(&mut bytes) {
        Ok(()) => Ok(bytes),
        Err(io_err) => Err(io_err.into()),
    }
}
macro_rules! impl_deserialize_literal {
($name:ident : $ty:ty = $read:ident()) => {
#[inline]
pub(crate) fn $name(&mut self) -> Result<$ty> {
const SIZE: usize = core::mem::size_of::<$ty>();
let read = <<O::Endian as BincodeByteOrder>::Endian as byteorder::ByteOrder>::$read;
self.read_literal_type::<$ty>()?;
self.reader
.$read::<<O::Endian as BincodeByteOrder>::Endian>()
.map_err(Into::into)
if let Some(buf) = self.reader.peek_read(SIZE) {
let v = read(buf);
self.reader.consume(SIZE);
Ok(v)
} else {
let bytes = bincode_read_cold::<_, SIZE>(&mut self.reader)?;
Ok(read(&bytes))
}
}
};
}
@ -67,9 +85,16 @@ impl<'de, R: BincodeRead<'de>, O: Options> Deserializer<R, O> {
Deserializer { reader: r, options }
}
#[inline]
pub(crate) fn deserialize_byte(&mut self) -> Result<u8> {
self.read_literal_type::<u8>()?;
self.reader.read_u8().map_err(Into::into)
if let Some(buf) = self.reader.peek_read(1) {
let byte = buf[0];
self.reader.consume(1);
Ok(byte)
} else {
self.reader.read_u8().map_err(Into::into)
}
}
impl_deserialize_literal! { deserialize_literal_u16 : u16 = read_u16() }

View File

@ -23,6 +23,51 @@ pub trait BincodeRead<'storage>: io::Read {
fn forward_read_bytes<V>(&mut self, length: usize, visitor: V) -> Result<V::Value>
where
V: serde::de::Visitor<'storage>;
/// Returns a view of the next bytes held in this reader's internal buffer without consuming
/// them, so callers can decode straight from the buffer instead of first copying the data out
/// through the `std::io::Read` interface.
///
/// The argument is the number of bytes the caller wants to look at. The provided
/// implementation always returns `None`, which tells callers to fall back to ordinary reads;
/// readers that wrap a buffer can override it.
#[inline]
fn peek_read(&self, _: usize) -> Option<&[u8]> {
None
}
/// Marks the given number of bytes, previously exposed by `peek_read`, as read.
///
/// Any reader that overrides `peek_read` must override this too, so that later reads or
/// peek-reads do not return the same bytes again. The provided implementation does nothing,
/// which matches the provided `peek_read` that never exposes any bytes.
#[inline]
fn consume(&mut self, _: usize) {}
}
/// Lets a mutable reference to any `BincodeRead` act as a `BincodeRead` itself by forwarding
/// every method, including the peek/consume pair, to the referenced reader.
impl<'a, 'storage, T> BincodeRead<'storage> for &'a mut T
where
    T: BincodeRead<'storage>,
{
    fn forward_read_str<V>(&mut self, length: usize, visitor: V) -> Result<V::Value>
    where
        V: serde::de::Visitor<'storage>,
    {
        (**self).forward_read_str(length, visitor)
    }

    fn get_byte_buffer(&mut self, length: usize) -> Result<Vec<u8>> {
        (**self).get_byte_buffer(length)
    }

    fn forward_read_bytes<V>(&mut self, length: usize, visitor: V) -> Result<V::Value>
    where
        V: serde::de::Visitor<'storage>,
    {
        (**self).forward_read_bytes(length, visitor)
    }

    // Forwarded explicitly: the provided trait method would always answer `None`.
    #[inline]
    fn peek_read(&self, n: usize) -> Option<&[u8]> {
        (**self).peek_read(n)
    }

    // Forwarded explicitly: the provided trait method is a no-op.
    #[inline]
    fn consume(&mut self, n: usize) {
        (**self).consume(n)
    }
}
/// A BincodeRead implementation for byte slices
@ -52,6 +97,7 @@ impl<'storage> SliceReader<'storage> {
Ok(read_slice)
}
/// Returns `true` when the reader's remaining slice is empty.
#[inline]
pub(crate) fn is_finished(&self) -> bool {
self.slice.is_empty()
}
@ -98,7 +144,8 @@ impl<R: io::Read> io::Read for IoReader<R> {
}
impl<'storage> SliceReader<'storage> {
#[inline(always)]
#[inline(never)]
#[cold]
fn unexpected_eof() -> Box<crate::ErrorKind> {
Box::new(crate::ErrorKind::Io(io::Error::new(
io::ErrorKind::UnexpectedEof,
@ -121,11 +168,6 @@ impl<'storage> BincodeRead<'storage> for SliceReader<'storage> {
visitor.visit_borrowed_str(string)
}
#[inline(always)]
fn get_byte_buffer(&mut self, length: usize) -> Result<Vec<u8>> {
self.get_byte_slice(length).map(|x| x.to_vec())
}
#[inline(always)]
fn forward_read_bytes<V>(&mut self, length: usize, visitor: V) -> Result<V::Value>
where
@ -133,6 +175,21 @@ impl<'storage> BincodeRead<'storage> for SliceReader<'storage> {
{
visitor.visit_borrowed_bytes(self.get_byte_slice(length)?)
}
#[inline(always)]
fn get_byte_buffer(&mut self, length: usize) -> Result<Vec<u8>> {
self.get_byte_slice(length).map(|x| x.to_vec())
}
/// Borrows the first `n` bytes of the remaining slice without advancing the reader, or
/// returns `None` when fewer than `n` bytes remain.
#[inline]
fn peek_read(&self, n: usize) -> Option<&'storage [u8]> {
self.slice.get(..n)
}
/// Advances the reader past the next `n` bytes. If fewer than `n` bytes remain, the reader
/// is left with an empty slice instead of panicking.
#[inline]
fn consume(&mut self, n: usize) {
    // `get` already yields a `&'storage [u8]`, so it can be stored without re-borrowing.
    self.slice = self.slice.get(n..).unwrap_or_default();
}
}
impl<R> IoReader<R>
@ -180,6 +237,75 @@ where
}
}
impl<'storage, R> BincodeRead<'storage> for std::io::BufReader<R>
where
R: io::Read,
{
fn forward_read_str<V>(&mut self, length: usize, visitor: V) -> Result<V::Value>
where
V: serde::de::Visitor<'storage>,
{
let mut consume = false;
let mut temp_buf = Vec::new();
let buf = if let Some(buf) = self.peek_read(length) {
consume = true;
buf
} else {
temp_buf.resize(length, 0);
<Self as std::io::Read>::read_exact(self, &mut temp_buf)?;
&temp_buf
};
let string = match ::std::str::from_utf8(&buf) {
Ok(s) => s,
Err(e) => return Err(crate::ErrorKind::InvalidUtf8Encoding(e).into()),
};
let res = visitor.visit_str::<crate::Error>(string);
if consume {
self.consume(length);
}
res
}
fn forward_read_bytes<V>(&mut self, length: usize, visitor: V) -> Result<V::Value>
where
V: serde::de::Visitor<'storage>,
{
let mut consume = false;
let mut temp_buf = Vec::new();
let buf = if let Some(buf) = self.peek_read(length) {
consume = true;
buf
} else {
temp_buf.resize(length, 0);
<Self as std::io::Read>::read_exact(self, &mut temp_buf)?;
&temp_buf
};
let res = visitor.visit_bytes::<crate::Error>(buf);
if consume {
self.consume(length);
}
res
}
fn get_byte_buffer(&mut self, length: usize) -> Result<Vec<u8>> {
let mut buf = vec![0; length];
<Self as std::io::Read>::read_exact(self, &mut buf)?;
Ok(buf)
}
#[inline]
fn peek_read(&self, n: usize) -> Option<&[u8]> {
self.buffer().get(..n)
}
#[inline]
fn consume(&mut self, n: usize) {
<Self as io::BufRead>::consume(self, n);
}
}
#[cfg(test)]
mod test {
use super::IoReader;