Reintroduce varint optimizations

This commit is contained in:
Lena Hellström 2021-10-24 16:44:36 +02:00 committed by GitHub
parent 62b8f39f8f
commit 99de47a6c8
8 changed files with 388 additions and 69 deletions

View File

@ -31,10 +31,17 @@ impl Decode for bool {
}
impl Decode for u8 {
#[inline]
fn decode<D: Decoder>(mut decoder: D) -> Result<Self, DecodeError> {
let mut bytes = [0u8; 1];
decoder.reader().read(&mut bytes)?;
Ok(bytes[0])
if let Some(buf) = decoder.reader().peek_read(1) {
let byte = buf[0];
decoder.reader().consume(1);
Ok(byte)
} else {
let mut bytes = [0u8; 1];
decoder.reader().read(&mut bytes)?;
Ok(bytes[0])
}
}
}

View File

@ -17,6 +17,38 @@ use crate::error::DecodeError;
pub trait Reader {
    /// Fill `bytes` completely with the next bytes of input.
    ///
    /// Exactly `bytes.len()` bytes must be written into the slice; if that is not possible an
    /// error must be returned instead.
    fn read(&mut self, bytes: &mut [u8]) -> Result<(), DecodeError>;

    /// Borrow the next `n` bytes directly from the reader's internal buffer, if it has one,
    /// so callers can inspect them without copying them into a scratch array first.
    ///
    /// The bytes are *not* consumed; call [`Reader::consume`] afterwards to advance past the
    /// ones that were actually used. Callers index into the returned slice assuming `n` bytes
    /// are present, so an implementation should return `Some` only when at least `n` bytes are
    /// available. The default implementation has no buffer to expose and always returns `None`,
    /// which makes callers fall back to [`Reader::read`].
    #[inline]
    fn peek_read(&self, _n: usize) -> Option<&[u8]> {
        None
    }

    /// Advance the reader past `n` bytes previously observed through [`Reader::peek_read`].
    ///
    /// Any implementation that overrides `peek_read` must also override this method, otherwise
    /// subsequent reads or peek-reads would return the same bytes again. The default is a
    /// no-op, matching the default `peek_read` that never hands out bytes.
    #[inline]
    fn consume(&mut self, _n: usize) {}
}
/// Forwarding implementation: a mutable reference to a [`Reader`] is itself a [`Reader`].
///
/// Every method delegates to the referenced reader, so any buffer fast path it provides
/// (`peek_read` / `consume`) stays available through the reference.
impl<'a, T> Reader for &'a mut T
where
    T: Reader,
{
    #[inline]
    fn read(&mut self, bytes: &mut [u8]) -> Result<(), DecodeError> {
        // Name the target impl explicitly so the call can never resolve back to this impl.
        <T as Reader>::read(&mut **self, bytes)
    }

    #[inline]
    fn peek_read(&self, n: usize) -> Option<&[u8]> {
        <T as Reader>::peek_read(&**self, n)
    }

    #[inline]
    fn consume(&mut self, n: usize) {
        <T as Reader>::consume(&mut **self, n)
    }
}
/// A reader for borrowed data. Implementors of this must also implement the [Reader] trait. See the module documentation for more information.
@ -61,6 +93,16 @@ impl<'storage> Reader for SliceReader<'storage> {
Ok(())
}
#[inline]
fn peek_read(&self, n: usize) -> Option<&'storage [u8]> {
self.slice.get(..n)
}
#[inline]
fn consume(&mut self, n: usize) {
self.slice = self.slice.get(n..).unwrap_or_default();
}
}
impl<'storage> BorrowReader<'storage> for SliceReader<'storage> {

View File

@ -144,6 +144,8 @@ pub enum IntegerType {
I64,
I128,
Isize,
Reserved,
}
impl IntegerType {

View File

@ -7,6 +7,7 @@ use crate::{
use core::time::Duration;
use std::{
ffi::{CStr, CString},
io::Read,
net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6},
path::{Path, PathBuf},
sync::{Mutex, RwLock},
@ -19,22 +20,52 @@ use std::{
///
/// [config]: config/index.html
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub fn decode_from_reader<D: Decode, C: Config, R: std::io::Read>(
pub fn decode_from_std_read<D: Decode, C: Config, R: std::io::Read>(
src: &mut R,
_config: C,
) -> Result<D, DecodeError> {
let mut decoder = DecoderImpl::<_, C>::new(src, _config);
let reader = IoReader { reader: src };
let mut decoder = DecoderImpl::<_, C>::new(reader, _config);
D::decode(&mut decoder)
}
impl<R: std::io::Read> Reader for R {
/// Adapter that lets any `std::io::Read` value act as a [`Reader`].
///
/// It does not override `peek_read` / `consume`, so decoding through it always goes through
/// `read`.
struct IoReader<R> {
    /// The wrapped `std::io::Read` source.
    reader: R,
}

impl<R> Reader for IoReader<R>
where
    R: std::io::Read,
{
    /// Fill `bytes` completely from the wrapped reader.
    ///
    /// Any I/O failure reported by `read_exact` is surfaced as `DecodeError::UnexpectedEnd`;
    /// the underlying `std::io::Error` is discarded.
    #[inline(always)]
    fn read(&mut self, bytes: &mut [u8]) -> Result<(), DecodeError> {
        self.reader
            .read_exact(bytes)
            .map_err(|_| DecodeError::UnexpectedEnd)
    }
}
impl<R> Reader for std::io::BufReader<R>
where
R: std::io::Read,
{
fn read(&mut self, bytes: &mut [u8]) -> Result<(), DecodeError> {
match self.read_exact(bytes) {
Ok(_) => Ok(()),
Err(_) => Err(DecodeError::UnexpectedEnd),
}
}
#[inline]
fn peek_read(&self, n: usize) -> Option<&[u8]> {
self.buffer().get(..n)
}
#[inline]
fn consume(&mut self, n: usize) {
<Self as std::io::BufRead>::consume(self, n);
}
}
/// Encode the given value into any type that implements `std::io::Write`, e.g. `std::fs::File`, with the given `Config`.
@ -42,7 +73,7 @@ impl<R: std::io::Read> Reader for R {
///
/// [config]: config/index.html
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub fn encode_into_writer<E: Encode, C: Config, W: std::io::Write>(
pub fn encode_into_std_write<E: Encode, C: Config, W: std::io::Write>(
val: E,
dst: &mut W,
config: C,
@ -62,6 +93,7 @@ struct IoWriter<'a, W: std::io::Write> {
}
impl<'storage, W: std::io::Write> Writer for IoWriter<'storage, W> {
#[inline(always)]
fn write(&mut self, bytes: &[u8]) -> Result<(), EncodeError> {
self.writer
.write_all(bytes)

View File

@ -68,6 +68,8 @@ mod features;
pub(crate) mod utils;
pub(crate) mod varint;
use de::read::Reader;
use enc::write::Writer;
pub use features::*;
pub mod config;
@ -93,6 +95,21 @@ pub fn encode_into_slice<E: enc::Encode, C: Config>(
Ok(encoder.into_writer().bytes_written())
}
/// Encode the given value into a custom [Writer].
///
/// See the [config] module for more information on configurations.
///
/// [config]: config/index.html
pub fn encode_into_writer<E: enc::Encode, W: Writer, C: Config>(
val: E,
writer: W,
config: C,
) -> Result<(), error::EncodeError> {
let mut encoder = enc::EncoderImpl::<_, C>::new(writer, config);
val.encode(&mut encoder)?;
Ok(())
}
/// Attempt to decode a given type `D` from the given slice.
///
/// See the [config] module for more information on configurations.
@ -107,6 +124,19 @@ pub fn decode_from_slice<'a, D: de::BorrowDecode<'a>, C: Config>(
D::borrow_decode(&mut decoder)
}
/// Attempt to decode a given type `D` from the given [Reader].
///
/// See the [config] module for more information on configurations.
///
/// [config]: config/index.html
pub fn decode_from_reader<D: de::Decode, R: Reader, C: Config>(
reader: R,
_config: C,
) -> Result<D, error::DecodeError> {
let mut decoder = de::DecoderImpl::<_, C>::new(reader, _config);
D::decode(&mut decoder)
}
// TODO: Currently our doctests fail when trying to include the specs because the specs depend on `derive` and `alloc`.
// But we want to have the specs in the docs always
#[cfg(all(feature = "alloc", feature = "derive"))]

View File

@ -1,14 +1,22 @@
use super::{U128_BYTE, U16_BYTE, U32_BYTE, U64_BYTE};
use core::{convert::TryInto, u32};
use super::{SINGLE_BYTE_MAX, U128_BYTE, U16_BYTE, U32_BYTE, U64_BYTE};
use crate::{
config::Endian,
de::read::Reader,
error::{DecodeError, IntegerType},
};
pub fn varint_decode_u16<R: Reader>(read: &mut R, endian: Endian) -> Result<u16, DecodeError> {
let mut byte = [0u8; 1];
read.read(&mut byte)?;
match byte[0] {
#[inline(never)]
#[cold]
fn deserialize_varint_cold_u16<R>(read: &mut R, endian: Endian) -> Result<u16, DecodeError>
where
R: Reader,
{
let mut bytes = [0u8; 1];
read.read(&mut bytes)?;
match bytes[0] {
byte @ 0..=SINGLE_BYTE_MAX => Ok(byte as u16),
U16_BYTE => {
let mut bytes = [0u8; 2];
read.read(&mut bytes)?;
@ -17,26 +25,23 @@ pub fn varint_decode_u16<R: Reader>(read: &mut R, endian: Endian) -> Result<u16,
Endian::Little => u16::from_le_bytes(bytes),
})
}
U32_BYTE => Err(DecodeError::InvalidIntegerType {
expected: IntegerType::U16,
found: IntegerType::U32,
}),
U64_BYTE => Err(DecodeError::InvalidIntegerType {
expected: IntegerType::U16,
found: IntegerType::U64,
}),
U128_BYTE => Err(DecodeError::InvalidIntegerType {
expected: IntegerType::U16,
found: IntegerType::U128,
}),
x => Ok(x as u16),
U32_BYTE => invalid_varint_discriminant(IntegerType::U16, IntegerType::U32),
U64_BYTE => invalid_varint_discriminant(IntegerType::U16, IntegerType::U64),
U128_BYTE => invalid_varint_discriminant(IntegerType::U16, IntegerType::U128),
_ => invalid_varint_discriminant(IntegerType::U16, IntegerType::Reserved),
}
}
pub fn varint_decode_u32<R: Reader>(read: &mut R, endian: Endian) -> Result<u32, DecodeError> {
let mut byte = [0u8; 1];
read.read(&mut byte)?;
match byte[0] {
#[inline(never)]
#[cold]
fn deserialize_varint_cold_u32<R>(read: &mut R, endian: Endian) -> Result<u32, DecodeError>
where
R: Reader,
{
let mut bytes = [0u8; 1];
read.read(&mut bytes)?;
match bytes[0] {
byte @ 0..=SINGLE_BYTE_MAX => Ok(byte as u32),
U16_BYTE => {
let mut bytes = [0u8; 2];
read.read(&mut bytes)?;
@ -49,26 +54,26 @@ pub fn varint_decode_u32<R: Reader>(read: &mut R, endian: Endian) -> Result<u32,
let mut bytes = [0u8; 4];
read.read(&mut bytes)?;
Ok(match endian {
Endian::Big => u32::from_be_bytes(bytes),
Endian::Little => u32::from_le_bytes(bytes),
Endian::Big => u32::from_be_bytes(bytes) as u32,
Endian::Little => u32::from_le_bytes(bytes) as u32,
})
}
U64_BYTE => Err(DecodeError::InvalidIntegerType {
expected: IntegerType::U32,
found: IntegerType::U64,
}),
U128_BYTE => Err(DecodeError::InvalidIntegerType {
expected: IntegerType::U32,
found: IntegerType::U128,
}),
x => Ok(x as u32),
U64_BYTE => invalid_varint_discriminant(IntegerType::U32, IntegerType::U64),
U128_BYTE => invalid_varint_discriminant(IntegerType::U32, IntegerType::U128),
_ => invalid_varint_discriminant(IntegerType::U32, IntegerType::Reserved),
}
}
pub fn varint_decode_u64<R: Reader>(read: &mut R, endian: Endian) -> Result<u64, DecodeError> {
let mut byte = [0u8; 1];
read.read(&mut byte)?;
match byte[0] {
#[inline(never)]
#[cold]
fn deserialize_varint_cold_u64<R>(read: &mut R, endian: Endian) -> Result<u64, DecodeError>
where
R: Reader,
{
let mut bytes = [0u8; 1];
read.read(&mut bytes)?;
match bytes[0] {
byte @ 0..=SINGLE_BYTE_MAX => Ok(byte as u64),
U16_BYTE => {
let mut bytes = [0u8; 2];
read.read(&mut bytes)?;
@ -89,22 +94,25 @@ pub fn varint_decode_u64<R: Reader>(read: &mut R, endian: Endian) -> Result<u64,
let mut bytes = [0u8; 8];
read.read(&mut bytes)?;
Ok(match endian {
Endian::Big => u64::from_be_bytes(bytes),
Endian::Little => u64::from_le_bytes(bytes),
Endian::Big => u64::from_be_bytes(bytes) as u64,
Endian::Little => u64::from_le_bytes(bytes) as u64,
})
}
U128_BYTE => Err(DecodeError::InvalidIntegerType {
expected: IntegerType::U64,
found: IntegerType::U128,
}),
x => Ok(x as u64),
U128_BYTE => invalid_varint_discriminant(IntegerType::U64, IntegerType::U128),
_ => invalid_varint_discriminant(IntegerType::U64, IntegerType::Reserved),
}
}
pub fn varint_decode_usize<R: Reader>(read: &mut R, endian: Endian) -> Result<usize, DecodeError> {
let mut byte = [0u8; 1];
read.read(&mut byte)?;
match byte[0] {
#[inline(never)]
#[cold]
fn deserialize_varint_cold_usize<R>(read: &mut R, endian: Endian) -> Result<usize, DecodeError>
where
R: Reader,
{
let mut bytes = [0u8; 1];
read.read(&mut bytes)?;
match bytes[0] {
byte @ 0..=SINGLE_BYTE_MAX => Ok(byte as usize),
U16_BYTE => {
let mut bytes = [0u8; 2];
read.read(&mut bytes)?;
@ -129,18 +137,21 @@ pub fn varint_decode_usize<R: Reader>(read: &mut R, endian: Endian) -> Result<us
Endian::Little => u64::from_le_bytes(bytes) as usize,
})
}
U128_BYTE => Err(DecodeError::InvalidIntegerType {
expected: IntegerType::Usize,
found: IntegerType::U128,
}),
x => Ok(x as usize),
U128_BYTE => invalid_varint_discriminant(IntegerType::Usize, IntegerType::U128),
_ => invalid_varint_discriminant(IntegerType::Usize, IntegerType::Reserved),
}
}
pub fn varint_decode_u128<R: Reader>(read: &mut R, endian: Endian) -> Result<u128, DecodeError> {
let mut byte = [0u8; 1];
read.read(&mut byte)?;
match byte[0] {
#[inline(never)]
#[cold]
fn deserialize_varint_cold_u128<R>(read: &mut R, endian: Endian) -> Result<u128, DecodeError>
where
R: Reader,
{
let mut bytes = [0u8; 1];
read.read(&mut bytes)?;
match bytes[0] {
byte @ 0..=SINGLE_BYTE_MAX => Ok(byte as u128),
U16_BYTE => {
let mut bytes = [0u8; 2];
read.read(&mut bytes)?;
@ -173,7 +184,202 @@ pub fn varint_decode_u128<R: Reader>(read: &mut R, endian: Endian) -> Result<u12
Endian::Little => u128::from_le_bytes(bytes),
})
}
x => Ok(x as u128),
_ => invalid_varint_discriminant(IntegerType::U128, IntegerType::Reserved),
}
}
/// Build the `InvalidIntegerType` error returned when a varint discriminant byte announces an
/// integer kind (`found`) that the decoder for `expected` cannot accept.
///
/// Generic over `T` so every varint decoder can return it directly from any match arm. Marked
/// cold and never inlined so the error construction stays out of the decoders' fast paths.
#[inline(never)]
#[cold]
fn invalid_varint_discriminant<T>(
    expected: IntegerType,
    found: IntegerType,
) -> Result<T, DecodeError> {
    let mismatch = DecodeError::InvalidIntegerType { expected, found };
    Err(mismatch)
}
/// Decode a varint-encoded `u16` from `read`.
///
/// Fast path: when the reader can expose 3 buffered bytes (1 discriminant byte plus the widest
/// possible payload), the value is parsed in place and only the bytes actually used are
/// consumed. Otherwise decoding is delegated to `deserialize_varint_cold_u16`.
///
/// Returns `DecodeError::InvalidIntegerType` when the discriminant announces a wider integer
/// or is not a known marker; in that case nothing is consumed from the reader.
pub fn varint_decode_u16<R: Reader>(read: &mut R, endian: Endian) -> Result<u16, DecodeError> {
    let peeked = match read.peek_read(3) {
        Some(peeked) => peeked,
        // Not enough buffered bytes (or no buffer at all): take the slow path.
        None => return deserialize_varint_cold_u16(read, endian),
    };
    let payload = &peeked[1..];

    // Each arm yields the decoded value and the total number of bytes it occupied.
    let (value, consumed) = match peeked[0] {
        // Small values are stored directly in the discriminant byte.
        small @ 0..=SINGLE_BYTE_MAX => (u16::from(small), 1),
        U16_BYTE => {
            let raw: [u8; 2] = payload[..2].try_into().unwrap();
            let wide = match endian {
                Endian::Big => u16::from_be_bytes(raw),
                Endian::Little => u16::from_le_bytes(raw),
            };
            (wide, 3)
        }
        U32_BYTE => return invalid_varint_discriminant(IntegerType::U16, IntegerType::U32),
        U64_BYTE => return invalid_varint_discriminant(IntegerType::U16, IntegerType::U64),
        U128_BYTE => return invalid_varint_discriminant(IntegerType::U16, IntegerType::U128),
        _ => return invalid_varint_discriminant(IntegerType::U16, IntegerType::Reserved),
    };

    read.consume(consumed);
    Ok(value)
}
/// Decode a varint-encoded `u32` from `read`.
///
/// Fast path: when the reader can expose 5 buffered bytes (1 discriminant byte plus the widest
/// possible payload), the value is parsed in place and only the bytes actually used are
/// consumed. Otherwise decoding is delegated to `deserialize_varint_cold_u32`.
///
/// Returns `DecodeError::InvalidIntegerType` when the discriminant announces a wider integer
/// or is not a known marker; in that case nothing is consumed from the reader.
pub fn varint_decode_u32<R: Reader>(read: &mut R, endian: Endian) -> Result<u32, DecodeError> {
    let peeked = match read.peek_read(5) {
        Some(peeked) => peeked,
        // Not enough buffered bytes (or no buffer at all): take the slow path.
        None => return deserialize_varint_cold_u32(read, endian),
    };
    let payload = &peeked[1..];

    // Each arm yields the decoded value and the total number of bytes it occupied.
    let (value, consumed) = match peeked[0] {
        // Small values are stored directly in the discriminant byte.
        small @ 0..=SINGLE_BYTE_MAX => (u32::from(small), 1),
        U16_BYTE => {
            let raw: [u8; 2] = payload[..2].try_into().unwrap();
            let narrow = match endian {
                Endian::Big => u16::from_be_bytes(raw),
                Endian::Little => u16::from_le_bytes(raw),
            };
            (u32::from(narrow), 3)
        }
        U32_BYTE => {
            let raw: [u8; 4] = payload[..4].try_into().unwrap();
            let wide = match endian {
                Endian::Big => u32::from_be_bytes(raw),
                Endian::Little => u32::from_le_bytes(raw),
            };
            (wide, 5)
        }
        U64_BYTE => return invalid_varint_discriminant(IntegerType::U32, IntegerType::U64),
        U128_BYTE => return invalid_varint_discriminant(IntegerType::U32, IntegerType::U128),
        _ => return invalid_varint_discriminant(IntegerType::U32, IntegerType::Reserved),
    };

    read.consume(consumed);
    Ok(value)
}
/// Decode a varint-encoded `u64` from `read`.
///
/// Fast path: when the reader can expose 9 buffered bytes (1 discriminant byte plus the widest
/// possible payload), the value is parsed in place and only the bytes actually used are
/// consumed. Otherwise decoding is delegated to `deserialize_varint_cold_u64`.
///
/// Returns `DecodeError::InvalidIntegerType` with `expected: IntegerType::U64` when the
/// discriminant announces a `u128` or is not a known marker; in that case nothing is consumed
/// from the reader.
pub fn varint_decode_u64<R: Reader>(read: &mut R, endian: Endian) -> Result<u64, DecodeError> {
    if let Some(bytes) = read.peek_read(9) {
        let (discriminant, bytes) = bytes.split_at(1);
        // Each arm yields the decoded value and the total number of bytes it occupied.
        let (out, used) = match discriminant[0] {
            // Small values are stored directly in the discriminant byte.
            byte @ 0..=SINGLE_BYTE_MAX => (u64::from(byte), 1),
            U16_BYTE => {
                let val = match endian {
                    Endian::Big => u16::from_be_bytes(bytes[..2].try_into().unwrap()),
                    Endian::Little => u16::from_le_bytes(bytes[..2].try_into().unwrap()),
                };
                (u64::from(val), 3)
            }
            U32_BYTE => {
                let val = match endian {
                    Endian::Big => u32::from_be_bytes(bytes[..4].try_into().unwrap()),
                    Endian::Little => u32::from_le_bytes(bytes[..4].try_into().unwrap()),
                };
                (u64::from(val), 5)
            }
            U64_BYTE => {
                let val = match endian {
                    Endian::Big => u64::from_be_bytes(bytes[..8].try_into().unwrap()),
                    Endian::Little => u64::from_le_bytes(bytes[..8].try_into().unwrap()),
                };
                (val, 9)
            }
            // The type being decoded here is u64, so that is what the error must report as
            // `expected` (the slow path reports the same).
            U128_BYTE => return invalid_varint_discriminant(IntegerType::U64, IntegerType::U128),
            _ => return invalid_varint_discriminant(IntegerType::U64, IntegerType::Reserved),
        };
        read.consume(used);
        Ok(out)
    } else {
        // Not enough buffered bytes (or no buffer at all): take the slow path.
        deserialize_varint_cold_u64(read, endian)
    }
}
/// Decode a varint-encoded `usize` from `read`.
///
/// Fast path: when the reader can expose 9 buffered bytes (1 discriminant byte plus the widest
/// accepted payload, a `u64`), the value is parsed in place and only the bytes actually used
/// are consumed. Otherwise decoding is delegated to `deserialize_varint_cold_usize`.
///
/// Returns `DecodeError::InvalidIntegerType` when the discriminant announces a `u128` or is
/// not a known marker; in that case nothing is consumed from the reader.
pub fn varint_decode_usize<R: Reader>(read: &mut R, endian: Endian) -> Result<usize, DecodeError> {
    let peeked = match read.peek_read(9) {
        Some(peeked) => peeked,
        // Not enough buffered bytes (or no buffer at all): take the slow path.
        None => return deserialize_varint_cold_usize(read, endian),
    };
    let payload = &peeked[1..];

    // Each arm yields the decoded value and the total number of bytes it occupied.
    let (value, consumed) = match peeked[0] {
        // Small values are stored directly in the discriminant byte.
        small @ 0..=SINGLE_BYTE_MAX => (small as usize, 1),
        U16_BYTE => {
            let raw: [u8; 2] = payload[..2].try_into().unwrap();
            let decoded = match endian {
                Endian::Big => u16::from_be_bytes(raw),
                Endian::Little => u16::from_le_bytes(raw),
            };
            (decoded as usize, 3)
        }
        U32_BYTE => {
            let raw: [u8; 4] = payload[..4].try_into().unwrap();
            let decoded = match endian {
                Endian::Big => u32::from_be_bytes(raw),
                Endian::Little => u32::from_le_bytes(raw),
            };
            (decoded as usize, 5)
        }
        U64_BYTE => {
            let raw: [u8; 8] = payload[..8].try_into().unwrap();
            let decoded = match endian {
                Endian::Big => u64::from_be_bytes(raw),
                Endian::Little => u64::from_le_bytes(raw),
            };
            // NOTE: `as` truncates silently on targets where `usize` is narrower than 64 bits.
            (decoded as usize, 9)
        }
        U128_BYTE => return invalid_varint_discriminant(IntegerType::Usize, IntegerType::U128),
        _ => return invalid_varint_discriminant(IntegerType::Usize, IntegerType::Reserved),
    };

    read.consume(consumed);
    Ok(value)
}
/// Decode a varint-encoded `u128` from `read`.
///
/// Fast path: when the reader can expose 17 buffered bytes (1 discriminant byte plus the
/// widest possible payload), the value is parsed in place and only the bytes actually used are
/// consumed. Otherwise decoding is delegated to `deserialize_varint_cold_u128`.
///
/// Returns `DecodeError::InvalidIntegerType` with `expected: IntegerType::U128` when the
/// discriminant is not a known marker; in that case nothing is consumed from the reader.
pub fn varint_decode_u128<R: Reader>(read: &mut R, endian: Endian) -> Result<u128, DecodeError> {
    if let Some(bytes) = read.peek_read(17) {
        let (discriminant, bytes) = bytes.split_at(1);
        // Each arm yields the decoded value and the total number of bytes it occupied.
        let (out, used) = match discriminant[0] {
            // Small values are stored directly in the discriminant byte.
            byte @ 0..=SINGLE_BYTE_MAX => (u128::from(byte), 1),
            U16_BYTE => {
                let val = match endian {
                    Endian::Big => u16::from_be_bytes(bytes[..2].try_into().unwrap()),
                    Endian::Little => u16::from_le_bytes(bytes[..2].try_into().unwrap()),
                };
                (u128::from(val), 3)
            }
            U32_BYTE => {
                let val = match endian {
                    Endian::Big => u32::from_be_bytes(bytes[..4].try_into().unwrap()),
                    Endian::Little => u32::from_le_bytes(bytes[..4].try_into().unwrap()),
                };
                (u128::from(val), 5)
            }
            U64_BYTE => {
                let val = match endian {
                    Endian::Big => u64::from_be_bytes(bytes[..8].try_into().unwrap()),
                    Endian::Little => u64::from_le_bytes(bytes[..8].try_into().unwrap()),
                };
                (u128::from(val), 9)
            }
            U128_BYTE => {
                let val = match endian {
                    Endian::Big => u128::from_be_bytes(bytes[..16].try_into().unwrap()),
                    Endian::Little => u128::from_le_bytes(bytes[..16].try_into().unwrap()),
                };
                (val, 17)
            }
            // The type being decoded here is u128, so that is what the error must report as
            // `expected` (the slow path reports the same).
            _ => return invalid_varint_discriminant(IntegerType::U128, IntegerType::Reserved),
        };
        read.consume(used);
        Ok(out)
    } else {
        // Not enough buffered bytes (or no buffer at all): take the slow path.
        deserialize_varint_cold_u128(read, endian)
    }
}

View File

@ -65,7 +65,7 @@ fn test_decode() {
};
let slice = [5, 10, 251, 0, 4];
let result: Test2<u32> =
bincode::decode_from_reader(&mut slice.as_ref(), Configuration::standard()).unwrap();
bincode::decode_from_std_read(&mut slice.as_ref(), Configuration::standard()).unwrap();
assert_eq!(result, start);
}

View File

@ -44,7 +44,7 @@ impl bincode::de::Decode for Foo {
#[test]
fn test_std_cursor() {
let mut cursor = Cursor::<&[u8]>::new(&[5, 10]);
let foo: Foo = bincode::decode_from_reader(&mut cursor, Configuration::standard()).unwrap();
let foo: Foo = bincode::decode_from_std_read(&mut cursor, Configuration::standard()).unwrap();
assert_eq!(foo.a, 5);
assert_eq!(foo.b, 10);
@ -55,12 +55,12 @@ fn test_std_file() {
let mut file = tempfile::tempfile().expect("Could not create temp file");
let bytes_written =
bincode::encode_into_writer(Foo { a: 30, b: 50 }, &mut file, Configuration::standard())
bincode::encode_into_std_write(Foo { a: 30, b: 50 }, &mut file, Configuration::standard())
.unwrap();
assert_eq!(bytes_written, 2);
file.seek(SeekFrom::Start(0)).unwrap();
let foo: Foo = bincode::decode_from_reader(&mut file, Configuration::standard()).unwrap();
let foo: Foo = bincode::decode_from_std_read(&mut file, Configuration::standard()).unwrap();
assert_eq!(foo.a, 30);
assert_eq!(foo.b, 50);