From 6692674f4390f77d428f2b398aee32b37ce09e3c Mon Sep 17 00:00:00 2001 From: Ty Overby Date: Thu, 20 Apr 2017 18:27:22 -0700 Subject: [PATCH] WIP Implement nocopy support for bincode (#150) * basic infrastructure * use BincodeRead internally * fix zero copy behavior * rearrange module layout; remove pub(crate) * add size-limit safety checks when deserializing str or bytes * fix-up tests --- Cargo.toml | 6 +- src/{serde/reader.rs => de/mod.rs} | 23 ++++-- src/de/read.rs | 120 ++++++++++++++++++++++++++++ src/{serde/mod.rs => internal.rs} | 19 +++-- src/lib.rs | 34 ++++---- src/{serde/writer.rs => ser/mod.rs} | 2 +- tests/test.rs | 36 ++++++--- 7 files changed, 193 insertions(+), 47 deletions(-) rename src/{serde/reader.rs => de/mod.rs} (93%) create mode 100644 src/de/read.rs rename src/{serde/mod.rs => internal.rs} (94%) rename src/{serde/writer.rs => ser/mod.rs} (99%) diff --git a/Cargo.toml b/Cargo.toml index 6b6d82a..8302789 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,8 +14,8 @@ description = "A binary serialization / deserialization strategy that uses Serde [dependencies] byteorder = "1.0.0" num-traits = "0.1.32" -serde = { git = "https://github.com/serde-rs/serde", branch = "master" } +serde = "1.*.*" [dev-dependencies] -serde_bytes = { git = "https://github.com/serde-rs/bytes", branch = "master" } -serde_derive = { git = "https://github.com/serde-rs/serde", branch = "master" } +serde_bytes = "0.10.*" +serde_derive = "1.*.*" diff --git a/src/serde/reader.rs b/src/de/mod.rs similarity index 93% rename from src/serde/reader.rs rename to src/de/mod.rs index f72a560..bbba6c4 100644 --- a/src/serde/reader.rs +++ b/src/de/mod.rs @@ -8,6 +8,9 @@ use serde_crate::de::IntoDeserializer; use serde_crate::de::Error as DeError; use ::SizeLimit; use super::{Result, Error, ErrorKind}; +use self::read::BincodeRead; + +pub mod read; const BLOCK_SIZE: usize = 65536; @@ -30,7 +33,7 @@ pub struct Deserializer { _phantom: PhantomData, } -impl Deserializer { +impl<'de, R: BincodeRead<'de>, E: ByteOrder, S: SizeLimit> Deserializer { /// Creates a new Deserializer with a given `Read`er and a size_limit. pub fn new(r: R, size_limit: S) -> Deserializer { Deserializer { @@ -91,7 +94,7 @@ macro_rules! impl_nums { } impl<'de, 'a, R, S, E> serde::Deserializer<'de> for &'a mut Deserializer -where R: Read, S: SizeLimit, E: ByteOrder { +where R: BincodeRead<'de>, S: SizeLimit, E: ByteOrder { type Error = Error; #[inline] @@ -181,7 +184,9 @@ where R: Read, S: SizeLimit, E: ByteOrder { fn deserialize_str(self, visitor: V) -> Result where V: serde::de::Visitor<'de>, { - visitor.visit_str(&try!(self.read_string())) + let len: usize = try!(serde::Deserialize::deserialize(&mut *self)); + try!(self.read_bytes(len as u64)); + self.reader.forward_read_str(len, visitor) } fn deserialize_string(self, visitor: V) -> Result @@ -193,7 +198,9 @@ where R: Read, S: SizeLimit, E: ByteOrder { fn deserialize_bytes(self, visitor: V) -> Result where V: serde::de::Visitor<'de>, { - visitor.visit_bytes(&try!(self.read_vec())) + let len: usize = try!(serde::Deserialize::deserialize(&mut *self)); + try!(self.read_bytes(len as u64)); + self.reader.forward_read_bytes(len, visitor) } fn deserialize_byte_buf(self, visitor: V) -> Result @@ -209,7 +216,7 @@ where R: Read, S: SizeLimit, E: ByteOrder { where V: serde::de::Visitor<'de>, { impl<'de, 'a, R: 'a, S, E> serde::de::EnumAccess<'de> for &'a mut Deserializer - where R: Read, S: SizeLimit, E: ByteOrder { + where R: BincodeRead<'de>, S: SizeLimit, E: ByteOrder { type Error = Error; type Variant = Self; @@ -235,7 +242,7 @@ where R: Read, S: SizeLimit, E: ByteOrder { len: usize, } - impl<'de, 'a, 'b: 'a, R: Read + 'b, S: SizeLimit, E: ByteOrder> serde::de::SeqAccess<'de> for Access<'a, R, S, E> { + impl<'de, 'a, 'b: 'a, R: BincodeRead<'de>+ 'b, S: SizeLimit, E: ByteOrder> serde::de::SeqAccess<'de> for Access<'a, R, S, E> { type Error = Error; fn next_element_seed(&mut self, seed: T) -> Result> @@ -288,7 +295,7 @@ where R: Read, S: SizeLimit, E: ByteOrder { len: usize, } - impl<'de, 'a, 'b: 'a, R: Read + 'b, S: SizeLimit, E: ByteOrder> serde::de::MapAccess<'de> for Access<'a, R, S, E> { + impl<'de, 'a, 'b: 'a, R: BincodeRead<'de> + 'b, S: SizeLimit, E: ByteOrder> serde::de::MapAccess<'de> for Access<'a, R, S, E> { type Error = Error; fn next_key_seed(&mut self, seed: K) -> Result> @@ -372,7 +379,7 @@ where R: Read, S: SizeLimit, E: ByteOrder { } impl<'de, 'a, R, S, E> serde::de::VariantAccess<'de> for &'a mut Deserializer -where R: Read, S: SizeLimit, E: ByteOrder { +where R: BincodeRead<'de>, S: SizeLimit, E: ByteOrder { type Error = Error; fn unit_variant(self) -> Result<()> { diff --git a/src/de/read.rs b/src/de/read.rs new file mode 100644 index 0000000..68ee1c5 --- /dev/null +++ b/src/de/read.rs @@ -0,0 +1,120 @@ +use std::io::{Read as IoRead, Result as IoResult, Error as IoError, ErrorKind as IoErrorKind}; +use ::Result; +use serde_crate as serde; + +/// A byte-oriented reading trait that is specialized for +/// slices and generic readers. +pub trait BincodeRead<'storage>: IoRead { + #[doc(hidden)] + fn forward_read_str(&mut self, length: usize, visitor: V) -> Result + where V: serde::de::Visitor<'storage>; + + #[doc(hidden)] + fn forward_read_bytes>(&mut self, length: usize, visitor: V) -> Result + where V: serde::de::Visitor<'storage>; +} + +/// A BincodeRead implementation for byte slices +pub struct SliceReader<'storage> { + slice: &'storage [u8] +} + +/// A BincodeRead implementation for io::Readers +pub struct IoReadReader { + reader: R, + temp_buffer: Vec, +} + +impl <'storage> SliceReader<'storage> { + /// Constructs a slice reader + pub fn new(bytes: &'storage [u8]) -> SliceReader<'storage> { + SliceReader { + slice: bytes, + } + } +} + +impl IoReadReader { + /// Constructs an IoReadReader + pub fn new(r: R) -> IoReadReader { + IoReadReader { + reader: r, + temp_buffer: vec![], + } + } +} + +impl <'storage> IoRead for SliceReader<'storage> { + fn read(&mut self, out: & mut [u8]) -> IoResult { + (&mut self.slice).read(out) + } +} + +impl IoRead for IoReadReader { + fn read(&mut self, out: & mut [u8]) -> IoResult { + self.reader.read(out) + } +} + +impl <'storage> BincodeRead<'storage> for SliceReader<'storage> { + fn forward_read_str>(&mut self, length: usize, visitor: V) -> Result { + use ::ErrorKind; + if length > self.slice.len() { + return Err(Box::new(ErrorKind::IoError(IoError::new(IoErrorKind::UnexpectedEof, "")))); + } + + let string = match ::std::str::from_utf8(&self.slice[..length]) { + Ok(s) => s, + Err(_) => return Err(Box::new(ErrorKind::InvalidEncoding { + desc: "string was not valid utf8", + detail: None, + })), + }; + let r = visitor.visit_borrowed_str(string); + self.slice = &self.slice[length..]; + r + } + fn forward_read_bytes>(&mut self, length: usize, visitor: V) -> Result { + use ::ErrorKind; + if length > self.slice.len() { + return Err(Box::new(ErrorKind::IoError(IoError::new(IoErrorKind::UnexpectedEof, "")))); + } + + let r = visitor.visit_borrowed_bytes(&self.slice[..length]); + self.slice = &self.slice[length..]; + r + } +} + +impl BincodeRead<'static> for IoReadReader where R: IoRead { + fn forward_read_str>(&mut self, length: usize, visitor: V) -> Result { + use ::ErrorKind; + let current_length = self.temp_buffer.len(); + if length > current_length{ + self.temp_buffer.reserve_exact(length - current_length); + } + + self.reader.read_exact(&mut self.temp_buffer[..length])?; + let string = match ::std::str::from_utf8(&self.temp_buffer[..length]) { + Ok(s) => s, + Err(_) => return Err(Box::new(ErrorKind::InvalidEncoding { + desc: "string was not valid utf8", + detail: None, + })), + }; + + let r = visitor.visit_str(string); + r + } + fn forward_read_bytes>(&mut self, length: usize, visitor: V) -> Result { + let current_length = self.temp_buffer.len(); + if length > current_length{ + self.temp_buffer.reserve_exact(length - current_length); + } + + self.reader.read_exact(&mut self.temp_buffer[..length])?; + + let r = visitor.visit_bytes(&self.temp_buffer[..length]); + r + } +} diff --git a/src/serde/mod.rs b/src/internal.rs similarity index 94% rename from src/serde/mod.rs rename to src/internal.rs index f9988f3..d8aa74e 100644 --- a/src/serde/mod.rs +++ b/src/internal.rs @@ -8,21 +8,18 @@ use std::{error, fmt, result}; use ::SizeLimit; use byteorder::{ByteOrder}; -pub use self::reader::{ +pub use super::de::{ Deserializer, }; -pub use self::writer::{ +pub use super::ser::{ Serializer, }; -use self::writer::SizeChecker; +use super::ser::SizeChecker; use serde_crate as serde; -mod reader; -mod writer; - /// The result of a serialization or deserialization operation. pub type Result = result::Result; @@ -225,6 +222,7 @@ pub fn serialized_size_bounded(value: &T, max: u64) -> Option pub fn deserialize_from(reader: &mut R, size_limit: S) -> Result where R: Read, T: serde::de::DeserializeOwned, S: SizeLimit, E: ByteOrder { + let reader = ::de::read::IoReadReader::new(reader); let mut deserializer = Deserializer::<_, S, E>::new(reader, size_limit); serde::Deserialize::deserialize(&mut deserializer) } @@ -233,9 +231,10 @@ pub fn deserialize_from(reader: &mut R, size_limit: S) -> Re /// /// This method does not have a size-limit because if you already have the bytes /// in memory, then you don't gain anything by having a limiter. -pub fn deserialize(bytes: &[u8]) -> Result - where T: serde::de::DeserializeOwned, +pub fn deserialize<'a, T, E: ByteOrder>(bytes: &'a [u8]) -> Result + where T: serde::de::Deserialize<'a>, { - let mut reader = bytes; - deserialize_from::<_, _, _, E>(&mut reader, super::Infinite) + let reader = ::de::read::SliceReader::new(bytes); + let mut deserializer = Deserializer::<_, _, E>::new(reader, super::Infinite); + serde::Deserialize::deserialize(&mut deserializer) } diff --git a/src/lib.rs b/src/lib.rs index 4674671..f4d7681 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,30 +40,32 @@ extern crate byteorder; extern crate num_traits; extern crate serde as serde_crate; -mod serde; +mod ser; +mod de; +pub mod internal; -/// All of the core bincode functions and types with the ability to choose endianness. -pub mod endian_choice { - pub use super::serde::{Deserializer, Serializer, serialize, serialize_into, deserialize, deserialize_from}; +pub mod read_types { + //! The types that the deserializer uses for optimizations + pub use ::de::read::{SliceReader, BincodeRead, IoReadReader}; } use std::io::{Read, Write}; -pub use serde::{ErrorKind, Error, Result, serialized_size, serialized_size_bounded}; +pub use internal::{ErrorKind, Error, Result, serialized_size, serialized_size_bounded}; /// A Deserializer that uses LittleEndian byteorder -pub type Deserializer = serde::Deserializer; +pub type Deserializer = internal::Deserializer; /// A Serializer that uses LittleEndian byteorder -pub type Serializer = serde::Serializer; +pub type Serializer = internal::Serializer; /// Deserializes a slice of bytes into an object. /// /// This method does not have a size-limit because if you already have the bytes /// in memory, then you don't gain anything by having a limiter. -pub fn deserialize(bytes: &[u8]) -> serde::Result - where T: serde_crate::de::DeserializeOwned, +pub fn deserialize<'a, T>(bytes: &'a [u8]) -> internal::Result + where T: serde_crate::de::Deserialize<'a>, { - serde::deserialize::<_, byteorder::LittleEndian>(bytes) + internal::deserialize::<_, byteorder::LittleEndian>(bytes) } /// Deserializes an object directly from a `Buffer`ed Reader. @@ -75,10 +77,10 @@ pub fn deserialize(bytes: &[u8]) -> serde::Result /// If this returns an `Error`, assume that the buffer that you passed /// in is in an invalid state, as the error could be returned during any point /// in the reading. -pub fn deserialize_from(reader: &mut R, size_limit: S) -> serde::Result +pub fn deserialize_from(reader: &mut R, size_limit: S) -> internal::Result where R: Read, T: serde_crate::de::DeserializeOwned, S: SizeLimit { - serde::deserialize_from::<_, _, _, byteorder::LittleEndian>(reader, size_limit) + internal::deserialize_from::<_, _, _, byteorder::LittleEndian>(reader, size_limit) } /// Serializes an object directly into a `Writer`. @@ -89,20 +91,20 @@ pub fn deserialize_from(reader: &mut R, size_limit: S) -> serde /// If this returns an `Error` (other than SizeLimit), assume that the /// writer is in an invalid state, as writing could bail out in the middle of /// serializing. -pub fn serialize_into(writer: &mut W, value: &T, size_limit: S) -> serde::Result<()> +pub fn serialize_into(writer: &mut W, value: &T, size_limit: S) -> internal::Result<()> where W: Write, T: serde_crate::Serialize, S: SizeLimit { - serde::serialize_into::<_, _, _, byteorder::LittleEndian>(writer, value, size_limit) + internal::serialize_into::<_, _, _, byteorder::LittleEndian>(writer, value, size_limit) } /// Serializes a serializable object into a `Vec` of bytes. /// /// If the serialization would take more bytes than allowed by `size_limit`, /// an error is returned. -pub fn serialize(value: &T, size_limit: S) -> serde::Result> +pub fn serialize(value: &T, size_limit: S) -> internal::Result> where T: serde_crate::Serialize, S: SizeLimit { - serde::serialize::<_, _, byteorder::LittleEndian>(value, size_limit) + internal::serialize::<_, _, byteorder::LittleEndian>(value, size_limit) } /// A limit on the amount of bytes that can be read or written. diff --git a/src/serde/writer.rs b/src/ser/mod.rs similarity index 99% rename from src/serde/writer.rs rename to src/ser/mod.rs index 35ed0f6..9082afa 100644 --- a/src/serde/writer.rs +++ b/src/ser/mod.rs @@ -7,7 +7,7 @@ use serde_crate as serde; use byteorder::{WriteBytesExt, ByteOrder}; use super::{Result, Error, ErrorKind}; -use super::super::SizeLimit; +use super::SizeLimit; /// An Serializer that encodes values directly into a Writer. /// diff --git a/tests/test.rs b/tests/test.rs index f2739ee..db0031b 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -12,7 +12,7 @@ use std::borrow::Cow; use bincode::{Infinite, Bounded}; use bincode::{serialized_size, ErrorKind, Result}; -use bincode::endian_choice::{serialize, deserialize}; +use bincode::internal::{serialize, deserialize, deserialize_from}; use bincode::serialize as serialize_little; use bincode::deserialize as deserialize_little; @@ -22,23 +22,22 @@ fn the_same(element: V) where V: serde::Serialize+serde::de::DeserializeOwned+PartialEq+Debug+'static { let size = serialized_size(&element); + { - let encoded = serialize_little(&element, Infinite); - let encoded = encoded.unwrap(); - let decoded = deserialize_little(&encoded[..]); - let decoded = decoded.unwrap(); + let encoded = serialize_little(&element, Infinite).unwrap(); + let decoded = deserialize_little(&encoded[..]).unwrap(); assert_eq!(element, decoded); assert_eq!(size, encoded.len() as u64); } { - let encoded = serialize::<_, _, byteorder::BigEndian>(&element, Infinite); - let encoded = encoded.unwrap(); - let decoded = deserialize::<_, byteorder::BigEndian>(&encoded[..]); - let decoded = decoded.unwrap(); + let encoded = serialize::<_, _, byteorder::BigEndian>(&element, Infinite).unwrap(); + let decoded = deserialize::<_, byteorder::BigEndian>(&encoded[..]).unwrap(); + let decoded_reader = deserialize_from::<_, _, _, byteorder::BigEndian>(&mut &encoded[..], Infinite).unwrap(); assert_eq!(element, decoded); + assert_eq!(element, decoded_reader); assert_eq!(size, encoded.len() as u64); } } @@ -399,3 +398,22 @@ fn endian_difference() { let big = serialize::<_, _, byteorder::BigEndian>(&x, Infinite).unwrap(); assert_ne!(little, big); } + +#[test] +fn test_zero_copy_parse() { + #[derive(Serialize, Deserialize, Eq, PartialEq, Debug)] + struct Foo<'a> { + borrowed_str: &'a str, + borrowed_bytes: &'a [u8], + } + + let f = Foo { + borrowed_str: "hi", + borrowed_bytes: &[0, 1, 2, 3], + }; + { + let encoded = serialize_little(&f, Infinite).unwrap(); + let out: Foo = deserialize_little(&encoded[..]).unwrap(); + assert_eq!(out, f); + } +}