Merge branch 'master' of github.com:TyOverby/bincode

2015-01-16 13:21:46 -08:00 · 2015-01-16 13:21:46 -08:00 · aeaa4aafd4
parent 89a6b007b1 eb933c9f89
commit aeaa4aafd4
6 changed files with 171 additions and 41 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "bincode"
-version = "0.0.5"
+version = "0.0.6"
 authors = ["Ty Overby <ty@pre-alpha.com>", "Francesco Mazzoli <f@mazzo.li>"]

 repository = "https://github.com/TyOverby/bincode"
--- a/examples/basic.rs
+++ b/examples/basic.rs
@ -24,7 +24,7 @@ fn main() {
    // 8 bytes for the length of the vector, 4 bytes per float.
    assert_eq!(encoded.len(), 8 + 4 * 4);

-    let decoded: World = bincode::decode(encoded.as_slice()).unwrap();
+    let decoded: World = bincode::decode(&encoded[]).unwrap();

    assert!(world == decoded);
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -17,10 +17,76 @@ mod writer;
 mod reader;
 #[cfg(test)] mod test;

-#[derive(Clone, Copy)]
+///! `bincode` is a crate for encoding and decoding using a tiny binary
+///! serialization strategy.
+///!
+///! There are simple functions for encoding to `Vec<u8>` and decoding from
+///! `&[u8]`, but the meat of the library is the `encode_into` and `decode_from`
+///! functions which respectively allow encoding into a `std::io::Writer`
+///! and decoding from a `std::io::Buffer`.
+///!
+///! ### Using Basic Functions
+///!
+///! ```rust
+///! extern crate bincode;
+///! fn main() {
+///!     // The object that we will serialize.
+///!     let target = Some("hello world".to_string());
+///!     // The maximum size of the encoded message.
+///!     let limit = bincode::SizeLimit::Bounded(20);
+///!
+///!     let encoded: Vec<u8>        = bincode::encode(&target, limit).unwrap();
+///!     let decoded: Option<String> = bincode::decode(&encoded[]).unwrap();
+///!     assert_eq!(target, decoded);
+///! }
+///! ```
+///!
+///! ### Using Into/From Functions
+///!
+///! ```rust
+///! extern crate bincode;
+///! use std::io::pipe::PipeStream;
+///! use std::io::BufferedReader;
+///! fn main() {
+///!     // The pipes that we will be using to send values across.
+///!     let streams = PipeStream::pair().unwrap();
+///!     let (mut reader, mut writer) = (BufferedReader::new(streams.reader),
+///!                                     streams.writer);
+///!     // The object that we will send across.
+///!     let target = Some(5u32);
+///!     // The max-size of the encoded bytes.
+///!     let limit = bincode::SizeLimit::Bounded(10);
+///!
+///!     // Do the actual encoding and decoding.
+///!     bincode::encode_into(&target, &mut writer, limit).unwrap();
+///!     let out: Option<u32> = bincode::decode_from(&mut reader, limit).unwrap();
+///!     assert_eq!(target, out);
+///! }
+///! ```
+///!
+
+/// A limit on the size of bytes to be read or written.
+///
+/// Size limits are an incredibly important part of both encoding and decoding.
+///
+/// In order to prevent DoS attacks on a decoder, it is important to limit the
+/// number of bytes that a single encoded message can contain; otherwise, if you
+/// are decoding bytes right off of a TCP stream for example, it would be
+/// possible for an attacker to flood your server with a 3TB vec, causing the
+/// decoder to run out of memory and crash your application!
+/// Because of this, you can provide a maximum-number-of-bytes that can be read
+/// during decoding, and the decoder will explicitly fail if it has to read
+/// any more than that.
+///
+/// On the other side, you want to make sure that you aren't encoding a message
+/// that is larger than your decoder expects.  By supplying a size limit to an
+/// encoding function, the encoder will verify that the structure can be encoded
+/// within that limit.  This verification occurs before any bytes are written to
+/// the Writer, so recovering from the error is possible.
+#[derive(Clone, Copy, Show, Hash, Eq, PartialEq, Ord, PartialOrd)]
 pub enum SizeLimit {
    Infinite,
-    UpperBound(u64)
+    Bounded(u64)
 }

 /// Encodes an encodable object into a `Vec` of bytes.
@ -36,6 +102,9 @@ pub fn encode<T: Encodable>(t: &T, size_limit: SizeLimit) -> EncodingResult<Vec<
 }

 /// Decodes a slice of bytes into an object.
+///
+/// This method does not have a size-limit because if you already have the bytes
+/// in memory, then you don't gain anything by having a limiter.
 pub fn decode<T: Decodable>(b: &[u8]) -> DecodingResult<T> {
    let mut b = b;
    decode_from(&mut b, SizeLimit::Infinite)
@ -45,10 +114,14 @@ pub fn decode<T: Decodable>(b: &[u8]) -> DecodingResult<T> {
 ///
 /// If the encoding would take more bytes than allowed by `size_limit`, an error
 /// is returned and *no bytes* will be written into the `Writer`.
+///
+/// If this returns an `EncodingError` (other than SizeLimit), assume that the
+/// writer is in an invalid state, as writing could bail out in the middle of
+/// encoding.
 pub fn encode_into<T: Encodable, W: Writer>(t: &T, w: &mut W, size_limit: SizeLimit) -> EncodingResult<()> {
    try!(match size_limit {
        SizeLimit::Infinite => Ok(()),
-        SizeLimit::UpperBound(x) => {
+        SizeLimit::Bounded(x) => {
            let mut size_checker = SizeChecker::new(x);
            t.encode(&mut size_checker)
        }
@ -57,11 +130,15 @@ pub fn encode_into<T: Encodable, W: Writer>(t: &T, w: &mut W, size_limit: SizeLi
    t.encode(&mut writer::EncoderWriter::new(w, size_limit))
 }

-/// Decoes an object directly from a Buffered Reader.
+/// Decodes an object directly from a `Buffer`ed Reader.
 ///
 /// If the provided `SizeLimit` is reached, the decode will bail immediately.
 /// A SizeLimit can help prevent an attacker from flooding your server with
 /// a neverending stream of values that runs your server out of memory.
+///
+/// If this returns a `DecodingError`, assume that the buffer that you passed
+/// in is in an invalid state, as the error could be returned during any point
+/// in the reading.
 pub fn decode_from<R: Buffer, T: Decodable>(r: &mut R, size_limit: SizeLimit) ->
 DecodingResult<T> {
    Decodable::decode(&mut reader::DecoderReader::new(r, size_limit))
--- a/src/reader.rs
+++ b/src/reader.rs
@ -1,24 +1,61 @@
 use std::io::{Buffer, Reader, IoError};
 use std::num::{cast, NumCast};
 use std::error::{Error, FromError};
+use std::fmt;

 use rustc_serialize::Decoder;

 use super::SizeLimit;

-#[derive(PartialEq, Clone, Show)]
-pub struct InvalidBytes {
+#[derive(Eq, PartialEq, Clone, Show)]
+pub struct InvalidEncoding {
    desc: &'static str,
    detail: Option<String>,
 }

-#[derive(PartialEq, Clone, Show)]
+impl fmt::String for InvalidEncoding {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            InvalidEncoding { detail: None, desc } =>
+                write!(fmt, "{}", desc),
+            InvalidEncoding { detail: Some(ref detail), desc } =>
+                write!(fmt, "{} ({})", desc, detail)
+        }
+    }
+}
+
+/// An error that can be produced during decoding.
+///
+/// If decoding from a Buffer, assume that the buffer has been left
+/// in an invalid state.
+#[derive(Eq, PartialEq, Clone, Show)]
 pub enum DecodingError {
+    /// If the error stems from the reader that is being used
+    /// during decoding, that error will be stored and returned here.
    IoError(IoError),
-    InvalidBytes(InvalidBytes),
+    /// If the bytes in the reader are not decodable because of an invalid
+    /// encoding, this error will be returned.  This error is only possible
+    /// if a stream is corrupted.  A stream produced from `encode` or `encode_into`
+    /// should **never** produce an InvalidEncoding error.
+    InvalidEncoding(InvalidEncoding),
+    /// If decoding a message takes more than the provided size limit, this
+    /// error is returned.
    SizeLimit
 }

+impl fmt::String for DecodingError {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            DecodingError::IoError(ref ioerr) =>
+                write!(fmt, "IoError: {}", ioerr),
+            DecodingError::InvalidEncoding(ref ib) =>
+                write!(fmt, "InvalidEncoding: {}", ib),
+            DecodingError::SizeLimit =>
+                write!(fmt, "SizeLimit")
+        }
+    }
+}
+
 pub type DecodingResult<T> = Result<T, DecodingError>;

 fn wrap_io(err: IoError) -> DecodingError {
@ -29,7 +66,7 @@ impl Error for DecodingError {
    fn description(&self) -> &str {
        match *self {
            DecodingError::IoError(ref err)     => err.description(),
-            DecodingError::InvalidBytes(ref ib) => ib.desc,
+            DecodingError::InvalidEncoding(ref ib) => ib.desc,
            DecodingError::SizeLimit => "the size limit for decoding has been reached"
        }
    }
@ -37,7 +74,7 @@ impl Error for DecodingError {
    fn detail(&self) -> Option<String> {
        match *self {
            DecodingError::IoError(ref err)     => err.detail(),
-            DecodingError::InvalidBytes(ref ib) => ib.detail.clone(),
+            DecodingError::InvalidEncoding(ref ib) => ib.detail.clone(),
            DecodingError::SizeLimit => None
        }
    }
@ -49,13 +86,17 @@ impl FromError<IoError> for DecodingError {
    }
 }

+/// A Decoder that reads bytes from a buffer.
+///
+/// This struct should rarely be used.
+/// In most cases, prefer the `decode_from` function.
 pub struct DecoderReader<'a, R: 'a> {
    reader: &'a mut R,
    size_limit: SizeLimit,
    read: u64
 }

-impl<'a, R: Reader+Buffer> DecoderReader<'a, R> {
+impl<'a, R: Buffer> DecoderReader<'a, R> {
    pub fn new(r: &'a mut R, size_limit: SizeLimit) -> DecoderReader<'a, R> {
        DecoderReader {
            reader: r,
@ -71,8 +112,8 @@ impl <'a, A> DecoderReader<'a, A> {
        self.read += cast(count).unwrap();
        match self.size_limit {
            SizeLimit::Infinite => Ok(()),
-            SizeLimit::UpperBound(x) if self.read <= x => Ok(()),
-            SizeLimit::UpperBound(_) => Err(DecodingError::SizeLimit)
+            SizeLimit::Bounded(x) if self.read <= x => Ok(()),
+            SizeLimit::Bounded(_) => Err(DecodingError::SizeLimit)
        }
    }

@ -82,7 +123,7 @@ impl <'a, A> DecoderReader<'a, A> {
    }
 }

-impl<'a, R: Reader+Buffer> Decoder for DecoderReader<'a, R> {
+impl<'a, R: Buffer> Decoder for DecoderReader<'a, R> {
    type Error = DecodingError;

    fn read_nil(&mut self) -> DecodingResult<()> {
@ -131,7 +172,7 @@ impl<'a, R: Reader+Buffer> Decoder for DecoderReader<'a, R> {
        match x {
            1 => Ok(true),
            0 => Ok(false),
-            _ => Err(DecodingError::InvalidBytes(InvalidBytes{
+            _ => Err(DecodingError::InvalidEncoding(InvalidEncoding{
                desc: "invalid u8 when decoding bool",
                detail: Some(format!("Expected 0 or 1, got {}", x))
            })),
@ -158,7 +199,7 @@ impl<'a, R: Reader+Buffer> Decoder for DecoderReader<'a, R> {
        let vector = try!(self.reader.read_exact(len));
        match String::from_utf8(vector) {
            Ok(s) => Ok(s),
-            Err(err) => Err(DecodingError::InvalidBytes(InvalidBytes {
+            Err(err) => Err(DecodingError::InvalidEncoding(InvalidEncoding {
                desc: "error while decoding utf8 string",
                detail: Some(format!("Decoding error: {}", err))
            })),
@ -173,7 +214,7 @@ impl<'a, R: Reader+Buffer> Decoder for DecoderReader<'a, R> {
            let id = try!(self.read_u32());
            let id = id as usize;
            if id >= names.len() {
-                Err(DecodingError::InvalidBytes(InvalidBytes {
+                Err(DecodingError::InvalidEncoding(InvalidEncoding {
                    desc: "out of bounds tag when reading enum variant",
                    detail: Some(format!("Expected tag < {}, got {}", names.len(), id))
                }))
@ -231,7 +272,7 @@ impl<'a, R: Reader+Buffer> Decoder for DecoderReader<'a, R> {
            match x {
                1 => f(self, true),
                0 => f(self, false),
-                _ => Err(DecodingError::InvalidBytes(InvalidBytes {
+                _ => Err(DecodingError::InvalidEncoding(InvalidEncoding {
                    desc: "invalid tag when decoding Option",
                    detail: Some(format!("Expected 0 or 1, got {}", x))
                })),
@ -260,7 +301,7 @@ impl<'a, R: Reader+Buffer> Decoder for DecoderReader<'a, R> {
            f(self)
        }
    fn error(&mut self, err: &str) -> DecodingError {
-        DecodingError::InvalidBytes(InvalidBytes {
+        DecodingError::InvalidEncoding(InvalidEncoding {
            desc: "user-induced error",
            detail: Some(err.to_string()),
        })
--- a/src/test.rs
+++ b/src/test.rs
@ -18,7 +18,7 @@ use super::{
    DecodingError,
    DecodingResult
 };
-use super::SizeLimit::{Infinite, UpperBound};
+use super::SizeLimit::{Infinite, Bounded};

 fn the_same<'a, V>(element: V) where V: Encodable, V: Decodable, V: PartialEq, V: Show {
    assert!(element == decode(encode(&element, Infinite).unwrap().as_slice()).unwrap());
@ -169,39 +169,40 @@ fn unicode() {
    the_same("aåååååååa".to_string());
 }

-fn is_invalid_bytes<T>(res: DecodingResult<T>) {
-    match res {
-        Ok(_) => panic!("Expecting error"),
-        Err(DecodingError::IoError(_)) => panic!("Expecting InvalidBytes"),
-        Err(DecodingError::SizeLimit) => panic!("Expecting InvalidBytes"),
-        Err(DecodingError::InvalidBytes(_)) => {},
-    }
-}

 #[test]
 fn decoding_errors() {
-    is_invalid_bytes(decode::<bool>(vec![0xA].as_slice()));
-    is_invalid_bytes(decode::<String>(vec![0, 0, 0, 0, 0, 0, 0, 1, 0xFF].as_slice()));
+    fn is_invalid_encoding<T>(res: DecodingResult<T>) {
+        match res {
+            Ok(_) => panic!("Expecting error"),
+            Err(DecodingError::IoError(_)) => panic!("Expecting InvalidEncoding"),
+            Err(DecodingError::SizeLimit) => panic!("Expecting InvalidEncoding"),
+            Err(DecodingError::InvalidEncoding(_)) => {},
+        }
+    }
+
+    is_invalid_encoding(decode::<bool>(vec![0xA].as_slice()));
+    is_invalid_encoding(decode::<String>(vec![0, 0, 0, 0, 0, 0, 0, 1, 0xFF].as_slice()));
    // Out-of-bounds variant
    #[derive(RustcEncodable, RustcDecodable)]
    enum Test {
        One,
        Two,
    };
-    is_invalid_bytes(decode::<Test>(vec![0, 0, 0, 5].as_slice()));
-    is_invalid_bytes(decode::<Option<u8>>(vec![5, 0].as_slice()));
+    is_invalid_encoding(decode::<Test>(vec![0, 0, 0, 5].as_slice()));
+    is_invalid_encoding(decode::<Option<u8>>(vec![5, 0].as_slice()));
 }

 #[test]
 fn too_big_decode() {
    let encoded = vec![0,0,0,3];
    let mut encoded_ref = encoded.as_slice();
-    let decoded: Result<u32, _> = decode_from(&mut encoded_ref, UpperBound(3));
+    let decoded: Result<u32, _> = decode_from(&mut encoded_ref, Bounded(3));
    assert!(decoded.is_err());

    let encoded = vec![0,0,0,3];
    let mut encoded_ref = encoded.as_slice();
-    let decoded: Result<u32, _> = decode_from(&mut encoded_ref, UpperBound(4));
+    let decoded: Result<u32, _> = decode_from(&mut encoded_ref, Bounded(4));
    assert!(decoded.is_ok());
 }

@ -209,17 +210,17 @@ fn too_big_decode() {
 fn too_big_char_decode() {
    let encoded = vec![0x41];
    let mut encoded_ref = encoded.as_slice();
-    let decoded: Result<char, _> = decode_from(&mut encoded_ref, UpperBound(1));
+    let decoded: Result<char, _> = decode_from(&mut encoded_ref, Bounded(1));
    assert_eq!(decoded, Ok('A'));
 }

 #[test]
 fn too_big_encode() {
-    assert!(encode(&0u32, UpperBound(3)).is_err());
-    assert!(encode(&0u32, UpperBound(4)).is_ok());
+    assert!(encode(&0u32, Bounded(3)).is_err());
+    assert!(encode(&0u32, Bounded(4)).is_ok());

-    assert!(encode(&"abcde", UpperBound(4)).is_err());
-    assert!(encode(&"abcde", UpperBound(5)).is_ok());
+    assert!(encode(&"abcde", Bounded(4)).is_err());
+    assert!(encode(&"abcde", Bounded(5)).is_ok());
 }

 #[test]
--- a/src/writer.rs
+++ b/src/writer.rs
@ -8,12 +8,23 @@ use super::SizeLimit;

 pub type EncodingResult<T> = Result<T, EncodingError>;

+
+/// An error that can be produced during encoding.
 #[derive(Show)]
 pub enum EncodingError {
+    /// An error originating from the underlying `Writer`.
    IoError(IoError),
+    /// An object could not be encoded with the given size limit.
+    ///
+    /// This error is returned before any bytes are written to the
+    /// output `Writer`.
    SizeLimit
 }

+/// An Encoder that encodes values directly into a Writer.
+///
+/// This struct should not be used often.
+/// For most cases, prefer the `encode_into` function.
 pub struct EncoderWriter<'a, W: 'a> {
    writer: &'a mut W,
    _size_limit: SizeLimit