From 873815ab0aa8382f5982eb2f5c06c65e1d89cc2b Mon Sep 17 00:00:00 2001 From: Ty Overby Date: Mon, 16 Feb 2015 11:33:21 -0800 Subject: [PATCH] prepare for newio --- Cargo.toml | 1 + src/lib.rs | 63 +++++++++++++------------ src/reader.rs | 125 ++++++++++++++++++++++++++++++++++++++++++++++---- src/writer.rs | 15 +++--- 4 files changed, 154 insertions(+), 50 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2c03135..a56f7f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,4 @@ rustc-serialize = "0.2.10" [dependencies.byteorder] git = "https://github.com/BurntSushi/byteorder.git" +branch = "newio" diff --git a/src/lib.rs b/src/lib.rs index e6e8499..6d2c716 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,18 +2,21 @@ #![crate_type = "rlib"] #![crate_type = "dylib"] -#![allow(unstable)] +#![feature(hash, core, io, unicode, collections)] extern crate "rustc-serialize" as rustc_serialize; extern crate byteorder; +extern crate unicode; -use std::old_io::{Buffer, MemWriter}; +//use std::old_io::{Buffer, MemWriter}; use rustc_serialize::{Encodable, Decodable}; pub use writer::{EncoderWriter, EncodingResult, EncodingError}; pub use reader::{DecoderReader, DecodingResult, DecodingError}; use writer::SizeChecker; +use std::io::{Write, BufRead}; + mod writer; mod reader; #[cfg(test)] mod test; @@ -43,30 +46,6 @@ mod reader; ///! } ///! ``` ///! -///! ### Using Into/From Functions -///! -///! ```rust -///! #![allow(unstable)] -///! extern crate bincode; -///! use std::old_io::pipe::PipeStream; -///! use std::old_io::BufferedReader; -///! fn main() { -///! // The pipes that we will be using to send values across. -///! let streams = PipeStream::pair().unwrap(); -///! let (mut reader, mut writer) = (BufferedReader::new(streams.reader), -///! streams.writer); -///! // The object that we will send across. -///! let target = Some(5u32); -///! // The max-size of the encoded bytes. -///! let limit = bincode::SizeLimit::Bounded(10); -///! -///! // Do the actual encoding and decoding. -///! bincode::encode_into(&target, &mut writer, limit).ok(); -///! let out: Option = bincode::decode_from(&mut reader, limit).unwrap(); -///! assert_eq!(target, out); -///! } -///! ``` -///! /// A limit on the size of bytes to be read or written. /// @@ -97,9 +76,19 @@ pub enum SizeLimit { /// If the encoding would take more bytes than allowed by `size_limit`, /// an error is returned. pub fn encode(t: &T, size_limit: SizeLimit) -> EncodingResult> { - let mut w = MemWriter::new(); - match encode_into(t, &mut w, size_limit) { - Ok(()) => Ok(w.into_inner()), + // Since we are putting values directly into a vector, we can do size + // computation out here and pre-allocate a buffer of *exactly* + // the right size. + let mut w = if let SizeLimit::Bounded(l) = size_limit { + let actual_size = encoded_size_bounded(t, l); + let actual_size = try!(actual_size.ok_or(EncodingError::SizeLimit)); + Vec::with_capacity(actual_size as usize) + } else { + vec![] + }; + + match encode_into(t, &mut w, SizeLimit::Infinite) { + Ok(()) => Ok(w), Err(e) => Err(e) } } @@ -121,7 +110,7 @@ pub fn decode(b: &[u8]) -> DecodingResult { /// If this returns an `EncodingError` (other than SizeLimit), assume that the /// writer is in an invalid state, as writing could bail out in the middle of /// encoding. -pub fn encode_into(t: &T, w: &mut W, size_limit: SizeLimit) -> EncodingResult<()> { +pub fn encode_into(t: &T, w: &mut W, size_limit: SizeLimit) -> EncodingResult<()> { try!(match size_limit { SizeLimit::Infinite => Ok(()), SizeLimit::Bounded(x) => { @@ -130,7 +119,7 @@ pub fn encode_into(t: &T, w: &mut W, size_limit: SizeLi } }); - t.encode(&mut writer::EncoderWriter::new(w, size_limit)) + t.encode(&mut writer::EncoderWriter::new(w)) } /// Decoes an object directly from a `Buffer`ed Reader. @@ -142,7 +131,7 @@ pub fn encode_into(t: &T, w: &mut W, size_limit: SizeLi /// If this returns an `DecodingError`, assume that the buffer that you passed /// in is in an invalid state, as the error could be returned during any point /// in the reading. -pub fn decode_from(r: &mut R, size_limit: SizeLimit) -> +pub fn decode_from(r: &mut R, size_limit: SizeLimit) -> DecodingResult { Decodable::decode(&mut reader::DecoderReader::new(r, size_limit)) } @@ -158,3 +147,13 @@ pub fn encoded_size(t: &T) -> u64 { t.encode(&mut size_checker).ok(); size_checker.written } + +/// Given a maximum size limit, check how large an object would be if it +/// were to be encoded. +/// +/// If it can be encoded in `max` or fewer bytes, that number will be returned +/// inside `Some`. If it goes over bounds, then None is returned. +pub fn encoded_size_bounded(t: &T, max: u64) -> Option { + let mut size_checker = SizeChecker::new(max); + t.encode(&mut size_checker).ok().map(|_| size_checker.written) +} diff --git a/src/reader.rs b/src/reader.rs index 386323d..f5b9709 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -1,11 +1,14 @@ -use std::old_io::{Buffer, Reader, IoError}; +use std::io::{BufRead, Read}; +use std::io::Error as IoError; +use std::io::Result as IoResult; use std::num::{cast, NumCast}; use std::error::{Error, FromError}; use std::fmt; use rustc_serialize::Decoder; -use byteorder::{BigEndian, ReaderBytesExt}; +use byteorder::{BigEndian, ReadBytesExt}; +use unicode; use super::SizeLimit; @@ -98,7 +101,7 @@ pub struct DecoderReader<'a, R: 'a> { read: u64 } -impl<'a, R: Buffer> DecoderReader<'a, R> { +impl<'a, R: BufRead> DecoderReader<'a, R> { pub fn new(r: &'a mut R, size_limit: SizeLimit) -> DecoderReader<'a, R> { DecoderReader { reader: r, @@ -125,7 +128,7 @@ impl <'a, A> DecoderReader<'a, A> { } } -impl<'a, R: Buffer> Decoder for DecoderReader<'a, R> { +impl<'a, R: BufRead> Decoder for DecoderReader<'a, R> { type Error = DecodingError; fn read_nil(&mut self) -> DecodingResult<()> { @@ -189,16 +192,46 @@ impl<'a, R: Buffer> Decoder for DecoderReader<'a, R> { self.reader.read_f32::().map_err(wrap_io) } fn read_char(&mut self) -> DecodingResult { - let c = try!(self.reader.read_char().map_err(wrap_io)); - try!(self.read_bytes(c.len_utf8())); - Ok(c) + use std::str; + let error = DecodingError::InvalidEncoding(InvalidEncoding { + desc: "Invalid char encoding", + detail: None + }); + + let mut buf = [0]; + + let _ = try!(self.reader.read(&mut buf[])); + let first_byte = buf[0]; + let width = unicode::str::utf8_char_width(first_byte); + if width == 1 { return Ok(first_byte as char) } + if width == 0 { return Err(error)} + let mut buf = [first_byte, 0, 0, 0]; + { + let mut start = 1; + while start < width { + match try!(self.reader.read(&mut buf[start .. width])) { + n if n == width - start => break, + n if n < width - start => { start += n; } + _ => return Err(error) + } + } + } + + let res = try!(match str::from_utf8(&buf[..width]).ok() { + Some(s) => Ok(s.char_at(0)), + None => Err(error) + }); + + try!(self.read_bytes(res.len_utf8())); + Ok(res) } + fn read_str(&mut self) -> DecodingResult { let len = try!(self.read_usize()); - try!(self.read_bytes(len)); - let vector = try!(self.reader.read_exact(len)); + + let vector = try!(read_exact(&mut self.reader, len)); match String::from_utf8(vector) { Ok(s) => Ok(s), Err(err) => Err(DecodingError::InvalidEncoding(InvalidEncoding { @@ -309,3 +342,77 @@ impl<'a, R: Buffer> Decoder for DecoderReader<'a, R> { }) } } + +fn read_at_least(reader: &mut R, min: usize, buf: &mut [u8]) -> IoResult { + use std::io::ErrorKind; + if min > buf.len() { + return Err(IoError::new( + ErrorKind::InvalidInput, "the buffer is too short", None)); + } + + let mut read = 0; + while read < min { + let mut zeroes = 0; + loop { + match reader.read(&mut buf[read..]) { + Ok(0) => { + zeroes += 1; + if zeroes >= 1000 { + return Err(IoError::new(ErrorKind::Other, + "no progress was made", + None )); + } + } + Ok(n) => { + read += n; + break; + } + err@Err(_) => return err + } + } + } + Ok(read) +} + +unsafe fn slice_vec_capacity<'a, T>(v: &'a mut Vec, start: usize, end: usize) -> &'a mut [T] { + use std::raw::Slice; + use std::ptr::PtrExt; + use std::mem::transmute; + + assert!(start <= end); + assert!(end <= v.capacity()); + transmute(Slice { + data: v.as_ptr().offset(start as isize), + len: end - start + }) +} + + +fn push_at_least(reader: &mut R, min: usize, len: usize, buf: &mut Vec) -> IoResult { + use std::io::ErrorKind; + if min > len { + return Err(IoError::new(ErrorKind::InvalidInput, "the buffer is too short", None)); + } + + let start_len = buf.len(); + buf.reserve(len); + + + let mut read = 0; + while read < min { + read += { + let s = unsafe { slice_vec_capacity(buf, start_len + read, start_len + len) }; + try!(read_at_least(reader, 1, s)) + }; + unsafe { buf.set_len(start_len + read) }; + } + Ok(read) +} + +fn read_exact(reader: &mut R, len: usize) -> IoResult> { + let mut buf = Vec::with_capacity(len); + match push_at_least(reader, len, len, &mut buf) { + Ok(_) => Ok(buf), + Err(e) => Err(e), + } +} diff --git a/src/writer.rs b/src/writer.rs index ea6d866..fe0c3ac 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -1,12 +1,11 @@ -use std::old_io::{Writer, IoError}; +use std::io::Write; +use std::io::Error as IoError; use std::error::Error; use std::num::Int; use std::fmt; use rustc_serialize::Encoder; -use byteorder::{BigEndian, WriterBytesExt}; - -use super::SizeLimit; +use byteorder::{BigEndian, WriteBytesExt}; pub type EncodingResult = Result; @@ -29,7 +28,6 @@ pub enum EncodingError { /// For most cases, prefer the `encode_into` function. pub struct EncoderWriter<'a, W: 'a> { writer: &'a mut W, - _size_limit: SizeLimit } pub struct SizeChecker { @@ -66,11 +64,10 @@ impl Error for EncodingError { } } -impl <'a, W: Writer> EncoderWriter<'a, W> { - pub fn new(w: &'a mut W, size_limit: SizeLimit) -> EncoderWriter<'a, W> { +impl <'a, W: Write> EncoderWriter<'a, W> { + pub fn new(w: &'a mut W) -> EncoderWriter<'a, W> { EncoderWriter { writer: w, - _size_limit: size_limit } } } @@ -98,7 +95,7 @@ impl SizeChecker { } } -impl<'a, W: Writer> Encoder for EncoderWriter<'a, W> { +impl<'a, W: Write> Encoder for EncoderWriter<'a, W> { type Error = EncodingError; fn emit_nil(&mut self) -> EncodingResult<()> { Ok(()) }