From 2eee182f1fa83997b6a8f6019e43893fa22aa4cf Mon Sep 17 00:00:00 2001
From: Oliver Schneider
Date: Thu, 30 Jun 2016 14:02:07 +0200
Subject: [PATCH] fix char serialization and deserialization

---
 src/serde/reader.rs |  3 +--
 src/serde/writer.rs | 52 +++++++++++++++++++++++++++++++++++++++++++++
 tests/test.rs       | 10 +++++++++
 3 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/src/serde/reader.rs b/src/serde/reader.rs
index 854262a..3d58e34 100644
--- a/src/serde/reader.rs
+++ b/src/serde/reader.rs
@@ -289,7 +289,6 @@ impl<'a, R: Read> serde::Deserializer for Deserializer<'a, R> {
             None => Err(error)
         });
 
-        try!(self.read_bytes(res.len_utf8() as u64));
         visitor.visit_char(res)
     }
 
@@ -345,7 +344,7 @@ impl<'a, R: Read> serde::Deserializer for Deserializer<'a, R> {
         visitor.visit_seq(TupleVisitor(self))
     }
 
-    fn deserialize_fixed_size_array<V>(&mut self, 
+    fn deserialize_fixed_size_array<V>(&mut self,
                                        _: usize,
                                        visitor: V) -> DeserializeResult<V::Value>
         where V: serde::de::Visitor,
diff --git a/src/serde/writer.rs b/src/serde/writer.rs
index 6469062..6e1afb9 100644
--- a/src/serde/writer.rs
+++ b/src/serde/writer.rs
@@ -141,6 +141,10 @@ impl<'a, W: Write> serde::Serializer for Serializer<'a, W> {
         self.writer.write_all(v.as_bytes()).map_err(SerializeError::IoError)
     }
 
+    fn serialize_char(&mut self, c: char) -> SerializeResult<()> {
+        self.writer.write_all(encode_utf8(c).as_slice()).map_err(SerializeError::IoError)
+    }
+
     fn serialize_none(&mut self) -> SerializeResult<()> {
         self.writer.write_u8(0).map_err(wrap_io)
     }
@@ -352,6 +356,10 @@ impl serde::Serializer for SizeChecker {
         self.add_raw(v.len())
     }
 
+    fn serialize_char(&mut self, c: char) -> SerializeResult<()> {
+        self.add_raw(encode_utf8(c).as_slice().len())
+    }
+
     fn serialize_none(&mut self) -> SerializeResult<()> {
         self.add_value(0 as u8)
     }
@@ -464,3 +472,47 @@ impl serde::Serializer for SizeChecker {
         Ok(())
     }
 }
+
+const TAG_CONT: u8 = 0b1000_0000;
+const TAG_TWO_B: u8 = 0b1100_0000;
+const TAG_THREE_B: u8 = 0b1110_0000;
+const TAG_FOUR_B: u8 = 0b1111_0000;
+const MAX_ONE_B: u32 = 0x80;
+const MAX_TWO_B: u32 = 0x800;
+const MAX_THREE_B: u32 = 0x10000;
+
+fn encode_utf8(c: char) -> EncodeUtf8 {
+    let code = c as u32;
+    let mut buf = [0; 4];
+    let pos = if code < MAX_ONE_B {
+        buf[3] = code as u8;
+        3
+    } else if code < MAX_TWO_B {
+        buf[2] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
+        buf[3] = (code & 0x3F) as u8 | TAG_CONT;
+        2
+    } else if code < MAX_THREE_B {
+        buf[1] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
+        buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+        buf[3] = (code & 0x3F) as u8 | TAG_CONT;
+        1
+    } else {
+        buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
+        buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
+        buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+        buf[3] = (code & 0x3F) as u8 | TAG_CONT;
+        0
+    };
+    EncodeUtf8 { buf: buf, pos: pos }
+}
+
+struct EncodeUtf8 {
+    buf: [u8; 4],
+    pos: usize,
+}
+
+impl EncodeUtf8 {
+    fn as_slice(&self) -> &[u8] {
+        &self.buf[self.pos..]
+    }
+}
diff --git a/tests/test.rs b/tests/test.rs
index f5f067c..9039e9e 100644
--- a/tests/test.rs
+++ b/tests/test.rs
@@ -290,6 +290,16 @@ fn too_big_deserialize() {
     assert!(deserialized.is_ok());
 }
 
+#[test]
+fn char_serialization() {
+    let chars = "Aa\0☺♪";
+    for c in chars.chars() {
+        let encoded = serialize(&c, Bounded(4)).expect("serializing char failed");
+        let decoded: char = deserialize(&encoded).expect("deserializing failed");
+        assert_eq!(decoded, c);
+    }
+}
+
 #[test]
 fn too_big_char_decode() {
     let encoded = vec![0x41];