Merge pull request #81 from oli-obk/master

fix char serialization and deserialization
This commit is contained in:
Anthony Ramine 2016-06-30 14:12:11 +02:00 committed by GitHub
commit cea0ef6fed
3 changed files with 63 additions and 2 deletions

View File

@ -289,7 +289,6 @@ impl<'a, R: Read> serde::Deserializer for Deserializer<'a, R> {
None => Err(error) None => Err(error)
}); });
try!(self.read_bytes(res.len_utf8() as u64));
visitor.visit_char(res) visitor.visit_char(res)
} }

View File

@ -141,6 +141,10 @@ impl<'a, W: Write> serde::Serializer for Serializer<'a, W> {
self.writer.write_all(v.as_bytes()).map_err(SerializeError::IoError) self.writer.write_all(v.as_bytes()).map_err(SerializeError::IoError)
} }
fn serialize_char(&mut self, c: char) -> SerializeResult<()> {
self.writer.write_all(encode_utf8(c).as_slice()).map_err(SerializeError::IoError)
}
fn serialize_none(&mut self) -> SerializeResult<()> { fn serialize_none(&mut self) -> SerializeResult<()> {
self.writer.write_u8(0).map_err(wrap_io) self.writer.write_u8(0).map_err(wrap_io)
} }
@ -352,6 +356,10 @@ impl serde::Serializer for SizeChecker {
self.add_raw(v.len()) self.add_raw(v.len())
} }
fn serialize_char(&mut self, c: char) -> SerializeResult<()> {
self.add_raw(encode_utf8(c).as_slice().len())
}
fn serialize_none(&mut self) -> SerializeResult<()> { fn serialize_none(&mut self) -> SerializeResult<()> {
self.add_value(0 as u8) self.add_value(0 as u8)
} }
@ -464,3 +472,47 @@ impl serde::Serializer for SizeChecker {
Ok(()) Ok(())
} }
} }
const TAG_CONT: u8 = 0b1000_0000;
const TAG_TWO_B: u8 = 0b1100_0000;
const TAG_THREE_B: u8 = 0b1110_0000;
const TAG_FOUR_B: u8 = 0b1111_0000;
const MAX_ONE_B: u32 = 0x80;
const MAX_TWO_B: u32 = 0x800;
const MAX_THREE_B: u32 = 0x10000;
fn encode_utf8(c: char) -> EncodeUtf8 {
let code = c as u32;
let mut buf = [0; 4];
let pos = if code < MAX_ONE_B {
buf[3] = code as u8;
3
} else if code < MAX_TWO_B {
buf[2] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
buf[3] = (code & 0x3F) as u8 | TAG_CONT;
2
} else if code < MAX_THREE_B {
buf[1] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
buf[3] = (code & 0x3F) as u8 | TAG_CONT;
1
} else {
buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
buf[3] = (code & 0x3F) as u8 | TAG_CONT;
0
};
EncodeUtf8 { buf: buf, pos: pos }
}
struct EncodeUtf8 {
buf: [u8; 4],
pos: usize,
}
impl EncodeUtf8 {
fn as_slice(&self) -> &[u8] {
&self.buf[self.pos..]
}
}

View File

@ -290,6 +290,16 @@ fn too_big_deserialize() {
assert!(deserialized.is_ok()); assert!(deserialized.is_ok());
} }
#[test]
fn char_serialization() {
let chars = "Aa\0☺♪";
for c in chars.chars() {
let encoded = serialize(&c, Bounded(4)).expect("serializing char failed");
let decoded: char = deserialize(&encoded).expect("deserializing failed");
assert_eq!(decoded, c);
}
}
#[test] #[test]
fn too_big_char_decode() { fn too_big_char_decode() {
let encoded = vec![0x41]; let encoded = vec![0x41];