mirror of https://git.sr.ht/~stygianentity/bincode
Added support for char encoding/decoding
This commit is contained in:
parent
b480d2b3b3
commit
c4cb220fb2
|
|
@ -231,4 +231,51 @@ impl<'a, 'de, R: Reader<'de>, C: Config> Decode for &'a mut Decoder<R, C> {
|
|||
self.reader.read(&mut array)?;
|
||||
Ok(array)
|
||||
}
|
||||
|
||||
fn decode_char(&mut self) -> Result<char, DecodeError> {
|
||||
let mut array = [0u8; 4];
|
||||
|
||||
// Look at the first byte to see how many bytes must be read
|
||||
self.reader.read(&mut array[..1])?;
|
||||
|
||||
let width = utf8_char_width(array[0]);
|
||||
if width == 0 {
|
||||
return Err(DecodeError::InvalidCharEncoding(array));
|
||||
}
|
||||
if width == 1 {
|
||||
return Ok(array[0] as char);
|
||||
}
|
||||
|
||||
// read the remaining pain
|
||||
self.reader.read(&mut array[1..width])?;
|
||||
let res = core::str::from_utf8(&array[..width])
|
||||
.ok()
|
||||
.and_then(|s| s.chars().next())
|
||||
.ok_or(DecodeError::InvalidCharEncoding(array))?;
|
||||
Ok(res)
|
||||
}
|
||||
}
|
||||
|
||||
/// For every possible byte value, the total length in bytes of the UTF-8 sequence
/// that starts with that byte, or `0` when the byte cannot start a sequence.
///
/// The table is built at compile time; its contents are:
/// `0x00..=0x7F` -> 1, `0xC2..=0xDF` -> 2, `0xE0..=0xEF` -> 3, `0xF0..=0xF4` -> 4,
/// and everything else -> 0.
const UTF8_CHAR_WIDTH: [u8; 256] = {
    let mut widths = [0u8; 256];
    let mut byte = 0;
    while byte < 256 {
        widths[byte] = match byte {
            0x00..=0x7F => 1,
            0xC2..=0xDF => 2,
            0xE0..=0xEF => 3,
            0xF0..=0xF4 => 4,
            _ => 0,
        };
        byte += 1;
    }
    widths
};

/// Return the length of the UTF-8 sequence introduced by the leading byte `b`,
/// or `0` if `b` cannot be the first byte of a sequence.
///
/// This function is a copy of `core::str::utf8_char_width`.
const fn utf8_char_width(b: u8) -> usize {
    let index = b as usize;
    UTF8_CHAR_WIDTH[index] as usize
}
|
||||
|
|
|
|||
|
|
@ -85,6 +85,12 @@ impl<'de> Decodable for f64 {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'de> Decodable for char {
|
||||
fn decode<D: Decode>(mut decoder: D) -> Result<Self, DecodeError> {
|
||||
decoder.decode_char()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'de: 'a> BorrowDecodable<'de> for &'a [u8] {
|
||||
fn borrow_decode<D: BorrowDecode<'de>>(mut decoder: D) -> Result<Self, DecodeError> {
|
||||
let len = usize::decode(&mut decoder)?;
|
||||
|
|
@ -174,6 +180,10 @@ where
|
|||
/// Decode an array of `N` bytes by delegating to `T`'s `decode_array`.
fn decode_array<const N: usize>(&mut self) -> Result<[u8; N], DecodeError> {
|
||||
T::decode_array::<N>(self)
|
||||
}
|
||||
|
||||
/// Decode a `char` by delegating to `T`'s `decode_char`.
fn decode_char(&mut self) -> Result<char, DecodeError> {
|
||||
T::decode_char(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'de, T> BorrowDecode<'de> for &'a mut T
|
||||
|
|
|
|||
|
|
@ -66,6 +66,9 @@ pub trait Decode {
|
|||
fn decode_f64(&mut self) -> Result<f64, DecodeError>;
|
||||
/// Attempt to decode an array of `N` entries.
|
||||
fn decode_array<const N: usize>(&mut self) -> Result<[u8; N], DecodeError>;
|
||||
|
||||
/// Attempt to decode a `char`
|
||||
fn decode_char(&mut self) -> Result<char, DecodeError>;
|
||||
}
|
||||
|
||||
/// Any source that can decode basic types. This type is most notably implemented for [Decoder].
|
||||
|
|
|
|||
|
|
@ -197,4 +197,42 @@ impl<'a, W: Writer, C: Config> Encode for &'a mut Encoder<W, C> {
|
|||
/// Encode the array `val` by passing its bytes to the underlying writer in a single `write` call.
fn encode_array<const N: usize>(&mut self, val: [u8; N]) -> Result<(), EncodeError> {
|
||||
self.writer.write(&val)
|
||||
}
|
||||
|
||||
/// Encode `val` by writing it to the underlying writer through `encode_utf8`.
fn encode_char(&mut self, val: char) -> Result<(), EncodeError> {
|
||||
encode_utf8(&mut self.writer, val)
|
||||
}
|
||||
}
|
||||
|
||||
const TAG_CONT: u8 = 0b1000_0000;
|
||||
const TAG_TWO_B: u8 = 0b1100_0000;
|
||||
const TAG_THREE_B: u8 = 0b1110_0000;
|
||||
const TAG_FOUR_B: u8 = 0b1111_0000;
|
||||
const MAX_ONE_B: u32 = 0x80;
|
||||
const MAX_TWO_B: u32 = 0x800;
|
||||
const MAX_THREE_B: u32 = 0x10000;
|
||||
|
||||
fn encode_utf8(writer: &mut impl Writer, c: char) -> Result<(), EncodeError> {
|
||||
let code = c as u32;
|
||||
|
||||
if code < MAX_ONE_B {
|
||||
writer.write(&[c as u8])
|
||||
} else if code < MAX_TWO_B {
|
||||
let mut buf = [0u8; 2];
|
||||
buf[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
|
||||
buf[1] = (code & 0x3F) as u8 | TAG_CONT;
|
||||
writer.write(&buf)
|
||||
} else if code < MAX_THREE_B {
|
||||
let mut buf = [0u8; 3];
|
||||
buf[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
|
||||
buf[1] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
||||
buf[2] = (code & 0x3F) as u8 | TAG_CONT;
|
||||
writer.write(&buf)
|
||||
} else {
|
||||
let mut buf = [0u8; 4];
|
||||
buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
|
||||
buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
|
||||
buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
||||
buf[3] = (code & 0x3F) as u8 | TAG_CONT;
|
||||
writer.write(&buf)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -85,6 +85,12 @@ impl Encodeable for f64 {
|
|||
}
|
||||
}
|
||||
|
||||
impl Encodeable for char {
|
||||
fn encode<E: Encode>(&self, mut encoder: E) -> Result<(), EncodeError> {
|
||||
encoder.encode_char(*self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Encodeable for &'_ [u8] {
|
||||
fn encode<E: Encode>(&self, mut encoder: E) -> Result<(), EncodeError> {
|
||||
encoder.encode_slice(*self)
|
||||
|
|
@ -157,4 +163,8 @@ where
|
|||
/// Encode the array `val` by delegating to `T`'s `encode_array`.
fn encode_array<const N: usize>(&mut self, val: [u8; N]) -> Result<(), EncodeError> {
|
||||
T::encode_array(self, val)
|
||||
}
|
||||
|
||||
/// Encode `val` by delegating to `T`'s `encode_char`.
fn encode_char(&mut self, val: char) -> Result<(), EncodeError> {
|
||||
T::encode_char(self, val)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,4 +53,7 @@ pub trait Encode {
|
|||
fn encode_slice(&mut self, val: &[u8]) -> Result<(), EncodeError>;
|
||||
/// Encode an array. Exactly `N` bytes must be encoded, else an error should be thrown.
|
||||
fn encode_array<const N: usize>(&mut self, val: [u8; N]) -> Result<(), EncodeError>;
|
||||
|
||||
/// Encode a single utf8 char
|
||||
fn encode_char(&mut self, val: char) -> Result<(), EncodeError>;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,6 +46,9 @@ pub enum DecodeError {
|
|||
|
||||
/// The decoder tried to decode a `str`, but a UTF-8 error was encountered.
|
||||
Utf8(core::str::Utf8Error),
|
||||
|
||||
/// The decoder tried to decode a `char` and failed. The given buffer contains the bytes that are read at the moment of failure.
|
||||
InvalidCharEncoding([u8; 4]),
|
||||
}
|
||||
|
||||
/// Integer types. Used by [DecodeError]. These types have no purpose other than being shown in errors.
|
||||
|
|
|
|||
|
|
@ -12,7 +12,13 @@ where
|
|||
C: Config,
|
||||
{
|
||||
let mut buffer = [0u8; 32];
|
||||
bincode::encode_into_slice_with_config(element.clone(), &mut buffer, config).unwrap();
|
||||
let len = bincode::encode_into_slice_with_config(element.clone(), &mut buffer, config).unwrap();
|
||||
println!(
|
||||
"{:?}: {:?} ({:?})",
|
||||
element,
|
||||
&buffer[..len],
|
||||
core::any::type_name::<C>()
|
||||
);
|
||||
let decoded: V = bincode::decode_with_config(&mut buffer, config).unwrap();
|
||||
|
||||
assert_eq!(element, decoded);
|
||||
|
|
@ -68,6 +74,11 @@ fn test_numbers() {
|
|||
|
||||
the_same(5.0f32);
|
||||
the_same(5.0f64);
|
||||
|
||||
for char in "aÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö文".chars()
|
||||
{
|
||||
the_same(char);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
Loading…
Reference in New Issue