mirror of https://git.sr.ht/~stygianentity/bincode
Added support for char encoding/decoding
This commit is contained in:
parent
b480d2b3b3
commit
c4cb220fb2
|
|
@ -231,4 +231,51 @@ impl<'a, 'de, R: Reader<'de>, C: Config> Decode for &'a mut Decoder<R, C> {
|
||||||
self.reader.read(&mut array)?;
|
self.reader.read(&mut array)?;
|
||||||
Ok(array)
|
Ok(array)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn decode_char(&mut self) -> Result<char, DecodeError> {
|
||||||
|
let mut array = [0u8; 4];
|
||||||
|
|
||||||
|
// Look at the first byte to see how many bytes must be read
|
||||||
|
self.reader.read(&mut array[..1])?;
|
||||||
|
|
||||||
|
let width = utf8_char_width(array[0]);
|
||||||
|
if width == 0 {
|
||||||
|
return Err(DecodeError::InvalidCharEncoding(array));
|
||||||
|
}
|
||||||
|
if width == 1 {
|
||||||
|
return Ok(array[0] as char);
|
||||||
|
}
|
||||||
|
|
||||||
|
// read the remaining pain
|
||||||
|
self.reader.read(&mut array[1..width])?;
|
||||||
|
let res = core::str::from_utf8(&array[..width])
|
||||||
|
.ok()
|
||||||
|
.and_then(|s| s.chars().next())
|
||||||
|
.ok_or(DecodeError::InvalidCharEncoding(array))?;
|
||||||
|
Ok(res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Length in bytes of a UTF-8 sequence, indexed by the value of its first byte.
///
/// An entry of `0` marks a byte that cannot start a sequence.
const UTF8_CHAR_WIDTH: [u8; 256] = [
    // 0x00..=0x7F: single-byte sequences
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x7F
    // 0x80..=0xBF: never a valid first byte
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x9F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xAF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xBF
    // 0xC0 and 0xC1 are rejected; 0xC2..=0xDF start two-byte sequences
    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xCF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xDF
    // 0xE0..=0xEF start three-byte sequences
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF
    // 0xF0..=0xF4 start four-byte sequences; 0xF5..=0xFF are rejected
    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF
];

/// Returns how many bytes the UTF-8 sequence starting with `b` occupies, or `0` when `b`
/// cannot be the first byte of a sequence.
///
/// This function is a copy of `core::str::utf8_char_width`.
const fn utf8_char_width(b: u8) -> usize {
    let index = b as usize;
    UTF8_CHAR_WIDTH[index] as usize
}
|
||||||
|
|
|
||||||
|
|
@ -85,6 +85,12 @@ impl<'de> Decodable for f64 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'de> Decodable for char {
|
||||||
|
fn decode<D: Decode>(mut decoder: D) -> Result<Self, DecodeError> {
|
||||||
|
decoder.decode_char()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<'a, 'de: 'a> BorrowDecodable<'de> for &'a [u8] {
|
impl<'a, 'de: 'a> BorrowDecodable<'de> for &'a [u8] {
|
||||||
fn borrow_decode<D: BorrowDecode<'de>>(mut decoder: D) -> Result<Self, DecodeError> {
|
fn borrow_decode<D: BorrowDecode<'de>>(mut decoder: D) -> Result<Self, DecodeError> {
|
||||||
let len = usize::decode(&mut decoder)?;
|
let len = usize::decode(&mut decoder)?;
|
||||||
|
|
@ -174,6 +180,10 @@ where
|
||||||
fn decode_array<const N: usize>(&mut self) -> Result<[u8; N], DecodeError> {
|
fn decode_array<const N: usize>(&mut self) -> Result<[u8; N], DecodeError> {
|
||||||
T::decode_array::<N>(self)
|
T::decode_array::<N>(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn decode_char(&mut self) -> Result<char, DecodeError> {
|
||||||
|
T::decode_char(self)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, 'de, T> BorrowDecode<'de> for &'a mut T
|
impl<'a, 'de, T> BorrowDecode<'de> for &'a mut T
|
||||||
|
|
|
||||||
|
|
@ -66,6 +66,9 @@ pub trait Decode {
|
||||||
fn decode_f64(&mut self) -> Result<f64, DecodeError>;
|
fn decode_f64(&mut self) -> Result<f64, DecodeError>;
|
||||||
/// Attempt to decode an array of `N` entries.
|
/// Attempt to decode an array of `N` entries.
|
||||||
fn decode_array<const N: usize>(&mut self) -> Result<[u8; N], DecodeError>;
|
fn decode_array<const N: usize>(&mut self) -> Result<[u8; N], DecodeError>;
|
||||||
|
|
||||||
|
/// Attempt to decode a `char`
|
||||||
|
fn decode_char(&mut self) -> Result<char, DecodeError>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Any source that can decode basic types. This type is most notably implemented for [Decoder].
|
/// Any source that can decode basic types. This type is most notably implemented for [Decoder].
|
||||||
|
|
|
||||||
|
|
@ -197,4 +197,42 @@ impl<'a, W: Writer, C: Config> Encode for &'a mut Encoder<W, C> {
|
||||||
fn encode_array<const N: usize>(&mut self, val: [u8; N]) -> Result<(), EncodeError> {
|
fn encode_array<const N: usize>(&mut self, val: [u8; N]) -> Result<(), EncodeError> {
|
||||||
self.writer.write(&val)
|
self.writer.write(&val)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn encode_char(&mut self, val: char) -> Result<(), EncodeError> {
|
||||||
|
encode_utf8(&mut self.writer, val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const TAG_CONT: u8 = 0b1000_0000;
|
||||||
|
const TAG_TWO_B: u8 = 0b1100_0000;
|
||||||
|
const TAG_THREE_B: u8 = 0b1110_0000;
|
||||||
|
const TAG_FOUR_B: u8 = 0b1111_0000;
|
||||||
|
const MAX_ONE_B: u32 = 0x80;
|
||||||
|
const MAX_TWO_B: u32 = 0x800;
|
||||||
|
const MAX_THREE_B: u32 = 0x10000;
|
||||||
|
|
||||||
|
fn encode_utf8(writer: &mut impl Writer, c: char) -> Result<(), EncodeError> {
|
||||||
|
let code = c as u32;
|
||||||
|
|
||||||
|
if code < MAX_ONE_B {
|
||||||
|
writer.write(&[c as u8])
|
||||||
|
} else if code < MAX_TWO_B {
|
||||||
|
let mut buf = [0u8; 2];
|
||||||
|
buf[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
|
||||||
|
buf[1] = (code & 0x3F) as u8 | TAG_CONT;
|
||||||
|
writer.write(&buf)
|
||||||
|
} else if code < MAX_THREE_B {
|
||||||
|
let mut buf = [0u8; 3];
|
||||||
|
buf[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
|
||||||
|
buf[1] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
||||||
|
buf[2] = (code & 0x3F) as u8 | TAG_CONT;
|
||||||
|
writer.write(&buf)
|
||||||
|
} else {
|
||||||
|
let mut buf = [0u8; 4];
|
||||||
|
buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
|
||||||
|
buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
|
||||||
|
buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
||||||
|
buf[3] = (code & 0x3F) as u8 | TAG_CONT;
|
||||||
|
writer.write(&buf)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -85,6 +85,12 @@ impl Encodeable for f64 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Encodeable for char {
|
||||||
|
fn encode<E: Encode>(&self, mut encoder: E) -> Result<(), EncodeError> {
|
||||||
|
encoder.encode_char(*self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Encodeable for &'_ [u8] {
|
impl Encodeable for &'_ [u8] {
|
||||||
fn encode<E: Encode>(&self, mut encoder: E) -> Result<(), EncodeError> {
|
fn encode<E: Encode>(&self, mut encoder: E) -> Result<(), EncodeError> {
|
||||||
encoder.encode_slice(*self)
|
encoder.encode_slice(*self)
|
||||||
|
|
@ -157,4 +163,8 @@ where
|
||||||
fn encode_array<const N: usize>(&mut self, val: [u8; N]) -> Result<(), EncodeError> {
|
fn encode_array<const N: usize>(&mut self, val: [u8; N]) -> Result<(), EncodeError> {
|
||||||
T::encode_array(self, val)
|
T::encode_array(self, val)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn encode_char(&mut self, val: char) -> Result<(), EncodeError> {
|
||||||
|
T::encode_char(self, val)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -53,4 +53,7 @@ pub trait Encode {
|
||||||
fn encode_slice(&mut self, val: &[u8]) -> Result<(), EncodeError>;
|
fn encode_slice(&mut self, val: &[u8]) -> Result<(), EncodeError>;
|
||||||
/// Encode an array. Exactly `N` bytes must be encoded, else an error should be thrown.
|
/// Encode an array. Exactly `N` bytes must be encoded, else an error should be thrown.
|
||||||
fn encode_array<const N: usize>(&mut self, val: [u8; N]) -> Result<(), EncodeError>;
|
fn encode_array<const N: usize>(&mut self, val: [u8; N]) -> Result<(), EncodeError>;
|
||||||
|
|
||||||
|
/// Encode a single utf8 char
|
||||||
|
fn encode_char(&mut self, val: char) -> Result<(), EncodeError>;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,9 @@ pub enum DecodeError {
|
||||||
|
|
||||||
/// The decoder tried to decode a `str`, but a UTF-8 error was encountered.
|
/// The decoder tried to decode a `str`, but a UTF-8 error was encountered.
|
||||||
Utf8(core::str::Utf8Error),
|
Utf8(core::str::Utf8Error),
|
||||||
|
|
||||||
|
/// The decoder tried to decode a `char` and failed. The given buffer contains the bytes that had been read at the moment of failure.
|
||||||
|
InvalidCharEncoding([u8; 4]),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Integer types. Used by [DecodeError]. These types have no purpose other than being shown in errors.
|
/// Integer types. Used by [DecodeError]. These types have no purpose other than being shown in errors.
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,13 @@ where
|
||||||
C: Config,
|
C: Config,
|
||||||
{
|
{
|
||||||
let mut buffer = [0u8; 32];
|
let mut buffer = [0u8; 32];
|
||||||
bincode::encode_into_slice_with_config(element.clone(), &mut buffer, config).unwrap();
|
let len = bincode::encode_into_slice_with_config(element.clone(), &mut buffer, config).unwrap();
|
||||||
|
println!(
|
||||||
|
"{:?}: {:?} ({:?})",
|
||||||
|
element,
|
||||||
|
&buffer[..len],
|
||||||
|
core::any::type_name::<C>()
|
||||||
|
);
|
||||||
let decoded: V = bincode::decode_with_config(&mut buffer, config).unwrap();
|
let decoded: V = bincode::decode_with_config(&mut buffer, config).unwrap();
|
||||||
|
|
||||||
assert_eq!(element, decoded);
|
assert_eq!(element, decoded);
|
||||||
|
|
@ -68,6 +74,11 @@ fn test_numbers() {
|
||||||
|
|
||||||
the_same(5.0f32);
|
the_same(5.0f32);
|
||||||
the_same(5.0f64);
|
the_same(5.0f64);
|
||||||
|
|
||||||
|
for char in "aÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö文".chars()
|
||||||
|
{
|
||||||
|
the_same(char);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
Loading…
Reference in New Issue