refactor(parser): refactor number parsing to support generic number types

This commit is contained in:
Kat Marchán 2024-10-08 22:44:00 -07:00
parent 0940bb1608
commit fb7c92b59f
No known key found for this signature in database
GPG Key ID: AEB529C08A3C7E9E
3 changed files with 208 additions and 87 deletions

View File

@ -17,6 +17,7 @@ span = []
[dependencies]
miette = "7.2.0"
num = "0.4.2"
thiserror = "1.0.40"
winnow = { version = "0.6.20", features = ["alloc", "unstable-recover"] }

View File

@ -78,7 +78,7 @@ pub struct KdlDiagnostic {
pub kind: KdlErrorKind,
}
/// A type reprenting additional information specific to the type of error being returned.
/// A type representing additional information specific to the type of error being returned.
#[derive(Debug, Diagnostic, Clone, Eq, PartialEq, Error)]
pub enum KdlErrorKind {
/// An error occurred while parsing an integer.
@ -91,6 +91,11 @@ pub enum KdlErrorKind {
#[diagnostic(code(kdl::parse_float))]
ParseFloatError(ParseFloatError),
/// Tried to parse a negative number as an unsigned integer.
#[error("Tried to parse a negative number as an unsigned integer.")]
#[diagnostic(code(kdl::negative_unsigned))]
NegativeUnsignedError,
/// Generic parsing error. The given context string denotes the component
/// that failed to parse.
#[error("Expected {0}.")]

View File

@ -5,6 +5,7 @@ use std::{
use miette::{Severity, SourceSpan};
use num::CheckedMul;
use winnow::{
ascii::{digit1, hex_digit1, oct_digit1, Caseless},
combinator::{
@ -128,6 +129,24 @@ impl<'a> FromExternalError<Input<'a>, ParseFloatError> for KdlParseError {
}
}
/// Marker error produced when a negative sign is applied to a number that is
/// being parsed into a type which cannot represent the negated value.
struct NegativeUnsignedError;

/// Converts a [`NegativeUnsignedError`] raised inside a parser into a
/// [`KdlParseError`] tagged with [`KdlErrorKind::NegativeUnsignedError`].
impl<'a> FromExternalError<Input<'a>, NegativeUnsignedError> for KdlParseError {
    fn from_external_error(_: &Input<'a>, _: ErrorKind, _: NegativeUnsignedError) -> Self {
        // Only the error kind is known at this point; every other field is
        // left unset.
        Self {
            kind: Some(KdlErrorKind::NegativeUnsignedError),
            span: None,
            label: None,
            help: None,
            context: None,
        }
    }
}
impl<I: Stream + Location> FromRecoverableError<I, Self> for KdlParseError {
#[inline]
fn from_recoverable_error(
@ -639,7 +658,7 @@ fn signed_ident(input: &mut Input<'_>) -> PResult<()> {
/// `dotted-ident := sign? '.' ((identifier-char - digit) identifier-char*)?`
fn dotted_ident(input: &mut Input<'_>) -> PResult<()> {
(
opt(sign),
opt(signum),
".",
not(digit1),
repeat(0.., identifier_char).map(|_: ()| ()),
@ -1214,30 +1233,30 @@ fn multi_line_comment_test() {
/// `number := keyword-number | hex | octal | binary | decimal`
fn number(input: &mut Input<'_>) -> PResult<KdlValue> {
alt((hex, octal, binary, float, integer)).parse_next(input)
alt((float_value, integer_value)).parse_next(input)
}
/// ```text
/// decimal := sign? integer ('.' integer)? exponent?
/// exponent := ('e' | 'E') sign? integer
/// ```
fn float(input: &mut Input<'_>) -> PResult<KdlValue> {
/// Parses a floating-point literal and wraps the result in
/// [`KdlValue::Float`].
fn float_value(input: &mut Input<'_>) -> PResult<KdlValue> {
    let mut wrapped = float.map(KdlValue::Float);
    wrapped.parse_next(input)
}
fn float<T: ParseFloat>(input: &mut Input<'_>) -> PResult<T> {
alt((
(
integer,
opt(preceded('.', cut_err(integer_base))),
decimal::<i128>,
opt(preceded('.', cut_err(udecimal::<i128>))),
Caseless("e"),
opt(one_of(['-', '+'])),
cut_err(integer_base),
cut_err(udecimal::<i128>),
)
.take(),
(integer, '.', cut_err(integer_base)).take(),
(decimal::<i128>, '.', cut_err(udecimal::<i128>)).take(),
))
.try_map(|float_str| {
str::replace(float_str, "_", "")
.parse::<f64>()
.map(KdlValue::Float)
})
.try_map(|float_str| T::parse_float(&str::replace(float_str, "_", "")))
.context(lbl("float"))
.parse_next(input)
}
@ -1248,19 +1267,21 @@ fn float_test() {
use winnow::token::take;
assert_eq!(
float.parse(new_input("12_34.56")).unwrap(),
float_value.parse(new_input("12_34.56")).unwrap(),
KdlValue::Float(1234.56)
);
assert_eq!(
float.parse(new_input("1234_.56")).unwrap(),
float_value.parse(new_input("1234_.56")).unwrap(),
KdlValue::Float(1234.56)
);
assert_eq!(
(float, take(1usize)).parse(new_input("1234.56c")).unwrap(),
(float_value, take(1usize))
.parse(new_input("1234.56c"))
.unwrap(),
(KdlValue::Float(1234.56), "c")
);
assert!(float.parse(new_input("_1234.56")).is_err());
assert!(float.parse(new_input("1234a.56")).is_err());
assert!(float_value.parse(new_input("_1234.56")).is_err());
assert!(float_value.parse(new_input("1234a.56")).is_err());
assert_eq!(
value
.parse(new_input("2.5"))
@ -1270,32 +1291,37 @@ fn float_test() {
);
}
/// Parses an integer literal and wraps the result in [`KdlValue::Integer`].
///
/// The alternatives are attempted in this order: hexadecimal, octal, binary,
/// and finally plain decimal.
fn integer_value(input: &mut Input<'_>) -> PResult<KdlValue> {
    let any_radix = alt((hex, octal, binary, decimal));
    any_radix.map(KdlValue::Integer).parse_next(input)
}
/// Non-float decimal
fn integer(input: &mut Input<'_>) -> PResult<KdlValue> {
let mult = sign.parse_next(input)?;
integer_base
.map(|x| KdlValue::Integer(x * mult))
.context(lbl("integer"))
/// Parses an optionally signed base-10 integer into `T`.
///
/// The sign is consumed first, then the unsigned magnitude. When the sign is
/// negative the magnitude is negated through [`MaybeNegatable::negated`]; if
/// that yields `None` the parse fails with `NegativeUnsignedError`.
fn decimal<T: FromStrRadix + MaybeNegatable>(input: &mut Input<'_>) -> PResult<T> {
    let is_positive = signum.parse_next(input)?;
    udecimal::<T>
        .try_map(|magnitude| {
            if !is_positive {
                return magnitude.negated().ok_or(NegativeUnsignedError);
            }
            Ok(magnitude)
        })
        .parse_next(input)
}
#[cfg(test)]
#[test]
fn integer_test() {
assert_eq!(
integer.parse(new_input("12_34")).unwrap(),
KdlValue::Integer(1234)
);
assert_eq!(
integer.parse(new_input("1234_")).unwrap(),
KdlValue::Integer(1234)
);
assert!(integer.parse(new_input("_1234")).is_err());
assert!(integer.parse(new_input("1234a")).is_err());
fn decimal_test() {
assert_eq!(decimal::<i128>.parse(new_input("12_34")).unwrap(), 1234);
assert_eq!(decimal::<i128>.parse(new_input("1234_")).unwrap(), 1234);
assert!(decimal::<i128>.parse(new_input("_1234")).is_err());
assert!(decimal::<i128>.parse(new_input("1234a")).is_err());
}
/// `integer := digit (digit | '_')*`
fn integer_base(input: &mut Input<'_>) -> PResult<i128> {
fn udecimal<T: FromStrRadix>(input: &mut Input<'_>) -> PResult<T> {
(
digit1,
cut_err(repeat(
@ -1304,14 +1330,26 @@ fn integer_base(input: &mut Input<'_>) -> PResult<i128> {
)),
)
.try_map(|(l, r): (&str, Vec<&str>)| {
format!("{l}{}", str::replace(&r.join(""), "_", "")).parse()
T::from_str_radix(&format!("{l}{}", str::replace(&r.join(""), "_", "")), 10)
})
.parse_next(input)
}
/// `hex := sign? '0x' hex-digit (hex-digit | '_')*`
fn hex(input: &mut Input<'_>) -> PResult<KdlValue> {
let mult = sign.parse_next(input)?;
/// Parses an optionally signed hexadecimal integer into `T`.
///
/// The sign is read before the `uhex` magnitude. A negative sign negates the
/// magnitude via [`MaybeNegatable::negated`]; a `None` result is reported as
/// `NegativeUnsignedError`.
fn hex<T: FromStrRadix + MaybeNegatable>(input: &mut Input<'_>) -> PResult<T> {
    let positive = signum.parse_next(input)?;
    uhex::<T>
        .try_map(|magnitude| match positive {
            true => Ok(magnitude),
            false => magnitude.negated().ok_or(NegativeUnsignedError),
        })
        .parse_next(input)
}
fn uhex<T: FromStrRadix>(input: &mut Input<'_>) -> PResult<T> {
alt(("0x", "0X")).parse_next(input)?;
cut_err((
hex_digit1,
@ -1321,9 +1359,7 @@ fn hex(input: &mut Input<'_>) -> PResult<KdlValue> {
),
))
.try_map(|(l, r): (&str, Vec<&str>)| {
i128::from_str_radix(&format!("{l}{}", str::replace(&r.join(""), "_", "")), 16)
.map(|x| x * mult)
.map(KdlValue::Integer)
T::from_str_radix(&format!("{l}{}", str::replace(&r.join(""), "_", "")), 16)
})
.context(lbl("hexadecimal"))
.parse_next(input)
@ -1333,30 +1369,43 @@ fn hex(input: &mut Input<'_>) -> PResult<KdlValue> {
#[test]
fn test_hex() {
assert_eq!(
hex.parse(new_input("0xdead_beef123")).unwrap(),
KdlValue::Integer(0xdeadbeef123)
hex::<i128>.parse(new_input("0xdead_beef123")).unwrap(),
0xdeadbeef123
);
assert_eq!(
hex.parse(new_input("0xDeAd_BeEf123")).unwrap(),
KdlValue::Integer(0xdeadbeef123)
hex::<i128>.parse(new_input("0xDeAd_BeEf123")).unwrap(),
0xdeadbeef123
);
assert_eq!(
hex.parse(new_input("0xdeadbeef123_")).unwrap(),
KdlValue::Integer(0xdeadbeef123)
hex::<i128>.parse(new_input("0xdeadbeef123_")).unwrap(),
0xdeadbeef123
);
assert!(
hex.parse(new_input("0xABCDEF0123456789abcdef0123456789"))
hex::<i128>
.parse(new_input("0xABCDEF0123456789abcdef0123456789"))
.is_err(),
"i128 overflow"
"i128 overflow"
);
assert!(hex.parse(new_input("0x_deadbeef123")).is_err());
assert!(hex::<i128>.parse(new_input("0x_deadbeef123")).is_err());
assert!(hex.parse(new_input("0xbeefg1")).is_err());
assert!(hex::<i128>.parse(new_input("0xbeefg1")).is_err());
}
/// `octal := sign? '0o' [0-7] [0-7_]*`
fn octal(input: &mut Input<'_>) -> PResult<KdlValue> {
let mult = sign.parse_next(input)?;
/// Parses an optionally signed octal integer into `T`.
///
/// The sign is read before the `uoctal` magnitude. A negative sign negates
/// the magnitude via [`MaybeNegatable::negated`]; a `None` result is reported
/// as `NegativeUnsignedError`.
fn octal<T: FromStrRadix + MaybeNegatable>(input: &mut Input<'_>) -> PResult<T> {
    let negative = !signum.parse_next(input)?;
    let with_sign = |value: T| {
        if negative {
            value.negated().ok_or(NegativeUnsignedError)
        } else {
            Ok(value)
        }
    };
    uoctal::<T>.try_map(with_sign).parse_next(input)
}
fn uoctal<T: FromStrRadix>(input: &mut Input<'_>) -> PResult<T> {
alt(("0o", "0O")).parse_next(input)?;
cut_err((
oct_digit1,
@ -1366,9 +1415,7 @@ fn octal(input: &mut Input<'_>) -> PResult<KdlValue> {
),
))
.try_map(|(l, r): (&str, Vec<&str>)| {
i128::from_str_radix(&format!("{l}{}", str::replace(&r.join(""), "_", "")), 8)
.map(|x| x * mult)
.map(KdlValue::Integer)
T::from_str_radix(&format!("{l}{}", str::replace(&r.join(""), "_", "")), 8)
})
.context(lbl("octal"))
.parse_next(input)
@ -1377,28 +1424,32 @@ fn octal(input: &mut Input<'_>) -> PResult<KdlValue> {
#[cfg(test)]
#[test]
fn test_octal() {
assert_eq!(
octal.parse(new_input("0o12_34")).unwrap(),
KdlValue::Integer(0o1234)
);
assert_eq!(
octal.parse(new_input("0o1234_")).unwrap(),
KdlValue::Integer(0o1234)
);
assert!(octal.parse(new_input("0o_12_34")).is_err());
assert!(octal.parse(new_input("0o89")).is_err());
assert_eq!(octal::<i128>.parse(new_input("0o12_34")).unwrap(), 0o1234);
assert_eq!(octal::<i128>.parse(new_input("0o1234_")).unwrap(), 0o1234);
assert!(octal::<i128>.parse(new_input("0o_12_34")).is_err());
assert!(octal::<i128>.parse(new_input("0o89")).is_err());
}
/// `binary := sign? '0b' ('0' | '1') ('0' | '1' | '_')*`
fn binary(input: &mut Input<'_>) -> PResult<KdlValue> {
let mult = sign.parse_next(input)?;
/// Parses an optionally signed binary integer into `T`.
///
/// The sign is read before the `ubinary` magnitude. A negative sign negates
/// the magnitude via [`MaybeNegatable::negated`]; a `None` result is reported
/// as `NegativeUnsignedError`.
fn binary<T: FromStrRadix + MaybeNegatable>(input: &mut Input<'_>) -> PResult<T> {
    let keep_as_is = signum.parse_next(input)?;
    let mut signed = ubinary::<T>.try_map(|bits| {
        if keep_as_is {
            return Ok(bits);
        }
        bits.negated().ok_or(NegativeUnsignedError)
    });
    signed.parse_next(input)
}
fn ubinary<T: FromStrRadix>(input: &mut Input<'_>) -> PResult<T> {
alt(("0b", "0B")).parse_next(input)?;
cut_err(
(alt(("0", "1")), repeat(0.., alt(("0", "1", "_")))).try_map(
move |(x, xs): (&str, Vec<&str>)| {
i128::from_str_radix(&format!("{x}{}", str::replace(&xs.join(""), "_", "")), 2)
.map(|x| x * mult)
.map(KdlValue::Integer)
T::from_str_radix(&format!("{x}{}", str::replace(&xs.join(""), "_", "")), 2)
},
),
)
@ -1411,32 +1462,96 @@ fn binary(input: &mut Input<'_>) -> PResult<KdlValue> {
fn test_binary() {
use winnow::token::take;
assert_eq!(binary::<i128>.parse(new_input("0b10_01")).unwrap(), 0b1001);
assert_eq!(binary::<i128>.parse(new_input("0b1001_")).unwrap(), 0b1001);
assert!(binary::<i128>.parse(new_input("0b_10_01")).is_err());
assert_eq!(
binary.parse(new_input("0b10_01")).unwrap(),
KdlValue::Integer(0b1001)
(binary::<i128>, take(4usize))
.parse(new_input("0b12389"))
.unwrap(),
(1, "2389")
);
assert_eq!(
binary.parse(new_input("0b1001_")).unwrap(),
KdlValue::Integer(0b1001)
);
assert!(binary.parse(new_input("0b_10_01")).is_err());
assert_eq!(
(binary, take(4usize)).parse(new_input("0b12389")).unwrap(),
(KdlValue::Integer(1), "2389")
);
assert!(binary.parse(new_input("123")).is_err());
assert!(binary::<i128>.parse(new_input("123")).is_err());
}
fn sign(input: &mut Input<'_>) -> PResult<i128> {
fn signum(input: &mut Input<'_>) -> PResult<bool> {
let sign = opt(alt(('+', '-'))).parse_next(input)?;
let mult = if let Some(sign) = sign {
if sign == '+' {
1
true
} else {
-1
false
}
} else {
1
true
};
Ok(mult)
}
/// Integer types that can be parsed from a string of digits in a given radix.
///
/// This mirrors the inherent `from_str_radix` of the primitive integer types
/// behind a trait, so parsers can be generic over the target type.
trait FromStrRadix {
    /// Parses `s` as an integer written in base `radix`.
    fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError>
    where
        Self: Sized;
}

/// Implements [`FromStrRadix`] for every listed primitive integer type by
/// forwarding to that type's inherent `from_str_radix`.
macro_rules! impl_from_str_radix {
    ($($int:ty),*) => {
        $(
            impl FromStrRadix for $int {
                fn from_str_radix(digits: &str, base: u32) -> Result<$int, ParseIntError> {
                    // Inherent associated functions take priority over trait
                    // ones in path resolution, so this call does not recurse.
                    <$int>::from_str_radix(digits, base)
                }
            }
        )*
    };
}

impl_from_str_radix!(
    i8, i16, i32, i64, i128, isize,
    u8, u16, u32, u64, u128, usize
);
/// Numeric types that may be able to produce the negation of a value.
///
/// Parsers use this to apply a leading minus sign generically: the sign is
/// only accepted when `negated` returns `Some`.
trait MaybeNegatable: CheckedMul {
/// Returns the negated value, or `None` when the implementing type has no
/// negated value to offer.
fn negated(&self) -> Option<Self>;
}
/// Implements `MaybeNegatable` for each listed signed integer type.
///
/// `negated` returns `Some(-value)` whenever the negation is representable.
/// The one value without a representable negation is the type's `MIN`; for it
/// `None` is returned rather than overflowing (which would panic in builds
/// with overflow checks and silently wrap otherwise).
macro_rules! impl_negatable_signed {
    ($($t:ty),*) => {
        $(
            impl MaybeNegatable for $t {
                fn negated(&self) -> Option<Self> {
                    // Call the inherent method by path so the result does not
                    // depend on which numeric traits happen to be in scope.
                    <$t>::checked_neg(*self)
                }
            }
        )*
    };
}
/// Implements `MaybeNegatable` for each listed unsigned integer type.
///
/// Every implementation answers `None`, regardless of the value.
macro_rules! impl_negatable_unsigned {
    ($($uint:ty),*) => {
        $(
            impl MaybeNegatable for $uint {
                fn negated(&self) -> Option<$uint> {
                    Option::None
                }
            }
        )*
    };
}
/// Floating-point types that can be parsed from their textual form.
///
/// Lets the float parser be generic over the target float type.
trait ParseFloat {
    /// Parses `input` as a floating-point number of the implementing type.
    fn parse_float(input: &str) -> Result<Self, ParseFloatError>
    where
        Self: Sized;
}

/// Single-precision parsing, delegating to the standard `str::parse`.
impl ParseFloat for f32 {
    fn parse_float(text: &str) -> Result<f32, ParseFloatError> {
        str::parse::<f32>(text)
    }
}

/// Double-precision parsing, delegating to the standard `str::parse`.
impl ParseFloat for f64 {
    fn parse_float(text: &str) -> Result<f64, ParseFloatError> {
        str::parse::<f64>(text)
    }
}
// Wire up `MaybeNegatable` for the primitive integers: signed types get a real
// negation, while unsigned types always answer `None`.
impl_negatable_signed!(i8, i16, i32, i64, i128, isize);
impl_negatable_unsigned!(u8, u16, u32, u64, u128, usize);