diff --git a/README.md b/README.md index 9dee505..397beef 100644 --- a/README.md +++ b/README.md @@ -106,11 +106,11 @@ to jump in and give us your 2 cents! ## Grammar ``` -document := linespace* (node (newline document)? linespace*)? +nodes := linespace* (node (newline nodes)? linespace*)? node := identifier (node-space node-argument)* (node-space node-document)? single-line-comment? node-argument := prop | value -node-document := '{' document '}' +node-children := '{' nodes '}' node-space := ws* escline ws* | ws+ identifier := [a-zA-Z] [a-zA-Z0-9!#$%&'*+\-./:<>?@\^_|~]* | string diff --git a/src/error.rs b/src/error.rs index 47a539e..7fd6345 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,48 +1,74 @@ -use nom::error::{ContextError, ParseError}; - -use thiserror::Error; -#[derive(Debug, Clone, Eq, PartialEq, Error)] -#[error("Error parsing document. {kind}")] -pub struct Error { - pub input: String, - pub offset: usize, - pub kind: ErrorKind, -} - -#[derive(Debug, Clone, Eq, PartialEq, Error)] -pub enum ErrorKind { - #[error("Failed to parse {0} component of semver string.")] - Context(&'static str), - #[error("Incomplete input to semver parser.")] - IncompleteInput, - #[error("An unspecified error occurred.")] - Other, -} - -#[derive(Debug, Clone, Eq, PartialEq)] -pub(crate) struct NodeParseError { - pub(crate) input: I, - pub(crate) context: Option<&'static str>, - pub(crate) kind: Option, -} - -impl ParseError for NodeParseError { - fn from_error_kind(input: I, _kind: nom::error::ErrorKind) -> Self { - Self { - input, - context: None, - kind: None, - } - } - - fn append(_input: I, _kind: nom::error::ErrorKind, other: Self) -> Self { - other - } -} - -impl ContextError for NodeParseError { - fn add_context(_input: I, ctx: &'static str, mut other: Self) -> Self { - other.context = Some(ctx); - other - } -} +use std::num::{ParseFloatError, ParseIntError}; + +use nom::error::{ContextError, ErrorKind, FromExternalError, ParseError}; + +use thiserror::Error; 
+#[derive(Debug, Clone, Eq, PartialEq, Error)] +#[error("Error parsing document. {kind}")] +pub struct KdlError { + pub input: String, + pub offset: usize, + pub kind: KdlErrorKind, +} + +#[derive(Debug, Clone, Eq, PartialEq, Error)] +pub enum KdlErrorKind { + #[error(transparent)] + ParseIntError(ParseIntError), + #[error(transparent)] + ParseFloatError(ParseFloatError), + #[error("Failed to parse {0} component of semver string.")] + Context(&'static str), + #[error("Incomplete input to semver parser.")] + IncompleteInput, + #[error("An unspecified error occurred.")] + Other, +} + +#[derive(Debug, Clone, Eq, PartialEq)] +pub(crate) struct KdlParseError { + pub(crate) input: I, + pub(crate) context: Option<&'static str>, + pub(crate) kind: Option, +} + +impl ParseError for KdlParseError { + fn from_error_kind(input: I, _kind: nom::error::ErrorKind) -> Self { + Self { + input, + context: None, + kind: None, + } + } + + fn append(_input: I, _kind: nom::error::ErrorKind, other: Self) -> Self { + other + } +} + +impl ContextError for KdlParseError { + fn add_context(_input: I, ctx: &'static str, mut other: Self) -> Self { + other.context = Some(ctx); + other + } +} + +impl<'a> FromExternalError<&'a str, ParseIntError> for KdlParseError<&'a str> { + fn from_external_error(input: &'a str, _kind: ErrorKind, e: ParseIntError) -> Self { + KdlParseError { + input, + context: None, + kind: Some(KdlErrorKind::ParseIntError(e)), + } + } +} + +impl<'a> FromExternalError<&'a str, ParseFloatError> for KdlParseError<&'a str> { + fn from_external_error(input: &'a str, _kind: ErrorKind, e: ParseFloatError) -> Self { + KdlParseError { + input, + context: None, + kind: Some(KdlErrorKind::ParseFloatError(e)), + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 30d1d30..a4e9757 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,14 +1,14 @@ use nom::combinator::all_consuming; use nom::Err; -pub use crate::error::{Error, ErrorKind}; +pub use crate::error::{KdlError, KdlErrorKind}; pub use 
crate::node::Node; mod error; mod node; mod parser; -pub fn parse_document(input: I) -> Result, Error> +pub fn parse_document(input: I) -> Result, KdlError> where I: AsRef, { @@ -16,21 +16,21 @@ where match all_consuming(parser::nodes)(input) { Ok((_, arg)) => Ok(arg), Err(err) => Err(match err { - Err::Error(e) | Err::Failure(e) => Error { + Err::Error(e) | Err::Failure(e) => KdlError { input: input.into(), offset: e.input.as_ptr() as usize - input.as_ptr() as usize, kind: if let Some(kind) = e.kind { kind } else if let Some(ctx) = e.context { - ErrorKind::Context(ctx) + KdlErrorKind::Context(ctx) } else { - ErrorKind::Other + KdlErrorKind::Other }, }, - Err::Incomplete(_) => Error { + Err::Incomplete(_) => KdlError { input: input.into(), offset: input.len() - 1, - kind: ErrorKind::IncompleteInput, + kind: KdlErrorKind::IncompleteInput, }, }), } diff --git a/src/parser.rs b/src/parser.rs index 2173976..280b149 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3,18 +3,17 @@ use std::collections::HashMap; use nom::branch::alt; use nom::bytes::complete::{is_not, tag, take_until}; use nom::character::complete::{alpha1, alphanumeric1, char, one_of}; -use nom::combinator::{eof, map, opt, recognize, value}; +use nom::combinator::{eof, map, map_res, opt, recognize, value}; use nom::multi::{many0, many1}; -use nom::sequence::{delimited, pair, preceded}; +use nom::sequence::{delimited, pair, preceded, terminated, tuple}; use nom::IResult; -use crate::error::NodeParseError; +use crate::error::KdlParseError; use crate::node::{Node, NodeValue}; -/// `document := linespace* (node (newline document)?)?` -pub(crate) fn nodes(input: &str) -> IResult<&str, Vec, NodeParseError<&str>> { - // TODO: this is wrong - many0(node)(input) +/// `nodes := linespace* (node (newline nodes)?)?` +pub(crate) fn nodes(input: &str) -> IResult<&str, Vec, KdlParseError<&str>> { + many0(delimited(many0(linespace), node, newline))(input) } #[derive(Clone)] @@ -24,7 +23,7 @@ enum NodeArg<'a> { } /// 
`node := identifier (node-space node-argument)* (node-space node-document)?` -pub(crate) fn node(input: &str) -> IResult<&str, Node, NodeParseError<&str>> { +pub(crate) fn node(input: &str) -> IResult<&str, Node, KdlParseError<&str>> { let (input, tag) = identifier(input)?; let (input, args) = many0(preceded(node_space, node_arg))(input)?; let (input, children) = opt(preceded(node_space, node_children))(input)?; @@ -57,7 +56,7 @@ pub(crate) fn node(input: &str) -> IResult<&str, Node, NodeParseError<&str>> { } /// `identifier := [a-zA-Z_] [a-zA-Z0-9!#$%&'*+\-./:<>?@\^_|~]* | string` -fn identifier(input: &str) -> IResult<&str, &str, NodeParseError<&str>> { +fn identifier(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { alt(( recognize(pair( alt((alpha1, tag("_"))), @@ -70,7 +69,7 @@ fn identifier(input: &str) -> IResult<&str, &str, NodeParseError<&str>> { ))(input) } -fn node_arg(input: &str) -> IResult<&str, NodeArg, NodeParseError<&str>> { +fn node_arg(input: &str) -> IResult<&str, NodeArg, KdlParseError<&str>> { alt(( map(property, |(key, val)| NodeArg::Property(key, val)), map(node_value, NodeArg::Value), @@ -78,7 +77,7 @@ fn node_arg(input: &str) -> IResult<&str, NodeArg, NodeParseError<&str>> { } /// `prop := identifier '=' value` -fn property(input: &str) -> IResult<&str, (&str, NodeValue), NodeParseError<&str>> { +fn property(input: &str) -> IResult<&str, (&str, NodeValue), KdlParseError<&str>> { let (input, key) = identifier(input)?; let (input, _) = tag("=")(input)?; let (input, val) = node_value(input)?; @@ -86,7 +85,7 @@ fn property(input: &str) -> IResult<&str, (&str, NodeValue), NodeParseError<&str } /// `value := string | raw_string | number | boolean | 'null'` -fn node_value(input: &str) -> IResult<&str, NodeValue, NodeParseError<&str>> { +fn node_value(input: &str) -> IResult<&str, NodeValue, KdlParseError<&str>> { alt(( map(string, |s| NodeValue::String(s.into())), map(raw_string, |s| NodeValue::String(s.into())), @@ -96,21 +95,21 @@ fn 
node_value(input: &str) -> IResult<&str, NodeValue, NodeParseError<&str>> { ))(input) } -fn node_children(_input: &str) -> IResult<&str, Vec, NodeParseError<&str>> { - todo!() +/// `node-children := '{' nodes '}'` +fn node_children(input: &str) -> IResult<&str, Vec, KdlParseError<&str>> { + delimited(tag("{"), nodes, tag("}"))(input) } // TODO: This should be much more specific about what escapes are allowed. /// `string := '"' ('\\' ["\\] | [^"])* '"'` -fn string(_input: &str) -> IResult<&str, &str, NodeParseError<&str>> { +fn string(_input: &str) -> IResult<&str, &str, KdlParseError<&str>> { todo!() } -// TODO: this is clever but... I don't like the recursion here. /// `raw-string := 'r' raw-string-hash` /// `raw-string-hash := '#' raw-string-hash '#' | raw-string-quotes` /// `raw-string-quotes := '"' .* '"'` -fn raw_string(input: &str) -> IResult<&str, &str, NodeParseError<&str>> { +fn raw_string(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { let (input, _) = char('r')(input)?; let (input, hashes) = recognize(many0(char('#')))(input)?; let (input, _) = char('"')(input)?; @@ -121,12 +120,101 @@ fn raw_string(input: &str) -> IResult<&str, &str, NodeParseError<&str>> { } /// `number := decimal | hex | octal | binary` -fn number(_input: &str) -> IResult<&str, NodeValue, NodeParseError<&str>> { - todo!() +fn number(input: &str) -> IResult<&str, NodeValue, KdlParseError<&str>> { + alt(( + map(integer, NodeValue::Int), + map(hexadecimal, NodeValue::Int), + map(octal, NodeValue::Int), + map(binary, NodeValue::Int), + map(float, NodeValue::Float), + ))(input) +} + +/// ```ignore +/// decimal := integer ('.' [0-9]+)? exponent? +/// exponent := ('e' | 'E') integer +/// integer := sign? 
[0-9] [0-9_]* +/// sign := '+' | '-' +/// ``` +fn float(input: &str) -> IResult<&str, f64, KdlParseError<&str>> { + map_res( + alt(( + recognize(tuple(( + integer, + opt(preceded(char('.'), integer)), + one_of("eE"), + opt(one_of("+-")), + integer, + ))), + recognize(tuple((integer, char('.'), integer))), + )), + |x| str::replace(x, "_", "").parse::(), + )(input) +} + +/// ```ignore +/// decimal := integer ('.' [0-9]+)? exponent? +/// exponent := ('e' | 'E') integer +/// integer := sign? [0-9] [0-9_]* +/// sign := '+' | '-' +/// ``` +fn integer(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { + let (input, sign) = opt(alt((char('+'), char('-'))))(input)?; + let mult = if let Some(sign) = sign { + if sign == '+' { + 1 + } else { + -1 + } + } else { + 1 + }; + map_res( + recognize(many1(terminated(one_of("0123456789"), many0(char('_'))))), + move |out: &str| { + i64::from_str_radix(&str::replace(&out, "_", ""), 10).map(move |x| x * mult) + }, + )(input) +} + +/// `hex := '0x' [0-9a-fA-F] [0-9a-fA-F_]*` +fn hexadecimal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { + map_res( + preceded( + alt((tag("0x"), tag("0X"))), + recognize(many1(terminated( + one_of("0123456789abcdefABCDEF"), + many0(char('_')), + ))), + ), + move |out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 16), + )(input) +} + +/// `octal := '0o' [0-7] [0-7_]*` +fn octal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { + map_res( + preceded( + alt((tag("0o"), tag("0O"))), + recognize(many1(terminated(one_of("01234567"), many0(char('_'))))), + ), + move |out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 8), + )(input) +} + +/// `binary := '0b' ('0' | '1') ('0' | '1' | '_')*` +fn binary(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { + map_res( + preceded( + alt((tag("0b"), tag("0B"))), + recognize(many1(terminated(one_of("01"), many0(char('_'))))), + ), + move |out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 2), + )(input) } /// 
`boolean := 'true' | 'false'` -fn boolean(input: &str) -> IResult<&str, NodeValue, NodeParseError<&str>> { +fn boolean(input: &str) -> IResult<&str, NodeValue, KdlParseError<&str>> { alt(( value(NodeValue::Boolean(true), tag("true")), value(NodeValue::Boolean(false), tag("false")), @@ -134,7 +222,7 @@ fn boolean(input: &str) -> IResult<&str, NodeValue, NodeParseError<&str>> { } /// `node-space := ws* escline ws* | ws+` -fn node_space(input: &str) -> IResult<&str, (), NodeParseError<&str>> { +fn node_space(input: &str) -> IResult<&str, (), KdlParseError<&str>> { alt(( delimited(many0(whitespace), escline, many0(whitespace)), map(many1(whitespace), |_| ()), @@ -142,45 +230,124 @@ fn node_space(input: &str) -> IResult<&str, (), NodeParseError<&str>> { } /// `single-line-comment := '//' ('\r' [^\n] | [^\r\n])* (newline | eof)` -fn single_line_comment(input: &str) -> IResult<&str, (), NodeParseError<&str>> { +fn single_line_comment(input: &str) -> IResult<&str, (), KdlParseError<&str>> { let (input, _) = tag("//")(input)?; let (input, _) = alt((take_until("\r\n"), is_not("\n")))(input)?; - let (input, _) = alt((newline, eof))(input)?; + let (input, _) = alt((newline, value((), eof)))(input)?; Ok((input, ())) } /// `multi-line-comment := '/*' ('*' [^\/] | [^*])* '*/'` -fn multi_line_comment(input: &str) -> IResult<&str, (), NodeParseError<&str>> { +fn multi_line_comment(input: &str) -> IResult<&str, (), KdlParseError<&str>> { delimited(tag("/*"), value((), take_until("*/")), tag("*/"))(input) } /// `escline := '\\' ws* (single-line-comment | newline)` -fn escline(input: &str) -> IResult<&str, (), NodeParseError<&str>> { +fn escline(input: &str) -> IResult<&str, (), KdlParseError<&str>> { let (input, _) = tag("\\")(input)?; let (input, _) = many0(whitespace)(input)?; - let (input, _) = alt((recognize(single_line_comment), newline))(input)?; + let (input, _) = alt((single_line_comment, newline))(input)?; Ok((input, ())) } +/// `linespace := newline | ws | 
single-line-comment` +fn linespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> { + value((), alt((newline, whitespace, single_line_comment)))(input) +} + /// `ws := bom | ' ' | '\t' | multi-line-comment` -fn whitespace(input: &str) -> IResult<&str, &str, NodeParseError<&str>> { +fn whitespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> { // TODO: bom? - alt(( - /*bom,*/ tag(" "), - tag("\t"), - recognize(multi_line_comment), - ))(input) + value( + (), + alt(( + /*bom,*/ tag(" "), + tag("\t"), + recognize(multi_line_comment), + )), + )(input) } /// `newline := ('\r' '\n') | '\n'` -fn newline(input: &str) -> IResult<&str, &str, NodeParseError<&str>> { - alt((tag("\r\n"), tag("\n")))(input) +fn newline(input: &str) -> IResult<&str, (), KdlParseError<&str>> { + value((), alt((tag("\r\n"), tag("\n"))))(input) } #[cfg(test)] mod tests { use super::*; + #[test] + fn test_float() { + assert_eq!(float("1.0"), Ok(("", 1.0f64))); + assert_eq!(float("0.0"), Ok(("", 0.0f64))); + assert_eq!(float("-1.0"), Ok(("", -1.0f64))); + assert_eq!(float("+1.0"), Ok(("", 1.0f64))); + assert_eq!(float("1.0e10"), Ok(("", 1.0e10f64))); + assert_eq!(float("1.0e-10"), Ok(("", 1.0e-10f64))); + assert_eq!(float("-1.0e-10"), Ok(("", -1.0e-10f64))); + assert_eq!(float("123_456_789.0"), Ok(("", 123456789.0f64))); + assert_eq!(float("123_456_789.0_"), Ok(("", 123456789.0f64))); + assert!(float("?1.0").is_err()); + assert!(float("_1.0").is_err()); + assert!(float("1._0").is_err()); + assert!(float("1.").is_err()); + assert!(float(".0").is_err()); + } + + #[test] + fn test_integer() { + assert_eq!(integer("0123456789"), Ok(("", 123456789))); + assert_eq!(integer("0123_456_789"), Ok(("", 123456789))); + assert_eq!(integer("0123_456_789_"), Ok(("", 123456789))); + assert_eq!(integer("+0123456789"), Ok(("", 123456789))); + assert_eq!(integer("-0123456789"), Ok(("", -123456789))); + assert!(integer("?0123456789").is_err()); + assert!(integer("_0123456789").is_err()); + 
assert!(integer("a").is_err()); + assert!(integer("--").is_err()); + } + + #[test] + fn test_hexadecimal() { + assert_eq!( + hexadecimal("0x0123456789abcdef"), + Ok(("", 0x0123456789abcdef)) + ); + assert_eq!( + hexadecimal("0x01234567_89abcdef"), + Ok(("", 0x0123456789abcdef)) + ); + assert_eq!( + hexadecimal("0x01234567_89abcdef_"), + Ok(("", 0x0123456789abcdef)) + ); + assert!(hexadecimal("0x_123").is_err()); + assert!(hexadecimal("0xg").is_err()); + assert!(hexadecimal("0xx").is_err()); + } + + #[test] + fn test_octal() { + assert_eq!(octal("0o01234567"), Ok(("", 0o01234567))); + assert_eq!(octal("0o0123_4567"), Ok(("", 0o01234567))); + assert_eq!(octal("0o01234567_"), Ok(("", 0o01234567))); + assert!(octal("0o_123").is_err()); + assert!(octal("0o8").is_err()); + assert!(octal("0oo").is_err()); + } + + #[test] + fn test_binary() { + assert_eq!(binary("0b0101"), Ok(("", 0b0101))); + assert_eq!(binary("0b01_10"), Ok(("", 0b0110))); + assert_eq!(binary("0b01___10"), Ok(("", 0b0110))); + assert_eq!(binary("0b0110_"), Ok(("", 0b0110))); + assert!(binary("0b_0110").is_err()); + assert!(binary("0b20").is_err()); + assert!(binary("0bb").is_err()); + } + #[test] fn test_raw_string() { assert_eq!(raw_string(r#"r"foo""#), Ok(("", "foo"))); @@ -237,20 +404,17 @@ mod tests { #[test] fn test_whitespace() { - assert_eq!(whitespace(" "), Ok(("", " "))); - assert_eq!(whitespace("\t"), Ok(("", "\t"))); - assert_eq!( - whitespace("/* \nfoo\r\n */ etc"), - Ok((" etc", "/* \nfoo\r\n */")) - ); + assert_eq!(whitespace(" "), Ok(("", ()))); + assert_eq!(whitespace("\t"), Ok(("", ()))); + assert_eq!(whitespace("/* \nfoo\r\n */ etc"), Ok((" etc", ()))); assert!(whitespace("hi").is_err()) } #[test] fn test_newline() { - assert_eq!(newline("\n"), Ok(("", "\n"))); - assert_eq!(newline("\r\n"), Ok(("", "\r\n"))); - assert_eq!(newline("\n\n"), Ok(("\n", "\n"))); + assert_eq!(newline("\n"), Ok(("", ()))); + assert_eq!(newline("\r\n"), Ok(("", ()))); + assert_eq!(newline("\n\n"), Ok(("\n", 
()))); assert!(newline("\r").is_err()); assert!(newline("blah").is_err()); }