This commit is contained in:
Kat Marchán 2020-12-13 16:47:21 -08:00
parent 54299642da
commit 842d1da3b9
No known key found for this signature in database
GPG Key ID: AEB529C08A3C7E9E
4 changed files with 290 additions and 100 deletions

View File

@ -106,11 +106,11 @@ to jump in and give us your 2 cents!
## Grammar
```
document := linespace* (node (newline document)? linespace*)?
nodes := linespace* (node (newline nodes)? linespace*)?
node := identifier (node-space node-argument)* (node-space node-document)? single-line-comment?
node-argument := prop | value
node-document := '{' document '}'
node-children := '{' nodes '}'
node-space := ws* escline ws* | ws+
identifier := [a-zA-Z] [a-zA-Z0-9!#$%&'*+\-./:<>?@\^_|~]* | string

View File

@ -1,48 +1,74 @@
use nom::error::{ContextError, ParseError};
use thiserror::Error;
/// Public error returned when a document fails to parse.
///
/// Its `Display` output is `Error parsing document. ` followed by the
/// message of `kind`.
#[derive(Debug, Clone, Eq, PartialEq, Error)]
#[error("Error parsing document. {kind}")]
pub struct Error {
/// Owned copy of the text that was handed to the parser.
pub input: String,
/// Byte offset into `input` at which the failure was reported.
pub offset: usize,
/// The specific reason parsing failed.
pub kind: ErrorKind,
}
#[derive(Debug, Clone, Eq, PartialEq, Error)]
pub enum ErrorKind {
#[error("Failed to parse {0} component of semver string.")]
Context(&'static str),
#[error("Incomplete input to semver parser.")]
IncompleteInput,
#[error("An unspecified error occurred.")]
Other,
}
/// Crate-internal error type plugged into nom as the parsers' error parameter.
///
/// It is converted into the public `Error` once parsing has finished.
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) struct NodeParseError<I> {
/// The input nom passed in when the error was created.
pub(crate) input: I,
/// Label recorded by `add_context`, if any.
pub(crate) context: Option<&'static str>,
/// Specific failure kind; `None` when the error comes from `from_error_kind`.
pub(crate) kind: Option<ErrorKind>,
}
impl<I> ParseError<I> for NodeParseError<I> {
fn from_error_kind(input: I, _kind: nom::error::ErrorKind) -> Self {
Self {
input,
context: None,
kind: None,
}
}
fn append(_input: I, _kind: nom::error::ErrorKind, other: Self) -> Self {
other
}
}
/// Lets nom's `context` combinator label a `NodeParseError`.
impl<I> ContextError<I> for NodeParseError<I> {
    /// Stores `ctx` as the error's context label, replacing any label that
    /// was recorded earlier. The input passed alongside it is ignored.
    fn add_context(_input: I, ctx: &'static str, other: Self) -> Self {
        Self {
            context: Some(ctx),
            ..other
        }
    }
}
use std::num::{ParseFloatError, ParseIntError};
use nom::error::{ContextError, ErrorKind, FromExternalError, ParseError};
use thiserror::Error;
/// Public error returned when a KDL document fails to parse.
///
/// Its `Display` output is `Error parsing document. ` followed by the
/// message of `kind`.
#[derive(Debug, Clone, Eq, PartialEq, Error)]
#[error("Error parsing document. {kind}")]
pub struct KdlError {
/// Owned copy of the text that was handed to the parser.
pub input: String,
/// Byte offset into `input` at which the failure was reported.
pub offset: usize,
/// The specific reason parsing failed.
pub kind: KdlErrorKind,
}
#[derive(Debug, Clone, Eq, PartialEq, Error)]
pub enum KdlErrorKind {
#[error(transparent)]
ParseIntError(ParseIntError),
#[error(transparent)]
ParseFloatError(ParseFloatError),
#[error("Failed to parse {0} component of semver string.")]
Context(&'static str),
#[error("Incomplete input to semver parser.")]
IncompleteInput,
#[error("An unspecified error occurred.")]
Other,
}
/// Crate-internal error type plugged into nom as the parsers' error parameter.
///
/// It is converted into the public `KdlError` once parsing has finished.
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) struct KdlParseError<I> {
/// The input nom passed in when the error was created.
pub(crate) input: I,
/// Label recorded by `add_context`, if any.
pub(crate) context: Option<&'static str>,
/// Specific failure kind; set by the `FromExternalError` impls and `None`
/// when the error comes from `from_error_kind`.
pub(crate) kind: Option<KdlErrorKind>,
}
impl<I> ParseError<I> for KdlParseError<I> {
fn from_error_kind(input: I, _kind: nom::error::ErrorKind) -> Self {
Self {
input,
context: None,
kind: None,
}
}
fn append(_input: I, _kind: nom::error::ErrorKind, other: Self) -> Self {
other
}
}
/// Lets nom's `context` combinator label a `KdlParseError`.
impl<I> ContextError<I> for KdlParseError<I> {
    /// Stores `ctx` as the error's context label, replacing any label that
    /// was recorded earlier. The input passed alongside it is ignored.
    fn add_context(_input: I, ctx: &'static str, other: Self) -> Self {
        Self {
            context: Some(ctx),
            ..other
        }
    }
}
/// Allows `map_res` to surface integer conversion failures as parse errors.
impl<'a> FromExternalError<&'a str, ParseIntError> for KdlParseError<&'a str> {
    /// Wraps `cause` in `KdlErrorKind::ParseIntError`, located at `input`.
    /// nom's error kind is discarded and no context label is set.
    fn from_external_error(input: &'a str, _kind: ErrorKind, cause: ParseIntError) -> Self {
        let kind = Some(KdlErrorKind::ParseIntError(cause));
        Self {
            input,
            context: None,
            kind,
        }
    }
}
/// Allows `map_res` to surface float conversion failures as parse errors.
impl<'a> FromExternalError<&'a str, ParseFloatError> for KdlParseError<&'a str> {
    /// Wraps `cause` in `KdlErrorKind::ParseFloatError`, located at `input`.
    /// nom's error kind is discarded and no context label is set.
    fn from_external_error(input: &'a str, _kind: ErrorKind, cause: ParseFloatError) -> Self {
        let kind = Some(KdlErrorKind::ParseFloatError(cause));
        Self {
            input,
            context: None,
            kind,
        }
    }
}

View File

@ -1,14 +1,14 @@
use nom::combinator::all_consuming;
use nom::Err;
pub use crate::error::{Error, ErrorKind};
pub use crate::error::{KdlError, KdlErrorKind};
pub use crate::node::Node;
mod error;
mod node;
mod parser;
pub fn parse_document<I>(input: I) -> Result<Vec<Node>, Error>
pub fn parse_document<I>(input: I) -> Result<Vec<Node>, KdlError>
where
I: AsRef<str>,
{
@ -16,21 +16,21 @@ where
match all_consuming(parser::nodes)(input) {
Ok((_, arg)) => Ok(arg),
Err(err) => Err(match err {
Err::Error(e) | Err::Failure(e) => Error {
Err::Error(e) | Err::Failure(e) => KdlError {
input: input.into(),
offset: e.input.as_ptr() as usize - input.as_ptr() as usize,
kind: if let Some(kind) = e.kind {
kind
} else if let Some(ctx) = e.context {
ErrorKind::Context(ctx)
KdlErrorKind::Context(ctx)
} else {
ErrorKind::Other
KdlErrorKind::Other
},
},
Err::Incomplete(_) => Error {
Err::Incomplete(_) => KdlError {
input: input.into(),
offset: input.len() - 1,
kind: ErrorKind::IncompleteInput,
kind: KdlErrorKind::IncompleteInput,
},
}),
}

View File

@ -3,18 +3,17 @@ use std::collections::HashMap;
use nom::branch::alt;
use nom::bytes::complete::{is_not, tag, take_until};
use nom::character::complete::{alpha1, alphanumeric1, char, one_of};
use nom::combinator::{eof, map, opt, recognize, value};
use nom::combinator::{eof, map, map_res, opt, recognize, value};
use nom::multi::{many0, many1};
use nom::sequence::{delimited, pair, preceded};
use nom::sequence::{delimited, pair, preceded, terminated, tuple};
use nom::IResult;
use crate::error::NodeParseError;
use crate::error::KdlParseError;
use crate::node::{Node, NodeValue};
/// `document := linespace* (node (newline document)?)?`
pub(crate) fn nodes(input: &str) -> IResult<&str, Vec<Node>, NodeParseError<&str>> {
// TODO: this is wrong
many0(node)(input)
/// `nodes := linespace* (node (newline nodes)? linespace*)?`
///
/// Parses zero or more nodes. Each one may be preceded by any amount of
/// `linespace` and must be followed by a `newline`.
///
/// NOTE(review): as written, a final node with no trailing newline is not
/// accepted and trailing `linespace` is left unconsumed, which looks stricter
/// than the grammar above — confirm against `node`'s handling of line endings.
pub(crate) fn nodes(input: &str) -> IResult<&str, Vec<Node>, KdlParseError<&str>> {
    let entry = delimited(many0(linespace), node, newline);
    many0(entry)(input)
}
#[derive(Clone)]
@ -24,7 +23,7 @@ enum NodeArg<'a> {
}
/// `node := identifier (node-space node-argument)* (node-space node-document)?`
pub(crate) fn node(input: &str) -> IResult<&str, Node, NodeParseError<&str>> {
pub(crate) fn node(input: &str) -> IResult<&str, Node, KdlParseError<&str>> {
let (input, tag) = identifier(input)?;
let (input, args) = many0(preceded(node_space, node_arg))(input)?;
let (input, children) = opt(preceded(node_space, node_children))(input)?;
@ -57,7 +56,7 @@ pub(crate) fn node(input: &str) -> IResult<&str, Node, NodeParseError<&str>> {
}
/// `identifier := [a-zA-Z_] [a-zA-Z0-9!#$%&'*+\-./:<>?@\^_|~]* | string`
fn identifier(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
fn identifier(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
alt((
recognize(pair(
alt((alpha1, tag("_"))),
@ -70,7 +69,7 @@ fn identifier(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
))(input)
}
fn node_arg(input: &str) -> IResult<&str, NodeArg, NodeParseError<&str>> {
fn node_arg(input: &str) -> IResult<&str, NodeArg, KdlParseError<&str>> {
alt((
map(property, |(key, val)| NodeArg::Property(key, val)),
map(node_value, NodeArg::Value),
@ -78,7 +77,7 @@ fn node_arg(input: &str) -> IResult<&str, NodeArg, NodeParseError<&str>> {
}
/// `prop := identifier '=' value`
fn property(input: &str) -> IResult<&str, (&str, NodeValue), NodeParseError<&str>> {
fn property(input: &str) -> IResult<&str, (&str, NodeValue), KdlParseError<&str>> {
let (input, key) = identifier(input)?;
let (input, _) = tag("=")(input)?;
let (input, val) = node_value(input)?;
@ -86,7 +85,7 @@ fn property(input: &str) -> IResult<&str, (&str, NodeValue), NodeParseError<&str
}
/// `value := string | raw_string | number | boolean | 'null'`
fn node_value(input: &str) -> IResult<&str, NodeValue, NodeParseError<&str>> {
fn node_value(input: &str) -> IResult<&str, NodeValue, KdlParseError<&str>> {
alt((
map(string, |s| NodeValue::String(s.into())),
map(raw_string, |s| NodeValue::String(s.into())),
@ -96,21 +95,21 @@ fn node_value(input: &str) -> IResult<&str, NodeValue, NodeParseError<&str>> {
))(input)
}
fn node_children(_input: &str) -> IResult<&str, Vec<Node>, NodeParseError<&str>> {
todo!()
/// `node-children := '{' nodes '}'`
fn node_children(input: &str) -> IResult<&str, Vec<Node>, KdlParseError<&str>> {
delimited(tag("{"), nodes, tag("}"))(input)
}
// TODO: This should be much more specific about what escapes are allowed.
/// `string := '"' ('\\' ["\\] | [^"])* '"'`
fn string(_input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
fn string(_input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
todo!()
}
// TODO: this is clever but... I don't like the recursion here.
/// `raw-string := 'r' raw-string-hash`
/// `raw-string-hash := '#' raw-string-hash '#' | raw-string-quotes`
/// `raw-string-quotes := '"' .* '"'`
fn raw_string(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
fn raw_string(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
let (input, _) = char('r')(input)?;
let (input, hashes) = recognize(many0(char('#')))(input)?;
let (input, _) = char('"')(input)?;
@ -121,12 +120,101 @@ fn raw_string(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
}
/// `number := decimal | hex | octal | binary`
fn number(_input: &str) -> IResult<&str, NodeValue, NodeParseError<&str>> {
todo!()
/// Parses any numeric literal into a `NodeValue`.
///
/// `alt` returns the result of the first alternative that succeeds, so the
/// more specific forms are tried before the plain integer. If `integer` ran
/// first it would accept the leading `0` of `0x1f` or the `1` of `1.5` and
/// leave the rest of the literal unparsed.
fn number(input: &str) -> IResult<&str, NodeValue, KdlParseError<&str>> {
    alt((
        map(float, NodeValue::Float),
        map(hexadecimal, NodeValue::Int),
        map(octal, NodeValue::Int),
        map(binary, NodeValue::Int),
        map(integer, NodeValue::Int),
    ))(input)
}
/// ```ignore
/// decimal := integer ('.' [0-9]+)? exponent?
/// exponent := ('e' | 'E') integer
/// integer := sign? [1-9] [0-9_]*
/// sign := '+' | '-'
/// ```
fn float(input: &str) -> IResult<&str, f64, KdlParseError<&str>> {
map_res(
alt((
recognize(tuple((
integer,
opt(preceded(char('.'), integer)),
one_of("eE"),
opt(one_of("+-")),
integer,
))),
recognize(tuple((integer, char('.'), integer))),
)),
|x| str::replace(x, "_", "").parse::<f64>(),
)(input)
}
/// ```ignore
/// decimal := integer ('.' [0-9]+)? exponent?
/// exponent := ('e' | 'E') integer
/// integer := sign? [1-9] [0-9_]*
/// sign := '+' | '-'
/// ```
fn integer(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
let (input, sign) = opt(alt((char('+'), char('-'))))(input)?;
let mult = if let Some(sign) = sign {
if sign == '+' {
1
} else {
-1
}
} else {
1
};
map_res(
recognize(many1(terminated(one_of("0123456789"), many0(char('_'))))),
move |out: &str| {
i64::from_str_radix(&str::replace(&out, "_", ""), 10).map(move |x| x * mult)
},
)(input)
}
/// `hex := '0x' [0-9a-fA-F] [0-9a-fA-F_]*`
///
/// Parses a hexadecimal literal introduced by `0x` or `0X`. `_` separators
/// after any digit are stripped before the digits are converted to an `i64`.
fn hexadecimal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
    let prefix = alt((tag("0x"), tag("0X")));
    let digits = recognize(many1(terminated(
        one_of("0123456789abcdefABCDEF"),
        many0(char('_')),
    )));
    map_res(preceded(prefix, digits), |raw: &str| {
        i64::from_str_radix(&raw.replace('_', ""), 16)
    })(input)
}
/// `octal := '0o' [0-7] [0-7_]*`
///
/// Parses an octal literal introduced by `0o` or `0O`. `_` separators after
/// any digit are stripped before the digits are converted to an `i64`.
fn octal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
    let prefix = alt((tag("0o"), tag("0O")));
    let digits = recognize(many1(terminated(one_of("01234567"), many0(char('_')))));
    map_res(preceded(prefix, digits), |raw: &str| {
        i64::from_str_radix(&raw.replace('_', ""), 8)
    })(input)
}
/// `binary := '0b' ('0' | '1') ('0' | '1' | '_')*`
///
/// Parses a binary literal introduced by `0b` or `0B`. `_` separators after
/// any digit are stripped before the digits are converted to an `i64`.
fn binary(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
    let prefix = alt((tag("0b"), tag("0B")));
    let digits = recognize(many1(terminated(one_of("01"), many0(char('_')))));
    map_res(preceded(prefix, digits), |raw: &str| {
        i64::from_str_radix(&raw.replace('_', ""), 2)
    })(input)
}
/// `boolean := 'true' | 'false'`
fn boolean(input: &str) -> IResult<&str, NodeValue, NodeParseError<&str>> {
fn boolean(input: &str) -> IResult<&str, NodeValue, KdlParseError<&str>> {
alt((
value(NodeValue::Boolean(true), tag("true")),
value(NodeValue::Boolean(false), tag("false")),
@ -134,7 +222,7 @@ fn boolean(input: &str) -> IResult<&str, NodeValue, NodeParseError<&str>> {
}
/// `node-space := ws* escline ws* | ws+`
fn node_space(input: &str) -> IResult<&str, (), NodeParseError<&str>> {
fn node_space(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
alt((
delimited(many0(whitespace), escline, many0(whitespace)),
map(many1(whitespace), |_| ()),
@ -142,45 +230,124 @@ fn node_space(input: &str) -> IResult<&str, (), NodeParseError<&str>> {
}
/// `single-line-comment := '//' ('\r' [^\n] | [^\r\n])* (newline | eof)`
fn single_line_comment(input: &str) -> IResult<&str, (), NodeParseError<&str>> {
fn single_line_comment(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
let (input, _) = tag("//")(input)?;
let (input, _) = alt((take_until("\r\n"), is_not("\n")))(input)?;
let (input, _) = alt((newline, eof))(input)?;
let (input, _) = alt((newline, value((), eof)))(input)?;
Ok((input, ()))
}
/// `multi-line-comment := '/*' ('*' [^\/] | [^*])* '*/'`
fn multi_line_comment(input: &str) -> IResult<&str, (), NodeParseError<&str>> {
fn multi_line_comment(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
delimited(tag("/*"), value((), take_until("*/")), tag("*/"))(input)
}
/// `escline := '\\' ws* (single-line-comment | newline)`
fn escline(input: &str) -> IResult<&str, (), NodeParseError<&str>> {
fn escline(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
let (input, _) = tag("\\")(input)?;
let (input, _) = many0(whitespace)(input)?;
let (input, _) = alt((recognize(single_line_comment), newline))(input)?;
let (input, _) = alt((single_line_comment, newline))(input)?;
Ok((input, ()))
}
/// `linespace := newline | ws | single-line-comment`
fn linespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
value((), alt((newline, whitespace, single_line_comment)))(input)
}
/// `ws := bom | ' ' | '\t' | multi-line-comment`
fn whitespace(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
fn whitespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
// TODO: bom?
alt((
/*bom,*/ tag(" "),
tag("\t"),
recognize(multi_line_comment),
))(input)
value(
(),
alt((
/*bom,*/ tag(" "),
tag("\t"),
recognize(multi_line_comment),
)),
)(input)
}
/// `newline := ('\r' '\n') | '\n'`
fn newline(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
alt((tag("\r\n"), tag("\n")))(input)
fn newline(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
value((), alt((tag("\r\n"), tag("\n"))))(input)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_float() {
    // Inputs that must be consumed entirely, paired with the expected value.
    let accepted: [(&str, f64); 9] = [
        ("1.0", 1.0f64),
        ("0.0", 0.0f64),
        ("-1.0", -1.0f64),
        ("+1.0", 1.0f64),
        ("1.0e10", 1.0e10f64),
        ("1.0e-10", 1.0e-10f64),
        ("-1.0e-10", -1.0e-10f64),
        ("123_456_789.0", 123456789.0f64),
        ("123_456_789.0_", 123456789.0f64),
    ];
    for &(src, want) in accepted.iter() {
        assert_eq!(float(src), Ok(("", want)), "input: {:?}", src);
    }

    // Inputs that must be rejected.
    for &src in ["?1.0", "_1.0", "1._0", "1.", ".0"].iter() {
        assert!(float(src).is_err(), "input: {:?}", src);
    }
}
#[test]
fn test_integer() {
    // Inputs that must be consumed entirely, paired with the expected value.
    let accepted: [(&str, i64); 5] = [
        ("0123456789", 123456789),
        ("0123_456_789", 123456789),
        ("0123_456_789_", 123456789),
        ("+0123456789", 123456789),
        ("-0123456789", -123456789),
    ];
    for &(src, want) in accepted.iter() {
        assert_eq!(integer(src), Ok(("", want)), "input: {:?}", src);
    }

    // Inputs that must be rejected.
    for &src in ["?0123456789", "_0123456789", "a", "--"].iter() {
        assert!(integer(src).is_err(), "input: {:?}", src);
    }
}
#[test]
fn test_hexadecimal() {
    // Every accepted spelling below denotes the same value.
    let want: i64 = 0x0123456789abcdef;
    let accepted = [
        "0x0123456789abcdef",
        "0x01234567_89abcdef",
        "0x01234567_89abcdef_",
    ];
    for &src in accepted.iter() {
        assert_eq!(hexadecimal(src), Ok(("", want)), "input: {:?}", src);
    }

    // Inputs that must be rejected.
    for &src in ["0x_123", "0xg", "0xx"].iter() {
        assert!(hexadecimal(src).is_err(), "input: {:?}", src);
    }
}
#[test]
fn test_octal() {
    // Every accepted spelling below denotes the same value.
    let want: i64 = 0o01234567;
    for &src in ["0o01234567", "0o0123_4567", "0o01234567_"].iter() {
        assert_eq!(octal(src), Ok(("", want)), "input: {:?}", src);
    }

    // Inputs that must be rejected.
    for &src in ["0o_123", "0o8", "0oo"].iter() {
        assert!(octal(src).is_err(), "input: {:?}", src);
    }
}
#[test]
fn test_binary() {
    // Inputs that must be consumed entirely, paired with the expected value.
    let accepted: [(&str, i64); 4] = [
        ("0b0101", 0b0101),
        ("0b01_10", 0b0110),
        ("0b01___10", 0b0110),
        ("0b0110_", 0b0110),
    ];
    for &(src, want) in accepted.iter() {
        assert_eq!(binary(src), Ok(("", want)), "input: {:?}", src);
    }

    // Inputs that must be rejected.
    for &src in ["0b_0110", "0b20", "0bb"].iter() {
        assert!(binary(src).is_err(), "input: {:?}", src);
    }
}
#[test]
fn test_raw_string() {
assert_eq!(raw_string(r#"r"foo""#), Ok(("", "foo")));
@ -237,20 +404,17 @@ mod tests {
#[test]
fn test_whitespace() {
assert_eq!(whitespace(" "), Ok(("", " ")));
assert_eq!(whitespace("\t"), Ok(("", "\t")));
assert_eq!(
whitespace("/* \nfoo\r\n */ etc"),
Ok((" etc", "/* \nfoo\r\n */"))
);
assert_eq!(whitespace(" "), Ok(("", ())));
assert_eq!(whitespace("\t"), Ok(("", ())));
assert_eq!(whitespace("/* \nfoo\r\n */ etc"), Ok((" etc", ())));
assert!(whitespace("hi").is_err())
}
#[test]
fn test_newline() {
assert_eq!(newline("\n"), Ok(("", "\n")));
assert_eq!(newline("\r\n"), Ok(("", "\r\n")));
assert_eq!(newline("\n\n"), Ok(("\n", "\n")));
assert_eq!(newline("\n"), Ok(("", ())));
assert_eq!(newline("\r\n"), Ok(("", ())));
assert_eq!(newline("\n\n"), Ok(("\n", ())));
assert!(newline("\r").is_err());
assert!(newline("blah").is_err());
}