diff --git a/README.md b/README.md index 8f66807..7adf80a 100644 --- a/README.md +++ b/README.md @@ -1,157 +1,157 @@ -# kdl - Kat's Document Language - -kdl is a document language, mostly based on [SDLang](https://sdlang.org), with -xml-like semantics that looks like you're invoking a bunch of CLI commands! - -It's meant to be used both as a serialization format and a configuration -language, and is relatively light on syntax compared to XML. - -## Intro - -The basic syntax is similar to SDLang: - -```kdl -// This is a node with a single string value -title "Hello, World" - -// Multiple values are supported, too -bookmarks 12 15 188 1234 - -// Nodes can have properties -author "Alex Monad" email="alex@example.com" active=true - -// Nodes can be arbitrarily nested -contents { - section "First section" { - paragraph "This is the first paragraph" - paragraph "This is the second paragraph" - } -} - -// Nodes can be separated into multiple lines -title \ - "Some title" - -// Comment formats: - -// C++ style - -/* -C style multiline -*/ - -tag /*foo=true*/ bar=false -``` - -But kdl changes a few details: - -```kdl -// Files must be utf8 encoded! -smile "😁" - -// Instead of anonymous nodes, nodes and properties can be wrapped -// in "" for arbitrary node names. -"!@#$@$%Q#$%~@!40" "1.2.3" "!!!!!"=true - -// The following is a legal bare identifier: -foo123~!@#$%^&*.:'|<>/?+ "weeee" - -// kdl specifically allows properties and values to be -// interspersed with each other, much like CLI commands. -foo bar=true "baz" quux=false 1 2 3 - -// strings can be multiline as-is, without a different syntax. -string "my -multiline -value" - -// raw/unescaped strings use the "r" prefix on string literals and -// otherwise behave the same, including multiline support. -raw r"C:\Users\kdl" - -// You can add any number of # after the r and the last " to -// disambiguate literal " characters. -other-raw r#"hello"world"# - -// There is a single decimal number type, much like JSON's. -num 1.234e-42 - -// Numbers can have underscores to help readability: -bignum 1_000_000 - -// There is additional support for literal hexadecimal, octal, and binary input. -my-hex 0xdeadbeef -my-octal 0o755 -my-binary 0b1010_1101 -``` - -The following SDLang features are removed altogether: - -* "Anonymous" nodes -* Binary data literals -* Date/time formats -* `on` and `off` booleans -* Backtick strings -* Semicolons -* Namespaces with `:` -* Shell style (`#`) and Lua-style (`--`) comments -* Distinction between 32/64/128-bit numbers. There's just numbers. - -## Design and Discussion - -kdl is still extremely new, and discussion about the format should happen over -on the [discussions page](https://github.com/zkat/kdl/discussions). Feel free -to jump in and give us your 2 cents! - -## Grammar - -``` -nodes := linespace* (node (newline nodes)? linespace*)? - -node := identifier (node-space node-argument)* (node-space node-document)? single-line-comment? -node-argument := prop | value -node-children := '{' nodes '}' -node-space := ws* escline ws* | ws+ - -identifier := [a-zA-Z] [a-zA-Z0-9!$%&'*+\-./:<>?@\^_|~]* | string -prop := identifier '=' value -value := string | raw_string | number | boolean | 'null' - -string := '"' character* '"' -character := '\' escape | [^\"] -escape := ["\\/bfnrt] | 'u{' hex-digit{1, 6} '}' -hex-digit := [0-9a-fA-F] - -raw-string := 'r' raw-string-hash -raw-string-hash := '#' raw-string-hash '#' | raw-string-quotes -raw-string-quotes := '"' .* '"' - -number := decimal | hex | octal | binary - -decimal := integer ('.' [0-9]+)? exponent? -exponent := ('e' | 'E') integer -integer := sign? [0-9] [0-9_]* -sign := '+' | '-' - -hex := '0x' hex-digit (hex-digit | '_')* -octal := '0o' [0-7] [0-7_]* -binary := '0b' ('0' | '1') ('0' | '1' | '_')* - -boolean := 'true' | 'false' - -escline := '\\' ws* (single-line-comment | newline) - -linespace := newline | ws | single-line-comment - -newline := ('\r' '\n') | '\n' - -ws := bom | ' ' | '\t' | multi-line-comment - -single-line-comment := '//' ('\r' [^\n] | [^\r\n])* newline -multi-line-comment := '/*' ('*' [^\/] | [^*])* '*/' -``` - -## LICENSE - -The above grammar/spec is licensed CC-BY-SA. The included [LICENSE.md -file](LICENSE.md) in this repository only covers this implementation. +# kdl - Kat's Document Language + +kdl is a document language, mostly based on [SDLang](https://sdlang.org), with +xml-like semantics that looks like you're invoking a bunch of CLI commands! + +It's meant to be used both as a serialization format and a configuration +language, and is relatively light on syntax compared to XML. + +## Intro + +The basic syntax is similar to SDLang: + +```kdl +// This is a node with a single string value +title "Hello, World" + +// Multiple values are supported, too +bookmarks 12 15 188 1234 + +// Nodes can have properties +author "Alex Monad" email="alex@example.com" active=true + +// Nodes can be arbitrarily nested +contents { + section "First section" { + paragraph "This is the first paragraph" + paragraph "This is the second paragraph" + } +} + +// Nodes can be separated into multiple lines +title \ + "Some title" + +// Comment formats: + +// C++ style + +/* +C style multiline +*/ + +tag /*foo=true*/ bar=false +``` + +But kdl changes a few details: + +```kdl +// Files must be utf8 encoded! +smile "😁" + +// Instead of anonymous nodes, nodes and properties can be wrapped +// in "" for arbitrary node names. +"!@#$@$%Q#$%~@!40" "1.2.3" "!!!!!"=true + +// The following is a legal bare identifier: +foo123~!@#$%^&*.:'|<>/?+ "weeee" + +// kdl specifically allows properties and values to be +// interspersed with each other, much like CLI commands. +foo bar=true "baz" quux=false 1 2 3 + +// strings can be multiline as-is, without a different syntax. +string "my +multiline +value" + +// raw/unescaped strings use the "r" prefix on string literals and +// otherwise behave the same, including multiline support. +raw r"C:\Users\kdl" + +// You can add any number of # after the r and the last " to +// disambiguate literal " characters. +other-raw r#"hello"world"# + +// There is a single decimal number type, much like JSON's. +num 1.234e-42 + +// Numbers can have underscores to help readability: +bignum 1_000_000 + +// There is additional support for literal hexadecimal, octal, and binary input. +my-hex 0xdeadbeef +my-octal 0o755 +my-binary 0b1010_1101 +``` + +The following SDLang features are removed altogether: + +* "Anonymous" nodes +* Binary data literals +* Date/time formats +* `on` and `off` booleans +* Backtick strings +* Semicolons +* Namespaces with `:` +* Shell style (`#`) and Lua-style (`--`) comments +* Distinction between 32/64/128-bit numbers. There's just numbers. + +## Design and Discussion + +kdl is still extremely new, and discussion about the format should happen over +on the [discussions page](https://github.com/zkat/kdl/discussions). Feel free +to jump in and give us your 2 cents! + +## Grammar + +``` +nodes := linespace* (node (newline nodes)? linespace*)? + +node := identifier (node-space node-argument)* (node-space node-document)? single-line-comment? +node-argument := prop | value +node-children := '{' nodes '}' +node-space := ws* escline ws* | ws+ + +identifier := [a-zA-Z] [a-zA-Z0-9!$%&'*+\-./:<>?@\^_|~]* | string +prop := identifier '=' value +value := string | raw_string | number | boolean | 'null' + +string := '"' character* '"' +character := '\' escape | [^\"] +escape := ["\\/bfnrt] | 'u{' hex-digit{1, 6} '}' +hex-digit := [0-9a-fA-F] + +raw-string := 'r' raw-string-hash +raw-string-hash := '#' raw-string-hash '#' | raw-string-quotes +raw-string-quotes := '"' .* '"' + +number := decimal | hex | octal | binary + +decimal := integer ('.' [0-9]+)? exponent? +exponent := ('e' | 'E') integer +integer := sign? [0-9] [0-9_]* +sign := '+' | '-' + +hex := '0x' hex-digit (hex-digit | '_')* +octal := '0o' [0-7] [0-7_]* +binary := '0b' ('0' | '1') ('0' | '1' | '_')* + +boolean := 'true' | 'false' + +escline := '\\' ws* (single-line-comment | newline) + +linespace := newline | ws | single-line-comment + +newline := ('\r' '\n') | '\n' + +ws := bom | ' ' | '\t' | multi-line-comment + +single-line-comment := '//' ('\r' [^\n] | [^\r\n])* newline +multi-line-comment := '/*' ('*' [^\/] | [^*])* '*/' +``` + +## LICENSE + +The above grammar/spec is licensed CC-BY-SA. The included [LICENSE.md +file](LICENSE.md) in this repository only covers this implementation. diff --git a/src/lib.rs b/src/lib.rs index a190ccf..f1abdec 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,37 +1,37 @@ -use nom::combinator::all_consuming; -use nom::Err; - -pub use crate::error::{KdlError, KdlErrorKind}; -pub use crate::node::KdlNode; - -mod error; -mod node; -mod parser; - -pub fn parse_document(input: I) -> Result, KdlError> -where - I: AsRef, -{ - let input = &input.as_ref()[..]; - match all_consuming(parser::nodes)(input) { - Ok((_, arg)) => Ok(arg), - Err(err) => Err(match err { - Err::Error(e) | Err::Failure(e) => KdlError { - input: input.into(), - offset: e.input.as_ptr() as usize - input.as_ptr() as usize, - kind: if let Some(kind) = e.kind { - kind - } else if let Some(ctx) = e.context { - KdlErrorKind::Context(ctx) - } else { - KdlErrorKind::Other - }, - }, - Err::Incomplete(_) => KdlError { - input: input.into(), - offset: input.len() - 1, - kind: KdlErrorKind::IncompleteInput, - }, - }), - } -} +use nom::combinator::all_consuming; +use nom::Err; + +pub use crate::error::{KdlError, KdlErrorKind}; +pub use crate::node::KdlNode; + +mod error; +mod node; +mod parser; + +pub fn parse_document(input: I) -> Result, KdlError> +where + I: AsRef, +{ + let input = &input.as_ref()[..]; + match all_consuming(parser::nodes)(input) { + Ok((_, arg)) => Ok(arg), + Err(err) => Err(match err { + Err::Error(e) | Err::Failure(e) => KdlError { + input: input.into(), + offset: e.input.as_ptr() as usize - input.as_ptr() as usize, + kind: if let Some(kind) = e.kind { + kind + } else if let Some(ctx) = e.context { + KdlErrorKind::Context(ctx) + } else { + KdlErrorKind::Other + }, + }, + Err::Incomplete(_) => KdlError { + input: input.into(), + offset: input.len() - 1, + kind: KdlErrorKind::IncompleteInput, + }, + }), + } +} diff --git a/src/node.rs b/src/node.rs index 40f3269..72860e7 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,18 +1,18 @@ -use std::collections::HashMap; - -#[derive(Debug, Clone, PartialEq)] -pub struct KdlNode { - pub name: String, - pub values: Vec, - pub properties: HashMap, - pub children: Vec, -} - -#[derive(Debug, Clone, PartialEq)] -pub enum KdlNodeValue { - Int(i64), - Float(f64), - String(String), - Boolean(bool), - Null, -} +use std::collections::HashMap; + +#[derive(Debug, Clone, PartialEq)] +pub struct KdlNode { + pub name: String, + pub values: Vec, + pub properties: HashMap, + pub children: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum KdlNodeValue { + Int(i64), + Float(f64), + String(String), + Boolean(bool), + Null, +} diff --git a/src/parser.rs b/src/parser.rs index c5c8cf1..9a2d4fb 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,473 +1,473 @@ -use std::collections::HashMap; - -use nom::branch::alt; -use nom::bytes::complete::{is_not, tag, take_until, take_while_m_n}; -use nom::character::complete::{alpha1, alphanumeric1, char, none_of, one_of}; -use nom::combinator::{eof, map, map_opt, map_res, opt, recognize, value}; -use nom::multi::{fold_many0, many0, many1}; -use nom::sequence::{delimited, pair, preceded, terminated, tuple}; -use nom::IResult; - -use crate::error::KdlParseError; -use crate::node::{KdlNode, KdlNodeValue}; - -/// `nodes := linespace* (node (newline document)?)?` -pub(crate) fn nodes(input: &str) -> IResult<&str, Vec, KdlParseError<&str>> { - many0(delimited(many0(linespace), node, newline))(input) -} - -#[derive(Clone)] -enum NodeArg { - Value(KdlNodeValue), - Property(String, KdlNodeValue), -} - -/// `node := identifier (node-space node-argument)* (node-space node-document)?` -pub(crate) fn node(input: &str) -> IResult<&str, KdlNode, KdlParseError<&str>> { - let (input, tag) = identifier(input)?; - let (input, args) = many0(preceded(node_space, node_arg))(input)?; - let (input, children) = opt(preceded(node_space, node_children))(input)?; - let (values, properties): (Vec, Vec) = args - .into_iter() - .partition(|arg| matches!(arg, NodeArg::Value(_))); - Ok(( - input, - KdlNode { - name: tag, - children: children.unwrap_or_else(Vec::new), - values: values - .into_iter() - .map(|arg| match arg { - NodeArg::Value(val) => val, - _ => unreachable!(), - }) - .collect(), - properties: properties.into_iter().fold(HashMap::new(), |mut acc, arg| { - match arg { - NodeArg::Property(key, value) => { - acc.insert(key, value); - } - _ => unreachable!(), - } - acc - }), - }, - )) -} - -/// `identifier := [a-zA-Z_] [a-zA-Z0-9!$%&'*+\-./:<>?@\^_|~]* | string` -fn identifier(input: &str) -> IResult<&str, String, KdlParseError<&str>> { - alt(( - map( - recognize(pair( - alt((alpha1, tag("_"))), - many0(alt((alphanumeric1, recognize(one_of("~!@$%^&*-_+./:<>?"))))), - )), - String::from, - ), - string, - ))(input) -} - -fn node_arg(input: &str) -> IResult<&str, NodeArg, KdlParseError<&str>> { - alt(( - map(property, |(key, val)| NodeArg::Property(key, val)), - map(node_value, NodeArg::Value), - ))(input) -} - -/// `prop := identifier '=' value` -fn property(input: &str) -> IResult<&str, (String, KdlNodeValue), KdlParseError<&str>> { - let (input, key) = identifier(input)?; - let (input, _) = tag("=")(input)?; - let (input, val) = node_value(input)?; - Ok((input, (key, val))) -} - -/// `value := string | raw_string | number | boolean | 'null'` -fn node_value(input: &str) -> IResult<&str, KdlNodeValue, KdlParseError<&str>> { - alt(( - map(string, KdlNodeValue::String), - map(raw_string, |s| KdlNodeValue::String(s.into())), - number, - boolean, - value(KdlNodeValue::Null, tag("null")), - ))(input) -} - -/// `node-children := '{' nodes '}'` -fn node_children(input: &str) -> IResult<&str, Vec, KdlParseError<&str>> { - delimited(tag("{"), nodes, tag("}"))(input) -} - -/// `string := '"' character* '"'` -fn string(input: &str) -> IResult<&str, String, KdlParseError<&str>> { - delimited( - char('"'), - fold_many0(character, String::new(), |mut acc, ch| { - acc.push(ch); - acc - }), - char('"'), - )(input) -} - -/// `character := '\' escape | [^\"]` -fn character(input: &str) -> IResult<&str, char, KdlParseError<&str>> { - alt((preceded(char('\\'), escape), none_of("\\\"")))(input) -} - -/// `escape := ["\\/bfnrt] | 'u{' hex-digit{1, 6} '}'` -fn escape(input: &str) -> IResult<&str, char, KdlParseError<&str>> { - alt(( - delimited(tag("u{"), unicode, char('}')), - value('"', char('"')), - value('\\', char('\\')), - value('/', char('/')), - value('\u{08}', char('b')), - value('\u{0C}', char('f')), - value('\n', char('n')), - value('\r', char('r')), - value('\t', char('t')), - ))(input) -} - -fn unicode(input: &str) -> IResult<&str, char, KdlParseError<&str>> { - map_opt( - map_res( - take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()), - |hex| u32::from_str_radix(hex, 16), - ), - std::char::from_u32, - )(input) -} - -/// `raw-string := 'r' raw-string-hash` -/// `raw-string-hash := '#' raw-string-hash '#' | raw-string-quotes` -/// `raw-string-quotes := '"' .* '"'` -fn raw_string(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { - let (input, _) = char('r')(input)?; - let (input, hashes) = recognize(many0(char('#')))(input)?; - let (input, _) = char('"')(input)?; - let close = format!("\"{}", hashes); - let (input, string) = take_until(&close[..])(input)?; - let (input, _) = tag(&close[..])(input)?; - Ok((input, string)) -} - -/// `number := decimal | hex | octal | binary` -fn number(input: &str) -> IResult<&str, KdlNodeValue, KdlParseError<&str>> { - alt(( - map(integer, KdlNodeValue::Int), - map(hexadecimal, KdlNodeValue::Int), - map(octal, KdlNodeValue::Int), - map(binary, KdlNodeValue::Int), - map(float, KdlNodeValue::Float), - ))(input) -} - -/// ```ignore -/// decimal := integer ('.' [0-9]+)? exponent? -/// exponent := ('e' | 'E') integer -/// integer := sign? [1-9] [0-9_]* -/// sign := '+' | '-' -/// ``` -fn float(input: &str) -> IResult<&str, f64, KdlParseError<&str>> { - map_res( - alt(( - recognize(tuple(( - integer, - opt(preceded(char('.'), integer)), - one_of("eE"), - opt(one_of("+-")), - integer, - ))), - recognize(tuple((integer, char('.'), integer))), - )), - |x| str::replace(x, "_", "").parse::(), - )(input) -} - -/// ```ignore -/// decimal := integer ('.' [0-9]+)? exponent? -/// exponent := ('e' | 'E') integer -/// integer := sign? [1-9] [0-9_]* -/// sign := '+' | '-' -/// ``` -fn integer(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { - let (input, sign) = opt(alt((char('+'), char('-'))))(input)?; - let mult = if let Some(sign) = sign { - if sign == '+' { - 1 - } else { - -1 - } - } else { - 1 - }; - map_res( - recognize(many1(terminated(one_of("0123456789"), many0(char('_'))))), - move |out: &str| { - i64::from_str_radix(&str::replace(&out, "_", ""), 10).map(move |x| x * mult) - }, - )(input) -} - -/// `hex := '0x' [0-9a-fA-F] [0-9a-fA-F_]*` -fn hexadecimal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { - map_res( - preceded( - alt((tag("0x"), tag("0X"))), - recognize(many1(terminated( - one_of("0123456789abcdefABCDEF"), - many0(char('_')), - ))), - ), - move |out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 16), - )(input) -} - -/// `octal := '0o' [0-7] [0-7_]*` -fn octal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { - map_res( - preceded( - alt((tag("0o"), tag("0O"))), - recognize(many1(terminated(one_of("01234567"), many0(char('_'))))), - ), - move |out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 8), - )(input) -} - -/// `binary := '0b' ('0' | '1') ('0' | '1' | '_')*` -fn binary(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { - map_res( - preceded( - alt((tag("0b"), tag("0B"))), - recognize(many1(terminated(one_of("01"), many0(char('_'))))), - ), - move |out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 2), - )(input) -} - -/// `boolean := 'true' | 'false'` -fn boolean(input: &str) -> IResult<&str, KdlNodeValue, KdlParseError<&str>> { - alt(( - value(KdlNodeValue::Boolean(true), tag("true")), - value(KdlNodeValue::Boolean(false), tag("false")), - ))(input) -} - -/// `node-space := ws* escline ws* | ws+` -fn node_space(input: &str) -> IResult<&str, (), KdlParseError<&str>> { - alt(( - delimited(many0(whitespace), escline, many0(whitespace)), - map(many1(whitespace), |_| ()), - ))(input) -} - -/// `single-line-comment := '//' ('\r' [^\n] | [^\r\n])* (newline | eof)` -fn single_line_comment(input: &str) -> IResult<&str, (), KdlParseError<&str>> { - let (input, _) = tag("//")(input)?; - let (input, _) = alt((take_until("\r\n"), is_not("\n")))(input)?; - let (input, _) = alt((newline, value((), eof)))(input)?; - Ok((input, ())) -} - -/// `multi-line-comment := '/*' ('*' [^\/] | [^*])* '*/'` -fn multi_line_comment(input: &str) -> IResult<&str, (), KdlParseError<&str>> { - delimited(tag("/*"), value((), take_until("*/")), tag("*/"))(input) -} - -/// `escline := '\\' ws* (single-line-comment | newline)` -fn escline(input: &str) -> IResult<&str, (), KdlParseError<&str>> { - let (input, _) = tag("\\")(input)?; - let (input, _) = many0(whitespace)(input)?; - let (input, _) = alt((single_line_comment, newline))(input)?; - Ok((input, ())) -} - -/// `linespace := newline | ws | single-line-comment` -fn linespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> { - value((), alt((newline, whitespace, single_line_comment)))(input) -} - -/// `ws := bom | ' ' | '\t' | multi-line-comment` -fn whitespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> { - // TODO: bom? - value( - (), - alt(( - /*bom,*/ tag(" "), - tag("\t"), - recognize(multi_line_comment), - )), - )(input) -} - -/// `newline := ('\r' '\n') | '\n'` -fn newline(input: &str) -> IResult<&str, (), KdlParseError<&str>> { - value((), alt((tag("\r\n"), tag("\n"))))(input) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_string() { - assert_eq!(string("\"\""), Ok(("", "".into()))); - assert_eq!(string("\"hello\""), Ok(("", "hello".into()))); - assert_eq!(string("\"hello\nworld\""), Ok(("", "hello\nworld".into()))); - assert_eq!(string("\"\u{10FFF}\""), Ok(("", "\u{10FFF}".into()))); - assert_eq!( - string(r#""\"\\\/\b\f\n\r\t""#), - Ok(("", "\"\\/\u{08}\u{0C}\n\r\t".into())) - ); - assert_eq!(string(r#""\u{10}""#), Ok(("", "\u{10}".into()))); - assert!(string(r#""\i""#).is_err()); - assert!(string(r#""\u{c0ffee}""#).is_err()); - } - - #[test] - fn test_float() { - assert_eq!(float("1.0"), Ok(("", 1.0f64))); - assert_eq!(float("0.0"), Ok(("", 0.0f64))); - assert_eq!(float("-1.0"), Ok(("", -1.0f64))); - assert_eq!(float("+1.0"), Ok(("", 1.0f64))); - assert_eq!(float("1.0e10"), Ok(("", 1.0e10f64))); - assert_eq!(float("1.0e-10"), Ok(("", 1.0e-10f64))); - assert_eq!(float("-1.0e-10"), Ok(("", -1.0e-10f64))); - assert_eq!(float("123_456_789.0"), Ok(("", 123456789.0f64))); - assert_eq!(float("123_456_789.0_"), Ok(("", 123456789.0f64))); - assert!(float("?1.0").is_err()); - assert!(float("_1.0").is_err()); - assert!(float("1._0").is_err()); - assert!(float("1.").is_err()); - assert!(float(".0").is_err()); - } - - #[test] - fn test_integer() { - assert_eq!(integer("0"), Ok(("", 0))); - assert_eq!(integer("0123456789"), Ok(("", 123456789))); - assert_eq!(integer("0123_456_789"), Ok(("", 123456789))); - assert_eq!(integer("0123_456_789_"), Ok(("", 123456789))); - assert_eq!(integer("+0123456789"), Ok(("", 123456789))); - assert_eq!(integer("-0123456789"), Ok(("", -123456789))); - assert!(integer("?0123456789").is_err()); - assert!(integer("_0123456789").is_err()); - assert!(integer("a").is_err()); - assert!(integer("--").is_err()); - } - - #[test] - fn test_hexadecimal() { - assert_eq!( - hexadecimal("0x0123456789abcdef"), - Ok(("", 0x0123456789abcdef)) - ); - assert_eq!( - hexadecimal("0x01234567_89abcdef"), - Ok(("", 0x0123456789abcdef)) - ); - assert_eq!( - hexadecimal("0x01234567_89abcdef_"), - Ok(("", 0x0123456789abcdef)) - ); - assert!(hexadecimal("0x_123").is_err()); - assert!(hexadecimal("0xg").is_err()); - assert!(hexadecimal("0xx").is_err()); - } - - #[test] - fn test_octal() { - assert_eq!(octal("0o01234567"), Ok(("", 0o01234567))); - assert_eq!(octal("0o0123_4567"), Ok(("", 0o01234567))); - assert_eq!(octal("0o01234567_"), Ok(("", 0o01234567))); - assert!(octal("0o_123").is_err()); - assert!(octal("0o8").is_err()); - assert!(octal("0oo").is_err()); - } - - #[test] - fn test_binary() { - assert_eq!(binary("0b0101"), Ok(("", 0b0101))); - assert_eq!(binary("0b01_10"), Ok(("", 0b0110))); - assert_eq!(binary("0b01___10"), Ok(("", 0b0110))); - assert_eq!(binary("0b0110_"), Ok(("", 0b0110))); - assert!(binary("0b_0110").is_err()); - assert!(binary("0b20").is_err()); - assert!(binary("0bb").is_err()); - } - - #[test] - fn test_raw_string() { - assert_eq!(raw_string(r#"r"foo""#), Ok(("", "foo"))); - assert_eq!(raw_string("r\"foo\nbar\""), Ok(("", "foo\nbar"))); - assert_eq!(raw_string(r##"r#"foo"#"##), Ok(("", "foo"))); - assert_eq!(raw_string(r###"r##"foo"##"###), Ok(("", "foo"))); - assert_eq!(raw_string(r#"r"\nfoo\r""#), Ok(("", r"\nfoo\r"))); - assert!(raw_string(r###"r##"foo"#"###).is_err()); - } - - #[test] - fn test_boolean() { - assert_eq!(boolean("true"), Ok(("", KdlNodeValue::Boolean(true)))); - assert_eq!(boolean("false"), Ok(("", KdlNodeValue::Boolean(false)))); - assert!(boolean("blah").is_err()); - } - - #[test] - fn test_node_space() { - assert_eq!(node_space(" "), Ok(("", ()))); - assert_eq!(node_space("\t "), Ok(("", ()))); - assert_eq!(node_space("\t \\ // hello\n "), Ok(("", ()))); - assert!(node_space("blah").is_err()); - } - - #[test] - fn test_single_line_comment() { - assert_eq!(single_line_comment("//hello"), Ok(("", ()))); - assert_eq!(single_line_comment("// \thello"), Ok(("", ()))); - assert_eq!(single_line_comment("//hello\n"), Ok(("", ()))); - assert_eq!(single_line_comment("//hello\r\n"), Ok(("", ()))); - assert_eq!(single_line_comment("//hello\n\r"), Ok(("\r", ()))); - assert_eq!(single_line_comment("//hello\rworld"), Ok(("", ()))); - } - - #[test] - fn test_multi_line_comment() { - assert_eq!(multi_line_comment("/*hello*/"), Ok(("", ()))); - assert_eq!(multi_line_comment("/*hello*/\n"), Ok(("\n", ()))); - assert_eq!(multi_line_comment("/*\nhello\r\n*/"), Ok(("", ()))); - assert_eq!(multi_line_comment("/*\nhello** /\n*/"), Ok(("", ()))); - assert_eq!(multi_line_comment("/**\nhello** /\n*/"), Ok(("", ()))); - assert_eq!(multi_line_comment("/*hello*/world"), Ok(("world", ()))); - } - - #[test] - fn test_escline() { - assert_eq!(escline("\\\nfoo"), Ok(("foo", ()))); - assert_eq!(escline("\\\n foo"), Ok((" foo", ()))); - assert_eq!(escline("\\ \t \nfoo"), Ok(("foo", ()))); - assert_eq!(escline("\\ // test \nfoo"), Ok(("foo", ()))); - assert_eq!(escline("\\ // test \n foo"), Ok((" foo", ()))); - } - - #[test] - fn test_whitespace() { - assert_eq!(whitespace(" "), Ok(("", ()))); - assert_eq!(whitespace("\t"), Ok(("", ()))); - assert_eq!(whitespace("/* \nfoo\r\n */ etc"), Ok((" etc", ()))); - assert!(whitespace("hi").is_err()) - } - - #[test] - fn test_newline() { - assert_eq!(newline("\n"), Ok(("", ()))); - assert_eq!(newline("\r\n"), Ok(("", ()))); - assert_eq!(newline("\n\n"), Ok(("\n", ()))); - assert!(newline("\r").is_err()); - assert!(newline("blah").is_err()); - } -} +use std::collections::HashMap; + +use nom::branch::alt; +use nom::bytes::complete::{is_not, tag, take_until, take_while_m_n}; +use nom::character::complete::{alpha1, alphanumeric1, char, none_of, one_of}; +use nom::combinator::{eof, map, map_opt, map_res, opt, recognize, value}; +use nom::multi::{fold_many0, many0, many1}; +use nom::sequence::{delimited, pair, preceded, terminated, tuple}; +use nom::IResult; + +use crate::error::KdlParseError; +use crate::node::{KdlNode, KdlNodeValue}; + +/// `nodes := linespace* (node (newline document)?)?` +pub(crate) fn nodes(input: &str) -> IResult<&str, Vec, KdlParseError<&str>> { + many0(delimited(many0(linespace), node, newline))(input) +} + +#[derive(Clone)] +enum NodeArg { + Value(KdlNodeValue), + Property(String, KdlNodeValue), +} + +/// `node := identifier (node-space node-argument)* (node-space node-document)?` +pub(crate) fn node(input: &str) -> IResult<&str, KdlNode, KdlParseError<&str>> { + let (input, tag) = identifier(input)?; + let (input, args) = many0(preceded(node_space, node_arg))(input)?; + let (input, children) = opt(preceded(node_space, node_children))(input)?; + let (values, properties): (Vec, Vec) = args + .into_iter() + .partition(|arg| matches!(arg, NodeArg::Value(_))); + Ok(( + input, + KdlNode { + name: tag, + children: children.unwrap_or_else(Vec::new), + values: values + .into_iter() + .map(|arg| match arg { + NodeArg::Value(val) => val, + _ => unreachable!(), + }) + .collect(), + properties: properties.into_iter().fold(HashMap::new(), |mut acc, arg| { + match arg { + NodeArg::Property(key, value) => { + acc.insert(key, value); + } + _ => unreachable!(), + } + acc + }), + }, + )) +} + +/// `identifier := [a-zA-Z_] [a-zA-Z0-9!$%&'*+\-./:<>?@\^_|~]* | string` +fn identifier(input: &str) -> IResult<&str, String, KdlParseError<&str>> { + alt(( + map( + recognize(pair( + alt((alpha1, tag("_"))), + many0(alt((alphanumeric1, recognize(one_of("~!@$%^&*-_+./:<>?"))))), + )), + String::from, + ), + string, + ))(input) +} + +fn node_arg(input: &str) -> IResult<&str, NodeArg, KdlParseError<&str>> { + alt(( + map(property, |(key, val)| NodeArg::Property(key, val)), + map(node_value, NodeArg::Value), + ))(input) +} + +/// `prop := identifier '=' value` +fn property(input: &str) -> IResult<&str, (String, KdlNodeValue), KdlParseError<&str>> { + let (input, key) = identifier(input)?; + let (input, _) = tag("=")(input)?; + let (input, val) = node_value(input)?; + Ok((input, (key, val))) +} + +/// `value := string | raw_string | number | boolean | 'null'` +fn node_value(input: &str) -> IResult<&str, KdlNodeValue, KdlParseError<&str>> { + alt(( + map(string, KdlNodeValue::String), + map(raw_string, |s| KdlNodeValue::String(s.into())), + number, + boolean, + value(KdlNodeValue::Null, tag("null")), + ))(input) +} + +/// `node-children := '{' nodes '}'` +fn node_children(input: &str) -> IResult<&str, Vec, KdlParseError<&str>> { + delimited(tag("{"), nodes, tag("}"))(input) +} + +/// `string := '"' character* '"'` +fn string(input: &str) -> IResult<&str, String, KdlParseError<&str>> { + delimited( + char('"'), + fold_many0(character, String::new(), |mut acc, ch| { + acc.push(ch); + acc + }), + char('"'), + )(input) +} + +/// `character := '\' escape | [^\"]` +fn character(input: &str) -> IResult<&str, char, KdlParseError<&str>> { + alt((preceded(char('\\'), escape), none_of("\\\"")))(input) +} + +/// `escape := ["\\/bfnrt] | 'u{' hex-digit{1, 6} '}'` +fn escape(input: &str) -> IResult<&str, char, KdlParseError<&str>> { + alt(( + delimited(tag("u{"), unicode, char('}')), + value('"', char('"')), + value('\\', char('\\')), + value('/', char('/')), + value('\u{08}', char('b')), + value('\u{0C}', char('f')), + value('\n', char('n')), + value('\r', char('r')), + value('\t', char('t')), + ))(input) +} + +fn unicode(input: &str) -> IResult<&str, char, KdlParseError<&str>> { + map_opt( + map_res( + take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()), + |hex| u32::from_str_radix(hex, 16), + ), + std::char::from_u32, + )(input) +} + +/// `raw-string := 'r' raw-string-hash` +/// `raw-string-hash := '#' raw-string-hash '#' | raw-string-quotes` +/// `raw-string-quotes := '"' .* '"'` +fn raw_string(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { + let (input, _) = char('r')(input)?; + let (input, hashes) = recognize(many0(char('#')))(input)?; + let (input, _) = char('"')(input)?; + let close = format!("\"{}", hashes); + let (input, string) = take_until(&close[..])(input)?; + let (input, _) = tag(&close[..])(input)?; + Ok((input, string)) +} + +/// `number := decimal | hex | octal | binary` +fn number(input: &str) -> IResult<&str, KdlNodeValue, KdlParseError<&str>> { + alt(( + map(integer, KdlNodeValue::Int), + map(hexadecimal, KdlNodeValue::Int), + map(octal, KdlNodeValue::Int), + map(binary, KdlNodeValue::Int), + map(float, KdlNodeValue::Float), + ))(input) +} + +/// ```ignore +/// decimal := integer ('.' [0-9]+)? exponent? +/// exponent := ('e' | 'E') integer +/// integer := sign? [1-9] [0-9_]* +/// sign := '+' | '-' +/// ``` +fn float(input: &str) -> IResult<&str, f64, KdlParseError<&str>> { + map_res( + alt(( + recognize(tuple(( + integer, + opt(preceded(char('.'), integer)), + one_of("eE"), + opt(one_of("+-")), + integer, + ))), + recognize(tuple((integer, char('.'), integer))), + )), + |x| str::replace(x, "_", "").parse::(), + )(input) +} + +/// ```ignore +/// decimal := integer ('.' [0-9]+)? exponent? +/// exponent := ('e' | 'E') integer +/// integer := sign? [1-9] [0-9_]* +/// sign := '+' | '-' +/// ``` +fn integer(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { + let (input, sign) = opt(alt((char('+'), char('-'))))(input)?; + let mult = if let Some(sign) = sign { + if sign == '+' { + 1 + } else { + -1 + } + } else { + 1 + }; + map_res( + recognize(many1(terminated(one_of("0123456789"), many0(char('_'))))), + move |out: &str| { + i64::from_str_radix(&str::replace(&out, "_", ""), 10).map(move |x| x * mult) + }, + )(input) +} + +/// `hex := '0x' [0-9a-fA-F] [0-9a-fA-F_]*` +fn hexadecimal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { + map_res( + preceded( + alt((tag("0x"), tag("0X"))), + recognize(many1(terminated( + one_of("0123456789abcdefABCDEF"), + many0(char('_')), + ))), + ), + move |out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 16), + )(input) +} + +/// `octal := '0o' [0-7] [0-7_]*` +fn octal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { + map_res( + preceded( + alt((tag("0o"), tag("0O"))), + recognize(many1(terminated(one_of("01234567"), many0(char('_'))))), + ), + move |out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 8), + )(input) +} + +/// `binary := '0b' ('0' | '1') ('0' | '1' | '_')*` +fn binary(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { + map_res( + preceded( + alt((tag("0b"), tag("0B"))), + recognize(many1(terminated(one_of("01"), many0(char('_'))))), + ), + move |out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 2), + )(input) +} + +/// `boolean := 'true' | 'false'` +fn boolean(input: &str) -> IResult<&str, KdlNodeValue, KdlParseError<&str>> { + alt(( + value(KdlNodeValue::Boolean(true), tag("true")), + value(KdlNodeValue::Boolean(false), tag("false")), + ))(input) +} + +/// `node-space := ws* escline ws* | ws+` +fn node_space(input: &str) -> IResult<&str, (), KdlParseError<&str>> { + alt(( + delimited(many0(whitespace), escline, many0(whitespace)), + map(many1(whitespace), |_| ()), + ))(input) +} + +/// `single-line-comment := '//' ('\r' [^\n] | [^\r\n])* (newline | eof)` +fn single_line_comment(input: &str) -> IResult<&str, (), KdlParseError<&str>> { + let (input, _) = tag("//")(input)?; + let (input, _) = alt((take_until("\r\n"), is_not("\n")))(input)?; + let (input, _) = alt((newline, value((), eof)))(input)?; + Ok((input, ())) +} + +/// `multi-line-comment := '/*' ('*' [^\/] | [^*])* '*/'` +fn multi_line_comment(input: &str) -> IResult<&str, (), KdlParseError<&str>> { + delimited(tag("/*"), value((), take_until("*/")), tag("*/"))(input) +} + +/// `escline := '\\' ws* (single-line-comment | newline)` +fn escline(input: &str) -> IResult<&str, (), KdlParseError<&str>> { + let (input, _) = tag("\\")(input)?; + let (input, _) = many0(whitespace)(input)?; + let (input, _) = alt((single_line_comment, newline))(input)?; + Ok((input, ())) +} + +/// `linespace := newline | ws | single-line-comment` +fn linespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> { + value((), alt((newline, whitespace, single_line_comment)))(input) +} + +/// `ws := bom | ' ' | '\t' | multi-line-comment` +fn whitespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> { + // TODO: bom? + value( + (), + alt(( + /*bom,*/ tag(" "), + tag("\t"), + recognize(multi_line_comment), + )), + )(input) +} + +/// `newline := ('\r' '\n') | '\n'` +fn newline(input: &str) -> IResult<&str, (), KdlParseError<&str>> { + value((), alt((tag("\r\n"), tag("\n"))))(input) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_string() { + assert_eq!(string("\"\""), Ok(("", "".into()))); + assert_eq!(string("\"hello\""), Ok(("", "hello".into()))); + assert_eq!(string("\"hello\nworld\""), Ok(("", "hello\nworld".into()))); + assert_eq!(string("\"\u{10FFF}\""), Ok(("", "\u{10FFF}".into()))); + assert_eq!( + string(r#""\"\\\/\b\f\n\r\t""#), + Ok(("", "\"\\/\u{08}\u{0C}\n\r\t".into())) + ); + assert_eq!(string(r#""\u{10}""#), Ok(("", "\u{10}".into()))); + assert!(string(r#""\i""#).is_err()); + assert!(string(r#""\u{c0ffee}""#).is_err()); + } + + #[test] + fn test_float() { + assert_eq!(float("1.0"), Ok(("", 1.0f64))); + assert_eq!(float("0.0"), Ok(("", 0.0f64))); + assert_eq!(float("-1.0"), Ok(("", -1.0f64))); + assert_eq!(float("+1.0"), Ok(("", 1.0f64))); + assert_eq!(float("1.0e10"), Ok(("", 1.0e10f64))); + assert_eq!(float("1.0e-10"), Ok(("", 1.0e-10f64))); + assert_eq!(float("-1.0e-10"), Ok(("", -1.0e-10f64))); + assert_eq!(float("123_456_789.0"), Ok(("", 123456789.0f64))); + assert_eq!(float("123_456_789.0_"), Ok(("", 123456789.0f64))); + assert!(float("?1.0").is_err()); + assert!(float("_1.0").is_err()); + assert!(float("1._0").is_err()); + assert!(float("1.").is_err()); + assert!(float(".0").is_err()); + } + + #[test] + fn test_integer() { + assert_eq!(integer("0"), Ok(("", 0))); + assert_eq!(integer("0123456789"), Ok(("", 123456789))); + assert_eq!(integer("0123_456_789"), Ok(("", 123456789))); + assert_eq!(integer("0123_456_789_"), Ok(("", 123456789))); + assert_eq!(integer("+0123456789"), Ok(("", 123456789))); + assert_eq!(integer("-0123456789"), Ok(("", -123456789))); + assert!(integer("?0123456789").is_err()); + assert!(integer("_0123456789").is_err()); + assert!(integer("a").is_err()); + assert!(integer("--").is_err()); + } + + #[test] + fn test_hexadecimal() { + assert_eq!( + hexadecimal("0x0123456789abcdef"), + Ok(("", 0x0123456789abcdef)) + ); + assert_eq!( + hexadecimal("0x01234567_89abcdef"), + Ok(("", 0x0123456789abcdef)) + ); + assert_eq!( + hexadecimal("0x01234567_89abcdef_"), + Ok(("", 0x0123456789abcdef)) + ); + assert!(hexadecimal("0x_123").is_err()); + assert!(hexadecimal("0xg").is_err()); + assert!(hexadecimal("0xx").is_err()); + } + + #[test] + fn test_octal() { + assert_eq!(octal("0o01234567"), Ok(("", 0o01234567))); + assert_eq!(octal("0o0123_4567"), Ok(("", 0o01234567))); + assert_eq!(octal("0o01234567_"), Ok(("", 0o01234567))); + assert!(octal("0o_123").is_err()); + assert!(octal("0o8").is_err()); + assert!(octal("0oo").is_err()); + } + + #[test] + fn test_binary() { + assert_eq!(binary("0b0101"), Ok(("", 0b0101))); + assert_eq!(binary("0b01_10"), Ok(("", 0b0110))); + assert_eq!(binary("0b01___10"), Ok(("", 0b0110))); + assert_eq!(binary("0b0110_"), Ok(("", 0b0110))); + assert!(binary("0b_0110").is_err()); + assert!(binary("0b20").is_err()); + assert!(binary("0bb").is_err()); + } + + #[test] + fn test_raw_string() { + assert_eq!(raw_string(r#"r"foo""#), Ok(("", "foo"))); + assert_eq!(raw_string("r\"foo\nbar\""), Ok(("", "foo\nbar"))); + assert_eq!(raw_string(r##"r#"foo"#"##), Ok(("", "foo"))); + assert_eq!(raw_string(r###"r##"foo"##"###), Ok(("", "foo"))); + assert_eq!(raw_string(r#"r"\nfoo\r""#), Ok(("", r"\nfoo\r"))); + assert!(raw_string(r###"r##"foo"#"###).is_err()); + } + + #[test] + fn test_boolean() { + assert_eq!(boolean("true"), Ok(("", KdlNodeValue::Boolean(true)))); + assert_eq!(boolean("false"), Ok(("", KdlNodeValue::Boolean(false)))); + assert!(boolean("blah").is_err()); + } + + #[test] + fn test_node_space() { + assert_eq!(node_space(" "), Ok(("", ()))); + assert_eq!(node_space("\t "), Ok(("", ()))); + assert_eq!(node_space("\t \\ // hello\n "), Ok(("", ()))); + assert!(node_space("blah").is_err()); + } + + #[test] + fn test_single_line_comment() { + assert_eq!(single_line_comment("//hello"), Ok(("", ()))); + assert_eq!(single_line_comment("// \thello"), Ok(("", ()))); + assert_eq!(single_line_comment("//hello\n"), Ok(("", ()))); + assert_eq!(single_line_comment("//hello\r\n"), Ok(("", ()))); + assert_eq!(single_line_comment("//hello\n\r"), Ok(("\r", ()))); + assert_eq!(single_line_comment("//hello\rworld"), Ok(("", ()))); + } + + #[test] + fn test_multi_line_comment() { + assert_eq!(multi_line_comment("/*hello*/"), Ok(("", ()))); + assert_eq!(multi_line_comment("/*hello*/\n"), Ok(("\n", ()))); + assert_eq!(multi_line_comment("/*\nhello\r\n*/"), Ok(("", ()))); + assert_eq!(multi_line_comment("/*\nhello** /\n*/"), Ok(("", ()))); + assert_eq!(multi_line_comment("/**\nhello** /\n*/"), Ok(("", ()))); + assert_eq!(multi_line_comment("/*hello*/world"), Ok(("world", ()))); + } + + #[test] + fn test_escline() { + assert_eq!(escline("\\\nfoo"), Ok(("foo", ()))); + assert_eq!(escline("\\\n foo"), Ok((" foo", ()))); + assert_eq!(escline("\\ \t \nfoo"), Ok(("foo", ()))); + assert_eq!(escline("\\ // test \nfoo"), Ok(("foo", ()))); + assert_eq!(escline("\\ // test \n foo"), Ok((" foo", ()))); + } + + #[test] + fn test_whitespace() { + assert_eq!(whitespace(" "), Ok(("", ()))); + assert_eq!(whitespace("\t"), Ok(("", ()))); + assert_eq!(whitespace("/* \nfoo\r\n */ etc"), Ok((" etc", ()))); + assert!(whitespace("hi").is_err()) + } + + #[test] + fn test_newline() { + assert_eq!(newline("\n"), Ok(("", ()))); + assert_eq!(newline("\r\n"), Ok(("", ()))); + assert_eq!(newline("\n\n"), Ok(("\n", ()))); + assert!(newline("\r").is_err()); + assert!(newline("blah").is_err()); + } +}