switch to LF line endings

This commit is contained in:
Kat Marchán 2020-12-14 00:07:14 -08:00
parent be237e7d69
commit 75c176ae54
No known key found for this signature in database
GPG Key ID: AEB529C08A3C7E9E
4 changed files with 685 additions and 685 deletions

314
README.md
View File

@ -1,157 +1,157 @@
# kdl - Kat's Document Language
kdl is a document language, mostly based on [SDLang](https://sdlang.org), with
xml-like semantics that looks like you're invoking a bunch of CLI commands!
It's meant to be used both as a serialization format and a configuration
language, and is relatively light on syntax compared to XML.
## Intro
The basic syntax is similar to SDLang:
```kdl
// This is a node with a single string value
title "Hello, World"
// Multiple values are supported, too
bookmarks 12 15 188 1234
// Nodes can have properties
author "Alex Monad" email="alex@example.com" active=true
// Nodes can be arbitrarily nested
contents {
section "First section" {
paragraph "This is the first paragraph"
paragraph "This is the second paragraph"
}
}
// Nodes can be separated into multiple lines
title \
"Some title"
// Comment formats:
// C++ style
/*
C style multiline
*/
tag /*foo=true*/ bar=false
```
But kdl changes a few details:
```kdl
// Files must be utf8 encoded!
smile "😁"
// Instead of anonymous nodes, nodes and properties can be wrapped
// in "" for arbitrary node names.
"!@#$@$%Q#$%~@!40" "1.2.3" "!!!!!"=true
// The following is a legal bare identifier:
foo123~!@#$%^&*.:'|<>/?+ "weeee"
// kdl specifically allows properties and values to be
// interspersed with each other, much like CLI commands.
foo bar=true "baz" quux=false 1 2 3
// strings can be multiline as-is, without a different syntax.
string "my
multiline
value"
// raw/unescaped strings use the "r" prefix on string literals and
// otherwise behave the same, including multiline support.
raw r"C:\Users\kdl"
// You can add any number of # after the r and the last " to
// disambiguate literal " characters.
other-raw r#"hello"world"#
// There is a single decimal number type, much like JSON's.
num 1.234e-42
// Numbers can have underscores to help readability:
bignum 1_000_000
// There is additional support for literal hexadecimal, octal, and binary input.
my-hex 0xdeadbeef
my-octal 0o755
my-binary 0b1010_1101
```
The following SDLang features are removed altogether:
* "Anonymous" nodes
* Binary data literals
* Date/time formats
* `on` and `off` booleans
* Backtick strings
* Semicolons
* Namespaces with `:`
* Shell style (`#`) and Lua-style (`--`) comments
* Distinction between 32/64/128-bit numbers. There's just numbers.
## Design and Discussion
kdl is still extremely new, and discussion about the format should happen over
on the [discussions page](https://github.com/zkat/kdl/discussions). Feel free
to jump in and give us your 2 cents!
## Grammar
```
nodes := linespace* (node (newline nodes)? linespace*)?
node := identifier (node-space node-argument)* (node-space node-children)? single-line-comment?
node-argument := prop | value
node-children := '{' nodes '}'
node-space := ws* escline ws* | ws+
identifier := [a-zA-Z] [a-zA-Z0-9!$%&'*+\-./:<>?@\^_|~]* | string
prop := identifier '=' value
value := string | raw-string | number | boolean | 'null'
string := '"' character* '"'
character := '\' escape | [^\"]
escape := ["\\/bfnrt] | 'u{' hex-digit{1, 6} '}'
hex-digit := [0-9a-fA-F]
raw-string := 'r' raw-string-hash
raw-string-hash := '#' raw-string-hash '#' | raw-string-quotes
raw-string-quotes := '"' .* '"'
number := decimal | hex | octal | binary
decimal := integer ('.' [0-9]+)? exponent?
exponent := ('e' | 'E') integer
integer := sign? [0-9] [0-9_]*
sign := '+' | '-'
hex := '0x' hex-digit (hex-digit | '_')*
octal := '0o' [0-7] [0-7_]*
binary := '0b' ('0' | '1') ('0' | '1' | '_')*
boolean := 'true' | 'false'
escline := '\\' ws* (single-line-comment | newline)
linespace := newline | ws | single-line-comment
newline := ('\r' '\n') | '\n'
ws := bom | ' ' | '\t' | multi-line-comment
single-line-comment := '//' ('\r' [^\n] | [^\r\n])* newline
multi-line-comment := '/*' ('*' [^\/] | [^*])* '*/'
```
## LICENSE
The above grammar/spec is licensed CC-BY-SA. The included [LICENSE.md
file](LICENSE.md) in this repository only covers this implementation.
# kdl - Kat's Document Language
kdl is a document language, mostly based on [SDLang](https://sdlang.org), with
xml-like semantics that looks like you're invoking a bunch of CLI commands!
It's meant to be used both as a serialization format and a configuration
language, and is relatively light on syntax compared to XML.
## Intro
The basic syntax is similar to SDLang:
```kdl
// This is a node with a single string value
title "Hello, World"
// Multiple values are supported, too
bookmarks 12 15 188 1234
// Nodes can have properties
author "Alex Monad" email="alex@example.com" active=true
// Nodes can be arbitrarily nested
contents {
section "First section" {
paragraph "This is the first paragraph"
paragraph "This is the second paragraph"
}
}
// Nodes can be separated into multiple lines
title \
"Some title"
// Comment formats:
// C++ style
/*
C style multiline
*/
tag /*foo=true*/ bar=false
```
But kdl changes a few details:
```kdl
// Files must be utf8 encoded!
smile "😁"
// Instead of anonymous nodes, nodes and properties can be wrapped
// in "" for arbitrary node names.
"!@#$@$%Q#$%~@!40" "1.2.3" "!!!!!"=true
// The following is a legal bare identifier:
foo123~!@#$%^&*.:'|<>/?+ "weeee"
// kdl specifically allows properties and values to be
// interspersed with each other, much like CLI commands.
foo bar=true "baz" quux=false 1 2 3
// strings can be multiline as-is, without a different syntax.
string "my
multiline
value"
// raw/unescaped strings use the "r" prefix on string literals and
// otherwise behave the same, including multiline support.
raw r"C:\Users\kdl"
// You can add any number of # after the r and the last " to
// disambiguate literal " characters.
other-raw r#"hello"world"#
// There is a single decimal number type, much like JSON's.
num 1.234e-42
// Numbers can have underscores to help readability:
bignum 1_000_000
// There is additional support for literal hexadecimal, octal, and binary input.
my-hex 0xdeadbeef
my-octal 0o755
my-binary 0b1010_1101
```
The following SDLang features are removed altogether:
* "Anonymous" nodes
* Binary data literals
* Date/time formats
* `on` and `off` booleans
* Backtick strings
* Semicolons
* Namespaces with `:`
* Shell style (`#`) and Lua-style (`--`) comments
* Distinction between 32/64/128-bit numbers. There's just numbers.
## Design and Discussion
kdl is still extremely new, and discussion about the format should happen over
on the [discussions page](https://github.com/zkat/kdl/discussions). Feel free
to jump in and give us your 2 cents!
## Grammar
```
nodes := linespace* (node (newline nodes)? linespace*)?
node := identifier (node-space node-argument)* (node-space node-children)? single-line-comment?
node-argument := prop | value
node-children := '{' nodes '}'
node-space := ws* escline ws* | ws+
identifier := [a-zA-Z] [a-zA-Z0-9!$%&'*+\-./:<>?@\^_|~]* | string
prop := identifier '=' value
value := string | raw-string | number | boolean | 'null'
string := '"' character* '"'
character := '\' escape | [^\"]
escape := ["\\/bfnrt] | 'u{' hex-digit{1, 6} '}'
hex-digit := [0-9a-fA-F]
raw-string := 'r' raw-string-hash
raw-string-hash := '#' raw-string-hash '#' | raw-string-quotes
raw-string-quotes := '"' .* '"'
number := decimal | hex | octal | binary
decimal := integer ('.' [0-9]+)? exponent?
exponent := ('e' | 'E') integer
integer := sign? [0-9] [0-9_]*
sign := '+' | '-'
hex := '0x' hex-digit (hex-digit | '_')*
octal := '0o' [0-7] [0-7_]*
binary := '0b' ('0' | '1') ('0' | '1' | '_')*
boolean := 'true' | 'false'
escline := '\\' ws* (single-line-comment | newline)
linespace := newline | ws | single-line-comment
newline := ('\r' '\n') | '\n'
ws := bom | ' ' | '\t' | multi-line-comment
single-line-comment := '//' ('\r' [^\n] | [^\r\n])* newline
multi-line-comment := '/*' ('*' [^\/] | [^*])* '*/'
```
## LICENSE
The above grammar/spec is licensed CC-BY-SA. The included [LICENSE.md
file](LICENSE.md) in this repository only covers this implementation.

View File

@ -1,37 +1,37 @@
use nom::combinator::all_consuming;
use nom::Err;
pub use crate::error::{KdlError, KdlErrorKind};
pub use crate::node::KdlNode;
mod error;
mod node;
mod parser;
pub fn parse_document<I>(input: I) -> Result<Vec<KdlNode>, KdlError>
where
I: AsRef<str>,
{
let input = &input.as_ref()[..];
match all_consuming(parser::nodes)(input) {
Ok((_, arg)) => Ok(arg),
Err(err) => Err(match err {
Err::Error(e) | Err::Failure(e) => KdlError {
input: input.into(),
offset: e.input.as_ptr() as usize - input.as_ptr() as usize,
kind: if let Some(kind) = e.kind {
kind
} else if let Some(ctx) = e.context {
KdlErrorKind::Context(ctx)
} else {
KdlErrorKind::Other
},
},
Err::Incomplete(_) => KdlError {
input: input.into(),
offset: input.len() - 1,
kind: KdlErrorKind::IncompleteInput,
},
}),
}
}
use nom::combinator::all_consuming;
use nom::Err;
pub use crate::error::{KdlError, KdlErrorKind};
pub use crate::node::KdlNode;
mod error;
mod node;
mod parser;
pub fn parse_document<I>(input: I) -> Result<Vec<KdlNode>, KdlError>
where
I: AsRef<str>,
{
let input = &input.as_ref()[..];
match all_consuming(parser::nodes)(input) {
Ok((_, arg)) => Ok(arg),
Err(err) => Err(match err {
Err::Error(e) | Err::Failure(e) => KdlError {
input: input.into(),
offset: e.input.as_ptr() as usize - input.as_ptr() as usize,
kind: if let Some(kind) = e.kind {
kind
} else if let Some(ctx) = e.context {
KdlErrorKind::Context(ctx)
} else {
KdlErrorKind::Other
},
},
Err::Incomplete(_) => KdlError {
input: input.into(),
offset: input.len() - 1,
kind: KdlErrorKind::IncompleteInput,
},
}),
}
}

View File

@ -1,18 +1,18 @@
use std::collections::HashMap;
/// A single node of a parsed KDL document.
#[derive(Debug, Clone, PartialEq)]
pub struct KdlNode {
/// The node's name.
pub name: String,
/// Positional values attached to the node.
pub values: Vec<KdlNodeValue>,
/// `key=value` properties attached to the node, keyed by property name.
pub properties: HashMap<String, KdlNodeValue>,
/// Nodes nested inside this node.
pub children: Vec<KdlNode>,
}
/// A value that can appear as a node argument or as a property value.
#[derive(Debug, Clone, PartialEq)]
pub enum KdlNodeValue {
/// A signed 64-bit integer.
Int(i64),
/// A 64-bit floating point number.
Float(f64),
/// An owned string.
String(String),
/// A boolean.
Boolean(bool),
/// The absence of a value.
Null,
}
use std::collections::HashMap;
/// A single node of a parsed KDL document.
#[derive(Debug, Clone, PartialEq)]
pub struct KdlNode {
/// The node's name.
pub name: String,
/// Positional values attached to the node.
pub values: Vec<KdlNodeValue>,
/// `key=value` properties attached to the node, keyed by property name.
pub properties: HashMap<String, KdlNodeValue>,
/// Nodes nested inside this node.
pub children: Vec<KdlNode>,
}
/// A value that can appear as a node argument or as a property value.
#[derive(Debug, Clone, PartialEq)]
pub enum KdlNodeValue {
/// A signed 64-bit integer.
Int(i64),
/// A 64-bit floating point number.
Float(f64),
/// An owned string.
String(String),
/// A boolean.
Boolean(bool),
/// The absence of a value.
Null,
}

View File

@ -1,473 +1,473 @@
use std::collections::HashMap;
use nom::branch::alt;
use nom::bytes::complete::{is_not, tag, take_until, take_while_m_n};
use nom::character::complete::{alpha1, alphanumeric1, char, none_of, one_of};
use nom::combinator::{eof, map, map_opt, map_res, opt, recognize, value};
use nom::multi::{fold_many0, many0, many1};
use nom::sequence::{delimited, pair, preceded, terminated, tuple};
use nom::IResult;
use crate::error::KdlParseError;
use crate::node::{KdlNode, KdlNodeValue};
/// `nodes := linespace* (node (newline document)?)?`
pub(crate) fn nodes(input: &str) -> IResult<&str, Vec<KdlNode>, KdlParseError<&str>> {
many0(delimited(many0(linespace), node, newline))(input)
}
/// One argument following a node's name, as produced by `node_arg`.
#[derive(Clone)]
enum NodeArg {
/// A bare positional value.
Value(KdlNodeValue),
/// A `key=value` property: the key and its value.
Property(String, KdlNodeValue),
}
/// `node := identifier (node-space node-argument)* (node-space node-children)?`
///
/// Parses one node: its name, then any mix of values and properties, then an
/// optional `{ ... }` block of children. Values keep their source order; when
/// a property key is repeated, the last occurrence wins.
pub(crate) fn node(input: &str) -> IResult<&str, KdlNode, KdlParseError<&str>> {
    let (rest, name) = identifier(input)?;
    let (rest, args) = many0(preceded(node_space, node_arg))(rest)?;
    let (rest, children) = opt(preceded(node_space, node_children))(rest)?;

    // Sort the interleaved arguments into their two destinations in one pass.
    let mut values = Vec::new();
    let mut properties = HashMap::new();
    for arg in args {
        match arg {
            NodeArg::Value(val) => values.push(val),
            NodeArg::Property(key, val) => {
                properties.insert(key, val);
            }
        }
    }

    let parsed = KdlNode {
        name,
        values,
        properties,
        children: children.unwrap_or_default(),
    };
    Ok((rest, parsed))
}
/// `identifier := [a-zA-Z_] [a-zA-Z0-9!$%&'*+\-./:<>?@\^_|~]* | string`
///
/// Parses a node or property name: either a bare identifier (a letter or
/// underscore followed by letters, digits and the punctuation listed in the
/// grammar above) or a quoted string, which permits arbitrary names.
fn identifier(input: &str) -> IResult<&str, String, KdlParseError<&str>> {
    alt((
        map(
            recognize(pair(
                alt((alpha1, tag("_"))),
                // Punctuation set kept in step with the grammar above,
                // including `'` and `|`.
                many0(alt((
                    alphanumeric1,
                    recognize(one_of("!$%&'*+-./:<>?@^_|~")),
                ))),
            )),
            String::from,
        ),
        string,
    ))(input)
}
/// `node-argument := prop | value`
///
/// Parses one node argument. A property is attempted first; if that fails the
/// input is parsed as a bare value.
fn node_arg(input: &str) -> IResult<&str, NodeArg, KdlParseError<&str>> {
    let as_property = map(property, |(name, val)| NodeArg::Property(name, val));
    let as_value = map(node_value, NodeArg::Value);
    alt((as_property, as_value))(input)
}
/// `prop := identifier '=' value`
fn property(input: &str) -> IResult<&str, (String, KdlNodeValue), KdlParseError<&str>> {
let (input, key) = identifier(input)?;
let (input, _) = tag("=")(input)?;
let (input, val) = node_value(input)?;
Ok((input, (key, val)))
}
/// `value := string | raw-string | number | boolean | 'null'`
///
/// Parses a single value, trying each alternative in the order listed.
fn node_value(input: &str) -> IResult<&str, KdlNodeValue, KdlParseError<&str>> {
    alt((
        map(string, KdlNodeValue::String),
        map(raw_string, |raw: &str| KdlNodeValue::String(raw.to_owned())),
        number,
        boolean,
        map(tag("null"), |_| KdlNodeValue::Null),
    ))(input)
}
/// `node-children := '{' nodes '}'`
fn node_children(input: &str) -> IResult<&str, Vec<KdlNode>, KdlParseError<&str>> {
delimited(tag("{"), nodes, tag("}"))(input)
}
/// `string := '"' character* '"'`
fn string(input: &str) -> IResult<&str, String, KdlParseError<&str>> {
delimited(
char('"'),
fold_many0(character, String::new(), |mut acc, ch| {
acc.push(ch);
acc
}),
char('"'),
)(input)
}
/// `character := '\' escape | [^\"]`
///
/// Parses one logical character of a quoted string: either a backslash escape
/// or any single character other than `\` and `"`.
fn character(input: &str) -> IResult<&str, char, KdlParseError<&str>> {
    let escaped = preceded(char('\\'), escape);
    let literal = none_of("\\\"");
    alt((escaped, literal))(input)
}
/// `escape := ["\\/bfnrt] | 'u{' hex-digit{1, 6} '}'`
fn escape(input: &str) -> IResult<&str, char, KdlParseError<&str>> {
alt((
delimited(tag("u{"), unicode, char('}')),
value('"', char('"')),
value('\\', char('\\')),
value('/', char('/')),
value('\u{08}', char('b')),
value('\u{0C}', char('f')),
value('\n', char('n')),
value('\r', char('r')),
value('\t', char('t')),
))(input)
}
/// Parses the one to six hex digits inside a `\u{...}` escape and converts
/// them to a `char`. Fails when the digits do not name a valid Unicode scalar
/// value.
fn unicode(input: &str) -> IResult<&str, char, KdlParseError<&str>> {
    let hex_digits = take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit());
    let code_point = map_res(hex_digits, |digits: &str| u32::from_str_radix(digits, 16));
    map_opt(code_point, std::char::from_u32)(input)
}
/// `raw-string := 'r' raw-string-hash`
/// `raw-string-hash := '#' raw-string-hash '#' | raw-string-quotes`
/// `raw-string-quotes := '"' .* '"'`
fn raw_string(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
let (input, _) = char('r')(input)?;
let (input, hashes) = recognize(many0(char('#')))(input)?;
let (input, _) = char('"')(input)?;
let close = format!("\"{}", hashes);
let (input, string) = take_until(&close[..])(input)?;
let (input, _) = tag(&close[..])(input)?;
Ok((input, string))
}
/// `number := decimal | hex | octal | binary`
///
/// Parses any numeric literal.
///
/// Order matters: the prefixed forms and floats must be tried before plain
/// integers. Otherwise `integer` matches the leading digits of inputs such as
/// `0xff` or `1.5` and the remainder is left unparsed.
fn number(input: &str) -> IResult<&str, KdlNodeValue, KdlParseError<&str>> {
    alt((
        map(hexadecimal, KdlNodeValue::Int),
        map(octal, KdlNodeValue::Int),
        map(binary, KdlNodeValue::Int),
        map(float, KdlNodeValue::Float),
        map(integer, KdlNodeValue::Int),
    ))(input)
}
/// ```ignore
/// decimal := integer ('.' [0-9]+)? exponent?
/// exponent := ('e' | 'E') integer
/// integer := sign? [1-9] [0-9_]*
/// sign := '+' | '-'
/// ```
fn float(input: &str) -> IResult<&str, f64, KdlParseError<&str>> {
map_res(
alt((
recognize(tuple((
integer,
opt(preceded(char('.'), integer)),
one_of("eE"),
opt(one_of("+-")),
integer,
))),
recognize(tuple((integer, char('.'), integer))),
)),
|x| str::replace(x, "_", "").parse::<f64>(),
)(input)
}
/// ```ignore
/// decimal := integer ('.' [0-9]+)? exponent?
/// exponent := ('e' | 'E') integer
/// integer := sign? [1-9] [0-9_]*
/// sign := '+' | '-'
/// ```
fn integer(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
let (input, sign) = opt(alt((char('+'), char('-'))))(input)?;
let mult = if let Some(sign) = sign {
if sign == '+' {
1
} else {
-1
}
} else {
1
};
map_res(
recognize(many1(terminated(one_of("0123456789"), many0(char('_'))))),
move |out: &str| {
i64::from_str_radix(&str::replace(&out, "_", ""), 10).map(move |x| x * mult)
},
)(input)
}
/// `hex := '0x' [0-9a-fA-F] [0-9a-fA-F_]*`
///
/// Parses a hexadecimal integer literal. The prefix may be `0x` or `0X`;
/// underscores after a digit are accepted and ignored.
fn hexadecimal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
    let prefix = alt((tag("0x"), tag("0X")));
    let digits = recognize(many1(terminated(
        one_of("0123456789abcdefABCDEF"),
        many0(char('_')),
    )));
    map_res(preceded(prefix, digits), |text: &str| {
        i64::from_str_radix(&text.replace('_', ""), 16)
    })(input)
}
/// `octal := '0o' [0-7] [0-7_]*`
///
/// Parses an octal integer literal. The prefix may be `0o` or `0O`;
/// underscores after a digit are accepted and ignored.
fn octal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
    let prefix = alt((tag("0o"), tag("0O")));
    let digits = recognize(many1(terminated(one_of("01234567"), many0(char('_')))));
    map_res(preceded(prefix, digits), |text: &str| {
        i64::from_str_radix(&text.replace('_', ""), 8)
    })(input)
}
/// `binary := '0b' ('0' | '1') ('0' | '1' | '_')*`
///
/// Parses a binary integer literal. The prefix may be `0b` or `0B`;
/// underscores after a digit are accepted and ignored.
fn binary(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
    let prefix = alt((tag("0b"), tag("0B")));
    let digits = recognize(many1(terminated(one_of("01"), many0(char('_')))));
    map_res(preceded(prefix, digits), |text: &str| {
        i64::from_str_radix(&text.replace('_', ""), 2)
    })(input)
}
/// `boolean := 'true' | 'false'`
fn boolean(input: &str) -> IResult<&str, KdlNodeValue, KdlParseError<&str>> {
alt((
value(KdlNodeValue::Boolean(true), tag("true")),
value(KdlNodeValue::Boolean(false), tag("false")),
))(input)
}
/// `node-space := ws* escline ws* | ws+`
fn node_space(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
alt((
delimited(many0(whitespace), escline, many0(whitespace)),
map(many1(whitespace), |_| ()),
))(input)
}
/// `single-line-comment := '//' ('\r' [^\n] | [^\r\n])* (newline | eof)`
///
/// Parses a `//` comment up to and including the newline that ends it, or up
/// to the end of input.
///
/// The comment body ends at the first `\n`, or at the `\r` directly before
/// it. A lone `\r` does not end the comment. An empty body (`//` followed at
/// once by a newline or the end of input) is accepted, and a comment on an
/// LF-terminated line never runs on into later lines in search of a CRLF.
fn single_line_comment(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
    let (input, _) = tag("//")(input)?;

    // Find where the line ends. `\n` and `\r` are single ASCII bytes, so the
    // indices below are always valid char boundaries.
    let line_feed = input.find('\n').unwrap_or_else(|| input.len());
    let body_end = if line_feed < input.len() && input[..line_feed].ends_with('\r') {
        line_feed - 1
    } else {
        line_feed
    };

    let (input, _) = alt((newline, value((), eof)))(&input[body_end..])?;
    Ok((input, ()))
}
/// `multi-line-comment := '/*' ('*' [^\/] | [^*])* '*/'`
///
/// Parses a `/* ... */` comment. The comment ends at the first `*/`, so
/// comments do not nest.
fn multi_line_comment(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
    let (rest, _) = tag("/*")(input)?;
    let (rest, _) = take_until("*/")(rest)?;
    let (rest, _) = tag("*/")(rest)?;
    Ok((rest, ()))
}
/// `escline := '\\' ws* (single-line-comment | newline)`
fn escline(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
let (input, _) = tag("\\")(input)?;
let (input, _) = many0(whitespace)(input)?;
let (input, _) = alt((single_line_comment, newline))(input)?;
Ok((input, ()))
}
/// `linespace := newline | ws | single-line-comment`
///
/// Parses one item that may appear between nodes: a newline, a whitespace
/// item, or a single-line comment.
fn linespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
    // Every alternative already yields `()`, so no extra mapping is needed.
    alt((newline, whitespace, single_line_comment))(input)
}
/// `ws := bom | ' ' | '\t' | multi-line-comment`
///
/// Parses a single whitespace item: a byte-order mark (U+FEFF), a space, a
/// tab, or a whole `/* ... */` comment.
fn whitespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
    value(
        (),
        alt((
            // bom
            tag("\u{FEFF}"),
            tag(" "),
            tag("\t"),
            recognize(multi_line_comment),
        )),
    )(input)
}
/// `newline := ('\r' '\n') | '\n'`
fn newline(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
value((), alt((tag("\r\n"), tag("\n"))))(input)
}
// Unit tests for the individual parsers in this module. Each test calls one
// parser directly and checks both the parsed value and the unparsed remainder.
#[cfg(test)]
mod tests {
use super::*;
// Quoted strings: empty, plain, multi-line, literal non-ASCII, every escape,
// and rejection of unknown or out-of-range escapes.
#[test]
fn test_string() {
assert_eq!(string("\"\""), Ok(("", "".into())));
assert_eq!(string("\"hello\""), Ok(("", "hello".into())));
assert_eq!(string("\"hello\nworld\""), Ok(("", "hello\nworld".into())));
assert_eq!(string("\"\u{10FFF}\""), Ok(("", "\u{10FFF}".into())));
assert_eq!(
string(r#""\"\\\/\b\f\n\r\t""#),
Ok(("", "\"\\/\u{08}\u{0C}\n\r\t".into()))
);
assert_eq!(string(r#""\u{10}""#), Ok(("", "\u{10}".into())));
assert!(string(r#""\i""#).is_err());
// 0xc0ffee is above the largest Unicode scalar value.
assert!(string(r#""\u{c0ffee}""#).is_err());
}
// Floats: signs, exponents, underscores after digits; a digit is required on
// both sides of the decimal point.
#[test]
fn test_float() {
assert_eq!(float("1.0"), Ok(("", 1.0f64)));
assert_eq!(float("0.0"), Ok(("", 0.0f64)));
assert_eq!(float("-1.0"), Ok(("", -1.0f64)));
assert_eq!(float("+1.0"), Ok(("", 1.0f64)));
assert_eq!(float("1.0e10"), Ok(("", 1.0e10f64)));
assert_eq!(float("1.0e-10"), Ok(("", 1.0e-10f64)));
assert_eq!(float("-1.0e-10"), Ok(("", -1.0e-10f64)));
assert_eq!(float("123_456_789.0"), Ok(("", 123456789.0f64)));
assert_eq!(float("123_456_789.0_"), Ok(("", 123456789.0f64)));
assert!(float("?1.0").is_err());
assert!(float("_1.0").is_err());
assert!(float("1._0").is_err());
assert!(float("1.").is_err());
assert!(float(".0").is_err());
}
// Decimal integers: leading zeros and trailing underscores are accepted, a
// leading underscore is not.
#[test]
fn test_integer() {
assert_eq!(integer("0"), Ok(("", 0)));
assert_eq!(integer("0123456789"), Ok(("", 123456789)));
assert_eq!(integer("0123_456_789"), Ok(("", 123456789)));
assert_eq!(integer("0123_456_789_"), Ok(("", 123456789)));
assert_eq!(integer("+0123456789"), Ok(("", 123456789)));
assert_eq!(integer("-0123456789"), Ok(("", -123456789)));
assert!(integer("?0123456789").is_err());
assert!(integer("_0123456789").is_err());
assert!(integer("a").is_err());
assert!(integer("--").is_err());
}
// Hexadecimal literals: underscores may follow a digit but not the prefix.
#[test]
fn test_hexadecimal() {
assert_eq!(
hexadecimal("0x0123456789abcdef"),
Ok(("", 0x0123456789abcdef))
);
assert_eq!(
hexadecimal("0x01234567_89abcdef"),
Ok(("", 0x0123456789abcdef))
);
assert_eq!(
hexadecimal("0x01234567_89abcdef_"),
Ok(("", 0x0123456789abcdef))
);
assert!(hexadecimal("0x_123").is_err());
assert!(hexadecimal("0xg").is_err());
assert!(hexadecimal("0xx").is_err());
}
// Octal literals: same underscore rules as hexadecimal.
#[test]
fn test_octal() {
assert_eq!(octal("0o01234567"), Ok(("", 0o01234567)));
assert_eq!(octal("0o0123_4567"), Ok(("", 0o01234567)));
assert_eq!(octal("0o01234567_"), Ok(("", 0o01234567)));
assert!(octal("0o_123").is_err());
assert!(octal("0o8").is_err());
assert!(octal("0oo").is_err());
}
// Binary literals: runs of underscores between digits are allowed.
#[test]
fn test_binary() {
assert_eq!(binary("0b0101"), Ok(("", 0b0101)));
assert_eq!(binary("0b01_10"), Ok(("", 0b0110)));
assert_eq!(binary("0b01___10"), Ok(("", 0b0110)));
assert_eq!(binary("0b0110_"), Ok(("", 0b0110)));
assert!(binary("0b_0110").is_err());
assert!(binary("0b20").is_err());
assert!(binary("0bb").is_err());
}
// Raw strings: escapes are not interpreted, and the number of closing hashes
// must match the number of opening ones.
#[test]
fn test_raw_string() {
assert_eq!(raw_string(r#"r"foo""#), Ok(("", "foo")));
assert_eq!(raw_string("r\"foo\nbar\""), Ok(("", "foo\nbar")));
assert_eq!(raw_string(r##"r#"foo"#"##), Ok(("", "foo")));
assert_eq!(raw_string(r###"r##"foo"##"###), Ok(("", "foo")));
assert_eq!(raw_string(r#"r"\nfoo\r""#), Ok(("", r"\nfoo\r")));
assert!(raw_string(r###"r##"foo"#"###).is_err());
}
// Booleans: only the exact keywords are accepted.
#[test]
fn test_boolean() {
assert_eq!(boolean("true"), Ok(("", KdlNodeValue::Boolean(true))));
assert_eq!(boolean("false"), Ok(("", KdlNodeValue::Boolean(false))));
assert!(boolean("blah").is_err());
}
// Node separators: plain whitespace, or a backslash continuation with a
// trailing comment.
#[test]
fn test_node_space() {
assert_eq!(node_space(" "), Ok(("", ())));
assert_eq!(node_space("\t "), Ok(("", ())));
assert_eq!(node_space("\t \\ // hello\n "), Ok(("", ())));
assert!(node_space("blah").is_err());
}
// Single-line comments end at LF, CRLF or end of input; a lone CR is part of
// the comment body.
#[test]
fn test_single_line_comment() {
assert_eq!(single_line_comment("//hello"), Ok(("", ())));
assert_eq!(single_line_comment("// \thello"), Ok(("", ())));
assert_eq!(single_line_comment("//hello\n"), Ok(("", ())));
assert_eq!(single_line_comment("//hello\r\n"), Ok(("", ())));
assert_eq!(single_line_comment("//hello\n\r"), Ok(("\r", ())));
assert_eq!(single_line_comment("//hello\rworld"), Ok(("", ())));
}
// Multi-line comments end at the first `*/`; `* /` with a space does not
// close the comment.
#[test]
fn test_multi_line_comment() {
assert_eq!(multi_line_comment("/*hello*/"), Ok(("", ())));
assert_eq!(multi_line_comment("/*hello*/\n"), Ok(("\n", ())));
assert_eq!(multi_line_comment("/*\nhello\r\n*/"), Ok(("", ())));
assert_eq!(multi_line_comment("/*\nhello** /\n*/"), Ok(("", ())));
assert_eq!(multi_line_comment("/**\nhello** /\n*/"), Ok(("", ())));
assert_eq!(multi_line_comment("/*hello*/world"), Ok(("world", ())));
}
// Line continuations consume through the newline but leave any indentation
// on the following line untouched.
#[test]
fn test_escline() {
assert_eq!(escline("\\\nfoo"), Ok(("foo", ())));
assert_eq!(escline("\\\n foo"), Ok((" foo", ())));
assert_eq!(escline("\\ \t \nfoo"), Ok(("foo", ())));
assert_eq!(escline("\\ // test \nfoo"), Ok(("foo", ())));
assert_eq!(escline("\\ // test \n foo"), Ok((" foo", ())));
}
// Whitespace matches exactly one item: a space, a tab, or one whole
// multi-line comment.
#[test]
fn test_whitespace() {
assert_eq!(whitespace(" "), Ok(("", ())));
assert_eq!(whitespace("\t"), Ok(("", ())));
assert_eq!(whitespace("/* \nfoo\r\n */ etc"), Ok((" etc", ())));
assert!(whitespace("hi").is_err())
}
// Newline matches exactly one LF or CRLF; a lone CR is rejected.
#[test]
fn test_newline() {
assert_eq!(newline("\n"), Ok(("", ())));
assert_eq!(newline("\r\n"), Ok(("", ())));
assert_eq!(newline("\n\n"), Ok(("\n", ())));
assert!(newline("\r").is_err());
assert!(newline("blah").is_err());
}
}
use std::collections::HashMap;
use nom::branch::alt;
use nom::bytes::complete::{is_not, tag, take_until, take_while_m_n};
use nom::character::complete::{alpha1, alphanumeric1, char, none_of, one_of};
use nom::combinator::{eof, map, map_opt, map_res, opt, recognize, value};
use nom::multi::{fold_many0, many0, many1};
use nom::sequence::{delimited, pair, preceded, terminated, tuple};
use nom::IResult;
use crate::error::KdlParseError;
use crate::node::{KdlNode, KdlNodeValue};
/// `nodes := linespace* (node (newline document)?)?`
pub(crate) fn nodes(input: &str) -> IResult<&str, Vec<KdlNode>, KdlParseError<&str>> {
many0(delimited(many0(linespace), node, newline))(input)
}
/// One argument following a node's name, as produced by `node_arg`.
#[derive(Clone)]
enum NodeArg {
/// A bare positional value.
Value(KdlNodeValue),
/// A `key=value` property: the key and its value.
Property(String, KdlNodeValue),
}
/// `node := identifier (node-space node-argument)* (node-space node-children)?`
///
/// Parses one node: its name, then any mix of values and properties, then an
/// optional `{ ... }` block of children. Values keep their source order; when
/// a property key is repeated, the last occurrence wins.
pub(crate) fn node(input: &str) -> IResult<&str, KdlNode, KdlParseError<&str>> {
    let (rest, name) = identifier(input)?;
    let (rest, args) = many0(preceded(node_space, node_arg))(rest)?;
    let (rest, children) = opt(preceded(node_space, node_children))(rest)?;

    // Sort the interleaved arguments into their two destinations in one pass.
    let mut values = Vec::new();
    let mut properties = HashMap::new();
    for arg in args {
        match arg {
            NodeArg::Value(val) => values.push(val),
            NodeArg::Property(key, val) => {
                properties.insert(key, val);
            }
        }
    }

    let parsed = KdlNode {
        name,
        values,
        properties,
        children: children.unwrap_or_default(),
    };
    Ok((rest, parsed))
}
/// `identifier := [a-zA-Z_] [a-zA-Z0-9!$%&'*+\-./:<>?@\^_|~]* | string`
///
/// Parses a node or property name: either a bare identifier (a letter or
/// underscore followed by letters, digits and the punctuation listed in the
/// grammar above) or a quoted string, which permits arbitrary names.
fn identifier(input: &str) -> IResult<&str, String, KdlParseError<&str>> {
    alt((
        map(
            recognize(pair(
                alt((alpha1, tag("_"))),
                // Punctuation set kept in step with the grammar above,
                // including `'` and `|`.
                many0(alt((
                    alphanumeric1,
                    recognize(one_of("!$%&'*+-./:<>?@^_|~")),
                ))),
            )),
            String::from,
        ),
        string,
    ))(input)
}
/// `node-argument := prop | value`
///
/// Parses one node argument. A property is attempted first; if that fails the
/// input is parsed as a bare value.
fn node_arg(input: &str) -> IResult<&str, NodeArg, KdlParseError<&str>> {
    let as_property = map(property, |(name, val)| NodeArg::Property(name, val));
    let as_value = map(node_value, NodeArg::Value);
    alt((as_property, as_value))(input)
}
/// `prop := identifier '=' value`
fn property(input: &str) -> IResult<&str, (String, KdlNodeValue), KdlParseError<&str>> {
let (input, key) = identifier(input)?;
let (input, _) = tag("=")(input)?;
let (input, val) = node_value(input)?;
Ok((input, (key, val)))
}
/// `value := string | raw-string | number | boolean | 'null'`
///
/// Parses a single value, trying each alternative in the order listed.
fn node_value(input: &str) -> IResult<&str, KdlNodeValue, KdlParseError<&str>> {
    alt((
        map(string, KdlNodeValue::String),
        map(raw_string, |raw: &str| KdlNodeValue::String(raw.to_owned())),
        number,
        boolean,
        map(tag("null"), |_| KdlNodeValue::Null),
    ))(input)
}
/// `node-children := '{' nodes '}'`
fn node_children(input: &str) -> IResult<&str, Vec<KdlNode>, KdlParseError<&str>> {
delimited(tag("{"), nodes, tag("}"))(input)
}
/// `string := '"' character* '"'`
fn string(input: &str) -> IResult<&str, String, KdlParseError<&str>> {
delimited(
char('"'),
fold_many0(character, String::new(), |mut acc, ch| {
acc.push(ch);
acc
}),
char('"'),
)(input)
}
/// `character := '\' escape | [^\"]`
///
/// Parses one logical character of a quoted string: either a backslash escape
/// or any single character other than `\` and `"`.
fn character(input: &str) -> IResult<&str, char, KdlParseError<&str>> {
    let escaped = preceded(char('\\'), escape);
    let literal = none_of("\\\"");
    alt((escaped, literal))(input)
}
/// `escape := ["\\/bfnrt] | 'u{' hex-digit{1, 6} '}'`
fn escape(input: &str) -> IResult<&str, char, KdlParseError<&str>> {
alt((
delimited(tag("u{"), unicode, char('}')),
value('"', char('"')),
value('\\', char('\\')),
value('/', char('/')),
value('\u{08}', char('b')),
value('\u{0C}', char('f')),
value('\n', char('n')),
value('\r', char('r')),
value('\t', char('t')),
))(input)
}
/// Parses the one to six hex digits inside a `\u{...}` escape and converts
/// them to a `char`. Fails when the digits do not name a valid Unicode scalar
/// value.
fn unicode(input: &str) -> IResult<&str, char, KdlParseError<&str>> {
    let hex_digits = take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit());
    let code_point = map_res(hex_digits, |digits: &str| u32::from_str_radix(digits, 16));
    map_opt(code_point, std::char::from_u32)(input)
}
/// `raw-string := 'r' raw-string-hash`
/// `raw-string-hash := '#' raw-string-hash '#' | raw-string-quotes`
/// `raw-string-quotes := '"' .* '"'`
fn raw_string(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
let (input, _) = char('r')(input)?;
let (input, hashes) = recognize(many0(char('#')))(input)?;
let (input, _) = char('"')(input)?;
let close = format!("\"{}", hashes);
let (input, string) = take_until(&close[..])(input)?;
let (input, _) = tag(&close[..])(input)?;
Ok((input, string))
}
/// `number := decimal | hex | octal | binary`
///
/// Parses any numeric literal.
///
/// Order matters: the prefixed forms and floats must be tried before plain
/// integers. Otherwise `integer` matches the leading digits of inputs such as
/// `0xff` or `1.5` and the remainder is left unparsed.
fn number(input: &str) -> IResult<&str, KdlNodeValue, KdlParseError<&str>> {
    alt((
        map(hexadecimal, KdlNodeValue::Int),
        map(octal, KdlNodeValue::Int),
        map(binary, KdlNodeValue::Int),
        map(float, KdlNodeValue::Float),
        map(integer, KdlNodeValue::Int),
    ))(input)
}
/// ```ignore
/// decimal := integer ('.' [0-9]+)? exponent?
/// exponent := ('e' | 'E') integer
/// integer := sign? [1-9] [0-9_]*
/// sign := '+' | '-'
/// ```
fn float(input: &str) -> IResult<&str, f64, KdlParseError<&str>> {
map_res(
alt((
recognize(tuple((
integer,
opt(preceded(char('.'), integer)),
one_of("eE"),
opt(one_of("+-")),
integer,
))),
recognize(tuple((integer, char('.'), integer))),
)),
|x| str::replace(x, "_", "").parse::<f64>(),
)(input)
}
/// ```ignore
/// decimal := integer ('.' [0-9]+)? exponent?
/// exponent := ('e' | 'E') integer
/// integer := sign? [1-9] [0-9_]*
/// sign := '+' | '-'
/// ```
fn integer(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
let (input, sign) = opt(alt((char('+'), char('-'))))(input)?;
let mult = if let Some(sign) = sign {
if sign == '+' {
1
} else {
-1
}
} else {
1
};
map_res(
recognize(many1(terminated(one_of("0123456789"), many0(char('_'))))),
move |out: &str| {
i64::from_str_radix(&str::replace(&out, "_", ""), 10).map(move |x| x * mult)
},
)(input)
}
/// `hex := '0x' [0-9a-fA-F] [0-9a-fA-F_]*`
///
/// Parses a hexadecimal integer literal. The prefix may be `0x` or `0X`;
/// underscores after a digit are accepted and ignored.
fn hexadecimal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
    let prefix = alt((tag("0x"), tag("0X")));
    let digits = recognize(many1(terminated(
        one_of("0123456789abcdefABCDEF"),
        many0(char('_')),
    )));
    map_res(preceded(prefix, digits), |text: &str| {
        i64::from_str_radix(&text.replace('_', ""), 16)
    })(input)
}
/// `octal := '0o' [0-7] [0-7_]*`
///
/// Parses an octal integer literal. The prefix may be `0o` or `0O`;
/// underscores after a digit are accepted and ignored.
fn octal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
    let prefix = alt((tag("0o"), tag("0O")));
    let digits = recognize(many1(terminated(one_of("01234567"), many0(char('_')))));
    map_res(preceded(prefix, digits), |text: &str| {
        i64::from_str_radix(&text.replace('_', ""), 8)
    })(input)
}
/// `binary := '0b' ('0' | '1') ('0' | '1' | '_')*`
///
/// Parses a binary integer literal. The prefix may be `0b` or `0B`;
/// underscores after a digit are accepted and ignored.
fn binary(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
    let prefix = alt((tag("0b"), tag("0B")));
    let digits = recognize(many1(terminated(one_of("01"), many0(char('_')))));
    map_res(preceded(prefix, digits), |text: &str| {
        i64::from_str_radix(&text.replace('_', ""), 2)
    })(input)
}
/// `boolean := 'true' | 'false'`
fn boolean(input: &str) -> IResult<&str, KdlNodeValue, KdlParseError<&str>> {
alt((
value(KdlNodeValue::Boolean(true), tag("true")),
value(KdlNodeValue::Boolean(false), tag("false")),
))(input)
}
/// `node-space := ws* escline ws* | ws+`
fn node_space(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
alt((
delimited(many0(whitespace), escline, many0(whitespace)),
map(many1(whitespace), |_| ()),
))(input)
}
/// `single-line-comment := '//' ('\r' [^\n] | [^\r\n])* (newline | eof)`
///
/// Parses a `//` comment up to and including the newline that ends it, or up
/// to the end of input.
///
/// The comment body ends at the first `\n`, or at the `\r` directly before
/// it. A lone `\r` does not end the comment. An empty body (`//` followed at
/// once by a newline or the end of input) is accepted, and a comment on an
/// LF-terminated line never runs on into later lines in search of a CRLF.
fn single_line_comment(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
    let (input, _) = tag("//")(input)?;

    // Find where the line ends. `\n` and `\r` are single ASCII bytes, so the
    // indices below are always valid char boundaries.
    let line_feed = input.find('\n').unwrap_or_else(|| input.len());
    let body_end = if line_feed < input.len() && input[..line_feed].ends_with('\r') {
        line_feed - 1
    } else {
        line_feed
    };

    let (input, _) = alt((newline, value((), eof)))(&input[body_end..])?;
    Ok((input, ()))
}
/// `multi-line-comment := '/*' ('*' [^\/] | [^*])* '*/'`
///
/// Parses a `/* ... */` comment. The comment ends at the first `*/`, so
/// comments do not nest.
fn multi_line_comment(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
    let (rest, _) = tag("/*")(input)?;
    let (rest, _) = take_until("*/")(rest)?;
    let (rest, _) = tag("*/")(rest)?;
    Ok((rest, ()))
}
/// `escline := '\\' ws* (single-line-comment | newline)`
fn escline(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
let (input, _) = tag("\\")(input)?;
let (input, _) = many0(whitespace)(input)?;
let (input, _) = alt((single_line_comment, newline))(input)?;
Ok((input, ()))
}
/// `linespace := newline | ws | single-line-comment`
///
/// Matches a single item of line-level spacing: a newline, one whitespace
/// item, or a single-line comment, tried in that order.
fn linespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
    let (rest, ()) = alt((newline, whitespace, single_line_comment))(input)?;
    Ok((rest, ()))
}
/// `ws := bom | ' ' | '\t' | multi-line-comment`
///
/// Matches exactly one whitespace item: a byte-order mark (U+FEFF), a space,
/// a tab, or a complete multi-line comment. Newlines are not whitespace here;
/// they are handled by `newline`.
///
/// Returns an error if the input starts with none of these.
fn whitespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
    value(
        (),
        alt((
            // The grammar counts the BOM as whitespace so that files saved
            // with a leading U+FEFF still parse.
            tag("\u{FEFF}"),
            tag(" "),
            tag("\t"),
            // `recognize` turns the comment parser's `()` output into a
            // `&str` so all branches of `alt` share one output type.
            recognize(multi_line_comment),
        )),
    )(input)
}
/// `newline := ('\r' '\n') | '\n'`
///
/// Matches a single line terminator, either `\r\n` or `\n`. A `\r` on its own
/// is not a newline and is rejected.
fn newline(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
    // Try the two-character form first so "\r\n" is consumed as one unit.
    let (rest, _) = alt((tag("\r\n"), tag("\n")))(input)?;
    Ok((rest, ()))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Quoted strings: plain text, raw newlines, escape sequences, and
    /// rejection of unknown or out-of-range escapes.
    #[test]
    fn test_string() {
        let accepted = [
            ("\"\"", ""),
            ("\"hello\"", "hello"),
            ("\"hello\nworld\"", "hello\nworld"),
            ("\"\u{10FFF}\"", "\u{10FFF}"),
            (r#""\"\\\/\b\f\n\r\t""#, "\"\\/\u{08}\u{0C}\n\r\t"),
            (r#""\u{10}""#, "\u{10}"),
        ];
        for &(src, want) in &accepted {
            assert_eq!(string(src), Ok(("", want.into())), "input: {:?}", src);
        }

        for &bad in &[r#""\i""#, r#""\u{c0ffee}""#] {
            assert!(string(bad).is_err(), "input: {:?}", bad);
        }
    }

    /// Floats: signs, exponents, underscore separators, and malformed forms.
    #[test]
    fn test_float() {
        let accepted = [
            ("1.0", 1.0f64),
            ("0.0", 0.0f64),
            ("-1.0", -1.0f64),
            ("+1.0", 1.0f64),
            ("1.0e10", 1.0e10f64),
            ("1.0e-10", 1.0e-10f64),
            ("-1.0e-10", -1.0e-10f64),
            ("123_456_789.0", 123456789.0f64),
            ("123_456_789.0_", 123456789.0f64),
        ];
        for &(src, want) in &accepted {
            assert_eq!(float(src), Ok(("", want)), "input: {:?}", src);
        }

        for &bad in &["?1.0", "_1.0", "1._0", "1.", ".0"] {
            assert!(float(bad).is_err(), "input: {:?}", bad);
        }
    }

    /// Decimal integers: leading zeros, signs, underscores, and junk input.
    #[test]
    fn test_integer() {
        let accepted = [
            ("0", 0),
            ("0123456789", 123456789),
            ("0123_456_789", 123456789),
            ("0123_456_789_", 123456789),
            ("+0123456789", 123456789),
            ("-0123456789", -123456789),
        ];
        for &(src, want) in &accepted {
            assert_eq!(integer(src), Ok(("", want)), "input: {:?}", src);
        }

        for &bad in &["?0123456789", "_0123456789", "a", "--"] {
            assert!(integer(bad).is_err(), "input: {:?}", bad);
        }
    }

    /// Hexadecimal literals: every spelling below denotes the same value.
    #[test]
    fn test_hexadecimal() {
        let same_value = [
            "0x0123456789abcdef",
            "0x01234567_89abcdef",
            "0x01234567_89abcdef_",
        ];
        for &src in &same_value {
            assert_eq!(
                hexadecimal(src),
                Ok(("", 0x0123456789abcdef)),
                "input: {:?}",
                src
            );
        }

        for &bad in &["0x_123", "0xg", "0xx"] {
            assert!(hexadecimal(bad).is_err(), "input: {:?}", bad);
        }
    }

    /// Octal literals: every spelling below denotes the same value.
    #[test]
    fn test_octal() {
        for &src in &["0o01234567", "0o0123_4567", "0o01234567_"] {
            assert_eq!(octal(src), Ok(("", 0o01234567)), "input: {:?}", src);
        }

        for &bad in &["0o_123", "0o8", "0oo"] {
            assert!(octal(bad).is_err(), "input: {:?}", bad);
        }
    }

    /// Binary literals, including runs of underscores between digits.
    #[test]
    fn test_binary() {
        let accepted = [
            ("0b0101", 0b0101),
            ("0b01_10", 0b0110),
            ("0b01___10", 0b0110),
            ("0b0110_", 0b0110),
        ];
        for &(src, want) in &accepted {
            assert_eq!(binary(src), Ok(("", want)), "input: {:?}", src);
        }

        for &bad in &["0b_0110", "0b20", "0bb"] {
            assert!(binary(bad).is_err(), "input: {:?}", bad);
        }
    }

    /// Raw strings: hash-delimited forms, and escapes left untouched.
    #[test]
    fn test_raw_string() {
        let accepted = [
            (r#"r"foo""#, "foo"),
            ("r\"foo\nbar\"", "foo\nbar"),
            (r##"r#"foo"#"##, "foo"),
            (r###"r##"foo"##"###, "foo"),
            (r#"r"\nfoo\r""#, r"\nfoo\r"),
        ];
        for &(src, want) in &accepted {
            assert_eq!(raw_string(src), Ok(("", want)), "input: {:?}", src);
        }

        // Closing delimiter has fewer hashes than the opening one.
        assert!(raw_string(r###"r##"foo"#"###).is_err());
    }

    /// Boolean keywords.
    #[test]
    fn test_boolean() {
        for &(src, flag) in &[("true", true), ("false", false)] {
            assert_eq!(boolean(src), Ok(("", KdlNodeValue::Boolean(flag))));
        }
        assert!(boolean("blah").is_err());
    }

    /// Spacing between node parts, including an escaped line continuation.
    #[test]
    fn test_node_space() {
        for &src in &[" ", "\t ", "\t \\ // hello\n "] {
            assert_eq!(node_space(src), Ok(("", ())), "input: {:?}", src);
        }
        assert!(node_space("blah").is_err());
    }

    /// `//` comments with each kind of line ending (and none at all).
    #[test]
    fn test_single_line_comment() {
        let cases = [
            ("//hello", ""),
            ("// \thello", ""),
            ("//hello\n", ""),
            ("//hello\r\n", ""),
            ("//hello\n\r", "\r"),
            ("//hello\rworld", ""),
        ];
        for &(src, rest) in &cases {
            assert_eq!(single_line_comment(src), Ok((rest, ())), "input: {:?}", src);
        }
    }

    /// `/* */` comments, including stray `*` characters inside the body.
    #[test]
    fn test_multi_line_comment() {
        let cases = [
            ("/*hello*/", ""),
            ("/*hello*/\n", "\n"),
            ("/*\nhello\r\n*/", ""),
            ("/*\nhello** /\n*/", ""),
            ("/**\nhello** /\n*/", ""),
            ("/*hello*/world", "world"),
        ];
        for &(src, rest) in &cases {
            assert_eq!(multi_line_comment(src), Ok((rest, ())), "input: {:?}", src);
        }
    }

    /// Escaped line breaks; whitespace after the break is left unconsumed.
    #[test]
    fn test_escline() {
        let cases = [
            ("\\\nfoo", "foo"),
            ("\\\n foo", " foo"),
            ("\\ \t \nfoo", "foo"),
            ("\\ // test \nfoo", "foo"),
            ("\\ // test \n foo", " foo"),
        ];
        for &(src, rest) in &cases {
            assert_eq!(escline(src), Ok((rest, ())), "input: {:?}", src);
        }
    }

    /// A single whitespace item: space, tab, or a multi-line comment.
    #[test]
    fn test_whitespace() {
        let cases = [(" ", ""), ("\t", ""), ("/* \nfoo\r\n */ etc", " etc")];
        for &(src, rest) in &cases {
            assert_eq!(whitespace(src), Ok((rest, ())), "input: {:?}", src);
        }
        assert!(whitespace("hi").is_err());
    }

    /// Line terminators; only one is consumed per call.
    #[test]
    fn test_newline() {
        let cases = [("\n", ""), ("\r\n", ""), ("\n\n", "\n")];
        for &(src, rest) in &cases {
            assert_eq!(newline(src), Ok((rest, ())), "input: {:?}", src);
        }

        for &bad in &["\r", "blah"] {
            assert!(newline(bad).is_err(), "input: {:?}", bad);
        }
    }
}