diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..c1e2c64 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,12 @@ +# EditorConfig is awesome: https://EditorConfig.org + +# top-most EditorConfig file +root = true + +[*] +indent_style = space +indent_size = 4 +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0f3c0fb..a80ffa8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - rust: [1.46.0, stable] + rust: [1.56.0, stable] os: [ubuntu-latest, macOS-latest, windows-latest] steps: diff --git a/Cargo.toml b/Cargo.toml index 1d99dee..c682e93 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,16 +1,17 @@ [package] name = "kdl" version = "3.0.1-alpha.0" -description = "Official Rust KDL parser" +description = "Document-oriented KDL parser and API. Allows formatting/whitespace/comment-preserving parsing and modification of KDL text." authors = ["Kat Marchán ", "KDL Community"] license = "Apache-2.0" readme = "README.md" homepage = "https://kdl.dev" repository = "https://github.com/kdl-org/kdl-rs" keywords = ["kdl", "document", "serialization", "config"] -edition = "2018" +edition = "2021" [dependencies] -nom = { version = "7.0.0", default-features = false } +miette = "4.5.0" +nom = { version = "7.1.1", default-features = false } phf = { version = "0.8.0", features = ["macros"] } thiserror = "1.0.22" diff --git a/README.md b/README.md index 7feef74..6efa1c8 100644 --- a/README.md +++ b/README.md @@ -1,64 +1,26 @@ -# KDL - The KDL Document Language +`kdl` is a "document-oriented" parser and API. That means that, unlike +serde-based implementations, it's meant to preserve formatting when editing, +as well as inserting values with custom formatting. This is useful when +working with human-maintained KDL files. 
-[KDL](https://github.com/kdl-org/kdl) is a document language with xml-like -semantics that looks like you're invoking a bunch of CLI commands! +You can think of this crate as +[`toml_edit`](https://crates.io/crates/toml_edit), but for KDL. -It's meant to be used both as a serialization format and a configuration -language, and is relatively light on syntax compared to XML. +### Example -There's a living -[specification](https://github.com/kdl-org/kdl/blob/main/SPEC.md), as well as -[various implementations](https://github.com/kdl-org/kdl#implementations). The language is based on -[SDLang](https://sdlang.org), with a number of modifications and -clarifications on its syntax and behavior. +```rust +use kdl::KdlDocument; -This repository is the official/reference implementation in Rust, and -corresponds to [the kdl crate](https://crates.io/crates/kdl) - -## Design and Discussion - -KDL is still extremely new, and discussion about the format should happen over -on the [spec repo's discussions -page](https://github.com/kdoclang/kdl/discussions). Feel free to jump in and -give us your 2 cents! - -## Example KDL File - -```text -author "Alex Monad" email="alex@example.com" active=true - -contents { - section "First section" { - paragraph "This is the first paragraph" - paragraph "This is the second paragraph" - } +let doc: KdlDocument = r#" +hello 1 2 3 +world prop="value" { + child 1 + child 2 } +"#.parse().expect("failed to parse KDL"); -// unicode! comments! 
-π 3.14159 -``` - -## Basic Library Example - -``` -use kdl::{KdlNode, KdlValue}; -use std::collections::HashMap; - -assert_eq!( - kdl::parse_document("node 1 key=true").unwrap(), - vec![ - KdlNode { - name: String::from("node"), - values: vec![KdlValue::Int(1)], - properties: { - let mut temp = HashMap::new(); - temp.insert(String::from("key"), KdlValue::Boolean(true)); - temp - }, - children: vec![], - } - ] -) +assert_eq!(doc.get_args("hello"), vec![&1.into(), &2.into(), &3.into()]); +assert_eq!(doc.get("world").map(|node| &node["prop"]), Some(&"value".into())); ``` ## License diff --git a/src/document.rs b/src/document.rs new file mode 100644 index 0000000..8fe48ff --- /dev/null +++ b/src/document.rs @@ -0,0 +1,360 @@ +use std::{fmt::Display, str::FromStr}; + +use nom::{combinator::all_consuming, Finish}; + +use crate::{parser::document, KdlError, KdlErrorKind, KdlNode, KdlValue}; + +/// Represents a KDL +/// [`Document`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#document). +/// +/// This type is also used to manage a [`KdlNode`]'s [`Children +/// Block`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#children-block), +/// when present. +#[derive(Debug, Default, Clone, PartialEq)] +pub struct KdlDocument { + pub(crate) leading: Option, + // TODO: Consider using `hashlink` for this, later? + pub(crate) nodes: Vec, + pub(crate) trailing: Option, +} + +impl KdlDocument { + /// Creates a new Document. + pub fn new() -> Self { + Default::default() + } + + /// Gets the first child node with a matching name. + pub fn get(&self, name: &str) -> Option<&KdlNode> { + self.nodes.iter().find(move |n| n.name().value() == name) + } + + /// Gets a reference to the first child node with a matching name. + pub fn get_mut(&mut self, name: &str) -> Option<&mut KdlNode> { + self.nodes + .iter_mut() + .find(move |n| n.name().value() == name) + } + + /// Gets the first argument (value) of the first child node with a + /// matching name. 
This is a shorthand utility for cases where a document + /// is being used as a key/value store. + /// + /// # Examples + /// + /// Given a document like this: + /// ```kdl + /// foo 1 + /// bar false + /// ``` + /// + /// You can fetch the value of `foo` in a single call like this: + /// ```rust + /// # use kdl::{KdlDocument, KdlValue}; + /// # let doc: KdlDocument = "foo 1\nbar false".parse().unwrap(); + /// assert_eq!(doc.get_arg("foo"), Some(&1.into())); + /// ``` + pub fn get_arg(&self, name: &str) -> Option<&KdlValue> { + self.get(name) + .and_then(|node| node.get(0)) + .map(|e| e.value()) + } + + /// Gets all the arguments (values) of the first child node with a + /// matching name. This is a shorthand utility for cases where a document + /// is being used as a key/value store and the value is expected to be + /// array-ish. + /// + /// If a node has no arguments, this will return an empty `Vec`. + /// + /// # Examples + /// + /// Given a document like this: + /// ```kdl + /// foo 1 2 3 + /// bar false + /// ``` + /// + /// You can fetch the arguments for `foo` in a single call like this: + /// ```rust + /// # use kdl::{KdlDocument, KdlValue}; + /// # let doc: KdlDocument = "foo 1 2 3\nbar false".parse().unwrap(); + /// assert_eq!(doc.get_args("foo"), vec![&1.into(), &2.into(), &3.into()]); + /// ``` + pub fn get_args(&self, name: &str) -> Vec<&KdlValue> { + self.get(name) + .map(|n| n.entries()) + .unwrap_or_default() + .iter() + .filter(|e| e.name().is_none()) + .map(|e| e.value()) + .collect() + } + + pub fn get_arg_mut(&mut self, name: &str) -> Option<&mut KdlValue> { + self.get_mut(name) + .and_then(|node| node.get_mut(0)) + .map(|e| e.value_mut()) + } + + /// This utility makes it easy to interact with a KDL convention where + /// child nodes named `-` are treated as array-ish values. 
+ /// + /// # Examples + /// + /// Given a document like this: + /// ```kdl + /// foo { + /// - 1 + /// - 2 + /// - false + /// } + /// ``` + /// + /// You can fetch the dashed child values of `foo` in a single call like this: + /// ```rust + /// # use kdl::{KdlDocument, KdlValue}; + /// # let doc: KdlDocument = "foo {\n - 1\n - 2\n - false\n}".parse().unwrap(); + /// assert_eq!(doc.get_dash_vals("foo"), vec![&1.into(), &2.into(), &false.into()]); + /// ``` + pub fn get_dash_vals(&self, name: &str) -> Vec<&KdlValue> { + self.get(name) + .and_then(|n| n.children()) + .map(|doc| doc.nodes()) + .unwrap_or_default() + .iter() + .filter(|e| e.name().value() == "-") + .map(|e| e.get(0)) + .filter(|v| v.is_some()) + .map(|v| v.unwrap().value()) + .collect() + } + + /// Returns a reference to this document's child nodes. + pub fn nodes(&self) -> &[KdlNode] { + &self.nodes + } + + /// Returns a mutable reference to this document's child nodes. + pub fn nodes_mut(&mut self) -> &mut Vec { + &mut self.nodes + } + + /// Gets leading text (whitespace, comments) for this KdlDocument. + pub fn leading(&self) -> Option<&str> { + self.leading.as_deref() + } + + /// Sets leading text (whitespace, comments) for this KdlDocument. + pub fn set_leading(&mut self, leading: impl Into) { + self.leading = Some(leading.into()); + } + + /// Gets trailing text (whitespace, comments) for this KdlDocument. + pub fn trailing(&self) -> Option<&str> { + self.trailing.as_deref() + } + + /// Sets trailing text (whitespace, comments) for this KdlDocument. + pub fn set_trailing(&mut self, trailing: impl Into) { + self.trailing = Some(trailing.into()); + } + + /// Auto-formats this Document. + /// + /// Note: This currently removes comments as well. 
+ pub fn fmt(&mut self) { + self.leading = None; + self.trailing = None; + for node in &mut self.nodes { + node.fmt(); + } + } +} + +impl Display for KdlDocument { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.stringify(f, 0) + } +} + +impl KdlDocument { + pub(crate) fn stringify( + &self, + f: &mut std::fmt::Formatter<'_>, + indent: usize, + ) -> std::fmt::Result { + if let Some(leading) = &self.leading { + write!(f, "{}", leading)?; + } + for node in &self.nodes { + node.stringify(f, indent)?; + if node.trailing.is_none() { + writeln!(f)?; + } + } + if let Some(trailing) = &self.trailing { + write!(f, "{}", trailing)?; + } + Ok(()) + } +} + +impl IntoIterator for KdlDocument { + type Item = KdlNode; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.nodes.into_iter() + } +} + +impl KdlDocument { + /// Parse a KDL document from a string into a [`KdlDocument`] object model. + fn parse(input: &str) -> Result { + all_consuming(document)(input) + .finish() + .map(|(_, arg)| arg) + .map_err(|e| { + let prefix = &input[..(input.len() - e.input.len())]; + KdlError { + input: input.into(), + offset: prefix.chars().count(), + kind: if let Some(kind) = e.kind { + kind + } else if let Some(ctx) = e.context { + KdlErrorKind::Context(ctx) + } else { + KdlErrorKind::Other + }, + } + }) + } +} + +impl FromStr for KdlDocument { + type Err = KdlError; + + fn from_str(input: &str) -> Result { + KdlDocument::parse(input) + } +} + +#[cfg(test)] +mod test { + use crate::{KdlEntry, KdlValue}; + + use super::*; + + #[test] + fn parsing() { + let src = " +// This is the first node +foo 1 2 \"three\" null true bar=\"baz\" { + - 1 + - 2 + - \"three\" + something \"else\"\r +} + +null_id null_prop=null +true_id true_prop=null ++false true + + bar \"indented\" // trailing whitespace after this\t +/* +Some random comment + */ + +a; b; c; + +/-commented \"node\" + +another /*foo*/ \"node\" /-1 /*bar*/ null; +final;"; + let 
mut doc: KdlDocument = src.parse().unwrap(); + + assert_eq!(doc.leading, Some("".into())); + assert_eq!(doc.get_arg("foo"), Some(&1.into())); + assert_eq!( + doc.get_dash_vals("foo"), + vec![&1.into(), &2.into(), &"three".into()] + ); + + let foo = doc.get("foo").expect("expected a foo node"); + assert_eq!(foo.leading, Some("\n// This is the first node\n".into())); + assert_eq!(&foo[2], &"three".into()); + assert_eq!(&foo["bar"], &"baz".into()); + assert_eq!( + foo.children().unwrap().get_arg("something"), + Some(&"else".into()) + ); + assert_eq!(doc.get_arg("another"), Some(&"node".into())); + + let null = doc.get("null_id").expect("expected a null_id node"); + assert_eq!(&null["null_prop"], &KdlValue::Null); + + let tru = doc.get("true_id").expect("expected a true_id node"); + assert_eq!(&tru["true_prop"], &KdlValue::Null); + + let plusfalse = doc.get("+false").expect("expected a +false node"); + assert_eq!(&plusfalse[0], &KdlValue::Bool(true)); + + let bar = doc.get("bar").expect("expected a bar node"); + assert_eq!( + format!("{}", bar), + "\n bar \"indented\" // trailing whitespace after this\t\n" + ); + + let a = doc.get("a").expect("expected a node"); + assert_eq!( + format!("{}", a), + "/*\nSome random comment\n */\n\na; ".to_string() + ); + + let b = doc.get("b").expect("expected a node"); + assert_eq!(format!("{}", b), "b; ".to_string()); + + // Round-tripping works. + assert_eq!(format!("{}", doc), src); + + // Programmatic manipulation works. + let mut node: KdlNode = "new\n".parse().unwrap(); + // Manual entry parsing preserves formatting/reprs. 
+ node.push("\"blah\"=0xDEADbeef".parse::().unwrap()); + doc.nodes_mut().push(node); + + assert_eq!( + format!("{}", doc), + format!("{}new \"blah\"=0xDEADbeef\n", src) + ); + } + + #[test] + fn construction() { + let mut doc = KdlDocument::new(); + doc.nodes_mut().push(KdlNode::new("foo")); + + let mut bar = KdlNode::new("bar"); + bar.insert("prop", "value"); + bar.push(1); + bar.push(2); + bar.push(false); + bar.push(KdlValue::Null); + + let subdoc = bar.ensure_children(); + subdoc.nodes_mut().push(KdlNode::new("barchild")); + doc.nodes_mut().push(bar); + doc.nodes_mut().push(KdlNode::new("baz")); + + assert_eq!( + r#"foo +bar prop="value" 1 2 false null { + barchild +} +baz +"#, + format!("{}", doc) + ); + } +} diff --git a/src/entry.rs b/src/entry.rs new file mode 100644 index 0000000..28c74a7 --- /dev/null +++ b/src/entry.rs @@ -0,0 +1,219 @@ +use std::{fmt::Display, str::FromStr}; + +use nom::{combinator::all_consuming, Finish}; + +use crate::{KdlError, KdlErrorKind, KdlIdentifier, KdlValue}; + +/// KDL Entries are the "arguments" to KDL nodes: either a (positional) +/// [`Argument`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#argument) or +/// a (key/value) +/// [`Property`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#property) +#[derive(Debug, Clone, PartialEq)] +pub struct KdlEntry { + pub(crate) leading: Option, + pub(crate) ty: Option, + pub(crate) value: KdlValue, + pub(crate) value_repr: Option, + pub(crate) name: Option, + pub(crate) trailing: Option, +} + +impl KdlEntry { + /// Creates a new Argument (positional) KdlEntry. + pub fn new(value: impl Into) -> Self { + KdlEntry { + leading: None, + ty: None, + value: value.into(), + value_repr: None, + name: None, + trailing: None, + } + } + + pub fn name(&self) -> Option<&KdlIdentifier> { + self.name.as_ref() + } + + /// Gets the entry's value. + pub fn value(&self) -> &KdlValue { + &self.value + } + + /// Gets a mutable reference to this entry's value. 
+ pub fn value_mut(&mut self) -> &mut KdlValue { + &mut self.value + } + + /// Sets the entry's value. + pub fn set_value(&mut self, value: impl Into) { + self.value = value.into(); + } + + /// Creates a new Property (key/value) KdlEntry. + pub fn new_prop(key: impl Into, value: impl Into) -> Self { + KdlEntry { + leading: None, + ty: None, + value: value.into(), + value_repr: None, + name: Some(key.into()), + trailing: None, + } + } + + /// Gets leading text (whitespace, comments) for this KdlEntry. + pub fn leading(&self) -> Option<&str> { + self.leading.as_deref() + } + + /// Sets leading text (whitespace, comments) for this KdlEntry. + pub fn set_leading(&mut self, leading: impl Into) { + self.leading = Some(leading.into()); + } + + /// Gets trailing text (whitespace, comments) for this KdlEntry. + pub fn trailing(&self) -> Option<&str> { + self.trailing.as_deref() + } + + /// Sets trailing text (whitespace, comments) for this KdlEntry. + pub fn set_trailing(&mut self, trailing: impl Into) { + self.trailing = Some(trailing.into()); + } + + /// Gets the custom string representation for this KdlEntry's [`KdlValue`]. + pub fn value_repr(&self) -> Option<&str> { + self.value_repr.as_deref() + } + + /// Sets a custom string representation for this KdlEntry's [`KdlValue`]. + pub fn set_value_repr(&mut self, repr: impl Into) { + self.value_repr = Some(repr.into()); + } + + /// Auto-formats this entry. 
+ pub fn fmt(&mut self) { + self.leading = None; + self.trailing = None; + self.value_repr = None; + if let Some(name) = &mut self.name { + name.fmt(); + } + } +} + +impl Display for KdlEntry { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if let Some(leading) = &self.leading { + write!(f, "{}", leading)?; + } + if let Some(ty) = &self.ty { + write!(f, "{}", ty)?; + } + if let Some(name) = &self.name { + write!(f, "{}=", name)?; + } + if let Some(repr) = &self.value_repr { + write!(f, "{}", repr)?; + } else { + write!(f, "{}", self.value)?; + } + if let Some(trailing) = &self.trailing { + write!(f, "{}", trailing)?; + } + Ok(()) + } +} + +impl From for KdlEntry +where + T: Into, +{ + fn from(value: T) -> Self { + KdlEntry::new(value) + } +} + +impl From<(K, V)> for KdlEntry +where + K: Into, + V: Into, +{ + fn from((key, value): (K, V)) -> Self { + KdlEntry::new_prop(key, value) + } +} + +impl FromStr for KdlEntry { + type Err = KdlError; + + fn from_str(s: &str) -> Result { + KdlEntry::parse(s) + } +} + +impl KdlEntry { + /// Parse a single KDL entry (argument or property) from a string into a [`KdlEntry`]. 
+ fn parse(input: &str) -> Result { + all_consuming(crate::parser::entry)(input) + .finish() + .map(|(_, arg)| arg) + .map_err(|e| { + let prefix = &input[..(input.len() - e.input.len())]; + KdlError { + input: input.into(), + offset: prefix.chars().count(), + kind: if let Some(kind) = e.kind { + kind + } else if let Some(ctx) = e.context { + KdlErrorKind::Context(ctx) + } else { + KdlErrorKind::Other + }, + } + }) + } +} +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn new() { + let entry = KdlEntry::new(42); + assert_eq!( + entry, + KdlEntry { + leading: None, + ty: None, + value: KdlValue::Base10(42), + value_repr: None, + name: None, + trailing: None, + } + ); + + let entry = KdlEntry::new_prop("name", 42); + assert_eq!( + entry, + KdlEntry { + leading: None, + ty: None, + value: KdlValue::Base10(42), + value_repr: None, + name: Some("name".into()), + trailing: None, + } + ); + } + + #[test] + fn display() { + let entry = KdlEntry::new(KdlValue::Base10(42)); + assert_eq!(format!("{}", entry), "42"); + + let entry = KdlEntry::new_prop("name", KdlValue::Base10(42)); + assert_eq!(format!("{}", entry), "name=42"); + } +} diff --git a/src/error.rs b/src/error.rs index ad765eb..49f985c 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,7 +1,7 @@ use std::num::{ParseFloatError, ParseIntError}; +use miette::Diagnostic; use nom::error::{ContextError, ErrorKind, FromExternalError, ParseError}; - use thiserror::Error; #[cfg(doc)] @@ -11,16 +11,13 @@ use { }; /// An error that occurs when parsing a KDL document. -#[derive(Debug, Clone, Eq, PartialEq, Error)] -#[error("Error parsing document at line {line} column {column}. {kind}")] +#[derive(Debug, Diagnostic, Clone, Eq, PartialEq, Error)] +#[error("Error parsing document: {kind}")] +#[diagnostic(code("{kind.code()}"))] pub struct KdlError { pub input: String, /// Offset in chars of the error. pub offset: usize, - /// 1-based line number of the error. 
- pub line: usize, - /// 1-based column number (in chars) of the error. - pub column: usize, pub kind: KdlErrorKind, } @@ -31,7 +28,7 @@ pub enum KdlErrorKind { ParseIntError(ParseIntError), #[error(transparent)] ParseFloatError(ParseFloatError), - #[error("Failed to parse {0} component of semver string.")] + #[error("Failed to parse `{0}` component.")] Context(&'static str), #[error("An unspecified error occurred.")] Other, diff --git a/src/identifier.rs b/src/identifier.rs new file mode 100644 index 0000000..85baed8 --- /dev/null +++ b/src/identifier.rs @@ -0,0 +1,158 @@ +use std::fmt::Display; + +/// Represents a KDL +/// [Identifier](https://github.com/kdl-org/kdl/blob/main/SPEC.md#identifier). +#[derive(Debug, Clone, PartialEq)] +pub struct KdlIdentifier { + pub(crate) value: String, + pub(crate) repr: Option, +} + +impl KdlIdentifier { + /// Gets the string value for this identifier. + pub fn value(&self) -> &str { + &self.value + } + + /// Sets the string value for this identifier. + pub fn set_value(&mut self, value: impl Into) { + self.value = value.into(); + } + + /// Gets the custom string representation for this identifier, if any. + pub fn repr(&self) -> Option<&str> { + self.repr.as_deref() + } + + /// Sets a custom string representation for this identifier. + pub fn set_repr(&mut self, repr: impl Into) { + self.repr = Some(repr.into()); + } + + /// Auto-formats this identifier. 
+ pub fn fmt(&mut self) { + self.repr = None; + } +} + +impl Display for KdlIdentifier { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if let Some(repr) = &self.repr { + write!(f, "{}", repr) + } else if self.plain_value() { + write!(f, "{}", self.value) + } else { + write!(f, "{:?}", self.value) + } + } +} + +impl KdlIdentifier { + pub(crate) fn is_identifier_char(c: char) -> bool { + !((c as u32) < 0x20 + || (c as u32) > 0x10ffff + || matches!( + c, + '\\' | '/' + | '(' + | ')' + | '{' + | '}' + | '<' + | '>' + | ';' + | '[' + | ']' + | '=' + | ',' + | '"' + // Newlines + | '\r' + | '\n' + | '\u{0085}' + | '\u{000C}' + | '\u{2028}' + | '\u{2029}' + // Whitespace + | ' ' + | '\t' + | '\u{FEFF}' + | '\u{00A0}' + | '\u{1680}' + | '\u{2000}' + | '\u{2001}' + | '\u{2002}' + | '\u{2003}' + | '\u{2004}' + | '\u{2005}' + | '\u{2006}' + | '\u{2007}' + | '\u{2008}' + | '\u{2009}' + | '\u{200A}' + | '\u{202F}' + | '\u{205F}' + | '\u{3000}' + )) + } + + pub(crate) fn is_initial_char(c: char) -> bool { + !c.is_numeric() && Self::is_identifier_char(c) + } + + fn plain_value(&self) -> bool { + let mut iter = self.value.chars(); + if let Some(c) = iter.next() { + if !Self::is_initial_char(c) { + return false; + } + } else { + return false; + } + for char in iter { + if !Self::is_identifier_char(char) { + return false; + } + } + true + } +} + +impl From<&str> for KdlIdentifier { + fn from(value: &str) -> Self { + KdlIdentifier { + value: value.to_string(), + repr: None, + } + } +} + +impl From for KdlIdentifier { + fn from(value: String) -> Self { + KdlIdentifier { value, repr: None } + } +} + +impl From for String { + fn from(value: KdlIdentifier) -> Self { + value.value + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn formatting() { + let plain = KdlIdentifier::from("foo"); + assert_eq!(format!("{}", plain), "foo"); + + let quoted = KdlIdentifier::from("foo\"bar"); + assert_eq!(format!("{}", quoted), r#""foo\"bar""#); + + let mut 
custom_repr = KdlIdentifier::from("foo"); + custom_repr.set_repr(r#""foo/bar""#.to_string()); + assert_eq!(format!("{}", custom_repr), r#""foo/bar""#); + } +} diff --git a/src/lib.rs b/src/lib.rs index f3125ea..946d534 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,71 +1,44 @@ #![doc(html_logo_url = "https://kdl.dev/logo.svg")] -#![doc = include_str!("../README.md")] - -use nom::combinator::all_consuming; -use nom::Finish; - -pub use crate::error::{KdlError, KdlErrorKind, TryFromKdlNodeValueError}; -pub use crate::node::{KdlNode, KdlValue}; +/// `kdl` is a "document-oriented" parser and API. That means that, unlike +/// serde-based implementations, it's meant to preserve formatting when editing, +/// as well as inserting values with custom formatting. This is useful when +/// working with human-maintained KDL files. +/// +/// You can think of this crate as +/// [`toml_edit`](https://crates.io/crates/toml_edit), but for KDL. +/// +/// ### Example +/// +/// ```rust +/// use kdl::KdlDocument; +/// +/// let doc: KdlDocument = r#" +/// hello 1 2 3 +/// world prop="value" { +/// child 1 +/// child 2 +/// } +/// "#.parse().expect("failed to parse KDL"); +/// +/// assert_eq!(doc.get_args("hello"), vec![&1.into(), &2.into(), &3.into()]); +/// assert_eq!(doc.get("world").map(|node| &node["prop"]), Some(&"value".into())); +/// ``` +/// +/// ## License +/// +/// The code in this repository is covered by [the Apache-2.0 License](LICENSE.md). +pub use document::*; +pub use entry::*; +pub use error::*; +pub use identifier::*; +pub use node::*; +pub use value::*; +mod document; +mod entry; mod error; +mod identifier; mod node; mod nom_compat; mod parser; - -/// Parse a KDL document from a string into a list of [`KdlNode`]s. 
-/// -/// ``` -/// use kdl::{KdlNode, KdlValue}; -/// use std::collections::HashMap; -/// -/// assert_eq!( -/// kdl::parse_document("node 1 key=true").unwrap(), -/// vec![ -/// KdlNode { -/// name: String::from("node"), -/// values: vec![KdlValue::Int(1)], -/// properties: { -/// let mut temp = HashMap::new(); -/// temp.insert(String::from("key"), KdlValue::Boolean(true)); -/// temp -/// }, -/// children: vec![], -/// } -/// ] -/// ) -/// ``` -pub fn parse_document(input: I) -> Result, KdlError> -where - I: AsRef, -{ - let input = input.as_ref(); - all_consuming(parser::nodes)(input) - .finish() - .map(|(_, arg)| arg) - .map_err(|e| { - let prefix = &input[..(input.len() - e.input.len())]; - let (line, column) = calculate_line_column(prefix); - KdlError { - input: input.into(), - offset: prefix.chars().count(), - line, - column, - kind: if let Some(kind) = e.kind { - kind - } else if let Some(ctx) = e.context { - KdlErrorKind::Context(ctx) - } else { - KdlErrorKind::Other - }, - } - }) -} - -/// Calculates the line and column of the end of a `&str`. -/// -/// If the line ends on a newline, the (line, column) pair is placed on the previous line instead. -fn calculate_line_column(input: &str) -> (usize, usize) { - let (input, skipped_lines) = parser::count_leading_lines(input); - let input = parser::strip_trailing_newline(input); - (skipped_lines + 1, input.len() + 1) // +1 as we're 1-based -} +mod value; diff --git a/src/node.rs b/src/node.rs index b390880..971a615 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,365 +1,479 @@ -use std::{collections::HashMap, convert::TryFrom, fmt}; +use std::{ + fmt::Display, + ops::{Index, IndexMut}, + str::FromStr, +}; -use crate::TryFromKdlNodeValueError; +use nom::{combinator::all_consuming, Finish}; -/// A node representing the smallest unit of a KDL document. 
-/// -/// The anatomy of a node: -/// ```text -/// name "value" property_key="property value" { -/// child -/// } -/// ``` -/// -/// ## Example -/// -/// ``` -/// use kdl::{KdlNode, KdlValue}; -/// use std::collections::HashMap; -/// -/// const DOCUMENT: &str = r#" -/// name "value" property_key="property value" { -/// child -/// } -/// "#; -/// -/// assert_eq!( -/// kdl::parse_document(DOCUMENT).unwrap(), -/// vec![ -/// KdlNode { -/// name: String::from("name"), -/// values: vec![KdlValue::String("value".into())], -/// properties: { -/// let mut temp = HashMap::new(); -/// temp.insert( -/// String::from("property_key"), -/// KdlValue::String("property value".into()) -/// ); -/// temp -/// }, -/// children: vec![ -/// KdlNode { -/// name: String::from("child"), -/// ..Default::default() -/// } -/// ], -/// } -/// ] -/// ) -/// ``` -#[derive(Default, Debug, Clone, PartialEq)] -pub struct KdlNode { - pub name: String, - pub values: Vec, - pub properties: HashMap, - pub children: Vec, -} +use crate::{KdlDocument, KdlEntry, KdlError, KdlErrorKind, KdlIdentifier, KdlValue}; -/// A value present in either a node's values or in a node's properties. +/// Represents an individual KDL +/// [`Node`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#node) inside a +/// KDL Document. #[derive(Debug, Clone, PartialEq)] -pub enum KdlValue { - Int(i64), - Float(f64), - String(String), - Boolean(bool), - Null, +pub struct KdlNode { + pub(crate) leading: Option, + pub(crate) ty: Option, + pub(crate) name: KdlIdentifier, + // TODO: consider using `hashlink` for this instead, later. + pub(crate) entries: Vec, + pub(crate) before_children: Option, + pub(crate) children: Option, + pub(crate) trailing: Option, } -impl fmt::Display for KdlNode { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.write(f, 0) +impl KdlNode { + /// Creates a new KdlNode with a given name. 
+ pub fn new(name: impl Into) -> Self { + Self { + name: name.into(), + leading: None, + ty: None, + entries: Vec::new(), + before_children: None, + children: None, + trailing: None, + } + } + + /// Gets this node's name. + pub fn name(&self) -> &KdlIdentifier { + &self.name + } + + /// Gets a mutable reference to this node's name. + pub fn name_mut(&mut self) -> &mut KdlIdentifier { + &mut self.name + } + + /// Sets this node's name. + pub fn set_name(&mut self, name: impl Into) { + self.name = name.into(); + } + + /// Returns a reference to this node's entries (arguments and properties). + pub fn entries(&self) -> &[KdlEntry] { + &self.entries + } + + /// Returns a mutable reference to this node's entries (arguments and + /// properties). + pub fn entries_mut(&mut self) -> &mut Vec { + &mut self.entries + } + + /// Gets leading text (whitespace, comments) for this node. + pub fn leading(&self) -> Option<&str> { + self.leading.as_deref() + } + + /// Sets leading text (whitespace, comments) for this node. + pub fn set_leading(&mut self, leading: impl Into) { + self.leading = Some(leading.into()); + } + + /// Gets text (whitespace, comments) right before the children block's starting `{`. + pub fn before_children(&self) -> Option<&str> { + self.before_children.as_deref() + } + + /// Sets text (whitespace, comments) right before the children block's starting `{`. + pub fn set_before_children(&mut self, before: impl Into) { + self.before_children = Some(before.into()); + } + + /// Gets trailing text (whitespace, comments) for this node. + pub fn trailing(&self) -> Option<&str> { + self.trailing.as_deref() + } + + /// Sets trailing text (whitespace, comments) for this node. + pub fn set_trailing(&mut self, trailing: impl Into) { + self.trailing = Some(trailing.into()); + } + + /// Fetches an entry by key. Number keys will look up arguments, strings + /// will look up properties. 
+ pub fn get(&self, key: impl Into<NodeKey>) -> Option<&KdlEntry> { + self.get_impl(key.into()) + } + + fn get_impl(&self, key: NodeKey) -> Option<&KdlEntry> { + match key { + NodeKey::Key(key) => { + for entry in &self.entries { + if entry.name.is_some() + && entry.name.as_ref().map(|i| i.value()) == Some(key.value()) + { + return Some(entry); + } + } + None + } + NodeKey::Index(idx) => { + let mut current_idx = 0; + for entry in &self.entries { + if entry.name.is_none() { + if current_idx == idx { + return Some(entry); + } + current_idx += 1; + if current_idx > idx { // defensive guard; written without arithmetic on `idx` so `usize::MAX` cannot overflow + return None; + } + } + } + None + } + } + } + + /// Fetches a mutable reference to an entry by key. Number keys will look + /// up arguments (counting only unnamed entries), strings will look up properties. + pub fn get_mut(&mut self, key: impl Into<NodeKey>) -> Option<&mut KdlEntry> { + self.get_mut_impl(key.into()) + } + + fn get_mut_impl(&mut self, key: NodeKey) -> Option<&mut KdlEntry> { + match key { + NodeKey::Key(key) => { + for entry in &mut self.entries { + if entry.name.is_some() + && entry.name.as_ref().map(|i| i.value()) == Some(key.value()) + { + return Some(entry); + } + } + None + } + NodeKey::Index(idx) => { + let mut current_idx = 0; + for entry in &mut self.entries { + if entry.name.is_none() { + if current_idx == idx { // exact match, mirroring `get_impl`; `>=` made every index past 0 unreachable + return Some(entry); + } + current_idx += 1; + if current_idx > idx { + return None; + } + } + } + None + } + } + } + + /// Inserts an entry into this node. If an entry already exists with the + /// same key, it will be replaced and the previous entry will be returned. + /// + /// Numerical keys will insert arguments, string keys will insert + /// properties. 
+ pub fn insert( + &mut self, + key: impl Into<NodeKey>, + entry: impl Into<KdlEntry>, + ) -> Option<KdlEntry> { + self.insert_impl(key.into(), entry.into()) + } + + fn insert_impl(&mut self, key: NodeKey, mut entry: KdlEntry) -> Option<KdlEntry> { + match key { + NodeKey::Key(ref key_val) => { + if entry.name.is_none() { + entry.name = Some(key_val.clone()); + } + if entry.name.as_ref().map(|i| i.value()) != Some(key_val.value()) { + panic!("Property name mismatch"); + } + if let Some(existing) = self.get_mut(key) { + std::mem::swap(existing, &mut entry); + Some(entry) + } else { + self.entries.push(entry); + None + } + } + NodeKey::Index(idx) => { + if entry.name.is_some() { + panic!("Cannot insert property with name under a numerical key"); + } + if let Some(existing) = self.get_mut(key) { + std::mem::swap(existing, &mut entry); + Some(entry) + } else { + let mut current_idx = 0; // counts arguments (unnamed entries) seen so far + for existing in &mut self.entries { + if existing.name.is_none() { + if current_idx == idx { + std::mem::swap(existing, &mut entry); + return Some(entry); + } + current_idx += 1; + if current_idx > idx { // must not stop at `== idx`, or the replacement branch above is never reached + break; + } + } + } + if idx > current_idx { + panic!( + "Insertion index (is {}) should be <= len (is {})", + idx, current_idx + ); + } else { + self.entries.push(entry); + None + } + } + } + } + } + + /// Removes an entry from this node. If an entry already exists with the + /// same key, it will be returned. + /// + /// Numerical keys will remove arguments, string keys will remove + /// properties. 
+ pub fn remove(&mut self, key: impl Into) -> Option { + self.remove_impl(key.into()) + } + + fn remove_impl(&mut self, key: NodeKey) -> Option { + match key { + NodeKey::Key(key) => { + for (idx, entry) in self.entries.iter_mut().enumerate() { + if entry.name.is_some() && entry.name.as_ref() == Some(&key) { + return Some(self.entries.remove(idx)); + } + } + None + } + NodeKey::Index(idx) => { + let mut current_idx = 0; + for entry in &mut self.entries { + if entry.name.is_none() { + if current_idx == idx { + return Some(self.entries.remove(idx)); + } + current_idx += 1; + if current_idx >= idx { + return None; + } + } + } + None + } + } + } + + /// Shorthand for `self.entries_mut().push(entry)`. + pub fn push(&mut self, entry: impl Into) { + self.entries.push(entry.into()); + } + + /// Shorthand for `self.entries_mut().clear()` + pub fn clear_entries(&mut self) { + self.entries.clear(); + } + + /// Returns a reference to this node's children, if any. + pub fn children(&self) -> Option<&KdlDocument> { + self.children.as_ref() + } + + /// Returns a mutable reference to this node's children, if any. + pub fn children_mut(&mut self) -> &mut Option { + &mut self.children + } + + /// Sets the KdlDocument representing this node's children. + pub fn set_children(&mut self, children: KdlDocument) { + self.children = Some(children); + } + + /// Removes this node's children completely. + pub fn clear_children(&mut self) { + self.children = None; + } + + /// Returns a mutable reference to this node's children [`KdlDocument`], + /// creating one first if one does not already exist. + pub fn ensure_children(&mut self) -> &mut KdlDocument { + if self.children.is_none() { + self.children = Some(KdlDocument::new()); + } + self.children_mut().as_mut().unwrap() + } + + /// Auto-formats this node and its contents. 
pub fn fmt(&mut self) {
    // With no stored leading/trailing text, `stringify` falls back to
    // plain indentation and writes no trailing text.
    self.leading = None;
    self.trailing = None;
    for entry in &mut self.entries {
        entry.fmt();
    }
    if let Some(children) = &mut self.children {
        children.fmt();
    }
}
}

/// Represents a [`KdlNode`]'s entry key.
#[derive(Debug, Clone, PartialEq)]
pub enum NodeKey {
    /// A property name; selects a named entry.
    Key(KdlIdentifier),
    /// A zero-based position among the unnamed entries (arguments).
    Index(usize),
}

impl From<&str> for NodeKey {
    fn from(key: &str) -> Self {
        NodeKey::Key(key.into())
    }
}

impl From<String> for NodeKey {
    fn from(key: String) -> Self {
        NodeKey::Key(key.into())
    }
}

impl From<usize> for NodeKey {
    fn from(key: usize) -> Self {
        NodeKey::Index(key)
    }
}

/// Indexes arguments by position. Panics if there is no such argument.
impl Index<usize> for KdlNode {
    type Output = KdlValue;

    fn index(&self, index: usize) -> &Self::Output {
        self.get(index).expect("Argument out of range.").value()
    }
}

impl IndexMut<usize> for KdlNode {
    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
        self.get_mut(index)
            .expect("Argument out of range.")
            .value_mut()
    }
}

/// Indexes properties by name. Panics if there is no such property.
impl Index<&str> for KdlNode {
    type Output = KdlValue;

    fn index(&self, key: &str) -> &Self::Output {
        self.get(key).expect("No such property.").value()
    }
}

/// Mutable indexing by name. A missing property is first created with a
/// `KdlValue::Null` value, so `node["key"] = value` works for new keys.
impl IndexMut<&str> for KdlNode {
    fn index_mut(&mut self, key: &str) -> &mut Self::Output {
        if self.get(key).is_none() {
            self.push((key, KdlValue::Null));
        }
        self.get_mut(key)
            .expect("Something went wrong.")
            .value_mut()
    }
}

impl KdlNode {
    /// Parses a single KDL node from a string into a [`KdlNode`].
    ///
    /// The node parser must consume the whole input. On failure, the
    /// returned [`KdlError`] holds a copy of the input, the offset of the
    /// parser's remaining input (counted in characters, not bytes), and the
    /// error kind: the parser's own kind if set, else its context, else
    /// `KdlErrorKind::Other`.
    fn parse(input: &str) -> Result<KdlNode, KdlError> {
        all_consuming(crate::parser::node)(input)
            .finish()
            .map(|(_, arg)| arg)
            .map_err(|e| {
                // `e.input` is the unparsed tail; everything before it is
                // the prefix that was consumed before the error.
                let prefix = &input[..(input.len() - e.input.len())];
                KdlError {
                    input: input.into(),
                    offset: prefix.chars().count(),
                    kind: if let Some(kind) = e.kind {
                        kind
                    } else if let Some(ctx) = e.context {
                        KdlErrorKind::Context(ctx)
                    } else {
                        KdlErrorKind::Other
                    },
                }
            })
    }
}

impl FromStr for KdlNode {
    type Err = KdlError;

    fn from_str(input: &str) -> Result<Self, Self::Err> {
        KdlNode::parse(input)
    }
}

impl Display for KdlNode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.stringify(f, 0)
    }
}

impl KdlNode {
    /// Writes this node to `f`.
    ///
    /// Stored leading/trailing text is written verbatim when present. When
    /// the node has no leading text, `indent` spaces are written instead.
    /// Children are written between braces with `indent + 4`.
    pub(crate) fn stringify(
        &self,
        f: &mut std::fmt::Formatter<'_>,
        indent: usize,
    ) -> std::fmt::Result {
        if let Some(leading) = &self.leading {
            write!(f, "{}", leading)?;
        } else {
            write!(f, "{:indent$}", "", indent = indent)?;
        }
        if let Some(ty) = &self.ty {
            write!(f, "({})", ty)?;
        }
        write!(f, "{}", self.name)?;
        // A separating space before `{` is only added when the last entry
        // has no trailing text of its own (or there are no entries).
        let mut space_before_children = true;
        for entry in &self.entries {
            if entry.leading.is_none() {
                write!(f, " ")?;
            }
            write!(f, "{}", entry)?;
            space_before_children = entry.trailing.is_none();
        }
        if let Some(children) = &self.children {
            if space_before_children {
                write!(f, " ")?;
            }
            write!(f, "{{")?;
            if children.leading.is_none() {
                writeln!(f)?;
            }
            children.stringify(f, indent + 4)?;
            write!(f, "}}")?;
        }
        if let Some(trailing) = &self.trailing {
            write!(f, "{}", trailing)?;
        }
        Ok(())
    }
}
fmt::Display for KdlValue { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use KdlValue::*; - match self { - Int(x) => write!(f, "{}", x), - Float(x) => write!(f, "{}", x), - String(x) => display_string(f, x), - Boolean(x) => write!(f, "{}", x), - Null => write!(f, "null"), - } - } -} - -fn display_identifier(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result { - if let Ok(("", identifier)) = crate::parser::bare_identifier(s) { - write!(f, "{}", identifier) - } else { - display_string(f, s) - } -} - -fn display_string(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result { - write!(f, "\"")?; - for c in s.chars() { - match crate::parser::ESCAPE_CHARS.1.get(&c) { - None => write!(f, "{}", c)?, - Some(c) => write!(f, "\\{}", c)?, - } - } - write!(f, "\"")?; - Ok(()) -} - -// Support conversions from base types into KdlNodeValue - -impl From for KdlValue { - fn from(v: i64) -> Self { - Self::Int(v) - } -} - -impl From for KdlValue { - fn from(v: f64) -> Self { - Self::Float(v) - } -} - -impl From for KdlValue { - fn from(v: String) -> Self { - Self::String(v) - } -} - -impl From<&str> for KdlValue { - fn from(v: &str) -> Self { - Self::String(v.to_owned()) - } -} - -impl From for KdlValue { - fn from(v: bool) -> Self { - Self::Boolean(v) - } -} - -impl From> for KdlValue -where - T: Into, -{ - fn from(v: Option) -> Self { - v.map_or(KdlValue::Null, |v| v.into()) - } -} - -// Support reverse conversions using TryFrom - -// Synthesizes a TryFrom impl for both the base type and an Option variant. -// -// We need the Option variant because we can't write a blanket impl due to the existing -// impl TryFrom for T where U: Into -// even though KdlNodeValue does not implement Into>. -macro_rules! 
impl_try_from { - (<$($lt:lifetime)?> $source:ty => $typ:ty, $($good:pat => $value:expr),+; $($bad:ident),+) => { - impl<$($lt)?> TryFrom<$source> for $typ { - type Error = TryFromKdlNodeValueError; - fn try_from(value: $source) -> Result { - match value { - $( $good => Ok($value), )+ - $( KdlValue::$bad(_) => Err(TryFromKdlNodeValueError { - expected: stringify!($typ), - variant: stringify!($bad) - }), )+ - KdlValue::Null => Err(TryFromKdlNodeValueError { - expected: stringify!($typ), - variant: "Null" - }), - } - } - } - impl<$($lt)?> TryFrom<$source> for Option<$typ> { - type Error = TryFromKdlNodeValueError; - fn try_from(value: $source) -> Result { - match value { - $( $good => Ok(Some($value)), )+ - $( KdlValue::$bad(_) => Err(TryFromKdlNodeValueError { - expected: concat!("Option::<", stringify!($typ), ">"), - variant: stringify!($bad) - }), )+ - KdlValue::Null => Ok(None), - } - } - } - }; - (& $($lt:lifetime)?, $typ:ty, $($tt:tt)*) => { - impl_try_from!(<$($lt)?> & $($lt)? KdlValue => $typ, $($tt)*); - }; - ($typ:ty, $($tt:tt)*) => { - impl_try_from!(<> KdlValue => $typ, $($tt)*); - }; -} - -impl_try_from!(i64, KdlValue::Int(v) => v; Float, String, Boolean); -impl_try_from!(&, i64, KdlValue::Int(v) => *v; Float, String, Boolean); -impl_try_from!(f64, KdlValue::Float(v) => v; Int, String, Boolean); -impl_try_from!(&, f64, KdlValue::Float(v) => *v; Int, String, Boolean); -impl_try_from!(String, KdlValue::String(v) => v; Int, Float, Boolean); -impl_try_from!(&'a, &'a str, KdlValue::String(v) => &v[..]; Int, Float, Boolean); -impl_try_from!(bool, KdlValue::Boolean(v) => v; Int, Float, String); -impl_try_from!(&, bool, KdlValue::Boolean(v) => *v; Int, Float, String); #[cfg(test)] -mod tests { +mod test { use super::*; #[test] - fn display_value() { - assert_eq!("1", format!("{}", KdlValue::Int(1))); - assert_eq!("1.5", format!("{}", KdlValue::Float(1.5))); - assert_eq!("true", format!("{}", KdlValue::Boolean(true))); - assert_eq!("false", format!("{}", 
KdlValue::Boolean(false))); - assert_eq!("null", format!("{}", KdlValue::Null)); - assert_eq!( - r#""foo""#, - format!("{}", KdlValue::String("foo".to_owned())) - ); - assert_eq!( - r#""foo \"bar\" baz""#, - format!("{}", KdlValue::String(r#"foo "bar" baz"#.to_owned())) - ); - } + fn indexing() { + let mut node = KdlNode::new("foo"); + node.push("bar"); + node["foo"] = 1.into(); - #[test] - fn display_node() { - let mut value = KdlNode { - name: "foo".into(), - values: vec![1.into(), "two".into()], - properties: HashMap::new(), - children: vec![], - }; + assert_eq!(node[0], "bar".into()); + assert_eq!(node["foo"], 1.into()); - value.properties.insert("three".to_owned(), 3.into()); + node[0] = false.into(); + node["foo"] = KdlValue::Null; - assert_eq!(r#"foo 1 "two" three=3"#, format!("{}", value)); - } - - #[test] - fn display_nested_node() { - let value = KdlNode { - name: "a1".into(), - values: vec!["a".into(), 1.into()], - properties: HashMap::new(), - children: vec![ - KdlNode { - name: "b1".into(), - values: vec!["b".into(), 1.into()], - properties: HashMap::new(), - children: vec![KdlNode { - name: "c1".into(), - values: vec!["c".into(), 1.into()], - properties: HashMap::new(), - children: vec![], - }], - }, - KdlNode { - name: "b2".into(), - values: vec!["b".into(), 2.into()], - properties: HashMap::new(), - children: vec![KdlNode { - name: "c2".into(), - values: vec!["c".into(), 2.into()], - properties: HashMap::new(), - children: vec![], - }], - }, - ], - }; - - assert_eq!( - r#" -a1 "a" 1 { - b1 "b" 1 { - c1 "c" 1 - } - b2 "b" 2 { - c2 "c" 2 - } -}"#, - format!("\n{}", value) - ); - } - - #[test] - fn from() { - assert_eq!(KdlValue::from(1), KdlValue::Int(1)); - assert_eq!(KdlValue::from(1.5), KdlValue::Float(1.5)); - assert_eq!( - KdlValue::from("foo".to_owned()), - KdlValue::String("foo".to_owned()) - ); - assert_eq!(KdlValue::from("bar"), KdlValue::String("bar".to_owned())); - assert_eq!(KdlValue::from(true), KdlValue::Boolean(true)); - - 
assert_eq!(KdlValue::from(None::), KdlValue::Null); - assert_eq!(KdlValue::from(Some(1)), KdlValue::Int(1)); - } - - #[test] - fn try_from_success() { - assert_eq!(i64::try_from(KdlValue::Int(1)), Ok(1)); - assert_eq!(i64::try_from(&KdlValue::Int(1)), Ok(1)); - assert_eq!(f64::try_from(KdlValue::Float(1.5)), Ok(1.5)); - assert_eq!(f64::try_from(&KdlValue::Float(1.5)), Ok(1.5)); - assert_eq!( - String::try_from(KdlValue::String("foo".to_owned())), - Ok("foo".to_owned()) - ); - assert_eq!( - <&str as TryFrom<_>>::try_from(&KdlValue::String("foo".to_owned())), - Ok("foo") - ); - assert_eq!(bool::try_from(KdlValue::Boolean(true)), Ok(true)); - assert_eq!(bool::try_from(&KdlValue::Boolean(true)), Ok(true)); - - assert_eq!(Option::::try_from(KdlValue::Int(1)), Ok(Some(1))); - assert_eq!(Option::::try_from(KdlValue::Null), Ok(None)); - } - - #[test] - fn try_from_failure() { - // We don't expose the internal format of the error type, so let's just test the message - // for a couple of cases. - assert_eq!( - format!("{}", i64::try_from(KdlValue::Float(1.5)).unwrap_err()), - "Failed to convert from KdlNodeValue::Float to i64." - ); - assert_eq!( - format!( - "{}", - Option::::try_from(KdlValue::Float(1.5)).unwrap_err() - ), - "Failed to convert from KdlNodeValue::Float to Option::." 
- ); + assert_eq!(node[0], false.into()); + assert_eq!(node["foo"], KdlValue::Null); } } diff --git a/src/parser.rs b/src/parser.rs index 28f10e3..bbac117 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,230 +1,264 @@ -use std::{collections::HashMap, iter::from_fn}; +use std::ops::RangeTo; use crate::nom_compat::{many0, many1, many_till}; use nom::branch::alt; -use nom::bytes::complete::{tag, take_until, take_until1, take_while_m_n}; +use nom::bytes::complete::{tag, take_until, take_until1, take_while, take_while_m_n}; use nom::character::complete::{anychar, char, none_of, one_of}; -use nom::combinator::{ - all_consuming, eof, iterator, map, map_opt, map_res, not, opt, recognize, value, -}; -use nom::multi::fold_many0; +use nom::combinator::{eof, map, map_opt, map_res, opt, recognize}; +use nom::error::ParseError; use nom::sequence::{delimited, preceded, terminated, tuple}; -use nom::Finish; -use nom::IResult; +use nom::{IResult, Offset, Parser, Slice}; -use crate::error::KdlParseError; -use crate::node::{KdlNode, KdlValue}; +use crate::{KdlDocument, KdlEntry, KdlIdentifier, KdlNode, KdlParseError, KdlValue}; -/// `nodes := linespace* (node nodes?)? linespace*` -pub(crate) fn nodes(input: &str) -> IResult<&str, Vec, KdlParseError<&str>> { - let (input, _) = many0(linespace)(input)?; - let (input, nodes) = map(many0(terminated(node, many0(linespace))), |nodes| { - nodes.into_iter().flatten().collect() - })(input)?; - let (input, _) = many0(linespace)(input)?; - Ok((input, nodes)) +pub(crate) fn document(input: &str) -> IResult<&str, KdlDocument, KdlParseError<&str>> { + let (input, nodes) = many0(node)(input)?; + let (input, trailing) = all_whitespace(input)?; + let mut doc = KdlDocument::new(); + doc.set_leading(""); + doc.set_trailing(trailing); + *doc.nodes_mut() = nodes; + Ok((input, doc)) } -// The following two functions exist for the purposes of translating offsets into line/column pairs -// for error reporting. 
We're doing this here so we can make use of our `newline` definition, to -// ensure line/column information is reported accurately based on our definition of newlines, even -// if we update our definition of newlines later. - -/// Counts all lines in the input up to the final line. -/// -/// This counts and skips past all lines terminated in `newline` with the exception of the final -/// line, regardless of whether it's newline-terminated. If the input only contains a single line, -/// the input will be returned unmodified with a count of `0`. -pub(crate) fn count_leading_lines(input: &str) -> (&str, usize) { - let mut iter = iterator( - input, - terminated(many_till(value((), anychar), newline), not(eof)), - ); - let count = (&mut iter).count(); - match iter.finish().finish() { - Ok((input, _)) => (input, count), - // I don't believe this particular parser can error, but we need to handle it anyway - Err(e) => (e.input, count), - } -} - -/// Strips a single trailing `newline`, if present, from the input. -pub(crate) fn strip_trailing_newline(input: &str) -> &str { - // Nom doesn't support parsing in reverse, but we want to reuse our newline definition. The - // longest newline sequence is 2 characters, so we can just test the last char, and the - // second-to-last char, and validate that the parser consumes the full input. - let mut idx_iter = input.char_indices().map(|(idx, _)| idx); - let mut last = idx_iter.next_back(); - let mut second_last = idx_iter.next_back(); - // Start with the second-to-last, otherwise \r\n will be parsed as just the \n. - from_fn(|| second_last.take().or_else(|| last.take())) - .find(|&idx| all_consuming(newline)(&input[idx..]).is_ok()) - .map(|idx| &input[..idx]) - .unwrap_or(input) -} - -#[derive(Debug, Clone)] -enum NodeArg { - Value(KdlValue), - Property(String, KdlValue), -} - -/// `node := ('/-' ws*)? type_annotation? identifier (node-space node-space* node-props-and-args)* (node-space* node-children ws*)? 
node-space* node-terminator` -pub(crate) fn node(input: &str) -> IResult<&str, Option, KdlParseError<&str>> { - let (input, comment) = opt(terminated(tag("/-"), many0(whitespace)))(input)?; - let (input, _ty) = opt(type_annotation)(input)?; - let (input, tag) = identifier(input)?; - let (input, args) = many0(preceded(many1(node_space), node_prop_or_arg))(input)?; - let (input, children) = opt(delimited( +pub(crate) fn node(input: &str) -> IResult<&str, KdlNode, KdlParseError<&str>> { + let (input, leading) = all_whitespace(input)?; + let (input, name) = identifier(input)?; + let (input, entries) = many0(entry)(input)?; + let (input, children) = opt(children)(input)?; + let (input, trailing) = recognize(preceded( many0(node_space), - node_children, - many0(whitespace), + terminated(recognize(opt(tag(";"))), opt(alt((linespace, eof)))), ))(input)?; - let (input, _) = many0(node_space)(input)?; - let (input, _) = node_terminator(input)?; - if comment.is_some() { - Ok((input, None)) - } else { - let (values, properties): (Vec, Vec) = args - .into_iter() - .flatten() - .partition(|arg| matches!(arg, NodeArg::Value(_))); - Ok(( - input, - Some(KdlNode { - name: tag, - children: children.unwrap_or_else(Vec::new), - values: values - .into_iter() - .map(|arg| match arg { - NodeArg::Value(val) => val, - _ => unreachable!(), - }) - .collect(), - properties: properties.into_iter().fold(HashMap::new(), |mut acc, arg| { - match arg { - NodeArg::Property(key, value) => { - acc.insert(key, value); - } - _ => unreachable!(), - } - acc - }), - }), - )) + let mut node = KdlNode::new(name); + node.set_leading(leading); + node.set_trailing(trailing); + let ents = node.entries_mut(); + *ents = entries; + if let Some((before, children)) = children { + let childs = node.children_mut(); + *childs = Some(children); + node.set_before_children(before); } + Ok((input, node)) } -/// `identifier_char := unicode - linespace - [\/(){}<>;[]=,"] -fn identifier_char(input: &str) -> IResult<&str, 
&str, KdlParseError<&str>> { - not(linespace)(input)?; - recognize(none_of(r#"\/(){}<>;[]=,""#))(input) +fn identifier(input: &str) -> IResult<&str, KdlIdentifier, KdlParseError<&str>> { + alt((plain_identifier, quoted_identifier))(input) } -/// `bare_identifier := ((identifier-char - digit - sign) identifier-char* | sign ((identifier-char - digit) identifier-char*)?) - keyword` -pub(crate) fn bare_identifier(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { - fn left(input: &str) -> IResult<&str, (), KdlParseError<&str>> { - not(keyword)(input)?; - not(one_of("0123456789"))(input)?; - not(one_of("+-"))(input)?; - let (input, _) = identifier_char(input)?; - let (input, _) = many0(identifier_char)(input)?; - Ok((input, ())) - } - fn right(input: &str) -> IResult<&str, (), KdlParseError<&str>> { - let (input, _) = one_of("+-")(input)?; - not(keyword)(input)?; - not(one_of("0123456789"))(input)?; - let (input, _) = opt(many1(identifier_char))(input)?; - Ok((input, ())) - } - recognize(alt((left, right)))(input) -} - -fn keyword(input: &str) -> IResult<&str, String, KdlParseError<&str>> { - map(alt((tag("true"), tag("false"), tag("null"))), String::from)(input) -} - -/// `identifier := bare_identifier | string` -fn identifier(input: &str) -> IResult<&str, String, KdlParseError<&str>> { - alt((string, (map(bare_identifier, String::from))))(input) -} - -/// `node-props-and-args := ('/-' node-space*)? 
(prop | value)` -fn node_prop_or_arg(input: &str) -> IResult<&str, Option, KdlParseError<&str>> { - let (input, comment) = opt(terminated(tag("/-"), many0(node_space)))(input)?; - let (input, proparg) = alt(( - map(property, |(key, val)| NodeArg::Property(key, val)), - map(node_value, NodeArg::Value), +fn plain_identifier(input: &str) -> IResult<&str, KdlIdentifier, KdlParseError<&str>> { + let (input, name) = recognize(preceded( + take_while_m_n(1, 1, KdlIdentifier::is_initial_char), + take_while(KdlIdentifier::is_identifier_char), ))(input)?; - if comment.is_some() { - Ok((input, None)) - } else { - Ok((input, Some(proparg))) - } + let mut ident = KdlIdentifier::from(name); + ident.set_repr(name); + Ok((input, ident)) } -/// `type-annotation := '(' identifier ')' -fn type_annotation(input: &str) -> IResult<&str, String, KdlParseError<&str>> { - delimited(tag("("), identifier, tag(")"))(input) +fn quoted_identifier(input: &str) -> IResult<&str, KdlIdentifier, KdlParseError<&str>> { + let (input, (raw, val)) = string(input)?; + let mut ident = KdlIdentifier::from(val.as_string().unwrap()); + ident.set_repr(raw); + Ok((input, ident)) } -/// `prop := identifier '=' value` -fn property(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> { - let (input, key) = identifier(input)?; +pub(crate) fn entry(input: &str) -> IResult<&str, KdlEntry, KdlParseError<&str>> { + alt((property, argument))(input) +} + +fn property(input: &str) -> IResult<&str, KdlEntry, KdlParseError<&str>> { + let (input, leading) = recognize(many0(node_space))(input)?; + let (input, name) = identifier(input)?; let (input, _) = tag("=")(input)?; - let (input, val) = node_value(input)?; - Ok((input, (key, val))) + let (input, (raw, value)) = value(input)?; + let mut entry = KdlEntry::new_prop(name, value); + entry.set_leading(if leading.is_empty() { " " } else { leading }); + entry.set_value_repr(raw); + Ok((input, entry)) } -/// `value := type-annotation? 
(string | raw_string | number | boolean | 'null'`) -fn node_value(input: &str) -> IResult<&str, KdlValue, KdlParseError<&str>> { - let (input, _ty) = opt(type_annotation)(input)?; +fn argument(input: &str) -> IResult<&str, KdlEntry, KdlParseError<&str>> { + let (input, leading) = recognize(many0(node_space))(input)?; + let (input, (raw, value)) = value(input)?; + let mut entry = KdlEntry::new(value); + entry.set_leading(if leading.is_empty() { " " } else { leading }); + entry.set_value_repr(raw); + Ok((input, entry)) +} + +fn value(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> { alt(( - map(string, KdlValue::String), - map(raw_string, |s| KdlValue::String(s.into())), - number, + null, boolean, - value(KdlValue::Null, tag("null")), + string, + raw_string, + hexadecimal, + octal, + binary, + float, + integer, ))(input) } -/// node-terminator := single-line-comment | newline | ';' | eof -fn node_terminator(input: &str) -> IResult<&str, (), KdlParseError<&str>> { +fn children(input: &str) -> IResult<&str, (&str, KdlDocument), KdlParseError<&str>> { + let (input, before) = alt((unicode_space, comment))(input)?; + let (input, _) = tag("{")(input)?; + let (input, children) = document(input)?; + let (input, _) = tag("}")(input)?; + Ok((input, (before, children))) +} + +fn all_whitespace(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { + recognize(many0(alt((comment, unicode_space, newline))))(input) +} + +fn whitespace(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { + recognize(alt((unicode_space, multi_line_comment)))(input) +} + +fn linespace(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { + recognize(alt((unicode_space, newline, single_line_comment)))(input) +} + +fn node_space(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { + recognize(alt(( + delimited(many0(whitespace), escline, many0(whitespace)), + recognize(many1(whitespace)), + node_slashdash, + )))(input) +} + +fn escline(input: &str) -> 
IResult<&str, &str, KdlParseError<&str>> { + recognize(preceded( + tag("\\"), + preceded(many0(whitespace), alt((single_line_comment, newline))), + ))(input) +} + +fn unicode_space(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { alt(( - value((), eof), - single_line_comment, - newline, - value((), char(';')), + tag(" "), + tag("\t"), + tag("\u{FEFF}"), // BOM + tag("\u{00A0}"), + tag("\u{1680}"), + tag("\u{2000}"), + tag("\u{2001}"), + tag("\u{2002}"), + tag("\u{2003}"), + tag("\u{2004}"), + tag("\u{2005}"), + tag("\u{2006}"), + tag("\u{2007}"), + tag("\u{2008}"), + tag("\u{2009}"), + tag("\u{200A}"), + tag("\u{202F}"), + tag("\u{205F}"), + tag("\u{3000}"), ))(input) } -/// `node-children := ('/-' node-space*)? '{' nodes '}'` -fn node_children(input: &str) -> IResult<&str, Vec, KdlParseError<&str>> { - let (input, comment) = opt(terminated(tag("/-"), many0(node_space)))(input)?; - let (input, children) = delimited(tag("{"), nodes, tag("}"))(input)?; - if comment.is_some() { - Ok((input, Vec::new())) - } else { - Ok((input, children)) - } +/// `newline := All line-break unicode white_space +pub(crate) fn newline(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { + alt(( + tag("\r\n"), + tag("\r"), + tag("\n"), + tag("\u{0085}"), + tag("\u{000C}"), + tag("\u{2028}"), + tag("\u{2029}"), + ))(input) } -/// `string := '"' character* '"'` -fn string(input: &str) -> IResult<&str, String, KdlParseError<&str>> { - delimited( - char('"'), - fold_many0(character, String::new, |mut acc, ch| { - acc.push(ch); - acc - }), - char('"'), - )(input) +fn comment(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { + alt((single_line_comment, multi_line_comment, slashdash_comment))(input) +} + +/// `single-line-comment := '//' ('\r' [^\n] | [^\r\n])* (newline | eof)` +fn single_line_comment(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { + recognize(preceded(tag("//"), many_till(anychar, alt((newline, eof)))))(input) +} + +/// `multi-line-comment := 
'/*' commented-block +fn multi_line_comment(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { + recognize(preceded(tag("/*"), commented_block))(input) +} + +/// `commented-block := '*/' | (multi-line-comment | '*' | '/' | [^*/]+) commented-block` +fn commented_block(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { + alt(( + tag("*/"), + terminated( + alt((multi_line_comment, take_until1("*/"), tag("*"), tag("/"))), + commented_block, + ), + ))(input) +} + +fn node_slashdash(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { + recognize(preceded( + tag("/-"), + alt((recognize(entry), recognize(children))), + ))(input) +} + +fn slashdash_comment(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { + recognize(preceded(tag("/-"), node))(input) +} + +fn boolean(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> { + alt(( + map(tag("true"), |s: &str| (s.into(), KdlValue::Bool(true))), + map(tag("false"), |s: &str| (s.into(), KdlValue::Bool(false))), + ))(input) +} + +fn null(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> { + map(tag("null"), |s: &str| (s.into(), KdlValue::Null))(input) +} + +/// `escaped-string := '"' character* '"'` +fn string(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> { + let (input, _) = tag("\"")(input)?; + let mut original = String::new(); + let mut value = String::new(); + original.push('"'); + let (input, chars) = many0(character)(input)?; + for (raw, processed) in chars { + original.push_str(raw); + value.push(processed); + } + let (input, _) = tag("\"")(input)?; + original.push('"'); + Ok((input, (original, KdlValue::String(value)))) } /// `character := '\' escape | [^\"]` -fn character(input: &str) -> IResult<&str, char, KdlParseError<&str>> { - alt((preceded(char('\\'), escape), none_of("\\\"")))(input) +fn character(input: &str) -> IResult<&str, (&str, char), KdlParseError<&str>> { + with_raw(alt((preceded(char('\\'), escape), 
none_of("\\\""))))(input) +} + +/// This is like `recognize`, but _also_ returns the actual value. +fn with_raw>, O, E: ParseError, F>( + mut parser: F, +) -> impl FnMut(I) -> IResult +where + F: Parser, +{ + move |input: I| { + let i = input.clone(); + match parser.parse(i) { + Ok((i, x)) => { + let index = input.offset(&i); + Ok((i, (input.slice(..index), x))) + } + Err(e) => Err(e), + } + } } // creates a (map, inverse map) tuple @@ -267,36 +301,25 @@ fn unicode(input: &str) -> IResult<&str, char, KdlParseError<&str>> { /// `raw-string := 'r' raw-string-hash` /// `raw-string-hash := '#' raw-string-hash '#' | raw-string-quotes` /// `raw-string-quotes := '"' .* '"'` -fn raw_string(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { +fn raw_string(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> { + let mut raw = String::new(); let (input, _) = char('r')(input)?; + raw.push('r'); let (input, hashes) = recognize(many0(char('#')))(input)?; + raw.push_str(hashes); let (input, _) = char('"')(input)?; + raw.push('"'); let close = format!("\"{}", hashes); - let (input, string) = take_until(&close[..])(input)?; + let (input, value) = take_until(&close[..])(input)?; + raw.push_str(value); let (input, _) = tag(&close[..])(input)?; - Ok((input, string)) + raw.push_str(&close); + Ok((input, (raw, KdlValue::RawString(value.into())))) } -/// `number := decimal | hex | octal | binary` -fn number(input: &str) -> IResult<&str, KdlValue, KdlParseError<&str>> { - alt(( - map(hexadecimal, KdlValue::Int), - map(octal, KdlValue::Int), - map(binary, KdlValue::Int), - map(float, KdlValue::Float), - map(integer, KdlValue::Int), - ))(input) -} - -/// ```text -/// decimal := integer ('.' [0-9]+)? exponent? -/// exponent := ('e' | 'E') integer -/// integer := sign? 
[1-9] [0-9_]* -/// sign := '+' | '-' -/// ``` -fn float(input: &str) -> IResult<&str, f64, KdlParseError<&str>> { +fn float(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> { map_res( - alt(( + with_raw(alt(( recognize(tuple(( integer, opt(preceded(char('.'), integer)), @@ -305,25 +328,93 @@ fn float(input: &str) -> IResult<&str, f64, KdlParseError<&str>> { integer, ))), recognize(tuple((integer, char('.'), integer))), - )), - |x| str::replace(x, "_", "").parse::(), + ))), + |(raw, x)| { + str::replace(x, "_", "") + .parse::() + .map(|x| (raw.into(), KdlValue::Base10Float(x))) + }, )(input) } /// ```text -/// decimal := integer ('.' [0-9]+)? exponent? -/// exponent := ('e' | 'E') integer /// integer := sign? [1-9] [0-9_]* /// sign := '+' | '-' /// ``` -fn integer(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { - let (input, sign) = sign(input)?; +fn integer(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> { + let mut raw = String::new(); + let (input, (raw_sign, sign)) = with_raw(sign)(input)?; + raw.push_str(raw_sign); map_res( - recognize(many1(terminated(one_of("0123456789"), many0(char('_'))))), - move |out: &str| { + with_raw(recognize(many1(terminated( + one_of("0123456789"), + many0(char('_')), + )))), + move |(raw_int, out)| { + raw.push_str(raw_int); str::replace(out, "_", "") .parse::() .map(move |x| x * sign) + .map(|x| (raw.clone(), KdlValue::Base10(x))) + }, + )(input) +} + +fn hexadecimal(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> { + let mut raw = String::new(); + let (input, (raw_sign, sign)) = with_raw(sign)(input)?; + raw.push_str(raw_sign); + map_res( + with_raw(preceded( + alt((tag("0x"), tag("0X"))), + recognize(many1(terminated( + one_of("0123456789abcdefABCDEF"), + many0(char('_')), + ))), + )), + move |(raw_body, hex): (&str, &str)| { + raw.push_str(raw_body); + i64::from_str_radix(&str::replace(hex, "_", ""), 16) + .map(|x| x * sign) + .map(|x| (raw.clone(), 
KdlValue::Base16(x))) + }, + )(input) +} + +/// `octal := sign? '0o' [0-7] [0-7_]*` +fn octal(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> { + let mut raw = String::new(); + let (input, (raw_sign, sign)) = with_raw(sign)(input)?; + raw.push_str(raw_sign); + map_res( + with_raw(preceded( + alt((tag("0o"), tag("0O"))), + recognize(many1(terminated(one_of("01234567"), many0(char('_'))))), + )), + move |(raw_body, oct): (&str, &str)| { + raw.push_str(raw_body); + i64::from_str_radix(&str::replace(oct, "_", ""), 8) + .map(|x| x * sign) + .map(|x| (raw.clone(), KdlValue::Base8(x))) + }, + )(input) +} + +/// `binary := sign? '0b' ('0' | '1') ('0' | '1' | '_')*` +fn binary(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> { + let mut raw = String::new(); + let (input, (raw_sign, sign)) = with_raw(sign)(input)?; + raw.push_str(raw_sign); + map_res( + with_raw(preceded( + alt((tag("0b"), tag("0B"))), + recognize(many1(terminated(one_of("01"), many0(char('_'))))), + )), + move |(raw_body, binary): (&str, &str)| { + raw.push_str(raw_body); + i64::from_str_radix(&str::replace(binary, "_", ""), 2) + .map(|x| x * sign) + .map(|x| (raw.clone(), KdlValue::Base2(x))) }, )(input) } @@ -342,703 +433,230 @@ fn sign(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { Ok((input, mult)) } -/// `hex := sign? '0x' [0-9a-fA-F] [0-9a-fA-F_]*` -fn hexadecimal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { - let (input, sign) = sign(input)?; - map_res( - preceded( - alt((tag("0x"), tag("0X"))), - recognize(many1(terminated( - one_of("0123456789abcdefABCDEF"), - many0(char('_')), - ))), - ), - move |out: &str| i64::from_str_radix(&str::replace(out, "_", ""), 16).map(|x| x * sign), - )(input) -} - -/// `octal := sign? 
'0o' [0-7] [0-7_]*` -fn octal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { - let (input, sign) = sign(input)?; - map_res( - preceded( - alt((tag("0o"), tag("0O"))), - recognize(many1(terminated(one_of("01234567"), many0(char('_'))))), - ), - move |out: &str| i64::from_str_radix(&str::replace(out, "_", ""), 8).map(|x| x * sign), - )(input) -} - -/// `binary := sign? '0b' ('0' | '1') ('0' | '1' | '_')*` -fn binary(input: &str) -> IResult<&str, i64, KdlParseError<&str>> { - let (input, sign) = sign(input)?; - map_res( - preceded( - alt((tag("0b"), tag("0B"))), - recognize(many1(terminated(one_of("01"), many0(char('_'))))), - ), - move |out: &str| i64::from_str_radix(&str::replace(out, "_", ""), 2).map(|x| x * sign), - )(input) -} - -/// `boolean := 'true' | 'false'` -fn boolean(input: &str) -> IResult<&str, KdlValue, KdlParseError<&str>> { - alt(( - value(KdlValue::Boolean(true), tag("true")), - value(KdlValue::Boolean(false), tag("false")), - ))(input) -} - -/// `node-space := ws* escline ws* | ws+` -fn node_space(input: &str) -> IResult<&str, (), KdlParseError<&str>> { - alt(( - delimited(many0(whitespace), escline, many0(whitespace)), - map(many1(whitespace), |_| ()), - ))(input) -} - -/// `single-line-comment := '//' ('\r' [^\n] | [^\r\n])* (newline | eof)` -fn single_line_comment(input: &str) -> IResult<&str, (), KdlParseError<&str>> { - let (input, _) = tag("//")(input)?; - let (input, _) = many_till(value((), anychar), alt((newline, value((), eof))))(input)?; - Ok((input, ())) -} - -/// `multi-line-comment := '/*' commented-block -fn multi_line_comment(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { - let (input, _) = tag("/*")(input)?; - commented_block(input) -} - -/// `commented-block := '*/' | (multi-line-comment | '*' | '/' | [^*/]+) commented-block` -fn commented_block(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { - alt(( - tag("*/"), - terminated( - alt((multi_line_comment, take_until1("*/"), tag("*"), tag("/"))), - 
commented_block, - ), - ))(input) -} - -/// `escline := '\\' ws* (single-line-comment | newline)` -fn escline(input: &str) -> IResult<&str, (), KdlParseError<&str>> { - let (input, _) = tag("\\")(input)?; - let (input, _) = many0(whitespace)(input)?; - let (input, _) = alt((single_line_comment, newline))(input)?; - Ok((input, ())) -} - -/// `linespace := newline | ws | single-line-comment` -fn linespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> { - value((), alt((newline, whitespace, single_line_comment)))(input) -} - -/// `ws := bom | unicode-space | multi-line-comment` -fn whitespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> { - // TODO: bom? - value( - (), - alt(( - tag("\u{FEFF}"), - unicode_space, - recognize(multi_line_comment), - )), - )(input) -} - -fn unicode_space(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { - alt(( - tag(" "), - tag("\t"), - tag("\u{00A0}"), - tag("\u{1680}"), - tag("\u{2000}"), - tag("\u{2001}"), - tag("\u{2002}"), - tag("\u{2003}"), - tag("\u{2004}"), - tag("\u{2005}"), - tag("\u{2006}"), - tag("\u{2007}"), - tag("\u{2008}"), - tag("\u{2009}"), - tag("\u{200A}"), - tag("\u{202F}"), - tag("\u{205F}"), - tag("\u{3000}"), - ))(input) -} - -/// `newline := All line-break unicode white_space -fn newline(input: &str) -> IResult<&str, (), KdlParseError<&str>> { - value( - (), - alt(( - tag("\r\n"), - tag("\r"), - tag("\n"), - tag("\u{0085}"), - tag("\u{000C}"), - tag("\u{2028}"), - tag("\u{2029}"), - )), - )(input) -} - #[cfg(test)] -mod tests { +mod node_tests { use super::*; #[test] - fn test_nodes() { - assert_eq!( - nodes("node"), - Ok(( - "", - vec![KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }] - )) - ); - assert_eq!( - nodes("node\n"), - Ok(( - "", - vec![KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }] - )) - ); - assert_eq!( - nodes("\nnode\n"), - Ok(( - "", - 
vec![KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }] - )) - ); - assert_eq!( - nodes("node1\nnode2"), - Ok(( - "", - vec![ - KdlNode { - name: "node1".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }, - KdlNode { - name: "node2".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - } - ] - )) - ); - } + fn basic() { + match node("foo 1 \"bar\"=false") { + Ok(("", parsed)) => { + let mut ident = KdlIdentifier::from("foo"); + ident.set_repr("foo"); + assert_eq!(parsed.name(), &ident); - #[test] - fn test_node() { - assert_eq!( - node("node"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); - assert_eq!( - node("node\n"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); - assert_eq!( - node("node;"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); - assert_eq!( - node("node 1"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: vec![KdlValue::Int(1)], - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); - assert_eq!( - node("node 1 2 \"3\" true false null"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: vec![ - KdlValue::Int(1), - KdlValue::Int(2), - KdlValue::String("3".into()), - KdlValue::Boolean(true), - KdlValue::Boolean(false), - KdlValue::Null - ], - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); + let mut entries = parsed.entries().iter(); - assert_eq!( - node("node {\n node2\n}"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: vec![KdlNode { - name: "node2".into(), - values: Vec::new(), - properties: 
HashMap::new(), - children: Vec::new() - }] - }) - )) - ); + let mut one = KdlEntry::new(1); + one.set_leading(" "); + one.set_value_repr("1"); + assert_eq!(entries.next(), Some(&one)); - assert_eq!( - node("node { node2; }"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: vec![KdlNode { - name: "node2".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new() - }] - }) - )) - ); - } - - #[test] - fn test_node_slashdash_comment() { - assert_eq!(node("/-node"), Ok(("", None))); - assert_eq!(node("/- node"), Ok(("", None))); - assert_eq!(node("/- node\n"), Ok(("", None))); - assert_eq!(node("/-node 1 2 3"), Ok(("", None))); - assert_eq!(node("/-node key=false"), Ok(("", None))); - assert_eq!(node("/-node{\nnode\n}"), Ok(("", None))); - assert_eq!( - node("/-node 1 2 3 key=\"value\" \\\n{\nnode\n}"), - Ok(("", None)) - ); - } - - #[test] - fn test_arg_slashdash_comment() { - assert_eq!( - node("node /-1"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); - assert_eq!( - node("node /-1 2"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: vec![KdlValue::Int(2)], - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); - assert_eq!( - node("node 1 /- 2 3"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: vec![KdlValue::Int(1), KdlValue::Int(3)], - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); - assert_eq!( - node("node /--1"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); - assert_eq!( - node("node /- -1"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); - assert_eq!( - node("node \\\n/- -1"), - Ok(( - "", - Some(KdlNode { - name: 
"node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); - } - - #[test] - fn test_prop_slashdash_comment() { - let mut properties = HashMap::new(); - properties.insert("key".to_owned(), KdlValue::Int(1)); - assert_eq!( - node("node /-key=1"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); - assert_eq!( - node("node /- key=1"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); - assert_eq!( - node("node key=1 /-key2=2"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: Vec::new(), - properties, - children: Vec::new(), - }) - )) - ); - } - - #[test] - fn test_children_slashdash_comment() { - assert_eq!( - node("node /-{}"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); - assert_eq!( - node("node /- {}"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); - assert_eq!( - node("node /-{\nnode2\n}"), - Ok(( - "", - Some(KdlNode { - name: "node".into(), - values: Vec::new(), - properties: HashMap::new(), - children: Vec::new(), - }) - )) - ); - } - - #[test] - fn test_string() { - assert_eq!(string("\"\""), Ok(("", "".into()))); - assert_eq!(string("\"hello\""), Ok(("", "hello".into()))); - assert_eq!(string("\"hello\nworld\""), Ok(("", "hello\nworld".into()))); - assert_eq!(string("\"\u{10FFF}\""), Ok(("", "\u{10FFF}".into()))); - assert_eq!( - string(r#""\"\\\/\b\f\n\r\t""#), - Ok(("", "\"\\/\u{08}\u{0C}\n\r\t".into())) - ); - assert_eq!(string(r#""\u{10}""#), Ok(("", "\u{10}".into()))); - assert!(string(r#""\i""#).is_err()); - assert!(string(r#""\u{c0ffee}""#).is_err()); - } - - #[test] - fn test_float() { - 
assert_eq!(float("1.0"), Ok(("", 1.0f64))); - assert_eq!(float("0.0"), Ok(("", 0.0f64))); - assert_eq!(float("-1.0"), Ok(("", -1.0f64))); - assert_eq!(float("+1.0"), Ok(("", 1.0f64))); - assert_eq!(float("1.0e10"), Ok(("", 1.0e10f64))); - assert_eq!(float("1.0e-10"), Ok(("", 1.0e-10f64))); - assert_eq!(float("-1.0e-10"), Ok(("", -1.0e-10f64))); - assert_eq!(float("123_456_789.0"), Ok(("", 123456789.0f64))); - assert_eq!(float("123_456_789.0_"), Ok(("", 123456789.0f64))); - assert!(float("?1.0").is_err()); - assert!(float("_1.0").is_err()); - assert!(float("1._0").is_err()); - assert!(float("1.").is_err()); - assert!(float(".0").is_err()); - } - - #[test] - fn test_integer() { - assert_eq!(integer("0"), Ok(("", 0))); - assert_eq!(integer("0123456789"), Ok(("", 123456789))); - assert_eq!(integer("0123_456_789"), Ok(("", 123456789))); - assert_eq!(integer("0123_456_789_"), Ok(("", 123456789))); - assert_eq!(integer("+0123456789"), Ok(("", 123456789))); - assert_eq!(integer("-0123456789"), Ok(("", -123456789))); - assert!(integer("?0123456789").is_err()); - assert!(integer("_0123456789").is_err()); - assert!(integer("a").is_err()); - assert!(integer("--").is_err()); - } - - #[test] - fn test_hexadecimal() { - assert_eq!( - hexadecimal("0x0123456789abcdef"), - Ok(("", 0x0123456789abcdef)) - ); - assert_eq!( - hexadecimal("0x01234567_89abcdef"), - Ok(("", 0x0123456789abcdef)) - ); - assert_eq!( - hexadecimal("0x01234567_89abcdef_"), - Ok(("", 0x0123456789abcdef)) - ); - assert!(hexadecimal("0x_123").is_err()); - assert!(hexadecimal("0xg").is_err()); - assert!(hexadecimal("0xx").is_err()); - } - - #[test] - fn test_octal() { - assert_eq!(octal("0o01234567"), Ok(("", 0o01234567))); - assert_eq!(octal("0o0123_4567"), Ok(("", 0o01234567))); - assert_eq!(octal("0o01234567_"), Ok(("", 0o01234567))); - assert!(octal("0o_123").is_err()); - assert!(octal("0o8").is_err()); - assert!(octal("0oo").is_err()); - } - - #[test] - fn test_binary() { - assert_eq!(binary("0b0101"), Ok(("", 
0b0101))); - assert_eq!(binary("0b01_10"), Ok(("", 0b0110))); - assert_eq!(binary("0b01___10"), Ok(("", 0b0110))); - assert_eq!(binary("0b0110_"), Ok(("", 0b0110))); - assert!(binary("0b_0110").is_err()); - assert!(binary("0b20").is_err()); - assert!(binary("0bb").is_err()); - } - - #[test] - fn test_raw_string() { - assert_eq!(raw_string(r#"r"foo""#), Ok(("", "foo"))); - assert_eq!(raw_string("r\"foo\nbar\""), Ok(("", "foo\nbar"))); - assert_eq!(raw_string(r##"r#"foo"#"##), Ok(("", "foo"))); - assert_eq!(raw_string(r###"r##"foo"##"###), Ok(("", "foo"))); - assert_eq!(raw_string(r#"r"\nfoo\r""#), Ok(("", r"\nfoo\r"))); - assert!(raw_string(r###"r##"foo"#"###).is_err()); - } - - #[test] - fn test_boolean() { - assert_eq!(boolean("true"), Ok(("", KdlValue::Boolean(true)))); - assert_eq!(boolean("false"), Ok(("", KdlValue::Boolean(false)))); - assert!(boolean("blah").is_err()); - } - - #[test] - fn test_node_space() { - assert_eq!(node_space(" "), Ok(("", ()))); - assert_eq!(node_space("\t "), Ok(("", ()))); - assert_eq!(node_space("\t \\ // hello\n "), Ok(("", ()))); - assert!(node_space("blah").is_err()); - } - - #[test] - fn test_single_line_comment() { - assert_eq!(single_line_comment("//hello"), Ok(("", ()))); - assert_eq!(single_line_comment("// \thello"), Ok(("", ()))); - assert_eq!(single_line_comment("//hello\n"), Ok(("", ()))); - assert_eq!(single_line_comment("//hello\r\n"), Ok(("", ()))); - assert_eq!(single_line_comment("//hello\n\r"), Ok(("\r", ()))); - assert_eq!(single_line_comment("//hello\rworld"), Ok(("world", ()))); - assert_eq!( - single_line_comment("//hello\nworld\r\n"), - Ok(("world\r\n", ())) - ); - } - - #[test] - fn test_multi_line_comment() { - assert_eq!(multi_line_comment("/*hello*/"), Ok(("", "hello"))); - assert_eq!(multi_line_comment("/*hello*/\n"), Ok(("\n", "hello"))); - assert_eq!( - multi_line_comment("/*\nhello\r\n*/"), - Ok(("", "\nhello\r\n")) - ); - assert_eq!( - multi_line_comment("/*\nhello** /\n*/"), - Ok(("", "\nhello** 
/\n")) - ); - assert_eq!( - multi_line_comment("/**\nhello** /\n*/"), - Ok(("", "*\nhello** /\n")) - ); - assert_eq!(multi_line_comment("/*hello*/world"), Ok(("world", "hello"))); - } - - #[test] - fn test_escline() { - assert_eq!(escline("\\\nfoo"), Ok(("foo", ()))); - assert_eq!(escline("\\\n foo"), Ok((" foo", ()))); - assert_eq!(escline("\\ \t \nfoo"), Ok(("foo", ()))); - assert_eq!(escline("\\ // test \nfoo"), Ok(("foo", ()))); - assert_eq!(escline("\\ // test \n foo"), Ok((" foo", ()))); - } - - #[test] - fn test_whitespace() { - assert_eq!(whitespace(" "), Ok(("", ()))); - assert_eq!(whitespace("\t"), Ok(("", ()))); - assert_eq!(whitespace("/* \nfoo\r\n */ etc"), Ok((" etc", ()))); - assert!(whitespace("hi").is_err()) - } - - #[test] - fn test_newline() { - assert_eq!(newline("\n"), Ok(("", ()))); - assert_eq!(newline("\r"), Ok(("", ()))); - assert_eq!(newline("\r\n"), Ok(("", ()))); - assert_eq!(newline("\n\n"), Ok(("\n", ()))); - assert!(newline("blah").is_err()); - } - - #[test] - fn test_count_leading_lines() { - assert_eq!(count_leading_lines(""), ("", 0)); - assert_eq!(count_leading_lines("foo"), ("foo", 0)); - assert_eq!(count_leading_lines("foo\n"), ("foo\n", 0)); - assert_eq!(count_leading_lines("foo\nbar"), ("bar", 1)); - assert_eq!(count_leading_lines("foo\nbar\n"), ("bar\n", 1)); - assert_eq!(count_leading_lines("\nfoo\n\nbar\n"), ("bar\n", 3)); - assert_eq!(count_leading_lines("foo\r\nbar\r\n"), ("bar\r\n", 1)); - assert_eq!(count_leading_lines("foo\nbar\rbaz"), ("baz", 2)); - assert_eq!(count_leading_lines("foo\nbar\n\n"), ("\n", 2)); - - assert_eq!( - count_leading_lines( - r#"// This example is a GitHub Action if it used KDL syntax. -// See .github/workflows/ci.yml for the file this was based on. 
-name "CI" - -on "push" "pull_request" - -env { - RUSTFLAGS "-Dwarnings" -"# - ), - (" RUSTFLAGS \"-Dwarnings\"\n", 7) - ); - } - - #[test] - fn test_strip_trailing_newline() { - assert_eq!(strip_trailing_newline(""), ""); - assert_eq!(strip_trailing_newline("foo"), "foo"); - assert_eq!(strip_trailing_newline("foo\n"), "foo"); - assert_eq!(strip_trailing_newline("foo\n\n"), "foo\n"); - assert_eq!(strip_trailing_newline("foo\nbar"), "foo\nbar"); - assert_eq!(strip_trailing_newline("foo\nbar\n"), "foo\nbar"); - assert_eq!(strip_trailing_newline("foo\r\n"), "foo"); - assert_eq!(strip_trailing_newline("\n"), ""); - assert_eq!(strip_trailing_newline("foo\r\n\r"), "foo\r\n"); - assert_eq!(strip_trailing_newline("foo\nx"), "foo\nx"); + let mut ident = KdlIdentifier::from("bar"); + ident.set_repr("\"bar\""); + let mut bar = KdlEntry::new_prop(ident, false); + bar.set_leading(" "); + bar.set_value_repr("false"); + assert_eq!(entries.next(), Some(&bar)); + } + Ok(_) => panic!("unexpected success"), + Err(e) => { + panic!("failed to parse: {:?}", e); + } + } + } +} + +#[cfg(test)] +mod whitespace_tests { + #[test] + fn basic() { + use super::all_whitespace; + + assert_eq!(all_whitespace(" \t\n\r"), Ok(("", " \t\n\r"))); + } +} + +#[cfg(test)] +mod comment_tests { + use super::*; + + #[test] + fn single_line() { + assert_eq!(comment("// Hello world"), Ok(("", "// Hello world"))); + } + + #[test] + fn multi_line() { + assert_eq!(comment("/* Hello world */"), Ok(("", "/* Hello world */"))); + } + + #[test] + fn slashdash() { + assert_eq!(comment("/-foo 1 2"), Ok(("", "/-foo 1 2"))); + } +} + +#[cfg(test)] +mod value_tests { + use super::*; + + #[test] + fn boolean_val() { + assert_eq!( + value("true"), + Ok(("", ("true".into(), KdlValue::Bool(true)))) + ); + assert_eq!( + value("false"), + Ok(("", ("false".into(), KdlValue::Bool(false)))) + ); + } + + #[test] + fn null_val() { + assert_eq!(value("null"), Ok(("", ("null".into(), KdlValue::Null)))); + } + + #[test] + fn 
binary_val() { + assert_eq!( + value("0b0101"), + Ok(("", ("0b0101".into(), KdlValue::Base2(0b0101)))) + ); + assert_eq!( + value("0b0101_1111"), + Ok(("", ("0b0101_1111".into(), KdlValue::Base2(0b0101_1111)))) + ); + assert_eq!( + value("-0b0101"), + Ok(("", ("-0b0101".into(), KdlValue::Base2(-0b0101)))) + ); + assert_eq!( + value("+0b0101"), + Ok(("", ("+0b0101".into(), KdlValue::Base2(0b0101)))) + ); + } + + #[test] + fn octal_val() { + assert_eq!( + value("0o01234567"), + Ok(("", ("0o01234567".into(), KdlValue::Base8(0o01234567)))) + ); + assert_eq!( + value("0o123_4567"), + Ok(("", ("0o123_4567".into(), KdlValue::Base8(0o1234567)))) + ); + assert_eq!( + value("-0o123"), + Ok(("", ("-0o123".into(), KdlValue::Base8(-0o123)))) + ); + assert_eq!( + value("+0o123"), + Ok(("", ("+0o123".into(), KdlValue::Base8(0o123)))) + ); + } + + #[test] + fn hexadecimal_val() { + assert_eq!( + value("0x0123456789abcdef"), + Ok(( + "", + ( + "0x0123456789abcdef".into(), + KdlValue::Base16(0x0123456789abcdef) + ) + )) + ); + assert_eq!( + value("0x123_4567"), + Ok(("", ("0x123_4567".into(), KdlValue::Base16(0x1234567)))) + ); + assert_eq!( + value("-0x123"), + Ok(("", ("-0x123".into(), KdlValue::Base16(-0x123)))) + ); + assert_eq!( + value("+0x123"), + Ok(("", ("+0x123".into(), KdlValue::Base16(0x123)))) + ); + } + + #[test] + fn integer_val() { + assert_eq!( + value("123_456"), + Ok(("", ("123_456".into(), KdlValue::Base10(123456)))) + ); + assert_eq!( + value("-123"), + Ok(("", ("-123".into(), KdlValue::Base10(-123)))) + ); + assert_eq!( + value("+123"), + Ok(("", ("+123".into(), KdlValue::Base10(123)))) + ); + } + + #[test] + fn float_val() { + assert_eq!( + value("123_456.789e-10"), + Ok(( + "", + ( + "123_456.789e-10".into(), + KdlValue::Base10Float(123_456.789e-10) + ) + )) + ); + assert_eq!( + value("-123.456"), + Ok(("", ("-123.456".into(), KdlValue::Base10Float(-123.456)))) + ); + assert_eq!( + value("+123.456"), + Ok(("", ("+123.456".into(), 
KdlValue::Base10Float(123.456)))) + ); + } + + #[test] + fn string_val() { + assert_eq!( + value(r#""Hello \n\u{2020}world""#), + Ok(( + "", + ( + r#""Hello \n\u{2020}world""#.into(), + KdlValue::String("Hello \n\u{2020}world".into()) + ) + )) + ); + } + + #[test] + fn raw_string_val() { + assert_eq!( + value(r#"r"Hello \n\u{2020}world""#), + Ok(( + "", + ( + r#"r"Hello \n\u{2020}world""#.into(), + KdlValue::RawString(r"Hello \n\u{2020}world".into()) + ) + )) + ); + assert_eq!( + value(r###"r##"Hello \n\u{2020}world"##"###), + Ok(( + "", + ( + r###"r##"Hello \n\u{2020}world"##"###.into(), + KdlValue::RawString(r"Hello \n\u{2020}world".into()) + ) + )) + ); } } diff --git a/src/value.rs b/src/value.rs new file mode 100644 index 0000000..dfa957b --- /dev/null +++ b/src/value.rs @@ -0,0 +1,266 @@ +use std::fmt::Display; + +/// A specific [KDL Value](https://github.com/kdl-org/kdl/blob/main/SPEC.md#value). +#[derive(Debug, Clone, PartialEq)] +pub enum KdlValue { + /// A [KDL Raw String](https://github.com/kdl-org/kdl/blob/main/SPEC.md#raw-string). + RawString(String), + + /// A [KDL String](https://github.com/kdl-org/kdl/blob/main/SPEC.md#string). + String(String), + + /// A [KDL + /// Number](https://github.com/kdl-org/kdl/blob/main/SPEC.md#number) in + /// binary form (e.g. `0b010101`). + Base2(i64), + + /// A [KDL + /// Number](https://github.com/kdl-org/kdl/blob/main/SPEC.md#number) in + /// octal form (e.g. `0o12345670`). + Base8(i64), + + /// A [KDL + /// Number](https://github.com/kdl-org/kdl/blob/main/SPEC.md#number) in + /// decimal form (e.g. `1234567890`). + Base10(i64), + + /// A [KDL + /// Number](https://github.com/kdl-org/kdl/blob/main/SPEC.md#number) in + /// decimal form (e.g. `1234567890.123`), interpreted as a Rust f64. + Base10Float(f64), + + /// A [KDL + /// Number](https://github.com/kdl-org/kdl/blob/main/SPEC.md#number) in + /// hexadecimal form (e.g. `1234567890abcdef`). 
+ Base16(i64), + + /// A [KDL Boolean](https://github.com/kdl-org/kdl/blob/main/SPEC.md#boolean). + Bool(bool), + + /// The [KDL Null Value](https://github.com/kdl-org/kdl/blob/main/SPEC.md#null). + Null, +} + +impl KdlValue { + /// Returns `true` if the value is a [`KdlValue::RawString`]. + pub fn is_raw_string(&self) -> bool { + matches!(self, Self::RawString(..)) + } + + /// Returns `true` if the value is a [`KdlValue::String`]. + pub fn is_string(&self) -> bool { + matches!(self, Self::String(..)) + } + + /// Returns `true` if the value is a [`KdlValue::String`] or [`KdlValue::RawString`]. + pub fn is_string_value(&self) -> bool { + matches!(self, Self::String(..) | Self::RawString(..)) + } + + /// Returns `true` if the value is a [`KdlValue::Base2`]. + pub fn is_base2(&self) -> bool { + matches!(self, Self::Base2(..)) + } + + /// Returns `true` if the value is a [`KdlValue::Base8`]. + pub fn is_base8(&self) -> bool { + matches!(self, Self::Base8(..)) + } + + /// Returns `true` if the value is a [`KdlValue::Base10`]. + pub fn is_base10(&self) -> bool { + matches!(self, Self::Base10(..)) + } + + /// Returns `true` if the value is a [`KdlValue::Base16`]. + pub fn is_base16(&self) -> bool { + matches!(self, Self::Base16(..)) + } + + /// Returns `true` if the value is a [`KdlValue::Base2`], + /// [`KdlValue::Base8`], [`KdlValue::Base10`], or [`KdlValue::Base16`]. + pub fn is_i64_value(&self) -> bool { + matches!( + self, + Self::Base2(..) | Self::Base8(..) | Self::Base10(..) | Self::Base16(..) + ) + } + + /// Returns `true` if the value is a [`KdlValue::Base10Float`]. + pub fn is_base10_float(&self) -> bool { + matches!(self, Self::Base10Float(..)) + } + + /// Returns `true` if the value is a [`KdlValue::Base10Float`]. + pub fn is_float_value(&self) -> bool { + matches!(self, Self::Base10Float(..)) + } + + /// Returns `true` if the value is a [`KdlValue::Bool`]. 
+ pub fn is_bool(&self) -> bool { + matches!(self, Self::Bool(..)) + } + + /// Returns `true` if the value is a [`KdlValue::Null`]. + pub fn is_null(&self) -> bool { + matches!(self, Self::Null) + } + + /// Returns `Some(&str)` if the `KdlValue` is a [`KdlValue::RawString`] or a + /// [`KdlValue::String`], otherwise returns `None`. + pub fn as_string(&self) -> Option<&str> { + use KdlValue::*; + match self { + String(s) | RawString(s) => Some(s), + _ => None, + } + } + + /// Returns `Some(i64)` if the `KdlValue` is a [`KdlValue::Base2`], + /// [`KdlValue::Base8`], [`KdlValue::Base10`], or [`KdlValue::Base16`], + /// otherwise returns `None`. + pub fn as_i64(&self) -> Option { + use KdlValue::*; + match self { + Base2(i) | Base8(i) | Base10(i) | Base16(i) => Some(*i), + _ => None, + } + } + + /// Returns `Some(f64)` if the `KdlValue` is a [`KdlValue::Base10Float`], + /// otherwise returns `None`. + pub fn as_f64(&self) -> Option { + if let Self::Base10Float(v) = self { + Some(*v) + } else { + None + } + } + + /// Returns `Some(bool)` if the `KdlValue` is a [`KdlValue::Bool`], otherwise returns `None`. 
+ pub fn as_bool(&self) -> Option { + if let Self::Bool(v) = self { + Some(*v) + } else { + None + } + } +} + +impl Display for KdlValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::RawString(_) => self.write_raw_string(f), + Self::String(string) => write!(f, "{:?}", string), + Self::Base2(value) => write!(f, "0b{:b}", value), + Self::Base8(value) => write!(f, "0o{:o}", value), + Self::Base10(value) => write!(f, "{}", value), + Self::Base10Float(value) => write!(f, "{}", value), + Self::Base16(value) => write!(f, "0x{:x}", value), + Self::Bool(value) => write!(f, "{}", value), + Self::Null => write!(f, "null"), + } + } +} + +impl KdlValue { + fn write_raw_string(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "r")?; + let raw = self.as_string().unwrap(); + let mut consecutive = 0usize; + let mut maxhash = 0usize; + for char in raw.chars() { + if char == '#' { + consecutive += 1; + } else if char == '"' { + maxhash = maxhash.max(consecutive); + } else { + consecutive = 0; + } + } + write!(f, "{}", "#".repeat(maxhash + 1))?; + write!(f, "\"{}\"", raw)?; + write!(f, "{}", "#".repeat(maxhash + 1))?; + Ok(()) + } +} + +impl From for KdlValue { + fn from(value: i64) -> Self { + KdlValue::Base10(value) + } +} + +impl From for KdlValue { + fn from(value: f64) -> Self { + KdlValue::Base10Float(value) + } +} + +impl From<&str> for KdlValue { + fn from(value: &str) -> Self { + KdlValue::String(value.to_string()) + } +} + +impl From for KdlValue { + fn from(value: String) -> Self { + KdlValue::String(value) + } +} + +impl From for KdlValue { + fn from(value: bool) -> Self { + KdlValue::Bool(value) + } +} + +impl From> for KdlValue +where + T: Into, +{ + fn from(value: Option) -> Self { + match value { + Some(value) => value.into(), + None => KdlValue::Null, + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn formatting() { + let raw = 
KdlValue::RawString(r###"r##"foor#"bar"#baz"##"###.into()); + assert_eq!( + format!("{}", raw), + r####"r###"r##"foor#"bar"#baz"##"###"#### + ); + + let string = KdlValue::String("foo\n".into()); + assert_eq!(format!("{}", string), r#""foo\n""#); + + let base2 = KdlValue::Base2(0b1010_1010); + assert_eq!(format!("{}", base2), "0b10101010"); + + let base8 = KdlValue::Base8(0o12345670); + assert_eq!(format!("{}", base8), "0o12345670"); + + let base10 = KdlValue::Base10(1234567890); + assert_eq!(format!("{}", base10), "1234567890"); + + let base10float = KdlValue::Base10Float(1234567890.12345); + assert_eq!(format!("{}", base10float), "1234567890.12345"); + + let base16 = KdlValue::Base16(0x1234567890ABCDEF); + assert_eq!(format!("{}", base16), "0x1234567890abcdef"); + + let boolean = KdlValue::Bool(true); + assert_eq!(format!("{}", boolean), "true"); + + let null = KdlValue::Null; + assert_eq!(format!("{}", null), "null"); + } +} diff --git a/tests/examples.rs b/tests/examples.rs deleted file mode 100644 index a8a5a52..0000000 --- a/tests/examples.rs +++ /dev/null @@ -1,161 +0,0 @@ -//! Tests the kdl files in the examples directory. - -use kdl::*; -use std::collections::HashMap; - -/// Helper for constructing nodes. -/// -/// This takes input that's similar to KDL itself, but each node must be terminated with either -/// a semicolon or a braced block. Nodes whose name contains characters not valid in Rust -/// identifiers must be written as a string literal instead. -macro_rules! 
nodes { - ([$v:ident]:name) => {}; - ([$v:ident]:name $name:ident $($tt:tt)*) => { - nodes!([$v]:values stringify!($name); {} {} $($tt)*) - }; - ([$v:ident]:name $name:literal $($tt:tt)*) => { - nodes!([$v]:values $name; {} {} $($tt)*) - }; - ([$v:ident]:values $name:expr; {$($value:literal,)*} $props:tt $new_value:literal $($tt:tt)*) => { - nodes!([$v]:values $name; {$($value,)* $new_value,} $props $($tt)*) - }; - ([$v:ident]:values $name:expr; $values:tt {$($key:ident=$prop:literal,)*} $new_key:ident=$new_prop:literal $($tt:tt)*) => { - nodes!([$v]:values $name; $values {$($key=$prop,)* $new_key=$new_prop,} $($tt)*) - }; - ([$v:ident]:values $name:expr; $values:tt $props:tt $(; $($tt:tt)*)?) => { - nodes!([$v]:values $name; $values $props {} $($($tt)*)?) - }; - ([$v:ident]:values $name:expr; {$($value:literal,)*} {$($key:ident=$prop:literal,)*} {$($child:tt)*} $($tail:tt)*) => { - $v.push(KdlNode { - name: $name.to_owned(), - values: vec![$( $value.to_owned().into() ),*], - properties: { - #[allow(unused_mut)] - let mut map = HashMap::new(); - $( - map.insert(stringify!($key).to_owned(), $prop.to_owned().into()); - )* - map - }, - children: nodes!($($child)*), - }); - nodes!([$v]:name $($tail)*); - }; - // Explicitly match literal and ident at the start instead of $($tt:tt)* - // so we get better errors than "recursion limit exceeded" if we fail to match. - (:start $($tt:tt)+) => {{ - let mut v = Vec::new(); - nodes!([v]:name $($tt)+); - v - }}; - ($name:literal $($tt:tt)*) => { - nodes!(:start $name $($tt)*) - }; - ($name:ident $($tt:tt)*) => { - nodes!(:start $name $($tt)*) - }; - () => { vec![] } -} - -const NUMBERS: &str = r#" -hex 0x32; -float 0.5; -binary 0b0110; -octal 0o755; -bignum 1_000_000; -scientific 1.234e-10; -"#; - -#[test] -fn test_numbers() { - let doc = parse_document(NUMBERS); - assert_eq!( - doc, - Ok(nodes! 
{ - hex 0x32; - float 0.5; - binary 0b0110; - octal 0o755; - bignum 1_000_000; - scientific 1.234e-10; - }) - ); -} - -#[test] -fn test_ci() { - let doc = parse_document(include_str!("../examples/ci.kdl")); - let nodes = nodes! { - name "CI"; - on "push" "pull_request"; - env { - RUSTFLAGS "-Dwarnings" - } - jobs { - fmt_and_docs "Check fmt & build docs" { - "runs-on" "ubuntu-latest"; - steps { - step uses="actions/checkout@v1"; - step "Install Rust" uses="actions-rs/toolchain@v1" { - profile "minimal"; - toolchain "stable"; - components "rustfmt"; - override true; - } - step "rustfmt" run="cargo fmt --all -- --check"; - step "docs" run="cargo doc --no-deps"; - } - } - build_and_test "Build & Test" { - "runs-on" "${{ matrix.os }}"; - strategy { - matrix { - rust "1.46.0" "stable"; - os "ubuntu-latest" "macOS-latest" "windows-latest"; - } - } - - steps { - step uses="actions/checkout@v1"; - step "Install Rust" uses="actions-rs/toolchain@v1" { - profile "minimal"; - toolchain "${{ matrix.rust }}"; - components "clippy"; - override true; - } - step "Clippy" run="cargo clippy --all -- -D warnings"; - step "Run tests" run="cargo test --all --verbose"; - } - } - } - }; - assert_eq!(doc, Ok(nodes)); -} - -#[test] -fn test_cargo() { - let doc = parse_document(include_str!("../examples/Cargo.kdl")); - let nodes = nodes! { - package { - name "kdl"; - version "0.0.0"; - description "kat's document language"; - authors "Kat Marchán "; - "license-file" "LICENSE.md"; - edition "2018"; - } - dependencies { - nom "6.0.1"; - thiserror "1.0.22"; - } - }; - assert_eq!(doc, Ok(nodes)); -} - -#[test] -fn test_nuget() { - let doc = parse_document(include_str!("../examples/nuget.kdl")); - // This file is particularly large. It would be nice to validate it, but for now - // I'm just going to settle for making sure it parses. - doc.expect("Parsing failed"); -}