diff --git a/Cargo.toml b/Cargo.toml index e7a7e40..70dc57f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,10 @@ repository = "https://github.com/kdl-org/kdl-rs" keywords = ["kdl", "document", "serialization", "config"] edition = "2021" +[features] +default = ["span"] +span = [] + [dependencies] miette = "5.3.0" nom = { version = "7.1.1", default-features = false } diff --git a/src/document.rs b/src/document.rs index 1554fc9..ba0d012 100644 --- a/src/document.rs +++ b/src/document.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "span")] +use miette::SourceSpan; use std::{fmt::Display, str::FromStr}; use crate::{parser, KdlError, KdlNode, KdlValue}; @@ -16,11 +18,34 @@ use crate::{parser, KdlError, KdlNode, KdlValue}; /// # use kdl::KdlDocument; /// let kdl: KdlDocument = "foo 1 2 3\nbar 4 5 6".parse().expect("parse failed"); /// ``` -#[derive(Debug, Default, Clone, PartialEq)] +#[derive(Debug, Clone)] pub struct KdlDocument { pub(crate) leading: Option, pub(crate) nodes: Vec, pub(crate) trailing: Option, + #[cfg(feature = "span")] + pub(crate) span: SourceSpan, +} + +impl PartialEq for KdlDocument { + fn eq(&self, other: &Self) -> bool { + self.leading == other.leading + && self.nodes == other.nodes + && self.trailing == other.trailing + // Intentionally omitted: self.span == other.span + } +} + +impl Default for KdlDocument { + fn default() -> Self { + Self { + leading: Default::default(), + nodes: Default::default(), + trailing: Default::default(), + #[cfg(feature = "span")] + span: SourceSpan::from(0..0), + } + } } impl KdlDocument { @@ -29,6 +54,28 @@ impl KdlDocument { Default::default() } + /// Gets this document's span. + /// + /// This value will be properly initialized when created via [`KdlDocument::parse`] + /// but may become invalidated if the document is mutated. We do not currently + /// guarantee this to yield any particularly consistent results at that point. 
+ #[cfg(feature = "span")] + pub fn span(&self) -> &SourceSpan { + &self.span + } + + /// Gets a mutable reference to this document's span. + #[cfg(feature = "span")] + pub fn span_mut(&mut self) -> &mut SourceSpan { + &mut self.span + } + + /// Sets this document's span. + #[cfg(feature = "span")] + pub fn set_span(&mut self, span: impl Into) { + self.span = span.into(); + } + /// Gets the first child node with a matching name. pub fn get(&self, name: &str) -> Option<&KdlNode> { self.nodes.iter().find(move |n| n.name().value() == name) @@ -266,12 +313,15 @@ impl FromStr for KdlDocument { type Err = KdlError; fn from_str(input: &str) -> Result { - parser::parse(input, parser::document) + let kdl_parser = parser::KdlParser::new(input); + kdl_parser.parse(parser::document(&kdl_parser)) } } #[cfg(test)] mod test { + #[cfg(feature = "span")] + use crate::KdlIdentifier; use crate::{KdlEntry, KdlValue}; use super::*; @@ -499,6 +549,181 @@ foo 1 bar=0xdeadbeef { Ok(()) } + #[cfg(feature = "span")] + fn check_spans_for_doc(doc: &KdlDocument, source: &impl miette::SourceCode) { + for node in doc.nodes() { + check_spans_for_node(node, source); + } + } + + #[cfg(feature = "span")] + fn check_spans_for_node(node: &KdlNode, source: &impl miette::SourceCode) { + check_span_for_ident(node.name(), source); + if let Some(ty) = node.ty() { + check_span_for_ident(ty, source); + } + + for entry in node.entries() { + if let Some(name) = entry.name() { + check_span_for_ident(name, source); + } + if let Some(ty) = entry.ty() { + check_span_for_ident(ty, source); + } + if let Some(repr) = entry.value_repr() { + if entry.name().is_none() && entry.ty().is_none() { + check_span(repr, entry.span(), source); + } + } + } + if let Some(children) = node.children() { + check_spans_for_doc(children, source); + } + } + + #[cfg(feature = "span")] + #[track_caller] + fn check_span_for_ident(ident: &KdlIdentifier, source: &impl miette::SourceCode) { + if let Some(repr) = ident.repr() { + 
check_span(repr, ident.span(), source); + } else { + check_span(ident.value(), ident.span(), source); + } + } + + #[cfg(feature = "span")] + #[track_caller] + fn check_span(expected: &str, span: &SourceSpan, source: &impl miette::SourceCode) { + let span = source.read_span(span, 0, 0).unwrap(); + let span = std::str::from_utf8(span.data()).unwrap(); + assert_eq!(span, expected); + } + + #[cfg(feature = "span")] + #[test] + fn span_test() -> miette::Result<()> { + let input = r####" +this { + is (a)"cool" document="to" read=(int)5 10.1 (u32)0x45 + and x="" { + "it" /*shh*/ "has"="💯" r##"the"## + Best🎊est + "syntax ever" + } + "yknow?" 0x10 +} +// that's +nice +inline { time; to; live "our" "dreams"; "y;all"; } +"####; + + let doc: KdlDocument = input.parse().unwrap(); + + // First check that all the identity-spans are correct + check_spans_for_doc(&doc, &input); + + // Now check some more interesting concrete spans + + // The whole document should presumably be "the input" again? + check_span(input, doc.span(), &input); + + // This one-liner node should be the whole line without leading whitespace + let is_node = doc + .get("this") + .unwrap() + .children() + .unwrap() + .get("is") + .unwrap(); + check_span( + r##"is (a)"cool" document="to" read=(int)5 10.1 (u32)0x45"##, + is_node.span(), + &input, + ); + + // Some simple with/without type hints + check_span(r#"(a)"cool""#, is_node.get(0).unwrap().span(), &input); + check_span( + r#"read=(int)5"#, + is_node.get("read").unwrap().span(), + &input, + ); + check_span(r#"10.1"#, is_node.get(1).unwrap().span(), &input); + check_span(r#"(u32)0x45"#, is_node.get(2).unwrap().span(), &input); + + // Now let's look at some messed up parts of that "and" node + let and_node = doc + .get("this") + .unwrap() + .children() + .unwrap() + .get("and") + .unwrap(); + + // The node is what you expect, the whole line and its two braces + check_span( + r####"and x="" { + "it" /*shh*/ "has"="💯" r##"the"## + Best🎊est + "syntax ever" + 
}"####, + and_node.span(), + &input, + ); + + // The child document is a little weird, it's the contents *inside* the braces + // with extra newlines on both ends. + check_span( + r####" + "it" /*shh*/ "has"="💯" r##"the"## + Best🎊est + "syntax ever" +"####, + and_node.children().unwrap().span(), + &input, + ); + + // Oh hey don't forget to check that "x" entry + check_span(r#"x="""#, and_node.get("x").unwrap().span(), &input); + + // Now the "it" node, more straightforward + let it_node = and_node.children().unwrap().get("it").unwrap(); + check_span( + r####""it" /*shh*/ "has"="💯" r##"the"##"####, + it_node.span(), + &input, + ); + check_span(r#""has"="💯""#, it_node.get("has").unwrap().span(), &input); + check_span( + r####"r##"the"##"####, + it_node.get(0).unwrap().span(), + &input, + ); + + // Make sure inline nodes work ok + let inline_node = doc.get("inline").unwrap(); + check_span( + r#"inline { time; to; live "our" "dreams"; "y;all"; }"#, + inline_node.span(), + &input, + ); + + let inline_children = inline_node.children().unwrap(); + check_span( + r#" time; to; live "our" "dreams"; "y;all"; "#, + inline_children.span(), + &input, + ); + + let inline_nodes = inline_children.nodes(); + check_span("time", inline_nodes[0].span(), &input); + check_span("to", inline_nodes[1].span(), &input); + check_span(r#"live "our" "dreams""#, inline_nodes[2].span(), &input); + check_span(r#""y;all""#, inline_nodes[3].span(), &input); + + Ok(()) + } + #[test] fn parse_examples() -> miette::Result<()> { include_str!("../examples/kdl-schema.kdl").parse::()?; diff --git a/src/entry.rs b/src/entry.rs index 6b04f26..dba2b6e 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "span")] +use miette::SourceSpan; use std::{fmt::Display, str::FromStr}; use crate::{parser, KdlError, KdlIdentifier, KdlValue}; @@ -6,7 +8,7 @@ use crate::{parser, KdlError, KdlIdentifier, KdlValue}; /// [`Argument`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#argument) or /// a 
(key/value) /// [`Property`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#property) -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone)] pub struct KdlEntry { pub(crate) leading: Option, pub(crate) ty: Option, @@ -14,6 +16,20 @@ pub struct KdlEntry { pub(crate) value_repr: Option, pub(crate) name: Option, pub(crate) trailing: Option, + #[cfg(feature = "span")] + pub(crate) span: SourceSpan, +} + +impl PartialEq for KdlEntry { + fn eq(&self, other: &Self) -> bool { + self.leading == other.leading + && self.ty == other.ty + && self.value == other.value + && self.value_repr == other.value_repr + && self.name == other.name + && self.trailing == other.trailing + // intentionally omitted: self.span == other.span + } } impl KdlEntry { @@ -26,6 +42,8 @@ impl KdlEntry { value_repr: None, name: None, trailing: None, + #[cfg(feature = "span")] + span: SourceSpan::from(0..0), } } @@ -49,6 +67,28 @@ impl KdlEntry { self.value = value.into(); } + /// Gets this entry's span. + /// + /// This value will be properly initialized when created via [`KdlDocument::parse`] + /// but may become invalidated if the document is mutated. We do not currently + /// guarantee this to yield any particularly consistent results at that point. + #[cfg(feature = "span")] + pub fn span(&self) -> &SourceSpan { + &self.span + } + + /// Gets a mutable reference to this entry's span. + #[cfg(feature = "span")] + pub fn span_mut(&mut self) -> &mut SourceSpan { + &mut self.span + } + + /// Sets this entry's span. + #[cfg(feature = "span")] + pub fn set_span(&mut self, span: impl Into) { + self.span = span.into(); + } + /// Gets the entry's type. 
pub fn ty(&self) -> Option<&KdlIdentifier> { self.ty.as_ref() @@ -73,6 +113,8 @@ impl KdlEntry { value_repr: None, name: Some(key.into()), trailing: None, + #[cfg(feature = "span")] + span: SourceSpan::from(0..0), } } @@ -187,7 +229,8 @@ impl FromStr for KdlEntry { type Err = KdlError; fn from_str(s: &str) -> Result { - parser::parse(s, parser::entry_with_trailing) + let kdl_parser = parser::KdlParser::new(s); + kdl_parser.parse(parser::entry_with_trailing(&kdl_parser)) } } @@ -218,6 +261,8 @@ mod test { value_repr: None, name: None, trailing: None, + #[cfg(feature = "span")] + span: SourceSpan::from(0..0), } ); @@ -231,6 +276,8 @@ mod test { value_repr: None, name: Some("name".into()), trailing: None, + #[cfg(feature = "span")] + span: SourceSpan::from(0..0), } ); } @@ -247,6 +294,8 @@ mod test { value_repr: Some("0xDEADbeef".into()), name: None, trailing: Some("\t\\\n".into()), + #[cfg(feature = "span")] + span: SourceSpan::from(0..0), } ); @@ -260,6 +309,8 @@ mod test { value_repr: Some("0xDEADbeef".into()), name: Some("\"foo\"".parse()?), trailing: Some("\t\\\n".into()), + #[cfg(feature = "span")] + span: SourceSpan::from(0..0), } ); diff --git a/src/fmt.rs b/src/fmt.rs index a58391b..3c69460 100644 --- a/src/fmt.rs +++ b/src/fmt.rs @@ -6,7 +6,10 @@ pub(crate) fn fmt_leading(leading: &mut String, indent: usize, no_comments: bool } let mut result = String::new(); if !no_comments { - let comments = crate::parser::parse(leading.trim(), crate::parser::leading_comments) + let input = leading.trim(); + let kdl_parser = crate::parser::KdlParser { full_input: input }; + let comments = kdl_parser + .parse(crate::parser::leading_comments(&kdl_parser)) .expect("invalid leading text"); for line in comments { let trimmed = line.trim(); @@ -26,7 +29,10 @@ pub(crate) fn fmt_trailing(decor: &mut String, no_comments: bool) { *decor = decor.trim().to_string(); let mut result = String::new(); if !no_comments { - let comments = crate::parser::parse(decor, 
crate::parser::trailing_comments) + let input = &*decor; + let kdl_parser = crate::parser::KdlParser { full_input: input }; + let comments = kdl_parser + .parse(crate::parser::trailing_comments(&kdl_parser)) .expect("invalid trailing text"); for comment in comments { result.push_str(comment); diff --git a/src/identifier.rs b/src/identifier.rs index 760d3dd..eb5444a 100644 --- a/src/identifier.rs +++ b/src/identifier.rs @@ -1,13 +1,32 @@ +#[cfg(feature = "span")] +use miette::SourceSpan; use std::{fmt::Display, str::FromStr}; use crate::{parser, KdlError}; /// Represents a KDL /// [Identifier](https://github.com/kdl-org/kdl/blob/main/SPEC.md#identifier). -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Eq)] pub struct KdlIdentifier { pub(crate) value: String, pub(crate) repr: Option, + #[cfg(feature = "span")] + pub(crate) span: SourceSpan, +} + +impl PartialEq for KdlIdentifier { + fn eq(&self, other: &Self) -> bool { + self.value == other.value && self.repr == other.repr + // intentionally omitted: self.span == other.span + } +} + +impl std::hash::Hash for KdlIdentifier { + fn hash(&self, state: &mut H) { + self.value.hash(state); + self.repr.hash(state); + // Intentionally omitted: self.span.hash(state); + } } impl KdlIdentifier { @@ -21,6 +40,28 @@ impl KdlIdentifier { self.value = value.into(); } + /// Gets this identifier's span. + /// + /// This value will be properly initialized when created via [`KdlDocument::parse`] + /// but may become invalidated if the document is mutated. We do not currently + /// guarantee this to yield any particularly consistent results at that point. + #[cfg(feature = "span")] + pub fn span(&self) -> &SourceSpan { + &self.span + } + + /// Gets a mutable reference to this identifier's span. + #[cfg(feature = "span")] + pub fn span_mut(&mut self) -> &mut SourceSpan { + &mut self.span + } + + /// Sets this identifier's span. 
+ #[cfg(feature = "span")] + pub fn set_span(&mut self, span: impl Into) { + self.span = span.into(); + } + /// Gets the custom string representation for this identifier, if any. pub fn repr(&self) -> Option<&str> { self.repr.as_deref() @@ -142,13 +183,20 @@ impl From<&str> for KdlIdentifier { KdlIdentifier { value: value.to_string(), repr: None, + #[cfg(feature = "span")] + span: SourceSpan::from(0..0), } } } impl From for KdlIdentifier { fn from(value: String) -> Self { - KdlIdentifier { value, repr: None } + KdlIdentifier { + value, + repr: None, + #[cfg(feature = "span")] + span: SourceSpan::from(0..0), + } } } @@ -162,7 +210,8 @@ impl FromStr for KdlIdentifier { type Err = KdlError; fn from_str(s: &str) -> Result { - parser::parse(s, parser::identifier) + let kdl_parser = crate::parser::KdlParser::new(s); + kdl_parser.parse(parser::identifier(&kdl_parser)) } } @@ -178,6 +227,8 @@ mod test { KdlIdentifier { value: plain.to_string(), repr: Some(plain.to_string()), + #[cfg(feature = "span")] + span: SourceSpan::from(0..3), } ); @@ -187,6 +238,8 @@ mod test { KdlIdentifier { value: "foo\"bar".to_string(), repr: Some(quoted.to_string()), + #[cfg(feature = "span")] + span: SourceSpan::from(0..0), } ); diff --git a/src/node.rs b/src/node.rs index 0e5fab1..409c667 100644 --- a/src/node.rs +++ b/src/node.rs @@ -4,12 +4,15 @@ use std::{ str::FromStr, }; +#[cfg(feature = "span")] +use miette::SourceSpan; + use crate::{parser, KdlDocument, KdlEntry, KdlError, KdlIdentifier, KdlValue}; /// Represents an individual KDL /// [`Node`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#node) inside a /// KDL Document. 
-#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone)] pub struct KdlNode { pub(crate) leading: Option, pub(crate) ty: Option, @@ -19,6 +22,21 @@ pub struct KdlNode { pub(crate) before_children: Option, pub(crate) children: Option, pub(crate) trailing: Option, + #[cfg(feature = "span")] + pub(crate) span: SourceSpan, +} + +impl PartialEq for KdlNode { + fn eq(&self, other: &Self) -> bool { + self.leading == other.leading + && self.ty == other.ty + && self.name == other.name + && self.entries == other.entries + && self.before_children == other.before_children + && self.children == other.children + && self.trailing == other.trailing + // intentionally omitted: self.span == other.span + } } impl KdlNode { @@ -32,6 +50,8 @@ impl KdlNode { before_children: None, children: None, trailing: None, + #[cfg(feature = "span")] + span: SourceSpan::from(0..0), } } @@ -50,6 +70,28 @@ impl KdlNode { self.name = name.into(); } + /// Gets this node's span. + /// + /// This value will be properly initialized when created via [`KdlDocument::parse`] + /// but may become invalidated if the document is mutated. We do not currently + /// guarantee this to yield any particularly consistent results at that point. + #[cfg(feature = "span")] + pub fn span(&self) -> &SourceSpan { + &self.span + } + + /// Gets a mutable reference to this node's span. + #[cfg(feature = "span")] + pub fn span_mut(&mut self) -> &mut SourceSpan { + &mut self.span + } + + /// Sets this node's span. + #[cfg(feature = "span")] + pub fn set_span(&mut self, span: impl Into) { + self.span = span.into(); + } + /// Gets the node's type identifier, if any. 
pub fn ty(&self) -> Option<&KdlIdentifier> { self.ty.as_ref() @@ -434,7 +476,8 @@ impl FromStr for KdlNode { type Err = KdlError; fn from_str(input: &str) -> Result { - parser::parse(input, parser::node) + let kdl_parser = crate::parser::KdlParser::new(input); + kdl_parser.parse(parser::node(&kdl_parser)) } } diff --git a/src/parser.rs b/src/parser.rs index 06b4aa6..dc2bb36 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,6 +1,12 @@ +// A bunch of random variables/functions become dead when you disable +// span support and rather than turning the code into complete cfg +// swiss-cheese, it's simpler to just hush the compiler about it +#![cfg_attr(not(feature = "span"), allow(dead_code, unused_variables))] + use std::ops::RangeTo; use crate::nom_compat::{many0, many1, many_till}; +use miette::SourceSpan; use nom::branch::alt; use nom::bytes::complete::{tag, take_until, take_while, take_while_m_n}; use nom::character::complete::{anychar, char, none_of, one_of}; @@ -13,29 +19,87 @@ use crate::{ KdlDocument, KdlEntry, KdlError, KdlErrorKind, KdlIdentifier, KdlNode, KdlParseError, KdlValue, }; -pub(crate) fn parse<'a, T, P>(input: &'a str, parser: P) -> Result -where - P: Parser<&'a str, T, KdlParseError<&'a str>>, -{ - all_consuming(parser)(input) - .finish() - .map(|(_, arg)| arg) - .map_err(|e| { - let prefix = &input[..(input.len() - e.input.len())]; - KdlError { - input: input.into(), - span: (prefix.chars().count(), e.len).into(), - help: e.help, - label: e.label, - kind: if let Some(kind) = e.kind { - kind - } else if let Some(ctx) = e.context { - KdlErrorKind::Context(ctx) - } else { - KdlErrorKind::Other - }, - } - }) +/// The parser for the entire input. +/// +/// All of our parsing subroutines want to hold onto some global information +/// to generate things like spans, so instead of making them simple free +/// functions, we wrap their bodies in closures that take in a kdl_parser. 
+/// The free functions then become constructors that return those closures. +/// This is basically the same idea behind nom combinators like many0 which +/// take an input to configure the combinator and then return a function. +pub(crate) struct KdlParser<'a> { + pub(crate) full_input: &'a str, +} + +impl<'a> KdlParser<'a> { + pub(crate) fn new(full_input: &'a str) -> Self { + Self { full_input } + } + + pub(crate) fn parse<T, P>(&self, parser: P) -> Result<T, KdlError> + where + P: Parser<&'a str, T, KdlParseError<&'a str>>, + { + all_consuming(parser)(self.full_input) + .finish() + .map(|(_, arg)| arg) + .map_err(|e| { + let span_substr = e.input.get(..e.len).unwrap_or(e.input); + KdlError { + input: self.full_input.into(), + span: self.span_from_substr(span_substr), + help: e.help, + label: e.label, + kind: if let Some(kind) = e.kind { + kind + } else if let Some(ctx) = e.context { + KdlErrorKind::Context(ctx) + } else { + KdlErrorKind::Other + }, + } + }) + } + + /// Creates a span for an item using two substrings of self.full_input: + /// + /// * before: the remainder of the input before parsing the item + /// * after: the remainder input after parsing the item + /// + /// All we really care about are the addresses of the strings, the lengths don't matter + fn span_from_before_and_after(&self, before: &str, after: &str) -> SourceSpan { + let base_addr = self.full_input.as_ptr() as usize; + let before_addr = before.as_ptr() as usize; + let after_addr = after.as_ptr() as usize; + assert!( + before_addr >= base_addr, + "tried to get the span of a non-substring!" + ); + assert!( + after_addr >= before_addr, + "subslices were in wrong order for spanning!" + ); + + let start = before_addr - base_addr; + let end = after_addr - base_addr; + SourceSpan::from(start..end) + } + + /// Creates a span for an item using a substring of self.full_input + /// + /// Note that substr must be a literal substring, as in it must be + /// a pointer into the same string! 
+ fn span_from_substr(&self, substr: &str) -> SourceSpan { + let base_addr = self.full_input.as_ptr() as usize; + let substr_addr = substr.as_ptr() as usize; + assert!( + substr_addr >= base_addr, + "tried to get the span of a non-substring!" + ); + let start = substr_addr - base_addr; + let end = start + substr.len(); + SourceSpan::from(start..end) + } } fn set_details<'a>( @@ -59,168 +123,237 @@ fn set_details<'a>( err } -pub(crate) fn document(input: &str) -> IResult<&str, KdlDocument, KdlParseError<&str>> { - let (input, nodes) = many0(node)(input)?; - let (input, trailing) = all_whitespace(input)?; - let mut doc = KdlDocument::new(); - doc.set_leading(""); - doc.set_trailing(trailing); - *doc.nodes_mut() = nodes; - Ok((input, doc)) +pub(crate) fn document<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlDocument, KdlParseError<&'a str>> + 'b { + move |input| { + let start = input; + let (input, nodes) = many0(node(kdl_parser))(input)?; + let (input, trailing) = all_whitespace(kdl_parser)(input)?; + let mut doc = KdlDocument::new(); + doc.set_leading(""); + doc.set_trailing(trailing); + *doc.nodes_mut() = nodes; + #[cfg(feature = "span")] + doc.set_span(kdl_parser.span_from_before_and_after(start, trailing)); + Ok((input, doc)) + } } -pub(crate) fn node(input: &str) -> IResult<&str, KdlNode, KdlParseError<&str>> { - let (input, leading) = all_whitespace(input)?; - let start = input; - let (input, ty) = opt(context("valid node type annotation", annotation))(input)?; - let (input, name) = context("valid node name", identifier)(input)?; - let (input, entries) = many0(context("valid node entry", entry))(input)?; - let (input, children) = opt(context("valid node children block", children))(input)?; - let (input, trailing) = context( - "valid node terminator", - cut(recognize(preceded( - many0(node_space), - alt(( - terminated(recognize(tag(";")), opt(alt((linespace, eof)))), - alt((newline, single_line_comment, eof)), 
+pub(crate) fn node<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlNode, KdlParseError<&'a str>> + 'b { + move |input| { + let (input, leading) = all_whitespace(kdl_parser)(input)?; + let start = input; + let (input, ty) = opt(context( + "valid node type annotation", + annotation(kdl_parser), + ))(input)?; + let (input, name) = context("valid node name", identifier(kdl_parser))(input)?; + let (input, entries) = many0(context("valid node entry", entry(kdl_parser)))(input)?; + let (input, children) = + opt(context("valid node children block", children(kdl_parser)))(input)?; + let (input, trailing) = context( + "valid node terminator", + cut(recognize(preceded( + many0(node_space(kdl_parser)), + alt(( + terminated(recognize(tag(";")), opt(alt((linespace, eof)))), + alt((newline, single_line_comment, eof)), + )), + ))), + )(input) + .map_err(|e| { + set_details( + e, + start, + Some("parsed node"), + Some("Nodes can only be terminated by `;` or a valid line ending."), + ) + })?; + let mut node = KdlNode::new(name); + node.set_leading(leading); + node.set_trailing(trailing); + #[cfg(feature = "span")] + node.set_span(kdl_parser.span_from_before_and_after(start, trailing)); + node.ty = ty; + let ents = node.entries_mut(); + *ents = entries; + if let Some((before, children)) = children { + let childs = node.children_mut(); + *childs = Some(children); + node.set_before_children(before); + } + Ok((input, node)) + } +} + +pub(crate) fn identifier<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlIdentifier, KdlParseError<&'a str>> + 'b { + move |input| alt((quoted_identifier(kdl_parser), plain_identifier(kdl_parser)))(input) +} + +pub(crate) fn leading_comments<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, Vec<&'a str>, KdlParseError<&'a str>> + 'b { + move |input| { + terminated( + many0(preceded( + opt(many0(alt((newline, unicode_space)))), + 
comment(kdl_parser), )), - ))), - )(input) - .map_err(|e| { - set_details( - e, - start, - Some("parsed node"), - Some("Nodes can only be terminated by `;` or a valid line ending."), - ) - })?; - let mut node = KdlNode::new(name); - node.set_leading(leading); - node.set_trailing(trailing); - node.ty = ty; - let ents = node.entries_mut(); - *ents = entries; - if let Some((before, children)) = children { - let childs = node.children_mut(); - *childs = Some(children); - node.set_before_children(before); - } - Ok((input, node)) -} - -pub(crate) fn identifier(input: &str) -> IResult<&str, KdlIdentifier, KdlParseError<&str>> { - alt((quoted_identifier, plain_identifier))(input) -} - -pub(crate) fn leading_comments(input: &str) -> IResult<&str, Vec<&str>, KdlParseError<&str>> { - terminated( - many0(preceded(opt(many0(alt((newline, unicode_space)))), comment)), - opt(many0(alt((newline, unicode_space, eof)))), - )(input) -} - -pub(crate) fn trailing_comments(mut input: &str) -> IResult<&str, Vec<&str>, KdlParseError<&str>> { - let mut comments = vec![]; - loop { - let (inp, _) = opt(many0(alt((newline, unicode_space, tag("\\")))))(input)?; - let (inp, comment) = opt(comment)(inp)?; - if let Some(comment) = comment { - comments.push(comment); - } - let (inp, _) = opt(many0(alt((newline, unicode_space, tag("\\"), tag(";")))))(inp)?; - let (inp, end) = opt(eof)(inp)?; - if end.is_some() { - return Ok((inp, comments)); - } - if input == inp { - panic!("invalid trailing text"); - } - input = inp; + opt(many0(alt((newline, unicode_space, eof)))), + )(input) } } -fn plain_identifier(input: &str) -> IResult<&str, KdlIdentifier, KdlParseError<&str>> { - let start = input; - let (input, name) = recognize(preceded( - take_while_m_n(1, 1, KdlIdentifier::is_initial_char), - cut(take_while(KdlIdentifier::is_identifier_char)), - ))(input).map_err(|e| set_details(e, start, Some("invalid identifier character"), Some("See https://github.com/kdl-org/kdl/blob/main/SPEC.md#identifier for an 
explanation of valid KDL identifiers.")))?; - match name { - "false" | "true" | "null" => { - return Err(nom::Err::Error(KdlParseError { - input, - context: Some("non-keyword identifier"), - len: name.len(), - label: Some("reserved keyword"), - help: Some("Reserved keywords cannot be used as identifiers."), - kind: None, - touched: false, - })) +pub(crate) fn trailing_comments<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, Vec<&'a str>, KdlParseError<&'a str>> + 'b { + move |mut input| { + let mut comments = vec![]; + loop { + let (inp, _) = opt(many0(alt((newline, unicode_space, tag("\\")))))(input)?; + let (inp, comment) = opt(comment(kdl_parser))(inp)?; + if let Some(comment) = comment { + comments.push(comment); + } + let (inp, _) = opt(many0(alt((newline, unicode_space, tag("\\"), tag(";")))))(inp)?; + let (inp, end) = opt(eof)(inp)?; + if end.is_some() { + return Ok((inp, comments)); + } + if input == inp { + panic!("invalid trailing text"); + } + input = inp; } - _ => {} } - let mut ident = KdlIdentifier::from(name); - ident.set_repr(name); - Ok((input, ident)) } -fn quoted_identifier(input: &str) -> IResult<&str, KdlIdentifier, KdlParseError<&str>> { - let (input, (raw, val)) = alt((string, raw_string))(input)?; - let mut ident = KdlIdentifier::from(val.as_string().unwrap()); - ident.set_repr(raw); - Ok((input, ident)) +fn plain_identifier<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlIdentifier, KdlParseError<&'a str>> + 'b { + move |input| { + let start = input; + let (input, name) = recognize(preceded( + take_while_m_n(1, 1, KdlIdentifier::is_initial_char), + cut(take_while(KdlIdentifier::is_identifier_char)), + ))(input).map_err(|e| set_details(e, start, Some("invalid identifier character"), Some("See https://github.com/kdl-org/kdl/blob/main/SPEC.md#identifier for an explanation of valid KDL identifiers.")))?; + match name { + "false" | "true" | "null" => { + return 
Err(nom::Err::Error(KdlParseError { + input: start, + context: Some("non-keyword identifier"), + len: name.len(), + label: Some("reserved keyword"), + help: Some("Reserved keywords cannot be used as identifiers."), + kind: None, + touched: false, + })) + } + _ => {} + } + let mut ident = KdlIdentifier::from(name); + ident.set_repr(name); + #[cfg(feature = "span")] + ident.set_span(kdl_parser.span_from_before_and_after(start, input)); + Ok((input, ident)) + } } -pub(crate) fn entry_with_trailing(input: &str) -> IResult<&str, KdlEntry, KdlParseError<&str>> { - let (input, mut leading) = recognize(many0(node_space))(input)?; - if leading.is_empty() { - leading = " "; - }; - let (input, mut entry) = alt((property, argument))(input)?; - let (input, trailing) = recognize(many0(node_space))(input)?; - entry.set_leading(leading); - entry.set_trailing(trailing); - Ok((input, entry)) +fn quoted_identifier<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlIdentifier, KdlParseError<&'a str>> + 'b { + move |input| { + let start = input; + let (input, (raw, val)) = alt((string, raw_string))(input)?; + let mut ident = KdlIdentifier::from(val.as_string().unwrap()); + ident.set_repr(raw); + #[cfg(feature = "span")] + ident.set_span(kdl_parser.span_from_before_and_after(start, input)); + Ok((input, ident)) + } } -fn entry(input: &str) -> IResult<&str, KdlEntry, KdlParseError<&str>> { - let (input, leading) = recognize(many1(node_space))(input)?; - let (input, mut entry) = alt((property, argument))(input)?; - entry.set_leading(leading); - Ok((input, entry)) +pub(crate) fn entry_with_trailing<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlEntry, KdlParseError<&'a str>> + 'b { + move |input| { + let (input, mut leading) = recognize(many0(node_space(kdl_parser)))(input)?; + if leading.is_empty() { + leading = " "; + }; + let (input, mut entry) = alt((property(kdl_parser), argument(kdl_parser)))(input)?; + let 
(input, trailing) = recognize(many0(node_space(kdl_parser)))(input)?; + entry.set_leading(leading); + entry.set_trailing(trailing); + Ok((input, entry)) + } } -fn entry_maybe_space(input: &str) -> IResult<&str, KdlEntry, KdlParseError<&str>> { - let (input, leading) = recognize(many0(node_space))(input)?; - let (input, mut entry) = alt((property, argument))(input)?; - entry.set_leading(leading); - Ok((input, entry)) +fn entry<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlEntry, KdlParseError<&'a str>> + 'b { + move |input| { + let (input, leading) = recognize(many1(node_space(kdl_parser)))(input)?; + let (input, mut entry) = alt((property(kdl_parser), argument(kdl_parser)))(input)?; + entry.set_leading(leading); + Ok((input, entry)) + } } -fn property(input: &str) -> IResult<&str, KdlEntry, KdlParseError<&str>> { - let (input, name) = identifier(input)?; - let (input, _) = context("'=' after property name", tag("="))(input)?; - let (input, ty) = opt(annotation)(input)?; - let (input, (raw, value)) = context("property value", cut(value))(input).map_err(|e| set_details(e, input, Some("invalid value"), Some("Please refer to https://github.com/kdl-org/kdl/blob/main/SPEC.md#value for valid KDL value syntaxes.")))?; - let mut entry = KdlEntry::new_prop(name, value); - entry.ty = ty; - entry.set_trailing(""); - entry.set_value_repr(raw); - Ok((input, entry)) +fn entry_maybe_space<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlEntry, KdlParseError<&'a str>> + 'b { + move |input| { + let (input, leading) = recognize(many0(node_space(kdl_parser)))(input)?; + let (input, mut entry) = alt((property(kdl_parser), argument(kdl_parser)))(input)?; + entry.set_leading(leading); + Ok((input, entry)) + } } -fn argument(input: &str) -> IResult<&str, KdlEntry, KdlParseError<&str>> { - let (input, ty) = opt(annotation)(input)?; - let (input, (raw, value)) = if ty.is_some() { - context("valid 
value", cut(value))(input) - } else { - context("valid value", value)(input) - }?; - let mut entry = KdlEntry::new(value); - entry.ty = ty; - entry.set_trailing(""); - entry.set_value_repr(raw); - Ok((input, entry)) +fn property<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlEntry, KdlParseError<&'a str>> + 'b { + move |input| { + let start = input; + let (input, name) = identifier(kdl_parser)(input)?; + let (input, _) = context("'=' after property name", tag("="))(input)?; + let (input, ty) = opt(annotation(kdl_parser))(input)?; + let (input, (raw, value)) = context("property value", cut(value))(input).map_err(|e| set_details(e, input, Some("invalid value"), Some("Please refer to https://github.com/kdl-org/kdl/blob/main/SPEC.md#value for valid KDL value syntaxes.")))?; + let mut entry = KdlEntry::new_prop(name, value); + entry.ty = ty; + entry.set_trailing(""); + entry.set_value_repr(raw); + #[cfg(feature = "span")] + entry.set_span(kdl_parser.span_from_before_and_after(start, input)); + Ok((input, entry)) + } +} + +fn argument<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlEntry, KdlParseError<&'a str>> + 'b { + move |input| { + let start = input; + let (input, ty) = opt(annotation(kdl_parser))(input)?; + let (input, (raw, value)) = if ty.is_some() { + context("valid value", cut(value))(input) + } else { + context("valid value", value)(input) + }?; + let mut entry = KdlEntry::new(value); + entry.ty = ty; + entry.set_trailing(""); + entry.set_value_repr(raw); + #[cfg(feature = "span")] + entry.set_span(kdl_parser.span_from_before_and_after(start, input)); + Ok((input, entry)) + } } fn value(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> { @@ -237,27 +370,37 @@ fn value(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> ))(input) } -fn children(input: &str) -> IResult<&str, (&str, KdlDocument), KdlParseError<&str>> { - let (input, 
before) = recognize(many0(node_space))(input)?; - let start = input; - let (input, _) = tag("{")(input)?; - let (input, children) = document(input)?; - let (input, _) = cut(context("closing '}' in node children block", tag("}")))(input) - .map_err(|e| set_details(e, start, Some("children block body"), None))?; - Ok((input, (before, children))) +fn children<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, (&'a str, KdlDocument), KdlParseError<&'a str>> + 'b { + move |input| { + let (input, before) = recognize(many0(node_space(kdl_parser)))(input)?; + let start = input; + let (input, _) = tag("{")(input)?; + let (input, children) = document(kdl_parser)(input)?; + let (input, _) = cut(context("closing '}' in node children block", tag("}")))(input) + .map_err(|e| set_details(e, start, Some("children block body"), None))?; + Ok((input, (before, children))) + } } -fn annotation(input: &str) -> IResult<&str, KdlIdentifier, KdlParseError<&str>> { - let start = input; - let (input, _) = tag("(")(input)?; - let (input, ty) = cut(identifier)(input)?; - let (input, _) = context("closing ')' for type annotation", cut(tag(")")))(input) - .map_err(|e| set_details(e, start, Some("annotation"), Some("annotations can only be KDL identifiers (including string identifiers), and can't have any space inside the parentheses.")))?; - Ok((input, ty)) +fn annotation<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlIdentifier, KdlParseError<&'a str>> + 'b { + move |input| { + let start = input; + let (input, _) = tag("(")(input)?; + let (input, ty) = cut(identifier(kdl_parser))(input)?; + let (input, _) = context("closing ')' for type annotation", cut(tag(")")))(input) + .map_err(|e| set_details(e, start, Some("annotation"), Some("annotations can only be KDL identifiers (including string identifiers), and can't have any space inside the parentheses.")))?; + Ok((input, ty)) + } } -fn all_whitespace(input: &str) 
-> IResult<&str, &str, KdlParseError<&str>> { - recognize(many0(alt((comment, unicode_space, newline))))(input) +fn all_whitespace<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, &'a str, KdlParseError<&'a str>> + 'b { + move |input| recognize(many0(alt((comment(kdl_parser), unicode_space, newline))))(input) } fn whitespace(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { @@ -268,15 +411,19 @@ fn linespace(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { recognize(alt((unicode_space, newline, single_line_comment)))(input) } -fn node_space(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { - context( - "node space", - recognize(alt(( - delimited(many0(whitespace), escline, many0(whitespace)), - recognize(many1(whitespace)), - node_slashdash, - ))), - )(input) +fn node_space<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, &'a str, KdlParseError<&'a str>> + 'b { + move |input| { + context( + "node space", + recognize(alt(( + delimited(many0(whitespace), escline, many0(whitespace)), + recognize(many1(whitespace)), + node_slashdash(kdl_parser), + ))), + )(input) + } } fn escline(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { @@ -329,8 +476,16 @@ pub(crate) fn newline(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { ))(input) } -fn comment(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { - alt((single_line_comment, multi_line_comment, slashdash_comment))(input) +fn comment<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, &'a str, KdlParseError<&'a str>> + 'b { + move |input| { + alt(( + single_line_comment, + multi_line_comment, + slashdash_comment(kdl_parser), + ))(input) + } } /// `single-line-comment := '//' ('\r' [^\n] | [^\r\n])* (newline | eof)` @@ -370,20 +525,31 @@ fn commented_block(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { ))(input) } -fn node_slashdash(input: &str) 
-> IResult<&str, &str, KdlParseError<&str>> { - recognize(preceded( - tag("/-"), - context( - "node following a slashdash", - cut(alt((recognize(entry_maybe_space), recognize(children)))), - ), - ))(input) - .map_err(|e| set_details(e, input, Some("slashdash"), None)) +fn node_slashdash<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, &'a str, KdlParseError<&'a str>> + 'b { + move |input| { + recognize(preceded( + tag("/-"), + context( + "node following a slashdash", + cut(alt(( + recognize(entry_maybe_space(kdl_parser)), + recognize(children(kdl_parser)), + ))), + ), + ))(input) + .map_err(|e| set_details(e, input, Some("slashdash"), None)) + } } -fn slashdash_comment(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { - recognize(preceded(tag("/-"), cut(node)))(input) - .map_err(|e| set_details(e, input, Some("slashdash"), None)) +fn slashdash_comment<'a: 'b, 'b>( + kdl_parser: &'b KdlParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, &'a str, KdlParseError<&'a str>> + 'b { + move |input| { + recognize(preceded(tag("/-"), cut(node(kdl_parser))))(input) + .map_err(|e| set_details(e, input, Some("slashdash"), None)) + } } fn boolean(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> { @@ -641,7 +807,9 @@ mod node_tests { #[test] fn basic() { - match node("foo 1 \"bar\"=false") { + let input = "foo 1 \"bar\"=false"; + let kdl_parser = crate::parser::KdlParser::new(input); + match node(&kdl_parser)(input) { Ok(("", parsed)) => { let mut ident = KdlIdentifier::from("foo"); ident.set_repr("foo"); @@ -667,7 +835,7 @@ mod node_tests { Err(e) => { panic!("failed to parse: {:?}", e); } - } + }; } } @@ -677,7 +845,9 @@ mod whitespace_tests { fn basic() { use super::all_whitespace; - assert_eq!(all_whitespace(" \t\n\r"), Ok(("", " \t\n\r"))); + let input = " \t\n\r"; + let kdl_parser = crate::parser::KdlParser::new(input); + assert_eq!(all_whitespace(&kdl_parser)(input), Ok(("", " \t\n\r"))); } } @@ 
-687,21 +857,30 @@ mod comment_tests { #[test] fn single_line() { - assert_eq!(comment("// Hello world"), Ok(("", "// Hello world"))); + let input = "// Hello world"; + let kdl_parser = crate::parser::KdlParser::new(input); + assert_eq!(comment(&kdl_parser)(input), Ok(("", "// Hello world"))); } #[test] fn multi_line() { - assert_eq!(comment("/* Hello world */"), Ok(("", "/* Hello world */"))); + let input = "/* Hello world */"; + let kdl_parser = crate::parser::KdlParser::new(input); + assert_eq!(comment(&kdl_parser)(input), Ok(("", "/* Hello world */"))); + + let input = "/* Hello /* world */ blah */"; + let kdl_parser = crate::parser::KdlParser::new(input); assert_eq!( - comment("/* Hello /* world */ blah */"), + comment(&kdl_parser)(input), Ok(("", "/* Hello /* world */ blah */")) ); } #[test] fn slashdash() { - assert_eq!(comment("/-foo 1 2"), Ok(("", "/-foo 1 2"))); + let input = "/-foo 1 2"; + let kdl_parser = crate::parser::KdlParser::new(input); + assert_eq!(comment(&kdl_parser)(input), Ok(("", "/-foo 1 2"))); } #[test] @@ -785,7 +964,9 @@ mod value_tests { ) )) ); - let (_, n) = node("node 0x0123_4567_89ab_cdef").expect("failed to parse node"); + let input = "node 0x0123_4567_89ab_cdef"; + let kdl_parser = crate::parser::KdlParser::new(input); + let (_, n) = node(&kdl_parser)(input).expect("failed to parse node"); assert_eq!(&n[0], &KdlValue::Base16(0x0123456789abcdef)); assert_eq!( value("0x123_4567"),