From 916adcf0fe26254b3df233439340af7bde73bcf4 Mon Sep 17 00:00:00 2001 From: abdelkadous Date: Wed, 3 Jun 2026 11:35:48 +0100 Subject: [PATCH] =?UTF-8?q?feature=20=E2=9C=A8=20(parser=5Fv2):=20init=20p?= =?UTF-8?q?arser=20that=20support=20v1=20and=20v2=20(not=20finshed=20yet),?= =?UTF-8?q?=20impl=20for=20document=20nodes=20node?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/de.rs | 1 + src/document.rs | 21 +- src/entry.rs | 4 +- src/identifier.rs | 4 +- src/node.rs | 6 +- src/se.rs | 1 + src/v2_parser.rs | 490 +++++++++++++++++++++++++++------------------- 7 files changed, 312 insertions(+), 215 deletions(-) diff --git a/src/de.rs b/src/de.rs index 83df6bf..ec224c3 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1702,6 +1702,7 @@ nothing #null } #[test] + #[allow(clippy::approx_constant)] fn float_values() { #[derive(Deserialize, Debug, PartialEq)] struct Config { diff --git a/src/document.rs b/src/document.rs index 6d313a1..639ab7c 100644 --- a/src/document.rs +++ b/src/document.rs @@ -4,7 +4,10 @@ use std::fmt::Display; #[cfg(feature = "v1")] use crate::KdlNodeFormat; -use crate::{FormatConfig, KdlError, KdlNode, KdlValue}; +use crate::{ + FormatConfig, KdlError, KdlNode, KdlValue, + v2_parser::{Input, KdlParser, KdlVersion}, +}; /// Represents a KDL /// [`Document`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#document). @@ -370,14 +373,15 @@ impl KdlDocument { /// Parses a KDL v2 string into a document. pub fn parse_v2(s: &str) -> Result { - crate::v2_parser::try_parse(crate::v2_parser::document, s) + let parser = KdlParser::new(KdlVersion::V2); + KdlParser::try_parse(|input: &mut Input<'_>| parser.document(input), s) } /// Parses a KDL v1 string into a document. #[cfg(feature = "v1")] pub fn parse_v1(s: &str) -> Result { - let ret: Result = s.parse(); - ret.map(|x| x.into()).map_err(|e| e.into()) + let parser = KdlParser::new(KdlVersion::V1); + KdlParser::try_parse(|input: &mut Input<'_>| parser.document(input), s) } /// Takes a KDL v1 document string and returns the same document, but @@ -920,10 +924,11 @@ foo 1 bar=0xdeadbeef { if let Some(ty) = entry.ty() { check_span_for_ident(ty, source); } - if let Some(KdlEntryFormat { value_repr, .. }) = entry.format() { - if entry.name().is_none() && entry.ty().is_none() { - check_span(value_repr, entry.span(), source); - } + if let Some(KdlEntryFormat { value_repr, .. }) = entry.format() + && entry.name().is_none() + && entry.ty().is_none() + { + check_span(value_repr, entry.span(), source); } } if let Some(children) = node.children() { diff --git a/src/entry.rs b/src/entry.rs index 93fcef4..cc94dcb 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -204,11 +204,11 @@ impl KdlEntry { pub fn parse(s: &str) -> Result { #[cfg(not(feature = "v1-fallback"))] { - v2_parser::try_parse(v2_parser::padded_node_entry, s) + v2_parser::KdlParser::try_parse(v2_parser::padded_node_entry, s) } #[cfg(feature = "v1-fallback")] { - v2_parser::try_parse(v2_parser::padded_node_entry, s) + v2_parser::KdlParser::try_parse(v2_parser::padded_node_entry, s) .or_else(|e| KdlEntry::parse_v1(s).map_err(|_| e)) } } diff --git a/src/identifier.rs b/src/identifier.rs index 49762f6..b492902 100644 --- a/src/identifier.rs +++ b/src/identifier.rs @@ -97,11 +97,11 @@ impl KdlIdentifier { pub fn parse(s: &str) -> Result { #[cfg(not(feature = "v1-fallback"))] { - v2_parser::try_parse(v2_parser::identifier, s) + v2_parser::KdlParser::try_parse(v2_parser::identifier, s) } #[cfg(feature = "v1-fallback")] { - v2_parser::try_parse(v2_parser::identifier, s) + v2_parser::KdlParser::try_parse(v2_parser::identifier, s) .or_else(|e| KdlIdentifier::parse_v1(s).map_err(|_| e)) } } diff --git a/src/node.rs b/src/node.rs index 6b3c75f..091b55d 100644 --- a/src/node.rs +++ b/src/node.rs @@ -338,11 +338,11 @@ impl KdlNode { pub fn parse(s: &str) -> Result { #[cfg(not(feature = "v1-fallback"))] { - v2_parser::try_parse(v2_parser::padded_node, s) + v2_parser::KdlParser::try_parse(v2_parser::padded_node, s) } #[cfg(feature = "v1-fallback")] { - v2_parser::try_parse(v2_parser::padded_node, s) + v2_parser::KdlParser::try_parse(v2_parser::padded_node, s) .or_else(|e| KdlNode::parse_v1(s).map_err(|_| e)) } } @@ -813,7 +813,7 @@ impl FromStr for KdlNode { type Err = KdlError; fn from_str(input: &str) -> Result { - v2_parser::try_parse(v2_parser::padded_node, input) + v2_parser::KdlParser::try_parse(v2_parser::padded_node, input) } } diff --git a/src/se.rs b/src/se.rs index 920527a..254f2a7 100644 --- a/src/se.rs +++ b/src/se.rs @@ -1833,6 +1833,7 @@ mod tests { } #[test] + #[allow(clippy::approx_constant)] fn float_value() { #[derive(Serialize)] struct Config { diff --git a/src/v2_parser.rs b/src/v2_parser.rs index d1fb22e..76b0a14 100644 --- a/src/v2_parser.rs +++ b/src/v2_parser.rs @@ -24,18 +24,232 @@ use crate::{ KdlIdentifier, KdlNode, KdlNodeFormat, KdlValue, }; -type Input<'a> = Recoverable, ErrMode>; +pub(crate) type Input<'a> = Recoverable, ErrMode>; type PResult = winnow::ModalResult; -pub(crate) fn try_parse<'a, P: ModalParser, T, KdlParseError>, T>( - mut parser: P, - input: &'a str, -) -> Result { - let (_, maybe_val, errs) = parser.recoverable_parse(LocatingSlice::new(input)); - if let (Some(v), true) = (maybe_val, errs.is_empty()) { - Ok(v) - } else { - Err(failure_from_errs(errs, input)) +pub(crate) enum KdlVersion { + V1, + V2, +} + +pub(crate) struct KdlParser { + kdl_version: KdlVersion, +} + +impl KdlParser { + pub(crate) fn new(kdl_version: KdlVersion) -> Self { + Self { kdl_version } + } + + pub(crate) fn try_parse<'a, P: ModalParser, T, KdlParseError>, T>( + mut parser: P, + input: &'a str, + ) -> Result { + let (_, maybe_val, errs) = parser.recoverable_parse(LocatingSlice::new(input)); + if let (Some(v), true) = (maybe_val, errs.is_empty()) { + Ok(v) + } else { + Err(failure_from_errs(errs, input)) + } + } + + /// `document := bom? nodes` + pub(crate) fn document(&self, input: &mut Input<'_>) -> PResult { + let bom = opt(bom.take()).parse_next(input)?; + let mut doc = (|input: &mut Input<'_>| self.nodes(input)).parse_next(input)?; + let badend = resume_after_cut( + cut_err(eof).context(cx().lbl("EOF").msg("Expected end of document")), + any.void(), + ) + .parse_next(input)? + .is_none(); + if badend { + (|input: &mut Input<'_>| self.document(input)).parse_next(input)?; + } + if let Some(bom) = bom + && let Some(fmt) = doc.format_mut() + { + fmt.leading = format!("{bom}{}", fmt.leading); + } + Ok(doc) + } + + /// `nodes := (line-space* node)* line-space*` + fn nodes(&self, input: &mut Input<'_>) -> PResult { + let mut leading = repeat( + 0.., + alt(( + line_space.void(), + (slashdash, (|input: &mut Input<'_>| self.base_node(input))).void(), + )), + ) + .map(|()| ()) + .take() + .parse_next(input)?; + let _start = input.checkpoint(); + let mut ns: Vec = separated( + 0.., + |input: &mut Input<'_>| self.node(input), + alt((node_terminator.void(), (eof.void(), any.void()).void())), + ) + .parse_next(input)?; + let _span = span_from_checkpoint(input, &_start); + opt(node_terminator).parse_next(input)?; + let trailing = repeat( + 0.., + alt(( + line_space.void(), + (slashdash, (|input: &mut Input<'_>| self.base_node(input))).void(), + )), + ) + .map(|()| ()) + .take() + .parse_next(input)?; + + // If there is a node, let it have the leading format + // This gives more consistent behavior + if let Some(first_node) = ns.get_mut(0) + && let Some(first_node_format) = first_node.format_mut() + { + first_node_format.leading = leading.into(); + leading = ""; + } + + Ok(KdlDocument { + nodes: ns, + format: Some(KdlDocumentFormat { + leading: leading.into(), + trailing: trailing.into(), + }), + #[cfg(feature = "span")] + span: _span, + }) + } + + /// base-node := slashdash? type? node-space* string + /// (node-space+ slashdash? node-prop-or-arg)* + /// (node-space+ slashdash node-children)* + /// (node-space+ node-children)? + /// (node-space+ slashdash node-children)* + /// node-space* + /// node := base-node node-space* node-terminator + /// final-node := base-node node-space* node-terminator? + fn node(&self, input: &mut Input<'_>) -> PResult { + let leading = repeat( + 0.., + alt(( + line_space.void(), + (slashdash, (|input: &mut Input<'_>| self.base_node(input))).void(), + )), + ) + .map(|()| ()) + .take() + .parse_next(input)?; + let mut nd = (|input: &mut Input<'_>| self.base_node(input)).parse_next(input)?; + if let Some(fmt) = nd.format_mut() { + fmt.leading = leading.into(); + } + Ok(nd) + } + + fn base_node(&self, input: &mut Input<'_>) -> PResult { + trace("children closing check", not(alt(("}".void(), eof.void())))).parse_next(input)?; + let _start = input.checkpoint(); + let open_curly = resume_after_cut( + cut_err(not("{").context( + cx().msg("Found child block instead of node name") + .lbl("node name") + .hlp("Did you forget to add the node name itself? Or perhaps terminated the node before its child block?"))), + "{".void(), + ) + .parse_next(input)?; + if open_curly.is_none() { + // If we got a weird misplaced `{`, we consume the "child block" here, + // because otherwise the error message is going to include the entire + // child block as its span, but we only want to point to the offending + // curly. + input.reset(&_start); + node_children.parse_next(input)?; + opt(slashdashed_children).parse_next(input)?; + peek(opt(node_terminator)).parse_next(input)?; + // We also return a fake node here, for good measure. + return Ok(KdlNode::new("<>")); + } + let ty = opt(ty).parse_next(input)?; + let after_ty = node_space0.take().parse_next(input)?; + let _before_ident = input.checkpoint(); + let name = resume_after_cut(cut_err(identifier).context( + cx().msg("Found invalid node name") + .lbl("node name") + .hlp("This can be any string type, including a quoted, raw, or multiline string, as well as a plain identifier string.") + + ), badval) + .parse_next(input)? + .unwrap_or_else(|| KdlIdentifier::from("/BAD_IDENT\\")); + let name_is_valid = name.repr.as_ref().map(|s| s.is_empty()) != Some(true); + // resume_after_cut() only picks up context from parsers passed into it. In + // order to add an error that's more specific about us wanting a _node name_ + // here, we have to do some shenanigans with a "fake" parse here. + // While this does result in double errors, I think it's still useful to get + // _both_ the error message for a string/ident parser error _and_ the error + // message for a node name being expected. + if !name_is_valid { + resume_after_cut((|input: &mut Input<'_>| -> PResult<()> { + Err(ErrMode::Cut(KdlParseError { + span: Some(span_from_checkpoint(input, &_before_ident)), + ..Default::default() + })) + }).context(cx().msg("Found invalid node name") + .lbl("node name") + .hlp("This can be any string type, including a quoted, raw, or multiline string, as well as a plain identifier string.")), + empty).parse_next(input)?; + } + let entries = repeat( + 0.., + (peek(node_space1), node_entry).map(|(_, e): ((), _)| e), + ) + .map(|e: Vec>| e.into_iter().flatten().collect::>()) + .parse_next(input)?; + let children = opt(( + before_node_children.take(), + trace("node children", node_children), + )) + .parse_next(input)?; + let (before_terminator, terminator) = if children.is_some() { + ( + opt(slashdashed_children).take(), + peek(opt(node_terminator).take()), + ) + .parse_next(input)? + } else { + ( + before_node_children.take(), + peek(opt(node_terminator).take()), + ) + .parse_next(input)? + }; + node_space0.parse_next(input)?; + let (before_inner_ty, ty, after_inner_ty) = ty.unwrap_or_default(); + let (before_children, children) = children + .map(|(before_children, children)| (before_children.into(), Some(children))) + .unwrap_or(("".into(), None)); + Ok(KdlNode { + ty, + name, + entries, + children, + format: Some(KdlNodeFormat { + before_ty_name: before_inner_ty.into(), + after_ty_name: after_inner_ty.into(), + after_ty: after_ty.into(), + before_children, + before_terminator: before_terminator.into(), + terminator: terminator.into(), + ..Default::default() + }), + #[cfg(feature = "span")] + span: span_from_checkpoint(input, &_start), + }) } } @@ -257,192 +471,15 @@ fn new_input(s: &str) -> Input<'_> { Recoverable::new(LocatingSlice::new(s)) } -/// `document := bom? nodes` -pub(crate) fn document(input: &mut Input<'_>) -> PResult { - let bom = opt(bom.take()).parse_next(input)?; - let mut doc = nodes.parse_next(input)?; - let badend = resume_after_cut( - cut_err(eof).context(cx().lbl("EOF").msg("Expected end of document")), - any.void(), - ) - .parse_next(input)? - .is_none(); - if badend { - document.parse_next(input)?; - } - if let Some(bom) = bom - && let Some(fmt) = doc.format_mut() - { - fmt.leading = format!("{bom}{}", fmt.leading); - } - Ok(doc) -} - -/// `nodes := (line-space* node)* line-space*` -fn nodes(input: &mut Input<'_>) -> PResult { - let mut leading = repeat(0.., alt((line_space.void(), (slashdash, base_node).void()))) - .map(|()| ()) - .take() - .parse_next(input)?; - let _start = input.checkpoint(); - let mut ns: Vec = separated( - 0.., - node, - alt((node_terminator.void(), (eof.void(), any.void()).void())), - ) - .parse_next(input)?; - let _span = span_from_checkpoint(input, &_start); - opt(node_terminator).parse_next(input)?; - let trailing = repeat(0.., alt((line_space.void(), (slashdash, base_node).void()))) - .map(|()| ()) - .take() - .parse_next(input)?; - - // If there is a node, let it have the leading format - // This gives more consistent behavior - if let Some(first_node) = ns.get_mut(0) - && let Some(first_node_format) = first_node.format_mut() - { - first_node_format.leading = leading.into(); - leading = ""; - } - - Ok(KdlDocument { - nodes: ns, - format: Some(KdlDocumentFormat { - leading: leading.into(), - trailing: trailing.into(), - }), - #[cfg(feature = "span")] - span: _span, - }) -} - -/// base-node := slashdash? type? node-space* string -/// (node-space+ slashdash? node-prop-or-arg)* -/// (node-space+ slashdash node-children)* -/// (node-space+ node-children)? -/// (node-space+ slashdash node-children)* -/// node-space* -/// node := base-node node-space* node-terminator -/// final-node := base-node node-space* node-terminator? -fn node(input: &mut Input<'_>) -> PResult { - let leading = repeat(0.., alt((line_space.void(), (slashdash, base_node).void()))) - .map(|()| ()) - .take() - .parse_next(input)?; - let mut nd = base_node.parse_next(input)?; - if let Some(fmt) = nd.format_mut() { - fmt.leading = leading.into(); - } - Ok(nd) -} - -fn base_node(input: &mut Input<'_>) -> PResult { - trace("children closing check", not(alt(("}".void(), eof.void())))).parse_next(input)?; - let _start = input.checkpoint(); - let open_curly = resume_after_cut( - cut_err(not("{").context( - cx().msg("Found child block instead of node name") - .lbl("node name") - .hlp("Did you forget to add the node name itself? Or perhaps terminated the node before its child block?"))), - "{".void(), - ) - .parse_next(input)?; - if open_curly.is_none() { - // If we got a weird misplaced `{`, we consume the "child block" here, - // because otherwise the error message is going to include the entire - // child block as its span, but we only want to point to the offending - // curly. - input.reset(&_start); - node_children.parse_next(input)?; - opt(slashdashed_children).parse_next(input)?; - peek(opt(node_terminator)).parse_next(input)?; - // We also return a fake node here, for good measure. - return Ok(KdlNode::new("<>")); - } - let ty = opt(ty).parse_next(input)?; - let after_ty = node_space0.take().parse_next(input)?; - let _before_ident = input.checkpoint(); - let name = resume_after_cut(cut_err(identifier).context( - cx().msg("Found invalid node name") - .lbl("node name") - .hlp("This can be any string type, including a quoted, raw, or multiline string, as well as a plain identifier string.") - - ), badval) - .parse_next(input)? - .unwrap_or_else(|| KdlIdentifier::from("/BAD_IDENT\\")); - let name_is_valid = name.repr.as_ref().map(|s| s.is_empty()) != Some(true); - // resume_after_cut() only picks up context from parsers passed into it. In - // order to add an error that's more specific about us wanting a _node name_ - // here, we have to do some shenanigans with a "fake" parse here. - // While this does result in double errors, I think it's still useful to get - // _both_ the error message for a string/ident parser error _and_ the error - // message for a node name being expected. - if !name_is_valid { - resume_after_cut((|input: &mut Input<'_>| -> PResult<()> { - Err(ErrMode::Cut(KdlParseError { - span: Some(span_from_checkpoint(input, &_before_ident)), - ..Default::default() - })) - }).context(cx().msg("Found invalid node name") - .lbl("node name") - .hlp("This can be any string type, including a quoted, raw, or multiline string, as well as a plain identifier string.")), - empty).parse_next(input)?; - } - let entries = repeat( - 0.., - (peek(node_space1), node_entry).map(|(_, e): ((), _)| e), - ) - .map(|e: Vec>| e.into_iter().flatten().collect::>()) - .parse_next(input)?; - let children = opt(( - before_node_children.take(), - trace("node children", node_children), - )) - .parse_next(input)?; - let (before_terminator, terminator) = if children.is_some() { - ( - opt(slashdashed_children).take(), - peek(opt(node_terminator).take()), - ) - .parse_next(input)? - } else { - ( - before_node_children.take(), - peek(opt(node_terminator).take()), - ) - .parse_next(input)? - }; - node_space0.parse_next(input)?; - let (before_inner_ty, ty, after_inner_ty) = ty.unwrap_or_default(); - let (before_children, children) = children - .map(|(before_children, children)| (before_children.into(), Some(children))) - .unwrap_or(("".into(), None)); - Ok(KdlNode { - ty, - name, - entries, - children, - format: Some(KdlNodeFormat { - before_ty_name: before_inner_ty.into(), - after_ty_name: after_inner_ty.into(), - after_ty: after_ty.into(), - before_children, - before_terminator: before_terminator.into(), - terminator: terminator.into(), - ..Default::default() - }), - #[cfg(feature = "span")] - span: span_from_checkpoint(input, &_start), - }) -} - #[cfg(test)] #[test] fn test_node() { + let parser_v1 = KdlParser::new(KdlVersion::V1); + let parser_v2 = KdlParser::new(KdlVersion::V2); assert_eq!( - node.parse(new_input("foo")).unwrap(), + (|input: &mut Input<'_>| parser_v2.node(input)) + .parse(new_input("foo")) + .unwrap(), KdlNode { ty: None, name: KdlIdentifier { @@ -460,7 +497,45 @@ fn test_node() { ); assert_eq!( - node.parse(new_input("foo bar")).unwrap(), + (|input: &mut Input<'_>| parser_v1.node(input)) + .parse(new_input("foo bat=true")) + .unwrap(), + KdlNode { + ty: None, + name: KdlIdentifier { + value: "foo".into(), + repr: Some("foo".into()), + #[cfg(feature = "span")] + span: SourceSpan::new(0.into(), 3), + }, + entries: vec![KdlEntry { + ty: None, + name: Some(KdlIdentifier { + value: "bat".into(), + repr: Some("bat".into()), + #[cfg(feature = "span")] + span: SourceSpan::new(4.into(), 3) + }), + value: KdlValue::Bool(true), + format: Some(KdlEntryFormat { + value_repr: "true".into(), + leading: " ".into(), + ..Default::default() + }), + #[cfg(feature = "span")] + span: SourceSpan::new(4.into(), 8) + }], + children: None, + format: Some(Default::default()), + #[cfg(feature = "span")] + span: (0..12).into() + } + ); + + assert_eq!( + (|input: &mut Input<'_>| parser_v2.node(input)) + .parse(new_input("foo bar")) + .unwrap(), KdlNode { ty: None, name: KdlIdentifier { @@ -492,8 +567,9 @@ fn test_node() { } pub(crate) fn padded_node(input: &mut Input<'_>) -> PResult { + let parser_v2 = KdlParser::new(KdlVersion::V2); let ((mut node, _terminator, trailing), _span) = ( - node, + (|input: &mut Input<'_>| parser_v2.node(input)), opt(node_terminator), repeat(0.., alt((line_space, node_space))) .map(|_: ()| ()) @@ -753,11 +829,16 @@ fn around_children_test() { /// `node-children := '{' nodes final-node? '}'` fn node_children(input: &mut Input<'_>) -> PResult { + let parser_v2 = KdlParser::new(KdlVersion::V2); + let _before_open = input.checkpoint(); let _before_open_loc = input.current_token_start(); "{".parse_next(input)?; let _after_open_loc = input.previous_token_end(); - let ns = trace("child nodes", nodes).parse_next(input)?; + let ns = trace("child nodes", |input: &mut Input<'_>| { + parser_v2.nodes(input) + }) + .parse_next(input)?; let _after_nodes = input.checkpoint(); let _after_nodes_loc = input.previous_token_end(); let close_res: PResult<_> = cut_err("}") @@ -1547,16 +1628,16 @@ mod string_tests { /// keyword-number := '#inf' | '#-inf' | '#nan' /// ```` fn keyword(input: &mut Input<'_>) -> PResult { - let _ = "#".parse_next(input)?; - not(one_of(['#', '"'])).parse_next(input)?; - cut_err(alt(( + // let _ = "#".parse_next(input)?; + // not(one_of(['#', '"'])).parse_next(input)?; + alt(( "true".value(KdlValue::Bool(true)), "false".value(KdlValue::Bool(false)), "null".value(KdlValue::Null), "nan".value(KdlValue::Float(f64::NAN)), "inf".value(KdlValue::Float(f64::INFINITY)), "-inf".value(KdlValue::Float(f64::NEG_INFINITY)), - ))) + )) .context(cx().lbl("keyword").hlp( "Available keywords in KDL are '#true', '#false', '#null', '#nan', '#inf', and '#-inf'; they are case-sensitive.", )) @@ -1610,7 +1691,11 @@ fn escline(input: &mut Input<'_>) -> PResult<()> { #[cfg(test)] #[test] fn escline_test() { - let node = node.parse(new_input("foo bar\\\n baz")).unwrap(); + let parser_v2 = KdlParser::new(KdlVersion::V2); + + let node = (|input: &mut Input<'_>| parser_v2.node(input)) + .parse(new_input("foo bar\\\n baz")) + .unwrap(); assert_eq!(node.entries().len(), 2); } @@ -1723,6 +1808,11 @@ fn slashdash(input: &mut Input<'_>) -> PResult<()> { #[cfg(test)] #[test] fn slashdash_tests() { + let parser_v2 = KdlParser::new(KdlVersion::V2); + + let mut document = |input: &mut Input<'_>| KdlParser::new(KdlVersion::V2).document(input); + let mut node = |input: &mut Input<'_>| parser_v2.node(input); + assert!(document.parse(new_input("/- foo bar")).is_ok()); assert!(document.parse(new_input("/- foo bar;")).is_ok()); assert!(document.parse(new_input("/-n 1;")).is_ok());