From 916adcf0fe26254b3df233439340af7bde73bcf4 Mon Sep 17 00:00:00 2001 From: abdelkadous Date: Wed, 3 Jun 2026 11:35:48 +0100 Subject: [PATCH 1/2] =?UTF-8?q?feature=20=E2=9C=A8=20(parser=5Fv2):=20init?= =?UTF-8?q?=20parser=20that=20support=20v1=20and=20v2=20(not=20finshed=20y?= =?UTF-8?q?et),=20impl=20for=20document=20nodes=20node?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/de.rs | 1 + src/document.rs | 21 +- src/entry.rs | 4 +- src/identifier.rs | 4 +- src/node.rs | 6 +- src/se.rs | 1 + src/v2_parser.rs | 490 +++++++++++++++++++++++++++------------------- 7 files changed, 312 insertions(+), 215 deletions(-) diff --git a/src/de.rs b/src/de.rs index 83df6bf..ec224c3 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1702,6 +1702,7 @@ nothing #null } #[test] + #[allow(clippy::approx_constant)] fn float_values() { #[derive(Deserialize, Debug, PartialEq)] struct Config { diff --git a/src/document.rs b/src/document.rs index 6d313a1..639ab7c 100644 --- a/src/document.rs +++ b/src/document.rs @@ -4,7 +4,10 @@ use std::fmt::Display; #[cfg(feature = "v1")] use crate::KdlNodeFormat; -use crate::{FormatConfig, KdlError, KdlNode, KdlValue}; +use crate::{ + FormatConfig, KdlError, KdlNode, KdlValue, + v2_parser::{Input, KdlParser, KdlVersion}, +}; /// Represents a KDL /// [`Document`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#document). @@ -370,14 +373,15 @@ impl KdlDocument { /// Parses a KDL v2 string into a document. pub fn parse_v2(s: &str) -> Result { - crate::v2_parser::try_parse(crate::v2_parser::document, s) + let parser = KdlParser::new(KdlVersion::V2); + KdlParser::try_parse(|input: &mut Input<'_>| parser.document(input), s) } /// Parses a KDL v1 string into a document. 
#[cfg(feature = "v1")] pub fn parse_v1(s: &str) -> Result { - let ret: Result = s.parse(); - ret.map(|x| x.into()).map_err(|e| e.into()) + let parser = KdlParser::new(KdlVersion::V1); + KdlParser::try_parse(|input: &mut Input<'_>| parser.document(input), s) } /// Takes a KDL v1 document string and returns the same document, but @@ -920,10 +924,11 @@ foo 1 bar=0xdeadbeef { if let Some(ty) = entry.ty() { check_span_for_ident(ty, source); } - if let Some(KdlEntryFormat { value_repr, .. }) = entry.format() { - if entry.name().is_none() && entry.ty().is_none() { - check_span(value_repr, entry.span(), source); - } + if let Some(KdlEntryFormat { value_repr, .. }) = entry.format() + && entry.name().is_none() + && entry.ty().is_none() + { + check_span(value_repr, entry.span(), source); } } if let Some(children) = node.children() { diff --git a/src/entry.rs b/src/entry.rs index 93fcef4..cc94dcb 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -204,11 +204,11 @@ impl KdlEntry { pub fn parse(s: &str) -> Result { #[cfg(not(feature = "v1-fallback"))] { - v2_parser::try_parse(v2_parser::padded_node_entry, s) + v2_parser::KdlParser::try_parse(v2_parser::padded_node_entry, s) } #[cfg(feature = "v1-fallback")] { - v2_parser::try_parse(v2_parser::padded_node_entry, s) + v2_parser::KdlParser::try_parse(v2_parser::padded_node_entry, s) .or_else(|e| KdlEntry::parse_v1(s).map_err(|_| e)) } } diff --git a/src/identifier.rs b/src/identifier.rs index 49762f6..b492902 100644 --- a/src/identifier.rs +++ b/src/identifier.rs @@ -97,11 +97,11 @@ impl KdlIdentifier { pub fn parse(s: &str) -> Result { #[cfg(not(feature = "v1-fallback"))] { - v2_parser::try_parse(v2_parser::identifier, s) + v2_parser::KdlParser::try_parse(v2_parser::identifier, s) } #[cfg(feature = "v1-fallback")] { - v2_parser::try_parse(v2_parser::identifier, s) + v2_parser::KdlParser::try_parse(v2_parser::identifier, s) .or_else(|e| KdlIdentifier::parse_v1(s).map_err(|_| e)) } } diff --git a/src/node.rs b/src/node.rs index 
6b3c75f..091b55d 100644 --- a/src/node.rs +++ b/src/node.rs @@ -338,11 +338,11 @@ impl KdlNode { pub fn parse(s: &str) -> Result { #[cfg(not(feature = "v1-fallback"))] { - v2_parser::try_parse(v2_parser::padded_node, s) + v2_parser::KdlParser::try_parse(v2_parser::padded_node, s) } #[cfg(feature = "v1-fallback")] { - v2_parser::try_parse(v2_parser::padded_node, s) + v2_parser::KdlParser::try_parse(v2_parser::padded_node, s) .or_else(|e| KdlNode::parse_v1(s).map_err(|_| e)) } } @@ -813,7 +813,7 @@ impl FromStr for KdlNode { type Err = KdlError; fn from_str(input: &str) -> Result { - v2_parser::try_parse(v2_parser::padded_node, input) + v2_parser::KdlParser::try_parse(v2_parser::padded_node, input) } } diff --git a/src/se.rs b/src/se.rs index 920527a..254f2a7 100644 --- a/src/se.rs +++ b/src/se.rs @@ -1833,6 +1833,7 @@ mod tests { } #[test] + #[allow(clippy::approx_constant)] fn float_value() { #[derive(Serialize)] struct Config { diff --git a/src/v2_parser.rs b/src/v2_parser.rs index d1fb22e..76b0a14 100644 --- a/src/v2_parser.rs +++ b/src/v2_parser.rs @@ -24,18 +24,232 @@ use crate::{ KdlIdentifier, KdlNode, KdlNodeFormat, KdlValue, }; -type Input<'a> = Recoverable, ErrMode>; +pub(crate) type Input<'a> = Recoverable, ErrMode>; type PResult = winnow::ModalResult; -pub(crate) fn try_parse<'a, P: ModalParser, T, KdlParseError>, T>( - mut parser: P, - input: &'a str, -) -> Result { - let (_, maybe_val, errs) = parser.recoverable_parse(LocatingSlice::new(input)); - if let (Some(v), true) = (maybe_val, errs.is_empty()) { - Ok(v) - } else { - Err(failure_from_errs(errs, input)) +pub(crate) enum KdlVersion { + V1, + V2, +} + +pub(crate) struct KdlParser { + kdl_version: KdlVersion, +} + +impl KdlParser { + pub(crate) fn new(kdl_version: KdlVersion) -> Self { + Self { kdl_version } + } + + pub(crate) fn try_parse<'a, P: ModalParser, T, KdlParseError>, T>( + mut parser: P, + input: &'a str, + ) -> Result { + let (_, maybe_val, errs) = 
parser.recoverable_parse(LocatingSlice::new(input)); + if let (Some(v), true) = (maybe_val, errs.is_empty()) { + Ok(v) + } else { + Err(failure_from_errs(errs, input)) + } + } + + /// `document := bom? nodes` + pub(crate) fn document(&self, input: &mut Input<'_>) -> PResult { + let bom = opt(bom.take()).parse_next(input)?; + let mut doc = (|input: &mut Input<'_>| self.nodes(input)).parse_next(input)?; + let badend = resume_after_cut( + cut_err(eof).context(cx().lbl("EOF").msg("Expected end of document")), + any.void(), + ) + .parse_next(input)? + .is_none(); + if badend { + (|input: &mut Input<'_>| self.document(input)).parse_next(input)?; + } + if let Some(bom) = bom + && let Some(fmt) = doc.format_mut() + { + fmt.leading = format!("{bom}{}", fmt.leading); + } + Ok(doc) + } + + /// `nodes := (line-space* node)* line-space*` + fn nodes(&self, input: &mut Input<'_>) -> PResult { + let mut leading = repeat( + 0.., + alt(( + line_space.void(), + (slashdash, (|input: &mut Input<'_>| self.base_node(input))).void(), + )), + ) + .map(|()| ()) + .take() + .parse_next(input)?; + let _start = input.checkpoint(); + let mut ns: Vec = separated( + 0.., + |input: &mut Input<'_>| self.node(input), + alt((node_terminator.void(), (eof.void(), any.void()).void())), + ) + .parse_next(input)?; + let _span = span_from_checkpoint(input, &_start); + opt(node_terminator).parse_next(input)?; + let trailing = repeat( + 0.., + alt(( + line_space.void(), + (slashdash, (|input: &mut Input<'_>| self.base_node(input))).void(), + )), + ) + .map(|()| ()) + .take() + .parse_next(input)?; + + // If there is a node, let it have the leading format + // This gives more consistent behavior + if let Some(first_node) = ns.get_mut(0) + && let Some(first_node_format) = first_node.format_mut() + { + first_node_format.leading = leading.into(); + leading = ""; + } + + Ok(KdlDocument { + nodes: ns, + format: Some(KdlDocumentFormat { + leading: leading.into(), + trailing: trailing.into(), + }), + #[cfg(feature = 
"span")] + span: _span, + }) + } + + /// base-node := slashdash? type? node-space* string + /// (node-space+ slashdash? node-prop-or-arg)* + /// (node-space+ slashdash node-children)* + /// (node-space+ node-children)? + /// (node-space+ slashdash node-children)* + /// node-space* + /// node := base-node node-space* node-terminator + /// final-node := base-node node-space* node-terminator? + fn node(&self, input: &mut Input<'_>) -> PResult { + let leading = repeat( + 0.., + alt(( + line_space.void(), + (slashdash, (|input: &mut Input<'_>| self.base_node(input))).void(), + )), + ) + .map(|()| ()) + .take() + .parse_next(input)?; + let mut nd = (|input: &mut Input<'_>| self.base_node(input)).parse_next(input)?; + if let Some(fmt) = nd.format_mut() { + fmt.leading = leading.into(); + } + Ok(nd) + } + + fn base_node(&self, input: &mut Input<'_>) -> PResult { + trace("children closing check", not(alt(("}".void(), eof.void())))).parse_next(input)?; + let _start = input.checkpoint(); + let open_curly = resume_after_cut( + cut_err(not("{").context( + cx().msg("Found child block instead of node name") + .lbl("node name") + .hlp("Did you forget to add the node name itself? Or perhaps terminated the node before its child block?"))), + "{".void(), + ) + .parse_next(input)?; + if open_curly.is_none() { + // If we got a weird misplaced `{`, we consume the "child block" here, + // because otherwise the error message is going to include the entire + // child block as its span, but we only want to point to the offending + // curly. + input.reset(&_start); + node_children.parse_next(input)?; + opt(slashdashed_children).parse_next(input)?; + peek(opt(node_terminator)).parse_next(input)?; + // We also return a fake node here, for good measure. 
+ return Ok(KdlNode::new("<>")); + } + let ty = opt(ty).parse_next(input)?; + let after_ty = node_space0.take().parse_next(input)?; + let _before_ident = input.checkpoint(); + let name = resume_after_cut(cut_err(identifier).context( + cx().msg("Found invalid node name") + .lbl("node name") + .hlp("This can be any string type, including a quoted, raw, or multiline string, as well as a plain identifier string.") + + ), badval) + .parse_next(input)? + .unwrap_or_else(|| KdlIdentifier::from("/BAD_IDENT\\")); + let name_is_valid = name.repr.as_ref().map(|s| s.is_empty()) != Some(true); + // resume_after_cut() only picks up context from parsers passed into it. In + // order to add an error that's more specific about us wanting a _node name_ + // here, we have to do some shenanigans with a "fake" parse here. + // While this does result in double errors, I think it's still useful to get + // _both_ the error message for a string/ident parser error _and_ the error + // message for a node name being expected. + if !name_is_valid { + resume_after_cut((|input: &mut Input<'_>| -> PResult<()> { + Err(ErrMode::Cut(KdlParseError { + span: Some(span_from_checkpoint(input, &_before_ident)), + ..Default::default() + })) + }).context(cx().msg("Found invalid node name") + .lbl("node name") + .hlp("This can be any string type, including a quoted, raw, or multiline string, as well as a plain identifier string.")), + empty).parse_next(input)?; + } + let entries = repeat( + 0.., + (peek(node_space1), node_entry).map(|(_, e): ((), _)| e), + ) + .map(|e: Vec>| e.into_iter().flatten().collect::>()) + .parse_next(input)?; + let children = opt(( + before_node_children.take(), + trace("node children", node_children), + )) + .parse_next(input)?; + let (before_terminator, terminator) = if children.is_some() { + ( + opt(slashdashed_children).take(), + peek(opt(node_terminator).take()), + ) + .parse_next(input)? 
+ } else { + ( + before_node_children.take(), + peek(opt(node_terminator).take()), + ) + .parse_next(input)? + }; + node_space0.parse_next(input)?; + let (before_inner_ty, ty, after_inner_ty) = ty.unwrap_or_default(); + let (before_children, children) = children + .map(|(before_children, children)| (before_children.into(), Some(children))) + .unwrap_or(("".into(), None)); + Ok(KdlNode { + ty, + name, + entries, + children, + format: Some(KdlNodeFormat { + before_ty_name: before_inner_ty.into(), + after_ty_name: after_inner_ty.into(), + after_ty: after_ty.into(), + before_children, + before_terminator: before_terminator.into(), + terminator: terminator.into(), + ..Default::default() + }), + #[cfg(feature = "span")] + span: span_from_checkpoint(input, &_start), + }) } } @@ -257,192 +471,15 @@ fn new_input(s: &str) -> Input<'_> { Recoverable::new(LocatingSlice::new(s)) } -/// `document := bom? nodes` -pub(crate) fn document(input: &mut Input<'_>) -> PResult { - let bom = opt(bom.take()).parse_next(input)?; - let mut doc = nodes.parse_next(input)?; - let badend = resume_after_cut( - cut_err(eof).context(cx().lbl("EOF").msg("Expected end of document")), - any.void(), - ) - .parse_next(input)? 
- .is_none(); - if badend { - document.parse_next(input)?; - } - if let Some(bom) = bom - && let Some(fmt) = doc.format_mut() - { - fmt.leading = format!("{bom}{}", fmt.leading); - } - Ok(doc) -} - -/// `nodes := (line-space* node)* line-space*` -fn nodes(input: &mut Input<'_>) -> PResult { - let mut leading = repeat(0.., alt((line_space.void(), (slashdash, base_node).void()))) - .map(|()| ()) - .take() - .parse_next(input)?; - let _start = input.checkpoint(); - let mut ns: Vec = separated( - 0.., - node, - alt((node_terminator.void(), (eof.void(), any.void()).void())), - ) - .parse_next(input)?; - let _span = span_from_checkpoint(input, &_start); - opt(node_terminator).parse_next(input)?; - let trailing = repeat(0.., alt((line_space.void(), (slashdash, base_node).void()))) - .map(|()| ()) - .take() - .parse_next(input)?; - - // If there is a node, let it have the leading format - // This gives more consistent behavior - if let Some(first_node) = ns.get_mut(0) - && let Some(first_node_format) = first_node.format_mut() - { - first_node_format.leading = leading.into(); - leading = ""; - } - - Ok(KdlDocument { - nodes: ns, - format: Some(KdlDocumentFormat { - leading: leading.into(), - trailing: trailing.into(), - }), - #[cfg(feature = "span")] - span: _span, - }) -} - -/// base-node := slashdash? type? node-space* string -/// (node-space+ slashdash? node-prop-or-arg)* -/// (node-space+ slashdash node-children)* -/// (node-space+ node-children)? -/// (node-space+ slashdash node-children)* -/// node-space* -/// node := base-node node-space* node-terminator -/// final-node := base-node node-space* node-terminator? 
-fn node(input: &mut Input<'_>) -> PResult { - let leading = repeat(0.., alt((line_space.void(), (slashdash, base_node).void()))) - .map(|()| ()) - .take() - .parse_next(input)?; - let mut nd = base_node.parse_next(input)?; - if let Some(fmt) = nd.format_mut() { - fmt.leading = leading.into(); - } - Ok(nd) -} - -fn base_node(input: &mut Input<'_>) -> PResult { - trace("children closing check", not(alt(("}".void(), eof.void())))).parse_next(input)?; - let _start = input.checkpoint(); - let open_curly = resume_after_cut( - cut_err(not("{").context( - cx().msg("Found child block instead of node name") - .lbl("node name") - .hlp("Did you forget to add the node name itself? Or perhaps terminated the node before its child block?"))), - "{".void(), - ) - .parse_next(input)?; - if open_curly.is_none() { - // If we got a weird misplaced `{`, we consume the "child block" here, - // because otherwise the error message is going to include the entire - // child block as its span, but we only want to point to the offending - // curly. - input.reset(&_start); - node_children.parse_next(input)?; - opt(slashdashed_children).parse_next(input)?; - peek(opt(node_terminator)).parse_next(input)?; - // We also return a fake node here, for good measure. - return Ok(KdlNode::new("<>")); - } - let ty = opt(ty).parse_next(input)?; - let after_ty = node_space0.take().parse_next(input)?; - let _before_ident = input.checkpoint(); - let name = resume_after_cut(cut_err(identifier).context( - cx().msg("Found invalid node name") - .lbl("node name") - .hlp("This can be any string type, including a quoted, raw, or multiline string, as well as a plain identifier string.") - - ), badval) - .parse_next(input)? - .unwrap_or_else(|| KdlIdentifier::from("/BAD_IDENT\\")); - let name_is_valid = name.repr.as_ref().map(|s| s.is_empty()) != Some(true); - // resume_after_cut() only picks up context from parsers passed into it. 
In - // order to add an error that's more specific about us wanting a _node name_ - // here, we have to do some shenanigans with a "fake" parse here. - // While this does result in double errors, I think it's still useful to get - // _both_ the error message for a string/ident parser error _and_ the error - // message for a node name being expected. - if !name_is_valid { - resume_after_cut((|input: &mut Input<'_>| -> PResult<()> { - Err(ErrMode::Cut(KdlParseError { - span: Some(span_from_checkpoint(input, &_before_ident)), - ..Default::default() - })) - }).context(cx().msg("Found invalid node name") - .lbl("node name") - .hlp("This can be any string type, including a quoted, raw, or multiline string, as well as a plain identifier string.")), - empty).parse_next(input)?; - } - let entries = repeat( - 0.., - (peek(node_space1), node_entry).map(|(_, e): ((), _)| e), - ) - .map(|e: Vec>| e.into_iter().flatten().collect::>()) - .parse_next(input)?; - let children = opt(( - before_node_children.take(), - trace("node children", node_children), - )) - .parse_next(input)?; - let (before_terminator, terminator) = if children.is_some() { - ( - opt(slashdashed_children).take(), - peek(opt(node_terminator).take()), - ) - .parse_next(input)? - } else { - ( - before_node_children.take(), - peek(opt(node_terminator).take()), - ) - .parse_next(input)? 
- }; - node_space0.parse_next(input)?; - let (before_inner_ty, ty, after_inner_ty) = ty.unwrap_or_default(); - let (before_children, children) = children - .map(|(before_children, children)| (before_children.into(), Some(children))) - .unwrap_or(("".into(), None)); - Ok(KdlNode { - ty, - name, - entries, - children, - format: Some(KdlNodeFormat { - before_ty_name: before_inner_ty.into(), - after_ty_name: after_inner_ty.into(), - after_ty: after_ty.into(), - before_children, - before_terminator: before_terminator.into(), - terminator: terminator.into(), - ..Default::default() - }), - #[cfg(feature = "span")] - span: span_from_checkpoint(input, &_start), - }) -} - #[cfg(test)] #[test] fn test_node() { + let parser_v1 = KdlParser::new(KdlVersion::V1); + let parser_v2 = KdlParser::new(KdlVersion::V2); assert_eq!( - node.parse(new_input("foo")).unwrap(), + (|input: &mut Input<'_>| parser_v2.node(input)) + .parse(new_input("foo")) + .unwrap(), KdlNode { ty: None, name: KdlIdentifier { @@ -460,7 +497,45 @@ fn test_node() { ); assert_eq!( - node.parse(new_input("foo bar")).unwrap(), + (|input: &mut Input<'_>| parser_v1.node(input)) + .parse(new_input("foo bat=true")) + .unwrap(), + KdlNode { + ty: None, + name: KdlIdentifier { + value: "foo".into(), + repr: Some("foo".into()), + #[cfg(feature = "span")] + span: SourceSpan::new(0.into(), 3), + }, + entries: vec![KdlEntry { + ty: None, + name: Some(KdlIdentifier { + value: "bat".into(), + repr: Some("bat".into()), + #[cfg(feature = "span")] + span: SourceSpan::new(4.into(), 3) + }), + value: KdlValue::Bool(true), + format: Some(KdlEntryFormat { + value_repr: "true".into(), + leading: " ".into(), + ..Default::default() + }), + #[cfg(feature = "span")] + span: SourceSpan::new(4.into(), 8) + }], + children: None, + format: Some(Default::default()), + #[cfg(feature = "span")] + span: (0..12).into() + } + ); + + assert_eq!( + (|input: &mut Input<'_>| parser_v2.node(input)) + .parse(new_input("foo bar")) + .unwrap(), KdlNode { ty: 
None, name: KdlIdentifier { @@ -492,8 +567,9 @@ fn test_node() { } pub(crate) fn padded_node(input: &mut Input<'_>) -> PResult { + let parser_v2 = KdlParser::new(KdlVersion::V2); let ((mut node, _terminator, trailing), _span) = ( - node, + (|input: &mut Input<'_>| parser_v2.node(input)), opt(node_terminator), repeat(0.., alt((line_space, node_space))) .map(|_: ()| ()) @@ -753,11 +829,16 @@ fn around_children_test() { /// `node-children := '{' nodes final-node? '}'` fn node_children(input: &mut Input<'_>) -> PResult { + let parser_v2 = KdlParser::new(KdlVersion::V2); + let _before_open = input.checkpoint(); let _before_open_loc = input.current_token_start(); "{".parse_next(input)?; let _after_open_loc = input.previous_token_end(); - let ns = trace("child nodes", nodes).parse_next(input)?; + let ns = trace("child nodes", |input: &mut Input<'_>| { + parser_v2.nodes(input) + }) + .parse_next(input)?; let _after_nodes = input.checkpoint(); let _after_nodes_loc = input.previous_token_end(); let close_res: PResult<_> = cut_err("}") @@ -1547,16 +1628,16 @@ mod string_tests { /// keyword-number := '#inf' | '#-inf' | '#nan' /// ```` fn keyword(input: &mut Input<'_>) -> PResult { - let _ = "#".parse_next(input)?; - not(one_of(['#', '"'])).parse_next(input)?; - cut_err(alt(( + // let _ = "#".parse_next(input)?; + // not(one_of(['#', '"'])).parse_next(input)?; + alt(( "true".value(KdlValue::Bool(true)), "false".value(KdlValue::Bool(false)), "null".value(KdlValue::Null), "nan".value(KdlValue::Float(f64::NAN)), "inf".value(KdlValue::Float(f64::INFINITY)), "-inf".value(KdlValue::Float(f64::NEG_INFINITY)), - ))) + )) .context(cx().lbl("keyword").hlp( "Available keywords in KDL are '#true', '#false', '#null', '#nan', '#inf', and '#-inf'; they are case-sensitive.", )) @@ -1610,7 +1691,11 @@ fn escline(input: &mut Input<'_>) -> PResult<()> { #[cfg(test)] #[test] fn escline_test() { - let node = node.parse(new_input("foo bar\\\n baz")).unwrap(); + let parser_v2 = 
KdlParser::new(KdlVersion::V2); + + let node = (|input: &mut Input<'_>| parser_v2.node(input)) + .parse(new_input("foo bar\\\n baz")) + .unwrap(); assert_eq!(node.entries().len(), 2); } @@ -1723,6 +1808,11 @@ fn slashdash(input: &mut Input<'_>) -> PResult<()> { #[cfg(test)] #[test] fn slashdash_tests() { + let parser_v2 = KdlParser::new(KdlVersion::V2); + + let mut document = |input: &mut Input<'_>| KdlParser::new(KdlVersion::V2).document(input); + let mut node = |input: &mut Input<'_>| parser_v2.node(input); + assert!(document.parse(new_input("/- foo bar")).is_ok()); assert!(document.parse(new_input("/- foo bar;")).is_ok()); assert!(document.parse(new_input("/-n 1;")).is_ok()); From 5370c3dd056c27c615d1deb816a00097e287eb68 Mon Sep 17 00:00:00 2001 From: abdelkadous Date: Wed, 3 Jun 2026 17:20:53 +0100 Subject: [PATCH 2/2] =?UTF-8?q?refactor=20=F0=9F=94=A8=20():=20clearify=20?= =?UTF-8?q?the=20idea=20of=20general=20parser.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/document.rs | 6 +- src/entry.rs | 15 +- src/identifier.rs | 15 +- src/node.rs | 15 +- src/v2_parser.rs | 2581 ++++++++++++++++++++++++--------------------- 5 files changed, 1432 insertions(+), 1200 deletions(-) diff --git a/src/document.rs b/src/document.rs index 639ab7c..1ae4e75 100644 --- a/src/document.rs +++ b/src/document.rs @@ -6,7 +6,7 @@ use std::fmt::Display; use crate::KdlNodeFormat; use crate::{ FormatConfig, KdlError, KdlNode, KdlValue, - v2_parser::{Input, KdlParser, KdlVersion}, + v2_parser::{KdlParser, KdlVersion}, }; /// Represents a KDL @@ -374,14 +374,14 @@ impl KdlDocument { /// Parses a KDL v2 string into a document. pub fn parse_v2(s: &str) -> Result { let parser = KdlParser::new(KdlVersion::V2); - KdlParser::try_parse(|input: &mut Input<'_>| parser.document(input), s) + parser.try_parse(KdlParser::document, s) } /// Parses a KDL v1 string into a document. 
#[cfg(feature = "v1")] pub fn parse_v1(s: &str) -> Result { let parser = KdlParser::new(KdlVersion::V1); - KdlParser::try_parse(|input: &mut Input<'_>| parser.document(input), s) + parser.try_parse(KdlParser::document, s) } /// Takes a KDL v1 document string and returns the same document, but diff --git a/src/entry.rs b/src/entry.rs index cc94dcb..4a1c5d8 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -2,7 +2,10 @@ use miette::SourceSpan; use std::{fmt::Display, str::FromStr}; -use crate::{KdlError, KdlIdentifier, KdlValue, v2_parser}; +use crate::{ + KdlError, KdlIdentifier, KdlValue, + v2_parser::{self, KdlParser, KdlVersion}, +}; /// KDL Entries are the "arguments" to KDL nodes: either a (positional) /// [`Argument`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#argument) or @@ -202,13 +205,15 @@ impl KdlEntry { /// to parse again as a KDL v1 entry. If both fail, only the v2 parse /// errors will be returned. pub fn parse(s: &str) -> Result { + let parser_v2 = KdlParser::new(KdlVersion::V2); #[cfg(not(feature = "v1-fallback"))] { - v2_parser::KdlParser::try_parse(v2_parser::padded_node_entry, s) + parser_v2.try_parse(KdlParser::padded_node_entry, s) } #[cfg(feature = "v1-fallback")] { - v2_parser::KdlParser::try_parse(v2_parser::padded_node_entry, s) + parser_v2 + .try_parse(KdlParser::padded_node_entry, s) .or_else(|e| KdlEntry::parse_v1(s).map_err(|_| e)) } } @@ -216,8 +221,8 @@ impl KdlEntry { /// Parses a KDL v1 string into an entry. #[cfg(feature = "v1")] pub fn parse_v1(s: &str) -> Result { - let ret: Result = s.parse(); - ret.map(|x| x.into()).map_err(|e| e.into()) + let parser_v1 = KdlParser::new(KdlVersion::V1); + parser_v1.try_parse(KdlParser::padded_node_entry, s) } /// Makes sure this entry is in v2 format. 
diff --git a/src/identifier.rs b/src/identifier.rs index b492902..4f5a917 100644 --- a/src/identifier.rs +++ b/src/identifier.rs @@ -2,7 +2,10 @@ use miette::SourceSpan; use std::{fmt::Display, str::FromStr}; -use crate::{KdlError, KdlValue, v2_parser}; +use crate::{ + KdlError, KdlValue, + v2_parser::{KdlParser, KdlVersion}, +}; /// Represents a KDL /// [Identifier](https://github.com/kdl-org/kdl/blob/main/SPEC.md#identifier). @@ -95,13 +98,15 @@ impl KdlIdentifier { /// to parse again as a KDL v1 entry. If both fail, only the v2 parse /// errors will be returned. pub fn parse(s: &str) -> Result { + let parser_v2 = KdlParser::new(KdlVersion::V2); #[cfg(not(feature = "v1-fallback"))] { - v2_parser::KdlParser::try_parse(v2_parser::identifier, s) + parser_v2.try_parse(KdlParser::identifier, s) } #[cfg(feature = "v1-fallback")] { - v2_parser::KdlParser::try_parse(v2_parser::identifier, s) + parser_v2 + .try_parse(KdlParser::identifier, s) .or_else(|e| KdlIdentifier::parse_v1(s).map_err(|_| e)) } } @@ -109,8 +114,8 @@ impl KdlIdentifier { /// Parses a KDL v1 string into an entry. #[cfg(feature = "v1")] pub fn parse_v1(s: &str) -> Result { - let ret: Result = s.parse(); - ret.map(|x| x.into()).map_err(|e| e.into()) + let parser_v1 = KdlParser::new(KdlVersion::V1); + parser_v1.try_parse(KdlParser::identifier, s) } } diff --git a/src/node.rs b/src/node.rs index 091b55d..c9c6c30 100644 --- a/src/node.rs +++ b/src/node.rs @@ -11,7 +11,7 @@ use miette::SourceSpan; use crate::{ FormatConfig, KdlDocument, KdlDocumentFormat, KdlEntry, KdlError, KdlIdentifier, KdlValue, - v2_parser, + v2_parser::{self, KdlParser, KdlVersion}, }; /// Represents an individual KDL @@ -336,13 +336,15 @@ impl KdlNode { /// to parse again as a KDL v1 node. If both fail, only the v2 parse /// errors will be returned. 
pub fn parse(s: &str) -> Result { + let parser_v2 = KdlParser::new(KdlVersion::V2); #[cfg(not(feature = "v1-fallback"))] { - v2_parser::KdlParser::try_parse(v2_parser::padded_node, s) + KdlParser::try_parse(|input: &mut Input<'_>| parser_v2.padded_node(input), s) } #[cfg(feature = "v1-fallback")] { - v2_parser::KdlParser::try_parse(v2_parser::padded_node, s) + parser_v2 + .try_parse(KdlParser::padded_node, s) .or_else(|e| KdlNode::parse_v1(s).map_err(|_| e)) } } @@ -350,8 +352,8 @@ impl KdlNode { /// Parses a KDL v1 string into a document. #[cfg(feature = "v1")] pub fn parse_v1(s: &str) -> Result { - let ret: Result = s.parse(); - ret.map(|x| x.into()).map_err(|e| e.into()) + let parser_v1 = KdlParser::new(KdlVersion::V1); + parser_v1.try_parse(KdlParser::padded_node, s) } /// Makes sure this node is in v2 format. @@ -813,7 +815,8 @@ impl FromStr for KdlNode { type Err = KdlError; fn from_str(input: &str) -> Result { - v2_parser::KdlParser::try_parse(v2_parser::padded_node, input) + // TEST: need test + KdlNode::parse(input) } } diff --git a/src/v2_parser.rs b/src/v2_parser.rs index 76b0a14..3887d76 100644 --- a/src/v2_parser.rs +++ b/src/v2_parser.rs @@ -27,12 +27,15 @@ use crate::{ pub(crate) type Input<'a> = Recoverable, ErrMode>; type PResult = winnow::ModalResult; +#[derive(Clone)] pub(crate) enum KdlVersion { V1, V2, } pub(crate) struct KdlParser { + // NOTE(mantainer): the idea is to have a version value in side the parser so u can then + // match and do the correct thing for the correct case kdl_version: KdlVersion, } @@ -41,11 +44,13 @@ impl KdlParser { Self { kdl_version } } - pub(crate) fn try_parse<'a, P: ModalParser, T, KdlParseError>, T>( - mut parser: P, + pub(crate) fn try_parse<'a, T>( + &self, + parser: impl Fn(&Self, &mut Input<'a>) -> PResult, input: &'a str, ) -> Result { - let (_, maybe_val, errs) = parser.recoverable_parse(LocatingSlice::new(input)); + let (_, maybe_val, errs) = (|input: &mut Input<'a>| parser(self, input)) + 
.recoverable_parse(LocatingSlice::new(input)); if let (Some(v), true) = (maybe_val, errs.is_empty()) { Ok(v) } else { @@ -55,7 +60,7 @@ impl KdlParser { /// `document := bom? nodes` pub(crate) fn document(&self, input: &mut Input<'_>) -> PResult { - let bom = opt(bom.take()).parse_next(input)?; + let bom = opt(Self::bom.take()).parse_next(input)?; let mut doc = (|input: &mut Input<'_>| self.nodes(input)).parse_next(input)?; let badend = resume_after_cut( cut_err(eof).context(cx().lbl("EOF").msg("Expected end of document")), @@ -79,8 +84,13 @@ impl KdlParser { let mut leading = repeat( 0.., alt(( - line_space.void(), - (slashdash, (|input: &mut Input<'_>| self.base_node(input))).void(), + Self::line_space.void(), + ( + Self::slashdash, + // FIXME: have a look + (|input: &mut Input<'_>| self.base_node(input)), + ) + .void(), )), ) .map(|()| ()) @@ -90,16 +100,23 @@ impl KdlParser { let mut ns: Vec = separated( 0.., |input: &mut Input<'_>| self.node(input), - alt((node_terminator.void(), (eof.void(), any.void()).void())), + alt(( + Self::node_terminator.void(), + (eof.void(), any.void()).void(), + )), ) .parse_next(input)?; let _span = span_from_checkpoint(input, &_start); - opt(node_terminator).parse_next(input)?; + opt(Self::node_terminator).parse_next(input)?; let trailing = repeat( 0.., alt(( - line_space.void(), - (slashdash, (|input: &mut Input<'_>| self.base_node(input))).void(), + Self::line_space.void(), + ( + Self::slashdash, + (|input: &mut Input<'_>| self.base_node(input)), + ) + .void(), )), ) .map(|()| ()) @@ -138,8 +155,12 @@ impl KdlParser { let leading = repeat( 0.., alt(( - line_space.void(), - (slashdash, (|input: &mut Input<'_>| self.base_node(input))).void(), + Self::line_space.void(), + ( + Self::slashdash, + (|input: &mut Input<'_>| self.base_node(input)), + ) + .void(), )), ) .map(|()| ()) @@ -169,21 +190,21 @@ impl KdlParser { // child block as its span, but we only want to point to the offending // curly. 
input.reset(&_start); - node_children.parse_next(input)?; - opt(slashdashed_children).parse_next(input)?; - peek(opt(node_terminator)).parse_next(input)?; + Self::node_children.parse_next(input)?; + opt(Self::slashdashed_children).parse_next(input)?; + peek(opt(Self::node_terminator)).parse_next(input)?; // We also return a fake node here, for good measure. return Ok(KdlNode::new("<>")); } - let ty = opt(ty).parse_next(input)?; - let after_ty = node_space0.take().parse_next(input)?; + let ty = opt(|input: &mut Input<'_>| self.ty(input)).parse_next(input)?; + let after_ty = Self::node_space0.take().parse_next(input)?; let _before_ident = input.checkpoint(); - let name = resume_after_cut(cut_err(identifier).context( + let name = resume_after_cut(cut_err(|input: &mut Input<'_>| self.identifier(input)).context( cx().msg("Found invalid node name") .lbl("node name") .hlp("This can be any string type, including a quoted, raw, or multiline string, as well as a plain identifier string.") - ), badval) + ), Self::badval) .parse_next(input)? .unwrap_or_else(|| KdlIdentifier::from("/BAD_IDENT\\")); let name_is_valid = name.repr.as_ref().map(|s| s.is_empty()) != Some(true); @@ -206,29 +227,33 @@ impl KdlParser { } let entries = repeat( 0.., - (peek(node_space1), node_entry).map(|(_, e): ((), _)| e), + ( + peek(Self::node_space1), + (|input: &mut Input<'_>| self.node_entry(input)), + ) + .map(|(_, e): ((), _)| e), ) .map(|e: Vec>| e.into_iter().flatten().collect::>()) .parse_next(input)?; let children = opt(( - before_node_children.take(), - trace("node children", node_children), + (|input: &mut Input<'_>| self.before_node_children(input)).take(), + trace("node children", Self::node_children), )) .parse_next(input)?; let (before_terminator, terminator) = if children.is_some() { ( - opt(slashdashed_children).take(), - peek(opt(node_terminator).take()), + opt(Self::slashdashed_children).take(), + peek(opt(Self::node_terminator).take()), ) .parse_next(input)? 
} else { ( - before_node_children.take(), - peek(opt(node_terminator).take()), + (|input: &mut Input<'_>| self.before_node_children(input)).take(), + peek(opt(Self::node_terminator).take()), ) .parse_next(input)? }; - node_space0.parse_next(input)?; + Self::node_space0.parse_next(input)?; let (before_inner_ty, ty, after_inner_ty) = ty.unwrap_or_default(); let (before_children, children) = children .map(|(before_children, children)| (before_children.into(), Some(children))) @@ -251,6 +276,1166 @@ impl KdlParser { span: span_from_checkpoint(input, &_start), }) } + + /// `node-prop-or-arg := prop | value` + /// `prop := string optional-node-space equals-sign optional-node-space value` + fn node_entry(&self, input: &mut Input<'_>) -> PResult> { + let leading = ( + Self::node_space0, + opt(( + (|input: &mut Input<'_>| self.slashdashed_entries(input)), + Self::node_space1, + )), + ) + .take() + .parse_next(input)?; + let _start = input.checkpoint(); + let maybe_ident = trace( + "prop name or string val", + opt(|input: &mut Input<'_>| self.identifier(input)), + ) + .parse_next(input)?; + let ident_was_parsed = maybe_ident.is_some(); + let after_key = if ident_was_parsed { + opt((Self::node_space0.take(), Self::equals_sign)) + .parse_next(input)? + .map(|(after_key, _)| after_key) + } else { + None + }; + let entry = if let Some(after_key) = after_key { + let (after_eq, value) = ( + Self::node_space0.take(), + cut_err( + (|input: &mut Input<'_>| self.value(input)).context(cx().lbl("property value")), + ), + ) + .parse_next(input)?; + value.map(|mut value| { + value.name = maybe_ident; + if let Some(fmt) = value.format_mut() { + fmt.after_key = after_key.into(); + fmt.after_eq = after_eq.into(); + } + value + }) + } else if let Some(ident) = maybe_ident { + // It was ambiguous, but this ident is actually a value. 
+ Some(KdlEntry { + format: Some(KdlEntryFormat { + value_repr: ident.repr.unwrap_or_else(|| ident.value.clone()), + ..Default::default() + }), + value: KdlValue::String(ident.value), + name: None, + ty: None, + #[cfg(feature = "span")] + span: (0..0).into(), + }) + } else { + trace( + "non-string value", + resume_after_cut(|input: &mut Input<'_>| self.value(input), Self::badval), + ) + .parse_next(input)? + .flatten() + }; + Ok(entry.map(|mut value| { + if let Some(fmt) = value.format_mut() { + fmt.leading = leading.into(); + } + #[cfg(feature = "span")] + { + value.span = span_from_checkpoint(input, &_start); + } + value + })) + } + + fn slashdashed_entries(&self, input: &mut Input<'_>) -> PResult<()> { + separated( + 1.., + ( + Self::slashdash, + (|input: &mut Input<'_>| self.node_entry(input)), + ), + Self::node_space1, + ) + .map(|()| ()) + .take() + .map(|x| x.to_string()) + .parse_next(input)?; + Ok(()) + } + + pub(crate) fn padded_node(&self, input: &mut Input<'_>) -> PResult { + let parser = KdlParser::new(self.kdl_version.clone()); + let ((mut node, _terminator, trailing), _span) = ( + (|input: &mut Input<'_>| parser.node(input)), + opt(Self::node_terminator), + repeat(0.., alt((Self::line_space, Self::node_space))) + .map(|_: ()| ()) + .take(), + ) + .with_span() + .parse_next(input)?; + if let Some(fmt) = node.format_mut() { + fmt.trailing = trailing.into(); + } + #[cfg(feature = "span")] + { + node.span = _span.into(); + } + Ok(node) + } + + pub(crate) fn padded_node_entry(&self, input: &mut Input<'_>) -> PResult { + let ((leading, entry, trailing), _span) = ( + repeat(0.., Self::line_space).map(|_: ()| ()).take(), + trace("node entry", |input: &mut Input<'_>| self.node_entry(input)), + repeat(0.., alt((Self::line_space, Self::node_space))) + .map(|_: ()| ()) + .take(), + ) + .with_span() + .parse_next(input)?; + if let Some(entry) = entry.map(|mut val| { + if let Some(fmt) = val.format_mut() { + fmt.leading = format!("{leading}{}", fmt.leading); + 
fmt.trailing = format!("{}{trailing}", fmt.trailing); + } + #[cfg(feature = "span")] + { + val.span = _span.into(); + } + val + }) { + Ok(entry) + } else { + fail.parse_next(input)? + } + } + + fn before_node_children(&self, input: &mut Input<'_>) -> PResult<()> { + alt(( + ( + Self::node_space1, + (|input: &mut Input<'_>| self.slashdashed_entries(input)), + // This second one will fail if `node_entry_leading` is empty. + Self::node_space1, + Self::slashdashed_children, + ) + .take(), + ( + Self::node_space1, + (|input: &mut Input<'_>| self.slashdashed_entries(input)), + ) + .take(), + (Self::node_space1, Self::slashdashed_children).take(), + Self::node_space0.take(), + )) + .void() + .parse_next(input)?; + Self::node_space0.parse_next(input)?; + Ok(()) + } + + pub(crate) fn identifier(&self, input: &mut Input<'_>) -> PResult { + let mut bad_ident = false; + let ((mut ident, raw), _span) = Self::string + .verify_map(|ident| { + ident + .or_else(|| { + // This is a sentinel we use later for better error messages + bad_ident = true; + Some(KdlValue::String("/BAD_IDENT\\".into())) + }) + .and_then(|v| match v { + KdlValue::String(s) => Some(KdlIdentifier::from(s)), + _ => None, + }) + }) + .with_taken() + .with_span() + .parse_next(input)?; + ident.set_repr(if bad_ident { "" } else { raw }); + #[cfg(feature = "span")] + { + ident.set_span(_span); + } + Ok(ident) + } + + /// `identifier-string := unambiguous-ident | signed-ident | dotted-ident` + fn identifier_string(input: &mut Input<'_>) -> PResult { + alt(( + Self::unambiguous_ident, + Self::signed_ident, + Self::dotted_ident, + )) + .take() + .map(|s| KdlValue::String(s.into())) + .parse_next(input) + } + + /// `unambiguous-ident := ((identifier-char - digit - sign - '.') identifier-char*) - 'true' - 'false' - 'null' - 'inf' - '-inf' - 'nan'` + fn unambiguous_ident(input: &mut Input<'_>) -> PResult<()> { + not(alt((digit1.void(), alt(("-", "+")).void(), ".".void()))).parse_next(input)?; + 
peek(Self::identifier_char).parse_next(input)?; + trace( + "identifier chars", + cut_err( + repeat(1.., Self::identifier_char) + .verify_map(|s: String| { + if matches!( + s.as_str(), + "true" | "false" | "null" | "inf" | "-inf" | "nan" + ) { + None + } else { + Some(s) + } + }) + .void(), + ), + ) + .parse_next(input) + } + + /// `signed-ident := sign ((identifier-char - digit - '.') identifier-char*)?` + fn signed_ident(input: &mut Input<'_>) -> PResult<()> { + alt(("+", "-")).parse_next(input)?; + not(alt((digit1.void(), ".".void()))).parse_next(input)?; + repeat(0.., Self::identifier_char).parse_next(input) + } + + /// `dotted-ident := sign? '.' ((identifier-char - digit) identifier-char*)?` + fn dotted_ident(input: &mut Input<'_>) -> PResult<()> { + ( + opt(Self::signum), + ".", + not(digit1), + repeat(0.., Self::identifier_char).map(|_: ()| ()), + ) + .void() + .parse_next(input) + } + + /// `node-children := '{' nodes final-node? '}'` + fn node_children(input: &mut Input<'_>) -> PResult { + let parser_v2 = KdlParser::new(KdlVersion::V2); + + let _before_open = input.checkpoint(); + let _before_open_loc = input.current_token_start(); + "{".parse_next(input)?; + let _after_open_loc = input.previous_token_end(); + let ns = trace("child nodes", |input: &mut Input<'_>| { + parser_v2.nodes(input) + }) + .parse_next(input)?; + let _after_nodes = input.checkpoint(); + let _after_nodes_loc = input.previous_token_end(); + let close_res: PResult<_> = cut_err("}") + .context(cx().msg("No closing '}' for child block").lbl("closed")) + .parse_next(input); + if close_res.is_err() { + return close_res.map(|_| KdlDocument::new()).or_else( + |mut e: ErrMode| { + e = match e { + ErrMode::Cut(mut pe) => { + pe.span = Some((_before_open_loc.._after_open_loc).into()); + ErrMode::Cut(pe) + } + e => return Err(e), + }; + input.record_err(&_before_open, &_before_open, e)?; + if !ns.is_empty() { + input.record_err( + &_after_nodes, + &_after_nodes, + ErrMode::Cut(KdlParseError { + 
message: Some("Closing '}' was not found after nodes".into()), + span: Some((_after_open_loc.._after_nodes_loc).into()), + label: Some("closed".into()), + help: None, + severity: Some(Severity::Error), + }), + )?; + } + Ok(KdlDocument::new()) + }, + ); + } + Ok(ns) + } + + /// `node-terminator := single-line-comment | newline | ';' | eof` + fn node_terminator(input: &mut Input<'_>) -> PResult<()> { + trace( + "node_terminator", + alt((";".void(), Self::newline, Self::single_line_comment)), + ) + .void() + .parse_next(input) + } + + /// `value := type? optional-node-space (string | number | keyword)` + fn value(&self, input: &mut Input<'_>) -> PResult> { + let ((ty, (value, raw)), _span) = trace( + "value", + ( + opt(( + |input: &mut Input<'_>| self.ty(input), + (|input: &mut Input<'_>| Self::node_space0(input)).take(), + )), + alt(( + Self::keyword.map(Some), + Self::number.map(Some), + Self::string, + )) + .with_taken(), + ), + ) + .with_span() + .parse_next(input)?; + let ((before_ty_name, ty, after_ty_name), after_ty) = ty.unwrap_or_default(); + Ok(value.map(|value| KdlEntry { + ty, + value, + name: None, + format: Some(KdlEntryFormat { + value_repr: raw.into(), + after_ty: after_ty.into(), + before_ty_name: before_ty_name.into(), + after_ty_name: after_ty_name.into(), + ..Default::default() + }), + #[cfg(feature = "span")] + span: _span.into(), + })) + } + + fn badval(input: &mut Input<'_>) -> PResult<()> { + trace( + "badval", + repeat_till(1.., any, peek(Self::value_terminator)), + ) + .map(|((), _)| ()) + .parse_next(input) + } + + fn value_terminator(input: &mut Input<'_>) -> PResult<()> { + alt(( + eof.void(), + "=".void(), + ")".void(), + "{".void(), + "}".void(), + Self::node_space, + Self::node_terminator, + )) + .parse_next(input) + } + + fn value_terminator_check(input: &mut Input<'_>) -> PResult<()> { + trace("value terminator check", cut_err(peek(Self::value_terminator).context(cx().hlp("A valid value was partially parsed, but was not followed by a 
value terminator. Did you want a space here?")))).parse_next(input) + } + + /// `type := '(' optional-node-space string optional-node-space ')'` + fn ty<'s>(&self, input: &mut Input<'s>) -> PResult<(&'s str, Option, &'s str)> { + "(".parse_next(input)?; + let (before_ty, ty, after_ty) = ( + Self::node_space0.take(), + resume_after_cut( + cut_err( + ( + |input: &mut Input<'_>| self.identifier(input), + peek(alt((Self::node_space, ")".void()))), + ) + .context( + cx().lbl("type name") + .msg("invalid contents inside type annotation"), + ), + ), + repeat_till( + 1.., + (not(Self::badval_ty_char), any), + peek(Self::badval_ty_char), + ) + .map(|((), _)| ()), + ) + .map(|opt| opt.map(|(i, _)| i)), + Self::node_space0.take(), + ) + .parse_next(input)?; + ")".parse_next(input)?; + Ok((before_ty, ty, after_ty)) + } + + fn badval_ty_char(input: &mut Input<'_>) -> PResult<()> { + alt(( + ")".void(), + "{".void(), + Self::node_space, + Self::node_terminator, + )) + .parse_next(input) + } + + /// `line-space := newline | ws | single-line-comment` + fn line_space(input: &mut Input<'_>) -> PResult<()> { + alt((Self::node_space, Self::newline, Self::single_line_comment)).parse_next(input) + } + + /// `node-space := ws* escline ws* | ws+` + fn node_space(input: &mut Input<'_>) -> PResult<()> { + alt(((Self::wss, Self::escline, Self::wss).void(), Self::wsp)).parse_next(input) + } + + fn node_space0(input: &mut Input<'_>) -> PResult<()> { + repeat(0.., Self::node_space).parse_next(input) + } + + fn node_space1(input: &mut Input<'_>) -> PResult<()> { + repeat(1.., Self::node_space).parse_next(input) + } + + /// `string := identifier-string | quoted-string | raw-string` + pub(crate) fn string(input: &mut Input<'_>) -> PResult> { + trace( + "string", + alt(( + resume_after_cut( + (Self::identifier_string, Self::value_terminator_check) + .context(cx().lbl("identifier string")), + Self::badval, + ), + resume_after_cut( + (Self::raw_string, Self::value_terminator_check) +
.context(cx().lbl("raw string")), + alt((Self::raw_string_badval, Self::badval)).void(), + ), + resume_after_cut( + (Self::quoted_string, Self::value_terminator_check) + .context(cx().lbl("quoted string")), + alt((Self::quoted_string_badval, Self::badval)).void(), + ), + )), + ) + .map(|res| res.map(|(s, _)| s)) + .parse_next(input) + } + + /// `identifier-char := unicode - unicode-space - newline - [\\/(){};\[\]"#] - disallowed-literal-code-points - equals-sign` + fn identifier_char(input: &mut Input<'_>) -> PResult { + ( + not(alt(( + Self::unicode_space, + Self::newline, + Self::disallowed_unicode, + Self::equals_sign, + ))), + none_of(DISALLOWED_IDENT_CHARS), + ) + .map(|(_, c)| c) + .parse_next(input) + } + + /// `equals-sign := See Table ([Equals Sign](#equals-sign))` + fn equals_sign(input: &mut Input<'_>) -> PResult<()> { + "=".void().parse_next(input) + } + + /// ```text + /// quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline (unicode-space | ('\' (unicode-space | newline)+)*) '"""' + /// single-line-string-body := (string-character - newline)* + /// multi-line-string-body := (('"' | '""')? 
string-character)* + /// ``` + fn quoted_string(input: &mut Input<'_>) -> PResult { + let quotes = alt(( + ( + "\"\"\"", + cut_err(Self::newline).context(cx().lbl("multi-line string newline").msg( + "Multi-line string opening quotes must be immediately followed by a newline", + )), + ) + .take(), + "\"", + )) + .parse_next(input)?; + let is_multiline = quotes.len() > 1; + let ml_prefix: Option = if is_multiline { + Some( + cut_err(peek(preceded( + repeat_till( + 0.., + ( + repeat( + 0.., + ( + not(Self::newline), + alt(( + Self::ws_escape.void(), + trace( + "valid string body char(s)", + alt(( + ('\"', not("\"\"")).void(), + ('\"', not("\"")).void(), + Self::string_char.void(), + )), + ) + .void(), + )), + ), + ) + .map(|()| ()), + Self::newline, + ), + peek(terminated( + repeat(0.., alt((Self::ws_escape, Self::unicode_space))).map(|()| ()), + "\"\"\"", + )), + ) + .map(|((), ())| ()), + terminated( + repeat( + 0.., + alt((Self::ws_escape.map(|_| ""), Self::unicode_space.take())), + ) + .map(|s: String| s), + "\"\"\"", + ), + ))) + .context(cx().lbl("multi-line string")) + .parse_next(input)?, + ) + } else { + None + }; + let body = if let Some(prefix) = ml_prefix { + let parser = repeat_till( + 0.., + ( + cut_err(alt(((&prefix[..]).void(), peek(Self::empty_line).void()))) + .context(cx().msg("matching multiline string prefix").lbl("bad prefix").hlp("Multi-line string bodies must be prefixed by the exact same whitespace as the leading whitespace before the closing '\"\"\"'")), + alt(( + Self::empty_line.map(|s| s.to_string()), + repeat_till( + 0.., + ( + not(Self::newline), + alt(( + Self::ws_escape.map(|_| None), + alt(( + ('\"', not("\"\"")).map(|(c, ())| Some(c)), + ('\"', not("\"")).map(|(c, ())| Some(c)), + Self::string_char.map(Some), + )) + )) + ).map(|(_, c)| c), + Self::newline, + ) + // multiline string literal newlines are normalized to `\n` + .map(|(cs, _): (Vec>, _)| cs.into_iter().flatten().chain(vec!['\n']).collect::()), + )), + ) + .map(|(_, s)| s), 
+ ( + &prefix[..], + repeat(0.., Self::ws_escape.void()).map(|()| ()), + peek("\"\"\""), + ), + ) + .map(|(s, _): (Vec, (_, _, _))| { + let mut s = s.join(""); + // Slice off the `\n` at the end of the last line. + s.truncate(s.len().saturating_sub(1)); + s + }) + .context(cx().lbl("multi-line quoted string")); + cut_err(parser).parse_next(input)? + } else { + let parser = repeat_till( + 0.., + ( + cut_err( + not(Self::newline).context( + cx().msg("Unexpected newline in single-line quoted string") + .hlp("You can make a string multi-line by wrapping it in '\"\"\"', with a newline immediately after the opening quotes."), + ), + ), + alt(( + Self::ws_escape.map(|_| None), + Self::string_char.map(Some), + )) + ).map(|(_, c)| c), + peek("\"") + ) + .map(|(cs, _): (Vec>, _)| cs.into_iter().flatten().collect::()) + .context(cx().lbl("quoted string")); + cut_err(parser).parse_next(input)? + }; + let closing_quotes = if is_multiline { + "\"\"\"".context(cx().msg("missing multiline string closing quotes").hlp("Multiline strings must be closed by '\"\"\"' on a standalone line, only prefixed by whitespace.")) + } else { + "\"".context( + cx().msg("missing string closing quote") + .hlp("Did you forget to escape something?"), + ) + }; + cut_err(closing_quotes).parse_next(input)?; + Ok(KdlValue::String(body)) + } + + fn empty_line(input: &mut Input<'_>) -> PResult<&'static str> { + repeat( + 0.., + alt((Self::ws_escape.void(), Self::unicode_space.void())), + ) + .map(|()| ()) + .parse_next(input)?; + Self::newline.parse_next(input)?; + Ok("\n") + } + + /// Like badval, but is able to slurp up invalid raw strings, which contain whitespace. + fn quoted_string_badval(input: &mut Input<'_>) -> PResult<()> { + // TODO(@zkat): this should have different behavior based on whether we're + // resuming a single or multi-line string. Right now, multi-liners end up + // with silly errors. 
+ ( + repeat_till( + 0.., + (not(Self::quoted_string_terminator), any), + Self::quoted_string_terminator, + ), + Self::quoted_string_terminator, + ) + .map(|(((), _), _)| ()) + .parse_next(input) + } + + fn quoted_string_terminator(input: &mut Input<'_>) -> PResult<()> { + alt(("\"\"\"".void(), "\"".void(), peek(Self::value_terminator))).parse_next(input) + } + + /// ```text + /// string-character := '\' escape | [^\\"] - disallowed-literal-code-points + /// ``` + fn string_char(input: &mut Input<'_>) -> PResult { + alt(( + trace("escaped char", Self::escaped_char), + trace( + "regular string char", + (not(Self::disallowed_unicode), none_of(['\\', '"'])).map(|(_, c)| c), + ), + )) + .parse_next(input) + } + + fn ws_escape(input: &mut Input<'_>) -> PResult<()> { + trace( + "ws_escape", + ( + "\\", + repeat(1.., alt((Self::unicode_space, Self::newline))).map(|()| ()), + ), + ) + .void() + .parse_next(input) + } + + /// ```text + /// escape := ["\\bfnrts] | 'u{' hex-digit{1, 6} '}' | (unicode-space | newline)+ + /// hex-digit := [0-9a-fA-F] + /// ``` + fn escaped_char(input: &mut Input<'_>) -> PResult { + "\\".parse_next(input)?; + alt(( + alt(( + "\\".value('\\'), + "\"".value('\"'), + "b".value('\u{0008}'), + "f".value('\u{000C}'), + "n".value('\n'), + "r".value('\r'), + "t".value('\t'), + "s".value(' '), + )), + ( + "u{", + cut_err(take_while(1..=6, AsChar::is_hex_digit)), + cut_err("}"), + ) + .context(cx().lbl("unicode escape char")) + .verify_map(|(_, hx, _)| { + let val = u32::from_str_radix(hx, 16) + .expect("Should have already been validated to be a hex string."); + char::from_u32(val) + }), + )) + .parse_next(input) + } + + /// ```text + /// raw-string := '#' raw-string-quotes '#' | '#' raw-string '#' + /// raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body '"""' + /// single-line-raw-string-body := '' | (single-line-raw-string-char - '"') single-line-raw-string-char*? 
| '"' (single-line-raw-string-char - '"') single-line-raw-string-char*? + /// single-line-raw-string-char := unicode - newline - disallowed-literal-code-points + /// multi-line-raw-string-body := (unicode - disallowed-literal-code-points)*? + /// ``` + fn raw_string(input: &mut Input<'_>) -> PResult { + let _start_loc = input.current_token_start(); + let hashes: String = repeat(1.., "#").parse_next(input)?; + let quotes = alt((("\"\"\"", Self::newline).take(), "\"")).parse_next(input)?; + let is_multiline = quotes.len() > 1; + let ml_prefix: Option = if is_multiline { + Some( + peek(preceded( + repeat_till( + 0.., + ( + repeat( + 0.., + ( + not(Self::newline), + not(Self::disallowed_unicode), + not(("\"\"\"", &hashes[..])), + any, + ), + ) + .map(|()| ()), + Self::newline, + ), + peek(terminated( + repeat(0.., Self::unicode_space).map(|()| ()), + ("\"\"\"", &hashes[..]), + )), + ) + .map(|((), ())| ()), + terminated( + repeat(0.., Self::unicode_space).map(|()| ()).take(), + ("\"\"\"", &hashes[..]), + ), + )) + .parse_next(input)? + .to_string(), + ) + } else { + None + }; + let body = if let Some(prefix) = ml_prefix { + repeat_till( + 0.., + ( + cut_err(alt(((&prefix[..]).void(), peek(Self::empty_line).void()))) + .context(cx().lbl("matching multiline raw string prefix")), + alt(( + Self::empty_line.map(|s| s.to_string()), + repeat_till( + 0.., + (not(Self::newline), not(("\"\"\"", &hashes[..])), any) + .map(|((), (), _)| ()) + .take(), + Self::newline, + ) + // multiline string literal newlines are normalized to `\n` + .map(|(s, _): (Vec<&str>, _)| format!("{}\n", s.join(""))), + )), + ) + .map(|(_, s)| s), + ( + &prefix[..], + repeat(0.., Self::unicode_space).map(|()| ()).take(), + peek(("\"\"\"", &hashes[..])), + ), + ) + .map(|(s, _): (Vec, (_, _, _))| { + let mut s = s.join(""); + // Slice off the `\n` at the end of the last line. + s.truncate(s.len().saturating_sub(1)); + s + }) + .parse_next(input)? 
+ } else { + repeat_till( + 0.., + ( + not(Self::disallowed_unicode), + not(Self::newline), + not(("\"", &hashes[..])), + any, + ) + .map(|(_, _, _, s)| s), + peek(("\"", &hashes[..])), + ) + .map(|(s, _): (String, _)| s) + .context(cx().lbl("raw string")) + .parse_next(input)? + }; + let closing_quotes = if is_multiline { + "\"\"\"".context(cx().lbl("multiline raw string closing quotes")) + } else { + "\"".context(cx().lbl("raw string closing quotes")) + }; + cut_err((closing_quotes, &hashes[..])).parse_next(input)?; + if body == "\"" { + Err(ErrMode::Cut(KdlParseError { + message: Some("Single-line raw strings cannot look like multi-line ones".into()), + span: Some((_start_loc..input.previous_token_end()).into()), + label: Some("triple quotes".into()), + help: Some("Consider using a regular escaped string if all you want is a single quote: \"\\\"\"".into()), + severity: Some(Severity::Error), + })) + } else { + Ok(KdlValue::String(body)) + } + } + + /// Like badval, but is able to slurp up invalid raw strings, which contain whitespace. 
+ fn raw_string_badval(input: &mut Input<'_>) -> PResult<()> { + repeat_till( + 0.., + (not(alt(("#", "\""))), any), + ( + alt(("#", "\"")), + peek(alt((Self::ws, Self::newline, eof.void()))), + ), + ) + .map(|(v, _)| v) + .parse_next(input) + } + + // TODO(maintainer): have a look, this is how I deal with the difference between v1 and v2 + /// ```text + /// keyword := '#true' | '#false' | '#null' + /// keyword-number := '#inf' | '#-inf' | '#nan' + /// ```` + fn keyword(&self, input: &mut Input<'_>) -> PResult { + if let KdlVersion::V2 = self.kdl_version { + let _ = "#".parse_next(input)?; + not(one_of(['#', '"'])).parse_next(input)?; + } + alt(( + "true".value(KdlValue::Bool(true)), + "false".value(KdlValue::Bool(false)), + "null".value(KdlValue::Null), + "nan".value(KdlValue::Float(f64::NAN)), + "inf".value(KdlValue::Float(f64::INFINITY)), + "-inf".value(KdlValue::Float(f64::NEG_INFINITY)), + )) + .context(cx().lbl("keyword").hlp( + "Available keywords in KDL are '#true', '#false', '#null', '#nan', '#inf', and '#-inf'; they are case-sensitive.", + )) + .parse_next(input) + } + + /// `bom := '\u{FEFF}'` + fn bom(input: &mut Input<'_>) -> PResult<()> { + "\u{FEFF}".void().parse_next(input) + } + + /// `disallowed-literal-code-points := See Table (Disallowed Literal Code + /// Points)` + /// ```markdown + /// * The codepoints `U+0000-0008` or the codepoints `U+000E-001F` (various + /// control characters). + /// * `U+007F` (the Delete control character). + /// * Any codepoint that is not a [Unicode Scalar + /// Value](https://unicode.org/glossary/#unicode_scalar_value) (`U+D800-DFFF`). + /// * `U+200E-200F`, `U+202A-202E`, and `U+2066-2069`, the [unicode + /// "direction control" + /// characters](https://www.w3.org/International/questions/qa-bidi-unicode-controls) + /// * `U+FEFF`, aka Zero-width Non-breaking Space (ZWNBSP)/Byte Order Mark (BOM), + /// except as the first code point in a document.
+ /// ``` + fn disallowed_unicode(input: &mut Input<'_>) -> PResult<()> { + take_while(1.., is_disallowed_unicode) + .void() + .parse_next(input) + } + + /// `escline := '\\' ws* (single-line-comment | newline | eof)` + fn escline(input: &mut Input<'_>) -> PResult<()> { + "\\".parse_next(input)?; + Self::wss.parse_next(input)?; + alt((Self::single_line_comment, Self::newline, eof.void())).parse_next(input)?; + Self::wss.parse_next(input) + } + /// + /// `newline := ` + fn newline(input: &mut Input<'_>) -> PResult<()> { + alt(NEWLINES) + .void() + .context(cx().lbl("newline")) + .parse_next(input) + } + + fn wss(input: &mut Input<'_>) -> PResult<()> { + repeat(0.., Self::ws).parse_next(input) + } + + fn wsp(input: &mut Input<'_>) -> PResult<()> { + repeat(1.., Self::ws).parse_next(input) + } + + /// `ws := unicode-space | multi-line-comment` + fn ws(input: &mut Input<'_>) -> PResult<()> { + alt((Self::unicode_space, Self::multi_line_comment)).parse_next(input) + } + + /// `unicode-space := ` + fn unicode_space(input: &mut Input<'_>) -> PResult<()> { + one_of(UNICODE_SPACES).void().parse_next(input) + } + + /// `single-line-comment := '//' ^newline* (newline | eof)` + fn single_line_comment(input: &mut Input<'_>) -> PResult<()> { + "//".parse_next(input)?; + repeat_till( + 0.., + (not(alt((Self::newline, eof.void()))), any), + alt((Self::newline, eof.void())), + ) + .map(|(_, _): ((), _)| ()) + .parse_next(input) + } + + /// `multi-line-comment := '/*' commented-block` + fn multi_line_comment(input: &mut Input<'_>) -> PResult<()> { + "/*".parse_next(input)?; + cut_err(Self::commented_block) + .context(cx().lbl("closing of multi-line comment")) + .parse_next(input) + } + + /// `commented-block := '*/' | (multi-line-comment | '*' | '/' | [^*/]+) commented-block` + fn commented_block(input: &mut Input<'_>) -> PResult<()> { + alt(( + "*/".void(), + preceded( + alt(( + Self::multi_line_comment, + "*".void(), + "/".void(), + repeat(1.., none_of(['*', '/'])).map(|()| ()),
+ )), + Self::commented_block, + ), + )) + .parse_next(input) + } + + /// slashdash := '/-' (node-space | line-space)* + fn slashdash(input: &mut Input<'_>) -> PResult<()> { + ( + "/-", + repeat(0.., alt((Self::node_space, Self::line_space))).map(|()| ()), + ) + .void() + .parse_next(input) + } + + /// `number := keyword-number | hex | octal | binary | decimal` + fn number(input: &mut Input<'_>) -> PResult { + alt((Self::float_value, Self::integer_value)).parse_next(input) + } + + /// ```text + /// decimal := sign? integer ('.' integer)? exponent? + /// exponent := ('e' | 'E') sign? integer + /// ``` + fn float_value(input: &mut Input<'_>) -> PResult { + Self::float.map(KdlValue::Float).parse_next(input) + } + + fn float(input: &mut Input<'_>) -> PResult { + ( + alt(( + ( + Self::decimal::, + opt(preceded( + '.', + cut_err( + Self::udecimal::.context( + cx().msg("Non-digit character found after the '.' of a float"), + ), + ), + )), + Caseless("e"), + opt(one_of(['-', '+'])), + cut_err(Self::udecimal::.context( + cx().msg("Non-digit character found in the exponent part of a float").hlp("Floats with exponent parts should look like '2.0e123', or '43.3E-4'."), + )), + ) + .take(), + ( + Self::decimal::, + '.', + cut_err( + Self::udecimal:: + .context(cx().msg("Non-digit character found after the '.' of a float")), + ), + ) + .take(), + )), + Self::value_terminator_check, + ) + .try_map(|(float_str, _)| T::parse_float(&str::replace(float_str, "_", ""))) + .context(cx().lbl("float")) + .parse_next(input) + } + + /// `integer := digit (digit | '_')*` + fn udecimal(input: &mut Input<'_>) -> PResult { + ( + digit1, + repeat( + 0.., + alt(("_", take_while(1.., AsChar::is_dec_digit).take())), + ), + ) + .try_map(|(l, r): (&str, Vec<&str>)| { + T::from_str_radix(&format!("{l}{}", str::replace(&r.join(""), "_", "")), 10) + }) + .parse_next(input) + } + + /// `hex := sign? 
'0x' hex-digit (hex-digit | '_')*` + fn hex(input: &mut Input<'_>) -> PResult { + let positive = Self::signum.parse_next(input)?; + Self::uhex:: + .try_map(|x| { + if positive { + Ok(x) + } else { + x.negated().ok_or(NegativeUnsignedError) + } + }) + .parse_next(input) + } + + fn uhex(input: &mut Input<'_>) -> PResult { + alt(("0x", "0X")).parse_next(input)?; + cut_err(( + hex_digit1, + repeat( + 0.., + alt(("_", take_while(1.., AsChar::is_hex_digit).take())), + ), + )) + .try_map(|(l, r): (&str, Vec<&str>)| { + T::from_str_radix(&format!("{l}{}", str::replace(&r.join(""), "_", "")), 16) + }) + .context(cx().lbl("hexadecimal")) + .parse_next(input) + } + + fn integer_value(input: &mut Input<'_>) -> PResult { + alt(( + (Self::hex, Self::value_terminator_check).context(cx().lbl("hexadecimal number")), + (Self::octal, Self::value_terminator_check).context(cx().lbl("octal number")), + (Self::binary, Self::value_terminator_check).context(cx().lbl("binary number")), + (Self::decimal, Self::value_terminator_check).context(cx().lbl("integer")), + )) + .map(|(val, _)| KdlValue::Integer(val)) + .parse_next(input) + } + + /// Non-float decimal + fn decimal(input: &mut Input<'_>) -> PResult { + let positive = Self::signum.parse_next(input)?; + Self::udecimal:: + .try_map(|x| { + if positive { + Ok(x) + } else { + x.negated().ok_or(NegativeUnsignedError) + } + }) + .parse_next(input) + } + + /// `octal := sign? 
'0o' [0-7] [0-7_]*` + fn octal(input: &mut Input<'_>) -> PResult { + let positive = Self::signum.parse_next(input)?; + Self::uoctal:: + .try_map(|x| { + if positive { + Ok(x) + } else { + x.negated().ok_or(NegativeUnsignedError) + } + }) + .parse_next(input) + } + + fn uoctal(input: &mut Input<'_>) -> PResult { + alt(("0o", "0O")).parse_next(input)?; + cut_err(( + oct_digit1, + repeat( + 0.., + alt(("_", take_while(1.., AsChar::is_oct_digit).take())), + ), + )) + .try_map(|(l, r): (&str, Vec<&str>)| { + T::from_str_radix(&format!("{l}{}", str::replace(&r.join(""), "_", "")), 8) + }) + .context(cx().lbl("octal")) + .parse_next(input) + } + + /// `binary := sign? '0b' ('0' | '1') ('0' | '1' | '_')*` + fn binary(input: &mut Input<'_>) -> PResult { + let positive = Self::signum.parse_next(input)?; + Self::ubinary:: + .try_map(|x| { + if positive { + Ok(x) + } else { + x.negated().ok_or(NegativeUnsignedError) + } + }) + .parse_next(input) + } + + fn ubinary(input: &mut Input<'_>) -> PResult { + alt(("0b", "0B")).parse_next(input)?; + cut_err( + (alt(("0", "1")), repeat(0.., alt(("0", "1", "_")))).try_map( + move |(x, xs): (&str, Vec<&str>)| { + T::from_str_radix(&format!("{x}{}", str::replace(&xs.join(""), "_", "")), 2) + }, + ), + ) + .context(cx().lbl("binary")) + .parse_next(input) + } + + fn signum(input: &mut Input<'_>) -> PResult { + let sign = opt(alt(('+', '-'))).parse_next(input)?; + let mult = if let Some(sign) = sign { + sign == '+' + } else { + true + }; + Ok(mult) + } + + fn slashdashed_children(input: &mut Input<'_>) -> PResult<()> { + Self::node_space0.parse_next(input)?; + trace( + "slashdashed children", + separated( + 1.., + (Self::slashdash.void(), Self::node_children.void()).void(), + Self::node_space1, + ), + ) + .map(|()| ()) + .parse_next(input) + } } pub(crate) fn failure_from_errs(errs: Vec>, input: &str) -> KdlError { @@ -566,128 +1751,14 @@ fn test_node() { ); } -pub(crate) fn padded_node(input: &mut Input<'_>) -> PResult { - let parser_v2 = 
KdlParser::new(KdlVersion::V2); - let ((mut node, _terminator, trailing), _span) = ( - (|input: &mut Input<'_>| parser_v2.node(input)), - opt(node_terminator), - repeat(0.., alt((line_space, node_space))) - .map(|_: ()| ()) - .take(), - ) - .with_span() - .parse_next(input)?; - if let Some(fmt) = node.format_mut() { - fmt.trailing = trailing.into(); - } - #[cfg(feature = "span")] - { - node.span = _span.into(); - } - Ok(node) -} - -pub(crate) fn padded_node_entry(input: &mut Input<'_>) -> PResult { - let ((leading, entry, trailing), _span) = ( - repeat(0.., line_space).map(|_: ()| ()).take(), - trace("node entry", node_entry), - repeat(0.., alt((line_space, node_space))) - .map(|_: ()| ()) - .take(), - ) - .with_span() - .parse_next(input)?; - if let Some(entry) = entry.map(|mut val| { - if let Some(fmt) = val.format_mut() { - fmt.leading = format!("{leading}{}", fmt.leading); - fmt.trailing = format!("{}{trailing}", fmt.trailing); - } - #[cfg(feature = "span")] - { - val.span = _span.into(); - } - val - }) { - Ok(entry) - } else { - fail.parse_next(input)? - } -} - -/// `node-prop-or-arg := prop | value` -/// `prop := string optional-node-space equals-sign optional-node-space value` -fn node_entry(input: &mut Input<'_>) -> PResult> { - let leading = (node_space0, opt((slashdashed_entries, node_space1))) - .take() - .parse_next(input)?; - let _start = input.checkpoint(); - let maybe_ident = trace("prop name or string val", opt(identifier)).parse_next(input)?; - let ident_was_parsed = maybe_ident.is_some(); - let after_key = if ident_was_parsed { - opt((node_space0.take(), equals_sign)) - .parse_next(input)? 
- .map(|(after_key, _)| after_key) - } else { - None - }; - let entry = if let Some(after_key) = after_key { - let (after_eq, value) = ( - node_space0.take(), - cut_err(value.context(cx().lbl("property value"))), - ) - .parse_next(input)?; - value.map(|mut value| { - value.name = maybe_ident; - if let Some(fmt) = value.format_mut() { - fmt.after_key = after_key.into(); - fmt.after_eq = after_eq.into(); - } - value - }) - } else if let Some(ident) = maybe_ident { - // It was ambiguous, but this ident is actually a value. - Some(KdlEntry { - format: Some(KdlEntryFormat { - value_repr: ident.repr.unwrap_or_else(|| ident.value.clone()), - ..Default::default() - }), - value: KdlValue::String(ident.value), - name: None, - ty: None, - #[cfg(feature = "span")] - span: (0..0).into(), - }) - } else { - trace("non-string value", resume_after_cut(value, badval)) - .parse_next(input)? - .flatten() - }; - Ok(entry.map(|mut value| { - if let Some(fmt) = value.format_mut() { - fmt.leading = leading.into(); - } - #[cfg(feature = "span")] - { - value.span = span_from_checkpoint(input, &_start); - } - value - })) -} - -fn slashdashed_entries(input: &mut Input<'_>) -> PResult<()> { - separated(1.., (slashdash, node_entry), node_space1) - .map(|()| ()) - .take() - .map(|x| x.to_string()) - .parse_next(input)?; - Ok(()) -} - #[cfg(test)] #[test] fn entry_test() { + let parser_v2 = KdlParser::new(KdlVersion::V2); assert_eq!( - node_entry.parse(new_input("foo=bar")).unwrap(), + (|input: &mut Input<'_>| parser_v2.node_entry(input)) + .parse(new_input("foo=bar")) + .unwrap(), Some(KdlEntry { ty: None, value: KdlValue::String("bar".into()), @@ -702,7 +1773,9 @@ fn entry_test() { ); assert_eq!( - node_entry.parse(new_input("foo")).unwrap(), + (|input: &mut Input<'_>| parser_v2.node_entry(input)) + .parse(new_input("foo")) + .unwrap(), Some(KdlEntry { ty: None, value: KdlValue::String("foo".into()), @@ -717,7 +1790,9 @@ fn entry_test() { ); assert_eq!( - node_entry.parse(new_input("/-foo 
bar")).unwrap(), + (|input: &mut Input<'_>| parser_v2.node_entry(input)) + .parse(new_input("/-foo bar")) + .unwrap(), Some(KdlEntry { ty: None, value: KdlValue::String("bar".into()), @@ -733,7 +1808,9 @@ fn entry_test() { ); assert_eq!( - node_entry.parse(new_input("/- foo=1 bar = 2")).unwrap(), + (|input: &mut Input<'_>| parser_v2.node_entry(input)) + .parse(new_input("/- foo=1 bar = 2")) + .unwrap(), Some(KdlEntry { ty: None, value: 2.into(), @@ -756,7 +1833,9 @@ fn entry_test() { ); assert_eq!( - node_entry.parse(new_input("/- \nfoo = 1 bar = 2")).unwrap(), + (|input: &mut Input<'_>| parser_v2.node_entry(input)) + .parse(new_input("/- \nfoo = 1 bar = 2")) + .unwrap(), Some(KdlEntry { ty: None, value: 2.into(), @@ -779,305 +1858,29 @@ fn entry_test() { ); } -fn before_node_children(input: &mut Input<'_>) -> PResult<()> { - alt(( - ( - node_space1, - slashdashed_entries, - // This second one will fail if `node_entry_leading` is empty. - node_space1, - slashdashed_children, - ) - .take(), - (node_space1, slashdashed_entries).take(), - (node_space1, slashdashed_children).take(), - node_space0.take(), - )) - .void() - .parse_next(input)?; - node_space0.parse_next(input)?; - Ok(()) -} - #[cfg(test)] #[test] fn before_node_children_test() { + let mut before_node_children = + |input: &mut Input<'_>| KdlParser::new(KdlVersion::V2).before_node_children(input); + assert!(before_node_children.parse(new_input(" /- { }")).is_ok()); assert!(before_node_children.parse(new_input(" /- { bar }")).is_ok()); } -fn slashdashed_children(input: &mut Input<'_>) -> PResult<()> { - node_space0.parse_next(input)?; - trace( - "slashdashed children", - separated( - 1.., - (slashdash.void(), node_children.void()).void(), - node_space1, - ), - ) - .map(|()| ()) - .parse_next(input) -} - #[cfg(test)] #[test] fn around_children_test() { - assert!(slashdashed_children.parse(new_input("/- { }")).is_ok()); - assert!(slashdashed_children.parse(new_input("/- { bar }")).is_ok()); -} - -/// 
`node-children := '{' nodes final-node? '}'` -fn node_children(input: &mut Input<'_>) -> PResult { - let parser_v2 = KdlParser::new(KdlVersion::V2); - - let _before_open = input.checkpoint(); - let _before_open_loc = input.current_token_start(); - "{".parse_next(input)?; - let _after_open_loc = input.previous_token_end(); - let ns = trace("child nodes", |input: &mut Input<'_>| { - parser_v2.nodes(input) - }) - .parse_next(input)?; - let _after_nodes = input.checkpoint(); - let _after_nodes_loc = input.previous_token_end(); - let close_res: PResult<_> = cut_err("}") - .context(cx().msg("No closing '}' for child block").lbl("closed")) - .parse_next(input); - if close_res.is_err() { - return close_res - .map(|_| KdlDocument::new()) - .or_else(|mut e: ErrMode| { - e = match e { - ErrMode::Cut(mut pe) => { - pe.span = Some((_before_open_loc.._after_open_loc).into()); - ErrMode::Cut(pe) - } - e => return Err(e), - }; - input.record_err(&_before_open, &_before_open, e)?; - if !ns.is_empty() { - input.record_err( - &_after_nodes, - &_after_nodes, - ErrMode::Cut(KdlParseError { - message: Some("Closing '}' was not found after nodes".into()), - span: Some((_after_open_loc.._after_nodes_loc).into()), - label: Some("closed".into()), - help: None, - severity: Some(Severity::Error), - }), - )?; - } - Ok(KdlDocument::new()) - }); - } - Ok(ns) -} - -/// `node-terminator := single-line-comment | newline | ';' | eof` -fn node_terminator(input: &mut Input<'_>) -> PResult<()> { - trace( - "node_terminator", - alt((";".void(), newline, single_line_comment)), - ) - .void() - .parse_next(input) -} - -/// `value := type? 
optional-node-space (string | number | keyword)` -fn value(input: &mut Input<'_>) -> PResult> { - let ((ty, (value, raw)), _span) = trace( - "value", - ( - opt((ty, node_space0.take())), - alt((keyword.map(Some), number.map(Some), string)).with_taken(), - ), - ) - .with_span() - .parse_next(input)?; - let ((before_ty_name, ty, after_ty_name), after_ty) = ty.unwrap_or_default(); - Ok(value.map(|value| KdlEntry { - ty, - value, - name: None, - format: Some(KdlEntryFormat { - value_repr: raw.into(), - after_ty: after_ty.into(), - before_ty_name: before_ty_name.into(), - after_ty_name: after_ty_name.into(), - ..Default::default() - }), - #[cfg(feature = "span")] - span: _span.into(), - })) -} - -fn badval(input: &mut Input<'_>) -> PResult<()> { - trace("badval", repeat_till(1.., any, peek(value_terminator))) - .map(|((), _)| ()) - .parse_next(input) -} - -fn value_terminator(input: &mut Input<'_>) -> PResult<()> { - alt(( - eof.void(), - "=".void(), - ")".void(), - "{".void(), - "}".void(), - node_space, - node_terminator, - )) - .parse_next(input) -} - -fn value_terminator_check(input: &mut Input<'_>) -> PResult<()> { - trace("value terminator check", cut_err(peek(value_terminator).context(cx().hlp("A valid value was partially parsed, but was not followed by a value terminator. 
Did you want a space here?")))).parse_next(input) -} - -/// `type := '(' optional-node-space string optional-node-space ')'` -fn ty<'s>(input: &mut Input<'s>) -> PResult<(&'s str, Option, &'s str)> { - "(".parse_next(input)?; - let (before_ty, ty, after_ty) = ( - node_space0.take(), - resume_after_cut( - cut_err( - (identifier, peek(alt((node_space, ")".void())))).context( - cx().lbl("type name") - .msg("invalid contents inside type annotation"), - ), - ), - repeat_till(1.., (not(badval_ty_char), any), peek(badval_ty_char)).map(|((), _)| ()), - ) - .map(|opt| opt.map(|(i, _)| i)), - node_space0.take(), - ) - .parse_next(input)?; - ")".parse_next(input)?; - Ok((before_ty, ty, after_ty)) -} - -fn badval_ty_char(input: &mut Input<'_>) -> PResult<()> { - alt((")".void(), "{".void(), node_space, node_terminator)).parse_next(input) -} - -/// `line-space := newline | ws | single-line-comment` -fn line_space(input: &mut Input<'_>) -> PResult<()> { - alt((node_space, newline, single_line_comment)).parse_next(input) -} - -/// `node-space := ws* escline ws* | ws+` -fn node_space(input: &mut Input<'_>) -> PResult<()> { - alt(((wss, escline, wss).void(), wsp)).parse_next(input) -} - -fn node_space0(input: &mut Input<'_>) -> PResult<()> { - repeat(0.., node_space).parse_next(input) -} - -fn node_space1(input: &mut Input<'_>) -> PResult<()> { - repeat(1.., node_space).parse_next(input) -} - -/// string := identifier-string | quoted-string | raw-string ¶ -pub(crate) fn string(input: &mut Input<'_>) -> PResult> { - trace( - "string", - alt(( - resume_after_cut( - (identifier_string, value_terminator_check).context(cx().lbl("identifier string")), - badval, - ), - resume_after_cut( - (raw_string, value_terminator_check).context(cx().lbl("raw string")), - alt((raw_string_badval, badval)).void(), - ), - resume_after_cut( - (quoted_string, value_terminator_check).context(cx().lbl("quoted string")), - alt((quoted_string_badval, badval)).void(), - ), - )), - ) - .map(|res| res.map(|(s, 
_)| s)) - .parse_next(input) -} - -pub(crate) fn identifier(input: &mut Input<'_>) -> PResult { - let mut bad_ident = false; - let ((mut ident, raw), _span) = string - .verify_map(|ident| { - ident - .or_else(|| { - // This is a sentinel we use later for better error messages - bad_ident = true; - Some(KdlValue::String("/BAD_IDENT\\".into())) - }) - .and_then(|v| match v { - KdlValue::String(s) => Some(KdlIdentifier::from(s)), - _ => None, - }) - }) - .with_taken() - .with_span() - .parse_next(input)?; - ident.set_repr(if bad_ident { "" } else { raw }); - #[cfg(feature = "span")] - { - ident.set_span(_span); - } - Ok(ident) -} - -/// `identifier-string := unambiguous-ident | signed-ident | dotted-ident` -fn identifier_string(input: &mut Input<'_>) -> PResult { - alt((unambiguous_ident, signed_ident, dotted_ident)) - .take() - .map(|s| KdlValue::String(s.into())) - .parse_next(input) -} - -/// `unambiguous-ident := ((identifier-char - digit - sign - '.') identifier-char*) - 'true' - 'false' - 'null' - 'inf' - '-inf' - 'nan'` -fn unambiguous_ident(input: &mut Input<'_>) -> PResult<()> { - not(alt((digit1.void(), alt(("-", "+")).void(), ".".void()))).parse_next(input)?; - peek(identifier_char).parse_next(input)?; - trace( - "identifier chars", - cut_err( - repeat(1.., identifier_char) - .verify_map(|s: String| { - if matches!( - s.as_str(), - "true" | "false" | "null" | "inf" | "-inf" | "nan" - ) { - None - } else { - Some(s) - } - }) - .void(), - ), - ) - .parse_next(input) -} - -/// `signed-ident := sign ((identifier-char - digit - '.') identifier-char*)?` -fn signed_ident(input: &mut Input<'_>) -> PResult<()> { - alt(("+", "-")).parse_next(input)?; - not(alt((digit1.void(), ".".void()))).parse_next(input)?; - repeat(0.., identifier_char).parse_next(input) -} - -/// `dotted-ident := sign? '.' 
((identifier-char - digit) identifier-char*)?` -fn dotted_ident(input: &mut Input<'_>) -> PResult<()> { - ( - opt(signum), - ".", - not(digit1), - repeat(0.., identifier_char).map(|_: ()| ()), - ) - .void() - .parse_next(input) + assert!( + KdlParser::slashdashed_children + .parse(new_input("/- { }")) + .is_ok() + ); + assert!( + KdlParser::slashdashed_children + .parse(new_input("/- { bar }")) + .is_ok() + ); } static DISALLOWED_IDENT_CHARS: [char; 11] = @@ -1091,378 +1894,6 @@ pub(crate) fn is_disallowed_ident_char(c: char) -> bool { || c == '=' } -/// `identifier-char := unicode - unicode-space - newline - [\\/(){};\[\]"#] - disallowed-literal-code-points - equals-sign` -fn identifier_char(input: &mut Input<'_>) -> PResult { - ( - not(alt(( - unicode_space, - newline, - disallowed_unicode, - equals_sign, - ))), - none_of(DISALLOWED_IDENT_CHARS), - ) - .map(|(_, c)| c) - .parse_next(input) -} - -/// `equals-sign := See Table ([Equals Sign](#equals-sign))` -fn equals_sign(input: &mut Input<'_>) -> PResult<()> { - "=".void().parse_next(input) -} - -/// ```text -/// quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline (unicode-space | ('\' (unicode-space | newline)+)*) '"""' -/// single-line-string-body := (string-character - newline)* -/// multi-line-string-body := (('"' | '""')? 
string-character)* -/// ``` -fn quoted_string(input: &mut Input<'_>) -> PResult { - let quotes = - alt(( - ( - "\"\"\"", - cut_err(newline).context(cx().lbl("multi-line string newline").msg( - "Multi-line string opening quotes must be immediately followed by a newline", - )), - ) - .take(), - "\"", - )) - .parse_next(input)?; - let is_multiline = quotes.len() > 1; - let ml_prefix: Option = if is_multiline { - Some( - cut_err(peek(preceded( - repeat_till( - 0.., - ( - repeat( - 0.., - ( - not(newline), - alt(( - ws_escape.void(), - trace( - "valid string body char(s)", - alt(( - ('\"', not("\"\"")).void(), - ('\"', not("\"")).void(), - string_char.void(), - )), - ) - .void(), - )), - ), - ) - .map(|()| ()), - newline, - ), - peek(terminated( - repeat(0.., alt((ws_escape, unicode_space))).map(|()| ()), - "\"\"\"", - )), - ) - .map(|((), ())| ()), - terminated( - repeat(0.., alt((ws_escape.map(|_| ""), unicode_space.take()))) - .map(|s: String| s), - "\"\"\"", - ), - ))) - .context(cx().lbl("multi-line string")) - .parse_next(input)?, - ) - } else { - None - }; - let body = if let Some(prefix) = ml_prefix { - let parser = repeat_till( - 0.., - ( - cut_err(alt(((&prefix[..]).void(), peek(empty_line).void()))) - .context(cx().msg("matching multiline string prefix").lbl("bad prefix").hlp("Multi-line string bodies must be prefixed by the exact same whitespace as the leading whitespace before the closing '\"\"\"'")), - alt(( - empty_line.map(|s| s.to_string()), - repeat_till( - 0.., - ( - not(newline), - alt(( - ws_escape.map(|_| None), - alt(( - ('\"', not("\"\"")).map(|(c, ())| Some(c)), - ('\"', not("\"")).map(|(c, ())| Some(c)), - string_char.map(Some), - )) - )) - ).map(|(_, c)| c), - newline, - ) - // multiline string literal newlines are normalized to `\n` - .map(|(cs, _): (Vec>, _)| cs.into_iter().flatten().chain(vec!['\n']).collect::()), - )), - ) - .map(|(_, s)| s), - ( - &prefix[..], - repeat(0.., ws_escape.void()).map(|()| ()), - peek("\"\"\""), - ), - ) - 
.map(|(s, _): (Vec, (_, _, _))| { - let mut s = s.join(""); - // Slice off the `\n` at the end of the last line. - s.truncate(s.len().saturating_sub(1)); - s - }) - .context(cx().lbl("multi-line quoted string")); - cut_err(parser).parse_next(input)? - } else { - let parser = repeat_till( - 0.., - ( - cut_err( - not(newline).context( - cx().msg("Unexpected newline in single-line quoted string") - .hlp("You can make a string multi-line by wrapping it in '\"\"\"', with a newline immediately after the opening quotes."), - ), - ), - alt(( - ws_escape.map(|_| None), - string_char.map(Some), - )) - ).map(|(_, c)| c), - peek("\"") - ) - .map(|(cs, _): (Vec>, _)| cs.into_iter().flatten().collect::()) - .context(cx().lbl("quoted string")); - cut_err(parser).parse_next(input)? - }; - let closing_quotes = if is_multiline { - "\"\"\"".context(cx().msg("missing multiline string closing quotes").hlp("Multiline strings must be closed by '\"\"\"' on a standalone line, only prefixed by whitespace.")) - } else { - "\"".context( - cx().msg("missing string closing quote") - .hlp("Did you forget to escape something?"), - ) - }; - cut_err(closing_quotes).parse_next(input)?; - Ok(KdlValue::String(body)) -} - -fn empty_line(input: &mut Input<'_>) -> PResult<&'static str> { - repeat(0.., alt((ws_escape.void(), unicode_space.void()))) - .map(|()| ()) - .parse_next(input)?; - newline.parse_next(input)?; - Ok("\n") -} - -/// Like badval, but is able to slurp up invalid raw strings, which contain whitespace. -fn quoted_string_badval(input: &mut Input<'_>) -> PResult<()> { - // TODO(@zkat): this should have different behavior based on whether we're - // resuming a single or multi-line string. Right now, multi-liners end up - // with silly errors. 
- ( - repeat_till( - 0.., - (not(quoted_string_terminator), any), - quoted_string_terminator, - ), - quoted_string_terminator, - ) - .map(|(((), _), _)| ()) - .parse_next(input) -} - -fn quoted_string_terminator(input: &mut Input<'_>) -> PResult<()> { - alt(("\"\"\"".void(), "\"".void(), peek(value_terminator))).parse_next(input) -} - -/// ```text -/// string-character := '\' escape | [^\\"] - disallowed-literal-code-points -/// ``` -fn string_char(input: &mut Input<'_>) -> PResult { - alt(( - trace("escaped char", escaped_char), - trace( - "regular string char", - (not(disallowed_unicode), none_of(['\\', '"'])).map(|(_, c)| c), - ), - )) - .parse_next(input) -} - -fn ws_escape(input: &mut Input<'_>) -> PResult<()> { - trace( - "ws_escape", - ( - "\\", - repeat(1.., alt((unicode_space, newline))).map(|()| ()), - ), - ) - .void() - .parse_next(input) -} - -/// ```text -/// escape := ["\\bfnrts] | 'u{' hex-digit{1, 6} '}' | (unicode-space | newline)+ -/// hex-digit := [0-9a-fA-F] -/// ``` -fn escaped_char(input: &mut Input<'_>) -> PResult { - "\\".parse_next(input)?; - alt(( - alt(( - "\\".value('\\'), - "\"".value('\"'), - "b".value('\u{0008}'), - "f".value('\u{000C}'), - "n".value('\n'), - "r".value('\r'), - "t".value('\t'), - "s".value(' '), - )), - ( - "u{", - cut_err(take_while(1..=6, AsChar::is_hex_digit)), - cut_err("}"), - ) - .context(cx().lbl("unicode escape char")) - .verify_map(|(_, hx, _)| { - let val = u32::from_str_radix(hx, 16) - .expect("Should have already been validated to be a hex string."); - char::from_u32(val) - }), - )) - .parse_next(input) -} - -/// ```text -/// raw-string := '#' raw-string-quotes '#' | '#' raw-string '#' -/// raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body '"""' -/// single-line-raw-string-body := '' | (single-line-raw-string-char - '"') single-line-raw-string-char*? | '"' (single-line-raw-string-char - '"') single-line-raw-string-char*? 
-/// single-line-raw-string-char := unicode - newline - disallowed-literal-code-points -/// multi-line-raw-string-body := (unicode - disallowed-literal-code-points)*? -/// ``` -fn raw_string(input: &mut Input<'_>) -> PResult { - let _start_loc = input.current_token_start(); - let hashes: String = repeat(1.., "#").parse_next(input)?; - let quotes = alt((("\"\"\"", newline).take(), "\"")).parse_next(input)?; - let is_multiline = quotes.len() > 1; - let ml_prefix: Option = if is_multiline { - Some( - peek(preceded( - repeat_till( - 0.., - ( - repeat( - 0.., - ( - not(newline), - not(disallowed_unicode), - not(("\"\"\"", &hashes[..])), - any, - ), - ) - .map(|()| ()), - newline, - ), - peek(terminated( - repeat(0.., unicode_space).map(|()| ()), - ("\"\"\"", &hashes[..]), - )), - ) - .map(|((), ())| ()), - terminated( - repeat(0.., unicode_space).map(|()| ()).take(), - ("\"\"\"", &hashes[..]), - ), - )) - .parse_next(input)? - .to_string(), - ) - } else { - None - }; - let body = if let Some(prefix) = ml_prefix { - repeat_till( - 0.., - ( - cut_err(alt(((&prefix[..]).void(), peek(empty_line).void()))) - .context(cx().lbl("matching multiline raw string prefix")), - alt(( - empty_line.map(|s| s.to_string()), - repeat_till( - 0.., - (not(newline), not(("\"\"\"", &hashes[..])), any) - .map(|((), (), _)| ()) - .take(), - newline, - ) - // multiline string literal newlines are normalized to `\n` - .map(|(s, _): (Vec<&str>, _)| format!("{}\n", s.join(""))), - )), - ) - .map(|(_, s)| s), - ( - &prefix[..], - repeat(0.., unicode_space).map(|()| ()).take(), - peek(("\"\"\"", &hashes[..])), - ), - ) - .map(|(s, _): (Vec, (_, _, _))| { - let mut s = s.join(""); - // Slice off the `\n` at the end of the last line. - s.truncate(s.len().saturating_sub(1)); - s - }) - .parse_next(input)? 
- } else { - repeat_till( - 0.., - ( - not(disallowed_unicode), - not(newline), - not(("\"", &hashes[..])), - any, - ) - .map(|(_, _, _, s)| s), - peek(("\"", &hashes[..])), - ) - .map(|(s, _): (String, _)| s) - .context(cx().lbl("raw string")) - .parse_next(input)? - }; - let closing_quotes = if is_multiline { - "\"\"\"".context(cx().lbl("multiline raw string closing quotes")) - } else { - "\"".context(cx().lbl("raw string closing quotes")) - }; - cut_err((closing_quotes, &hashes[..])).parse_next(input)?; - if body == "\"" { - Err(ErrMode::Cut(KdlParseError { - message: Some("Single-line raw strings cannot look like multi-line ones".into()), - span: Some((_start_loc..input.previous_token_end()).into()), - label: Some("triple quotes".into()), - help: Some("Consider using a regular escaped string if all you want is a single quote: \"\\\"\"".into()), - severity: Some(Severity::Error), - })) - } else { - Ok(KdlValue::String(body)) - } -} - -/// Like badval, but is able to slurp up invalid raw strings, which contain whitespace. 
-fn raw_string_badval(input: &mut Input<'_>) -> PResult<()> { - repeat_till( - 0.., - (not(alt(("#", "\""))), any), - (alt(("#", "\"")), peek(alt((ws, newline, eof.void())))), - ) - .map(|(v, _)| v) - .parse_next(input) -} - #[cfg(test)] mod string_tests { use super::*; @@ -1470,11 +1901,11 @@ mod string_tests { #[test] fn identifier_string() { assert_eq!( - string.parse(new_input("foo")).unwrap(), + KdlParser::string.parse(new_input("foo")).unwrap(), Some(KdlValue::String("foo".into())) ); assert_eq!( - string.parse(new_input(",")).unwrap(), + KdlParser::string.parse(new_input(",")).unwrap(), Some(KdlValue::String(",".into())) ); } @@ -1482,15 +1913,19 @@ mod string_tests { #[test] fn single_line_quoted_string() { assert_eq!( - string.parse(new_input("\"foo\"")).unwrap(), + KdlParser::string.parse(new_input("\"foo\"")).unwrap(), Some(KdlValue::String("foo".into())) ); assert_eq!( - string.parse(new_input("\"foo\\u{0a}\"")).unwrap(), + KdlParser::string + .parse(new_input("\"foo\\u{0a}\"")) + .unwrap(), Some(KdlValue::String("foo\u{0a}".into())) ); assert_eq!( - string.parse(new_input("\"\\u{10FFFF}\"")).unwrap(), + KdlParser::string + .parse(new_input("\"\\u{10FFFF}\"")) + .unwrap(), Some(KdlValue::String("\u{10ffff}".into())) ); } @@ -1498,31 +1933,31 @@ mod string_tests { #[test] fn multiline_quoted_string() { assert_eq!( - string + KdlParser::string .parse(new_input("\"\"\"\nfoo\nbar\nbaz\n\"\"\"")) .unwrap(), Some(KdlValue::String("foo\nbar\nbaz".into())) ); assert_eq!( - string + KdlParser::string .parse(new_input("\"\"\"\n foo\n bar\n baz\n \"\"\"")) .unwrap(), Some(KdlValue::String("foo\n bar\nbaz".into())) ); assert_eq!( - string + KdlParser::string .parse(new_input("\"\"\"\nfoo\r\nbar\nbaz\n\"\"\"")) .unwrap(), Some(KdlValue::String("foo\nbar\nbaz".into())) ); assert_eq!( - string + KdlParser::string .parse(new_input("\"\"\"\n foo\n bar\n baz\n \"\"\"")) .unwrap(), Some(KdlValue::String("foo\n bar\n baz".into())) ); assert_eq!( - string + 
KdlParser::string .parse(new_input( "\"\"\"\n \\ foo\n \\ bar\n \\ baz\n \"\"\"" )) @@ -1530,26 +1965,28 @@ mod string_tests { Some(KdlValue::String("foo\n bar\n baz".into())) ); assert_eq!( - string + KdlParser::string .parse(new_input("\"\"\"\n\n string\t\n \"\"\"")) .unwrap(), Some(KdlValue::String("\nstring\t".into())), "Empty line without any indentation" ); assert_eq!( - string + KdlParser::string .parse(new_input("\"\"\"\n   \\\n   \n   \"\"\"")) .unwrap(), Some(KdlValue::String("".into())), "Escaped whitespace with proper prefix" ); assert_eq!( - string.parse(new_input("\"\"\"\n\\\"\"\"\n\"\"\"")).unwrap(), + KdlParser::string + .parse(new_input("\"\"\"\n\\\"\"\"\n\"\"\"")) + .unwrap(), Some(KdlValue::String("\"\"\"".into())) ); assert!( - string + KdlParser::string .parse(new_input("\"\"\"\nfoo\n bar\n baz\n \"\"\"")) .is_err() ); @@ -1558,52 +1995,63 @@ mod string_tests { #[test] fn raw_string() { assert_eq!( - string.parse(new_input("#\"foo\"#")).unwrap(), + KdlParser::string.parse(new_input("#\"foo\"#")).unwrap(), Some(KdlValue::String("foo".into())) ); - assert!(string.parse(new_input("#\"\"\"#")).is_err()); + assert!(KdlParser::string.parse(new_input("#\"\"\"#")).is_err()); } #[test] fn multiline_raw_string() { assert_eq!( - string + KdlParser::string .parse(new_input("#\"\"\"\nfoo\nbar\nbaz\n\"\"\"#")) .unwrap(), Some(KdlValue::String("foo\nbar\nbaz".into())) ); assert_eq!( - string + KdlParser::string .parse(new_input("#\"\"\"\nfoo\r\nbar\nbaz\n\"\"\"#")) .unwrap(), Some(KdlValue::String("foo\nbar\nbaz".into())) ); assert_eq!( - string + KdlParser::string .parse(new_input("##\"\"\"\n foo\n bar\n baz\n \"\"\"##")) .unwrap(), Some(KdlValue::String("foo\n bar\nbaz".into())) ); assert_eq!( - string + KdlParser::string .parse(new_input("#\"\"\"\n foo\n \\nbar\n baz\n \"\"\"#")) .unwrap(), Some(KdlValue::String("foo\n \\nbar\n baz".into())) ); assert!( - string + KdlParser::string .parse(new_input("#\"\"\"\nfoo\n bar\n baz\n \"\"\"#")) .is_err() ); - 
assert!(string.parse(new_input("#\"\nfoo\nbar\nbaz\n\"#")).is_err()); - assert!(string.parse(new_input("\"\nfoo\nbar\nbaz\n\"")).is_err()); + assert!( + KdlParser::string + .parse(new_input("#\"\nfoo\nbar\nbaz\n\"#")) + .is_err() + ); + assert!( + KdlParser::string + .parse(new_input("\"\nfoo\nbar\nbaz\n\"")) + .is_err() + ); } #[test] fn ident() { + let parser = KdlParser::new(KdlVersion::V2); assert_eq!( - identifier.parse(new_input("foo")).unwrap(), + (|input: &mut Input<'_>| parser.identifier(input)) + .parse(new_input("foo")) + .unwrap(), KdlIdentifier { value: "foo".into(), repr: Some("foo".into()), @@ -1612,7 +2060,9 @@ mod string_tests { } ); assert_eq!( - identifier.parse(new_input("+.")).unwrap(), + (|input: &mut Input<'_>| parser.identifier(input)) + .parse(new_input("+.")) + .unwrap(), KdlIdentifier { value: "+.".into(), repr: Some("+.".into()), @@ -1623,32 +2073,6 @@ mod string_tests { } } -/// ```text -/// keyword := '#true' | '#false' | '#null' -/// keyword-number := '#inf' | '#-inf' | '#nan' -/// ```` -fn keyword(input: &mut Input<'_>) -> PResult { - // let _ = "#".parse_next(input)?; - // not(one_of(['#', '"'])).parse_next(input)?; - alt(( - "true".value(KdlValue::Bool(true)), - "false".value(KdlValue::Bool(false)), - "null".value(KdlValue::Null), - "nan".value(KdlValue::Float(f64::NAN)), - "inf".value(KdlValue::Float(f64::INFINITY)), - "-inf".value(KdlValue::Float(f64::NEG_INFINITY)), - )) - .context(cx().lbl("keyword").hlp( - "Available keywords in KDL are '#true', '#false', '#null', '#nan', '#inf', and '#-inf'; they are case-sensitive.", - )) - .parse_next(input) -} - -/// `bom := '\u{FEFF}'` -fn bom(input: &mut Input<'_>) -> PResult<()> { - "\u{FEFF}".void().parse_next(input) -} - pub(crate) fn is_disallowed_unicode(c: char) -> bool { matches!(c, '\u{0000}'..='\u{0008}' @@ -1660,34 +2084,6 @@ pub(crate) fn is_disallowed_unicode(c: char) -> bool { ) } -/// `disallowed-literal-code-points := See Table (Disallowed Literal Code -/// Points)` -/// 
```markdown -/// * The codepoints `U+0000-0008` or the codepoints `U+000E-001F` (various -/// control characters). -/// * `U+007F` (the Delete control character). -/// * Any codepoint that is not a [Unicode Scalar -/// Value](https://unicode.org/glossary/#unicode_scalar_value) (`U+D800-DFFF`). -/// * `U+200E-200F`, `U+202A-202E`, and `U+2066-2069`, the [unicode -/// "direction control" -/// characters](https://www.w3.org/International/questions/qa-bidi-unicode-controls) -/// * `U+FEFF`, aka Zero-width Non-breaking Space (ZWNBSP)/Byte Order Mark (BOM), -/// except as the first code point in a document. -/// ``` -fn disallowed_unicode(input: &mut Input<'_>) -> PResult<()> { - take_while(1.., is_disallowed_unicode) - .void() - .parse_next(input) -} - -/// `escline := '\\' ws* (single-line-comment | newline | eof)` -fn escline(input: &mut Input<'_>) -> PResult<()> { - "\\".parse_next(input)?; - wss.parse_next(input)?; - alt((single_line_comment, newline, eof.void())).parse_next(input)?; - wss.parse_next(input) -} - #[cfg(test)] #[test] fn escline_test() { @@ -1710,101 +2106,52 @@ pub(crate) static NEWLINES: [&str; 8] = [ "\u{2029}", ]; -/// `newline := ` -fn newline(input: &mut Input<'_>) -> PResult<()> { - alt(NEWLINES) - .void() - .context(cx().lbl("newline")) - .parse_next(input) -} - -fn wss(input: &mut Input<'_>) -> PResult<()> { - repeat(0.., ws).parse_next(input) -} - -fn wsp(input: &mut Input<'_>) -> PResult<()> { - repeat(1.., ws).parse_next(input) -} - -/// `ws := unicode-space | multi-line-comment`` -fn ws(input: &mut Input<'_>) -> PResult<()> { - alt((unicode_space, multi_line_comment)).parse_next(input) -} - static UNICODE_SPACES: [char; 18] = [ '\u{0009}', '\u{0020}', '\u{00A0}', '\u{1680}', '\u{2000}', '\u{2001}', '\u{2002}', '\u{2003}', '\u{2004}', '\u{2005}', '\u{2006}', '\u{2007}', '\u{2008}', '\u{2009}', '\u{200A}', '\u{202F}', '\u{205F}', '\u{3000}', ]; -/// `unicode-space := ` -fn unicode_space(input: &mut Input<'_>) -> PResult<()> { - 
one_of(UNICODE_SPACES).void().parse_next(input) -} - -/// `single-line-comment := '//' ^newline* (newline | eof)` -fn single_line_comment(input: &mut Input<'_>) -> PResult<()> { - "//".parse_next(input)?; - repeat_till( - 0.., - (not(alt((newline, eof.void()))), any), - alt((newline, eof.void())), - ) - .map(|(_, _): ((), _)| ()) - .parse_next(input) -} - -/// `multi-line-comment := '/*' commented-block` -fn multi_line_comment(input: &mut Input<'_>) -> PResult<()> { - "/*".parse_next(input)?; - cut_err(commented_block) - .context(cx().lbl("closing of multi-line comment")) - .parse_next(input) -} - -/// `commented-block := '*/' | (multi-line-comment | '*' | '/' | [^*/]+) commented-block` -fn commented_block(input: &mut Input<'_>) -> PResult<()> { - alt(( - "*/".void(), - preceded( - alt(( - multi_line_comment, - "*".void(), - "/".void(), - repeat(1.., none_of(['*', '/'])).map(|()| ()), - )), - commented_block, - ), - )) - .parse_next(input) -} - #[cfg(test)] #[test] fn multi_line_comment_test() { - assert!(multi_line_comment.parse(new_input("/* foo */")).is_ok()); - assert!(multi_line_comment.parse(new_input("/**/")).is_ok()); - assert!(multi_line_comment.parse(new_input("/*\nfoo\n*/")).is_ok()); - assert!(multi_line_comment.parse(new_input("/*\nfoo*/")).is_ok()); - assert!(multi_line_comment.parse(new_input("/*foo\n*/")).is_ok()); - assert!(multi_line_comment.parse(new_input("/* foo\n*/")).is_ok()); assert!( - multi_line_comment + KdlParser::multi_line_comment + .parse(new_input("/* foo */")) + .is_ok() + ); + assert!( + KdlParser::multi_line_comment + .parse(new_input("/**/")) + .is_ok() + ); + assert!( + KdlParser::multi_line_comment + .parse(new_input("/*\nfoo\n*/")) + .is_ok() + ); + assert!( + KdlParser::multi_line_comment + .parse(new_input("/*\nfoo*/")) + .is_ok() + ); + assert!( + KdlParser::multi_line_comment + .parse(new_input("/*foo\n*/")) + .is_ok() + ); + assert!( + KdlParser::multi_line_comment + .parse(new_input("/* foo\n*/")) + .is_ok() + ); + 
assert!( + KdlParser::multi_line_comment .parse(new_input("/* /*bar*/ foo\n*/")) .is_ok() ); } -/// slashdash := '/-' (node-space | line-space)* -fn slashdash(input: &mut Input<'_>) -> PResult<()> { - ( - "/-", - repeat(0.., alt((node_space, line_space))).map(|()| ()), - ) - .void() - .parse_next(input) -} - #[cfg(test)] #[test] fn slashdash_tests() { @@ -1812,6 +2159,7 @@ fn slashdash_tests() { let mut document = |input: &mut Input<'_>| KdlParser::new(KdlVersion::V2).document(input); let mut node = |input: &mut Input<'_>| parser_v2.node(input); + let mut node_entry = |input: &mut Input<'_>| parser_v2.node_entry(input); assert!(document.parse(new_input("/- foo bar")).is_ok()); assert!(document.parse(new_input("/- foo bar;")).is_ok()); @@ -1834,79 +2182,31 @@ fn slashdash_tests() { ); } -/// `number := keyword-number | hex | octal | binary | decimal` -fn number(input: &mut Input<'_>) -> PResult { - alt((float_value, integer_value)).parse_next(input) -} - -/// ```text -/// decimal := sign? integer ('.' integer)? exponent? -/// exponent := ('e' | 'E') sign? integer -/// ``` -fn float_value(input: &mut Input<'_>) -> PResult { - float.map(KdlValue::Float).parse_next(input) -} - -fn float(input: &mut Input<'_>) -> PResult { - ( - alt(( - ( - decimal::, - opt(preceded( - '.', - cut_err( - udecimal::.context( - cx().msg("Non-digit character found after the '.' of a float"), - ), - ), - )), - Caseless("e"), - opt(one_of(['-', '+'])), - cut_err(udecimal::.context( - cx().msg("Non-digit character found in the exponent part of a float").hlp("Floats with exponent parts should look like '2.0e123', or '43.3E-4'."), - )), - ) - .take(), - ( - decimal::, - '.', - cut_err( - udecimal:: - .context(cx().msg("Non-digit character found after the '.' 
of a float")), - ), - ) - .take(), - )), - value_terminator_check, - ) - .try_map(|(float_str, _)| T::parse_float(&str::replace(float_str, "_", ""))) - .context(cx().lbl("float")) - .parse_next(input) -} - #[cfg(test)] #[test] fn float_test() { use winnow::token::take; + let parser_v2 = KdlParser::new(KdlVersion::V2); + assert_eq!( - float_value.parse(new_input("12_34.56")).unwrap(), + KdlParser::float_value.parse(new_input("12_34.56")).unwrap(), KdlValue::Float(1234.56) ); assert_eq!( - float_value.parse(new_input("1234_.56")).unwrap(), + KdlParser::float_value.parse(new_input("1234_.56")).unwrap(), KdlValue::Float(1234.56) ); assert_eq!( - (float_value, take(1usize)) + (KdlParser::float_value, take(1usize)) .parse(new_input("1234.56 ")) .unwrap(), (KdlValue::Float(1234.56), " ") ); - assert!(float_value.parse(new_input("_1234.56")).is_err()); - assert!(float_value.parse(new_input("1234a.56")).is_err()); + assert!(KdlParser::float_value.parse(new_input("_1234.56")).is_err()); + assert!(KdlParser::float_value.parse(new_input("1234a.56")).is_err()); assert_eq!( - value + KdlParser::value .parse(new_input("2.5")) .unwrap() .map(|x| x.value().clone()), @@ -1914,175 +2214,90 @@ fn float_test() { ); } -fn integer_value(input: &mut Input<'_>) -> PResult { - alt(( - (hex, value_terminator_check).context(cx().lbl("hexadecimal number")), - (octal, value_terminator_check).context(cx().lbl("octal number")), - (binary, value_terminator_check).context(cx().lbl("binary number")), - (decimal, value_terminator_check).context(cx().lbl("integer")), - )) - .map(|(val, _)| KdlValue::Integer(val)) - .parse_next(input) -} - -/// Non-float decimal -fn decimal(input: &mut Input<'_>) -> PResult { - let positive = signum.parse_next(input)?; - udecimal:: - .try_map(|x| { - if positive { - Ok(x) - } else { - x.negated().ok_or(NegativeUnsignedError) - } - }) - .parse_next(input) -} - #[cfg(test)] #[test] fn decimal_test() { - assert_eq!(decimal::.parse(new_input("12_34")).unwrap(), 1234); - 
assert_eq!(decimal::.parse(new_input("1234_")).unwrap(), 1234); - assert!(decimal::.parse(new_input("_1234")).is_err()); - assert!(decimal::.parse(new_input("1234a")).is_err()); -} - -/// `integer := digit (digit | '_')*` -fn udecimal(input: &mut Input<'_>) -> PResult { - ( - digit1, - repeat( - 0.., - alt(("_", take_while(1.., AsChar::is_dec_digit).take())), - ), - ) - .try_map(|(l, r): (&str, Vec<&str>)| { - T::from_str_radix(&format!("{l}{}", str::replace(&r.join(""), "_", "")), 10) - }) - .parse_next(input) -} - -/// `hex := sign? '0x' hex-digit (hex-digit | '_')*` -fn hex(input: &mut Input<'_>) -> PResult { - let positive = signum.parse_next(input)?; - uhex:: - .try_map(|x| { - if positive { - Ok(x) - } else { - x.negated().ok_or(NegativeUnsignedError) - } - }) - .parse_next(input) -} - -fn uhex(input: &mut Input<'_>) -> PResult { - alt(("0x", "0X")).parse_next(input)?; - cut_err(( - hex_digit1, - repeat( - 0.., - alt(("_", take_while(1.., AsChar::is_hex_digit).take())), - ), - )) - .try_map(|(l, r): (&str, Vec<&str>)| { - T::from_str_radix(&format!("{l}{}", str::replace(&r.join(""), "_", "")), 16) - }) - .context(cx().lbl("hexadecimal")) - .parse_next(input) + assert_eq!( + KdlParser::decimal:: + .parse(new_input("12_34")) + .unwrap(), + 1234 + ); + assert_eq!( + KdlParser::decimal:: + .parse(new_input("1234_")) + .unwrap(), + 1234 + ); + assert!( + KdlParser::decimal:: + .parse(new_input("_1234")) + .is_err() + ); + assert!( + KdlParser::decimal:: + .parse(new_input("1234a")) + .is_err() + ); } #[cfg(test)] #[test] fn test_hex() { assert_eq!( - hex::.parse(new_input("0xdead_beef123")).unwrap(), + KdlParser::hex:: + .parse(new_input("0xdead_beef123")) + .unwrap(), 0xdeadbeef123 ); assert_eq!( - hex::.parse(new_input("0xDeAd_BeEf123")).unwrap(), + KdlParser::hex:: + .parse(new_input("0xDeAd_BeEf123")) + .unwrap(), 0xdeadbeef123 ); assert_eq!( - hex::.parse(new_input("0xdeadbeef123_")).unwrap(), + KdlParser::hex:: + .parse(new_input("0xdeadbeef123_")) + 
.unwrap(), 0xdeadbeef123 ); assert!( - hex:: + KdlParser::hex:: .parse(new_input("0xABCDEF0123456789abcdef0123456789")) .is_err(), "i128 overflow" ); - assert!(hex::.parse(new_input("0x_deadbeef123")).is_err()); + assert!( + KdlParser::hex:: + .parse(new_input("0x_deadbeef123")) + .is_err() + ); - assert!(hex::.parse(new_input("0xbeefg1")).is_err()); -} - -/// `octal := sign? '0o' [0-7] [0-7_]*` -fn octal(input: &mut Input<'_>) -> PResult { - let positive = signum.parse_next(input)?; - uoctal:: - .try_map(|x| { - if positive { - Ok(x) - } else { - x.negated().ok_or(NegativeUnsignedError) - } - }) - .parse_next(input) -} - -fn uoctal(input: &mut Input<'_>) -> PResult { - alt(("0o", "0O")).parse_next(input)?; - cut_err(( - oct_digit1, - repeat( - 0.., - alt(("_", take_while(1.., AsChar::is_oct_digit).take())), - ), - )) - .try_map(|(l, r): (&str, Vec<&str>)| { - T::from_str_radix(&format!("{l}{}", str::replace(&r.join(""), "_", "")), 8) - }) - .context(cx().lbl("octal")) - .parse_next(input) + assert!(KdlParser::hex::.parse(new_input("0xbeefg1")).is_err()); } #[cfg(test)] #[test] fn test_octal() { - assert_eq!(octal::.parse(new_input("0o12_34")).unwrap(), 0o1234); - assert_eq!(octal::.parse(new_input("0o1234_")).unwrap(), 0o1234); - assert!(octal::.parse(new_input("0o_12_34")).is_err()); - assert!(octal::.parse(new_input("0o89")).is_err()); -} - -/// `binary := sign? 
'0b' ('0' | '1') ('0' | '1' | '_')*` -fn binary(input: &mut Input<'_>) -> PResult { - let positive = signum.parse_next(input)?; - ubinary:: - .try_map(|x| { - if positive { - Ok(x) - } else { - x.negated().ok_or(NegativeUnsignedError) - } - }) - .parse_next(input) -} - -fn ubinary(input: &mut Input<'_>) -> PResult { - alt(("0b", "0B")).parse_next(input)?; - cut_err( - (alt(("0", "1")), repeat(0.., alt(("0", "1", "_")))).try_map( - move |(x, xs): (&str, Vec<&str>)| { - T::from_str_radix(&format!("{x}{}", str::replace(&xs.join(""), "_", "")), 2) - }, - ), - ) - .context(cx().lbl("binary")) - .parse_next(input) + assert_eq!( + KdlParser::octal:: + .parse(new_input("0o12_34")) + .unwrap(), + 0o1234 + ); + assert_eq!( + KdlParser::octal:: + .parse(new_input("0o1234_")) + .unwrap(), + 0o1234 + ); + assert!( + KdlParser::octal:: + .parse(new_input("0o_12_34")) + .is_err() + ); + assert!(KdlParser::octal::.parse(new_input("0o89")).is_err()); } #[cfg(test)] @@ -2090,26 +2305,30 @@ fn ubinary(input: &mut Input<'_>) -> PResult { fn test_binary() { use winnow::token::take; - assert_eq!(binary::.parse(new_input("0b10_01")).unwrap(), 0b1001); - assert_eq!(binary::.parse(new_input("0b1001_")).unwrap(), 0b1001); - assert!(binary::.parse(new_input("0b_10_01")).is_err()); assert_eq!( - (binary::, take(4usize)) + KdlParser::binary:: + .parse(new_input("0b10_01")) + .unwrap(), + 0b1001 + ); + assert_eq!( + KdlParser::binary:: + .parse(new_input("0b1001_")) + .unwrap(), + 0b1001 + ); + assert!( + KdlParser::binary:: + .parse(new_input("0b_10_01")) + .is_err() + ); + assert_eq!( + (KdlParser::binary::, take(4usize)) .parse(new_input("0b12389")) .unwrap(), (1, "2389") ); - assert!(binary::.parse(new_input("123")).is_err()); -} - -fn signum(input: &mut Input<'_>) -> PResult { - let sign = opt(alt(('+', '-'))).parse_next(input)?; - let mult = if let Some(sign) = sign { - sign == '+' - } else { - true - }; - Ok(mult) + assert!(KdlParser::binary::.parse(new_input("123")).is_err()); } trait 
FromStrRadix {