From a08885b597f9b1b9f2e85e13919ee5908cb5f785 Mon Sep 17 00:00:00 2001 From: John Rinehart Date: Wed, 13 May 2026 14:19:51 -0400 Subject: [PATCH] Keep standalone trailing comments indented Autoformat can receive standalone comments in trailing decor after a completed node. Preserve that intent by trimming trailing decor carefully and formatting document/node trailing comments at the active indentation level, so comments align with sibling nodes instead of collapsing to a single leading space. This also restores the broader autoformat regression by handling slashdash trailing decor and stale line-escape markers after node terminators, then unignores the test with current KDL v2 syntax and canonical formatter output. Constraint: Fix formatter behavior in kdl-rs so downstream tools such as kdlfmt inherit the behavior Rejected: Adding downstream-specific fixtures | tests should describe generic KDL formatter behavior Rejected: Reworking parser attachment of comment decor fields | higher risk than needed for a targeted formatter fix Confidence: high Scope-risk: moderate Directive: Re-check document and node trailing decor if parser attachment semantics change Tested: nix shell nixpkgs#cargo nixpkgs#gcc -c cargo test --all-features Tested: nix shell nixpkgs#cargo nixpkgs#rustfmt -c cargo fmt --check --- src/document.rs | 195 +++++++++++++++++++++++++++++++++++++++++++++--- src/fmt.rs | 182 +++++++++++++++++++++++++++++++++++++++++++- src/node.rs | 76 ++++++++++++++++--- 3 files changed, 431 insertions(+), 22 deletions(-) diff --git a/src/document.rs b/src/document.rs index 6d313a1..d7ceb15 100644 --- a/src/document.rs +++ b/src/document.rs @@ -260,7 +260,7 @@ impl KdlDocument { node.autoformat_config(config); } if let Some(KdlDocumentFormat { trailing, .. }) = (*self).format_mut() { - crate::fmt::autoformat_trailing(trailing, config.no_comments); + crate::fmt::autoformat_indented_trailing(trailing, config); if !has_nodes { trailing.push('\n'); } @@ -758,7 +758,6 @@ baz ); } - #[ignore = "There's still issues around formatting comments and esclines."] #[test] fn autoformat() -> miette::Result<()> { let mut doc: KdlDocument = r##" @@ -770,10 +769,10 @@ baz child2 2 /-3 // comment - child3 " + child3 """ string\t - " \ + """ \ { /* @@ -800,22 +799,21 @@ baz assert_eq!( doc.to_string(), r#"/* x */ -foo 1 bar=0xdeadbeef { +foo 1 bar=3735928559 { child1 1 // child 2 comment child2 2 /-3 // comment child3 "\nstring\t" { /* - - - multiline*/ + multiline*/ inner1 value inner2 { inner3 } } } -// trailing comment here"# +// trailing comment here +"# ); Ok(()) } @@ -838,6 +836,185 @@ foo 1 bar=0xdeadbeef { Ok(()) } + #[test] + fn autoformat_handles_same_line_terminator_comment() -> miette::Result<()> { + let mut doc: KdlDocument = r#"node 1 // keep me +next"# + .parse()?; + KdlDocument::autoformat(&mut doc); + assert_eq!( + doc.to_string(), + r#"node 1 // keep me +next +"# + ); + + let mut doc: KdlDocument = r#"node /* c */ // keep me +next"# + .parse()?; + KdlDocument::autoformat(&mut doc); + assert_eq!( + doc.to_string(), + r#"node /* c */ // keep me +next +"# + ); + + let mut doc: KdlDocument = r#"node 1 // remove me +next"# + .parse()?; + KdlDocument::autoformat_no_comments(&mut doc); + assert_eq!( + doc.to_string(), + r#"node 1 +next +"# + ); + Ok(()) + } + + #[test] + fn autoformat_strips_line_escape_before_decorated_terminator() -> miette::Result<()> { + let mut doc: KdlDocument = r#"node \ + /-1 // c +next"# + .parse()?; + KdlDocument::autoformat(&mut doc); + assert_eq!( + doc.to_string(), + r#"node /-1 // c +next +"# + ); + + let mut doc: KdlDocument = r#"node \ + /* c */ // keep me +next"# + .parse()?; + KdlDocument::autoformat(&mut doc); + assert_eq!( + doc.to_string(), + r#"node /* c */ // keep me +next +"# + ); + Ok(()) + } + + #[test] + fn autoformat_strips_line_escape_after_inline_decor() -> miette::Result<()> { + let mut doc: KdlDocument = r#"node /* c */ \ + // keep me +next"# + .parse()?; + KdlDocument::autoformat(&mut doc); + assert_eq!( + doc.to_string(), + r#"node /* c */ // keep me +next +"# + ); + + let reparsed: KdlDocument = doc.to_string().parse()?; + assert_eq!(reparsed.nodes().len(), 2); + Ok(()) + } + + #[test] + fn autoformat_preserves_non_line_escape_backslash_decor() -> miette::Result<()> { + let mut doc: KdlDocument = r#"node \ // keep"#.parse()?; + KdlDocument::autoformat(&mut doc); + assert_eq!( + doc.to_string(), + r#"node \ // keep + +"# + ); + + let mut doc: KdlDocument = r#"node \ /* keep */"#.parse()?; + KdlDocument::autoformat(&mut doc); + assert_eq!( + doc.to_string(), + r#"node \ /* keep */ + +"# + ); + Ok(()) + } + + #[test] + fn autoformat_normalizes_inline_block_comment_spacing() -> miette::Result<()> { + let mut doc: KdlDocument = r#"node /* c */ +next"# + .parse()?; + KdlDocument::autoformat(&mut doc); + assert_eq!( + doc.to_string(), + r#"node /* c */ + +next +"# + ); + Ok(()) + } + + #[test] + fn autoformat_indents_trailing_comment() -> miette::Result<()> { + let mut doc: KdlDocument = r#"root { + child "value" + // comment +} +"# + .parse()?; + + KdlDocument::autoformat(&mut doc); + + assert_eq!( + doc.to_string(), + r#"root { + child value + // comment +} +"# + ); + Ok(()) + } + + #[cfg(feature = "v1")] + #[test] + fn autoformat_v1_preserves_same_line_trailing_comments() -> miette::Result<()> { + let mut doc = KdlDocument::parse_v1( + r#"node 1 // comment +next"#, + )?; + + KdlDocument::autoformat(&mut doc); + + assert_eq!( + doc.to_string(), + r#"node 1 // comment +next +"# + ); + + let mut doc = KdlDocument::parse_v1( + r#"root { + child 1 // comment +}"#, + )?; + + KdlDocument::autoformat(&mut doc); + + assert_eq!( + doc.to_string(), + r#"root { + child 1 // comment +} +"# + ); + Ok(()) + } + #[test] fn simple_autoformat_two_spaces() -> miette::Result<()> { let mut doc: KdlDocument = "a { b { c { }; }; }".parse().unwrap(); diff --git a/src/fmt.rs b/src/fmt.rs index 238fb75..8fdab25 100644 --- a/src/fmt.rs +++ b/src/fmt.rs @@ -137,10 +137,19 @@ pub(crate) fn autoformat_trailing(decor: &mut String, no_comments: bool) { if decor.is_empty() { return; } - *decor = decor.trim().to_string(); + let starts_on_own_line = decor.starts_with(['\n', '\r']); + // Preserve leading whitespace/newlines only for comments that are intended + // to remain on their own lines. Same-line decor should still normalize to a + // single separator before the comment. + *decor = if starts_on_own_line { + decor.trim_end() + } else { + decor.trim() + } + .to_string(); let mut result = String::new(); if !decor.is_empty() && !no_comments { - if decor.trim_start() == &decor[..] { + if !starts_on_own_line && decor.trim_start() == &decor[..] { write!(result, " ").unwrap(); } for comment in decor.lines() { @@ -150,6 +159,175 @@ pub(crate) fn autoformat_trailing(decor: &mut String, no_comments: bool) { *decor = result; } +/// Removes parsed line-continuation backslashes before formatting node-boundary decor. +pub(crate) fn strip_leading_line_escape(decor: &mut String) { + while let Some((start, end)) = line_escape_range(decor) { + decor.replace_range(start..end, ""); + } +} + +fn line_escape_range(decor: &str) -> Option<(usize, usize)> { + let mut idx = 0; + while idx < decor.len() { + let rest = &decor[idx..]; + if rest.starts_with("/*") { + idx = skip_multiline_comment(decor, idx)?; + continue; + } + + let ch = rest.chars().next()?; + if ch == '\\' { + if let Some(end) = line_escape_end(decor, idx + ch.len_utf8()) { + return Some((idx, end)); + } + } + idx += ch.len_utf8(); + } + None +} + +fn line_escape_end(decor: &str, mut idx: usize) -> Option { + while idx < decor.len() { + let rest = &decor[idx..]; + if rest.starts_with("/*") { + idx = skip_multiline_comment(decor, idx)?; + continue; + } + + let ch = rest.chars().next()?; + if is_unicode_space(ch) { + idx += ch.len_utf8(); + continue; + } + + return newline_len(rest).map(|len| trim_start_idx(decor, idx + len)); + } + None +} + +fn skip_multiline_comment(decor: &str, start: usize) -> Option { + let mut depth = 1; + let mut idx = start + "/*".len(); + while idx < decor.len() { + let rest = &decor[idx..]; + if rest.starts_with("/*") { + depth += 1; + idx += "/*".len(); + } else if rest.starts_with("*/") { + depth -= 1; + idx += "*/".len(); + if depth == 0 { + return Some(idx); + } + } else { + idx += rest.chars().next()?.len_utf8(); + } + } + None +} + +fn newline_len(rest: &str) -> Option { + [ + "\r\n", "\r", "\n", "\u{0085}", "\u{000B}", "\u{000C}", "\u{2028}", "\u{2029}", + ] + .iter() + .find_map(|newline| rest.starts_with(newline).then_some(newline.len())) +} + +fn trim_start_idx(decor: &str, mut idx: usize) -> usize { + while idx < decor.len() { + let rest = &decor[idx..]; + let Some(ch) = rest.chars().next() else { + return idx; + }; + if !is_unicode_space(ch) { + return idx; + } + idx += ch.len_utf8(); + } + idx +} + +fn is_unicode_space(ch: char) -> bool { + matches!( + ch, + '\u{0009}' + | '\u{0020}' + | '\u{00A0}' + | '\u{1680}' + | '\u{2000}' + | '\u{2001}' + | '\u{2002}' + | '\u{2003}' + | '\u{2004}' + | '\u{2005}' + | '\u{2006}' + | '\u{2007}' + | '\u{2008}' + | '\u{2009}' + | '\u{200A}' + | '\u{202F}' + | '\u{205F}' + | '\u{3000}' + ) +} + +pub(crate) fn autoformat_indented_trailing(decor: &mut String, config: &FormatConfig<'_>) { + if decor.is_empty() { + return; + } + *decor = decor.trim().to_string(); + if decor == "\\" { + // A line escape only affects source layout. Once autoformatting has + // normalized node boundaries, keeping it would emit a stray backslash. + decor.clear(); + return; + } + *decor = format_indented_comment_lines(decor, config, ""); +} + +pub(crate) fn autoformat_node_terminator(terminator: &mut String, config: &FormatConfig<'_>) { + if !terminator.starts_with('\n') { + let decor = terminator.trim(); + // Same-line `//` comments are node terminators in v2; keep them on the + // node line unless the caller is explicitly stripping comments. + if !config.no_comments && decor.starts_with("//") { + *terminator = format!(" {decor}\n"); + return; + } + *terminator = "\n".into(); + return; + } + + let decor = terminator.trim(); + *terminator = format_indented_comment_lines(decor, config, "\n"); + if terminator.is_empty() { + terminator.push('\n'); + } +} + +fn format_indented_comment_lines(decor: &str, config: &FormatConfig<'_>, prefix: &str) -> String { + let mut result = String::from(prefix); + if decor.is_empty() || config.no_comments { + return result; + } + + for line in decor.lines() { + let trimmed = line.trim(); + if !trimmed.is_empty() { + push_indent(&mut result, config); + writeln!(result, "{trimmed}").unwrap(); + } + } + result +} + +fn push_indent(result: &mut String, config: &FormatConfig<'_>) { + for _ in 0..config.indent_level { + result.push_str(config.indent); + } +} + #[cfg(test)] mod test { use super::*; diff --git a/src/node.rs b/src/node.rs index af9914b..df042aa 100644 --- a/src/node.rs +++ b/src/node.rs @@ -285,17 +285,9 @@ impl KdlNode { }) = self.format_mut() { crate::fmt::autoformat_leading(leading, config); - crate::fmt::autoformat_trailing(before_terminator, config.no_comments); - crate::fmt::autoformat_trailing(trailing, config.no_comments); - *trailing = trailing.trim().into(); - if !terminator.starts_with('\n') { - *terminator = "\n".into(); - } - if let Some(c) = trailing.chars().next() { - if !c.is_whitespace() { - trailing.insert(0, ' '); - } - } + Self::autoformat_before_terminator(before_terminator, terminator, config); + Self::autoformat_after_terminator(before_terminator, terminator, trailing, config); + crate::fmt::autoformat_node_terminator(terminator, config); *before_children = " ".into(); } else { @@ -329,6 +321,52 @@ impl KdlNode { } } + fn autoformat_before_terminator( + before_terminator: &mut String, + terminator: &str, + config: &FormatConfig<'_>, + ) { + crate::fmt::strip_leading_line_escape(before_terminator); + let before_terminator_trimmed = before_terminator.trim(); + // Parser attachment can put slashdash decor, or inline decor before a + // same-line `//` terminator, in `before_terminator`. Both still belong + // on the formatted node line. + if !config.no_comments + && (is_single_line_slashdash(before_terminator_trimmed) + || (is_same_line_comment_terminator(terminator) + && is_single_line_decor(before_terminator_trimmed))) + { + *before_terminator = format!(" {before_terminator_trimmed}"); + return; + } + + crate::fmt::autoformat_trailing(before_terminator, config.no_comments); + } + + fn autoformat_after_terminator( + before_terminator: &mut String, + terminator: &mut String, + trailing: &mut String, + config: &FormatConfig<'_>, + ) { + let trailing_trimmed = trailing.trim(); + if !config.no_comments && is_single_line_slashdash(trailing_trimmed) { + // Slashdash decor can be attached on either side of the parsed + // terminator; it still formats as part of the node line. + *before_terminator = format!(" {trailing_trimmed}"); + trailing.clear(); + } else if !config.no_comments + && is_v1_same_line_trailing_comment(trailing, trailing_trimmed) + { + // KDL v1 conversion stores same-line `//` node comments in + // trailing decor; v2 stores them in the terminator. + *terminator = format!(" {trailing_trimmed}\n"); + trailing.clear(); + } else { + crate::fmt::autoformat_indented_trailing(trailing, config); + } + } + /// Parses a string into a node. /// /// If the `v1-fallback` feature is enabled, this method will first try to @@ -878,6 +916,22 @@ impl KdlNode { } } +fn is_single_line_decor(decor: &str) -> bool { + !decor.is_empty() && !decor.contains('\n') +} + +fn is_single_line_slashdash(decor: &str) -> bool { + decor.starts_with("/-") && is_single_line_decor(decor) +} + +fn is_same_line_comment_terminator(terminator: &str) -> bool { + !terminator.starts_with('\n') && terminator.trim().starts_with("//") +} + +fn is_v1_same_line_trailing_comment(trailing: &str, trimmed: &str) -> bool { + trimmed.starts_with("//") && !trailing.contains('\n') +} + /// Formatting details for [`KdlNode`]. #[derive(Debug, Clone, Default, Hash, Eq, PartialEq)] pub struct KdlNodeFormat {