diff --git a/Cargo.lock b/Cargo.lock index e3c4944..cc00f67 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -368,6 +368,15 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + [[package]] name = "is_ci" version = "1.2.0" @@ -395,6 +404,7 @@ dependencies = [ name = "kdl" version = "6.5.0" dependencies = [ + "indoc", "kdl 4.7.1", "miette 7.6.0", "num", @@ -829,6 +839,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "ryu" version = "1.0.20" diff --git a/Cargo.toml b/Cargo.toml index 8b6d0c8..b785cc0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ miette = { version = "7.6.0", default-features = false } miette = { workspace = true, features = ["derive", "fancy"] } thiserror = "2.0.12" pretty_assertions = "1.3.0" +indoc = "2" # The profile that 'dist' will build with [profile.dist] diff --git a/src/document.rs b/src/document.rs index 6d313a1..931c00e 100644 --- a/src/document.rs +++ b/src/document.rs @@ -897,6 +897,225 @@ foo 1 bar=0xdeadbeef { Ok(()) } + /// Parses `input`, runs autoformat with `preserve_multiline_strings(true)` + /// and the given indent, and returns the formatted string. + fn autoformat_preserve(input: &str, indent: &str) -> miette::Result<String> { + let mut doc: KdlDocument = input.parse()?; + KdlDocument::autoformat_config( + &mut doc, + &FormatConfig::builder() + .indent(indent) + .preserve_multiline_strings(true) + .build(), + ); + Ok(doc.to_string()) + } + + #[test] + fn autoformat_preserve_multiline_default_escapes() -> miette::Result<()> { + let input = indoc::indoc! 
{r##" + node """ + hey + world + """ + "##}; + let mut doc: KdlDocument = input.parse()?; + KdlDocument::autoformat(&mut doc); + pretty_assertions::assert_eq!(doc.to_string(), "node \"hey\\nworld\"\n"); + Ok(()) + } + + #[test] + fn autoformat_preserve_multiline_top_level() -> miette::Result<()> { + let input = indoc::indoc! {r##" + node """ + hey + world + """ + "##}; + let expected = indoc::indoc! {r##" + node """ + hey + world + """ + "##}; + pretty_assertions::assert_eq!(autoformat_preserve(input, " ")?, expected); + Ok(()) + } + + #[test] + fn autoformat_preserve_multiline_nested() -> miette::Result<()> { + let input = indoc::indoc! {r##" + parent { + child """ + line1 + line2 + """ + } + "##}; + let expected = indoc::indoc! {r##" + parent { + child """ + line1 + line2 + """ + } + "##}; + pretty_assertions::assert_eq!(autoformat_preserve(input, " ")?, expected); + Ok(()) + } + + #[test] + fn autoformat_preserve_multiline_custom_indent() -> miette::Result<()> { + let input = indoc::indoc! {r##" + node """ + a + b + """ + "##}; + let expected = indoc::indoc! {r##" + node """ + a + b + """ + "##}; + pretty_assertions::assert_eq!(autoformat_preserve(input, " ")?, expected); + Ok(()) + } + + #[test] + fn autoformat_preserve_multiline_empty_lines() -> miette::Result<()> { + // Empty lines inside the body should have no trailing whitespace, + // which the KDL spec permits even when other lines are indented. + let input = indoc::indoc! {r##" + node """ + a + + b + """ + "##}; + let expected = indoc::indoc! {r##" + node """ + a + + b + """ + "##}; + let formatted = autoformat_preserve(input, " ")?; + pretty_assertions::assert_eq!(formatted, expected); + // And it must round-trip back to the same value. 
+ let reparsed: KdlDocument = formatted.parse()?; + assert_eq!( + reparsed.nodes()[0].entries()[0].value().as_string(), + Some("a\n\nb") + ); + Ok(()) + } + + #[test] + fn autoformat_preserve_multiline_single_line_source_unchanged() -> miette::Result<()> { + // A single-line escaped source string should NOT be promoted to + // multi-line even if its value contains newlines — preservation only + // applies to values that were multi-line in the source. + pretty_assertions::assert_eq!( + autoformat_preserve(r#"node "a\nb""#, " ")?, + "node \"a\\nb\"\n" + ); + Ok(()) + } + + #[test] + fn autoformat_preserve_multiline_raw() -> miette::Result<()> { + let input = indoc::indoc! {r###" + node #""" + hey + world + """# + "###}; + let expected = indoc::indoc! {r###" + node #""" + hey + world + """# + "###}; + let formatted = autoformat_preserve(input, " ")?; + pretty_assertions::assert_eq!(formatted, expected); + let reparsed: KdlDocument = formatted.parse()?; + assert_eq!( + reparsed.nodes()[0].entries()[0].value().as_string(), + Some("hey\nworld") + ); + Ok(()) + } + + #[test] + fn autoformat_preserve_multiline_raw_bumps_hashes_for_collision() -> miette::Result<()> { + // Source is `##"""..."""##` (two hashes). Body contains `"""#` which + // is safe at two hashes. After autoformat the formatter must pick a + // hash count that still avoids the collision. + let input = indoc::indoc! {r####" + node ##""" + a + """# b + """## + "####}; + let formatted = autoformat_preserve(input, " ")?; + assert!(formatted.contains("##\"\"\""), "got: {formatted}"); + let reparsed: KdlDocument = formatted.parse()?; + assert_eq!( + reparsed.nodes()[0].entries()[0].value().as_string(), + Some("a\n\"\"\"# b") + ); + Ok(()) + } + + #[test] + fn autoformat_preserve_multiline_property_entry() -> miette::Result<()> { + let input = indoc::indoc! {r##" + node key=""" + one + two + """ + "##}; + let expected = indoc::indoc! 
{r##" + node key=""" + one + two + """ + "##}; + pretty_assertions::assert_eq!(autoformat_preserve(input, " ")?, expected); + Ok(()) + } + + #[test] + fn autoformat_preserve_multiline_reparse_roundtrip() -> miette::Result<()> { + let input = indoc::indoc! {r##" + node """ + hey + world + """ + "##}; + let formatted = autoformat_preserve(input, " ")?; + let reparsed: KdlDocument = formatted.parse()?; + assert_eq!( + reparsed.nodes()[0].entries()[0].value().as_string(), + Some("hey\nworld") + ); + Ok(()) + } + + #[test] + fn autoformat_preserve_multiline_falls_back_on_triple_quote() -> miette::Result<()> { + // A value containing `"""` can't be emitted as a plain triple-quoted + // multi-line string, so the escaped single-line form is expected. + // NOTE(review): this input is single-line in the source, so the `"""` check may never be reached — confirm. + pretty_assertions::assert_eq!( + autoformat_preserve(r#"node "a\n\"\"\"b""#, " ")?, + "node \"a\\n\\\"\\\"\\\"b\"\n" + ); + Ok(()) + } + #[cfg(feature = "span")] fn check_spans_for_doc(doc: &KdlDocument, source: &impl miette::SourceCode) { for node in doc.nodes() { diff --git a/src/entry.rs b/src/entry.rs index 94f87b8..4d9765e 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -2,7 +2,7 @@ use miette::SourceSpan; use std::{fmt::Display, str::FromStr}; -use crate::{v2_parser, KdlError, KdlIdentifier, KdlValue}; +use crate::{fmt::FormatConfig, v2_parser, KdlError, KdlIdentifier, KdlValue}; /// KDL Entries are the "arguments" to KDL nodes: either a (positional) /// [`Argument`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#argument) or @@ -171,15 +171,35 @@ impl KdlEntry { /// Auto-formats this entry. pub fn autoformat(&mut self) { + self.autoformat_config(&FormatConfig::default()); + } + + /// Auto-formats this entry according to `config`. 
+ pub fn autoformat_config(&mut self, config: &FormatConfig<'_>) { // TODO once MSRV allows (1.80.0): //self.format.take_if(|f| !f.autoformat_keep); - if !self + let keep = self .format .as_ref() .map(|f| f.autoformat_keep) - .unwrap_or(false) - { + .unwrap_or(false); + if !keep { + let source_kind = self + .format + .as_ref() + .and_then(|f| multiline_source_kind(&f.value_repr)); self.format = None; + if config.preserve_multiline_strings { + if let Some(kind) = source_kind { + if let Some(repr) = multiline_string_repr(&self.value, config, kind) { + self.format = Some(KdlEntryFormat { + value_repr: repr, + leading: " ".into(), + ..Default::default() + }); + } + } + } } else { #[cfg(feature = "v1")] self.ensure_v2(); @@ -474,6 +494,134 @@ impl FromStr for KdlEntry { } } +/// Which kind of multi-line string was used in the original source. +#[derive(Debug, Clone, Copy)] +enum MultilineKind { + /// A plain triple-quoted multi-line string: `"""..."""`. + Plain, + /// A raw multi-line string with the given number of `#` delimiters: + /// `#"""..."""#`, `##"""..."""##`, etc. + Raw(usize), +} + +/// Inspects an entry's `value_repr` to determine whether the source was a +/// multi-line string, and if so, which flavor. Returns `None` for values that +/// weren't multi-line strings in the source (including values with no format +/// at all, e.g. programmatically constructed entries). +fn multiline_source_kind(repr: &str) -> Option<MultilineKind> { + let trimmed = repr.trim_start(); + if trimmed.starts_with("\"\"\"") { + return Some(MultilineKind::Plain); + } + let hashes = trimmed.chars().take_while(|&c| c == '#').count(); + if hashes > 0 && trimmed[hashes..].starts_with("\"\"\"") { + return Some(MultilineKind::Raw(hashes)); + } + None +} + +/// Scans the body for the longest run of `#` that immediately follows a `"""`, +/// so we can pick a hash count for a raw multi-line string that won't collide +/// with the closing delimiter. 
+fn max_hash_run_after_triple_quote(s: &str) -> usize { + let bytes = s.as_bytes(); + let mut max_run = 0usize; + let mut i = 0; + while i + 3 <= bytes.len() { + if &bytes[i..i + 3] == b"\"\"\"" { + let mut n = 0; + let mut j = i + 3; + while j < bytes.len() && bytes[j] == b'#' { + n += 1; + j += 1; + } + if n > max_run { + max_run = n; + } + i = if n == 0 { i + 1 } else { j }; // on a miss, step one byte so a longer quote run such as `""""#` is still matched + } else { + i += 1; + } + } + max_run +} + +/// Builds a `value_repr` for `value` in the requested multi-line style, +/// re-indented one level deeper than `config.indent_level`. Returns `None` for +/// values that are not strings, don't contain newlines, or can't safely be +/// emitted in the requested kind (e.g. raw multi-line can't represent values +/// containing control characters other than tab/newline). +fn multiline_string_repr( + value: &KdlValue, + config: &FormatConfig<'_>, + kind: MultilineKind, +) -> Option<String> { + let s = value.as_string()?; + if !s.contains('\n') { + return None; + } + let mut prefix = String::new(); + for _ in 0..config.indent_level + 1 { + prefix.push_str(config.indent); + } + match kind { + MultilineKind::Plain => { + // Can't safely embed `"""` inside a plain triple-quoted string. + if s.contains("\"\"\"") { + return None; + } + let mut out = String::from("\"\"\"\n"); + for line in s.split('\n') { + if !line.is_empty() { + out.push_str(&prefix); + for ch in line.chars() { + match ch { + '\\' => out.push_str("\\\\"), + '\r' => out.push_str("\\r"), + '\t' => out.push_str("\\t"), + '\u{08}' => out.push_str("\\b"), + '\u{0C}' => out.push_str("\\f"), + _ => out.push(ch), + } + } + } + out.push('\n'); + } + out.push_str(&prefix); + out.push_str("\"\"\""); + Some(out) + } + MultilineKind::Raw(min_hashes) => { + // Raw strings can't escape anything, so unrepresentable control + // chars force a fallback. 
+ for ch in s.chars() { + match ch { + '\n' | '\t' => {} + c if (c as u32) < 0x20 => return None, + '\u{7F}' => return None, + _ => {} + } + } + let needed = (max_hash_run_after_triple_quote(s) + 1).max(min_hashes.max(1)); + let hash_str = "#".repeat(needed); + let mut out = String::with_capacity(s.len() + 2 * needed + 8); + out.push_str(&hash_str); + out.push_str("\"\"\"\n"); + for line in s.split('\n') { + if !line.is_empty() { + out.push_str(&prefix); + out.push_str(line); + } + out.push('\n'); + } + out.push_str(&prefix); + out.push_str("\"\"\""); + out.push_str(&hash_str); + Some(out) + } + } +} + /// Formatting details for [`KdlEntry`]s. #[derive(Debug, Default, Clone, Eq, PartialEq, Hash)] pub struct KdlEntryFormat { diff --git a/src/fmt.rs b/src/fmt.rs index 238fb75..0cc22b7 100644 --- a/src/fmt.rs +++ b/src/fmt.rs @@ -18,6 +18,12 @@ pub struct FormatConfig<'a> { /// Whether to keep individual entry formatting. pub entry_autoformate_keep: bool, + + /// Whether to keep strings that were written as multi-line strings in the + /// source as triple-quoted multi-line strings, re-indented to match the current + /// indent level. When `false` (the default), such strings are emitted as + /// single-line escaped strings (e.g. `"foo\nbar"`). + pub preserve_multiline_strings: bool, } /// See field documentation for defaults. @@ -48,6 +54,7 @@ impl<'a> FormatConfigBuilder<'a> { indent: " ", no_comments: false, entry_autoformate_keep: false, + preserve_multiline_strings: false, }) } @@ -105,6 +112,27 @@ impl<'a> FormatConfigBuilder<'a> { self } + /// Whether to keep strings that were written as multi-line strings in the source as + /// triple-quoted multi-line strings (re-indented to match the current indent level). + /// Defaults to `false` iff not specified. 
+ pub const fn maybe_preserve_multiline_strings( + mut self, + preserve_multiline_strings: Option<bool>, + ) -> Self { + if let Some(preserve_multiline_strings) = preserve_multiline_strings { + self.0.preserve_multiline_strings = preserve_multiline_strings; + } + self + } + + /// Whether to keep strings that were written as multi-line strings in the source as + /// triple-quoted multi-line strings (re-indented to match the current indent level). + /// Defaults to `false` iff not specified. + pub const fn preserve_multiline_strings(mut self, preserve_multiline_strings: bool) -> Self { + self.0.preserve_multiline_strings = preserve_multiline_strings; + self + } + /// Builds the [`FormatConfig`]. pub const fn build(self) -> FormatConfig<'a> { self.0 @@ -168,6 +196,7 @@ mod test { indent: " \t", no_comments: true, entry_autoformate_keep: false, + preserve_multiline_strings: false, } )); Ok(()) diff --git a/src/node.rs b/src/node.rs index af9914b..59ee2a1 100644 --- a/src/node.rs +++ b/src/node.rs @@ -312,7 +312,7 @@ impl KdlNode { if config.entry_autoformate_keep { entry.keep_format(); } - entry.autoformat(); + entry.autoformat_config(config); } if let Some(children) = self.children.as_mut() { children.autoformat_config(&FormatConfig {