feat(fmt): add preserve_multiline_strings option

Adds an opt-in FormatConfig flag that preserves multi-line string
values as triple-quoted strings (plain or raw) during autoformat,
re-indented to match the current indent level. When disabled
(default), multi-line strings continue to be emitted as escaped
single-line strings.

Raw-string hash counts are bumped when the body contains a
colliding `"""#...` run, and unrepresentable values (e.g. bodies
containing `"""` for plain, control chars for raw) fall back to
the escaped single-line form.
This commit is contained in:
Ian Macalinao 2026-04-19 14:48:10 +08:00
parent 6841734233
commit eceac9aaf6
No known key found for this signature in database
GPG Key ID: 23023A87F0BE00F3
6 changed files with 418 additions and 5 deletions

16
Cargo.lock generated
View File

@ -368,6 +368,15 @@ dependencies = [
"icu_properties", "icu_properties",
] ]
[[package]]
name = "indoc"
version = "2.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
dependencies = [
"rustversion",
]
[[package]] [[package]]
name = "is_ci" name = "is_ci"
version = "1.2.0" version = "1.2.0"
@ -395,6 +404,7 @@ dependencies = [
name = "kdl" name = "kdl"
version = "6.5.0" version = "6.5.0"
dependencies = [ dependencies = [
"indoc",
"kdl 4.7.1", "kdl 4.7.1",
"miette 7.6.0", "miette 7.6.0",
"num", "num",
@ -829,6 +839,12 @@ dependencies = [
"windows-sys 0.61.2", "windows-sys 0.61.2",
] ]
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]] [[package]]
name = "ryu" name = "ryu"
version = "1.0.20" version = "1.0.20"

View File

@ -33,6 +33,7 @@ miette = { version = "7.6.0", default-features = false }
miette = { workspace = true, features = ["derive", "fancy"] } miette = { workspace = true, features = ["derive", "fancy"] }
thiserror = "2.0.12" thiserror = "2.0.12"
pretty_assertions = "1.3.0" pretty_assertions = "1.3.0"
indoc = "2"
# The profile that 'dist' will build with # The profile that 'dist' will build with
[profile.dist] [profile.dist]

View File

@ -897,6 +897,225 @@ foo 1 bar=0xdeadbeef {
Ok(()) Ok(())
} }
/// Test helper: parses `input` as a KDL document, autoformats it with
/// `preserve_multiline_strings` switched on and `indent` as the indentation
/// string, then returns the rendered document text.
///
/// Parse failures are propagated to the caller.
fn autoformat_preserve(input: &str, indent: &str) -> miette::Result<String> {
    let config = FormatConfig::builder()
        .preserve_multiline_strings(true)
        .indent(indent)
        .build();
    let mut parsed = input.parse::<KdlDocument>()?;
    parsed.autoformat_config(&config);
    Ok(parsed.to_string())
}
/// With plain `autoformat` (the option is not enabled), a multi-line source
/// string is still emitted in the escaped single-line form.
#[test]
fn autoformat_preserve_multiline_default_escapes() -> miette::Result<()> {
let input = indoc::indoc! {r##"
node """
hey
world
"""
"##};
let mut doc: KdlDocument = input.parse()?;
KdlDocument::autoformat(&mut doc);
// The newline inside the value must come out as the `\n` escape.
pretty_assertions::assert_eq!(doc.to_string(), "node \"hey\\nworld\"\n");
Ok(())
}
/// A multi-line string argument on a top-level node stays a triple-quoted
/// multi-line string when formatted through `autoformat_preserve`.
#[test]
fn autoformat_preserve_multiline_top_level() -> miette::Result<()> {
let input = indoc::indoc! {r##"
node """
hey
world
"""
"##};
let expected = indoc::indoc! {r##"
node """
hey
world
"""
"##};
pretty_assertions::assert_eq!(autoformat_preserve(input, " ")?, expected);
Ok(())
}
/// A multi-line string argument on a child node (inside `parent { ... }`)
/// is preserved as a triple-quoted multi-line string; the exact layout is
/// pinned by `expected`.
#[test]
fn autoformat_preserve_multiline_nested() -> miette::Result<()> {
let input = indoc::indoc! {r##"
parent {
child """
line1
line2
"""
}
"##};
let expected = indoc::indoc! {r##"
parent {
child """
line1
line2
"""
}
"##};
pretty_assertions::assert_eq!(autoformat_preserve(input, " ")?, expected);
Ok(())
}
/// The preserved multi-line body is laid out using the indent string handed
/// to `autoformat_preserve`; the resulting text is pinned by `expected`.
#[test]
fn autoformat_preserve_multiline_custom_indent() -> miette::Result<()> {
let input = indoc::indoc! {r##"
node """
a
b
"""
"##};
let expected = indoc::indoc! {r##"
node """
a
b
"""
"##};
pretty_assertions::assert_eq!(autoformat_preserve(input, " ")?, expected);
Ok(())
}
/// Checks both the formatted text and the reparsed value (`"a\n\nb"`) for a
/// multi-line string whose body contains an empty line.
#[test]
fn autoformat_preserve_multiline_empty_lines() -> miette::Result<()> {
// Empty lines inside the body should have no trailing whitespace,
// which the KDL spec permits even when other lines are indented.
let input = indoc::indoc! {r##"
node """
a
b
"""
"##};
let expected = indoc::indoc! {r##"
node """
a
b
"""
"##};
let formatted = autoformat_preserve(input, " ")?;
pretty_assertions::assert_eq!(formatted, expected);
// And it must round-trip back to the same value.
let reparsed: KdlDocument = formatted.parse()?;
assert_eq!(
reparsed.nodes()[0].entries()[0].value().as_string(),
Some("a\n\nb")
);
Ok(())
}
/// A value containing a newline that was written as a single-line escaped
/// string in the source stays single-line, even with the option enabled.
#[test]
fn autoformat_preserve_multiline_single_line_source_unchanged() -> miette::Result<()> {
// A single-line escaped source string should NOT be promoted to
// multi-line even if its value contains newlines — preservation only
// applies to values that were multi-line in the source.
pretty_assertions::assert_eq!(
autoformat_preserve(r#"node "a\nb""#, " ")?,
"node \"a\\nb\"\n"
);
Ok(())
}
/// A raw multi-line string (`#"""..."""#`) is preserved as a raw multi-line
/// string and reparses to the same value.
#[test]
fn autoformat_preserve_multiline_raw() -> miette::Result<()> {
let input = indoc::indoc! {r###"
node #"""
hey
world
"""#
"###};
let expected = indoc::indoc! {r###"
node #"""
hey
world
"""#
"###};
let formatted = autoformat_preserve(input, " ")?;
pretty_assertions::assert_eq!(formatted, expected);
// Round-trip: the formatted text must parse back to the original value.
let reparsed: KdlDocument = formatted.parse()?;
assert_eq!(
reparsed.nodes()[0].entries()[0].value().as_string(),
Some("hey\nworld")
);
Ok(())
}
/// A raw multi-line string whose body contains `"""#` must be re-emitted with
/// enough `#` delimiters that the body cannot be mistaken for the closing
/// delimiter: the output must contain a `##"""` opener and reparse to the
/// original value.
#[test]
fn autoformat_preserve_multiline_raw_bumps_hashes_for_collision() -> miette::Result<()> {
// Source is `##"""..."""##` (two hashes). Body contains `"""#` which
// is safe at two hashes. After autoformat the formatter must pick a
// hash count that still avoids the collision.
let input = indoc::indoc! {r####"
node ##"""
a
"""# b
"""##
"####};
let formatted = autoformat_preserve(input, " ")?;
assert!(formatted.contains("##\"\"\""), "got: {formatted}");
// Round-trip: the body, including its `"""#` run, must survive reparsing.
let reparsed: KdlDocument = formatted.parse()?;
assert_eq!(
reparsed.nodes()[0].entries()[0].value().as_string(),
Some("a\n\"\"\"# b")
);
Ok(())
}
/// Preservation also applies to property entries (`key="""..."""`), not just
/// positional arguments.
#[test]
fn autoformat_preserve_multiline_property_entry() -> miette::Result<()> {
let input = indoc::indoc! {r##"
node key="""
one
two
"""
"##};
let expected = indoc::indoc! {r##"
node key="""
one
two
"""
"##};
pretty_assertions::assert_eq!(autoformat_preserve(input, " ")?, expected);
Ok(())
}
/// Formatting with the option enabled and then reparsing yields the original
/// string value; only the value is checked here, not the formatted text.
#[test]
fn autoformat_preserve_multiline_reparse_roundtrip() -> miette::Result<()> {
let input = indoc::indoc! {r##"
node """
hey
world
"""
"##};
let formatted = autoformat_preserve(input, " ")?;
let reparsed: KdlDocument = formatted.parse()?;
assert_eq!(
reparsed.nodes()[0].entries()[0].value().as_string(),
Some("hey\nworld")
);
Ok(())
}
/// A single-line escaped source string whose value contains both a newline
/// and `"""` is emitted in the escaped single-line form.
#[test]
fn autoformat_preserve_multiline_falls_back_on_triple_quote() -> miette::Result<()> {
// A string that already contains `"""` can't be emitted as a
// triple-quoted multi-line string, so it should fall back to the
// escaped single-line form.
pretty_assertions::assert_eq!(
autoformat_preserve(r#"node "a\n\"\"\"b""#, " ")?,
"node \"a\\n\\\"\\\"\\\"b\"\n"
);
Ok(())
}
#[cfg(feature = "span")] #[cfg(feature = "span")]
fn check_spans_for_doc(doc: &KdlDocument, source: &impl miette::SourceCode) { fn check_spans_for_doc(doc: &KdlDocument, source: &impl miette::SourceCode) {
for node in doc.nodes() { for node in doc.nodes() {

View File

@ -2,7 +2,7 @@
use miette::SourceSpan; use miette::SourceSpan;
use std::{fmt::Display, str::FromStr}; use std::{fmt::Display, str::FromStr};
use crate::{v2_parser, KdlError, KdlIdentifier, KdlValue}; use crate::{fmt::FormatConfig, v2_parser, KdlError, KdlIdentifier, KdlValue};
/// KDL Entries are the "arguments" to KDL nodes: either a (positional) /// KDL Entries are the "arguments" to KDL nodes: either a (positional)
/// [`Argument`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#argument) or /// [`Argument`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#argument) or
@ -171,15 +171,35 @@ impl KdlEntry {
/// Auto-formats this entry. /// Auto-formats this entry.
pub fn autoformat(&mut self) { pub fn autoformat(&mut self) {
self.autoformat_config(&FormatConfig::default());
}
/// Auto-formats this entry according to `config`.
pub fn autoformat_config(&mut self, config: &FormatConfig<'_>) {
// TODO once MSRV allows (1.80.0): // TODO once MSRV allows (1.80.0):
//self.format.take_if(|f| !f.autoformat_keep); //self.format.take_if(|f| !f.autoformat_keep);
if !self let keep = self
.format .format
.as_ref() .as_ref()
.map(|f| f.autoformat_keep) .map(|f| f.autoformat_keep)
.unwrap_or(false) .unwrap_or(false);
{ if !keep {
let source_kind = self
.format
.as_ref()
.and_then(|f| multiline_source_kind(&f.value_repr));
self.format = None; self.format = None;
if config.preserve_multiline_strings {
if let Some(kind) = source_kind {
if let Some(repr) = multiline_string_repr(&self.value, config, kind) {
self.format = Some(KdlEntryFormat {
value_repr: repr,
leading: " ".into(),
..Default::default()
});
}
}
}
} else { } else {
#[cfg(feature = "v1")] #[cfg(feature = "v1")]
self.ensure_v2(); self.ensure_v2();
@ -474,6 +494,134 @@ impl FromStr for KdlEntry {
} }
} }
/// Which kind of multi-line string was used in the original source.
///
/// Produced by `multiline_source_kind` from an entry's `value_repr` and passed
/// to `multiline_string_repr` so the re-emitted string keeps the same flavor.
#[derive(Debug, Clone, Copy)]
enum MultilineKind {
/// A plain triple-quoted multi-line string: `"""..."""`.
Plain,
/// A raw multi-line string with the given number of `#` delimiters:
/// `#"""..."""#`, `##"""..."""##`, etc.
///
/// The count is the number of hashes seen in the source; when the string is
/// re-emitted it is used as a lower bound, not an exact count.
Raw(usize),
}
/// Classifies an entry's `value_repr` by the multi-line string syntax it was
/// written in.
///
/// Leading whitespace is ignored. A repr that opens with `"""` is
/// [`MultilineKind::Plain`]; one that opens with one or more `#` followed by
/// `"""` is [`MultilineKind::Raw`] carrying that hash count. Anything else —
/// single-line strings, non-string values, an empty repr — yields `None`.
fn multiline_source_kind(repr: &str) -> Option<MultilineKind> {
    const TRIPLE_QUOTE: &str = "\"\"\"";

    let body = repr.trim_start();
    let after_hashes = body.trim_start_matches('#');
    if !after_hashes.starts_with(TRIPLE_QUOTE) {
        return None;
    }
    // `#` is a single byte, so the byte difference is the number of hashes.
    match body.len() - after_hashes.len() {
        0 => Some(MultilineKind::Plain),
        hashes => Some(MultilineKind::Raw(hashes)),
    }
}
/// Returns the length of the longest run of `#` that immediately follows three
/// or more consecutive `"` characters in `s`, or `0` if there is no such run.
///
/// A raw multi-line string closes with `"""` followed by its hash count, so a
/// body containing `"""` + N hashes needs more than N hashes on its
/// delimiters to avoid colliding with the closing delimiter.
///
/// Quote runs longer than three are handled too: in `""""#` the last three
/// quotes form a `"""` that is directly followed by `#`, so the result is 1.
fn max_hash_run_after_triple_quote(s: &str) -> usize {
    let mut longest = 0usize;
    // Length of the run of `"` ending at the previous byte.
    let mut quotes = 0usize;
    // Length of the current `#` run, counted only when it follows `"""`.
    let mut hashes = 0usize;

    // Scanning bytes is UTF-8 safe: `"` and `#` are ASCII and never occur
    // inside a multi-byte sequence.
    for &byte in s.as_bytes() {
        match byte {
            b'"' => {
                quotes += 1;
                hashes = 0;
            }
            b'#' if quotes >= 3 || hashes > 0 => {
                quotes = 0;
                hashes += 1;
                longest = longest.max(hashes);
            }
            _ => {
                quotes = 0;
                hashes = 0;
            }
        }
    }
    longest
}
/// Returns `true` for the code points KDL treats as inline (non-newline)
/// whitespace.
fn is_kdl_inline_whitespace(ch: char) -> bool {
    matches!(
        ch,
        '\t' | ' '
            | '\u{A0}'
            | '\u{1680}'
            | '\u{2000}'..='\u{200A}'
            | '\u{202F}'
            | '\u{205F}'
            | '\u{3000}'
    )
}

/// Returns `true` for code points that must not be written literally inside a
/// multi-line string body: KDL newlines other than `\n` (they would start a
/// new, unindented line) and the code points KDL disallows as literals
/// (control characters, DEL, direction controls, BOM).
fn must_not_appear_literally(ch: char) -> bool {
    matches!(
        ch,
        '\u{00}'..='\u{08}'
            | '\u{0B}'..='\u{1F}'
            | '\u{7F}'
            | '\u{85}'
            | '\u{200E}'..='\u{200F}'
            | '\u{2028}'..='\u{202E}'
            | '\u{2066}'..='\u{2069}'
            | '\u{FEFF}'
    )
}

/// Appends `ch` to `out` as it must be spelled inside a plain (escaping)
/// multi-line string body. With `escape_whitespace` set, inline whitespace is
/// escaped as well so that the line is not read back as whitespace-only.
fn push_plain_multiline_char(out: &mut String, ch: char, escape_whitespace: bool) {
    match ch {
        '\\' => out.push_str("\\\\"),
        '\r' => out.push_str("\\r"),
        '\t' => out.push_str("\\t"),
        '\u{08}' => out.push_str("\\b"),
        '\u{0C}' => out.push_str("\\f"),
        ' ' if escape_whitespace => out.push_str("\\s"),
        c if must_not_appear_literally(c)
            || (escape_whitespace && is_kdl_inline_whitespace(c)) =>
        {
            out.push_str(&format!("\\u{{{:x}}}", c as u32));
        }
        c => out.push(c),
    }
}

/// Builds a `value_repr` for `value` in the requested multi-line style, with
/// every non-empty body line and the closing delimiter indented one level
/// deeper than `config.indent_level`.
///
/// Returns `None` (so the caller falls back to the default escaped
/// single-line form) when:
///
/// * `value` is not a string or contains no `\n`;
/// * `kind` is [`MultilineKind::Plain`] and the value contains `"""`;
/// * `kind` is [`MultilineKind::Raw`] and the value contains something a raw
///   string cannot express: a control character other than tab/newline, a
///   non-`\n` KDL newline (e.g. U+0085, U+2028), another disallowed literal
///   code point, or a non-empty line made only of whitespace (KDL reads such
///   a line back as empty).
///
/// For raw strings the hash count is the larger of the source's count and one
/// more than the longest `"""#...` run found in the body.
fn multiline_string_repr(
    value: &KdlValue,
    config: &FormatConfig<'_>,
    kind: MultilineKind,
) -> Option<String> {
    let s = value.as_string()?;
    if !s.contains('\n') {
        return None;
    }

    // One indent unit per level, plus one so the body sits inside the node.
    let prefix: String = (0..=config.indent_level).map(|_| config.indent).collect();

    match kind {
        MultilineKind::Plain => {
            // Can't safely embed `"""` inside a plain triple-quoted string.
            if s.contains("\"\"\"") {
                return None;
            }
            let mut out = String::with_capacity(s.len() + prefix.len() + 8);
            out.push_str("\"\"\"\n");
            for line in s.split('\n') {
                // Empty lines are emitted bare, without trailing whitespace.
                if !line.is_empty() {
                    out.push_str(&prefix);
                    // A whitespace-only line would be parsed back as an empty
                    // line, so its whitespace has to be written as escapes.
                    let whitespace_only = line.chars().all(is_kdl_inline_whitespace);
                    for ch in line.chars() {
                        push_plain_multiline_char(&mut out, ch, whitespace_only);
                    }
                }
                out.push('\n');
            }
            out.push_str(&prefix);
            out.push_str("\"\"\"");
            Some(out)
        }
        MultilineKind::Raw(min_hashes) => {
            // Raw strings can't escape anything, so any content that would
            // need an escape forces a fallback.
            let representable = s.split('\n').all(|line| {
                let whitespace_only =
                    !line.is_empty() && line.chars().all(is_kdl_inline_whitespace);
                !whitespace_only && !line.chars().any(must_not_appear_literally)
            });
            if !representable {
                return None;
            }

            // Always at least one hash, never fewer than the source had, and
            // more than any `"""#...` run in the body.
            let needed = (max_hash_run_after_triple_quote(s) + 1).max(min_hashes);
            let hash_str = "#".repeat(needed);
            let mut out = String::with_capacity(s.len() + 2 * needed + 8);
            out.push_str(&hash_str);
            out.push_str("\"\"\"\n");
            for line in s.split('\n') {
                if !line.is_empty() {
                    out.push_str(&prefix);
                    out.push_str(line);
                }
                out.push('\n');
            }
            out.push_str(&prefix);
            out.push_str("\"\"\"");
            out.push_str(&hash_str);
            Some(out)
        }
    }
}
/// Formatting details for [`KdlEntry`]s. /// Formatting details for [`KdlEntry`]s.
#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)] #[derive(Debug, Default, Clone, Eq, PartialEq, Hash)]
pub struct KdlEntryFormat { pub struct KdlEntryFormat {

View File

@ -18,6 +18,12 @@ pub struct FormatConfig<'a> {
/// Whether to keep individual entry formatting. /// Whether to keep individual entry formatting.
pub entry_autoformate_keep: bool, pub entry_autoformate_keep: bool,
/// Whether string values that were written as multi-line (`"""`) strings
/// in the source are kept as triple-quoted multi-line strings, with their
/// content re-indented to match the current indent level. Strings that
/// were single-line in the source are never promoted to multi-line. When
/// `false` (the default), such strings are emitted as single-line escaped
/// strings (e.g. `"foo\nbar"`).
pub preserve_multiline_strings: bool,
} }
/// See field documentation for defaults. /// See field documentation for defaults.
@ -48,6 +54,7 @@ impl<'a> FormatConfigBuilder<'a> {
indent: " ", indent: " ",
no_comments: false, no_comments: false,
entry_autoformate_keep: false, entry_autoformate_keep: false,
preserve_multiline_strings: false,
}) })
} }
@ -105,6 +112,27 @@ impl<'a> FormatConfigBuilder<'a> {
self self
} }
/// Optionally sets whether multi-line source strings are kept as
/// triple-quoted multi-line strings (re-indented to match the current indent
/// level). Passing `None` leaves the builder's current setting untouched;
/// the setting is `false` unless specified.
pub const fn maybe_preserve_multiline_strings(
    mut self,
    preserve_multiline_strings: Option<bool>,
) -> Self {
    self.0.preserve_multiline_strings = match preserve_multiline_strings {
        Some(enabled) => enabled,
        // Nothing requested: keep whatever is already configured.
        None => self.0.preserve_multiline_strings,
    };
    self
}
/// Sets whether multi-line source strings are kept as triple-quoted
/// multi-line strings (re-indented to match the current indent level).
/// The setting is `false` unless specified.
pub const fn preserve_multiline_strings(self, preserve_multiline_strings: bool) -> Self {
    self.maybe_preserve_multiline_strings(Some(preserve_multiline_strings))
}
/// Builds the [`FormatConfig`]. /// Builds the [`FormatConfig`].
pub const fn build(self) -> FormatConfig<'a> { pub const fn build(self) -> FormatConfig<'a> {
self.0 self.0
@ -168,6 +196,7 @@ mod test {
indent: " \t", indent: " \t",
no_comments: true, no_comments: true,
entry_autoformate_keep: false, entry_autoformate_keep: false,
preserve_multiline_strings: false,
} }
)); ));
Ok(()) Ok(())

View File

@ -312,7 +312,7 @@ impl KdlNode {
if config.entry_autoformate_keep { if config.entry_autoformate_keep {
entry.keep_format(); entry.keep_format();
} }
entry.autoformat(); entry.autoformat_config(config);
} }
if let Some(children) = self.children.as_mut() { if let Some(children) = self.children.as_mut() {
children.autoformat_config(&FormatConfig { children.autoformat_config(&FormatConfig {