From f02ba59c0c806972f8108481d723c0cd2c345b8b Mon Sep 17 00:00:00 2001 From: Tab Atkins-Bittner Date: Tue, 26 Dec 2023 14:19:45 -0800 Subject: [PATCH] Make multi-line ws prefix determined by the last line. --- SPEC.md | 97 ++++++++++++++----- .../expected_kdl/escaped_whitespace.kdl | 2 +- .../expected_kdl/raw_string_newline.kdl | 2 +- tests/test_cases/input/escaped_whitespace.kdl | 8 +- 4 files changed, 81 insertions(+), 28 deletions(-) diff --git a/SPEC.md b/SPEC.md index 23fcfc3..e6bd0f5 100644 --- a/SPEC.md +++ b/SPEC.md @@ -388,7 +388,7 @@ such) are retained. For example, these strings are all semantically identical: " Hello World -" + " ``` ##### Invalid escapes @@ -428,12 +428,42 @@ quotes-and-escapes ##"hello\n\r\asd"#world"## The string contains the literal characters `hello\n\r\asd"#world` + +### Multi-line Strings + +When a Quoted or Raw String spans multiple lines with literal, non-escaped Newlines, +it follows a special multi-line syntax +that automatically "dedents" the string, +allowing its value to be indented to a visually matching level if desired. + +A Multi-line string _MUST_ start with a [Newline](#newline) +immediately following its opening `"`. +Its final line, preceding the closing `"`, +_MUST_ contain only whitespace. +All in-between lines that contain non-whitespace characters +_MUST_ start with the exact same whitespace as the final line +(precisely matching codepoints, not merely counting characters). + +The value of the Multi-line String omits the first and last Newline, +the Whitespace of the last line, +the matching Whitespace prefix on all intermediate lines, +and all Whitespace on intermediate Whitespace-only lines. +The first and last Newline can be the same character +(that is, empty multi-line strings are legal). + +Strings with literal Newlines that do not immediately start with a Newline and +whose final `"` is not preceded by optional whitespace and a Newline are illegal. 
+ +In other words, the final line specifies the whitespace prefix that will be removed from all other lines. + +#### Example + ```kdl -multi-line #" +multi-line " foo This is the base indentation - bar - "# + bar + " ``` The last example's string value will be: @@ -444,29 +474,52 @@ This is the base indentation bar ``` -### Multi-line Strings +Equivalent to `" foo\nThis is the base indentation\n bar"`. -Quoted and Raw Strings may span multiple lines with literal Newlines, in which -case the resulting String is "dedented" according to the line with the fewest -number of Whitespace characters preceding the first non-Whitespace character. -That is, the number of literal Whitespace characters in the least-indented -line in the String body is subtracted from the Whitespace of all other lines. +--------- -Multi-line strings _MUST_ have a single [Newline](#newline) immediately -following their opening `"`, after which they may have any number of newlines. -Finally, there must be a Newline, followed by any number of Whitespace, before -the closing `"`. +If the last line wasn't indented as far, +it won't dedent the rest of the lines as much: -The first Newline, the last Newline, along with Whitespace following the last -Newline, are not included in the value of the String. The first and last -Newline can be the same character (that is, empty multi-line strings are -legal). +```kdl +multi-line " + foo + This is no longer on the left edge + bar + " +``` -Furthermore, any lines in the string body that only contain literal whitespace -are stripped to only contain the single Newline character. +This example's string value will be: -Strings with literal Newlines that do not immediately start with a Newline and -whose final `"` is not preceeded by whitespace and a Newline are illegal. +``` + foo + This is no longer on the left edge + bar +``` + +Equivalent to `" foo\n This is no longer on the left edge\n bar"`. 
+ +----------- + +Empty lines can contain any whitespace, or none at all, and will be reflected as empty in the value: + +```kdl +multi-line " + Indented a bit. + + A second indented paragraph. + " +``` + +This example's string value will be: + +``` +Indented a bit. + +A second indented paragraph. +``` + +Equivalent to `"Indented a bit.\n\nA second indented paragraph."` ### Number diff --git a/tests/test_cases/expected_kdl/escaped_whitespace.kdl b/tests/test_cases/expected_kdl/escaped_whitespace.kdl index a97d10a..45dd408 100644 --- a/tests/test_cases/expected_kdl/escaped_whitespace.kdl +++ b/tests/test_cases/expected_kdl/escaped_whitespace.kdl @@ -1 +1 @@ -node "Hello\n\tWorld" "Hello\n\tWorld" "Hello\n\tWorld" "Hello\n\tWorld" "Hello\n\tWorld" "Hello\n\tWorld" +node "Hello\n\tWorld" "Hello\n\tWorld" "Hello\n\tWorld" "Hello\n\tWorld" "Hello\n\tWorld" diff --git a/tests/test_cases/expected_kdl/raw_string_newline.kdl b/tests/test_cases/expected_kdl/raw_string_newline.kdl index d738029..fd38cb0 100644 --- a/tests/test_cases/expected_kdl/raw_string_newline.kdl +++ b/tests/test_cases/expected_kdl/raw_string_newline.kdl @@ -1 +1 @@ -node "\nhello\nworld\n" +node "hello\nworld" diff --git a/tests/test_cases/input/escaped_whitespace.kdl b/tests/test_cases/input/escaped_whitespace.kdl index 1f2e67c..797784a 100644 --- a/tests/test_cases/input/escaped_whitespace.kdl +++ b/tests/test_cases/input/escaped_whitespace.kdl @@ -1,13 +1,13 @@ // All of these strings are the same node \ "Hello\n\tWorld" \ - "Hello - World" \ + " + Hello + World + " \ "Hello\n\ \tWorld" \ "Hello\n\ \tWorld" \ - "Hello -\ \tWorld" \ "Hello\n\t\ World"