From 5b7a2a8f428ef9b1255d1173b36186b3574dad5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Tue, 3 Dec 2024 21:45:48 -0800 Subject: [PATCH] unambiguous multiline strings Fixes: https://github.com/kdl-org/kdl/issues/413 --- README.md | 8 ++++-- SPEC.md | 85 ++++++++++++++++++++++++++++++++++--------------------- 2 files changed, 58 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 2037ea1..e3b75f4 100644 --- a/README.md +++ b/README.md @@ -25,11 +25,15 @@ package { scripts { // "Raw" and dedented multi-line strings are supported. - build #" + message """ + hello + world + """ + build #""" echo "foo" node -c "console.log('hello, world!');" echo "foo" > some-file.txt - "# + """# } // `\` breaks up a single node across multiple lines. diff --git a/SPEC.md b/SPEC.md index a74b78d..4f93ab5 100644 --- a/SPEC.md +++ b/SPEC.md @@ -440,14 +440,14 @@ The string contains the literal characters `hello\n\r\asd"#world` ### Multi-line Strings -When a Quoted or Raw String spans multiple lines with literal, non-escaped -Newlines, it follows a special multi-line syntax that automatically "dedents" -the string, allowing its value to be indented to a visually matching level if -desired. +Quoted and Raw Strings support multiple lines with literal, non-escaped +Newlines. They must use a special multi-line syntax, and they automatically +"dedent" the string, allowing its value to be indented to a visually matching +level as desired. A Multi-line string _MUST_ start with a [Newline](#newline) immediately -following its opening `"`. Its final line _MUST_ contain only whitespace, -followed by a single closing `"`. All in-between lines that contain +following its opening `"""` (whether Quoted or Raw). Its final line _MUST_ contain only whitespace, +followed by a closing `"""`. 
All in-between lines that contain non-newline characters _MUST_ start with _at least_ the exact same whitespace as the final line (precisely matching codepoints, not merely counting characters). They may contain additional whitespace following this prefix. @@ -457,13 +457,14 @@ Whitespace of the last line, and the matching Whitespace prefix on all intermediate lines. The first and last Newline can be the same character (that is, empty multi-line strings are legal). -Strings with literal Newlines that do not immediately start with a Newline and -whose final `"` is not preceeded by optional whitespace and a Newline are -illegal. - In other words, the final line specifies the whitespace prefix that will be removed from all other lines. +Multi-line Strings that do not immediately start with a Newline or whose final +`"""` is not preceded by optional whitespace and a Newline are illegal. This +also means that `"""` may not be used for a single-line String (e.g. +`"""foo"""`). + It is a syntax error for any body lines of the multi-line string to not match the whitespace prefix of the last line with the final quote. @@ -474,7 +475,20 @@ Literal Newline sequences in Multi-line Strings must be normalized to a single becomes a single `LF` during parsing. This normalization does not apply to non-literal Newlines entered using escape -sequences. +sequences. That is: + +```kdl +multi-line """ + \r\n[CRLF] + foo[CRLF] + """ +``` + +becomes: + +```kdl +"\r\n\nfoo" +``` For clarity: this normalization is for individual sequences. That is, the literal sequence `CRLF CRLF` becomes `LF LF`, not `LF`. @@ -482,11 +496,11 @@ literal sequence `CRLF CRLF` becomes `LF LF`, not `LF`. 
#### Example ```kdl -multi-line " +multi-line """ foo This is the base indentation bar - " + """ ``` This example's string value will be: @@ -506,11 +520,11 @@ If the last line wasn't indented as far, it won't dedent the rest of the lines as much: ```kdl -multi-line " +multi-line """ foo This is no longer on the left edge bar - " + """ ``` This example's string value will be: @@ -528,11 +542,11 @@ Equivalent to `" foo\n This is no longer on the left edge\n bar"` Empty lines can contain any whitespace, or none at all, and will be reflected as empty in the value: ```kdl -multi-line " +multi-line """ Indented a bit A second indented paragraph. - " + """ ``` This example's string value will be: @@ -547,25 +561,29 @@ Equivalent to `"Indented a bit.\n\nA second indented paragraph."` ----------- -The following yield syntax errors: +The following yield **syntax errors**: ```kdl -multi-line " - closing quote with non-whitespace prefix" +multi-line """can't be single line""" ``` ```kdl -multi-line "stuff - " +multi-line """ + closing quote with non-whitespace prefix""" +``` + +```kdl +multi-line """stuff + """ ``` ```kdl // Every line must share the exact same prefix as the closing line. -multi-line "[\n] +multi-line """[\n] [tab]a[\n] [space][space]b[\n] [space][tab][\n] -[tab]" +[tab]""" ``` #### Interaction with Whitespace Escapes @@ -581,24 +599,25 @@ For example, the following example is illegal: ```kdl // Equivalent to trying to write a string containing `foo\nbar\`. - " + """ foo bar\ - " + """ ``` while the following example is allowed ```kdl - " + """ foo \ bar baz - \ " - // this is equivalent to - " + \ """ + + // equivalent to + """ foo bar baz - " + """ ``` ### Number @@ -800,7 +819,7 @@ dotted-ident := sign? '.' ((identifier-char - digit) identifier-char*)? 
identifier-char := unicode - unicode-space - newline - [\\/(){};\[\]"#=] - disallowed-literal-code-points - equals-sign disallowed-keyword-identifiers := 'true' - 'false' - 'null' - 'inf' - '-inf' - 'nan' -quoted-string := '"' (single-line-string-body | newline multi-line-string-body newline unicode-space*) '"' +quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline unicode-space* '"""' single-line-string-body := (string-character - newline)* multi-line-string-body := string-character* string-character := '\' escape | [^\\"] - disallowed-literal-code-points @@ -808,7 +827,7 @@ escape := ["\\bfnrts] | 'u{' hex-digit{1, 6} '}' | (unicode-space | newline)+ hex-digit := [0-9a-fA-F] raw-string := '#' raw-string-quotes '#' | '#' raw-string '#' -raw-string-quotes := '"' (single-line-raw-string-body | newline multi-line-raw-string-body newline unicode-space*) '"' +raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body newline unicode-space* '"""' single-line-raw-string-body := (unicode - newline - disallowed-literal-code-points)* multi-line-raw-string-body := (unicode - disallowed-literal-code-points)*