unambiguous multiline strings (#414)

Fixes: https://github.com/kdl-org/kdl/issues/413
This commit is contained in:
Kat Marchán 2024-12-03 23:44:40 -08:00 committed by GitHub
parent e477f32f05
commit e7b5ec6bf9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 86 additions and 51 deletions

View File

@ -25,11 +25,15 @@ package {
scripts {
// "Raw" and dedented multi-line strings are supported.
build #"
message """
hello
world
"""
build #"""
echo "foo"
node -c "console.log('hello, world!');"
echo "foo" > some-file.txt
"#
"""#
}
// `\` breaks up a single node across multiple lines.

85
SPEC.md
View File

@ -440,14 +440,14 @@ The string contains the literal characters `hello\n\r\asd"#world`
### Multi-line Strings
When a Quoted or Raw String spans multiple lines with literal, non-escaped
Newlines, it follows a special multi-line syntax that automatically "dedents"
the string, allowing its value to be indented to a visually matching level if
desired.
Both Quoted and Raw Strings may span multiple lines with literal, non-escaped
Newlines. To do so, they must use a special multi-line syntax, which
automatically "dedents" the string, allowing its value to be indented to a
visually matching level as desired.
A Multi-line string _MUST_ start with a [Newline](#newline) immediately
following its opening `"`. Its final line _MUST_ contain only whitespace,
followed by a single closing `"`. All in-between lines that contain
following its opening `"""` (whether Quoted or Raw). Its final line _MUST_ contain only whitespace,
followed by a closing `"""`. All in-between lines that contain
non-newline characters _MUST_ start with _at least_ the exact same whitespace
as the final line (precisely matching codepoints, not merely counting characters).
They may contain additional whitespace following this prefix.
@ -457,13 +457,14 @@ Whitespace of the last line, and the matching Whitespace prefix on all
intermediate lines. The first and last Newline can be the same character (that
is, empty multi-line strings are legal).
Strings with literal Newlines that do not immediately start with a Newline and
whose final `"` is not preceded by optional whitespace and a Newline are
illegal.
In other words, the final line specifies the whitespace prefix that will be
removed from all other lines.
Multi-line Strings that do not immediately start with a Newline, or whose final
`"""` is not preceded by a Newline and optional whitespace, are illegal. This
also means that `"""` may not be used for a single-line String (e.g.
`"""foo"""`).
It is a syntax error for any body lines of the multi-line string to not match
the whitespace prefix of the last line with the final quote.
@ -474,7 +475,20 @@ Literal Newline sequences in Multi-line Strings must be normalized to a single
becomes a single `LF` during parsing.
This normalization does not apply to non-literal Newlines entered using escape
sequences.
sequences. That is:
```kdl
multi-line """
\r\n[CRLF]
foo[CRLF]
"""
```
becomes:
```kdl
"\r\n\nfoo"
```
For clarity: this normalization is for individual sequences. That is, the
literal sequence `CRLF CRLF` becomes `LF LF`, not `LF`.
@ -482,11 +496,11 @@ literal sequence `CRLF CRLF` becomes `LF LF`, not `LF`.
#### Example
```kdl
multi-line "
multi-line """
foo
This is the base indentation
bar
"
"""
```
This example's string value will be:
@ -506,11 +520,11 @@ If the last line wasn't indented as far,
it won't dedent the rest of the lines as much:
```kdl
multi-line "
multi-line """
foo
This is no longer on the left edge
bar
"
"""
```
This example's string value will be:
@ -528,11 +542,11 @@ Equivalent to `" foo\n This is no longer on the left edge\n bar"`
Empty lines can contain any whitespace, or none at all, and will be reflected as empty in the value:
```kdl
multi-line "
multi-line """
Indented a bit
A second indented paragraph.
"
"""
```
This example's string value will be:
@ -547,25 +561,29 @@ Equivalent to `"Indented a bit.\n\nA second indented paragraph."`
-----------
The following yield syntax errors:
The following yield **syntax errors**:
```kdl
multi-line "
closing quote with non-whitespace prefix"
multi-line """can't be single line"""
```
```kdl
multi-line "stuff
"
multi-line """
closing quote with non-whitespace prefix"""
```
```kdl
multi-line """stuff
"""
```
```kdl
// Every line must share the exact same prefix as the closing line.
multi-line "[\n]
multi-line """[\n]
[tab]a[\n]
[space][space]b[\n]
[space][tab][\n]
[tab]"
[tab]"""
```
#### Interaction with Whitespace Escapes
@ -581,24 +599,25 @@ For example, the following example is illegal:
```kdl
// Equivalent to trying to write a string containing `foo\nbar\`.
"
"""
foo
bar\
"
"""
```
while the following example is allowed:
```kdl
"
"""
foo \
bar
baz
\ "
// this is equivalent to
"
\ """
// equivalent to
"""
foo bar
baz
"
"""
```
### Number
@ -800,7 +819,7 @@ dotted-ident := sign? '.' ((identifier-char - digit) identifier-char*)?
identifier-char := unicode - unicode-space - newline - [\\/(){};\[\]"#=] - disallowed-literal-code-points - equals-sign
disallowed-keyword-identifiers := 'true' - 'false' - 'null' - 'inf' - '-inf' - 'nan'
quoted-string := '"' (single-line-string-body | newline multi-line-string-body newline unicode-space*) '"'
quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline unicode-space* '"""'
single-line-string-body := (string-character - newline)*
multi-line-string-body := string-character*
string-character := '\' escape | [^\\"] - disallowed-literal-code-points
@ -808,7 +827,7 @@ escape := ["\\bfnrts] | 'u{' hex-digit{1, 6} '}' | (unicode-space | newline)+
hex-digit := [0-9a-fA-F]
raw-string := '#' raw-string-quotes '#' | '#' raw-string '#'
raw-string-quotes := '"' (single-line-raw-string-body | newline multi-line-raw-string-body newline unicode-space*) '"'
raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body newline unicode-space* '"""'
single-line-raw-string-body := (unicode - newline - disallowed-literal-code-points)*
multi-line-raw-string-body := (unicode - disallowed-literal-code-points)*

View File

@ -42,11 +42,11 @@ jobs {
}
step Clippy { run cargo clippy --all -- -D warnings }
step "Run tests" { run cargo test --all --verbose }
step "Other Stuff" run="
step "Other Stuff" run="""
echo foo
echo bar
echo baz
"
"""
}
}
}

View File

@ -1,5 +1,5 @@
node #"
node #"""
hey
everyone
how goes?
"#
"""#

View File

@ -1,5 +1,5 @@
node #"
node #"""
hey
everyone
how goes?
"#
"""#

View File

@ -1,5 +1,5 @@
node #"
node #"""
hey
everyone
how goes?
"#
"""#

View File

@ -0,0 +1 @@
node #"""one line"""#

View File

@ -0,0 +1,5 @@
node #"""
hey
everyone
how goes?
"""#

View File

@ -1,5 +1,5 @@
node "
node """
hey
everyone
how goes?
"
"""

View File

@ -1,5 +1,5 @@
node "
node """
hey
everyone
how goes?
"
"""

View File

@ -1,5 +1,5 @@
node "
node """
hey
everyone
how goes?
"
"""

View File

@ -1,5 +1,5 @@
node "
node """
hey
everyone
how goes?
"
"""

View File

@ -0,0 +1 @@
node """one line"""

View File

@ -0,0 +1,5 @@
node "
hey
everyone
how goes?
"