From d73890741da989e4ddbaf00045bac206ce11eff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Tue, 12 Dec 2023 20:26:12 -0800 Subject: [PATCH] `r` prefix is no longer required for raw strings (#354) Fixes: https://github.com/kdl-org/kdl/issues/337 --- CHANGELOG.md | 6 ++++++ SPEC.md | 31 +++++++++++++++++-------------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77bde11..bc2c41e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,12 @@ can now only be represented in regular strings, and there's no facilities to represent them in raw strings. This should be considered a security improvement. +* Raw strings no longer require an `r` prefix: they are now specified by using + `#""#`. +* `#` is an illegal initial identifier character, but is allowed in other + places in identifiers. +* Line continuations can be followed by an EOF now, instead of requiring a + newline (or comment). `node \` is now a legal KDL document. ### KQL diff --git a/SPEC.md b/SPEC.md index ce66f38..3afc5fc 100644 --- a/SPEC.md +++ b/SPEC.md @@ -367,11 +367,16 @@ support `\`-escapes. They otherwise share the same properties as far as literal [Newline](#newline) characters go, and the requirement of UTF-8 representation. -Raw String literals are represented as `r`, followed by zero or more `#` -characters, followed by `"`, followed by any number of UTF-8 literals. The string is then -closed by a `"` followed by a _matching_ number of `#` characters. This means -that the string sequence `"` or `"#` and such must not match the closing `"` -with the same or more `#` characters as the opening `r`. +Raw String literals are represented with one or more `#` characters, followed +by `"`, followed by any number of UTF-8 literals. The string is then closed by +a `"` followed by a _matching_ number of `#` characters. 
This means that the +body of the string must not contain a `"` followed by the same number of `#` +characters as the opening delimiter (or more), since that sequence would close the string. + +Like Strings, Raw Strings _MUST NOT_ include any of the [disallowed literal +code-points](#disallowed-literal-code-points) as code points in their body. +Unlike with Strings, these cannot simply be escaped, and are thus +unrepresentable when using Raw Strings. Like Strings, Raw Strings _MUST NOT_ include any of the [disallowed literal code-points](#disallowed-literal-code-points) as code points in their body. @@ -381,8 +386,8 @@ unrepresentable when using Raw Strings. #### Example ```kdl -just-escapes r"\n will be literal" -quotes-and-escapes r#"hello\n\r\asd"world"# +just-escapes #"\n will be literal"# +quotes-and-escapes ##"hello\n\r\asd"#world"## ``` ### Number @@ -514,10 +519,9 @@ node-children := '{' nodes '}' node-terminator := single-line-comment | newline | ';' | eof identifier := string | bare-identifier -bare-identifier := (unambiguous-ident | numberish-ident | stringish-ident) - keyword -unambiguous-ident := (identifier-char - digit - sign - "r") identifier-char* +bare-identifier := (unambiguous-ident | numberish-ident) - keyword +unambiguous-ident := (identifier-char - digit - sign - "#") identifier-char* numberish-ident := sign ((identifier-char - digit) identifier-char*)? -stringish-ident := "r" ((identifier-char - "#") identifier-char*)? 
identifier-char := unicode - line-space - [\\/(){};\[\]="] - disallowed-literal-code-points keyword := boolean | 'null' prop := identifier '=' valuel @@ -530,9 +534,8 @@ string-character := '\' escape | [^\"] - disallowed-literal-code-points escape := ["\\bfnrt] | 'u{' hex-digit{1, 6} '}' | (unicode-space | newline)+ hex-digit := [0-9a-fA-F] -raw-string := 'r' raw-string-hash -raw-string-hash := '#' raw-string-hash '#' | raw-string-quotes -raw-string-quotes := '"' .* '"' +raw-string := '#' raw-string-quotes '#' | '#' raw-string '#' +raw-string-quotes := '"' (unicode - disallowed-literal-code-points)* '"' number := decimal | hex | octal | binary @@ -548,7 +551,7 @@ binary := sign? '0b' ('0' | '1') ('0' | '1' | '_')* boolean := 'true' | 'false' -escline := '\\' ws* (single-line-comment | newline) +escline := '\\' ws* (single-line-comment | newline | eof) newline := See Table (All line-break white_space)