From e0486db3781dbcdd606e7e088bd3e6df32cb7fba Mon Sep 17 00:00:00 2001 From: eugenesvk Date: Tue, 21 Jan 2025 13:53:12 +0700 Subject: [PATCH] Add explicit regex-set rules to hex unicode document {1,3} ranges --- draft-marchan-kdl2.md | 9 +++++++-- .../input/unicode_escaped_too_long_lead0_fail.kdl | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 tests/test_cases/input/unicode_escaped_too_long_lead0_fail.kdl diff --git a/draft-marchan-kdl2.md b/draft-marchan-kdl2.md index 655e4c3..442cc9e 100644 --- a/draft-marchan-kdl2.md +++ b/draft-marchan-kdl2.md @@ -983,8 +983,13 @@ string-character := [^\\"] - disallowed-literal-code-points ws-escape := '\\' (unicode-space | newline)+ hex-digit := [0-9a-fA-F] -hex-unicode := [\u{0}-\u{10FFFF}] - surrogate // Unicode Scalar Value₁₆, leading 0s allowed as long as length ≤ 6 -surrogate := [\u{D800}-\u{DFFF}] +hex-unicode := hex-digit{1, 6} - surrogate - above-max-scalar // Unicode Scalar Value in hex₁₆, leading 0s allowed within length ≤ 6 +surrogate := [0]{0,2}[dD][8-9a-fA-F]hex-digit{2} +// U+D800-DFFF: D 8 00 +// D F FF +above-max-scalar := [2-9a-fA-F]hex-digit{5} | [1][1-9a-fA-F]hex-digit{4} +// >U+10FFFF: >1 _____ 1 >0 ____ + raw-string := '#' raw-string-quotes '#' | '#' raw-string '#' raw-string-quotes := diff --git a/tests/test_cases/input/unicode_escaped_too_long_lead0_fail.kdl b/tests/test_cases/input/unicode_escaped_too_long_lead0_fail.kdl new file mode 100644 index 0000000..7d66a9e --- /dev/null +++ b/tests/test_cases/input/unicode_escaped_too_long_lead0_fail.kdl @@ -0,0 +1 @@ +no "Even with leading 0s Unicode Scalar Value escapes must ≤6: \u{0123456}"