From d76063e8e954985674f91c14c72a3bacc7ce2928 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 22 Jan 2025 00:16:45 +0700 Subject: [PATCH] Exclude hex above max Unicode Scalar Value (#456) * Exclude hex above max Unicode Scalar Value simplify surrogate regex to use ranges * allow leading 0s, but still limit max length to 6 * Add explicit regex-set rules to hex unicode document {1,3} ranges * add space-separators between sets * Make test fail *only* for length limits Previously it failed due to specifying a codepoint past max *as well*, obscuring the intended fail condition. --------- Co-authored-by: Tab Atkins Jr. --- draft-marchan-kdl2.md | 11 +++++++---- .../input/unicode_escaped_above_max_fail.kdl | 1 + .../input/unicode_escaped_too_long_lead0_fail.kdl | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 tests/test_cases/input/unicode_escaped_above_max_fail.kdl create mode 100644 tests/test_cases/input/unicode_escaped_too_long_lead0_fail.kdl diff --git a/draft-marchan-kdl2.md b/draft-marchan-kdl2.md index 008b9a5..25acffa 100644 --- a/draft-marchan-kdl2.md +++ b/draft-marchan-kdl2.md @@ -983,10 +983,13 @@ string-character := [^\\"] - disallowed-literal-code-points ws-escape := '\\' (unicode-space | newline)+ hex-digit := [0-9a-fA-F] -hex-unicode := hex-digit{1, 6} - surrogates -surrogates := [dD][8-9a-fA-F]hex-digit{2} -// U+D800-DFFF: D 8 00 -// D F FF +hex-unicode := hex-digit{1, 6} - surrogate - above-max-scalar // Unicode Scalar Value in hex₁₆, leading 0s allowed within length ≤ 6 +surrogate := [0]{0, 2} [dD] [8-9a-fA-F] hex-digit{2} +// U+D800-DFFF: D 8 00 +// D F FF +above-max-scalar := [2-9a-fA-F] hex-digit{5} | [1] [1-9a-fA-F] hex-digit{4} +// >U+10FFFF: >1 _____ 1 >0 ____ + raw-string := '#' raw-string-quotes '#' | '#' raw-string '#' raw-string-quotes := diff --git a/tests/test_cases/input/unicode_escaped_above_max_fail.kdl b/tests/test_cases/input/unicode_escaped_above_max_fail.kdl new file mode 100644 index 0000000..5db8765 --- 
/dev/null +++ b/tests/test_cases/input/unicode_escaped_above_max_fail.kdl @@ -0,0 +1 @@ +no "Higher than max Unicode Scalar Value \u{10FFFF} \u{11FFFF}" diff --git a/tests/test_cases/input/unicode_escaped_too_long_lead0_fail.kdl b/tests/test_cases/input/unicode_escaped_too_long_lead0_fail.kdl new file mode 100644 index 0000000..4ee3b99 --- /dev/null +++ b/tests/test_cases/input/unicode_escaped_too_long_lead0_fail.kdl @@ -0,0 +1 @@ +no "Even with leading 0s Unicode Scalar Value escapes must be ≤6 digits: \u{0012345}"