From aa9a4439a5c8d69e311b216046976165eaa66e0f Mon Sep 17 00:00:00 2001
From: eugenesvk
Date: Tue, 21 Jan 2025 13:51:02 +0700
Subject: [PATCH] Exclude hex above max Unicode Scalar Value

Keep hex-unicode defined over the hex digits written inside \u{...}:
a code-point class such as [\u{0}-\u{10FFFF}] matches one literal
character, not the digits that spell its number, so it cannot describe
the escape syntax.

The rule now accepts only the canonical hex numeral of a Unicode Scalar
Value: no zero padding, no surrogates (D800-DFFF), nothing above 10FFFF.
---
 draft-marchan-kdl2.md                                     | 7 +++----
 tests/test_cases/input/unicode_escaped_above_max_fail.kdl | 1 +
 tests/test_cases/input/unicode_escaped_noncanon_fail.kdl  | 1 +
 3 files changed, 5 insertions(+), 4 deletions(-)
 create mode 100644 tests/test_cases/input/unicode_escaped_above_max_fail.kdl
 create mode 100644 tests/test_cases/input/unicode_escaped_noncanon_fail.kdl

diff --git a/draft-marchan-kdl2.md b/draft-marchan-kdl2.md
index 008b9a5..d16295e 100644
--- a/draft-marchan-kdl2.md
+++ b/draft-marchan-kdl2.md
@@ -983,10 +983,9 @@ string-character :=
     [^\\"] - disallowed-literal-code-points
 ws-escape := '\\' (unicode-space | newline)+
 hex-digit := [0-9a-fA-F]
-hex-unicode := hex-digit{1, 6} - surrogates
-surrogates := [dD][8-9a-fA-F]hex-digit{2}
-//  U+D800-DFFF: D  8         00
-//               D  F         FF
+hex-unicode := ('0' | [1-9a-fA-F] hex-digit{0, 5}) - surrogate - above-max-scalar // canonical Unicode Scalar Value₁₆
+surrogate := [dD] [8-9a-fA-F] hex-digit{2} // D800-DFFF
+above-max-scalar := [2-9a-fA-F] hex-digit{5} | '1' [1-9a-fA-F] hex-digit{4} // 110000-FFFFFF
 
 raw-string := '#' raw-string-quotes '#' | '#' raw-string '#'
 raw-string-quotes :=
diff --git a/tests/test_cases/input/unicode_escaped_above_max_fail.kdl b/tests/test_cases/input/unicode_escaped_above_max_fail.kdl
new file mode 100644
index 0000000..5db8765
--- /dev/null
+++ b/tests/test_cases/input/unicode_escaped_above_max_fail.kdl
@@ -0,0 +1 @@
+no "Higher than max Unicode Scalar Value \u{10FFFF} \u{11FFFF}"
diff --git a/tests/test_cases/input/unicode_escaped_noncanon_fail.kdl b/tests/test_cases/input/unicode_escaped_noncanon_fail.kdl
new file mode 100644
index 0000000..409fac7
--- /dev/null
+++ b/tests/test_cases/input/unicode_escaped_noncanon_fail.kdl
@@ -0,0 +1 @@
+no "Non-canonical format for Unicode Scalar Value with extra 0s: \u{00FFFF} instead of \u{FFFF}"