From 49402ccb7b9ac8b0b17a7f293e86710c64b6d419 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kat=20March=C3=A1n?= <kzm@zkat.tech>
Date: Tue, 12 Dec 2023 22:51:52 -0800
Subject: [PATCH] allow BOM only as the first Unicode scalar value in a document

---
 CHANGELOG.md | 2 ++
 SPEC.md      | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cfbf263..a6eee12 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -53,6 +53,8 @@
   * Between annotations and the thing they're annotating (`(blah) node (thing)
     1 y= (who) 2`)
   * Around `=` for props (`x = 1`)
+* The BOM (`U+FEFF`) is now allowed only as the first character in a
+  document. It was previously treated as generic whitespace.
 
 ### KQL
 
diff --git a/SPEC.md b/SPEC.md
index 88332ac..3b971fb 100644
--- a/SPEC.md
+++ b/SPEC.md
@@ -516,6 +516,8 @@ authoritative if something seems to disagree with the text above. The [grammar
 language syntax](#grammar-language) is defined below.
 
 ```
+document := bom? nodes
+
 nodes := (line-space* node)* line-space*
 
 plain-line-space := newline | ws | single-line-comment
@@ -572,7 +574,7 @@ escline := '\\' ws* (single-line-comment | newline | eof)
 
 newline := See Table (All line-break white_space)
 
-ws := bom | unicode-space | multi-line-comment
+ws := unicode-space | multi-line-comment
 
 bom := '\u{FEFF}'