mirror of https://github.com/kdl-org/kdl.git
Compare commits
65 Commits
2.0.0-draf
...
main
| Author | SHA1 | Date |
|---|---|---|
|
|
b8570137b6 | |
|
|
4e4c0af933 | |
|
|
568d103308 | |
|
|
5f73be73c7 | |
|
|
54a49798d9 | |
|
|
0ff5a6ffa3 | |
|
|
3b75764880 | |
|
|
f238372fc9 | |
|
|
d8faf22503 | |
|
|
ebf9ef7649 | |
|
|
5366787152 | |
|
|
85930fd8dd | |
|
|
8e18add9d4 | |
|
|
43d8a5dbf3 | |
|
|
5d82f60407 | |
|
|
ab92232126 | |
|
|
5e5920fd7c | |
|
|
11615b2807 | |
|
|
42ce272508 | |
|
|
a88c450d7d | |
|
|
b942867c58 | |
|
|
ee79f9f755 | |
|
|
84911feb11 | |
|
|
c54ebd9473 | |
|
|
6c55186c03 | |
|
|
21a9eb3f65 | |
|
|
aab44fcd1b | |
|
|
d76063e8e9 | |
|
|
e9e6a844bd | |
|
|
7322f37800 | |
|
|
919e154dcd | |
|
|
53a884c93d | |
|
|
23159696d1 | |
|
|
82a91697a1 | |
|
|
4263a9de3f | |
|
|
0c2dde6d3c | |
|
|
20375a187e | |
|
|
91cd421988 | |
|
|
34e4259dfc | |
|
|
a3b37857e2 | |
|
|
9c9d2b2445 | |
|
|
39a098237d | |
|
|
1147fe965a | |
|
|
72a3c769d7 | |
|
|
757eb5517a | |
|
|
2843744a87 | |
|
|
6d2bc50939 | |
|
|
3b19a761ff | |
|
|
ce3b2eeb7f | |
|
|
353fd85a2b | |
|
|
3c1cf4f2a1 | |
|
|
76dc3e3002 | |
|
|
646dafcd35 | |
|
|
c930f1d124 | |
|
|
0f77ec4f9f | |
|
|
6ceecd85d6 | |
|
|
0e58f61b18 | |
|
|
edbdab2891 | |
|
|
7aa01a21b7 | |
|
|
b82c924013 | |
|
|
d1ceb44f40 | |
|
|
717e86cb1c | |
|
|
ebef8751c0 | |
|
|
ca2bd45a66 | |
|
|
1a6b17b0ae |
|
|
@ -0,0 +1,8 @@
|
||||||
|
# See http://editorconfig.org
|
||||||
|
|
||||||
|
root = true
|
||||||
|
|
||||||
|
[*.{md,xml,org}]
|
||||||
|
charset = utf-8
|
||||||
|
insert_final_newline = true
|
||||||
|
trim_trailing_whitespace = true
|
||||||
|
|
@ -0,0 +1,60 @@
|
||||||
|
name: "Update Editor's Copy"
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
paths-ignore:
|
||||||
|
- README.md
|
||||||
|
- CONTRIBUTING.md
|
||||||
|
- LICENSE.md
|
||||||
|
- .gitignore
|
||||||
|
pull_request:
|
||||||
|
paths-ignore:
|
||||||
|
- README.md
|
||||||
|
- CONTRIBUTING.md
|
||||||
|
- LICENSE.md
|
||||||
|
- .gitignore
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
name: "Update Editor's Copy"
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
steps:
|
||||||
|
- name: "Checkout"
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: "Setup"
|
||||||
|
id: setup
|
||||||
|
run: date -u "+date=%FT%T" >>"$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- name: "Caching"
|
||||||
|
uses: actions/cache@v4
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
.refcache
|
||||||
|
.venv
|
||||||
|
.gems
|
||||||
|
node_modules
|
||||||
|
.targets.mk
|
||||||
|
key: i-d-${{ steps.setup.outputs.date }}
|
||||||
|
restore-keys: i-d-
|
||||||
|
|
||||||
|
- name: "Build Drafts"
|
||||||
|
uses: martinthomson/i-d-template@v1
|
||||||
|
with:
|
||||||
|
token: ${{ github.token }}
|
||||||
|
|
||||||
|
- name: "Update GitHub Pages"
|
||||||
|
uses: martinthomson/i-d-template@v1
|
||||||
|
if: ${{ github.event_name == 'push' }}
|
||||||
|
with:
|
||||||
|
make: gh-pages
|
||||||
|
token: ${{ github.token }}
|
||||||
|
|
||||||
|
- name: "Archive Built Drafts"
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
draft-*.html
|
||||||
|
draft-*.txt
|
||||||
|
|
@ -0,0 +1,57 @@
|
||||||
|
name: "Publish New Draft Version"
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
tags:
|
||||||
|
- "draft-*"
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
email:
|
||||||
|
description: "Submitter email"
|
||||||
|
default: ""
|
||||||
|
type: string
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
name: "Publish New Draft Version"
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: "Checkout"
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
# See https://github.com/actions/checkout/issues/290
|
||||||
|
- name: "Get Tag Annotations"
|
||||||
|
run: git fetch -f origin ${{ github.ref }}:${{ github.ref }}
|
||||||
|
|
||||||
|
- name: "Setup"
|
||||||
|
id: setup
|
||||||
|
run: date -u "+date=%FT%T" >>"$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- name: "Caching"
|
||||||
|
uses: actions/cache@v4
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
.refcache
|
||||||
|
.venv
|
||||||
|
.gems
|
||||||
|
node_modules
|
||||||
|
.targets.mk
|
||||||
|
key: i-d-${{ steps.setup.outputs.date }}
|
||||||
|
restore-keys: i-d-
|
||||||
|
|
||||||
|
- name: "Build Drafts"
|
||||||
|
uses: martinthomson/i-d-template@v1
|
||||||
|
with:
|
||||||
|
token: ${{ github.token }}
|
||||||
|
|
||||||
|
- name: "Upload to Datatracker"
|
||||||
|
uses: martinthomson/i-d-template@v1
|
||||||
|
with:
|
||||||
|
make: upload
|
||||||
|
env:
|
||||||
|
UPLOAD_EMAIL: ${{ inputs.email }}
|
||||||
|
|
||||||
|
- name: "Archive Submitted Drafts"
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
path: "versioned/draft-*-[0-9][0-9].*"
|
||||||
|
|
@ -1,2 +1,25 @@
|
||||||
/target
|
/target
|
||||||
Cargo.lock
|
Cargo.lock
|
||||||
|
*.html
|
||||||
|
*.pdf
|
||||||
|
*.redxml
|
||||||
|
*.swp
|
||||||
|
*.txt
|
||||||
|
*.upload
|
||||||
|
*~
|
||||||
|
.tags
|
||||||
|
/*-[0-9][0-9].xml
|
||||||
|
/.*.mk
|
||||||
|
/.gems/
|
||||||
|
/.refcache
|
||||||
|
/.venv/
|
||||||
|
/.vscode/
|
||||||
|
/lib
|
||||||
|
/node_modules/
|
||||||
|
/versioned/
|
||||||
|
Gemfile.lock
|
||||||
|
archive.json
|
||||||
|
draft-marchan-kdl2.xml
|
||||||
|
package-lock.json
|
||||||
|
report.xml
|
||||||
|
!requirements.txt
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
<note title="Discussion Venues" removeInRFC="true">
|
||||||
|
<t>Source for this draft and an issue tracker can be found at
|
||||||
|
<eref target="https://github.com/kdl-org/kdl"/>.</t>
|
||||||
|
</note>
|
||||||
101
CHANGELOG.md
101
CHANGELOG.md
|
|
@ -1,44 +1,6 @@
|
||||||
# KDL Changelog
|
# KDL Changelog
|
||||||
|
|
||||||
## 2.0.0-draft.7 (2024-12-10)
|
## 2.0.0 (2024-12-21)
|
||||||
|
|
||||||
* `node-space` is now allowed as whitespace after a `slashdash`, meaning line
|
|
||||||
continuations will work now.
|
|
||||||
* One or two consecutive double-quotes are now allowed in the bodies of
|
|
||||||
multi-line quoted strings, without needing to be escaped.
|
|
||||||
* Grammar has been fixed to disallow raw strings like `#"""#`, which are now
|
|
||||||
properly treated as invalid multi-line raw strings (instead of the equivalent of
|
|
||||||
`"\""`).
|
|
||||||
* Test suite has been updated to include a `_fail` suffix in all test cases
|
|
||||||
which are expected to fail.
|
|
||||||
* A slew of additional slashdash and multi-line string compliance tests have
|
|
||||||
been added. Have fun. :)
|
|
||||||
* The organization of string types in the spec prose has been updated to a
|
|
||||||
hopefully more helpful structure.
|
|
||||||
|
|
||||||
|
|
||||||
## 2.0.0-draft.6 (2024-12-04)
|
|
||||||
|
|
||||||
* Multiline strings, both Raw and Quoted, must now use `"""` instead of a single `"`. Using `"""` for a single-line string is a syntax error.
|
|
||||||
* Fixed an issue with the `unicode_silly` test case.
|
|
||||||
* Some rewordings and clarification in the spec prose.
|
|
||||||
* Slight grammar tweak where the pre-terminator `node-space*` for `node` and `final-node` have been moved into `base-node`.
|
|
||||||
|
|
||||||
|
|
||||||
## 2.0.0-draft.5 (2024-11-28)
|
|
||||||
|
|
||||||
* Equals signs other than `=` are no longer supported in properties.
|
|
||||||
* 128-bit integer type annotations have been added to the list of "well-known"
|
|
||||||
type annotations.
|
|
||||||
* Multiline string escape rules have been tweaked significantly.
|
|
||||||
* `\s` is now a valid escape within a string, representing a space character.
|
|
||||||
* Slashdash (`/-`)-compatible locations and related grammar adjusted to be more
|
|
||||||
clear and intuitive. This includes some changes relating to whitespace,
|
|
||||||
including comments and newlines, which are breaking changes.
|
|
||||||
* Various updates to test suite to reflect changes.
|
|
||||||
|
|
||||||
|
|
||||||
## 2.0.0 (Unreleased)
|
|
||||||
|
|
||||||
### Grammar
|
### Grammar
|
||||||
|
|
||||||
|
|
@ -47,7 +9,7 @@
|
||||||
escape.
|
escape.
|
||||||
* Single line comments (`//`) can now be immediately followed by a newline.
|
* Single line comments (`//`) can now be immediately followed by a newline.
|
||||||
* All literal whitespace following a `\` in a string is now discarded.
|
* All literal whitespace following a `\` in a string is now discarded.
|
||||||
* Vertical tabs (`U+000B`) are now considered to be whitespace.
|
* Vertical tabs (`U+000B`) are now considered to be newlines.
|
||||||
* The grammar syntax itself has been described, and some confusing definitions
|
* The grammar syntax itself has been described, and some confusing definitions
|
||||||
in the grammar have been fixed accordingly (mostly related to escaped
|
in the grammar have been fixed accordingly (mostly related to escaped
|
||||||
characters).
|
characters).
|
||||||
|
|
@ -63,6 +25,7 @@
|
||||||
improvement.
|
improvement.
|
||||||
* Raw strings no longer require an `r` prefix: they are now specified by using
|
* Raw strings no longer require an `r` prefix: they are now specified by using
|
||||||
`#""#`.
|
`#""#`.
|
||||||
|
* Raw string productions are now explicitly non-greedy (and "fallible").
|
||||||
* Line continuations can be followed by an EOF now, instead of requiring a
|
* Line continuations can be followed by an EOF now, instead of requiring a
|
||||||
newline (or comment). `node \<EOF>` is now a legal KDL document.
|
newline (or comment). `node \<EOF>` is now a legal KDL document.
|
||||||
* `#` is no longer a legal identifier character.
|
* `#` is no longer a legal identifier character.
|
||||||
|
|
@ -93,7 +56,7 @@
|
||||||
* Around `=` for props (`x = 1`)
|
* Around `=` for props (`x = 1`)
|
||||||
* The BOM is now only allowed as the first character in a document. It was
|
* The BOM is now only allowed as the first character in a document. It was
|
||||||
previously treated as generic whitespace.
|
previously treated as generic whitespace.
|
||||||
* Multi-line strings must now use `"""` as delimeters. The opening delimiter must be immediately followed by a newline, and the closing delimiter must be on its own line, prefixed by optional whitespace.
|
* Multi-line strings must now use `"""` as delimiters. The opening delimiter must be immediately followed by a newline, and the closing delimiter must be on its own line, prefixed by optional whitespace.
|
||||||
* Multi-line strings are now automatically dedented, according to the common
|
* Multi-line strings are now automatically dedented, according to the common
|
||||||
whitespace matching the whitespace prefix of the closing line.
|
whitespace matching the whitespace prefix of the closing line.
|
||||||
* `.1`, `+.1` etc are no longer valid identifiers, to prevent confusion and
|
* `.1`, `+.1` etc are no longer valid identifiers, to prevent confusion and
|
||||||
|
|
@ -112,9 +75,14 @@
|
||||||
* Furthermore, the ordering of slashdashed elements has been restricted such
|
* Furthermore, the ordering of slashdashed elements has been restricted such
|
||||||
that a slashdashed child block cannot go before an entry (including slashdashed
|
that a slashdashed child block cannot go before an entry (including slashdashed
|
||||||
entries).
|
entries).
|
||||||
|
* Optional version marker `/- kdl-version 2` (or `1`) as the first line in a document, optionally preceded by the BOM.
|
||||||
|
|
||||||
### KQL
|
### KQL
|
||||||
|
|
||||||
|
> [!NOTE]
> These are provided for convenience, but as of the 2.0.0 KDL spec release,
|
||||||
|
> KQL itself is not finalized and should be considered a separate specification,
|
||||||
|
> alongside the Schema spec and others.
|
||||||
|
|
||||||
* There's now a _required_ descendant selector (`>>`), instead of using plain
|
* There's now a _required_ descendant selector (`>>`), instead of using plain
|
||||||
spaces for that purpose.
|
spaces for that purpose.
|
||||||
* The "any sibling" selector is now `++` instead of `~`, for consistency with
|
* The "any sibling" selector is now `++` instead of `~`, for consistency with
|
||||||
|
|
@ -123,3 +91,54 @@
|
||||||
* Multi- and single-line comments are now supported, as well as line
|
* Multi- and single-line comments are now supported, as well as line
|
||||||
continuations with `\`.
|
continuations with `\`.
|
||||||
* Map operators have been removed entirely.
|
* Map operators have been removed entirely.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2.0.0 Draft Changelogs
|
||||||
|
|
||||||
|
### 2.0.0-draft.8 (2024-12-14)
|
||||||
|
|
||||||
|
* Some details have been clarified around the treatment of whitespace in
|
||||||
|
multiline strings.
|
||||||
|
* `raw-string` productions have been updated to be explicitly non-greedy and
|
||||||
|
"fallible".
|
||||||
|
* Some tests have been added, others adjusted, some removed, after a cleanup pass.
|
||||||
|
|
||||||
|
|
||||||
|
### 2.0.0-draft.7 (2024-12-10)
|
||||||
|
|
||||||
|
* `node-space` is now allowed as whitespace after a `slashdash`, meaning line
|
||||||
|
continuations will work now.
|
||||||
|
* One or two consecutive double-quotes are now allowed in the bodies of
|
||||||
|
multi-line quoted strings, without needing to be escaped.
|
||||||
|
* Grammar has been fixed to disallow raw strings like `#"""#`, which are now
|
||||||
|
properly treated as invalid multi-line raw strings (instead of the equivalent of
|
||||||
|
`"\""`).
|
||||||
|
* Test suite has been updated to include a `_fail` suffix in all test cases
|
||||||
|
which are expected to fail.
|
||||||
|
* A slew of additional slashdash and multi-line string compliance tests have
|
||||||
|
been added. Have fun. :)
|
||||||
|
* The organization of string types in the spec prose has been updated to a
|
||||||
|
hopefully more helpful structure.
|
||||||
|
|
||||||
|
|
||||||
|
### 2.0.0-draft.6 (2024-12-04)
|
||||||
|
|
||||||
|
* Multiline strings, both Raw and Quoted, must now use `"""` instead of a single `"`. Using `"""` for a single-line string is a syntax error.
|
||||||
|
* Fixed an issue with the `unicode_silly` test case.
|
||||||
|
* Some rewordings and clarification in the spec prose.
|
||||||
|
* Slight grammar tweak where the pre-terminator `node-space*` for `node` and `final-node` have been moved into `base-node`.
|
||||||
|
|
||||||
|
|
||||||
|
### 2.0.0-draft.5 (2024-11-28)
|
||||||
|
|
||||||
|
* Equals signs other than `=` are no longer supported in properties.
|
||||||
|
* 128-bit integer type annotations have been added to the list of "well-known"
|
||||||
|
type annotations.
|
||||||
|
* Multiline string escape rules have been tweaked significantly.
|
||||||
|
* `\s` is now a valid escape within a string, representing a space character.
|
||||||
|
* Slashdash (`/-`)-compatible locations and related grammar adjusted to be more
|
||||||
|
clear and intuitive. This includes some changes relating to whitespace,
|
||||||
|
including comments and newlines, which are breaking changes.
|
||||||
|
* Various updates to test suite to reflect changes.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,22 @@
|
||||||
|
# Contributing
|
||||||
|
|
||||||
|
## Mechanics
|
||||||
|
|
||||||
|
Contributions can be made by creating pull requests.
|
||||||
|
The GitHub interface supports creating pull requests using the Edit (✏) button.
|
||||||
|
|
||||||
|
|
||||||
|
## Building the Specification
|
||||||
|
|
||||||
|
The specification is written in
|
||||||
|
[kramdown-rfc](https://github.com/cabo/kramdown-rfc/wiki/Syntax2), which
|
||||||
|
compiles via [RFCXML](https://authors.ietf.org/rfcxml-vocabulary) to text and
|
||||||
|
HTML.
|
||||||
|
|
||||||
|
You can build the formatted versions or the intermediate RFCXML file using
|
||||||
|
https://author-tools.ietf.org/ or locally by running `make`. To preserve the
|
||||||
|
intermediate RFCXML form in a local build, run `make draft-marchan-kdl2.xml`
|
||||||
|
once.
|
||||||
|
|
||||||
|
Command line usage requires that you have the necessary software installed. See
|
||||||
|
[the instructions](https://github.com/martinthomson/i-d-template/blob/main/doc/SETUP.md).
|
||||||
|
|
@ -98,7 +98,7 @@ The properties and/or children of the node represent the items of the object,
|
||||||
with the property names and child nodenames as each item's key.
|
with the property names and child nodenames as each item's key.
|
||||||
All "keys" in an object node must be unique.
|
All "keys" in an object node must be unique.
|
||||||
|
|
||||||
As with arrays, there are two ambiguous cases that must be manually annoted with the `(object)` type annotation:
|
As with arrays, there are two ambiguous cases that must be manually annotated with the `(object)` type annotation:
|
||||||
|
|
||||||
* An object containing a single item whose key is "-" (like `{"-": 1}`) written using children (like `- { - 1 }`)
|
* An object containing a single item whose key is "-" (like `{"-": 1}`) written using children (like `- { - 1 }`)
|
||||||
would be ambiguous with an array node.
|
would be ambiguous with an array node.
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,15 @@
|
||||||
|
LIBDIR := lib
|
||||||
|
include $(LIBDIR)/main.mk
|
||||||
|
|
||||||
|
$(LIBDIR)/main.mk:
|
||||||
|
ifneq (,$(shell grep "path *= *$(LIBDIR)" .gitmodules 2>/dev/null))
|
||||||
|
git submodule sync
|
||||||
|
git submodule update --init
|
||||||
|
else
|
||||||
|
ifneq (,$(wildcard $(ID_TEMPLATE_HOME)))
|
||||||
|
ln -s "$(ID_TEMPLATE_HOME)" $(LIBDIR)
|
||||||
|
else
|
||||||
|
git clone -q --depth 10 -b main \
|
||||||
|
https://github.com/martinthomson/i-d-template $(LIBDIR)
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
@ -110,22 +110,22 @@ Then the following queries are valid:
|
||||||
## Full Grammar
|
## Full Grammar
|
||||||
|
|
||||||
Rules that are not defined in this grammar are prefixed with `$`, see [the KDL
|
Rules that are not defined in this grammar are prefixed with `$`, see [the KDL
|
||||||
grammar](https://github.com/kdl-org/kdl/blob/main/SPEC.md#full-grammar) for
|
grammar](https://kdl.dev/spec/#name-full-grammar) for
|
||||||
what they expand to.
|
what they expand to.
|
||||||
|
|
||||||
```
|
```
|
||||||
query-str := $bom? query
|
query-str := $bom? query
|
||||||
query := selector q-ws* "||" q-ws* query | selector
|
query := selector q-ws+ "||" q-ws+ query | selector
|
||||||
selector := filter q-ws* selector-operator q-ws* selector-subsequent | filter
|
selector := filter q-ws+ selector-operator q-ws+ selector-subsequent | filter
|
||||||
selector-subsequent := matchers q-ws* selector-operator q-ws* selector-subsequent | matchers
|
selector-subsequent := matchers q-ws+ selector-operator q-ws+ selector-subsequent | matchers
|
||||||
selector-operator := ">>" | ">" | "++" | "+"
|
selector-operator := ">>" | ">" | "++" | "+"
|
||||||
filter := "top(" q-ws* ")" | matchers
|
filter := "top(" q-ws* ")" | matchers
|
||||||
matchers := type-matcher $string? accessor-matcher* | $string accessor-matcher* | accessor-matcher+
|
matchers := type-matcher $string? accessor-matcher* | $string accessor-matcher* | accessor-matcher+
|
||||||
type-matcher := "(" q-ws* ")" | $type
|
type-matcher := "(" q-ws* ")" | $type
|
||||||
accessor-matcher := "[" q-ws* (comparison | accessor)? q-ws* "]"
|
accessor-matcher := "[" q-ws* (comparison | accessor)? q-ws* "]"
|
||||||
comparison := accessor q-ws* matcher-operator q-ws* ($type | $string | $number | $keyword)
|
comparison := accessor q-ws+ matcher-operator q-ws+ ($type | $string | $number | $keyword)
|
||||||
accessor := "val(" q-ws* $integer q-ws* ")" | "prop(" q-ws* $string q-ws* ")" | "name(" q-ws* ")" | "tag(" q-ws* ")" | "values(" q-ws* ")" | "props(" q-ws* ")" | $string
|
accessor := "val(" q-ws* $integer q-ws* ")" | "prop(" q-ws* $string q-ws* ")" | "name(" q-ws* ")" | "tag(" q-ws* ")" | "values(" q-ws* ")" | "props(" q-ws* ")" | $string
|
||||||
matcher-operator := "=" | "!=" | ">" | "<" | ">=" | "<=" | "^=" | "$=" | "*="
|
matcher-operator := "=" | "!=" | ">" | "<" | ">=" | "<=" | "^=" | "$=" | "*="
|
||||||
|
|
||||||
q-ws := $plain-node-space
|
q-ws := $node-space
|
||||||
```
|
```
|
||||||
|
|
|
||||||
191
README.md
191
README.md
|
|
@ -1,16 +1,9 @@
|
||||||
# The KDL Document Language
|
# The KDL Document Language
|
||||||
|
|
||||||
> [!WARNING]
|
|
||||||
> The main branch of this repository shows the latest v2.0.0 draft, which is a
|
|
||||||
> work in progress and not considered the "mainline" KDL yet. Most KDL
|
|
||||||
> implementations in the wild are based on the [v1.0.0
|
|
||||||
> spec](https://github.com/kdl-org/kdl/tree/1.0.0) instead, so you may want to
|
|
||||||
> refer to that if you're using KDL today.
|
|
||||||
|
|
||||||
KDL is a small, pleasant document language with XML-like node semantics that
|
KDL is a small, pleasant document language with XML-like node semantics that
|
||||||
looks like you're invoking a bunch of CLI commands! It's meant to be used both
|
looks like you're invoking a bunch of CLI commands! It's meant to be used both
|
||||||
as a serialization format and a configuration language, much like JSON, YAML,
|
as a serialization format and a configuration language, much like JSON, YAML, or
|
||||||
or XML. It looks like this:
|
XML. It looks like this:
|
||||||
|
|
||||||
```kdl
|
```kdl
|
||||||
package {
|
package {
|
||||||
|
|
@ -51,27 +44,37 @@ package {
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
There's a living [specification](SPEC.md), as well as various
|
For more details, see the [overview below](#overview).
|
||||||
|
|
||||||
|
There's a living [specification](https://kdl.dev/spec/), as well as various
|
||||||
[implementations](#implementations). You can also check out the [FAQ](#faq) to
|
[implementations](#implementations). You can also check out the [FAQ](#faq) to
|
||||||
answer all your burning questions!
|
answer all your burning questions!
|
||||||
|
|
||||||
The current version of the KDL spec is `2.0.0-draft.7`.
|
The current version of the KDL spec is
|
||||||
|
[KDL 2.0.0](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html). For legacy KDL,
|
||||||
|
please refer to the [KDL 1.0.0
|
||||||
|
spec](https://github.com/kdl-org/kdl/blob/2.0.0/SPEC_v1.md). All users are
|
||||||
|
encouraged to migrate. [Migration is forward-and-backward-compatible and
|
||||||
|
safe](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html#compatibility), and can
|
||||||
|
be automated.
|
||||||
|
|
||||||
In addition to a spec for KDL itself, there are also standard specs for [a KDL
|
In addition to a spec for KDL itself, there are specifications for [a KDL Query
|
||||||
Query Language](QUERY-SPEC.md) based on CSS selectors, and [a KDL Schema
|
Language](QUERY-SPEC.md) based on CSS selectors, and [a KDL Schema
|
||||||
Language](SCHEMA-SPEC.md) loosely based on JSON Schema.
|
Language](SCHEMA-SPEC.md) loosely based on JSON Schema.
|
||||||
|
|
||||||
The language is based on [SDLang](https://sdlang.org), with a [number of
|
The language is based on [SDLang](https://sdlang.org), with a [number of
|
||||||
modifications and clarifications on its syntax and behavior](#why-not-sdlang).
|
modifications and clarifications on its syntax and behavior](#why-not-sdlang).
|
||||||
|
We are grateful for their work as an inspiration to ours.
|
||||||
|
|
||||||
[Play with it in your browser!](https://kdl-play.danini.dev/)
|
[Play with it in your browser!](https://kdl.dev/play/)
|
||||||
|
|
||||||
## Design and Discussion
|
## Design and Discussion
|
||||||
|
|
||||||
KDL 2.0 design is still in progress. Discussions and questions about the format
|
KDL 2.0.0 has been finalized, and no further changes are expected. For questions
|
||||||
should happen over on the [discussions
|
about KDL and discussions, please see the [discussions
|
||||||
page](https://github.com/kdl-org/kdl/discussions). Feel free to jump in and give
|
page](https://github.com/kdl-org/kdl/discussions). For minor editorial fixes or
|
||||||
us your 2 cents!
|
critical spec errata, please feel free to [file an
|
||||||
|
issue](https://github.com/kdl-org/kdl/issues).
|
||||||
|
|
||||||
## Used By
|
## Used By
|
||||||
|
|
||||||
|
|
@ -83,35 +86,55 @@ of some examples of KDL in the wild (either v1, v2, or both):
|
||||||
* [Niri](https://github.com/YaLTeR/niri) - Scrollable-tiling window manager for Wayland
|
* [Niri](https://github.com/YaLTeR/niri) - Scrollable-tiling window manager for Wayland
|
||||||
* [Bikeshed](https://github.com/speced/bikeshed) ([here](https://github.com/speced/bikeshed-boilerplate/blob/main/boilerplate/doctypes.kdl) and [here](https://github.com/speced/bikeshed-data/blob/main/data/manifest.txt)) - Specification pre-processor used by CSS, C++, WHATWG, various W3C working groups, and others.
|
* [Bikeshed](https://github.com/speced/bikeshed) ([here](https://github.com/speced/bikeshed-boilerplate/blob/main/boilerplate/doctypes.kdl) and [here](https://github.com/speced/bikeshed-data/blob/main/data/manifest.txt)) - Specification pre-processor used by CSS, C++, WHATWG, various W3C working groups, and others.
|
||||||
* [orogene](https://orogene.dev) - Lightning-fast JavaScript package manager
|
* [orogene](https://orogene.dev) - Lightning-fast JavaScript package manager
|
||||||
|
* [Onyx](https://onyxlang.io/) - An efficient, procedural, and pragmatic programming language that compiles to WASM. Used for package manifests.
|
||||||
* [Pop!_OS/System76 Scheduler](https://github.com/pop-os/system76-scheduler) - Scheduling service which optimizes Linux's CPU scheduler and makes it go faster.
|
* [Pop!_OS/System76 Scheduler](https://github.com/pop-os/system76-scheduler) - Scheduling service which optimizes Linux's CPU scheduler and makes it go faster.
|
||||||
* [ImStyle](https://patitotective.github.io/ImStyle/) - ImGui application styling with Nim and KDL
|
* [ImStyle](https://patitotective.github.io/ImStyle/) - ImGui application styling with Nim and KDL
|
||||||
* [fmod-rs](https://github.com/CAD97/fmod-rs) - Rust bindings to FMOD Core and FMOD Studio
|
* [fmod-rs](https://github.com/CAD97/fmod-rs) - Rust bindings to FMOD Core and FMOD Studio
|
||||||
* [mise](https://mise.jdx.dev/) - dev tools, env vars, task runner
|
* [mise](https://mise.jdx.dev/) - dev tools, env vars, task runner
|
||||||
* [Camping](https://github.com/camping/camping) - Ruby web microframework
|
* [Camping](https://github.com/camping/camping) - Ruby web microframework
|
||||||
|
* [Iron Vault](https://ironvault.quest) - VTT (Virtual Tabletop) plugin for Obsidian for the Ironsworn family of games
|
||||||
|
* [Microsoft TypeScript DOM Generator](https://github.com/microsoft/TypeScript-DOM-lib-generator) - Tool for generating DOM-related TypeScript and JavaScript library files
|
||||||
|
* [Ferron](https://ferron.sh/) - A fast, memory-safe web server written in Rust
|
||||||
* You?
|
* You?
|
||||||
|
|
||||||
## Implementations
|
## Implementations
|
||||||
|
|
||||||
* Rust: [kdl-rs](https://github.com/kdl-org/kdl-rs), [knuffel](https://crates.io/crates/knuffel/) (latter includes derive macro), and [kaydle](https://github.com/Lucretiel/kaydle) (serde-based)
|
> [!NOTE]
> There are two major versions of KDL. Different libraries may support one or the
|
||||||
* JavaScript: [kdljs](https://github.com/kdl-org/kdljs), [@virtualstate/kdl](https://github.com/virtualstate/kdl) (query only, JSX based)
|
> other, or even provide a "hybrid" mode where both versions are attempted, since
|
||||||
* Ruby: [kdl-rb](https://github.com/danini-the-panini/kdl-rb)
|
> there's no data ambiguity between v1 and v2 documents.
|
||||||
* Dart: [kdl-dart](https://github.com/danini-the-panini/kdl-dart)
|
|
||||||
* Java: [kdl4j](https://github.com/hkolbeck/kdl4j)
|
| Language | Implementation | v1 | v2 | Notes |
|
||||||
* PHP: [kdl-php](https://github.com/kdl-org/kdl-php)
|
|---|---|---|---|---|
|
||||||
* Python: [kdl-py](https://github.com/tabatkins/kdlpy), [cuddle](https://github.com/djmattyg007/python-cuddle), [ckdl](https://github.com/tjol/ckdl)
|
| C | [ckdl](https://github.com/tjol/ckdl) | ✅ | ✅ | |
|
||||||
* Elixir: [kuddle](https://github.com/IceDragon200/kuddle)
|
| C#/.NET | [Kadlet](https://github.com/oledfish/Kadlet) | ✅ | ✖️ | |
|
||||||
* XSLT: [xml2kdl](https://github.com/Devasta/XML2KDL)
|
| C#/.NET | [KdlSharp](https://github.com/AndreyAkinshin/KdlSharp) | ✅ | ✅ | .NET Std: 2.1+, .NET 6+, .NET FW 4.7.2+, Mono, Xamarin |
|
||||||
* Haskell: [Hustle](https://github.com/fuzzypixelz/Hustle)
|
| C++ | [kdlpp](https://github.com/tjol/ckdl) | ✅ | ✅ | part of ckdl, requires C++20 |
|
||||||
* .NET: [Kadlet](https://github.com/oledfish/Kadlet)
|
| Common Lisp | [kdlcl](https://github.com/chee/kdlcl) | ✅ | ✖️ | |
|
||||||
* C: [ckdl](https://github.com/tjol/ckdl)
|
| Crystal | [kdl-cr](https://github.com/danini-the-panini/kdl-cr) | ✅ | ✖️ | |
|
||||||
* C++: [kdlpp](https://github.com/tjol/ckdl) (part of ckdl, requires C++20)
|
| Dart | [kdl-dart](https://github.com/danini-the-panini/kdl-dart) | ✅ | ✅ | |
|
||||||
* OCaml: [ocaml-kdl](https://github.com/Bannerets/ocaml-kdl)
|
| Elixir | [kuddle](https://github.com/IceDragon200/kuddle) | ✅ | ✅ | |
|
||||||
* Nim: [kdl-nim](https://github.com/Patitotective/kdl-nim)
|
| Go | [gokdl](https://github.com/lunjon/gokdl) | ✅ | ✖️ | |
|
||||||
* Common Lisp: [kdlcl](https://github.com/chee/kdlcl)
|
| Go | [kdl-go](https://github.com/sblinch/kdl-go) | ✅ | ✖️ | |
|
||||||
* Go: [gokdl](https://github.com/lunjon/gokdl), [kdl-go](https://github.com/sblinch/kdl-go)
|
| Go | [gokdl2](https://github.com/njreid/gokdl2) | ✅ | ✅ | Friendly errors & arena allocator |
|
||||||
* Swift: [kdl-swift](https://github.com/danini-the-panini/kdl-swift)
|
| Haskell | [Hustle](https://github.com/fuzzypixelz/Hustle) | ✅ | ✖️ | |
|
||||||
* Crystal: [kdl-cr](https://github.com/danini-the-panini/kdl-cr)
|
| Haskell | [kdl-hs](https://github.com/brandonchinn178/kdl-hs) | ✅ | ✅ | Format/comment-preserving parser |
|
||||||
* Lua: [kdlua](https://github.com/danini-the-panini/kdlua)
|
| Java | [kdl4j](https://github.com/kdl-org/kdl4j) | ✅ | ✅ | |
|
||||||
|
| JavaScript | [@bgotink/kdl](https://github.com/bgotink/kdl) | ✅ | ✅ | Format/comment-preserving parser |
|
||||||
|
| JavaScript | [@virtualstate/kdl](https://github.com/virtualstate/kdl) | ✅ | ✖️ | query only, JSX based |
|
||||||
|
| JavaScript | [kdljs](https://github.com/kdl-org/kdljs) | ✅ | ✅ | |
|
||||||
|
| Lua | [kdlua](https://github.com/danini-the-panini/kdlua) | ✅ | ✖️ | |
|
||||||
|
| Nim | [kdl-nim](https://github.com/Patitotective/kdl-nim) | ✅ | ✖️ | |
|
||||||
|
| OCaml | [ocaml-kdl](https://github.com/eilvelia/ocaml-kdl) | ✅ | ✅ | |
|
||||||
|
| PHP | [kdl-php](https://github.com/kdl-org/kdl-php) | ✅ | ✖️ | |
|
||||||
|
| Python | [ckdl](https://github.com/tjol/ckdl) | ✅ | ✅ | |
|
||||||
|
| Python | [cuddle](https://github.com/djmattyg007/python-cuddle) | ✅ | ✖️ | |
|
||||||
|
| Python | [kdl-py](https://github.com/tabatkins/kdlpy) | ✅ | ✅ | |
|
||||||
|
| Ruby | [kdl-rb](https://github.com/danini-the-panini/kdl-rb) | ✅ | ✅ | |
|
||||||
|
| Rust | [kdl-rs](https://github.com/kdl-org/kdl-rs) | ✅ | ✅ | Format/comment-preserving parser |
|
||||||
|
| Rust | [knus](https://crates.io/crates/knus/) | ✅ | ✖️ | Serde-_style_ derive macros (not actual Serde) |
|
||||||
|
| Swift | [kdl-swift](https://github.com/danini-the-panini/kdl-swift) | ✅ | ✖️ | |
|
||||||
|
| XSLT | [xml2kdl](https://github.com/Devasta/XML2KDL) | ✅ | ✖️ | |
|
||||||
|
| Zig | [zig-kdl](https://codeberg.org/desttinghim/zig-kdl) | ✅ | ✅ | Format/comment-preserving parser |
|
||||||
|
|
||||||
## Compatibility Test Suite
|
## Compatibility Test Suite
|
||||||
|
|
||||||
|
|
@ -123,11 +146,15 @@ entirety, but in the future, may be required to in order to be included here.
|
||||||
|
|
||||||
## Editor Support
|
## Editor Support
|
||||||
|
|
||||||
* [VS Code](https://marketplace.visualstudio.com/items?itemName=kdl-org.kdl&ssr=false#review-details)
|
|
||||||
* [Sublime Text](https://packagecontrol.io/packages/KDL)
|
|
||||||
* [vim](https://github.com/imsnif/kdl.vim)
|
|
||||||
* [neovim](https://github.com/nvim-treesitter/nvim-treesitter)
|
|
||||||
* [IntelliJ IDEA](https://plugins.jetbrains.com/plugin/20136-kdl-document-language)
|
* [IntelliJ IDEA](https://plugins.jetbrains.com/plugin/20136-kdl-document-language)
|
||||||
|
* [Sublime Text](https://packagecontrol.io/packages/KDL)\*
|
||||||
|
* [TreeSitter](https://github.com/tree-sitter-grammars/tree-sitter-kdl) (neovim, among others)
|
||||||
|
* [VS Code](https://marketplace.visualstudio.com/items?itemName=kdl-org.kdl&ssr=false#review-details)\*
|
||||||
|
* [vim](https://github.com/imsnif/kdl.vim)
|
||||||
|
* [Kate](https://github.com/larsgw/katepart-kdl)\*
|
||||||
|
* [Zed](https://zed.dev/extensions/kdl)
|
||||||
|
|
||||||
|
\* Supports KDL 2.0.0
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
|
|
@ -167,7 +194,7 @@ Nodes without children are terminated by a newline, a semicolon, or the end of
|
||||||
a file stream:
|
a file stream:
|
||||||
|
|
||||||
```kdl
|
```kdl
|
||||||
node1; node2; node3;
|
node1; node2; node3
|
||||||
```
|
```
|
||||||
|
|
||||||
### Values
|
### Values
|
||||||
|
|
@ -175,13 +202,13 @@ node1; node2; node3;
|
||||||
KDL supports 4 data types:
|
KDL supports 4 data types:
|
||||||
|
|
||||||
* Strings: `unquoted`, `"hello world"`, or `#"hello world"#`
|
* Strings: `unquoted`, `"hello world"`, or `#"hello world"#`
|
||||||
* Numbers: `123.45`
|
* Numbers: `123.45`, `0xdeadbeef`, `#inf`, `#-inf`, `#nan`
|
||||||
* Booleans: `#true` and `#false`
|
* Booleans: `#true` and `#false`
|
||||||
* Null: `#null`
|
* Null: `#null`
|
||||||
|
|
||||||
#### Strings
|
#### Strings
|
||||||
|
|
||||||
It supports three different formats for string input: identifiers, quoted, and raw.
|
It supports three different formats for string input: unquoted, quoted, and raw.
|
||||||
|
|
||||||
```kdl
|
```kdl
|
||||||
node1 this-is-a-string
|
node1 this-is-a-string
|
||||||
|
|
@ -199,27 +226,28 @@ You don't have to quote strings unless any of the following apply:
|
||||||
In essence, if it can get confused for other KDL or KQL syntax, it needs
|
In essence, if it can get confused for other KDL or KQL syntax, it needs
|
||||||
quotes.
|
quotes.
|
||||||
|
|
||||||
Both types of quoted string can be multiline as-is, without a different
|
Both types of quoted string can be written across multiple lines by using triple
|
||||||
syntax. Additionally, common indentation shared with the line containing the
|
quotes (`"""`) followed immediately by a newline. Additionally, common
|
||||||
closing quote will be stripped/dedented:
|
indentation shared with the line containing the closing quotes will be
|
||||||
|
stripped/dedented:
|
||||||
|
|
||||||
```kdl
|
```kdl
|
||||||
string "
|
string """
|
||||||
my
|
my
|
||||||
multiline
|
multiline
|
||||||
value
|
value
|
||||||
"
|
"""
|
||||||
```
|
```
|
||||||
|
|
||||||
Raw strings, which do not support `\` escapes and can be used when you want
|
Raw strings, which do not support `\` escapes and can be used when you want
|
||||||
certain kinds of strings to look nicer without having to escape a lot:
|
certain kinds of strings to look nicer without having to escape a lot:
|
||||||
|
|
||||||
```kdl
|
```kdl
|
||||||
exec #"
|
exec #"""
|
||||||
echo "foo"
|
echo "foo"
|
||||||
echo "bar"
|
echo "bar"
|
||||||
cd C:\path\to\dir
|
cd C:\path\to\dir
|
||||||
"#
|
"""#
|
||||||
|
|
||||||
regex #"\d{3} "[^/"]+""#
|
regex #"\d{3} "[^/"]+""#
|
||||||
```
|
```
|
||||||
|
|
@ -233,10 +261,10 @@ other-raw ##"hello#"world"##
|
||||||
|
|
||||||
#### Numbers
|
#### Numbers
|
||||||
|
|
||||||
There are 4 ways to represent numbers in KDL. KDL does not prescribe any
|
There are 4 ways to represent numbers in KDL, plus 3 float keywords. KDL does
|
||||||
representation for these numbers, and it's entirely up to individual
|
not prescribe any representation for these numbers, and it's entirely up to
|
||||||
implementations whether to represent all numbers with a single type, or to
|
individual implementations whether to represent all numbers with a single type,
|
||||||
have different representations for different forms.
|
or to have different representations for different forms.
|
||||||
|
|
||||||
KDL has regular decimal-radix numbers, with optional decimal part, as well as
|
KDL has regular decimal-radix numbers, with optional decimal part, as well as
|
||||||
an optional exponent.
|
an optional exponent.
|
||||||
|
|
@ -254,6 +282,13 @@ my-octal 0o755
|
||||||
my-binary 0b10101101
|
my-binary 0b10101101
|
||||||
```
|
```
|
||||||
|
|
||||||
|
If you're intending to represent IEEE 754 floats, there are three special
|
||||||
|
keywords you can use:
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
special-floats #inf #-inf #nan
|
||||||
|
```
|
||||||
|
|
||||||
Finally, all numbers can have underscores to help readability:
|
Finally, all numbers can have underscores to help readability:
|
||||||
|
|
||||||
```kdl
|
```kdl
|
||||||
|
|
@ -280,7 +315,7 @@ hello
|
||||||
```
|
```
|
||||||
|
|
||||||
On top of that, KDL supports `/-` "slashdash" comments, which can be used to
|
On top of that, KDL supports `/-` "slashdash" comments, which can be used to
|
||||||
comment out individual nodes, arguments, or child blocks:
|
comment out individual nodes, entries, or child blocks:
|
||||||
|
|
||||||
```kdl
|
```kdl
|
||||||
// This entire node and its children are all commented out.
|
// This entire node and its children are all commented out.
|
||||||
|
|
@ -325,12 +360,12 @@ smile 😁
|
||||||
|
|
||||||
// Node names and property keys are just strings, so you can write them like
|
// Node names and property keys are just strings, so you can write them like
|
||||||
// quoted or raw strings, too!
|
// quoted or raw strings, too!
|
||||||
"illegal{}[]/\\=#;identifier" #"1.2.3"# "#false"=#true
|
"illegal(){}[]/\\=#;identifier" #"1.2.3"# "#false"=#true
|
||||||
|
|
||||||
// Identifiers are very flexible. The following is a legal bare identifier:
|
// Identifiers are very flexible. The following is a legal bare identifier:
|
||||||
<@foo123~!$%^&*.:'|?+>
|
-<123~!$@%^&*,.:'`|?+>
|
||||||
|
|
||||||
// And you can also use unicode!
|
// And you can also use non-ASCII unicode!
|
||||||
ノード お名前=ฅ^•ﻌ•^ฅ
|
ノード お名前=ฅ^•ﻌ•^ฅ
|
||||||
|
|
||||||
// kdl specifically allows properties and values to be
|
// kdl specifically allows properties and values to be
|
||||||
|
|
@ -340,9 +375,9 @@ foo bar=#true baz quux=#false 1 2 3
|
||||||
|
|
||||||
## Design Principles
|
## Design Principles
|
||||||
|
|
||||||
1. Maintainability
|
1. Human Maintainability
|
||||||
1. Flexibility
|
1. Flexibility
|
||||||
1. Cognitive simplicity and Learnability
|
1. Cognitive Simplicity and Learnability
|
||||||
1. Ease of de/serialization
|
1. Ease of de/serialization
|
||||||
1. Ease of implementation
|
1. Ease of implementation
|
||||||
|
|
||||||
|
|
@ -374,27 +409,36 @@ SDLang is an excellent base, but I wanted some details ironed out, and some
|
||||||
things removed that only really made sense for SDLang's current use-cases, including
|
things removed that only really made sense for SDLang's current use-cases, including
|
||||||
some restrictions about data representation. KDL is very similar in many ways, except:
|
some restrictions about data representation. KDL is very similar in many ways, except:
|
||||||
|
|
||||||
* The grammar and expected semantics are [well-defined and specified](SPEC.md).
|
* The grammar and expected semantics are [well-defined and specified](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html).
|
||||||
|
This was the original impetus for working on KDL, followed by details that
|
||||||
|
seemed like they could be improved.
|
||||||
* There is only one "number" type. KDL does not prescribe representations, but
|
* There is only one "number" type. KDL does not prescribe representations, but
|
||||||
does have keywords for NaN, infinity, and negative infinity if decimal numbers
|
does have keywords for NaN, infinity, and negative infinity if decimal numbers
|
||||||
are intended to be represtented as IEEE754 floats.
|
are intended to be represented as IEEE754 floats.
|
||||||
* Slashdash (`/-`) comments are great and useful!
|
* Slashdash (`/-`) comments are great and useful!
|
||||||
* Quoteless "identifier" strings are supported. (e.g. `node foo=bar`, vs `node foo="bar"`)
|
* Quoteless "identifier" strings (e.g. `node foo=bar`, vs `node foo="bar"`).
|
||||||
* KDL does not have first-class date or binary data types. Instead, it
|
* KDL does not have first-class date or binary data types. Instead, it
|
||||||
supports arbitrary type annotations for any custom data type you might need:
|
supports arbitrary type annotations for any custom data type you might need:
|
||||||
`(date)"2021-02-03"`, `(binary)"deadbeefbadc0ffee"`.
|
`(date)"2021-02-03"`, `(binary)"deadbeefbadc0ffee"`.
|
||||||
* Values and properties can be interspersed with each other, rather than one
|
* Values and properties can be interspersed with each other, rather than one
|
||||||
having to follow the other.
|
having to follow the other. It was not clear whether this was actually allowed in SDLang.
|
||||||
* All strings in KDL are multi-line, and multi-line strings are automatically dedented to match their closing quote's indentation level.
|
* Multi-line strings are supported using `"""<newline>` and their lines are automatically
|
||||||
* Raw strings are written with `#` (`#"foo\bar"#`), instead of backticks.
|
"dedented" to match their closing quotes' indentation level.
|
||||||
* KDL identifiers can use UTF-8 and are more lax about symbols than SDLang.
|
* Raw strings are written with `#` (`#"foo\bar"#`), instead of backticks. This,
|
||||||
* KDL does not support "anonymous" nodes.
|
while more verbose, allows embedding of languages, especially scripting
|
||||||
|
languages, that use this syntax on a regular basis, without additional escaping
|
||||||
|
(e.g. bash and JavaScript).
|
||||||
|
* KDL identifiers can use a wide range of UTF-8 and are much more lax about
|
||||||
|
valid characters than SDLang.
|
||||||
|
* KDL does not support "anonymous" nodes. Instead, any string can be used as a
|
||||||
|
node name. For lists of arbitrary values, there is a convention of naming the nodes
|
||||||
|
simply `-`.
|
||||||
* Namespaces are not supported, but `:` is a legal identifier character, and applications
|
* Namespaces are not supported, but `:` is a legal identifier character, and applications
|
||||||
can choose to implement namespaces as they see fit.
|
can choose to implement namespaces as they see fit.
|
||||||
* KDL supports arbitrary identifiers for node names and attribute
|
* KDL supports arbitrary identifiers for node names and attribute
|
||||||
names, meaning you can use arbitrary strings for those: `"123" "value"=1` is
|
names, meaning you can use arbitrary strings for those: `"123" "value"=1` is
|
||||||
a valid node, for example. This makes it easier to use KDL for
|
a valid node, for example. This makes it easier to use KDL for
|
||||||
representing arbitrary key/value pairs.
|
representing arbitrary key/value pairs using child nodes.
|
||||||
|
|
||||||
#### Have you seen that one XKCD comic about standards?
|
#### Have you seen that one XKCD comic about standards?
|
||||||
|
|
||||||
|
|
@ -432,7 +476,10 @@ microsyntax for losslessly encoding JSON](JSON-IN-KDL.md).
|
||||||
|
|
||||||
#### What about TOML?
|
#### What about TOML?
|
||||||
|
|
||||||
It nests very poorly. It doesn't fare well with large files.
|
It nests very poorly. It doesn't fare well with large files. Also, I felt some
|
||||||
|
discomfort [continuing to use and promote something by its
|
||||||
|
creator](https://en.wikipedia.org/wiki/Tom_Preston-Werner#Resignation_from_GitHub).
|
||||||
|
|
||||||
|
|
||||||
#### What about XML?
|
#### What about XML?
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -268,7 +268,7 @@ and property names when the `node-names` or `prop-names` options are activated.
|
||||||
|
|
||||||
* `tag`: [Validations](#validation-nodes) to apply to the tag of the value.
|
* `tag`: [Validations](#validation-nodes) to apply to the tag of the value.
|
||||||
* `type`: A string denoting the type of the property value.
|
* `type`: A string denoting the type of the property value.
|
||||||
* `enum`: A specific list of allowed values for this property. May be heterogenous as long as it agrees with the `type`, if specified.
|
* `enum`: A specific list of allowed values for this property. May be heterogeneous as long as it agrees with the `type`, if specified.
|
||||||
|
|
||||||
#### String validations
|
#### String validations
|
||||||
|
|
||||||
|
|
@ -287,7 +287,7 @@ and property names when the `node-names` or `prop-names` options are activated.
|
||||||
* `country-subdivision`: ISO 3166-2 country subdivision code.
|
* `country-subdivision`: ISO 3166-2 country subdivision code.
|
||||||
* `email`: RFC5322 email address.
|
* `email`: RFC5322 email address.
|
||||||
* `idn-email`: RFC6531 internationalized email address.
|
* `idn-email`: RFC6531 internationalized email address.
|
||||||
* `hostname`: RFC1132 internet hostname.
|
* `hostname`: RFC1123 internet hostname.
|
||||||
* `idn-hostname`: RFC5890 internationalized internet hostname.
|
* `idn-hostname`: RFC5890 internationalized internet hostname.
|
||||||
* `ipv4`: RFC2673 dotted-quad IPv4 address.
|
* `ipv4`: RFC2673 dotted-quad IPv4 address.
|
||||||
* `ipv6`: RFC2373 IPv6 address.
|
* `ipv6`: RFC2373 IPv6 address.
|
||||||
|
|
@ -313,10 +313,12 @@ and property names when the `node-names` or `prop-names` options are activated.
|
||||||
* `i16`: 16-bit signed integer
|
* `i16`: 16-bit signed integer
|
||||||
* `i32`: 32-bit signed integer
|
* `i32`: 32-bit signed integer
|
||||||
* `i64`: 64-bit signed integer
|
* `i64`: 64-bit signed integer
|
||||||
|
* `i128`: 128-bit signed integer
|
||||||
* `u8`: 8-bit unsigned integer
|
* `u8`: 8-bit unsigned integer
|
||||||
* `u16`: 16-bit unsigned integer
|
* `u16`: 16-bit unsigned integer
|
||||||
* `u32`: 32-bit unsigned integer
|
* `u32`: 32-bit unsigned integer
|
||||||
* `u64`: 64-bit unsigned integer
|
* `u64`: 64-bit unsigned integer
|
||||||
|
* `u128`: 128-bit unsigned integer
|
||||||
* `isize`: Platform-dependent signed integer
|
* `isize`: Platform-dependent signed integer
|
||||||
* `usize`: Platform-dependent unsigned integer
|
* `usize`: Platform-dependent unsigned integer
|
||||||
* `f32`: IEEE 754 single (32-bit) precision floating point number
|
* `f32`: IEEE 754 single (32-bit) precision floating point number
|
||||||
|
|
|
||||||
944
SPEC.md
944
SPEC.md
|
|
@ -1,943 +1 @@
|
||||||
# KDL Spec
|
The v2 specification has been moved [here](draft-marchan-kdl2.md).
|
||||||
|
|
||||||
This is the semi-formal specification for KDL, including the intended data
|
|
||||||
model and the grammar.
|
|
||||||
|
|
||||||
This document describes KDL version `2.0.0-draft.7`. It was released on
|
|
||||||
2024-12-10.
|
|
||||||
|
|
||||||
## Compatibility
|
|
||||||
|
|
||||||
KDL v2 is designed such that for any given KDL document written as [KDL
|
|
||||||
1.0](./SPEC_v1.md) or KDL 2.0, the parse will either fail completely, or, if the
|
|
||||||
parse succeeds, the data represented by a v1 or v2 parser will be identical.
|
|
||||||
This means that it's safe to use a fallback parsing strategy in order to support
|
|
||||||
both v1 and v2 simultaneously. For example, `node "foo"` is a valid node in both
|
|
||||||
versions, and should be represented identically by parsers.
|
|
||||||
|
|
||||||
## Introduction
|
|
||||||
|
|
||||||
KDL is a node-oriented document language. Its niche and purpose overlap with
|
|
||||||
XML, as do many of its semantics. You can use KDL both as a configuration
|
|
||||||
language, and a data exchange or storage format, if you so choose.
|
|
||||||
|
|
||||||
The bulk of this document is dedicated to a long-form description of all
|
|
||||||
[Components](#components) of a KDL document. There is also a much more terse
|
|
||||||
[Grammar](#full-grammar) at the end of the document that covers most of the
|
|
||||||
rules, with some semantic exceptions involving the data model.
|
|
||||||
|
|
||||||
KDL is designed to be easy to read _and_ easy to implement.
|
|
||||||
|
|
||||||
In this document, references to "left" or "right" refer to directions in the
|
|
||||||
*data stream* towards the beginning or end, respectively; in other words,
|
|
||||||
the directions if the data stream were only ASCII text. They do not refer
|
|
||||||
to the writing direction of text, which can flow in either direction,
|
|
||||||
depending on the characters used.
|
|
||||||
|
|
||||||
## Components
|
|
||||||
|
|
||||||
### Document
|
|
||||||
|
|
||||||
The toplevel concept of KDL is a Document. A Document is composed of zero or
|
|
||||||
more [Nodes](#node), separated by newlines and whitespace, and eventually
|
|
||||||
terminated by an EOF.
|
|
||||||
|
|
||||||
All KDL documents should be UTF-8 encoded and conform to the specifications in
|
|
||||||
this document.
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
The following is a document composed of two toplevel nodes:
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
foo {
|
|
||||||
bar
|
|
||||||
}
|
|
||||||
baz
|
|
||||||
```
|
|
||||||
|
|
||||||
### Node
|
|
||||||
|
|
||||||
Being a node-oriented language means that the real core component of any KDL
|
|
||||||
document is the "node". Every node must have a name, which must be a
|
|
||||||
[String](#string).
|
|
||||||
|
|
||||||
The name may be preceded by a [Type Annotation](#type-annotation) to further
|
|
||||||
clarify its type, particularly in relation to its parent node. (For example,
|
|
||||||
clarifying that a particular `date` child node is for the _publication_ date,
|
|
||||||
rather than the last-modified date, with `(published)date`.)
|
|
||||||
|
|
||||||
Following the name are zero or more [Arguments](#argument) or
|
|
||||||
[Properties](#property), separated by either [whitespace](#whitespace) or [a
|
|
||||||
slash-escaped line continuation](#line-continuation). Arguments and Properties
|
|
||||||
may be interspersed in any order, much like is common with positional arguments
|
|
||||||
vs options in command line tools. Collectively, Arguments and Properties may be
|
|
||||||
referred to as "Entries".
|
|
||||||
|
|
||||||
[Children](#children-block) can be placed after the name and the optional
|
|
||||||
Entries, possibly separated by either whitespace or a
|
|
||||||
slash-escaped line continuation.
|
|
||||||
|
|
||||||
Arguments are ordered relative to each other and that order must be preserved in
|
|
||||||
order to maintain the semantics. Properties between Arguments do not affect
|
|
||||||
Argument ordering.
|
|
||||||
|
|
||||||
By contrast, Properties _SHOULD NOT_ be assumed to be presented in a given
|
|
||||||
order. [Children](#children-block) should be used if an order-sensitive
|
|
||||||
key/value data structure must be represented in KDL. Cf. JSON objects
|
|
||||||
preserving key order.
|
|
||||||
|
|
||||||
Nodes _MAY_ be prefixed with [Slashdash](#slashdash-comments) to "comment out"
|
|
||||||
the entire node, including its properties, arguments, and children, and make
|
|
||||||
it act as plain whitespace, even if it spreads across multiple lines.
|
|
||||||
|
|
||||||
Finally, a node is terminated by either a [Newline](#newline), a semicolon
|
|
||||||
(`;`), the end of a child block (`}`) or the end of the file/stream (an `EOF`).
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
// `foo` will have an Argument value list like `[1, 3]`.
|
|
||||||
foo 1 key=val 3 {
|
|
||||||
bar
|
|
||||||
(role)baz 1 2
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Line Continuation
|
|
||||||
|
|
||||||
Line continuations allow [Nodes](#node) to be spread across multiple lines.
|
|
||||||
|
|
||||||
A line continuation is a `\` character followed by zero or more whitespace
|
|
||||||
items (including multiline comments) and an optional single-line comment. It
|
|
||||||
must be terminated by a [Newline](#newline) (including the Newline that is
|
|
||||||
part of single-line comments).
|
|
||||||
|
|
||||||
Following a line continuation, processing of a Node can continue as usual.
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
my-node 1 2 \ // comments are ok after \
|
|
||||||
3 4 // This is the actual end of the Node.
|
|
||||||
```
|
|
||||||
|
|
||||||
### Property
|
|
||||||
|
|
||||||
A Property is a key/value pair attached to a [Node](#node). A Property is
|
|
||||||
composed of a [String](#string), followed immediately by an equals sign (`=`, `U+003D`),
|
|
||||||
and then a [Value](#value).
|
|
||||||
|
|
||||||
Properties should be interpreted left-to-right, with rightmost properties with
|
|
||||||
identical names overriding earlier properties. That is:
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
node a=1 a=2
|
|
||||||
```
|
|
||||||
|
|
||||||
In this example, the node's `a` value must be `2`, not `1`.
|
|
||||||
|
|
||||||
No other guarantees about order should be expected by implementers.
|
|
||||||
Deserialized representations may iterate over properties in any order and
|
|
||||||
still be spec-compliant.
|
|
||||||
|
|
||||||
Properties _MAY_ be prefixed with `/-` to "comment out" the entire token and
|
|
||||||
make it act as plain whitespace, even if it spreads across multiple lines.
|
|
||||||
|
|
||||||
### Argument
|
|
||||||
|
|
||||||
An Argument is a bare [Value](#value) attached to a [Node](#node), with no
|
|
||||||
associated key. It shares the same space as [Properties](#property), and may be interleaved with them.
|
|
||||||
|
|
||||||
A Node may have any number of Arguments, which should be evaluated left to
|
|
||||||
right. KDL implementations _MUST_ preserve the order of Arguments relative to
|
|
||||||
each other (not counting Properties).
|
|
||||||
|
|
||||||
Arguments _MAY_ be prefixed with `/-` to "comment out" the entire token and
|
|
||||||
make it act as plain whitespace, even if it spreads across multiple lines.
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
my-node 1 2 3 a b c
|
|
||||||
```
|
|
||||||
|
|
||||||
### Children Block
|
|
||||||
|
|
||||||
A children block is a block of [Nodes](#node), surrounded by `{` and `}`. They
|
|
||||||
are an optional part of nodes, and create a hierarchy of KDL nodes.
|
|
||||||
|
|
||||||
Regular node termination rules apply, which means multiple nodes can be
|
|
||||||
included in a single-line children block, as long as they're all terminated by
|
|
||||||
`;`.
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
parent {
|
|
||||||
child1
|
|
||||||
child2
|
|
||||||
}
|
|
||||||
|
|
||||||
parent { child1; child2; }
|
|
||||||
```
|
|
||||||
|
|
||||||
### Value
|
|
||||||
|
|
||||||
A value is either: a [String](#string), a [Number](#number), a
|
|
||||||
[Boolean](#boolean), or [Null](#null).
|
|
||||||
|
|
||||||
Values _MUST_ be either [Arguments](#argument) or values of
|
|
||||||
[Properties](#property). Only [String](#string) values may be used as
|
|
||||||
[Node](#node) names or [Property](#property) keys.
|
|
||||||
|
|
||||||
Values (both as arguments and in properties) _MAY_ be prefixed by a single
|
|
||||||
[Type Annotation](#type-annotation).
|
|
||||||
|
|
||||||
### Type Annotation
|
|
||||||
|
|
||||||
A type annotation is a prefix to any [Node Name](#node) or [Value](#value) that
|
|
||||||
includes a _suggestion_ of what type the value is _intended_ to be treated as,
|
|
||||||
or as a _context-specific elaboration_ of the more generic type the node name
|
|
||||||
indicates.
|
|
||||||
|
|
||||||
Type annotations are written as a set of `(` and `)` with a single
|
|
||||||
[String](#string) in it. It may contain Whitespace after the `(` and before
|
|
||||||
the `)`, and may be separated from its target by Whitespace.
|
|
||||||
|
|
||||||
KDL does not specify any restrictions on what implementations might do with
|
|
||||||
these annotations. They are free to ignore them, or use them to make decisions
|
|
||||||
about how to interpret a value.
|
|
||||||
|
|
||||||
Additionally, the following type annotations MAY be recognized by KDL parsers
|
|
||||||
and, if used, SHOULD interpret these types as follows:
|
|
||||||
|
|
||||||
#### Reserved Type Annotations for Numbers Without Decimals:
|
|
||||||
|
|
||||||
Signed integers of various sizes (the number is the bit size):
|
|
||||||
|
|
||||||
* `i8`
|
|
||||||
* `i16`
|
|
||||||
* `i32`
|
|
||||||
* `i64`
|
|
||||||
* `i128`
|
|
||||||
|
|
||||||
Unsigned integers of various sizes (the number is the bit size):
|
|
||||||
|
|
||||||
* `u8`
|
|
||||||
* `u16`
|
|
||||||
* `u32`
|
|
||||||
* `u64`
|
|
||||||
* `u128`
|
|
||||||
|
|
||||||
Platform-dependent integer types, both signed and unsigned:
|
|
||||||
|
|
||||||
* `isize`
|
|
||||||
* `usize`
|
|
||||||
|
|
||||||
#### Reserved Type Annotations for Numbers With Decimals:
|
|
||||||
|
|
||||||
IEEE 754 floating point numbers, both single (32) and double (64) precision:
|
|
||||||
|
|
||||||
* `f32`
|
|
||||||
* `f64`
|
|
||||||
|
|
||||||
IEEE 754-2008 decimal floating point numbers
|
|
||||||
|
|
||||||
* `decimal64`
|
|
||||||
* `decimal128`
|
|
||||||
|
|
||||||
#### Reserved Type Annotations for Strings:
|
|
||||||
|
|
||||||
* `date-time`: ISO8601 date/time format.
|
|
||||||
* `time`: "Time" section of ISO8601.
|
|
||||||
* `date`: "Date" section of ISO8601.
|
|
||||||
* `duration`: ISO8601 duration format.
|
|
||||||
* `decimal`: IEEE 754-2008 decimal string format.
|
|
||||||
* `currency`: ISO 4217 currency code.
|
|
||||||
* `country-2`: ISO 3166-1 alpha-2 country code.
|
|
||||||
* `country-3`: ISO 3166-1 alpha-3 country code.
|
|
||||||
* `country-subdivision`: ISO 3166-2 country subdivision code.
|
|
||||||
* `email`: RFC5322 email address.
|
|
||||||
* `idn-email`: RFC6531 internationalized email address.
|
|
||||||
* `hostname`: RFC1123 internet hostname (only ASCII segments)
|
|
||||||
* `idn-hostname`: RFC5890 internationalized internet hostname (only `xn--`-prefixed ASCII "punycode" segments, or non-ASCII segments)
|
|
||||||
* `ipv4`: RFC2673 dotted-quad IPv4 address.
|
|
||||||
* `ipv6`: RFC2373 IPv6 address.
|
|
||||||
* `url`: RFC3986 URI.
|
|
||||||
* `url-reference`: RFC3986 URI Reference.
|
|
||||||
* `irl`: RFC3987 Internationalized Resource Identifier.
|
|
||||||
* `irl-reference`: RFC3987 Internationalized Resource Identifier Reference.
|
|
||||||
* `url-template`: RFC6570 URI Template.
|
|
||||||
* `uuid`: RFC4122 UUID.
|
|
||||||
* `regex`: Regular expression. Specific patterns may be implementation-dependent.
|
|
||||||
* `base64`: A Base64-encoded string, denoting arbitrary binary data.
|
|
||||||
|
|
||||||
#### Examples
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
node (u8)123
|
|
||||||
node prop=(regex).*
|
|
||||||
(published)date "1970-01-01"
|
|
||||||
(contributor)person name="Foo McBar"
|
|
||||||
```
|
|
||||||
|
|
||||||
### String
|
|
||||||
|
|
||||||
Strings in KDL represent textual UTF-8 [Values](#value). A String is either an
|
|
||||||
[Identifier String](#identifier-string) (like `foo`), a
|
|
||||||
[Quoted String](#quoted-string) (like `"foo"`)
|
|
||||||
or a [Multi-Line String](#multi-line-string).
|
|
||||||
Both Quoted and Multi-Line Strings come in normal
|
|
||||||
and [Raw String](#raw-string) variants (like `#"foo"#`):
|
|
||||||
|
|
||||||
* Identifier Strings let you write short, "single-word" strings with a
|
|
||||||
minimum of syntax
|
|
||||||
* Quoted Strings let you write strings "like normal", with whitespace and escapes.
|
|
||||||
* Multi-Line Strings let you write strings across multiple lines
|
|
||||||
and with indentation that's not part of the string value.
|
|
||||||
* Raw Strings don't allow any escapes,
|
|
||||||
allowing you to not worry about the string's content containing anything that
|
|
||||||
might look like an escape.
|
|
||||||
|
|
||||||
Strings _MUST_ be represented as UTF-8 values.
|
|
||||||
|
|
||||||
Strings _MUST NOT_ include the code points for
|
|
||||||
[disallowed literal code points](#disallowed-literal-code-points) directly.
|
|
||||||
Quoted and Multi-Line Strings may include these code points as _values_
|
|
||||||
by representing them with their corresponding `\u{...}` escape.
|
|
||||||
|
|
||||||
### Identifier String
|
|
||||||
|
|
||||||
An Identifier String (sometimes referred to as just an "identifier") is
|
|
||||||
composed of any [Unicode Scalar
|
|
||||||
Value](https://unicode.org/glossary/#unicode_scalar_value) other than
|
|
||||||
[non-initial characters](#non-initial-characters), followed by any number of
|
|
||||||
Unicode Scalar Values other than [non-identifier
|
|
||||||
characters](#non-identifier-characters).
|
|
||||||
|
|
||||||
A handful of patterns are disallowed, to avoid confusion with other values:
|
|
||||||
|
|
||||||
* idents that appear to start with a [Number](#number) (like `1.0v2` or
|
|
||||||
`-1em`) or the "almost a number" pattern of a decimal point without a
|
|
||||||
leading digit (like `.1`).
|
|
||||||
* idents that are the language keywords (`inf`, `-inf`, `nan`, `true`,
|
|
||||||
`false`, and `null`) without their leading `#`.
|
|
||||||
|
|
||||||
Identifiers that match these patterns _MUST_ be treated as a syntax error; such
|
|
||||||
values can only be written as quoted or raw strings. The precise details of the
|
|
||||||
identifier syntax are specified in the [Full Grammar](#full-grammar) below.
|
|
||||||
|
|
||||||
Identifier Strings are terminated by [Whitespace](#whitespace) or
|
|
||||||
[Newlines](#newline).
|
|
||||||
|
|
||||||
#### Non-initial characters
|
|
||||||
|
|
||||||
The following characters cannot be the first character in an
|
|
||||||
[Identifier String](#identifier-string):
|
|
||||||
|
|
||||||
* Any decimal digit (0-9)
|
|
||||||
* Any [non-identifier characters](#non-identifier-characters)
|
|
||||||
|
|
||||||
Additionally, the `-` character can only be used as an initial character if
|
|
||||||
the second character is *not* a digit. This allows identifiers to look like
|
|
||||||
`--this`, and removes the ambiguity of having an identifier look like a
|
|
||||||
negative number.
|
|
||||||
|
|
||||||
#### Non-identifier characters
|
|
||||||
|
|
||||||
The following characters cannot be used anywhere in an [Identifier String](#identifier-string):
|
|
||||||
|
|
||||||
* Any of `(){}[]/\"#;=`
|
|
||||||
* Any [Whitespace](#whitespace) or [Newline](#newline).
|
|
||||||
* Any [disallowed literal code points](#disallowed-literal-code-points) in KDL
|
|
||||||
documents.
|
|
||||||
|
|
||||||
### Quoted String
|
|
||||||
|
|
||||||
A Quoted String is delimited by `"` on either side of any number of literal
|
|
||||||
string characters except unescaped `"` and `\`.
|
|
||||||
|
|
||||||
Literal [Newline](#newline) characters can only be included
|
|
||||||
if they are [Escaped Whitespace](#escaped-whitespace),
|
|
||||||
which discards them from the string value.
|
|
||||||
Actually including a newline in the value requires using a newline escape sequence,
|
|
||||||
like `\n`,
|
|
||||||
or using a [Multi-Line String](#multi-line-string)
|
|
||||||
which is actually designed for strings stretching across multiple lines.
|
|
||||||
|
|
||||||
Like Identifier Strings, Quoted Strings _MUST NOT_ include any of the
|
|
||||||
[disallowed literal code-points](#disallowed-literal-code-points) as code
|
|
||||||
points in their body.
|
|
||||||
|
|
||||||
Quoted Strings have a [Raw String](#raw-string) variant,
|
|
||||||
which disallows escapes.
|
|
||||||
|
|
||||||
#### Escapes
|
|
||||||
|
|
||||||
In addition to literal code points, a number of "escapes" are supported in Quoted Strings.
|
|
||||||
"Escapes" are the character `\` followed by another character, and are
|
|
||||||
interpreted as described in the following table:
|
|
||||||
|
|
||||||
| Name | Escape | Code Pt |
|
|
||||||
|-------------------------------|--------|----------|
|
|
||||||
| Line Feed | `\n` | `U+000A` |
|
|
||||||
| Carriage Return | `\r` | `U+000D` |
|
|
||||||
| Character Tabulation (Tab) | `\t` | `U+0009` |
|
|
||||||
| Reverse Solidus (Backslash) | `\\` | `U+005C` |
|
|
||||||
| Quotation Mark (Double Quote) | `\"` | `U+0022` |
|
|
||||||
| Backspace | `\b` | `U+0008` |
|
|
||||||
| Form Feed | `\f` | `U+000C` |
|
|
||||||
| Space | `\s` | `U+0020` |
|
|
||||||
| Unicode Escape | `\u{(1-6 hex chars)}` | Code point described by hex characters, as long as it represents a [Unicode Scalar Value](https://unicode.org/glossary/#unicode_scalar_value) |
|
|
||||||
| Whitespace Escape | See below | N/A |
|
|
||||||
|
|
||||||
##### Escaped Whitespace
|
|
||||||
|
|
||||||
In addition to escaping individual characters, `\` can also escape whitespace.
|
|
||||||
When a `\` is followed by one or more literal whitespace characters, the `\`
|
|
||||||
and all of that whitespace are discarded. For example, `"Hello World"` and
|
|
||||||
`"Hello \ World"` are semantically identical. See [whitespace](#whitespace)
|
|
||||||
and [newlines](#newline) for how whitespace is defined.
|
|
||||||
|
|
||||||
Note that only literal whitespace is escaped; whitespace escapes (`\n` and
|
|
||||||
such) are retained. For example, these strings are all semantically identical:
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
"Hello\ \nWorld"
|
|
||||||
|
|
||||||
"Hello\n\
|
|
||||||
World"
|
|
||||||
|
|
||||||
"Hello\nWorld"
|
|
||||||
|
|
||||||
"""
|
|
||||||
Hello
|
|
||||||
World
|
|
||||||
"""
|
|
||||||
```
|
|
||||||
|
|
||||||
##### Invalid escapes
|
|
||||||
|
|
||||||
Except as described in the escapes table, above, `\` *MUST NOT* precede any
|
|
||||||
other characters in a string.
|
|
||||||
|
|
||||||
|
|
||||||
### Multi-line String
|
|
||||||
|
|
||||||
Multi-Line Strings support multiple lines with literal, non-escaped
|
|
||||||
Newlines. They must use a special multi-line syntax, and they automatically
|
|
||||||
"dedent" the string, allowing its value to be indented to a visually matching
|
|
||||||
level as desired.
|
|
||||||
|
|
||||||
A Multi-Line String is opened and closed by *three* double-quote characters,
|
|
||||||
like `"""`.
|
|
||||||
Its first line _MUST_ immediately start with a [Newline](#newline)
|
|
||||||
after its opening `"""`.
|
|
||||||
Its final line _MUST_ contain only whitespace
|
|
||||||
before the closing `"""`.
|
|
||||||
All in-between lines that contain non-newline characters
|
|
||||||
_MUST_ start with _at least_ the exact same whitespace as the final line
|
|
||||||
(precisely matching codepoints, not merely counting characters or "size");
|
|
||||||
they may contain additional whitespace following this prefix. The lines in
|
|
||||||
between may contain unescaped `"` (but no unescaped `"""` as this would close
|
|
||||||
the string).
|
|
||||||
|
|
||||||
The value of the Multi-Line String omits the first and last Newline, the
|
|
||||||
Whitespace of the last line, and the matching Whitespace prefix on all
|
|
||||||
intermediate lines. The first and last Newline can be the same character (that
|
|
||||||
is, empty multi-line strings are legal).
|
|
||||||
|
|
||||||
In other words, the final line specifies the whitespace prefix that will be
|
|
||||||
removed from all other lines.
|
|
||||||
|
|
||||||
Multi-line Strings that do not immediately start with a Newline and whose final
|
|
||||||
`"""` is not preceded by optional whitespace and a Newline are illegal. This
|
|
||||||
also means that `"""` may not be used for a single-line String (e.g.
|
|
||||||
`"""foo"""`).
|
|
||||||
|
|
||||||
#### Newline Normalization
|
|
||||||
|
|
||||||
Literal Newline sequences in Multi-line Strings must be normalized to a single
|
|
||||||
`U+000A` (`LF`) during deserialization. This means, for example, that `CR LF`
|
|
||||||
becomes a single `LF` during parsing.
|
|
||||||
|
|
||||||
This normalization does not apply to non-literal Newlines entered using escape
|
|
||||||
sequences. That is:
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
multi-line """
|
|
||||||
\r\n[CRLF]
|
|
||||||
foo[CRLF]
|
|
||||||
"""
|
|
||||||
```
|
|
||||||
|
|
||||||
becomes:
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
single-line "\r\n\nfoo"
|
|
||||||
```
|
|
||||||
|
|
||||||
For clarity: this normalization applies to each individual Newline sequence.
|
|
||||||
That is, the literal sequence `CRLF CRLF` becomes `LF LF`, not `LF`.
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
multi-line """
|
|
||||||
foo
|
|
||||||
This is the base indentation
|
|
||||||
bar
|
|
||||||
"""
|
|
||||||
```
|
|
||||||
|
|
||||||
This example's string value will be:
|
|
||||||
|
|
||||||
```
|
|
||||||
foo
|
|
||||||
This is the base indentation
|
|
||||||
bar
|
|
||||||
```
|
|
||||||
|
|
||||||
which is equivalent to `" foo\nThis is the base indentation\n bar"`
|
|
||||||
when written as a single-line string.
|
|
||||||
|
|
||||||
---------
|
|
||||||
|
|
||||||
If the last line wasn't indented as far,
|
|
||||||
it won't dedent the rest of the lines as much:
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
multi-line """
|
|
||||||
foo
|
|
||||||
This is no longer on the left edge
|
|
||||||
bar
|
|
||||||
"""
|
|
||||||
```
|
|
||||||
|
|
||||||
This example's string value will be:
|
|
||||||
|
|
||||||
```
|
|
||||||
foo
|
|
||||||
This is no longer on the left edge
|
|
||||||
bar
|
|
||||||
```
|
|
||||||
|
|
||||||
Equivalent to `" foo\n This is no longer on the left edge\n bar"`.
|
|
||||||
|
|
||||||
-----------
|
|
||||||
|
|
||||||
Empty lines can contain any whitespace, or none at all, and will be reflected as empty in the value:
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
multi-line """
|
|
||||||
Indented a bit.
|
|
||||||
|
|
||||||
A second indented paragraph.
|
|
||||||
"""
|
|
||||||
```
|
|
||||||
|
|
||||||
This example's string value will be:
|
|
||||||
|
|
||||||
```
|
|
||||||
Indented a bit.
|
|
||||||
|
|
||||||
A second indented paragraph.
|
|
||||||
```
|
|
||||||
|
|
||||||
Equivalent to `"Indented a bit.\n\nA second indented paragraph."`
|
|
||||||
|
|
||||||
-----------
|
|
||||||
|
|
||||||
The following yield **syntax errors**:
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
multi-line """can't be single line"""
|
|
||||||
```
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
multi-line """
|
|
||||||
closing quote with non-whitespace prefix"""
|
|
||||||
```
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
multi-line """stuff
|
|
||||||
"""
|
|
||||||
```
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
// Every line must share the exact same prefix as the closing line.
|
|
||||||
multi-line """[\n]
|
|
||||||
[tab]a[\n]
|
|
||||||
[space][space]b[\n]
|
|
||||||
[space][tab][\n]
|
|
||||||
[tab]"""
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Interaction with Whitespace Escapes
|
|
||||||
|
|
||||||
Multi-line strings support the same mechanism for escaping whitespace
|
|
||||||
as Quoted Strings.
|
|
||||||
When processing a Multi-line String, implementations MUST dedent the string _after_
|
|
||||||
resolving all whitespace escapes, but _before_ resolving other backslash escapes.
|
|
||||||
Furthermore, a whitespace escape that attempts to escape the final line's newline
|
|
||||||
and/or whitespace prefix is invalid since the multi-line string has to still be
|
|
||||||
valid with the escaped whitespace removed.
|
|
||||||
|
|
||||||
For example, the following example is illegal:
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
// Equivalent to trying to write a string containing `foo\nbar\`.
|
|
||||||
"""
|
|
||||||
foo
|
|
||||||
bar\
|
|
||||||
"""
|
|
||||||
```
|
|
||||||
|
|
||||||
while the following example is allowed:
|
|
||||||
```kdl
|
|
||||||
"""
|
|
||||||
foo \
|
|
||||||
bar
|
|
||||||
baz
|
|
||||||
\ """
|
|
||||||
|
|
||||||
// equivalent to
|
|
||||||
"""
|
|
||||||
foo bar
|
|
||||||
baz
|
|
||||||
"""
|
|
||||||
```
|
|
||||||
|
|
||||||
### Raw String
|
|
||||||
|
|
||||||
Both [Quoted](#quoted-string) and [Multi-Line Strings](#multi-line-string)
|
|
||||||
have Raw String variants,
|
|
||||||
which are identical in syntax except they do not support `\`-escapes.
|
|
||||||
They otherwise share the same properties as far as
|
|
||||||
literal [Newline](#newline) characters go, multi-line rules, and the requirement
|
|
||||||
of UTF-8 representation.
|
|
||||||
|
|
||||||
The Raw String variants are indicated by preceding the string's opening quotes
|
|
||||||
with one or more `#` characters.
|
|
||||||
The string is then closed by its normal closing quotes,
|
|
||||||
followed by a _matching_ number of `#` characters.
|
|
||||||
This means that the string may contain any combination of `"` and `#` characters
|
|
||||||
other than its closing delimiter (e.g., if a raw string starts with `##"`, it can
|
|
||||||
contain `"` or `"#`, but not `"##` or `"###`).
|
|
||||||
|
|
||||||
Like other Strings, Raw Strings _MUST NOT_ include any of the [disallowed
|
|
||||||
literal code-points](#disallowed-literal-code-points) as code points in their
|
|
||||||
body. Unlike with Quoted Strings, these cannot simply be escaped, and are thus
|
|
||||||
unrepresentable when using Raw Strings.
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
just-escapes #"\n will be literal"#
|
|
||||||
```
|
|
||||||
|
|
||||||
The string contains the literal characters `\n will be literal`.
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
quotes-and-escapes ##"hello\n\r\asd"#world"##
|
|
||||||
```
|
|
||||||
|
|
||||||
The string contains the literal characters `hello\n\r\asd"#world`
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
raw-multi-line #"""
|
|
||||||
You can show examples of """
|
|
||||||
multi-line strings
|
|
||||||
"""
|
|
||||||
without worrying about escapes.
|
|
||||||
"""#
|
|
||||||
```
|
|
||||||
|
|
||||||
The string contains the value
|
|
||||||
|
|
||||||
```
|
|
||||||
You can show examples of """
|
|
||||||
multi-line strings
|
|
||||||
"""
|
|
||||||
without worrying about escapes.
|
|
||||||
```
|
|
||||||
|
|
||||||
or equivalently, `"You can show examples of \"\"\"\n multi-line strings\n \"\"\"\nwithout worrying about escapes."` as a Quoted String.
|
|
||||||
|
|
||||||
### Number
|
|
||||||
|
|
||||||
Numbers in KDL represent numerical [Values](#value). There is no logical distinction in KDL
|
|
||||||
between real numbers, integers, and floating point numbers. It's up to
|
|
||||||
individual implementations to determine how to represent KDL numbers.
|
|
||||||
|
|
||||||
There are five syntaxes for Numbers: Keywords, Decimal, Hexadecimal, Octal, and Binary.
|
|
||||||
|
|
||||||
* All non-[Keyword](#keyword-numbers) numbers may optionally start with one of `-` or `+`, which determine whether they'll be positive or negative.
|
|
||||||
* Binary numbers start with `0b` and only allow `0` and `1` as digits, which may be separated by `_`. They represent numbers in radix 2.
|
|
||||||
* Octal numbers start with `0o` and only allow digits between `0` and `7`, which may be separated by `_`. They represent numbers in radix 8.
|
|
||||||
* Hexadecimal numbers start with `0x` and allow digits between `0` and `9`, as well as letters `A` through `F`, in either lower or upper case, which may be separated by `_`. They represent numbers in radix 16.
|
|
||||||
* Decimal numbers are a bit more special:
|
|
||||||
* They have no radix prefix.
|
|
||||||
* They use digits `0` through `9`, which may be separated by `_`.
|
|
||||||
* They may optionally include a decimal separator `.`, followed by more digits, which may again be separated by `_`.
|
|
||||||
* They may optionally be followed by `E` or `e`, an optional `-` or `+`, and more digits, to represent an exponent value.
|
|
||||||
|
|
||||||
Note that, similar to JSON and some other languages,
|
|
||||||
numbers without an integer digit (such as `.1`) are illegal.
|
|
||||||
They must be written with at least one integer digit, like `0.1`.
|
|
||||||
(These patterns are also disallowed from [Identifier Strings](#identifier-string), to avoid confusion.)
|
|
||||||
|
|
||||||
#### Keyword Numbers
|
|
||||||
|
|
||||||
There are three special "keyword" numbers included in KDL to accommodate the
|
|
||||||
widespread use of [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754) floats:
|
|
||||||
|
|
||||||
* `#inf` - floating point positive infinity.
|
|
||||||
* `#-inf` - floating point negative infinity.
|
|
||||||
* `#nan` - floating point NaN/Not a Number.
|
|
||||||
|
|
||||||
To go along with this and prevent foot guns, the bare [Identifier
|
|
||||||
Strings](#identifier-string) `inf`, `-inf`, and `nan` are considered illegal
|
|
||||||
identifiers and should yield a syntax error.
|
|
||||||
|
|
||||||
The existence of these keywords does not imply that any numbers be represented
|
|
||||||
as IEEE 754 floats. These are simply for clarity and convenience for any
|
|
||||||
implementation that chooses to represent their numbers in this way.
|
|
||||||
|
|
||||||
### Boolean
|
|
||||||
|
|
||||||
A boolean [Value](#value) is either the symbol `#true` or `#false`. These
|
|
||||||
_SHOULD_ be represented by implementations as boolean logical values, or some
|
|
||||||
approximation thereof.
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
my-node #true value=#false
|
|
||||||
```
|
|
||||||
|
|
||||||
### Null
|
|
||||||
|
|
||||||
The symbol `#null` represents a null [Value](#value). It's up to the
|
|
||||||
implementation to decide how to represent this, but it generally signals the
|
|
||||||
"absence" of a value.
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
```kdl
|
|
||||||
my-node #null key=#null
|
|
||||||
```
|
|
||||||
|
|
||||||
### Whitespace
|
|
||||||
|
|
||||||
The following characters should be treated as non-[Newline](#newline) [white
|
|
||||||
space](https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt):
|
|
||||||
|
|
||||||
| Name | Code Pt |
|
|
||||||
|----------------------|---------|
|
|
||||||
| Character Tabulation | `U+0009` |
|
|
||||||
| Line Tabulation | `U+000B` |
|
|
||||||
| Space | `U+0020` |
|
|
||||||
| No-Break Space | `U+00A0` |
|
|
||||||
| Ogham Space Mark | `U+1680` |
|
|
||||||
| En Quad | `U+2000` |
|
|
||||||
| Em Quad | `U+2001` |
|
|
||||||
| En Space | `U+2002` |
|
|
||||||
| Em Space | `U+2003` |
|
|
||||||
| Three-Per-Em Space | `U+2004` |
|
|
||||||
| Four-Per-Em Space | `U+2005` |
|
|
||||||
| Six-Per-Em Space | `U+2006` |
|
|
||||||
| Figure Space | `U+2007` |
|
|
||||||
| Punctuation Space | `U+2008` |
|
|
||||||
| Thin Space | `U+2009` |
|
|
||||||
| Hair Space | `U+200A` |
|
|
||||||
| Narrow No-Break Space| `U+202F` |
|
|
||||||
| Medium Mathematical Space | `U+205F` |
|
|
||||||
| Ideographic Space | `U+3000` |
|
|
||||||
|
|
||||||
#### Single-line comments
|
|
||||||
|
|
||||||
Any text after `//`, until the next literal [Newline](#newline) is "commented
|
|
||||||
out", and is considered to be [Whitespace](#whitespace).
|
|
||||||
|
|
||||||
#### Multi-line comments
|
|
||||||
|
|
||||||
In addition to single-line comments using `//`, comments can also be started
|
|
||||||
with `/*` and ended with `*/`. These comments can span multiple lines. They
|
|
||||||
are allowed in all positions where [Whitespace](#whitespace) is allowed and
|
|
||||||
can be nested.
|
|
||||||
|
|
||||||
#### Slashdash comments
|
|
||||||
|
|
||||||
Finally, a special kind of comment called a "slashdash", denoted by `/-`, can
|
|
||||||
be used to comment out entire _components_ of a KDL document logically, and
|
|
||||||
have those elements not be included as part of the parsed document data.
|
|
||||||
|
|
||||||
Slashdash comments can be used before the following, including before their type
|
|
||||||
annotations, if present:
|
|
||||||
|
|
||||||
* A [Node](#node): the entire Node is treated as Whitespace, including all
|
|
||||||
props, args, and children.
|
|
||||||
* An [Argument](#argument): the Argument value is treated as Whitespace.
|
|
||||||
* A [Property](#property) key: the entire property, including both key and value,
|
|
||||||
is treated as Whitespace. A slashdash of just the property value is not allowed.
|
|
||||||
* A [Children Block](#children-block): the entire block, including all
|
|
||||||
children within, is treated as Whitespace. Only other children blocks, whether
|
|
||||||
slashdashed or not, may follow a slashdashed children block.
|
|
||||||
|
|
||||||
A slashdash may be followed by any amount of whitespace, including newlines and
|
|
||||||
comments (other than other slashdashes), before the element that it comments out.
|
|
||||||
|
|
||||||
### Newline
|
|
||||||
|
|
||||||
The following character sequences [should be treated as new
|
|
||||||
lines](https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf):
|
|
||||||
|
|
||||||
| Acronym | Name | Code Pt |
|
|
||||||
|---------|-----------------|---------|
|
|
||||||
| CRLF | Carriage Return and Line Feed | `U+000D` + `U+000A` |
|
|
||||||
| CR | Carriage Return | `U+000D` |
|
|
||||||
| LF | Line Feed | `U+000A` |
|
|
||||||
| NEL | Next Line | `U+0085` |
|
|
||||||
| FF | Form Feed | `U+000C` |
|
|
||||||
| LS | Line Separator | `U+2028` |
|
|
||||||
| PS | Paragraph Separator | `U+2029` |
|
|
||||||
|
|
||||||
Note that for the purpose of new lines, the specific sequence `CRLF` is
|
|
||||||
considered _a single newline_.
|
|
||||||
|
|
||||||
### Disallowed Literal Code Points
|
|
||||||
|
|
||||||
The following code points may not appear literally anywhere in the document.
|
|
||||||
They may be represented in Strings (but not Raw Strings) using [Unicode Escapes](#escapes) (`\u{...}`).
|
|
||||||
|
|
||||||
* The codepoints `U+0000-0008` or the codepoints `U+000E-001F` (various
|
|
||||||
control characters).
|
|
||||||
* `U+007F` (the Delete control character).
|
|
||||||
* Any codepoint that is not a [Unicode Scalar
|
|
||||||
Value](https://unicode.org/glossary/#unicode_scalar_value) (`U+D800-DFFF`).
|
|
||||||
* `U+200E-200F`, `U+202A-202E`, and `U+2066-2069`, the [unicode
|
|
||||||
"direction control"
|
|
||||||
characters](https://www.w3.org/International/questions/qa-bidi-unicode-controls)
|
|
||||||
* `U+FEFF`, aka Zero-width Non-breaking Space (ZWNBSP)/Byte Order Mark (BOM),
|
|
||||||
except as the first code point in a document.
|
|
||||||
|
|
||||||
## Full Grammar
|
|
||||||
|
|
||||||
This is the full official grammar for KDL and should be considered
|
|
||||||
authoritative if something seems to disagree with the text above. The [grammar
|
|
||||||
language syntax](#grammar-language) is defined below.
|
|
||||||
|
|
||||||
```
|
|
||||||
document := bom? nodes
|
|
||||||
|
|
||||||
// Nodes
|
|
||||||
nodes := (line-space* node)* line-space*
|
|
||||||
|
|
||||||
base-node := slashdash? type? node-space* string
|
|
||||||
(node-space+ slashdash? node-prop-or-arg)*
|
|
||||||
// slashdashed node-children must always be after props and args.
|
|
||||||
(node-space+ slashdash node-children)*
|
|
||||||
(node-space+ node-children)?
|
|
||||||
(node-space+ slashdash node-children)*
|
|
||||||
node-space*
|
|
||||||
node := base-node node-terminator
|
|
||||||
final-node := base-node node-terminator?
|
|
||||||
|
|
||||||
// Entries
|
|
||||||
node-prop-or-arg := prop | value
|
|
||||||
node-children := '{' nodes final-node? '}'
|
|
||||||
node-terminator := single-line-comment | newline | ';' | eof
|
|
||||||
|
|
||||||
prop := string node-space* '=' node-space* value
|
|
||||||
value := type? node-space* (string | number | keyword)
|
|
||||||
type := '(' node-space* string node-space* ')'
|
|
||||||
|
|
||||||
// Strings
|
|
||||||
string := identifier-string | quoted-string | raw-string
|
|
||||||
|
|
||||||
identifier-string := unambiguous-ident | signed-ident | dotted-ident
|
|
||||||
unambiguous-ident := ((identifier-char - digit - sign - '.') identifier-char*) - disallowed-keyword-identifiers
|
|
||||||
signed-ident := sign ((identifier-char - digit - '.') identifier-char*)?
|
|
||||||
dotted-ident := sign? '.' ((identifier-char - digit) identifier-char*)?
|
|
||||||
identifier-char := unicode - unicode-space - newline - [\\/(){};\[\]"#=] - disallowed-literal-code-points - equals-sign
|
|
||||||
disallowed-keyword-identifiers := 'true' | 'false' | 'null' | 'inf' | '-inf' | 'nan'
|
|
||||||
|
|
||||||
quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline unicode-space* '"""'
|
|
||||||
single-line-string-body := (string-character - newline)*
|
|
||||||
multi-line-string-body := (('"' | '""')? string-character)*
|
|
||||||
string-character := '\' escape | [^\\"] - disallowed-literal-code-points
|
|
||||||
escape := ["\\bfnrts] | 'u{' hex-digit{1, 6} '}' | (unicode-space | newline)+
|
|
||||||
hex-digit := [0-9a-fA-F]
|
|
||||||
|
|
||||||
raw-string := '#' raw-string-quotes '#' | '#' raw-string '#'
|
|
||||||
raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body newline unicode-space* '"""'
|
|
||||||
single-line-raw-string-body := '' | (single-line-raw-string-char - '"') single-line-raw-string-char* | '"' (single-line-raw-string-char - '"') single-line-raw-string-char*
|
|
||||||
single-line-raw-string-char := unicode - newline - disallowed-literal-code-points
|
|
||||||
multi-line-raw-string-body := (unicode - disallowed-literal-code-points)*
|
|
||||||
|
|
||||||
// Numbers
|
|
||||||
number := keyword-number | hex | octal | binary | decimal
|
|
||||||
|
|
||||||
decimal := sign? integer ('.' integer)? exponent?
|
|
||||||
exponent := ('e' | 'E') sign? integer
|
|
||||||
integer := digit (digit | '_')*
|
|
||||||
digit := [0-9]
|
|
||||||
sign := '+' | '-'
|
|
||||||
|
|
||||||
hex := sign? '0x' hex-digit (hex-digit | '_')*
|
|
||||||
octal := sign? '0o' [0-7] [0-7_]*
|
|
||||||
binary := sign? '0b' ('0' | '1') ('0' | '1' | '_')*
|
|
||||||
|
|
||||||
// Keywords and booleans.
|
|
||||||
keyword := boolean | '#null'
|
|
||||||
keyword-number := '#inf' | '#-inf' | '#nan'
|
|
||||||
boolean := '#true' | '#false'
|
|
||||||
|
|
||||||
// Specific code points
|
|
||||||
bom := '\u{FEFF}'
|
|
||||||
disallowed-literal-code-points := See Table (Disallowed Literal Code Points)
|
|
||||||
unicode := Any Unicode Scalar Value
|
|
||||||
unicode-space := See Table (All White_Space unicode characters which are not `newline`)
|
|
||||||
|
|
||||||
// Comments
|
|
||||||
single-line-comment := '//' ^newline* (newline | eof)
|
|
||||||
multi-line-comment := '/*' commented-block
|
|
||||||
commented-block := '*/' | (multi-line-comment | '*' | '/' | [^*/]+) commented-block
|
|
||||||
slashdash := '/-' (node-space | line-space)*
|
|
||||||
|
|
||||||
// Whitespace
|
|
||||||
ws := unicode-space | multi-line-comment
|
|
||||||
escline := '\\' ws* (single-line-comment | newline | eof)
|
|
||||||
newline := See Table (All Newline White_Space)
|
|
||||||
// Whitespace where newlines are allowed.
|
|
||||||
line-space := newline | ws | single-line-comment
|
|
||||||
// Whitespace within nodes, where newline-ish things must be esclined.
|
|
||||||
node-space := ws* escline ws* | ws+
|
|
||||||
```
|
|
||||||
|
|
||||||
### Grammar language
|
|
||||||
|
|
||||||
The grammar language syntax is a combination of ABNF with some regex spice thrown in.
|
|
||||||
Specifically:
|
|
||||||
|
|
||||||
* Single quotes (`'`) are used to denote literal text. `\` within a literal
|
|
||||||
string is used for escaping other single-quotes, for initiating unicode
|
|
||||||
characters using hex values (`\u{FEFF}`), and for escaping `\` itself
|
|
||||||
(`\\`).
|
|
||||||
* `*` is used for "zero or more", `+` is used for "one or more", and `?` is
|
|
||||||
used for "zero or one".
|
|
||||||
* `()` can be used to group matches that must be matched together.
|
|
||||||
* `a | b` means `a or b`, whichever matches first. If multiple items are before
|
|
||||||
a `|`, they are a single group. `a b c | d` is equivalent to `(a b c) | d`.
|
|
||||||
* `[]` are used for regex-style character matches, where any character between
|
|
||||||
the brackets will be a single match. `\` is used to escape `\`, `[`, and
|
|
||||||
`]`. They also support character ranges (`0-9`), and negation (`^`).
|
|
||||||
* `-` is used for "except for" or "minus" whatever follows it. For example,
|
|
||||||
`a - 'x'` means "any `a`, except something that matches the literal `'x'`".
|
|
||||||
* The prefix `^` means "something that does not match" whatever follows it.
|
|
||||||
For example, `^foo` means "must not match `foo`".
|
|
||||||
* A single definition may be split over multiple lines. Newlines are treated as
|
|
||||||
spaces.
|
|
||||||
* `//` followed by text on its own line is used as comment syntax.
|
|
||||||
|
|
|
||||||
15
SPEC_v1.md
15
SPEC_v1.md
|
|
@ -22,12 +22,17 @@ simultaneously. For example, `node "foo"` is a valid node in both versions, and
|
||||||
should be represented identically by parsers.
|
should be represented identically by parsers.
|
||||||
|
|
||||||
KDL v2 is designed such that for any given KDL document written as KDL
|
KDL v2 is designed such that for any given KDL document written as KDL
|
||||||
1.0 or [KDL 2.0](./SPEC.md), the parse will either fail completely, or, if the
|
1.0 or [KDL 2.0](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html),
|
||||||
|
the parse will either fail completely, or, if the
|
||||||
parse succeeds, the data represented by a v1 or v2 parser will be identical.
|
parse succeeds, the data represented by a v1 or v2 parser will be identical.
|
||||||
This means that it's safe to use a fallback parsing strategy in order to support
|
This means that it's safe to use a fallback parsing strategy in order to support
|
||||||
both v1 and v2 simultaneously. For example, `node "foo"` is a valid node in both
|
both v1 and v2 simultaneously. For example, `node "foo"` is a valid node in both
|
||||||
versions, and should be represented identically by parsers.
|
versions, and should be represented identically by parsers.
|
||||||
|
|
||||||
|
A version marker `/- kdl-version 1` (or `2`) _MAY_ be added to the beginning of
|
||||||
|
a KDL document, optionally preceded by the BOM, and parsers _MAY_ use that as a
|
||||||
|
hint as to which version to parse the document as.
|
||||||
|
|
||||||
## Introduction
|
## Introduction
|
||||||
|
|
||||||
KDL is a node-oriented document language. Its niche and purpose overlaps with
|
KDL is a node-oriented document language. Its niche and purpose overlaps with
|
||||||
|
|
@ -308,7 +313,7 @@ IEEE 754-2008 decimal floating point numbers
|
||||||
* `country-subdivision`: ISO 3166-2 country subdivision code.
|
* `country-subdivision`: ISO 3166-2 country subdivision code.
|
||||||
* `email`: RFC5322 email address.
|
* `email`: RFC5322 email address.
|
||||||
* `idn-email`: RFC6531 internationalized email address.
|
* `idn-email`: RFC6531 internationalized email address.
|
||||||
* `hostname`: RFC1132 internet hostname (only ASCII segments)
|
* `hostname`: RFC1123 internet hostname (only ASCII segments)
|
||||||
* `idn-hostname`: RFC5890 internationalized internet hostname (only `xn--`-prefixed ASCII "punycode" segments, or non-ASCII segments)
|
* `idn-hostname`: RFC5890 internationalized internet hostname (only `xn--`-prefixed ASCII "punycode" segments, or non-ASCII segments)
|
||||||
* `ipv4`: RFC2673 dotted-quad IPv4 address.
|
* `ipv4`: RFC2673 dotted-quad IPv4 address.
|
||||||
* `ipv6`: RFC2373 IPv6 address.
|
* `ipv6`: RFC2373 IPv6 address.
|
||||||
|
|
@ -464,19 +469,19 @@ can be nested.
|
||||||
### Newline
|
### Newline
|
||||||
|
|
||||||
The following characters [should be treated as new
|
The following characters [should be treated as new
|
||||||
lines](https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf):
|
lines](https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-5/#G41643):
|
||||||
|
|
||||||
| Acronym | Name | Code Pt |
|
| Acronym | Name | Code Pt |
|
||||||
|---------|-----------------|---------|
|
|---------|-----------------|---------|
|
||||||
|
| CRLF | Carriage Return and Line Feed | `U+000D` + `U+000A` |
|
||||||
| CR | Carriage Return | `U+000D` |
|
| CR | Carriage Return | `U+000D` |
|
||||||
| LF | Line Feed | `U+000A` |
|
| LF | Line Feed | `U+000A` |
|
||||||
| CRLF | Carriage Return and Line Feed | `U+000D` + `U+000A` |
|
|
||||||
| NEL | Next Line | `U+0085` |
|
| NEL | Next Line | `U+0085` |
|
||||||
| FF | Form Feed | `U+000C` |
|
| FF | Form Feed | `U+000C` |
|
||||||
| LS | Line Separator | `U+2028` |
|
| LS | Line Separator | `U+2028` |
|
||||||
| PS | Paragraph Separator | `U+2029` |
|
| PS | Paragraph Separator | `U+2029` |
|
||||||
|
|
||||||
Note that for the purpose of new lines, CRLF is considered _a single newline_.
|
Note that for the purpose of new lines, CRLF is considered _a single newline_. `VT` `Vertical tab` `U+000B` was mistakenly excluded, but the v1 spec is frozen, so it's left unchanged.
|
||||||
|
|
||||||
## Full Grammar
|
## Full Grammar
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ XML elements and KDL nodes have a direct correspondence. In XiK, an XML element
|
||||||
* making the attributes into KDL properties
|
* making the attributes into KDL properties
|
||||||
* making the child nodes as KDL child nodes
|
* making the child nodes as KDL child nodes
|
||||||
|
|
||||||
For example, the XML `<element foo="bar"><child baz="qux" /></element>` is encoded into XiK as `element foo=bar { child baz=quux }`.
|
For example, the XML `<element foo="bar"><child baz="quux" /></element>` is encoded into XiK as `element foo=bar { child baz=quux }`.
|
||||||
|
|
||||||
XML namespaces are encoded the same as XML: the node name simply contains a `:` character. Note that KDL identifier syntax allows `:` directly in an ident, so a name like `xml:space` or `xlink:href` is a valid node or property name.
|
XML namespaces are encoded the same as XML: the node name simply contains a `:` character. Note that KDL identifier syntax allows `:` directly in an ident, so a name like `xml:space` or `xlink:href` is a valid node or property name.
|
||||||
|
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -54,3 +54,7 @@ please send a PR.
|
||||||
If you think the disagreement is due to a genuine error or oversight in the
|
If you think the disagreement is due to a genuine error or oversight in the
|
||||||
KDL specification, please open an issue explaining the matter and the change
|
KDL specification, please open an issue explaining the matter and the change
|
||||||
will be considered for the next version of the KDL spec.
|
will be considered for the next version of the KDL spec.
|
||||||
|
|
||||||
|
# Benchmarks
|
||||||
|
|
||||||
|
The `benchmarks` folder contains some large or gnarly documents intended to be used to stress-test your parser and help with profiling. They are intentionally not part of the testsuite, and just provided for your own personal benefit.
|
||||||
|
|
|
||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,3 @@
|
||||||
|
foo123 {
|
||||||
|
bar
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node "12"
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
node
|
||||||
|
node
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
a
|
||||||
|
b
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
parent {
|
||||||
|
child
|
||||||
|
child
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
(type)node
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node ""
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node ""
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node "a\\ b\na\\b"
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node ""
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node ""
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node "\"\"\""
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node "foo bar\nbaz"
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node " foo bar\n baz"
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node " a"
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node "\""
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node "\"\""
|
||||||
|
|
@ -1 +1 @@
|
||||||
node "" "" " \n"
|
node "" "" "" "\n\n " "\n"
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node deadbeef
|
||||||
|
|
@ -1 +1 @@
|
||||||
foo123~!@$%^&*.:'|?+<>, weeee
|
foo123~!@$%^&*.:'|?+<>,`-_ weeee
|
||||||
|
|
|
||||||
|
|
@ -1 +1 @@
|
||||||
foo123~!@$%^&*.:'|?+<>, weeee
|
foo123~!@$%^&*.:'|?+<>,`-_ weeee
|
||||||
|
|
|
||||||
|
|
@ -1 +1,2 @@
|
||||||
node arg
|
node arg
|
||||||
|
node2 arg2
|
||||||
|
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
node 0
|
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node string
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
node string
|
||||||
|
node string
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node string
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
foo123{bar}
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
node "1\
|
||||||
|
|
||||||
|
|
||||||
|
2"
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
node; \
|
||||||
|
node
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
\
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
\
|
||||||
|
|
||||||
|
node
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
a \
|
||||||
|
|
||||||
|
b
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
parent {
|
||||||
|
child
|
||||||
|
\ // comment
|
||||||
|
child
|
||||||
|
}
|
||||||
|
|
@ -1,2 +1,3 @@
|
||||||
node1
|
node1
|
||||||
|
\
|
||||||
node2
|
node2
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
\
|
||||||
|
(type)node
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
node
|
||||||
|
\
|
||||||
|
/-
|
||||||
|
node
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node r"foo"
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node r#"foo"#
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
node #"""
|
||||||
|
"""#
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
node #"""
|
||||||
|
"""#
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
node """
|
||||||
|
a\\ b
|
||||||
|
a\\\ b
|
||||||
|
"""
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
node """
|
||||||
|
"""
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
node """
|
||||||
|
"""
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
node """
|
||||||
|
\"""
|
||||||
|
"""
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
node """
|
||||||
|
foo \
|
||||||
|
bar
|
||||||
|
baz
|
||||||
|
\ """
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
node """
|
||||||
|
foo \
|
||||||
|
bar
|
||||||
|
baz
|
||||||
|
\ """
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
node """
|
||||||
|
a
|
||||||
|
\
|
||||||
|
"""
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
node """
|
||||||
|
a
|
||||||
|
\
|
||||||
|
"""
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
node """
|
||||||
|
foo
|
||||||
|
bar\
|
||||||
|
"""
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
node """
|
||||||
|
\s escaped prefix
|
||||||
|
literal prefix
|
||||||
|
"""
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
node """
|
||||||
|
"
|
||||||
|
"""
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
node """
|
||||||
|
""
|
||||||
|
"""
|
||||||
|
|
@ -1,12 +1,18 @@
|
||||||
// This file deliberately contains unusual whitespace
|
// This file deliberately contains unusual whitespace
|
||||||
// the first two strings are empty
|
// The first two strings are empty
|
||||||
node """
|
node """
|
||||||
""" """
|
""" """
|
||||||
\
|
\
|
||||||
|
|
||||||
""" \
|
""" """
|
||||||
\ // The next string contains only whitespace
|
|
||||||
|
"""\
|
||||||
|
\ // The next two strings contain only whitespace
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
"""
|
\s
|
||||||
|
""" #"""
|
||||||
|
|
||||||
|
|
||||||
|
"""#
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
node """
|
||||||
|
dead\
|
||||||
|
beef
|
||||||
|
"""
|
||||||
|
|
@ -1 +1,2 @@
|
||||||
|
// This fails because `"""` MUST be followed by a newline.
|
||||||
node #"""#
|
node #"""#
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
no "Higher than max Unicode Scalar Value \u{10FFFF} \u{11FFFF}"
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
no "Surrogates high\u{D800}"
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
no "Surrogates high\u{D911}"
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
no "Surrogates high\u{DABB}"
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
no "Surrogates high\u{DBFF}"
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
no "Surrogates low\u{DC00}"
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
no "Surrogates low\u{DEAD}"
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
eno "Surrogates low\u{DFFF}"
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
no "Even with leading 0s Unicode Scalar Value escapes must ≤6: \u{0012345}"
|
||||||
|
|
@ -1 +1 @@
|
||||||
"foo123~!@$%^&*.:'|?+<>," weeee
|
"foo123~!@$%^&*.:'|?+<>,`-_" weeee
|
||||||
|
|
|
||||||
|
|
@ -1 +1 @@
|
||||||
foo123~!@$%^&*.:'|?+<>, weeee
|
foo123~!@$%^&*.:'|?+<>,`-_ weeee
|
||||||
|
|
|
||||||
|
|
@ -1 +1 @@
|
||||||
nodearg
|
node argnode2 arg2
|
||||||
|
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
node 0
|
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node"string"
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node foo="value"bar=5
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node "string"1
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node "string"/-1
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
node "string"/-{}
|
||||||
|
node "string" {}/-{}
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
node "string"/-foo=1
|
||||||
Loading…
Reference in New Issue