mirror of https://github.com/kdl-org/kdl.git
Compare commits
No commits in common. "main" and "2.0.0" have entirely different histories.
|
|
@ -1,8 +0,0 @@
|
||||||
# See http://editorconfig.org
|
|
||||||
|
|
||||||
root = true
|
|
||||||
|
|
||||||
[*.{md,xml,org}]
|
|
||||||
charset = utf-8
|
|
||||||
insert_final_newline = true
|
|
||||||
trim_trailing_whitespace = true
|
|
||||||
|
|
@ -1,60 +0,0 @@
|
||||||
name: "Update Editor's Copy"
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
paths-ignore:
|
|
||||||
- README.md
|
|
||||||
- CONTRIBUTING.md
|
|
||||||
- LICENSE.md
|
|
||||||
- .gitignore
|
|
||||||
pull_request:
|
|
||||||
paths-ignore:
|
|
||||||
- README.md
|
|
||||||
- CONTRIBUTING.md
|
|
||||||
- LICENSE.md
|
|
||||||
- .gitignore
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build:
|
|
||||||
name: "Update Editor's Copy"
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
permissions:
|
|
||||||
contents: write
|
|
||||||
steps:
|
|
||||||
- name: "Checkout"
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: "Setup"
|
|
||||||
id: setup
|
|
||||||
run: date -u "+date=%FT%T" >>"$GITHUB_OUTPUT"
|
|
||||||
|
|
||||||
- name: "Caching"
|
|
||||||
uses: actions/cache@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
.refcache
|
|
||||||
.venv
|
|
||||||
.gems
|
|
||||||
node_modules
|
|
||||||
.targets.mk
|
|
||||||
key: i-d-${{ steps.setup.outputs.date }}
|
|
||||||
restore-keys: i-d-
|
|
||||||
|
|
||||||
- name: "Build Drafts"
|
|
||||||
uses: martinthomson/i-d-template@v1
|
|
||||||
with:
|
|
||||||
token: ${{ github.token }}
|
|
||||||
|
|
||||||
- name: "Update GitHub Pages"
|
|
||||||
uses: martinthomson/i-d-template@v1
|
|
||||||
if: ${{ github.event_name == 'push' }}
|
|
||||||
with:
|
|
||||||
make: gh-pages
|
|
||||||
token: ${{ github.token }}
|
|
||||||
|
|
||||||
- name: "Archive Built Drafts"
|
|
||||||
uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
draft-*.html
|
|
||||||
draft-*.txt
|
|
||||||
|
|
@ -1,57 +0,0 @@
|
||||||
name: "Publish New Draft Version"
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
tags:
|
|
||||||
- "draft-*"
|
|
||||||
workflow_dispatch:
|
|
||||||
inputs:
|
|
||||||
email:
|
|
||||||
description: "Submitter email"
|
|
||||||
default: ""
|
|
||||||
type: string
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build:
|
|
||||||
name: "Publish New Draft Version"
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: "Checkout"
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
# See https://github.com/actions/checkout/issues/290
|
|
||||||
- name: "Get Tag Annotations"
|
|
||||||
run: git fetch -f origin ${{ github.ref }}:${{ github.ref }}
|
|
||||||
|
|
||||||
- name: "Setup"
|
|
||||||
id: setup
|
|
||||||
run: date -u "+date=%FT%T" >>"$GITHUB_OUTPUT"
|
|
||||||
|
|
||||||
- name: "Caching"
|
|
||||||
uses: actions/cache@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
.refcache
|
|
||||||
.venv
|
|
||||||
.gems
|
|
||||||
node_modules
|
|
||||||
.targets.mk
|
|
||||||
key: i-d-${{ steps.setup.outputs.date }}
|
|
||||||
restore-keys: i-d-
|
|
||||||
|
|
||||||
- name: "Build Drafts"
|
|
||||||
uses: martinthomson/i-d-template@v1
|
|
||||||
with:
|
|
||||||
token: ${{ github.token }}
|
|
||||||
|
|
||||||
- name: "Upload to Datatracker"
|
|
||||||
uses: martinthomson/i-d-template@v1
|
|
||||||
with:
|
|
||||||
make: upload
|
|
||||||
env:
|
|
||||||
UPLOAD_EMAIL: ${{ inputs.email }}
|
|
||||||
|
|
||||||
- name: "Archive Submitted Drafts"
|
|
||||||
uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
path: "versioned/draft-*-[0-9][0-9].*"
|
|
||||||
|
|
@ -1,25 +1,2 @@
|
||||||
/target
|
/target
|
||||||
Cargo.lock
|
Cargo.lock
|
||||||
*.html
|
|
||||||
*.pdf
|
|
||||||
*.redxml
|
|
||||||
*.swp
|
|
||||||
*.txt
|
|
||||||
*.upload
|
|
||||||
*~
|
|
||||||
.tags
|
|
||||||
/*-[0-9][0-9].xml
|
|
||||||
/.*.mk
|
|
||||||
/.gems/
|
|
||||||
/.refcache
|
|
||||||
/.venv/
|
|
||||||
/.vscode/
|
|
||||||
/lib
|
|
||||||
/node_modules/
|
|
||||||
/versioned/
|
|
||||||
Gemfile.lock
|
|
||||||
archive.json
|
|
||||||
draft-marchan-kdl2.xml
|
|
||||||
package-lock.json
|
|
||||||
report.xml
|
|
||||||
!requirements.txt
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +0,0 @@
|
||||||
<note title="Discussion Venues" removeInRFC="true">
|
|
||||||
<t>Source for this draft and an issue tracker can be found at
|
|
||||||
<eref target="https://github.com/kdl-org/kdl"/>.</t>
|
|
||||||
</note>
|
|
||||||
|
|
@ -56,7 +56,7 @@
|
||||||
* Around `=` for props (`x = 1`)
|
* Around `=` for props (`x = 1`)
|
||||||
* The BOM is now only allowed as the first character in a document. It was
|
* The BOM is now only allowed as the first character in a document. It was
|
||||||
previously treated as generic whitespace.
|
previously treated as generic whitespace.
|
||||||
* Multi-line strings must now use `"""` as delimiters. The opening delimiter must be immediately followed by a newline, and the closing delimiter must be on its own line, prefixed by optional whitespace.
|
* Multi-line strings must now use `"""` as delimeters. The opening delimiter must be immediately followed by a newline, and the closing delimiter must be on its own line, prefixed by optional whitespace.
|
||||||
* Multi-line strings are now automatically dedented, according to the common
|
* Multi-line strings are now automatically dedented, according to the common
|
||||||
whitespace matching the whitespace prefix of the closing line.
|
whitespace matching the whitespace prefix of the closing line.
|
||||||
* `.1`, `+.1` etc are no longer valid identifiers, to prevent confusion and
|
* `.1`, `+.1` etc are no longer valid identifiers, to prevent confusion and
|
||||||
|
|
|
||||||
|
|
@ -1,22 +0,0 @@
|
||||||
# Contributing
|
|
||||||
|
|
||||||
## Mechanics
|
|
||||||
|
|
||||||
Contributions can be made by creating pull requests.
|
|
||||||
The GitHub interface supports creating pull requests using the Edit (✏) button.
|
|
||||||
|
|
||||||
|
|
||||||
## Building the Specification
|
|
||||||
|
|
||||||
The specification is written in
|
|
||||||
[kramdown-rfc](https://github.com/cabo/kramdown-rfc/wiki/Syntax2), which
|
|
||||||
compiles via [RFCXML](https://authors.ietf.org/rfcxml-vocabulary) to text and
|
|
||||||
HTML.
|
|
||||||
|
|
||||||
You can build the formatted versions or the intermediate RFCXML file using
|
|
||||||
https://author-tools.ietf.org/ or locally by running `make`. To preserve the
|
|
||||||
intermediate RFCXML form in a local build, run `make draft-marchan-kdl2.xml`
|
|
||||||
once.
|
|
||||||
|
|
||||||
Command line usage requires that you have the necessary software installed. See
|
|
||||||
[the instructions](https://github.com/martinthomson/i-d-template/blob/main/doc/SETUP.md).
|
|
||||||
|
|
@ -98,7 +98,7 @@ The properties and/or children of the node represent the items of the object,
|
||||||
with the property names and child nodenames as each item's key.
|
with the property names and child nodenames as each item's key.
|
||||||
All "keys" in an object node must be unique.
|
All "keys" in an object node must be unique.
|
||||||
|
|
||||||
As with arrays, there are two ambiguous cases that must be manually annotated with the `(object)` type annotation:
|
As with arrays, there are two ambiguous cases that must be manually annoted with the `(object)` type annotation:
|
||||||
|
|
||||||
* An object containing a single item whose key is "-" (like `{"-": 1}`) written using children (like `- { - 1 }`)
|
* An object containing a single item whose key is "-" (like `{"-": 1}`) written using children (like `- { - 1 }`)
|
||||||
would be ambiguous with an array node.
|
would be ambiguous with an array node.
|
||||||
|
|
|
||||||
15
Makefile
15
Makefile
|
|
@ -1,15 +0,0 @@
|
||||||
LIBDIR := lib
|
|
||||||
include $(LIBDIR)/main.mk
|
|
||||||
|
|
||||||
$(LIBDIR)/main.mk:
|
|
||||||
ifneq (,$(shell grep "path *= *$(LIBDIR)" .gitmodules 2>/dev/null))
|
|
||||||
git submodule sync
|
|
||||||
git submodule update --init
|
|
||||||
else
|
|
||||||
ifneq (,$(wildcard $(ID_TEMPLATE_HOME)))
|
|
||||||
ln -s "$(ID_TEMPLATE_HOME)" $(LIBDIR)
|
|
||||||
else
|
|
||||||
git clone -q --depth 10 -b main \
|
|
||||||
https://github.com/martinthomson/i-d-template $(LIBDIR)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
@ -110,22 +110,22 @@ Then the following queries are valid:
|
||||||
## Full Grammar
|
## Full Grammar
|
||||||
|
|
||||||
Rules that are not defined in this grammar are prefixed with `$`, see [the KDL
|
Rules that are not defined in this grammar are prefixed with `$`, see [the KDL
|
||||||
grammar](https://kdl.dev/spec/#name-full-grammar) for
|
grammar](https://github.com/kdl-org/kdl/blob/main/SPEC.md#full-grammar) for
|
||||||
what they expand to.
|
what they expand to.
|
||||||
|
|
||||||
```
|
```
|
||||||
query-str := $bom? query
|
query-str := $bom? query
|
||||||
query := selector q-ws+ "||" q-ws+ query | selector
|
query := selector q-ws* "||" q-ws* query | selector
|
||||||
selector := filter q-ws+ selector-operator q-ws+ selector-subsequent | filter
|
selector := filter q-ws* selector-operator q-ws* selector-subsequent | filter
|
||||||
selector-subsequent := matchers q-ws+ selector-operator q-ws+ selector-subsequent | matchers
|
selector-subsequent := matchers q-ws* selector-operator q-ws* selector-subsequent | matchers
|
||||||
selector-operator := ">>" | ">" | "++" | "+"
|
selector-operator := ">>" | ">" | "++" | "+"
|
||||||
filter := "top(" q-ws* ")" | matchers
|
filter := "top(" q-ws* ")" | matchers
|
||||||
matchers := type-matcher $string? accessor-matcher* | $string accessor-matcher* | accessor-matcher+
|
matchers := type-matcher $string? accessor-matcher* | $string accessor-matcher* | accessor-matcher+
|
||||||
type-matcher := "(" q-ws* ")" | $type
|
type-matcher := "(" q-ws* ")" | $type
|
||||||
accessor-matcher := "[" q-ws* (comparison | accessor)? q-ws* "]"
|
accessor-matcher := "[" q-ws* (comparison | accessor)? q-ws* "]"
|
||||||
comparison := accessor q-ws+ matcher-operator q-ws+ ($type | $string | $number | $keyword)
|
comparison := accessor q-ws* matcher-operator q-ws* ($type | $string | $number | $keyword)
|
||||||
accessor := "val(" q-ws* $integer q-ws* ")" | "prop(" q-ws* $string q-ws* ")" | "name(" q-ws* ")" | "tag(" q-ws* ")" | "values(" q-ws* ")" | "props(" q-ws* ")" | $string
|
accessor := "val(" q-ws* $integer q-ws* ")" | "prop(" q-ws* $string q-ws* ")" | "name(" q-ws* ")" | "tag(" q-ws* ")" | "values(" q-ws* ")" | "props(" q-ws* ")" | $string
|
||||||
matcher-operator := "=" | "!=" | ">" | "<" | ">=" | "<=" | "^=" | "$=" | "*="
|
matcher-operator := "=" | "!=" | ">" | "<" | ">=" | "<=" | "^=" | "$=" | "*="
|
||||||
|
|
||||||
q-ws := $node-space
|
q-ws := $plain-node-space
|
||||||
```
|
```
|
||||||
|
|
|
||||||
39
README.md
39
README.md
|
|
@ -46,16 +46,16 @@ package {
|
||||||
|
|
||||||
For more details, see the [overview below](#overview).
|
For more details, see the [overview below](#overview).
|
||||||
|
|
||||||
There's a living [specification](https://kdl.dev/spec/), as well as various
|
There's a living [specification](SPEC.md), as well as various
|
||||||
[implementations](#implementations). You can also check out the [FAQ](#faq) to
|
[implementations](#implementations). You can also check out the [FAQ](#faq) to
|
||||||
answer all your burning questions!
|
answer all your burning questions!
|
||||||
|
|
||||||
The current version of the KDL spec is
|
The current version of the KDL spec is
|
||||||
[KDL 2.0.0](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html). For legacy KDL,
|
[KDL 2.0.0](https://github.com/kdl-org/kdl/blob/2.0.0/SPEC.md). For legacy KDL,
|
||||||
please refer to the [KDL 1.0.0
|
please refer to the [KDL 1.0.0
|
||||||
spec](https://github.com/kdl-org/kdl/blob/2.0.0/SPEC_v1.md). All users are
|
spec](https://github.com/kdl-org/kdl/blob/2.0.0/SPEC_v1.md). All users are
|
||||||
encouraged to migrate. [Migration is forward-and-backward-compatible and
|
encouraged to migrate. [Migration is forward-and-backward-compatible and
|
||||||
safe](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html#compatibility), and can
|
safe](https://github.com/kdl-org/kdl/blob/2.0.0/SPEC.md#compatibility), and can
|
||||||
be automated.
|
be automated.
|
||||||
|
|
||||||
In addition to a spec for KDL itself, there are specifications for [a KDL Query
|
In addition to a spec for KDL itself, there are specifications for [a KDL Query
|
||||||
|
|
@ -66,7 +66,7 @@ The language is based on [SDLang](https://sdlang.org), with a [number of
|
||||||
modifications and clarifications on its syntax and behavior](#why-not-sdlang).
|
modifications and clarifications on its syntax and behavior](#why-not-sdlang).
|
||||||
We are grateful for their work as an inspiration to ours.
|
We are grateful for their work as an inspiration to ours.
|
||||||
|
|
||||||
[Play with it in your browser!](https://kdl.dev/play/)
|
[Play with it in your browser (currently v1 only)!](https://kdl-play.danini.dev/)
|
||||||
|
|
||||||
## Design and Discussion
|
## Design and Discussion
|
||||||
|
|
||||||
|
|
@ -86,15 +86,11 @@ of some examples of KDL in the wild (either v1, v2, or both):
|
||||||
* [Niri](https://github.com/YaLTeR/niri) - Scrollable-tiling window manager for Wayland
|
* [Niri](https://github.com/YaLTeR/niri) - Scrollable-tiling window manager for Wayland
|
||||||
* [Bikeshed](https://github.com/speced/bikeshed) ([here](https://github.com/speced/bikeshed-boilerplate/blob/main/boilerplate/doctypes.kdl) and [here](https://github.com/speced/bikeshed-data/blob/main/data/manifest.txt)) - Specification pre-processor used by CSS, C++, WHATWG, various W3C working groups, and others.
|
* [Bikeshed](https://github.com/speced/bikeshed) ([here](https://github.com/speced/bikeshed-boilerplate/blob/main/boilerplate/doctypes.kdl) and [here](https://github.com/speced/bikeshed-data/blob/main/data/manifest.txt)) - Specification pre-processor used by CSS, C++, WHATWG, various W3C working groups, and others.
|
||||||
* [orogene](https://orogene.dev) - Lightning-fast JavaScript package manager
|
* [orogene](https://orogene.dev) - Lightning-fast JavaScript package manager
|
||||||
* [Onyx](https://onyxlang.io/) - An efficient, procedural, and pragmatic programming language that compiles to WASM. Used for package manifests.
|
|
||||||
* [Pop!_OS/System76 Scheduler](https://github.com/pop-os/system76-scheduler) - Scheduling service which optimizes Linux's CPU scheduler and makes it go faster.
|
* [Pop!_OS/System76 Scheduler](https://github.com/pop-os/system76-scheduler) - Scheduling service which optimizes Linux's CPU scheduler and makes it go faster.
|
||||||
* [ImStyle](https://patitotective.github.io/ImStyle/) - ImGui application styling with Nim and KDL
|
* [ImStyle](https://patitotective.github.io/ImStyle/) - ImGui application styling with Nim and KDL
|
||||||
* [fmod-rs](https://github.com/CAD97/fmod-rs) - Rust bindings to FMOD Core and FMOD Studio
|
* [fmod-rs](https://github.com/CAD97/fmod-rs) - Rust bindings to FMOD Core and FMOD Studio
|
||||||
* [mise](https://mise.jdx.dev/) - dev tools, env vars, task runner
|
* [mise](https://mise.jdx.dev/) - dev tools, env vars, task runner
|
||||||
* [Camping](https://github.com/camping/camping) - Ruby web microframework
|
* [Camping](https://github.com/camping/camping) - Ruby web microframework
|
||||||
* [Iron Vault](https://ironvault.quest) - VTT (Virtual Tabletop) plugin for Obsidian for the Ironsworn family of games
|
|
||||||
* [Microsoft TypeScript DOM Generator](https://github.com/microsoft/TypeScript-DOM-lib-generator) - Tool for generating DOM-related TypeScript and JavaScript library files
|
|
||||||
* [Ferron](https://ferron.sh/) - A fast, memory-safe web server written in Rust
|
|
||||||
* You?
|
* You?
|
||||||
|
|
||||||
## Implementations
|
## Implementations
|
||||||
|
|
@ -107,34 +103,30 @@ of some examples of KDL in the wild (either v1, v2, or both):
|
||||||
|---|---|---|---|---|
|
|---|---|---|---|---|
|
||||||
| C | [ckdl](https://github.com/tjol/ckdl) | ✅ | ✅ | |
|
| C | [ckdl](https://github.com/tjol/ckdl) | ✅ | ✅ | |
|
||||||
| C#/.NET | [Kadlet](https://github.com/oledfish/Kadlet) | ✅ | ✖️ | |
|
| C#/.NET | [Kadlet](https://github.com/oledfish/Kadlet) | ✅ | ✖️ | |
|
||||||
| C#/.NET | [KdlSharp](https://github.com/AndreyAkinshin/KdlSharp) | ✅ | ✅ | .NET Std: 2.1+, .NET 6+, .NET FW 4.7.2+, Mono, Xamarin |
|
|
||||||
| C++ | [kdlpp](https://github.com/tjol/ckdl) | ✅ | ✅ | part of ckdl, requires C++20 |
|
| C++ | [kdlpp](https://github.com/tjol/ckdl) | ✅ | ✅ | part of ckdl, requires C++20 |
|
||||||
| Common Lisp | [kdlcl](https://github.com/chee/kdlcl) | ✅ | ✖️ | |
|
| Common Lisp | [kdlcl](https://github.com/chee/kdlcl) | ✅ | ✖️ | |
|
||||||
| Crystal | [kdl-cr](https://github.com/danini-the-panini/kdl-cr) | ✅ | ✖️ | |
|
| Crystal | [kdl-cr](https://github.com/danini-the-panini/kdl-cr) | ✅ | ✖️ | |
|
||||||
| Dart | [kdl-dart](https://github.com/danini-the-panini/kdl-dart) | ✅ | ✅ | |
|
| Dart | [kdl-dart](https://github.com/danini-the-panini/kdl-dart) | ✅ | ✖️ | |
|
||||||
| Elixir | [kuddle](https://github.com/IceDragon200/kuddle) | ✅ | ✅ | |
|
| Elixir | [kuddle](https://github.com/IceDragon200/kuddle) | ✅ | ✅ | |
|
||||||
| Go | [gokdl](https://github.com/lunjon/gokdl) | ✅ | ✖️ | |
|
| Go | [gokdl](https://github.com/lunjon/gokdl) | ✅ | ✖️ | |
|
||||||
| Go | [kdl-go](https://github.com/sblinch/kdl-go) | ✅ | ✖️ | |
|
| Go | [kdl-go](https://github.com/sblinch/kdl-go) | ✅ | ✖️ | |
|
||||||
| Go | [gokdl2](https://github.com/njreid/gokdl2) | ✅ | ✅ | Friendly errors & arena allocator |
|
|
||||||
| Haskell | [Hustle](https://github.com/fuzzypixelz/Hustle) | ✅ | ✖️ | |
|
| Haskell | [Hustle](https://github.com/fuzzypixelz/Hustle) | ✅ | ✖️ | |
|
||||||
| Haskell | [kdl-hs](https://github.com/brandonchinn178/kdl-hs) | ✅ | ✅ | Format/comment-preserving parser |
|
| Java | [kdl4j](https://github.com/hkolbeck/kdl4j) | ✅ | ✖️ | |
|
||||||
| Java | [kdl4j](https://github.com/kdl-org/kdl4j) | ✅ | ✅ | |
|
|
||||||
| JavaScript | [@bgotink/kdl](https://github.com/bgotink/kdl) | ✅ | ✅ | Format/comment-preserving parser |
|
| JavaScript | [@bgotink/kdl](https://github.com/bgotink/kdl) | ✅ | ✅ | Format/comment-preserving parser |
|
||||||
| JavaScript | [@virtualstate/kdl](https://github.com/virtualstate/kdl) | ✅ | ✖️ | query only, JSX based |
|
| JavaScript | [@virtualstate/kdl](https://github.com/virtualstate/kdl) | ✅ | ✖️ | query only, JSX based |
|
||||||
| JavaScript | [kdljs](https://github.com/kdl-org/kdljs) | ✅ | ✅ | |
|
| JavaScript | [kdljs](https://github.com/kdl-org/kdljs) | ✅ | ✖️ | |
|
||||||
| Lua | [kdlua](https://github.com/danini-the-panini/kdlua) | ✅ | ✖️ | |
|
| Lua | [kdlua](https://github.com/danini-the-panini/kdlua) | ✅ | ✖️ | |
|
||||||
| Nim | [kdl-nim](https://github.com/Patitotective/kdl-nim) | ✅ | ✖️ | |
|
| Nim | [kdl-nim](https://github.com/Patitotective/kdl-nim) | ✅ | ✖️ | |
|
||||||
| OCaml | [ocaml-kdl](https://github.com/eilvelia/ocaml-kdl) | ✅ | ✅ | |
|
| OCaml | [ocaml-kdl](https://github.com/Bannerets/ocaml-kdl) | ✅ | ✖️ | |
|
||||||
| PHP | [kdl-php](https://github.com/kdl-org/kdl-php) | ✅ | ✖️ | |
|
| PHP | [kdl-php](https://github.com/kdl-org/kdl-php) | ✅ | ✖️ | |
|
||||||
| Python | [ckdl](https://github.com/tjol/ckdl) | ✅ | ✅ | |
|
| Python | [ckdl](https://github.com/tjol/ckdl) | ✅ | ✅ | |
|
||||||
| Python | [cuddle](https://github.com/djmattyg007/python-cuddle) | ✅ | ✖️ | |
|
| Python | [cuddle](https://github.com/djmattyg007/python-cuddle) | ✅ | ✖️ | |
|
||||||
| Python | [kdl-py](https://github.com/tabatkins/kdlpy) | ✅ | ✅ | |
|
| Python | [kdl-py](https://github.com/tabatkins/kdlpy) | ✅ | ✅ | |
|
||||||
| Ruby | [kdl-rb](https://github.com/danini-the-panini/kdl-rb) | ✅ | ✅ | |
|
| Ruby | [kdl-rb](https://github.com/danini-the-panini/kdl-rb) | ✅ | ✖️ | |
|
||||||
| Rust | [kdl-rs](https://github.com/kdl-org/kdl-rs) | ✅ | ✅ | Format/comment-preserving parser |
|
| Rust | [kdl-rs](https://github.com/kdl-org/kdl-rs) | ✅ | ✅ | Format/comment-preserving parser |
|
||||||
| Rust | [knus](https://crates.io/crates/knus/) | ✅ | ✖️ | Serde-_style_ derive macros (not actual Serde) |
|
| Rust | [knus](https://crates.io/crates/knus/) | ✅ | ✖️ | Serde-_style_ derive macros (not actual Serde) |
|
||||||
| Swift | [kdl-swift](https://github.com/danini-the-panini/kdl-swift) | ✅ | ✖️ | |
|
| Swift | [kdl-swift](https://github.com/danini-the-panini/kdl-swift) | ✅ | ✖️ | |
|
||||||
| XSLT | [xml2kdl](https://github.com/Devasta/XML2KDL) | ✅ | ✖️ | |
|
| XSLT | [xml2kdl](https://github.com/Devasta/XML2KDL) | ✅ | ✖️ | |
|
||||||
| Zig | [zig-kdl](https://codeberg.org/desttinghim/zig-kdl) | ✅ | ✅ | Format/comment-preserving parser |
|
|
||||||
|
|
||||||
## Compatibility Test Suite
|
## Compatibility Test Suite
|
||||||
|
|
||||||
|
|
@ -147,12 +139,10 @@ entirety, but in the future, may be required to in order to be included here.
|
||||||
## Editor Support
|
## Editor Support
|
||||||
|
|
||||||
* [IntelliJ IDEA](https://plugins.jetbrains.com/plugin/20136-kdl-document-language)
|
* [IntelliJ IDEA](https://plugins.jetbrains.com/plugin/20136-kdl-document-language)
|
||||||
* [Sublime Text](https://packagecontrol.io/packages/KDL)\*
|
* [Sublime Text](https://packagecontrol.io/packages/KDL)
|
||||||
* [TreeSitter](https://github.com/tree-sitter-grammars/tree-sitter-kdl) (neovim, among others)
|
* [TreeSitter](https://github.com/tree-sitter-grammars/tree-sitter-kdl) (neovim, among others)
|
||||||
* [VS Code](https://marketplace.visualstudio.com/items?itemName=kdl-org.kdl&ssr=false#review-details)\*
|
* [VS Code](https://marketplace.visualstudio.com/items?itemName=kdl-org.kdl&ssr=false#review-details)\*
|
||||||
* [vim](https://github.com/imsnif/kdl.vim)
|
* [vim](https://github.com/imsnif/kdl.vim)
|
||||||
* [Kate](https://github.com/larsgw/katepart-kdl)\*
|
|
||||||
* [Zed](https://zed.dev/extensions/kdl)
|
|
||||||
|
|
||||||
\* Supports KDL 2.0.0
|
\* Supports KDL 2.0.0
|
||||||
|
|
||||||
|
|
@ -409,12 +399,12 @@ SDLang is an excellent base, but I wanted some details ironed out, and some
|
||||||
things removed that only really made sense for SDLang's current use-cases, including
|
things removed that only really made sense for SDLang's current use-cases, including
|
||||||
some restrictions about data representation. KDL is very similar in many ways, except:
|
some restrictions about data representation. KDL is very similar in many ways, except:
|
||||||
|
|
||||||
* The grammar and expected semantics are [well-defined and specified](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html).
|
* The grammar and expected semantics are [well-defined and specified](SPEC.md).
|
||||||
This was the original impetus for working on KDL, followed by details that
|
This was the original impetus for working on KDL, followed by details that
|
||||||
seemed like they could be improved.
|
seemed like they could be improved.
|
||||||
* There is only one "number" type. KDL does not prescribe representations, but
|
* There is only one "number" type. KDL does not prescribe representations, but
|
||||||
does have keywords for NaN, infinity, and negative infinity if decimal numbers
|
does have keywords for NaN, infinity, and negative infinity if decimal numbers
|
||||||
are intended to be represented as IEEE754 floats.
|
are intended to be represtented as IEEE754 floats.
|
||||||
* Slashdash (`/-`) comments are great and useful!
|
* Slashdash (`/-`) comments are great and useful!
|
||||||
* Quoteless "identifier" strings (e.g. `node foo=bar`, vs `node foo="bar"`).
|
* Quoteless "identifier" strings (e.g. `node foo=bar`, vs `node foo="bar"`).
|
||||||
* KDL does not have first-class date or binary data types. Instead, it
|
* KDL does not have first-class date or binary data types. Instead, it
|
||||||
|
|
@ -476,10 +466,7 @@ microsyntax for losslessly encoding JSON](JSON-IN-KDL.md).
|
||||||
|
|
||||||
#### What about TOML?
|
#### What about TOML?
|
||||||
|
|
||||||
It nests very poorly. It doesn't fare well with large files. Also, I felt some
|
It nests very poorly. It doesn't fare well with large files.
|
||||||
discomfort [continuing to use and promote something by its
|
|
||||||
creator](https://en.wikipedia.org/wiki/Tom_Preston-Werner#Resignation_from_GitHub).
|
|
||||||
|
|
||||||
|
|
||||||
#### What about XML?
|
#### What about XML?
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -268,7 +268,7 @@ and property names when the `node-names` or `prop-names` options are activated.
|
||||||
|
|
||||||
* `tag`: [Validations](#validation-nodes) to apply to the tag of the value.
|
* `tag`: [Validations](#validation-nodes) to apply to the tag of the value.
|
||||||
* `type`: A string denoting the type of the property value.
|
* `type`: A string denoting the type of the property value.
|
||||||
* `enum`: A specific list of allowed values for this property. May be heterogeneous as long as it agrees with the `type`, if specified.
|
* `enum`: A specific list of allowed values for this property. May be heterogenous as long as it agrees with the `type`, if specified.
|
||||||
|
|
||||||
#### String validations
|
#### String validations
|
||||||
|
|
||||||
|
|
@ -287,7 +287,7 @@ and property names when the `node-names` or `prop-names` options are activated.
|
||||||
* `country-subdivision`: ISO 3166-2 country subdivision code.
|
* `country-subdivision`: ISO 3166-2 country subdivision code.
|
||||||
* `email`: RFC5322 email address.
|
* `email`: RFC5322 email address.
|
||||||
* `idn-email`: RFC6531 internationalized email address.
|
* `idn-email`: RFC6531 internationalized email address.
|
||||||
* `hostname`: RFC1123 internet hostname.
|
* `hostname`: RFC1132 internet hostname.
|
||||||
* `idn-hostname`: RFC5890 internationalized internet hostname.
|
* `idn-hostname`: RFC5890 internationalized internet hostname.
|
||||||
* `ipv4`: RFC2673 dotted-quad IPv4 address.
|
* `ipv4`: RFC2673 dotted-quad IPv4 address.
|
||||||
* `ipv6`: RFC2373 IPv6 address.
|
* `ipv6`: RFC2373 IPv6 address.
|
||||||
|
|
@ -313,12 +313,10 @@ and property names when the `node-names` or `prop-names` options are activated.
|
||||||
* `i16`: 16-bit signed integer
|
* `i16`: 16-bit signed integer
|
||||||
* `i32`: 32-bit signed integer
|
* `i32`: 32-bit signed integer
|
||||||
* `i64`: 64-bit signed integer
|
* `i64`: 64-bit signed integer
|
||||||
* `i128`: 128-bit signed integer
|
|
||||||
* `u8`: 8-bit unsigned integer
|
* `u8`: 8-bit unsigned integer
|
||||||
* `u16`: 16-bit unsigned integer
|
* `u16`: 16-bit unsigned integer
|
||||||
* `u32`: 32-bit unsigned integer
|
* `u32`: 32-bit unsigned integer
|
||||||
* `u64`: 64-bit unsigned integer
|
* `u64`: 64-bit unsigned integer
|
||||||
* `u128`: 128-bit unsigned integer
|
|
||||||
* `isize`: Platform-dependent signed integer
|
* `isize`: Platform-dependent signed integer
|
||||||
* `usize`: Platform-dependent unsigned integer
|
* `usize`: Platform-dependent unsigned integer
|
||||||
* `f32`: IEEE 754 single (32-bit) precision floating point number
|
* `f32`: IEEE 754 single (32-bit) precision floating point number
|
||||||
|
|
|
||||||
967
SPEC.md
967
SPEC.md
|
|
@ -1 +1,966 @@
|
||||||
The v2 specification has been moved [here](draft-marchan-kdl2.md).
|
# KDL Spec
|
||||||
|
|
||||||
|
This is the formal specification for KDL, including the intended data model and
|
||||||
|
the grammar.
|
||||||
|
|
||||||
|
This document describes KDL version 2.0.0. It was released on 2024-12-21. It
|
||||||
|
is the latest stable version of the language, and will only be edited for minor
|
||||||
|
copyedits or major errata.
|
||||||
|
|
||||||
|
## Compatibility
|
||||||
|
|
||||||
|
KDL 2.0 is designed such that for any given KDL document written as [KDL
|
||||||
|
1.0](./SPEC_v1.md) or KDL 2.0, the parse will either fail completely, or, if the
|
||||||
|
parse succeeds, the data represented by a v1 or v2 parser will be identical.
|
||||||
|
This means that it's safe to use a fallback parsing strategy in order to support
|
||||||
|
both v1 and v2 simultaneously. For example, `node "foo"` is a valid node in both
|
||||||
|
versions, and should be represented identically by parsers.
|
||||||
|
|
||||||
|
A version marker `/- kdl-version 2` (or `1`) _MAY_ be added to the beginning of
|
||||||
|
a KDL document, optionally preceded by the BOM, and parsers _MAY_ use that as a
|
||||||
|
hint as to which version to parse the document as.
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
KDL is a node-oriented document language. Its niche and purpose overlap with
|
||||||
|
XML, as do many of its semantics. You can use KDL both as a configuration
|
||||||
|
language, and a data exchange or storage format, if you so choose.
|
||||||
|
|
||||||
|
The bulk of this document is dedicated to a long-form description of all
|
||||||
|
[Components](#components) of a KDL document. There is also a much more terse
|
||||||
|
[Grammar](#full-grammar) at the end of the document that covers most of the
|
||||||
|
rules, with some semantic exceptions involving the data model.
|
||||||
|
|
||||||
|
KDL is designed to be easy to read _and_ easy to implement.
|
||||||
|
|
||||||
|
In this document, references to "left" or "right" refer to directions in the
|
||||||
|
*data stream* towards the beginning or end, respectively; in other words,
|
||||||
|
the directions if the data stream were only ASCII text. They do not refer
|
||||||
|
to the writing direction of text, which can flow in either direction,
|
||||||
|
depending on the characters used.
|
||||||
|
|
||||||
|
## Components
|
||||||
|
|
||||||
|
### Document
|
||||||
|
|
||||||
|
The toplevel concept of KDL is a Document. A Document is composed of zero or
|
||||||
|
more [Nodes](#node), separated by newlines and whitespace, and eventually
|
||||||
|
terminated by an EOF.
|
||||||
|
|
||||||
|
All KDL documents should be UTF-8 encoded and conform to the specifications in
|
||||||
|
this document.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
The following is a document composed of two toplevel nodes:
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
foo {
|
||||||
|
bar
|
||||||
|
}
|
||||||
|
baz
|
||||||
|
```
|
||||||
|
|
||||||
|
### Node
|
||||||
|
|
||||||
|
Being a node-oriented language means that the real core component of any KDL
|
||||||
|
document is the "node". Every node must have a name, which must be a
|
||||||
|
[String](#string).
|
||||||
|
|
||||||
|
The name may be preceded by a [Type Annotation](#type-annotation) to further
|
||||||
|
clarify its type, particularly in relation to its parent node. (For example,
|
||||||
|
clarifying that a particular `date` child node is for the _publication_ date,
|
||||||
|
rather than the last-modified date, with `(published)date`.)
|
||||||
|
|
||||||
|
Following the name are zero or more [Arguments](#argument) or
|
||||||
|
[Properties](#property), separated by either [whitespace](#whitespace) or [a
|
||||||
|
slash-escaped line continuation](#line-continuation). Arguments and Properties
|
||||||
|
may be interspersed in any order, much like is common with positional arguments
|
||||||
|
vs options in command line tools. Collectively, Arguments and Properties may be
|
||||||
|
referred to as "Entries".
|
||||||
|
|
||||||
|
[Children](#children-block) can be placed after the name and the optional
|
||||||
|
Entries, possibly separated by either whitespace or a
|
||||||
|
slash-escaped line continuation.
|
||||||
|
|
||||||
|
Arguments are ordered relative to each other and that order must be preserved in
|
||||||
|
order to maintain the semantics. Properties between Arguments do not affect
|
||||||
|
Argument ordering.
|
||||||
|
|
||||||
|
By contrast, Properties _SHOULD NOT_ be assumed to be presented in a given
|
||||||
|
order. [Children](#children-block) should be used if an order-sensitive
|
||||||
|
key/value data structure must be represented in KDL. Cf. JSON objects
|
||||||
|
preserving key order.
|
||||||
|
|
||||||
|
Nodes _MAY_ be prefixed with [Slashdash](#slashdash-comments) to "comment out"
|
||||||
|
the entire node, including its properties, arguments, and children, and make
|
||||||
|
it act as plain whitespace, even if it spreads across multiple lines.
|
||||||
|
|
||||||
|
Finally, a node is terminated by either a [Newline](#newline), a semicolon
|
||||||
|
(`;`), the end of a child block (`}`) or the end of the file/stream (an `EOF`).
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
// `foo` will have an Argument value list like `[1, 3]`.
|
||||||
|
foo 1 key=val 3 {
|
||||||
|
bar
|
||||||
|
(role)baz 1 2
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Line Continuation
|
||||||
|
|
||||||
|
Line continuations allow [Nodes](#node) to be spread across multiple lines.
|
||||||
|
|
||||||
|
A line continuation is a `\` character followed by zero or more whitespace
|
||||||
|
items (including multiline comments) and an optional single-line comment. It
|
||||||
|
must be terminated by a [Newline](#newline) (including the Newline that is
|
||||||
|
part of single-line comments).
|
||||||
|
|
||||||
|
Following a line continuation, processing of a Node can continue as usual.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
my-node 1 2 \ // comments are ok after \
|
||||||
|
3 4 // This is the actual end of the Node.
|
||||||
|
```
|
||||||
|
|
||||||
|
### Property
|
||||||
|
|
||||||
|
A Property is a key/value pair attached to a [Node](#node). A Property is
|
||||||
|
composed of a [String](#string), followed immediately by an equals sign (`=`, `U+003D`),
|
||||||
|
and then a [Value](#value).
|
||||||
|
|
||||||
|
Properties should be interpreted left-to-right, with rightmost properties with
|
||||||
|
identical names overriding earlier properties. That is:
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
node a=1 a=2
|
||||||
|
```
|
||||||
|
|
||||||
|
In this example, the node's `a` value must be `2`, not `1`.
|
||||||
|
|
||||||
|
No other guarantees about order should be expected by implementers.
|
||||||
|
Deserialized representations may iterate over properties in any order and
|
||||||
|
still be spec-compliant.
|
||||||
|
|
||||||
|
Properties _MAY_ be prefixed with `/-` to "comment out" the entire token and
|
||||||
|
make it act as plain whitespace, even if it spreads across multiple lines.
|
||||||
|
|
||||||
|
### Argument
|
||||||
|
|
||||||
|
An Argument is a bare [Value](#value) attached to a [Node](#node), with no
|
||||||
|
associated key. It shares the same space as [Properties](#property), and may be interleaved with them.
|
||||||
|
|
||||||
|
A Node may have any number of Arguments, which should be evaluated left to
|
||||||
|
right. KDL implementations _MUST_ preserve the order of Arguments relative to
|
||||||
|
each other (not counting Properties).
|
||||||
|
|
||||||
|
Arguments _MAY_ be prefixed with `/-` to "comment out" the entire token and
|
||||||
|
make it act as plain whitespace, even if it spreads across multiple lines.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
my-node 1 2 3 a b c
|
||||||
|
```
|
||||||
|
|
||||||
|
### Children Block
|
||||||
|
|
||||||
|
A children block is a block of [Nodes](#node), surrounded by `{` and `}`. They
|
||||||
|
are an optional part of nodes, and create a hierarchy of KDL nodes.
|
||||||
|
|
||||||
|
Regular node termination rules apply, which means multiple nodes can be
|
||||||
|
included in a single-line children block, as long as they're all terminated by
|
||||||
|
`;`.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
parent {
|
||||||
|
child1
|
||||||
|
child2
|
||||||
|
}
|
||||||
|
|
||||||
|
parent { child1; child2; }
|
||||||
|
```
|
||||||
|
|
||||||
|
### Value
|
||||||
|
|
||||||
|
A value is either: a [String](#string), a [Number](#number), a
|
||||||
|
[Boolean](#boolean), or [Null](#null).
|
||||||
|
|
||||||
|
Values _MUST_ be either [Arguments](#argument) or values of
|
||||||
|
[Properties](#property). Only [String](#string) values may be used as
|
||||||
|
[Node](#node) names or [Property](#property) keys.
|
||||||
|
|
||||||
|
Values (both as arguments and in properties) _MAY_ be prefixed by a single
|
||||||
|
[Type Annotation](#type-annotation).
|
||||||
|
|
||||||
|
### Type Annotation
|
||||||
|
|
||||||
|
A type annotation is a prefix to any [Node Name](#node) or [Value](#value) that
|
||||||
|
includes a _suggestion_ of what type the value is _intended_ to be treated as,
|
||||||
|
or as a _context-specific elaboration_ of the more generic type the node name
|
||||||
|
indicates.
|
||||||
|
|
||||||
|
Type annotations are written as a set of `(` and `)` with a single
|
||||||
|
[String](#string) in it. It may contain Whitespace after the `(` and before
|
||||||
|
the `)`, and may be separated from its target by Whitespace.
|
||||||
|
|
||||||
|
KDL does not specify any restrictions on what implementations might do with
|
||||||
|
these annotations. They are free to ignore them, or use them to make decisions
|
||||||
|
about how to interpret a value.
|
||||||
|
|
||||||
|
Additionally, the following type annotations MAY be recognized by KDL parsers
|
||||||
|
and, if used, SHOULD be interpreted as follows:
|
||||||
|
|
||||||
|
#### Reserved Type Annotations for Numbers Without Decimals:
|
||||||
|
|
||||||
|
Signed integers of various sizes (the number is the bit size):
|
||||||
|
|
||||||
|
* `i8`
|
||||||
|
* `i16`
|
||||||
|
* `i32`
|
||||||
|
* `i64`
|
||||||
|
* `i128`
|
||||||
|
|
||||||
|
Unsigned integers of various sizes (the number is the bit size):
|
||||||
|
|
||||||
|
* `u8`
|
||||||
|
* `u16`
|
||||||
|
* `u32`
|
||||||
|
* `u64`
|
||||||
|
* `u128`
|
||||||
|
|
||||||
|
Platform-dependent integer types, both signed and unsigned:
|
||||||
|
|
||||||
|
* `isize`
|
||||||
|
* `usize`
|
||||||
|
|
||||||
|
#### Reserved Type Annotations for Numbers With Decimals:
|
||||||
|
|
||||||
|
IEEE 754 floating point numbers, both single (32) and double (64) precision:
|
||||||
|
|
||||||
|
* `f32`
|
||||||
|
* `f64`
|
||||||
|
|
||||||
|
IEEE 754-2008 decimal floating point numbers
|
||||||
|
|
||||||
|
* `decimal64`
|
||||||
|
* `decimal128`
|
||||||
|
|
||||||
|
#### Reserved Type Annotations for Strings:
|
||||||
|
|
||||||
|
* `date-time`: ISO8601 date/time format.
|
||||||
|
* `time`: "Time" section of ISO8601.
|
||||||
|
* `date`: "Date" section of ISO8601.
|
||||||
|
* `duration`: ISO8601 duration format.
|
||||||
|
* `decimal`: IEEE 754-2008 decimal string format.
|
||||||
|
* `currency`: ISO 4217 currency code.
|
||||||
|
* `country-2`: ISO 3166-1 alpha-2 country code.
|
||||||
|
* `country-3`: ISO 3166-1 alpha-3 country code.
|
||||||
|
* `country-subdivision`: ISO 3166-2 country subdivision code.
|
||||||
|
* `email`: RFC5322 email address.
|
||||||
|
* `idn-email`: RFC6531 internationalized email address.
|
||||||
|
* `hostname`: RFC1123 internet hostname (only ASCII segments)
|
||||||
|
* `idn-hostname`: RFC5890 internationalized internet hostname (only `xn--`-prefixed ASCII "punycode" segments, or non-ASCII segments)
|
||||||
|
* `ipv4`: RFC2673 dotted-quad IPv4 address.
|
||||||
|
* `ipv6`: RFC2373 IPv6 address.
|
||||||
|
* `url`: RFC3986 URI.
|
||||||
|
* `url-reference`: RFC3986 URI Reference.
|
||||||
|
* `irl`: RFC3987 Internationalized Resource Identifier.
|
||||||
|
* `irl-reference`: RFC3987 Internationalized Resource Identifier Reference.
|
||||||
|
* `url-template`: RFC6570 URI Template.
|
||||||
|
* `uuid`: RFC4122 UUID.
|
||||||
|
* `regex`: Regular expression. Specific patterns may be implementation-dependent.
|
||||||
|
* `base64`: A Base64-encoded string, denoting arbitrary binary data.
|
||||||
|
|
||||||
|
#### Examples
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
node (u8)123
|
||||||
|
node prop=(regex).*
|
||||||
|
(published)date "1970-01-01"
|
||||||
|
(contributor)person name="Foo McBar"
|
||||||
|
```
|
||||||
|
|
||||||
|
### String
|
||||||
|
|
||||||
|
Strings in KDL represent textual UTF-8 [Values](#value). A String is either an
|
||||||
|
[Identifier String](#identifier-string) (like `foo`), a
|
||||||
|
[Quoted String](#quoted-string) (like `"foo"`)
|
||||||
|
or a [Multi-Line String](#multi-line-string).
|
||||||
|
Both Quoted and Multi-Line Strings come in normal
|
||||||
|
and [Raw String](#raw-string) variants (like `#"foo"#`):
|
||||||
|
|
||||||
|
* Identifier Strings let you write short, "single-word" strings with a
|
||||||
|
minimum of syntax
|
||||||
|
* Quoted Strings let you write strings "like normal", with whitespace and escapes.
|
||||||
|
* Multi-Line Strings let you write strings across multiple lines
|
||||||
|
and with indentation that's not part of the string value.
|
||||||
|
* Raw Strings don't allow any escapes,
|
||||||
|
allowing you to not worry about the string's content containing anything that
|
||||||
|
might look like an escape.
|
||||||
|
|
||||||
|
Strings _MUST_ be represented as UTF-8 values.
|
||||||
|
|
||||||
|
Strings _MUST NOT_ include the code points for
|
||||||
|
[disallowed literal code points](#disallowed-literal-code-points) directly.
|
||||||
|
Quoted and Multi-Line Strings may include these code points as _values_
|
||||||
|
by representing them with their corresponding `\u{...}` escape.
|
||||||
|
|
||||||
|
### Identifier String
|
||||||
|
|
||||||
|
An Identifier String (sometimes referred to as just an "identifier") is
|
||||||
|
composed of any [Unicode Scalar
|
||||||
|
Value](https://unicode.org/glossary/#unicode_scalar_value) other than
|
||||||
|
[non-initial characters](#non-initial-characters), followed by any number of
|
||||||
|
Unicode Scalar Values other than [non-identifier
|
||||||
|
characters](#non-identifier-characters).
|
||||||
|
|
||||||
|
A handful of patterns are disallowed, to avoid confusion with other values:
|
||||||
|
|
||||||
|
* idents that appear to start with a [Number](#number) (like `1.0v2` or
|
||||||
|
`-1em`) or the "almost a number" pattern of a decimal point without a
|
||||||
|
leading digit (like `.1`).
|
||||||
|
* idents that are the language keywords (`inf`, `-inf`, `nan`, `true`,
|
||||||
|
`false`, and `null`) without their leading `#`.
|
||||||
|
|
||||||
|
Identifiers that match these patterns _MUST_ be treated as a syntax error; such
|
||||||
|
values can only be written as quoted or raw strings. The precise details of the
|
||||||
|
identifier syntax are specified in the [Full Grammar](#full-grammar) below.
|
||||||
|
|
||||||
|
Identifier Strings are terminated by [Whitespace](#whitespace) or
|
||||||
|
[Newlines](#newline).
|
||||||
|
|
||||||
|
#### Non-initial characters
|
||||||
|
|
||||||
|
The following characters cannot be the first character in an
|
||||||
|
[Identifier String](#identifier-string):
|
||||||
|
|
||||||
|
* Any decimal digit (0-9)
|
||||||
|
* Any [non-identifier characters](#non-identifier-characters)
|
||||||
|
|
||||||
|
Additionally, the `-` character can only be used as an initial character if
|
||||||
|
the second character is *not* a digit. This allows identifiers to look like
|
||||||
|
`--this`, and removes the ambiguity of having an identifier look like a
|
||||||
|
negative number.
|
||||||
|
|
||||||
|
#### Non-identifier characters
|
||||||
|
|
||||||
|
The following characters cannot be used anywhere in an [Identifier String](#identifier-string):
|
||||||
|
|
||||||
|
* Any of `(){}[]/\"#;=`
|
||||||
|
* Any [Whitespace](#whitespace) or [Newline](#newline).
|
||||||
|
* Any [disallowed literal code points](#disallowed-literal-code-points) in KDL
|
||||||
|
documents.
|
||||||
|
|
||||||
|
### Quoted String
|
||||||
|
|
||||||
|
A Quoted String is delimited by `"` on either side of any number of literal
|
||||||
|
string characters except unescaped `"` and `\`.
|
||||||
|
|
||||||
|
Literal [Newline](#newline) characters can only be included
|
||||||
|
if they are [Escaped Whitespace](#escaped-whitespace),
|
||||||
|
which discards them from the string value.
|
||||||
|
Actually including a newline in the value requires using a newline escape sequence,
|
||||||
|
like `\n`,
|
||||||
|
or using a [Multi-Line String](#multi-line-string)
|
||||||
|
which is actually designed for strings stretching across multiple lines.
|
||||||
|
|
||||||
|
Like Identifier Strings, Quoted Strings _MUST NOT_ include any of the
|
||||||
|
[disallowed literal code-points](#disallowed-literal-code-points) as code
|
||||||
|
points in their body.
|
||||||
|
|
||||||
|
Quoted Strings have a [Raw String](#raw-string) variant,
|
||||||
|
which disallows escapes.
|
||||||
|
|
||||||
|
#### Escapes
|
||||||
|
|
||||||
|
In addition to literal code points, a number of "escapes" are supported in Quoted Strings.
|
||||||
|
"Escapes" are the character `\` followed by another character, and are
|
||||||
|
interpreted as described in the following table:
|
||||||
|
|
||||||
|
| Name | Escape | Code Pt |
|
||||||
|
|-------------------------------|--------|----------|
|
||||||
|
| Line Feed | `\n` | `U+000A` |
|
||||||
|
| Carriage Return | `\r` | `U+000D` |
|
||||||
|
| Character Tabulation (Tab) | `\t` | `U+0009` |
|
||||||
|
| Reverse Solidus (Backslash) | `\\` | `U+005C` |
|
||||||
|
| Quotation Mark (Double Quote) | `\"` | `U+0022` |
|
||||||
|
| Backspace | `\b` | `U+0008` |
|
||||||
|
| Form Feed | `\f` | `U+000C` |
|
||||||
|
| Space | `\s` | `U+0020` |
|
||||||
|
| Unicode Escape | `\u{(1-6 hex chars)}` | Code point described by hex characters, as long as it represents a [Unicode Scalar Value](https://unicode.org/glossary/#unicode_scalar_value) |
|
||||||
|
| Whitespace Escape | See below | N/A |
|
||||||
|
|
||||||
|
##### Escaped Whitespace
|
||||||
|
|
||||||
|
In addition to escaping individual characters, `\` can also escape whitespace.
|
||||||
|
When a `\` is followed by one or more literal whitespace characters, the `\`
|
||||||
|
and all of that whitespace are discarded. For example, `"Hello World"` and
|
||||||
|
`"Hello \ World"` are semantically identical. See [whitespace](#whitespace)
|
||||||
|
and [newlines](#newline) for how whitespace is defined.
|
||||||
|
|
||||||
|
Note that only literal whitespace is escaped; whitespace escapes (`\n` and
|
||||||
|
such) are retained. For example, these strings are all semantically identical:
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
"Hello\ \nWorld"
|
||||||
|
|
||||||
|
"Hello\n\
|
||||||
|
World"
|
||||||
|
|
||||||
|
"Hello\nWorld"
|
||||||
|
|
||||||
|
"""
|
||||||
|
Hello
|
||||||
|
World
|
||||||
|
"""
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Invalid escapes
|
||||||
|
|
||||||
|
Except as described in the escapes table, above, `\` *MUST NOT* precede any
|
||||||
|
other characters in a string.
|
||||||
|
|
||||||
|
### Multi-line String
|
||||||
|
|
||||||
|
Multi-Line Strings support multiple lines with literal, non-escaped
|
||||||
|
Newlines. They must use a special multi-line syntax, and they automatically
|
||||||
|
"dedent" the string, allowing its value to be indented to a visually matching
|
||||||
|
level as desired.
|
||||||
|
|
||||||
|
A Multi-Line String is opened and closed by *three* double-quote characters,
|
||||||
|
like `"""`.
|
||||||
|
Its first line _MUST_ immediately start with a [Newline](#newline)
|
||||||
|
after its opening `"""`.
|
||||||
|
Its final line _MUST_ contain only whitespace
|
||||||
|
before the closing `"""`.
|
||||||
|
All in-between lines that contain non-newline, non-whitespace characters
|
||||||
|
_MUST_ start with _at least_ the exact same whitespace as the final line
|
||||||
|
(precisely matching codepoints, not merely counting characters or "size");
|
||||||
|
they may contain additional whitespace following this prefix. The lines in
|
||||||
|
between may contain unescaped `"` (but no unescaped `"""` as this would close
|
||||||
|
the string).
|
||||||
|
|
||||||
|
The value of the Multi-Line String omits the first and last Newline, the
|
||||||
|
Whitespace of the last line, and the matching Whitespace prefix on all
|
||||||
|
intermediate lines. The first and last Newline can be the same character (that
|
||||||
|
is, empty multi-line strings are legal).
|
||||||
|
|
||||||
|
In other words, the final line specifies the whitespace prefix that will be
|
||||||
|
removed from all other lines.
|
||||||
|
|
||||||
|
Multi-line Strings that do not immediately start with a Newline, or whose final
|
||||||
|
`"""` is not preceded by optional whitespace and a Newline, are illegal. This
|
||||||
|
also means that `"""` may not be used for a single-line String (e.g.
|
||||||
|
`"""foo"""`).
|
||||||
|
|
||||||
|
#### Newline Normalization
|
||||||
|
|
||||||
|
Literal Newline sequences in Multi-line Strings must be normalized to a single
|
||||||
|
`U+000A` (`LF`) during deserialization. This means, for example, that `CR LF`
|
||||||
|
becomes a single `LF` during parsing.
|
||||||
|
|
||||||
|
This normalization does not apply to non-literal Newlines entered using escape
|
||||||
|
sequences. That is:
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
multi-line """
|
||||||
|
\r\n[CRLF]
|
||||||
|
foo[CRLF]
|
||||||
|
"""
|
||||||
|
```
|
||||||
|
|
||||||
|
becomes:
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
single-line "\r\n\nfoo"
|
||||||
|
```
|
||||||
|
|
||||||
|
For clarity: this normalization applies to each individual Newline sequence.
|
||||||
|
That is, the literal sequence `CRLF CRLF` becomes `LF LF`, not `LF`.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
multi-line """
|
||||||
|
foo
|
||||||
|
This is the base indentation
|
||||||
|
bar
|
||||||
|
"""
|
||||||
|
```
|
||||||
|
|
||||||
|
This example's string value will be:
|
||||||
|
|
||||||
|
```
|
||||||
|
foo
|
||||||
|
This is the base indentation
|
||||||
|
bar
|
||||||
|
```
|
||||||
|
|
||||||
|
which is equivalent to `" foo\nThis is the base indentation\n bar"`
|
||||||
|
when written as a single-line string.
|
||||||
|
|
||||||
|
---------
|
||||||
|
|
||||||
|
If the last line wasn't indented as far,
|
||||||
|
it won't dedent the rest of the lines as much:
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
multi-line """
|
||||||
|
foo
|
||||||
|
This is no longer on the left edge
|
||||||
|
bar
|
||||||
|
"""
|
||||||
|
```
|
||||||
|
|
||||||
|
This example's string value will be:
|
||||||
|
|
||||||
|
```
|
||||||
|
foo
|
||||||
|
This is no longer on the left edge
|
||||||
|
bar
|
||||||
|
```
|
||||||
|
|
||||||
|
Equivalent to `" foo\n This is no longer on the left edge\n bar"`.
|
||||||
|
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Empty lines can contain any whitespace, or none at all, and will be reflected as empty in the value:
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
multi-line """
|
||||||
|
Indented a bit.
|
||||||
|
|
||||||
|
A second indented paragraph.
|
||||||
|
"""
|
||||||
|
```
|
||||||
|
|
||||||
|
This example's string value will be:
|
||||||
|
|
||||||
|
```
|
||||||
|
Indented a bit.
|
||||||
|
|
||||||
|
A second indented paragraph.
|
||||||
|
```
|
||||||
|
|
||||||
|
Equivalent to `"Indented a bit.\n\nA second indented paragraph."`
|
||||||
|
|
||||||
|
-----------
|
||||||
|
|
||||||
|
The following yield **syntax errors**:
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
multi-line """can't be single line"""
|
||||||
|
```
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
multi-line """
|
||||||
|
closing quote with non-whitespace prefix"""
|
||||||
|
```
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
multi-line """stuff
|
||||||
|
"""
|
||||||
|
```
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
// Every line must share the exact same prefix as the closing line.
|
||||||
|
multi-line """[\n]
|
||||||
|
[tab]a[\n]
|
||||||
|
[space][space]b[\n]
|
||||||
|
[space][tab][\n]
|
||||||
|
[tab]"""
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Interaction with Whitespace Escapes
|
||||||
|
|
||||||
|
Multi-line strings support the same mechanism for escaping whitespace as Quoted
|
||||||
|
Strings.
|
||||||
|
|
||||||
|
When processing a Multi-line String, implementations MUST dedent the string
|
||||||
|
_after_ resolving all whitespace escapes, but _before_ resolving other backslash
|
||||||
|
escapes. This means a whitespace escape that attempts to escape the final line's
|
||||||
|
newline and/or whitespace prefix can be invalid: if removing escaped whitespace
|
||||||
|
places the closing `"""` on a line with non-whitespace characters, this escape
|
||||||
|
is invalid.
|
||||||
|
|
||||||
|
For example, the following example is illegal:
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
"""
|
||||||
|
foo
|
||||||
|
bar\
|
||||||
|
"""
|
||||||
|
|
||||||
|
// equivalent to
|
||||||
|
"""
|
||||||
|
foo
|
||||||
|
bar"""
|
||||||
|
```
|
||||||
|
|
||||||
|
while the following example is allowed:
|
||||||
|
```kdl
|
||||||
|
"""
|
||||||
|
foo \
|
||||||
|
bar
|
||||||
|
baz
|
||||||
|
\ """
|
||||||
|
|
||||||
|
// equivalent to
|
||||||
|
"""
|
||||||
|
foo bar
|
||||||
|
baz
|
||||||
|
"""
|
||||||
|
```
|
||||||
|
|
||||||
|
### Raw String
|
||||||
|
|
||||||
|
Both [Quoted](#quoted-string) and [Multi-Line Strings](#multi-line-string) have
|
||||||
|
Raw String variants, which are identical in syntax except they do not support
|
||||||
|
`\`-escapes. This includes line-continuation escapes (`\` + `ws` collapsing to
|
||||||
|
nothing). They otherwise share the same properties as far as literal
|
||||||
|
[Newline](#newline) characters go, multi-line rules, and the requirement of
|
||||||
|
UTF-8 representation.
|
||||||
|
|
||||||
|
The Raw String variants are indicated by preceding the string's opening quotes
|
||||||
|
with one or more `#` characters. The string is then closed by its normal closing
|
||||||
|
quotes, followed by a _matching_ number of `#` characters. This means that the
|
||||||
|
string may contain any combination of `"` and `#` characters other than its
|
||||||
|
closing delimiter (e.g., if a raw string starts with `##"`, it can contain `"`
|
||||||
|
or `"#`, but not `"##` or `"###`).
|
||||||
|
|
||||||
|
Like other Strings, Raw Strings _MUST NOT_ include any of the [disallowed
|
||||||
|
literal code-points](#disallowed-literal-code-points) as code points in their
|
||||||
|
body. Unlike with Quoted Strings, these cannot simply be escaped, and are thus
|
||||||
|
unrepresentable when using Raw Strings.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
just-escapes #"\n will be literal"#
|
||||||
|
```
|
||||||
|
|
||||||
|
The string contains the literal characters `\n will be literal`.
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
quotes-and-escapes ##"hello\n\r\asd"#world"##
|
||||||
|
```
|
||||||
|
|
||||||
|
The string contains the literal characters `hello\n\r\asd"#world`
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
raw-multi-line #"""
|
||||||
|
You can show examples of """
|
||||||
|
multi-line strings
|
||||||
|
"""
|
||||||
|
without worrying about escapes.
|
||||||
|
"""#
|
||||||
|
```
|
||||||
|
|
||||||
|
The string contains the value
|
||||||
|
|
||||||
|
```
|
||||||
|
You can show examples of """
|
||||||
|
multi-line strings
|
||||||
|
"""
|
||||||
|
without worrying about escapes.
|
||||||
|
```
|
||||||
|
|
||||||
|
or equivalently, `"You can show examples of \"\"\"\n multi-line strings\n \"\"\"\nwithout worrying about escapes."` as a Quoted String.
|
||||||
|
|
||||||
|
### Number
|
||||||
|
|
||||||
|
Numbers in KDL represent numerical [Values](#value). There is no logical distinction in KDL
|
||||||
|
between real numbers, integers, and floating point numbers. It's up to
|
||||||
|
individual implementations to determine how to represent KDL numbers.
|
||||||
|
|
||||||
|
There are five syntaxes for Numbers: Keywords, Decimal, Hexadecimal, Octal, and Binary.
|
||||||
|
|
||||||
|
* All non-[Keyword](#keyword-numbers) numbers may optionally start with one of `-` or `+`, which determines whether they'll be negative or positive, respectively.
|
||||||
|
* Binary numbers start with `0b` and only allow `0` and `1` as digits, which may be separated by `_`. They represent numbers in radix 2.
|
||||||
|
* Octal numbers start with `0o` and only allow digits between `0` and `7`, which may be separated by `_`. They represent numbers in radix 8.
|
||||||
|
* Hexadecimal numbers start with `0x` and allow digits between `0` and `9`, as well as letters `A` through `F`, in either lower or upper case, which may be separated by `_`. They represent numbers in radix 16.
|
||||||
|
* Decimal numbers are a bit more special:
|
||||||
|
* They have no radix prefix.
|
||||||
|
* They use digits `0` through `9`, which may be separated by `_`.
|
||||||
|
* They may optionally include a decimal separator `.`, followed by more digits, which may again be separated by `_`.
|
||||||
|
* They may optionally be followed by `E` or `e`, an optional `-` or `+`, and more digits, to represent an exponent value.
|
||||||
|
|
||||||
|
Note that, similar to JSON and some other languages,
|
||||||
|
numbers without an integer digit (such as `.1`) are illegal.
|
||||||
|
They must be written with at least one integer digit, like `0.1`.
|
||||||
|
(These patterns are also disallowed from [Identifier Strings](#identifier-string), to avoid confusion.)
|
||||||
|
|
||||||
|
#### Keyword Numbers
|
||||||
|
|
||||||
|
There are three special "keyword" numbers included in KDL to accommodate the
|
||||||
|
widespread use of [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754) floats:
|
||||||
|
|
||||||
|
* `#inf` - floating point positive infinity.
|
||||||
|
* `#-inf` - floating point negative infinity.
|
||||||
|
* `#nan` - floating point NaN/Not a Number.
|
||||||
|
|
||||||
|
To go along with this and prevent foot guns, the bare [Identifier
|
||||||
|
Strings](#identifier-string) `inf`, `-inf`, and `nan` are considered illegal
|
||||||
|
identifiers and should yield a syntax error.
|
||||||
|
|
||||||
|
The existence of these keywords does not imply that any numbers be represented
|
||||||
|
as IEEE 754 floats. These are simply for clarity and convenience for any
|
||||||
|
implementation that chooses to represent their numbers in this way.
|
||||||
|
|
||||||
|
### Boolean
|
||||||
|
|
||||||
|
A boolean [Value](#value) is either the symbol `#true` or `#false`. These
|
||||||
|
_SHOULD_ be represented by implementation as boolean logical values, or some
|
||||||
|
approximation thereof.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
my-node #true value=#false
|
||||||
|
```
|
||||||
|
|
||||||
|
### Null
|
||||||
|
|
||||||
|
The symbol `#null` represents a null [Value](#value). It's up to the
|
||||||
|
implementation to decide how to represent this, but it generally signals the
|
||||||
|
"absence" of a value.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
```kdl
|
||||||
|
my-node #null key=#null
|
||||||
|
```
|
||||||
|
|
||||||
|
### Whitespace
|
||||||
|
|
||||||
|
The following characters should be treated as non-[Newline](#newline) [white
|
||||||
|
space](https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt):
|
||||||
|
|
||||||
|
| Name | Code Pt |
|
||||||
|
|----------------------|---------|
|
||||||
|
| Character Tabulation | `U+0009` |
|
||||||
|
| Space | `U+0020` |
|
||||||
|
| No-Break Space | `U+00A0` |
|
||||||
|
| Ogham Space Mark | `U+1680` |
|
||||||
|
| En Quad | `U+2000` |
|
||||||
|
| Em Quad | `U+2001` |
|
||||||
|
| En Space | `U+2002` |
|
||||||
|
| Em Space | `U+2003` |
|
||||||
|
| Three-Per-Em Space | `U+2004` |
|
||||||
|
| Four-Per-Em Space | `U+2005` |
|
||||||
|
| Six-Per-Em Space | `U+2006` |
|
||||||
|
| Figure Space | `U+2007` |
|
||||||
|
| Punctuation Space | `U+2008` |
|
||||||
|
| Thin Space | `U+2009` |
|
||||||
|
| Hair Space | `U+200A` |
|
||||||
|
| Narrow No-Break Space| `U+202F` |
|
||||||
|
| Medium Mathematical Space | `U+205F` |
|
||||||
|
| Ideographic Space | `U+3000` |
|
||||||
|
|
||||||
|
#### Single-line comments
|
||||||
|
|
||||||
|
Any text after `//`, until the next literal [Newline](#newline) is "commented
|
||||||
|
out", and is considered to be [Whitespace](#whitespace).
|
||||||
|
|
||||||
|
#### Multi-line comments
|
||||||
|
|
||||||
|
In addition to single-line comments using `//`, comments can also be started
|
||||||
|
with `/*` and ended with `*/`. These comments can span multiple lines. They
|
||||||
|
are allowed in all positions where [Whitespace](#whitespace) is allowed and
|
||||||
|
can be nested.
|
||||||
|
|
||||||
|
#### Slashdash comments
|
||||||
|
|
||||||
|
Finally, a special kind of comment called a "slashdash", denoted by `/-`, can
|
||||||
|
be used to comment out entire _components_ of a KDL document logically, and
|
||||||
|
have those elements not be included as part of the parsed document data.
|
||||||
|
|
||||||
|
Slashdash comments can be used before the following, including before their type
|
||||||
|
annotations, if present:
|
||||||
|
|
||||||
|
* A [Node](#node): the entire Node is treated as Whitespace, including all
|
||||||
|
props, args, and children.
|
||||||
|
* An [Argument](#argument): the Argument value is treated as Whitespace.
|
||||||
|
* A [Property](#property) key: the entire property, including both key and value,
|
||||||
|
is treated as Whitespace. A slashdash of just the property value is not allowed.
|
||||||
|
* A [Children Block](#children-block): the entire block, including all
|
||||||
|
children within, is treated as Whitespace. Only other children blocks, whether
|
||||||
|
slashdashed or not, may follow a slashdashed children block.
|
||||||
|
|
||||||
|
A slashdash may be followed by any amount of whitespace, including newlines and
|
||||||
|
comments (other than other slashdashes), before the element that it comments out.
|
||||||
|
|
||||||
|
### Newline
|
||||||
|
|
||||||
|
The following character sequences [should be treated as new
|
||||||
|
lines](https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf):
|
||||||
|
|
||||||
|
| Acronym | Name | Code Pt |
|
||||||
|
|---------|-----------------|---------|
|
||||||
|
| CRLF | Carriage Return and Line Feed | `U+000D` + `U+000A` |
|
||||||
|
| CR | Carriage Return | `U+000D` |
|
||||||
|
| LF | Line Feed | `U+000A` |
|
||||||
|
| NEL | Next Line | `U+0085` |
|
||||||
|
| VT | Vertical tab | `U+000B` |
|
||||||
|
| FF | Form Feed | `U+000C` |
|
||||||
|
| LS | Line Separator | `U+2028` |
|
||||||
|
| PS | Paragraph Separator | `U+2029` |
|
||||||
|
|
||||||
|
Note that for the purpose of new lines, the specific sequence `CRLF` is
|
||||||
|
considered _a single newline_.
|
||||||
|
|
||||||
|
### Disallowed Literal Code Points
|
||||||
|
|
||||||
|
The following code points may not appear literally anywhere in the document.
|
||||||
|
They may be represented in Strings (but not Raw Strings) using [Unicode Escapes](#escapes) (`\u{...}`).
|
||||||
|
|
||||||
|
* The codepoints `U+0000-0008` or the codepoints `U+000E-001F` (various
|
||||||
|
control characters).
|
||||||
|
* `U+007F` (the Delete control character).
|
||||||
|
* Any codepoint that is not a [Unicode Scalar
|
||||||
|
Value](https://unicode.org/glossary/#unicode_scalar_value) (`U+D800-DFFF`).
|
||||||
|
* `U+200E-200F`, `U+202A-202E`, and `U+2066-2069`, the [unicode
|
||||||
|
"direction control"
|
||||||
|
characters](https://www.w3.org/International/questions/qa-bidi-unicode-controls)
|
||||||
|
* `U+FEFF`, aka Zero-width Non-breaking Space (ZWNBSP)/Byte Order Mark (BOM),
|
||||||
|
except as the first code point in a document.
|
||||||
|
|
||||||
|
## Full Grammar
|
||||||
|
|
||||||
|
This is the full official grammar for KDL and should be considered
|
||||||
|
authoritative if something seems to disagree with the text above. The [grammar
|
||||||
|
language syntax](#grammar-language) is defined below.
|
||||||
|
|
||||||
|
```
|
||||||
|
document := bom? version? nodes
|
||||||
|
|
||||||
|
// Nodes
|
||||||
|
nodes := (line-space* node)* line-space*
|
||||||
|
|
||||||
|
base-node := slashdash? type? node-space* string
|
||||||
|
(node-space+ slashdash? node-prop-or-arg)*
|
||||||
|
// slashdashed node-children must always be after props and args.
|
||||||
|
(node-space+ slashdash node-children)*
|
||||||
|
(node-space+ node-children)?
|
||||||
|
(node-space+ slashdash node-children)*
|
||||||
|
node-space*
|
||||||
|
node := base-node node-terminator
|
||||||
|
final-node := base-node node-terminator?
|
||||||
|
|
||||||
|
// Entries
|
||||||
|
node-prop-or-arg := prop | value
|
||||||
|
node-children := '{' nodes final-node? '}'
|
||||||
|
node-terminator := single-line-comment | newline | ';' | eof
|
||||||
|
|
||||||
|
prop := string node-space* '=' node-space* value
|
||||||
|
value := type? node-space* (string | number | keyword)
|
||||||
|
type := '(' node-space* string node-space* ')'
|
||||||
|
|
||||||
|
// Strings
|
||||||
|
string := identifier-string | quoted-string | raw-string ¶
|
||||||
|
|
||||||
|
identifier-string := unambiguous-ident | signed-ident | dotted-ident
|
||||||
|
unambiguous-ident := ((identifier-char - digit - sign - '.') identifier-char*) - disallowed-keyword-identifiers
|
||||||
|
signed-ident := sign ((identifier-char - digit - '.') identifier-char*)?
|
||||||
|
dotted-ident := sign? '.' ((identifier-char - digit) identifier-char*)?
|
||||||
|
identifier-char := unicode - unicode-space - newline - [\\/(){};\[\]"#=] - disallowed-literal-code-points
|
||||||
|
disallowed-keyword-identifiers := 'true' | 'false' | 'null' | 'inf' | '-inf' | 'nan'
|
||||||
|
|
||||||
|
quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline (unicode-space | ws-escape)* '"""'
|
||||||
|
single-line-string-body := (string-character - newline)*
|
||||||
|
multi-line-string-body := (('"' | '""')? string-character)*
|
||||||
|
string-character := '\\' (["\\bfnrts] | 'u{' hex-digit{1, 6} '}') | ws-escape | [^\\"] - disallowed-literal-code-points
|
||||||
|
ws-escape := '\\' (unicode-space | newline)+
|
||||||
|
hex-digit := [0-9a-fA-F]
|
||||||
|
|
||||||
|
raw-string := '#' raw-string-quotes '#' | '#' raw-string '#'
|
||||||
|
raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body newline unicode-space* '"""'
|
||||||
|
single-line-raw-string-body := '' | (single-line-raw-string-char - '"') single-line-raw-string-char*? | '"' (single-line-raw-string-char - '"') single-line-raw-string-char*?
|
||||||
|
single-line-raw-string-char := unicode - newline - disallowed-literal-code-points
|
||||||
|
multi-line-raw-string-body := (unicode - disallowed-literal-code-points)*?
|
||||||
|
|
||||||
|
// Numbers
|
||||||
|
number := keyword-number | hex | octal | binary | decimal
|
||||||
|
|
||||||
|
decimal := sign? integer ('.' integer)? exponent?
|
||||||
|
exponent := ('e' | 'E') sign? integer
|
||||||
|
integer := digit (digit | '_')*
|
||||||
|
digit := [0-9]
|
||||||
|
sign := '+' | '-'
|
||||||
|
|
||||||
|
hex := sign? '0x' hex-digit (hex-digit | '_')*
|
||||||
|
octal := sign? '0o' [0-7] [0-7_]*
|
||||||
|
binary := sign? '0b' ('0' | '1') ('0' | '1' | '_')*
|
||||||
|
|
||||||
|
// Keywords and booleans.
|
||||||
|
keyword := boolean | '#null'
|
||||||
|
keyword-number := '#inf' | '#-inf' | '#nan'
|
||||||
|
boolean := '#true' | '#false'
|
||||||
|
|
||||||
|
// Specific code points
|
||||||
|
bom := '\u{FEFF}'
|
||||||
|
disallowed-literal-code-points := See Table (Disallowed Literal Code Points)
|
||||||
|
unicode := Any Unicode Scalar Value
|
||||||
|
unicode-space := See Table (All White_Space unicode characters which are not `newline`)
|
||||||
|
|
||||||
|
// Comments
|
||||||
|
single-line-comment := '//' ^newline* (newline | eof)
|
||||||
|
multi-line-comment := '/*' commented-block
|
||||||
|
commented-block := '*/' | (multi-line-comment | '*' | '/' | [^*/]+) commented-block
|
||||||
|
slashdash := '/-' line-space*
|
||||||
|
|
||||||
|
// Whitespace
|
||||||
|
ws := unicode-space | multi-line-comment
|
||||||
|
escline := '\\' ws* (single-line-comment | newline | eof)
|
||||||
|
newline := See Table (All Newline White_Space)
|
||||||
|
// Whitespace where newlines are allowed.
|
||||||
|
line-space := node-space | newline | single-line-comment
|
||||||
|
// Whitespace within nodes, where newline-ish things must be esclined.
|
||||||
|
node-space := ws* escline ws* | ws+
|
||||||
|
|
||||||
|
// Version marker
|
||||||
|
version := '/-' unicode-space* 'kdl-version' unicode-space+ ('1' | '2') unicode-space* newline
|
||||||
|
```
|
||||||
|
|
||||||
|
### Grammar language
|
||||||
|
|
||||||
|
The grammar language syntax is a combination of ABNF with some regex spice thrown in.
|
||||||
|
Specifically:
|
||||||
|
|
||||||
|
* Single quotes (`'`) are used to denote literal text. `\` within a literal
|
||||||
|
string is used for escaping other single-quotes, for initiating unicode
|
||||||
|
characters using hex values (`\u{FEFF}`), and for escaping `\` itself
|
||||||
|
(`\\`).
|
||||||
|
* `*` is used for "zero or more", `+` is used for "one or more", and `?` is
|
||||||
|
used for "zero or one". Per standard regex semantics, `*` and `+` are *greedy*;
|
||||||
|
they match as many instances as possible without failing the match.
|
||||||
|
* `*?` (used only in raw strings) indicates a *non-greedy* match;
|
||||||
|
it matches as *few* instances as possible without failing the match.
|
||||||
|
* `¶` is a *cut point*. It always matches and consumes no characters,
|
||||||
|
but once matched, the parser is not allowed to backtrack past that point in the source.
|
||||||
|
If a parser would rewind past the cut point, it must instead fail the overall parse,
|
||||||
|
as if it had run out of options.
|
||||||
|
(This is only used with the `raw-string` production,
|
||||||
|
to ensure the first instance of the appropriate closing quote sequence
|
||||||
|
is guaranteed to be the end of the raw string,
|
||||||
|
rather than allowing it to potentially consume more of the document unexpectedly.)
|
||||||
|
* `()` can be used to group matches that must be matched together.
|
||||||
|
* `a | b` means `a or b`, whichever matches first. If multiple items are before
|
||||||
|
a `|`, they are a single group. `a b c | d` is equivalent to `(a b c) | d`.
|
||||||
|
* `[]` are used for regex-style character matches, where any character between
|
||||||
|
the brackets will be a single match. `\` is used to escape `\`, `[`, and
|
||||||
|
`]`. They also support character ranges (`0-9`), and negation (`^`).
|
||||||
|
* `-` is used for "except for" or "minus" whatever follows it. For example,
|
||||||
|
`a - 'x'` means "any `a`, except something that matches the literal `'x'`".
|
||||||
|
* The prefix `^` means "something that does not match" whatever follows it.
|
||||||
|
For example, `^foo` means "must not match `foo`".
|
||||||
|
* A single definition may be split over multiple lines. Newlines are treated as
|
||||||
|
spaces.
|
||||||
|
* `//` followed by text on its own line is used as comment syntax.
|
||||||
|
|
|
||||||
11
SPEC_v1.md
11
SPEC_v1.md
|
|
@ -22,8 +22,7 @@ simultaneously. For example, `node "foo"` is a valid node in both versions, and
|
||||||
should be represented identically by parsers.
|
should be represented identically by parsers.
|
||||||
|
|
||||||
KDL v2 is designed such that for any given KDL document written as KDL
|
KDL v2 is designed such that for any given KDL document written as KDL
|
||||||
1.0 or [KDL 2.0](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html),
|
1.0 or [KDL 2.0](./SPEC.md), the parse will either fail completely, or, if the
|
||||||
the parse will either fail completely, or, if the
|
|
||||||
parse succeeds, the data represented by a v1 or v2 parser will be identical.
|
parse succeeds, the data represented by a v1 or v2 parser will be identical.
|
||||||
This means that it's safe to use a fallback parsing strategy in order to support
|
This means that it's safe to use a fallback parsing strategy in order to support
|
||||||
both v1 and v2 simultaneously. For example, `node "foo"` is a valid node in both
|
both v1 and v2 simultaneously. For example, `node "foo"` is a valid node in both
|
||||||
|
|
@ -313,7 +312,7 @@ IEEE 754-2008 decimal floating point numbers
|
||||||
* `country-subdivision`: ISO 3166-2 country subdivision code.
|
* `country-subdivision`: ISO 3166-2 country subdivision code.
|
||||||
* `email`: RFC5322 email address.
|
* `email`: RFC5322 email address.
|
||||||
* `idn-email`: RFC6531 internationalized email address.
|
* `idn-email`: RFC6531 internationalized email address.
|
||||||
* `hostname`: RFC1123 internet hostname (only ASCII segments)
|
* `hostname`: RFC1132 internet hostname (only ASCII segments)
|
||||||
* `idn-hostname`: RFC5890 internationalized internet hostname (only `xn--`-prefixed ASCII "punycode" segments, or non-ASCII segments)
|
* `idn-hostname`: RFC5890 internationalized internet hostname (only `xn--`-prefixed ASCII "punycode" segments, or non-ASCII segments)
|
||||||
* `ipv4`: RFC2673 dotted-quad IPv4 address.
|
* `ipv4`: RFC2673 dotted-quad IPv4 address.
|
||||||
* `ipv6`: RFC2373 IPv6 address.
|
* `ipv6`: RFC2373 IPv6 address.
|
||||||
|
|
@ -469,19 +468,19 @@ can be nested.
|
||||||
### Newline
|
### Newline
|
||||||
|
|
||||||
The following characters [should be treated as new
|
The following characters [should be treated as new
|
||||||
lines](https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-5/#G41643):
|
lines](https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf):
|
||||||
|
|
||||||
| Acronym | Name | Code Pt |
|
| Acronym | Name | Code Pt |
|
||||||
|---------|-----------------|---------|
|
|---------|-----------------|---------|
|
||||||
| CRLF | Carriage Return and Line Feed | `U+000D` + `U+000A` |
|
|
||||||
| CR | Carriage Return | `U+000D` |
|
| CR | Carriage Return | `U+000D` |
|
||||||
| LF | Line Feed | `U+000A` |
|
| LF | Line Feed | `U+000A` |
|
||||||
|
| CRLF | Carriage Return and Line Feed | `U+000D` + `U+000A` |
|
||||||
| NEL | Next Line | `U+0085` |
|
| NEL | Next Line | `U+0085` |
|
||||||
| FF | Form Feed | `U+000C` |
|
| FF | Form Feed | `U+000C` |
|
||||||
| LS | Line Separator | `U+2028` |
|
| LS | Line Separator | `U+2028` |
|
||||||
| PS | Paragraph Separator | `U+2029` |
|
| PS | Paragraph Separator | `U+2029` |
|
||||||
|
|
||||||
Note that for the purpose of new lines, CRLF is considered _a single newline_. `VT` `Vertical tab` `U+000B` was mistakenly excluded, but the v1 spec is frozen, so it's left unchanged.
|
Note that for the purpose of new lines, CRLF is considered _a single newline_.
|
||||||
|
|
||||||
## Full Grammar
|
## Full Grammar
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ XML elements and KDL nodes have a direct correspondence. In XiK, an XML element
|
||||||
* making the attributes into KDL properties
|
* making the attributes into KDL properties
|
||||||
* making the child nodes as KDL child nodes
|
* making the child nodes as KDL child nodes
|
||||||
|
|
||||||
For example, the XML `<element foo="bar"><child baz="quux" /></element>` is encoded into XiK as `element foo=bar { child baz=quux }`.
|
For example, the XML `<element foo="bar"><child baz="qux" /></element>` is encoded into XiK as `element foo=bar { child baz=quux }`.
|
||||||
|
|
||||||
XML namespaces are encoded the same as XML: the node name simply contains a `:` character. Note that KDL identifier syntax allows `:` directly in an ident, so a name like `xml:space` or `xlink:href` is a valid node or property name.
|
XML namespaces are encoded the same as XML: the node name simply contains a `:` character. Note that KDL identifier syntax allows `:` directly in an ident, so a name like `xml:space` or `xlink:href` is a valid node or property name.
|
||||||
|
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -54,7 +54,3 @@ please send a PR.
|
||||||
If you think the disagreement is due to a genuine error or oversight in the
|
If you think the disagreement is due to a genuine error or oversight in the
|
||||||
KDL specification, please open an issue explaining the matter and the change
|
KDL specification, please open an issue explaining the matter and the change
|
||||||
will be considered for the next version of the KDL spec.
|
will be considered for the next version of the KDL spec.
|
||||||
|
|
||||||
# Benchmarks
|
|
||||||
|
|
||||||
The `benchmarks` folder contains some large or gnarly documents intended to be used to stress-test your parser and help with profiling. They are intentionally not part of the testsuite, and just provided for your own personal benefit.
|
|
||||||
|
|
|
||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -1,3 +0,0 @@
|
||||||
foo123 {
|
|
||||||
bar
|
|
||||||
}
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
node ""
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
node ""
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
node ""
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
node ""
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
node "\""
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
node "\"\""
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
node deadbeef
|
|
||||||
|
|
@ -1,2 +1 @@
|
||||||
node arg
|
node arg
|
||||||
node2 arg2
|
|
||||||
|
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
node string
|
|
||||||
|
|
@ -1,2 +0,0 @@
|
||||||
node string
|
|
||||||
node string
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
node string
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
foo123{bar}
|
|
||||||
|
|
@ -1,2 +0,0 @@
|
||||||
node #"""
|
|
||||||
"""#
|
|
||||||
|
|
@ -1,2 +0,0 @@
|
||||||
node #"""
|
|
||||||
"""#
|
|
||||||
|
|
@ -1,2 +0,0 @@
|
||||||
node """
|
|
||||||
"""
|
|
||||||
|
|
@ -1,2 +0,0 @@
|
||||||
node """
|
|
||||||
"""
|
|
||||||
|
|
@ -1,3 +0,0 @@
|
||||||
node """
|
|
||||||
"
|
|
||||||
"""
|
|
||||||
|
|
@ -1,3 +0,0 @@
|
||||||
node """
|
|
||||||
""
|
|
||||||
"""
|
|
||||||
|
|
@ -1,4 +0,0 @@
|
||||||
node """
|
|
||||||
dead\
|
|
||||||
beef
|
|
||||||
"""
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
no "Higher than max Unicode Scalar Value \u{10FFFF} \u{11FFFF}"
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
no "Surrogates high\u{D800}"
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
no "Surrogates high\u{D911}"
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
no "Surrogates high\u{DABB}"
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
no "Surrogates high\u{DBFF}"
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
no "Surrogates low\u{DC00}"
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
no "Surrogates low\u{DEAD}"
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
eno "Surrogates low\u{DFFF}"
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
no "Even with leading 0s Unicode Scalar Value escapes must ≤6: \u{0012345}"
|
|
||||||
|
|
@ -1 +1 @@
|
||||||
node argnode2 arg2
|
nodearg
|
||||||
|
|
|
||||||
|
|
@ -1,2 +0,0 @@
|
||||||
node "string"/-{}
|
|
||||||
node "string" {}/-{}
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
node "string"/-foo=1
|
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
node arg
|
||||||
|
node2 arg2
|
||||||
Loading…
Reference in New Issue