Compare commits

..

No commits in common. "main" and "2.0.0-draft.2" have entirely different histories.

227 changed files with 920 additions and 1528076 deletions

View File

@ -1,8 +0,0 @@
# See http://editorconfig.org
root = true
[*.{md,xml,org}]
charset = utf-8
insert_final_newline = true
trim_trailing_whitespace = true

View File

@ -1,60 +0,0 @@
name: "Update Editor's Copy"
on:
push:
paths-ignore:
- README.md
- CONTRIBUTING.md
- LICENSE.md
- .gitignore
pull_request:
paths-ignore:
- README.md
- CONTRIBUTING.md
- LICENSE.md
- .gitignore
jobs:
build:
name: "Update Editor's Copy"
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: "Checkout"
uses: actions/checkout@v4
- name: "Setup"
id: setup
run: date -u "+date=%FT%T" >>"$GITHUB_OUTPUT"
- name: "Caching"
uses: actions/cache@v4
with:
path: |
.refcache
.venv
.gems
node_modules
.targets.mk
key: i-d-${{ steps.setup.outputs.date }}
restore-keys: i-d-
- name: "Build Drafts"
uses: martinthomson/i-d-template@v1
with:
token: ${{ github.token }}
- name: "Update GitHub Pages"
uses: martinthomson/i-d-template@v1
if: ${{ github.event_name == 'push' }}
with:
make: gh-pages
token: ${{ github.token }}
- name: "Archive Built Drafts"
uses: actions/upload-artifact@v4
with:
path: |
draft-*.html
draft-*.txt

View File

@ -1,25 +0,0 @@
name: Lint the test files
on:
push:
paths:
- "tests/**"
pull_request:
paths:
- "tests/**"
workflow_dispatch: {}
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Verify failing tests and orphaned tests
run: |
cd tests/test_cases
python ../../.github/workflows/lint-tests/lint.py

View File

@ -1,50 +0,0 @@
from __future__ import annotations
import os
import sys
import typing
def findTestFiles(path) -> typing.Generator[str, None, None]:
    """Yield the path of every file located anywhere beneath ``path``.

    Each yielded value is the containing folder (as reported by ``os.walk``)
    joined with the file name, so results start with ``path`` itself.
    Directories are descended into but never yielded.
    """
    for folder, _subfolders, names in os.walk(path):
        yield from (os.path.join(folder, name) for name in names)
# Collect both trees as paths relative to their top folder (the folder name
# plus one separator character is cut off) so the two sets are comparable.
inputFiles = {found[len("input") + 1:] for found in findTestFiles("input")}
validFiles = {found[len("expected_kdl") + 1:] for found in findTestFiles("expected_kdl")}

# Inputs with no expected output, and expected outputs with no input.
invalidFiles = inputFiles - validFiles
orphanedFiles = validFiles - inputFiles

# Flipped to False by any failing check; decides the exit status at the end.
SUCCESS = True

# Check 1: every expected_kdl file needs a matching input file.
if orphanedFiles:
    SUCCESS = False
    print(
        "ERROR: There are outputs in /expected_kdl without corresponding tests in /input:\n"
        + "\n".join(" " + name for name in orphanedFiles)
    )

# Check 2: an input without an expected_kdl file is a test that should fail
# to parse, so its name (minus extension) has to end in "_fail".
misnamedFiles: list[str] = [
    candidate
    for candidate in invalidFiles
    if not os.path.splitext(candidate)[0].endswith("_fail")
]
if misnamedFiles:
    SUCCESS = False
    print(
        "ERROR: There are tests in /input without corresponding outputs in /expected_kdl, but they don't have a _fail suffix:\n"
        + "\n".join(" " + name for name in misnamedFiles)
    )

# Check 3: every expected_kdl file has to end with a newline.
noNewlineFiles: list[str] = []
for relative in validFiles:
    with open("expected_kdl/" + relative, "r", encoding="utf-8") as handle:
        contents = handle.read()
    if not contents.endswith("\n"):
        noNewlineFiles.append(relative)
if noNewlineFiles:
    SUCCESS = False
    print(
        "ERROR: There are outputs in /expected_kdl that don't end with a newline:\n"
        + "\n".join(" " + name for name in noNewlineFiles)
    )

# Report failure to the caller (the CI step) through the exit status.
if not SUCCESS:
    sys.exit(1)

View File

@ -1,57 +0,0 @@
name: "Publish New Draft Version"
on:
push:
tags:
- "draft-*"
workflow_dispatch:
inputs:
email:
description: "Submitter email"
default: ""
type: string
jobs:
build:
name: "Publish New Draft Version"
runs-on: ubuntu-latest
steps:
- name: "Checkout"
uses: actions/checkout@v4
# See https://github.com/actions/checkout/issues/290
- name: "Get Tag Annotations"
run: git fetch -f origin ${{ github.ref }}:${{ github.ref }}
- name: "Setup"
id: setup
run: date -u "+date=%FT%T" >>"$GITHUB_OUTPUT"
- name: "Caching"
uses: actions/cache@v4
with:
path: |
.refcache
.venv
.gems
node_modules
.targets.mk
key: i-d-${{ steps.setup.outputs.date }}
restore-keys: i-d-
- name: "Build Drafts"
uses: martinthomson/i-d-template@v1
with:
token: ${{ github.token }}
- name: "Upload to Datatracker"
uses: martinthomson/i-d-template@v1
with:
make: upload
env:
UPLOAD_EMAIL: ${{ inputs.email }}
- name: "Archive Submitted Drafts"
uses: actions/upload-artifact@v4
with:
path: "versioned/draft-*-[0-9][0-9].*"

23
.gitignore vendored
View File

@ -1,25 +1,2 @@
/target /target
Cargo.lock Cargo.lock
*.html
*.pdf
*.redxml
*.swp
*.txt
*.upload
*~
.tags
/*-[0-9][0-9].xml
/.*.mk
/.gems/
/.refcache
/.venv/
/.vscode/
/lib
/node_modules/
/versioned/
Gemfile.lock
archive.json
draft-marchan-kdl2.xml
package-lock.json
report.xml
!requirements.txt

View File

@ -1,4 +0,0 @@
<note title="Discussion Venues" removeInRFC="true">
<t>Source for this draft and an issue tracker can be found at
<eref target="https://github.com/kdl-org/kdl"/>.</t>
</note>

View File

@ -1,6 +1,6 @@
# KDL Changelog # KDL Changelog
## 2.0.0 (2024-12-21) ## 2.0.0 (2022-08-28)
### Grammar ### Grammar
@ -9,15 +9,14 @@
escape. escape.
* Single line comments (`//`) can now be immediately followed by a newline. * Single line comments (`//`) can now be immediately followed by a newline.
* All literal whitespace following a `\` in a string is now discarded. * All literal whitespace following a `\` in a string is now discarded.
* Vertical tabs (`U+000B`) are now considered to be newlines. * Vertical tabs (`U+000B`) are now considered to be whitespace.
* The grammar syntax itself has been described, and some confusing definitions * The grammar syntax itself has been described, and some confusing definitions
in the grammar have been fixed accordingly (mostly related to escaped in the grammar have been fixed accordingly (mostly related to escaped
characters). characters).
* `,`, `<`, and `>` are now legal identifier characters. They were previously * `,`, `<`, and `>` are now legal identifier characters. They were previously
reserved for KQL but this is no longer necessary. reserved for KQL but this is no longer necessary.
* Code points under `0x20` (except newline and whitespace code points), code * Code points under `0x20`, code points above `0x10FFFF`, Delete control
points above `0x10FFFF`, Delete control character (`0x7F`), and the [unicode character (`0x7F`), and the [unicode "direction control"
"direction control"
characters](https://www.w3.org/International/questions/qa-bidi-unicode-controls) characters](https://www.w3.org/International/questions/qa-bidi-unicode-controls)
are now completely banned from appearing literally in KDL documents. They are now completely banned from appearing literally in KDL documents. They
can now only be represented in regular strings, and there's no facilities to can now only be represented in regular strings, and there's no facilities to
@ -25,7 +24,6 @@
improvement. improvement.
* Raw strings no longer require an `r` prefix: they are now specified by using * Raw strings no longer require an `r` prefix: they are now specified by using
`#""#`. `#""#`.
* Raw string productions are now explicitly non-greedy (and "fallible").
* Line continuations can be followed by an EOF now, instead of requiring a * Line continuations can be followed by an EOF now, instead of requiring a
newline (or comment). `node \<EOF>` is now a legal KDL document. newline (or comment). `node \<EOF>` is now a legal KDL document.
* `#` is no longer a legal identifier character. * `#` is no longer a legal identifier character.
@ -37,7 +35,7 @@
* Bare identifiers can now be used as values in Arguments and Properties, and are interpreted as string values. * Bare identifiers can now be used as values in Arguments and Properties, and are interpreted as string values.
* The spec prose now more explicitly states that strings and raw strings can * The spec prose now more explicitly states that strings and raw strings can
be used as type annotations. be used as type annotations.
* Removed a statement in the spec prose that said "It is reasonable for an * Removed a statement in the spec prose that said "It is reasonable for an
implementation to ignore null values altogether when deserializing". This is implementation to ignore null values altogether when deserializing". This is
no longer encouraged or desired. no longer encouraged or desired.
* Code points have been constrained to [Unicode Scalar * Code points have been constrained to [Unicode Scalar
@ -46,7 +44,7 @@
should be valid UTF-8 now, as was intended. should be valid UTF-8 now, as was intended.
* The last node in a child block no longer needs to be terminated with `;`, * The last node in a child block no longer needs to be terminated with `;`,
even if the closing `}` is on the same line, so this is now a legal node: even if the closing `}` is on the same line, so this is now a legal node:
`node{foo;bar;baz}` `node {foo;bar;baz}`
* More places allow whitespace (node-spaces, specifically) now. With great * More places allow whitespace (node-spaces, specifically) now. With great
power comes great responsibility: power comes great responsibility:
* Inside `(foo)` annotations (so, `( foo )` would be legal (`( f oo )` would * Inside `(foo)` annotations (so, `( foo )` would be legal (`( f oo )` would
@ -56,89 +54,21 @@
* Around `=` for props (`x = 1`) * Around `=` for props (`x = 1`)
* The BOM is now only allowed as the first character in a document. It was * The BOM is now only allowed as the first character in a document. It was
previously treated as generic whitespace. previously treated as generic whitespace.
* Multi-line strings must now use `"""` as delimiters. The opening delimiter must be immediately followed by a newline, and the closing delimiter must be on its own line, prefixed by optional whitespace. * Multi-line strings are now automatically dedented, according to the
* Multi-line strings are now automatically dedented, according to the common least-indented line in the body. Multiline strings and raw strings now must
whitespace matching the whitespace prefix of the closing line. have a newline immediately following their opening `"`, and a final newline
preceding the closing `"`.
* SMALL EQUALS SIGN (`U+FE66`), FULLWIDTH EQUALS SIGN (`U+FF1D`), and HEAVY
EQUALS SIGN (`U+1F7F0`) are now treated the same as `=` and can be used for
properties (e.g. `お名前=☜(゚ヮ゚☜)`). They are also no longer valid in bare
identifiers.
* `.1`, `+.1` etc are no longer valid identifiers, to prevent confusion and * `.1`, `+.1` etc are no longer valid identifiers, to prevent confusion and
conflicts with numbers. conflicts with numbers.
* Multi-line strings' literal Newline sequences are now normalized to single
`LF`s.
* `#inf`, `#-inf`, and `#nan` have been added in order to properly support
IEEE floats for implementations that choose to represent their decimals that
way.
* Correspondingly, the identifiers `inf`, `-inf`, and `nan` are now syntax
errors.
* `u128` and `i128` have been added as well-known number type annotations.
* Slashdash (`/-`) -compatible locations adjusted to be more clear and
intuitive. They can now be used in exactly three different places: before nodes,
before entire entries, or before entire child blocks.
* Furthermore, the ordering of slashdashed elements has been restricted such
that a slashdashed child block cannot go before an entry (including slashdashed
entries).
* Optional version marker `/- kdl-version 2` (or `1`) as the first line in a document, optionally preceded by the BOM.
### KQL ### KQL
> [!INFO] Note: these are provided for convenience, but as of the 2.0.0 KDL spec release,
> KQL itself is not finalized and should be considered a separate specification,
> alongside the Schema spec and others.
* There's now a _required_ descendant selector (`>>`), instead of using plain * There's now a _required_ descendant selector (`>>`), instead of using plain
spaces for that purpose. spaces for that purpose.
* The "any sibling" selector is now `++` instead of `~`, for consistency with * The "any sibling" selector is now `++` instead of `~`, for consistency with
the new descendant selector. the new descendant selector.
* Some parsing logic around the grammar has changed.
* Multi- and single-line comments are now supported, as well as line
continuations with `\`.
* Map operators have been removed entirely. * Map operators have been removed entirely.
---
## 2.0.0 Draft Changelogs
### 2.0.0-draft.8 (2024-12-14)
* Some details have been clarified around the treatment of whitespace in
multiline strings.
* `raw-string` productions have been updated to be explicitly non-greedy and
"fallible".
* Some tests have been added, others adjusted, some removed, after a cleanup pass.
### 2.0.0-draft.7 (2024-12-10)
* `node-space` is now allowed as whitespace after a `slashdash`, meaning line
continuations will work now.
* One or two consecutive double-quotes are now allowed in the bodies of
multi-line quoted strings, without needing to be escaped.
* Grammar has been fixed to disallow raw strings like `#"""#`, which are now
properly treated as invalid multi-line raw strings (instead of the equivalent of
`"\""`).
* Test suite has been updated to include a `_fail` suffix in all test cases
which are expected to fail.
* A slew of additional slashdash and multi-line string compliance tests have
been added. Have fun. :)
* The organization of string types in the spec prose has been updated to a
hopefully more helpful structure.
### 2.0.0-draft.6 (2024-12-04)
* Multiline strings, both Raw and Quoted, must now use `"""` instead of a single `"`. Using `"""` for a single-line string is a syntax error.
* Fixed an issue with the `unicode_silly` test case.
* Some rewordings and clarification in the spec prose.
* Slight grammar tweak where the pre-terminator `node-space*` for `node` and `final-node` have been moved into `base-node`.
### 2.0.0-draft.5 (2024-11-28)
* Equals signs other than `=` are no longer supported in properties.
* 128-bit integer type annotations have been added to the list of "well-known"
type annotations.
* Multiline string escape rules have been tweaked significantly.
* `\s` is now a valid escape within a string, representing a space character.
* Slashdash (`/-`)-compatible locations and related grammar adjusted to be more
clear and intuitive. This includes some changes relating to whitespace,
including comments and newlines, which are breaking changes.
* Various updates to test suite to reflect changes.

View File

@ -1,22 +0,0 @@
# Contributing
## Mechanics
Contributions can be made by creating pull requests.
The GitHub interface supports creating pull requests using the Edit (✏) button.
## Building the Specification
The specification is written in
[kramdown-rfc](https://github.com/cabo/kramdown-rfc/wiki/Syntax2), which
compiles via [RFCXML](https://authors.ietf.org/rfcxml-vocabulary) to text and
HTML.
You can build the formatted versions or the intermediate RFCXML file using
https://author-tools.ietf.org/ or locally by running `make`. To preserve the
intermediate RFCXML form in a local build, run `make draft-marchan-kdl2.xml`
once.
Command line usage requires that you have the necessary software installed. See
[the instructions](https://github.com/martinthomson/i-d-template/blob/main/doc/SETUP.md).

View File

@ -98,7 +98,7 @@ The properties and/or children of the node represent the items of the object,
with the property names and child nodenames as each item's key. with the property names and child nodenames as each item's key.
All "keys" in an object node must be unique. All "keys" in an object node must be unique.
As with arrays, there are two ambiguous cases that must be manually annotated with the `(object)` type annotation: As with arrays, there are two ambiguous cases that must be manually annotated with the `(object)` type annotation:
* An object containing a single item whose key is "-" (like `{"-": 1}`) written using children (like `- { - 1 }`) * An object containing a single item whose key is "-" (like `{"-": 1}`) written using children (like `- { - 1 }`)
would be ambiguous with an array node. would be ambiguous with an array node.

View File

@ -1,15 +0,0 @@
LIBDIR := lib
include $(LIBDIR)/main.mk
$(LIBDIR)/main.mk:
ifneq (,$(shell grep "path *= *$(LIBDIR)" .gitmodules 2>/dev/null))
git submodule sync
git submodule update --init
else
ifneq (,$(wildcard $(ID_TEMPLATE_HOME)))
ln -s "$(ID_TEMPLATE_HOME)" $(LIBDIR)
else
git clone -q --depth 10 -b main \
https://github.com/martinthomson/i-d-template $(LIBDIR)
endif
endif

View File

@ -30,11 +30,6 @@ properties, node names, etc). With the exception of `top()` and `()`, they are a
used inside a `[]` selector. Some matchers are unary, but most of them involve used inside a `[]` selector. Some matchers are unary, but most of them involve
binary operators. binary operators.
The `top()` matcher can only be used as the first matcher of a selector. This means
that it cannot be the right operand of the `>`, `>>`, `+`, or `++` operators. As `||`
combines selectors, the `top()` can appear just after it. For instance,
`a > b || top() > b` is valid, but `a > top()` is not.
* `top()`: Returns all toplevel children of the current document. * `top()`: Returns all toplevel children of the current document.
* `top() > []`: Equivalent to `top()` on its own. * `top() > []`: Equivalent to `top()` on its own.
* `(foo)`: Selects any element whose type annotation is `foo`. * `(foo)`: Selects any element whose type annotation is `foo`.
@ -109,23 +104,18 @@ Then the following queries are valid:
## Full Grammar ## Full Grammar
Rules that are not defined in this grammar are prefixed with `$`, see [the KDL For rules that are not defined in this grammar, see [the KDL grammar](https://github.com/kdl-org/kdl/blob/main/SPEC.md#full-grammar).
grammar](https://kdl.dev/spec/#name-full-grammar) for
what they expand to.
``` ```
query-str := $bom? query query := selector q-ws* "||" q-ws* query | selector
query := selector q-ws+ "||" q-ws+ query | selector selector := filter q-ws* selector-operator q-ws* selector | filter
selector := filter q-ws+ selector-operator q-ws+ selector-subsequent | filter
selector-subsequent := matchers q-ws+ selector-operator q-ws+ selector-subsequent | matchers
selector-operator := ">>" | ">" | "++" | "+" selector-operator := ">>" | ">" | "++" | "+"
filter := "top(" q-ws* ")" | matchers filter := matcher+
matchers := type-matcher $string? accessor-matcher* | $string accessor-matcher* | accessor-matcher+ matcher := "top()"| "()" | identifier | type | accessor-matcher
type-matcher := "(" q-ws* ")" | $type accessor-matcher := "[" (comparison | accessor)? "]"
accessor-matcher := "[" q-ws* (comparison | accessor)? q-ws* "]" comparison := accessor q-ws* matcher-operator q-ws* (type | identifier | string | number | keyword)
comparison := accessor q-ws+ matcher-operator q-ws+ ($type | $string | $number | $keyword) accessor := "val(" number ")" | "prop(" identifier ")" | "name()" | "tag()" | "values()" | "props()" | identifier
accessor := "val(" q-ws* $integer q-ws* ")" | "prop(" q-ws* $string q-ws* ")" | "name(" q-ws* ")" | "tag(" q-ws* ")" | "values(" q-ws* ")" | "props(" q-ws* ")" | $string
matcher-operator := "=" | "!=" | ">" | "<" | ">=" | "<=" | "^=" | "$=" | "*=" matcher-operator := "=" | "!=" | ">" | "<" | ">=" | "<=" | "^=" | "$=" | "*="
q-ws := $node-space q-ws := bom | unicode-space
``` ```

249
README.md
View File

@ -2,8 +2,8 @@
KDL is a small, pleasant document language with XML-like node semantics that KDL is a small, pleasant document language with XML-like node semantics that
looks like you're invoking a bunch of CLI commands! It's meant to be used both looks like you're invoking a bunch of CLI commands! It's meant to be used both
as a serialization format and a configuration language, much like JSON, YAML, or as a serialization format and a configuration language, much like JSON, YAML,
XML. It looks like this: or XML. It looks like this:
```kdl ```kdl
package { package {
@ -18,15 +18,11 @@ package {
scripts { scripts {
// "Raw" and dedented multi-line strings are supported. // "Raw" and dedented multi-line strings are supported.
message """ build #"
hello
world
"""
build #"""
echo "foo" echo "foo"
node -c "console.log('hello, world!');" node -c "console.log('hello, world!');"
echo "foo" > some-file.txt echo "foo" > some-file.txt
"""# "#
} }
// `\` breaks up a single node across multiple lines. // `\` breaks up a single node across multiple lines.
@ -44,97 +40,46 @@ package {
} }
``` ```
For more details, see the [overview below](#overview). There's a living [specification](SPEC.md), as well as various
There's a living [specification](https://kdl.dev/spec/), as well as various
[implementations](#implementations). You can also check out the [FAQ](#faq) to [implementations](#implementations). You can also check out the [FAQ](#faq) to
answer all your burning questions! answer all your burning questions!
The current version of the KDL spec is In addition to a spec for KDL itself, there are also standard specs for [a KDL
[KDL 2.0.0](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html). For legacy KDL, Query Language](QUERY-SPEC.md) based on CSS selectors, and [a KDL Schema
please refer to the [KDL 1.0.0
spec](https://github.com/kdl-org/kdl/blob/2.0.0/SPEC_v1.md). All users are
encouraged to migrate. [Migration is forward-and-backward-compatible and
safe](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html#compatibility), and can
be automated.
In addition to a spec for KDL itself, there are specifications for [a KDL Query
Language](QUERY-SPEC.md) based on CSS selectors, and [a KDL Schema
Language](SCHEMA-SPEC.md) loosely based on JSON Schema. Language](SCHEMA-SPEC.md) loosely based on JSON Schema.
The language is based on [SDLang](https://sdlang.org), with a [number of The language is based on [SDLang](https://sdlang.org), with a number of
modifications and clarifications on its syntax and behavior](#why-not-sdlang). modifications and clarifications on its syntax and behavior.
We are grateful for their work as an inspiration to ours.
[Play with it in your browser!](https://kdl.dev/play/) The current version of the KDL spec is `2.0.0-draft.2`.
[Play with it in your browser!](https://kdl-play.danini.dev/)
## Design and Discussion ## Design and Discussion
KDL 2.0.0 has been finalized, and no further changes are expected. For questions KDL is still extremely new, and discussion about the format should happen over
about KDL and discussions, please see the [discussions on the [discussions page](https://github.com/kdl-org/kdl/discussions). Feel
page](https://github.com/kdl-org/kdl/discussions). For minor editorial fixes or free to jump in and give us your 2 cents!
critical spec errata, please feel free to [file an
issue](https://github.com/kdl-org/kdl/issues).
## Used By
A lot of folks have started picking up KDL for personal projects, larger open
source projects, and even proprietary ones! This section includes a list
of some examples of KDL in the wild (either v1, v2, or both):
* [Zellij](https://zellij.dev) - Terminal workspace/multiplexer
* [Niri](https://github.com/YaLTeR/niri) - Scrollable-tiling window manager for Wayland
* [Bikeshed](https://github.com/speced/bikeshed) ([here](https://github.com/speced/bikeshed-boilerplate/blob/main/boilerplate/doctypes.kdl) and [here](https://github.com/speced/bikeshed-data/blob/main/data/manifest.txt)) - Specification pre-processor used by CSS, C++, WHATWG, various W3C working groups, and others.
* [orogene](https://orogene.dev) - Lightning-fast JavaScript package manager
* [Onyx](https://onyxlang.io/) - An efficient, procedural, and pragmatic programming language that compiles to WASM. Used for package manifests.
* [Pop!_OS/System76 Scheduler](https://github.com/pop-os/system76-scheduler) - Scheduling service which optimizes Linux's CPU scheduler and makes it go faster.
* [ImStyle](https://patitotective.github.io/ImStyle/) - ImGui application styling with Nim and KDL
* [fmod-rs](https://github.com/CAD97/fmod-rs) - Rust bindings to FMOD Core and FMOD Studio
* [mise](https://mise.jdx.dev/) - dev tools, env vars, task runner
* [Camping](https://github.com/camping/camping) - Ruby web microframework
* [Iron Vault](https://ironvault.quest) - VTT (Virtual Tabletop) plugin for Obsidian for the Ironsworn family of games
* [Microsoft TypeScript DOM Generator](https://github.com/microsoft/TypeScript-DOM-lib-generator) - Tool for generating DOM-related TypeScript and JavaScript library files
* [Ferron](https://ferron.sh/) - A fast, memory-safe web server written in Rust
* You?
## Implementations ## Implementations
> [!INFO] There are two major versions of KDL. Different libraries may support one or the * Rust: [kdl-rs](https://github.com/kdl-org/kdl-rs), [knuffel](https://crates.io/crates/knuffel/) (latter includes derive macro), and [kaydle](https://github.com/Lucretiel/kaydle) (serde-based)
> other, or even provide a "hybrid" mode where both versions are attempted, since * JavaScript: [kdljs](https://github.com/kdl-org/kdljs), [@virtualstate/kdl](https://github.com/virtualstate/kdl) (query only, JSX based)
> there's no data ambiguity between v1 and v2 documents. * Ruby: [kdl-rb](https://github.com/danini-the-panini/kdl-rb)
* Dart: [kdl-dart](https://github.com/danini-the-panini/kdl-dart)
| Language | Implementation | v1 | v2 | Notes | * Java: [kdl4j](https://github.com/hkolbeck/kdl4j)
|---|---|---|---|---| * PHP: [kdl-php](https://github.com/kdl-org/kdl-php)
| C | [ckdl](https://github.com/tjol/ckdl) | ✅ | ✅ | | * Python: [kdl-py](https://github.com/tabatkins/kdlpy), [cuddle](https://github.com/djmattyg007/python-cuddle), [ckdl](https://github.com/tjol/ckdl)
| C#/.NET | [Kadlet](https://github.com/oledfish/Kadlet) | ✅ | ✖️ | | * Elixir: [kuddle](https://github.com/IceDragon200/kuddle)
| C#/.NET | [KdlSharp](https://github.com/AndreyAkinshin/KdlSharp) | ✅ | ✅ | .NET Std: 2.1+, .NET 6+, .NET FW 4.7.2+, Mono, Xamarin | * XSLT: [xml2kdl](https://github.com/Devasta/XML2KDL)
| C++ | [kdlpp](https://github.com/tjol/ckdl) | ✅ | ✅ | part of ckdl, requires C++20 | * Haskell: [Hustle](https://github.com/fuzzypixelz/Hustle)
| Common Lisp | [kdlcl](https://github.com/chee/kdlcl) | ✅ | ✖️ | | * .NET: [Kadlet](https://github.com/oledfish/Kadlet)
| Crystal | [kdl-cr](https://github.com/danini-the-panini/kdl-cr) | ✅ | ✖️ | | * C: [ckdl](https://github.com/tjol/ckdl)
| Dart | [kdl-dart](https://github.com/danini-the-panini/kdl-dart) | ✅ | ✅ | | * C++: [kdlpp](https://github.com/tjol/ckdl) (part of ckdl, requires C++20)
| Elixir | [kuddle](https://github.com/IceDragon200/kuddle) | ✅ | ✅ | | * OCaml: [ocaml-kdl](https://github.com/Bannerets/ocaml-kdl)
| Go | [gokdl](https://github.com/lunjon/gokdl) | ✅ | ✖️ | | * Nim: [kdl-nim](https://github.com/Patitotective/kdl-nim)
| Go | [kdl-go](https://github.com/sblinch/kdl-go) | ✅ | ✖️ | | * Common Lisp: [kdlcl](https://github.com/chee/kdlcl)
| Go | [gokdl2](https://github.com/njreid/gokdl2) | ✅ | ✅ | Friendly errors & arena allocator | * Go: [gokdl](https://github.com/lunjon/gokdl), [kdl-go](https://github.com/sblinch/kdl-go)
| Haskell | [Hustle](https://github.com/fuzzypixelz/Hustle) | ✅ | ✖️ | |
| Haskell | [kdl-hs](https://github.com/brandonchinn178/kdl-hs) | ✅ | ✅ | Format/comment-preserving parser |
| Java | [kdl4j](https://github.com/kdl-org/kdl4j) | ✅ | ✅ | |
| JavaScript | [@bgotink/kdl](https://github.com/bgotink/kdl) | ✅ | ✅ | Format/comment-preserving parser |
| JavaScript | [@virtualstate/kdl](https://github.com/virtualstate/kdl) | ✅ | ✖️ | query only, JSX based |
| JavaScript | [kdljs](https://github.com/kdl-org/kdljs) | ✅ | ✅ | |
| Lua | [kdlua](https://github.com/danini-the-panini/kdlua) | ✅ | ✖️ | |
| Nim | [kdl-nim](https://github.com/Patitotective/kdl-nim) | ✅ | ✖️ | |
| OCaml | [ocaml-kdl](https://github.com/eilvelia/ocaml-kdl) | ✅ | ✅ | |
| PHP | [kdl-php](https://github.com/kdl-org/kdl-php) | ✅ | ✖️ | |
| Python | [ckdl](https://github.com/tjol/ckdl) | ✅ | ✅ | |
| Python | [cuddle](https://github.com/djmattyg007/python-cuddle) | ✅ | ✖️ | |
| Python | [kdl-py](https://github.com/tabatkins/kdlpy) | ✅ | ✅ | |
| Ruby | [kdl-rb](https://github.com/danini-the-panini/kdl-rb) | ✅ | ✅ | |
| Rust | [kdl-rs](https://github.com/kdl-org/kdl-rs) | ✅ | ✅ | Format/comment-preserving parser |
| Rust | [knus](https://crates.io/crates/knus/) | ✅ | ✖️ | Serde-_style_ derive macros (not actual Serde) |
| Swift | [kdl-swift](https://github.com/danini-the-panini/kdl-swift) | ✅ | ✖️ | |
| XSLT | [xml2kdl](https://github.com/Devasta/XML2KDL) | ✅ | ✖️ | |
| Zig | [zig-kdl](https://codeberg.org/desttinghim/zig-kdl) | ✅ | ✅ | Format/comment-preserving parser |
## Compatibility Test Suite ## Compatibility Test Suite
@ -146,15 +91,10 @@ entirety, but in the future, may be required to in order to be included here.
## Editor Support ## Editor Support
* [Intellij IDEA](https://plugins.jetbrains.com/plugin/20136-kdl-document-language) * [VS Code](https://marketplace.visualstudio.com/items?itemName=kdl-org.kdl&ssr=false#review-details)
* [Sublime Text](https://packagecontrol.io/packages/KDL)\* * [Sublime Text](https://packagecontrol.io/packages/KDL)
* [TreeSitter](https://github.com/tree-sitter-grammars/tree-sitter-kdl) (neovim, among others)
* [VS Code](https://marketplace.visualstudio.com/items?itemName=kdl-org.kdl&ssr=false#review-details)\*
* [vim](https://github.com/imsnif/kdl.vim) * [vim](https://github.com/imsnif/kdl.vim)
* [Kate](https://github.com/larsgw/katepart-kdl)\* * [Intellij IDEA](https://plugins.jetbrains.com/plugin/20136-kdl-document-language)
* [Zed](https://zed.dev/extensions/kdl)
\* Supports KDL 2.0.0
## Overview ## Overview
@ -194,7 +134,7 @@ Nodes without children are terminated by a newline, a semicolon, or the end of
a file stream: a file stream:
```kdl ```kdl
node1; node2; node3 node1; node2; node3;
``` ```
### Values ### Values
@ -202,13 +142,13 @@ node1; node2; node3
KDL supports 4 data types: KDL supports 4 data types:
* Strings: `unquoted`, `"hello world"`, or `#"hello world"#` * Strings: `unquoted`, `"hello world"`, or `#"hello world"#`
* Numbers: `123.45`, `0xdeadbeef`, `#inf`, `#-inf`, `#nan` * Numbers: `123.45`
* Booleans: `#true` and `#false` * Booleans: `#true` and `#false`
* Null: `#null` * Null: `#null`
#### Strings #### Strings
It supports three different formats for string input: unquoted, quoted, and raw. It supports three different formats for string input: identifiers, quoted, and raw.
```kdl ```kdl
node1 this-is-a-string node1 this-is-a-string
@ -216,38 +156,38 @@ node2 "this\nhas\tescapes"
node3 #"C:\Users\zkat\raw\string"# node3 #"C:\Users\zkat\raw\string"#
``` ```
You don't have to quote strings unless any the following apply: You don't have to quote strings unless they contain whitespace, or if any the
following apply:
* The string contains any of `[]{}()\/#";`.
* The string contains whitespace. * The string contains whitespace.
* The string contains any of `[]{}()\/#";=`. * The string is one of `true`, `false`, or `null`.
* The string is one of `true`, `false`, `null`, `inf`, `-inf`, or `nan`. * The strings starts with a digit, or `+`/`-` and a digit.
* The strings starts with a digit, or `+`/`-`/`.`/`-.`,`+.` and a digit. * The string contains an equals sign (including unicode equals signs `﹦`,
(aka "looks like a number") `＝`, and `🟰`).
In essence, if it can get confused for other KDL or KQL syntax, it needs In essence, if it can get confused for other KDL syntax, it needs quotes.
quotes.
Both types of quoted string can be written across multiple lines by using triple Both types of quoted string can be multiline as-is, without a different
quotes (`"""`) followed immediately by a newline. Additionally, common syntax. Additionally, these multi-line strings will be "dedented" according to
indentation shared with the line containing the closing quotes will be the common indentation that all lines share:
stripped/dedented:
```kdl ```kdl
string """ string "
my my
multiline multiline
value value
""" "
``` ```
Raw strings, which do not support `\` escapes and can be used when you want Raw strings, which do not support `\` escapes and can be used when you want
certain kinds of strings to look nicer without having to escape a lot: certain kinds of strings to look nicer without having to escape a lot:
```kdl ```kdl
exec #""" exec #"
echo "foo" echo "foo"
echo "bar" echo "bar"
cd C:\path\to\dir cd C:\path\to\dir
"""# "#
regex #"\d{3} "[^/"]+""# regex #"\d{3} "[^/"]+""#
``` ```
@ -256,15 +196,15 @@ You can add any number of `#`s before and after the opening and
closing `#` to disambiguate literal closing `#"` sequences: closing `#` to disambiguate literal closing `#"` sequences:
```kdl ```kdl
other-raw ##"hello#"world"## other-raw ##"hello"#world"##
``` ```
#### Numbers #### Numbers
There are 4 ways to represent numbers in KDL, plus 3 float keywords. KDL does There are 4 ways to represent numbers in KDL. KDL does not prescribe any
not prescribe any representation for these numbers, and it's entirely up to representation for these numbers, and it's entirely up to individual
individual implementations whether to represent all numbers with a single type, implementations whether to represent all numbers with a single type, or to
or to have different representations for different forms. have different representations for different forms.
KDL has regular decimal-radix numbers, with optional decimal part, as well as KDL has regular decimal-radix numbers, with optional decimal part, as well as
an optional exponent. an optional exponent.
@ -282,13 +222,6 @@ my-octal 0o755
my-binary 0b10101101 my-binary 0b10101101
``` ```
If you're intending to represent IEEE 754 floats, there are three special
keywords you can use:
```kdl
special-floats #inf #-inf #nan
```
Finally, all numbers can have underscores to help readability: Finally, all numbers can have underscores to help readability:
```kdl ```kdl
@ -315,7 +248,7 @@ hello
``` ```
On top of that, KDL supports `/-` "slashdash" comments, which can be used to On top of that, KDL supports `/-` "slashdash" comments, which can be used to
comment out individual nodes, entries, or child blocks: comment out individual nodes, arguments, or children:
```kdl ```kdl
// This entire node and its children are all commented out. // This entire node and its children are all commented out.
@ -329,8 +262,6 @@ mynode /-commented "not commented" /-key=value /-{
a a
b b
} }
// The above is equivalent to:
mynode "not commented"
``` ```
### Type Annotations ### Type Annotations
@ -360,13 +291,13 @@ smile 😁
// Node names and property keys are just strings, so you can write them like // Node names and property keys are just strings, so you can write them like
// quoted or raw strings, too! // quoted or raw strings, too!
"illegal(){}[]/\\=#;identifier" #"1.2.3"# "#false"=#true "illegal{}[]/\\=#;identifier" #"1.2.3"# "#false"=#true
// Identifiers are very flexible. The following is a legal bare identifier: // Identifiers are very flexible. The following is a legal bare identifier:
-<123~!$@%^&*,.:'`|?+> <@foo123~!$%^&*.:'|?+>
// And you can also use non-ASCII unicode! // And you can also use unicode, even for the equals sign!
ノード お名前=ฅ^•ﻌ•^ฅ ノード お名前=☜(゚ヮ゚☜)
// kdl specifically allows properties and values to be // kdl specifically allows properties and values to be
// interspersed with each other, much like CLI commands. // interspersed with each other, much like CLI commands.
@ -375,9 +306,9 @@ foo bar=#true baz quux=#false 1 2 3
## Design Principles ## Design Principles
1. Human Maintainability 1. Maintainability
1. Flexibility 1. Flexibility
1. Cognitive Simplicity and Learnability 1. Cognitive simplicity and Learnability
1. Ease of de/serialization 1. Ease of de/serialization
1. Ease of implementation 1. Ease of implementation
@ -402,43 +333,28 @@ Same as "cuddle".
Because nothing out there felt quite right. The closest one I found was Because nothing out there felt quite right. The closest one I found was
SDLang, but that had some design choices I disagreed with. SDLang, but that had some design choices I disagreed with.
<a name="why-not-sdlang"></a>
#### Ok, then, why not SDLang? #### Ok, then, why not SDLang?
SDLang is an excellent base, but I wanted some details ironed out, and some SDLang is designed for use cases that are not interesting to me, but are very
things removed that only really made sense for SDLang's current use-cases, including relevant to the D-lang community. KDL is very similar in many ways, but is
some restrictions about data representation. KDL is very similar in many ways, except: different in the following ways:
* The grammar and expected semantics are [well-defined and specified](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html). * The grammar and expected semantics are [well-defined and specified](SPEC.md).
This was the original impetus for working on KDL, followed by details that * There is only one "number" type. KDL does not prescribe representations.
seemed like they could be improved.
* There is only one "number" type. KDL does not prescribe representations, but
does have keywords for NaN, infinity, and negative infinity if decimal numbers
are intended to be represented as IEEE754 floats.
* Slashdash (`/-`) comments are great and useful! * Slashdash (`/-`) comments are great and useful!
* Quoteless "identifier" strings (e.g. `node foo=bar`, vs `node foo="bar"`). * I am not interested in having first-class date types, and SDLang's are very
* KDL does not have first-class date or binary data types. Instead, it non-standard.
supports arbitrary type annotations for any custom data type you might need:
`(date)"2021-02-03"`, `(binary)"deadbeefbadc0ffee"`.
* Values and properties can be interspersed with each other, rather than one * Values and properties can be interspersed with each other, rather than one
having to follow the other. It was not clear whether this was actually allowed in SDLang. having to follow the other.
* Multi-line strings are supported using `"""<newline>` and their lines are automatically * KDL does not have a first-class binary data type. Just use strings with base64.
"dedented" to match their closing quotes' indentation level. * All strings in KDL are multi-line, and raw strings are written with
* Raw strings are written with `#` (`#"foo\bar"#`), instead of backticks. This, Rust-style syntax (`r"foo"`), instead of backticks.
while more verbose, allows embedding of languages, especially scripting * KDL identifiers can use UTF-8 and are much more lax about symbols than SDLang.
languages, that use this syntax on a regular basis, without additional escaping * KDL does not support "anonymous" nodes.
(e.g. bash and JavaScript). * Instead, KDL supports arbitrary identifiers for node names and attribute
* KDL identifiers can use a wide range of UTF-8 and are much more lax about
valid characters than SDLang.
* KDL does not support "anonymous" nodes. Instead, any string can be used as a
node name. For lists of arbitrary values, there is a convention of naming the nodes
simply `-`.
* Namespaces are not supported, but `:` is a legal identifier character, and applications
can choose to implement namespaces as they see fit.
* KDL supports arbitrary identifiers for node names and attribute
names, meaning you can use arbitrary strings for those: `"123" "value"=1` is names, meaning you can use arbitrary strings for those: `"123" "value"=1` is
a valid node, for example. This makes it easier to use KDL for a valid node, for example. This makes it easier to use KDL for
representing arbitrary key/value pairs using child nodes. representing arbitrary key/value pairs.
#### Have you seen that one XKCD comic about standards? #### Have you seen that one XKCD comic about standards?
@ -476,10 +392,7 @@ microsyntax for losslessly encoding JSON](JSON-IN-KDL.md).
#### What about TOML? #### What about TOML?
It nests very poorly. It doesn't fare well with large files. Also, I felt some It nests very poorly. It doesn't fare well with large files.
discomfort [continuing to use and promote something by its
creator](https://en.wikipedia.org/wiki/Tom_Preston-Werner#Resignation_from_GitHub).
#### What about XML? #### What about XML?

View File

@ -268,7 +268,7 @@ and property names when the `node-names` or `prop-names` options are activated.
* `tag`: [Validations](#validation-nodes) to apply to the tag of the value. * `tag`: [Validations](#validation-nodes) to apply to the tag of the value.
* `type`: A string denoting the type of the property value. * `type`: A string denoting the type of the property value.
* `enum`: A specific list of allowed values for this property. May be heterogeneous as long as it agrees with the `type`, if specified. * `enum`: A specific list of allowed values for this property. May be heterogenous as long as it agrees with the `type`, if specified.
#### String validations #### String validations
@ -287,7 +287,7 @@ and property names when the `node-names` or `prop-names` options are activated.
* `country-subdivision`: ISO 3166-2 country subdivision code. * `country-subdivision`: ISO 3166-2 country subdivision code.
* `email`: RFC5322 email address. * `email`: RFC5322 email address.
* `idn-email`: RFC6531 internationalized email address. * `idn-email`: RFC6531 internationalized email address.
* `hostname`: RFC1123 internet hostname. * `hostname`: RFC1132 internet hostname.
* `idn-hostname`: RFC5890 internationalized internet hostname. * `idn-hostname`: RFC5890 internationalized internet hostname.
* `ipv4`: RFC2673 dotted-quad IPv4 address. * `ipv4`: RFC2673 dotted-quad IPv4 address.
* `ipv6`: RFC2373 IPv6 address. * `ipv6`: RFC2373 IPv6 address.
@ -313,12 +313,10 @@ and property names when the `node-names` or `prop-names` options are activated.
* `i16`: 16-bit signed integer * `i16`: 16-bit signed integer
* `i32`: 32-bit signed integer * `i32`: 32-bit signed integer
* `i64`: 64-bit signed integer * `i64`: 64-bit signed integer
* `i128`: 128-bit signed integer
* `u8`: 8-bit unsigned integer * `u8`: 8-bit unsigned integer
* `u16`: 16-bit unsigned integer * `u16`: 16-bit unsigned integer
* `u32`: 32-bit unsigned integer * `u32`: 32-bit unsigned integer
* `u64`: 64-bit unsigned integer * `u64`: 64-bit unsigned integer
* `u128`: 128-bit unsigned integer
* `isize`: Platform-dependent signed integer * `isize`: Platform-dependent signed integer
* `usize`: Platform-dependent unsigned integer * `usize`: Platform-dependent unsigned integer
* `f32`: IEEE 754 single (32-bit) precision floating point number * `f32`: IEEE 754 single (32-bit) precision floating point number

771
SPEC.md
View File

@ -1 +1,770 @@
The v2 specification has been moved [here](draft-marchan-kdl2.md). # KDL Spec
This is the semi-formal specification for KDL, including the intended data
model and the grammar.
This document describes KDL version `2.0.0-draft.2`. It was released on
2024-02-06.
## Introduction
KDL is a node-oriented document language. Its niche and purpose overlaps with
XML, as do many of its semantics. You can use KDL both as a configuration
language, and a data exchange or storage format, if you so choose.
The bulk of this document is dedicated to a long-form description of all
[Components](#components) of a KDL document. There is also a much more terse
[Grammar](#full-grammar) at the end of the document that covers most of the
rules, with some semantic exceptions involving the data model.
KDL is designed to be easy to read _and_ easy to implement.
In this document, references to "left" or "right" refer to directions in the
*data stream* towards the beginning or end, respectively; in other words,
the directions if the data stream were only ASCII text. They do not refer
to the writing direction of text, which can flow in either direction,
depending on the characters used.
## Components
### Document
The toplevel concept of KDL is a Document. A Document is composed of zero or
more [Nodes](#node), separated by newlines and whitespace, and eventually
terminated by an EOF.
All KDL documents should be UTF-8 encoded and conform to the specifications in
this document.
#### Example
The following is a document composed of two toplevel nodes:
```kdl
foo {
bar
}
baz
```
### Node
Being a node-oriented language means that the real core component of any KDL
document is the "node". Every node must have a name, which must be a
[String](#string).
The name may be preceded by a [Type Annotation](#type-annotation) to further
clarify its type, particularly in relation to its parent node. (For example,
clarifying that a particular `date` child node is for the _publication_ date,
rather than the last-modified date, with `(published)date`.)
Following the name are zero or more [Arguments](#argument) or
[Properties](#property), separated by either [whitespace](#whitespace) or [a
slash-escaped line continuation](#line-continuation). Arguments and Properties
may be interspersed in any order, much like is common with positional
arguments vs options in command line tools.
[Children](#children-block) can be placed after the name and the optional
Arguments and Properties, possibly separated by either whitespace or a
slash-escaped line continuation.
Arguments are ordered relative to each other (but not relative to Properties)
and that order must be preserved in order to maintain the semantics.
By contrast, Property order _SHOULD NOT_ matter to implementations.
[Children](#children-block) should be used if an order-sensitive key/value
data structure must be represented in KDL.
Nodes _MAY_ be prefixed with [Slashdash](#slashdash-comments) to "comment out"
the entire node, including its properties, arguments, and children, and make
it act as plain whitespace, even if it spreads across multiple lines.
Finally, a node is terminated by either a [Newline](#newline), a semicolon (`;`)
or the end of the file/stream (an `EOF`).
#### Example
```kdl
foo 1 key=val 3 {
bar
(role)baz 1 2
}
```
### Line Continuation
Line continuations allow [Nodes](#node) to be spread across multiple lines.
A line continuation is a `\` character followed by zero or more whitespace
items (including multiline comments) and an optional single-line comment. It
must be terminated by a [Newline](#newline) (including the Newline that is
part of single-line comments).
Following a line continuation, processing of a Node can continue as usual.
#### Example
```kdl
my-node 1 2 \ // comments are ok after \
3 4 // This is the actual end of the Node.
```
### Property
A Property is a key/value pair attached to a [Node](#node). A Property is
composed of a [String](#string), followed immediately by an [equals
sign](#equals-sign), and then a [Value](#value).
Properties should be interpreted left-to-right, with rightmost properties with
identical names overriding earlier properties. That is:
```kdl
node a=1 a=2
```
In this example, the node's `a` value must be `2`, not `1`.
No other guarantees about order should be expected by implementers.
Deserialized representations may iterate over properties in any order and
still be spec-compliant.
Properties _MAY_ be prefixed with `/-` to "comment out" the entire token and
make it act as plain whitespace, even if it spreads across multiple lines.
#### Equals Sign
Any of the following characters may be used as equals signs in properties:
| Name | Character | Code Point |
|----|-----|----|
| EQUALS SIGN | `=` | `U+003D` |
| SMALL EQUALS SIGN | `﹦` | `U+FE66` |
| FULLWIDTH EQUALS SIGN | `＝` | `U+FF1D` |
| HEAVY EQUALS SIGN | `🟰` | `U+1F7F0` |
### Argument
An Argument is a bare [Value](#value) attached to a [Node](#node), with no
associated key. It shares the same space as [Properties](#property), and may be interleaved with them.
A Node may have any number of Arguments, which should be evaluated left to
right. KDL implementations _MUST_ preserve the order of Arguments relative to
each other (not counting Properties).
Arguments _MAY_ be prefixed with `/-` to "comment out" the entire token and
make it act as plain whitespace, even if it spreads across multiple lines.
#### Example
```kdl
my-node 1 2 3 a b c
```
### Children Block
A children block is a block of [Nodes](#node), surrounded by `{` and `}`. They
are an optional part of nodes, and create a hierarchy of KDL nodes.
Regular node termination rules apply, which means multiple nodes can be
included in a single-line children block, as long as they're all terminated by
`;`.
#### Example
```kdl
parent {
child1
child2
}
parent { child1; child2; }
```
### Value
A value is either: a [String](#string), a [Number](#number), a
[Boolean](#boolean), or [Null](#null).
Values _MUST_ be either [Arguments](#argument) or values of
[Properties](#property). Only [String](#string) values may be used as
[Node](#node) names or [Property](#property) keys.
Values (both as arguments and as properties) _MAY_ be prefixed by a single
[Type Annotation](#type-annotation).
### Type Annotation
A type annotation is a prefix to any [Node Name](#node) or [Value](#value) that
includes a _suggestion_ of what type the value is _intended_ to be treated as,
or as a _context-specific elaboration_ of the more generic type the node name
indicates.
Type annotations are written as a set of `(` and `)` with a single
[String](#string) in it. It may contain Whitespace after the `(` and before
the `)`, and may be separated from its target by Whitespace.
KDL does not specify any restrictions on what implementations might do with
these annotations. They are free to ignore them, or use them to make decisions
about how to interpret a value.
Additionally, the following type annotations MAY be recognized by KDL parsers
and, if used, SHOULD be interpreted as follows:
#### Reserved Type Annotations for Numbers Without Decimals:
Signed integers of various sizes (the number is the bit size):
* `i8`
* `i16`
* `i32`
* `i64`
Unsigned integers of various sizes (the number is the bit size):
* `u8`
* `u16`
* `u32`
* `u64`
Platform-dependent integer types, both signed and unsigned:
* `isize`
* `usize`
#### Reserved Type Annotations for Numbers With Decimals:
IEEE 754 floating point numbers, both single (32) and double (64) precision:
* `f32`
* `f64`
IEEE 754-2008 decimal floating point numbers:
* `decimal64`
* `decimal128`
#### Reserved Type Annotations for Strings:
* `date-time`: ISO8601 date/time format.
* `time`: "Time" section of ISO8601.
* `date`: "Date" section of ISO8601.
* `duration`: ISO8601 duration format.
* `decimal`: IEEE 754-2008 decimal string format.
* `currency`: ISO 4217 currency code.
* `country-2`: ISO 3166-1 alpha-2 country code.
* `country-3`: ISO 3166-1 alpha-3 country code.
* `country-subdivision`: ISO 3166-2 country subdivision code.
* `email`: RFC5322 email address.
* `idn-email`: RFC6531 internationalized email address.
* `hostname`: RFC1123 internet hostname (only ASCII segments)
* `idn-hostname`: RFC5890 internationalized internet hostname (only `xn--`-prefixed ASCII "punycode" segments, or non-ASCII segments)
* `ipv4`: RFC2673 dotted-quad IPv4 address.
* `ipv6`: RFC2373 IPv6 address.
* `url`: RFC3986 URI.
* `url-reference`: RFC3986 URI Reference.
* `irl`: RFC3987 Internationalized Resource Identifier.
* `irl-reference`: RFC3987 Internationalized Resource Identifier Reference.
* `url-template`: RFC6570 URI Template.
* `uuid`: RFC4122 UUID.
* `regex`: Regular expression. Specific patterns may be implementation-dependent.
* `base64`: A Base64-encoded string, denoting arbitrary binary data.
#### Examples
```kdl
node (u8)123
node prop=(regex).*
(published)date "1970-01-01"
(contributor)person name="Foo McBar"
```
### String
Strings in KDL represent textual UTF-8 [Values](#value). A String is either an
[Identifier String](#identifier-string) (like `foo`), a [Quoted String](#quoted-string) (like `"foo"`) or
a [Raw String](#raw-string) (like `#"foo"#`). Identifier Strings let you write short, "single-word" strings with a minimum of syntax; Quoted Strings let you write strings with whitespace (including newlines!) or escapes; Raw Strings let you write strings with whitespace *but without escapes*, allowing you to not worry about the string's content containing anything that might look like an escape.
Strings _MUST_ be represented as UTF-8 values.
Strings _MUST NOT_ include the code points for [disallowed literal code
points](#disallowed-literal-code-points) directly. Quoted Strings may include
these code points as _values_ by representing them with their corresponding
`\u{...}` escape.
### Identifier String
An Identifier String (sometimes referred to as just an "identifier") is
composed of any [Unicode Scalar
Value](https://unicode.org/glossary/#unicode_scalar_value) other than
[non-initial characters](#non-initial-characters), followed by any number of
Unicode Scalar Values other than [non-identifier
characters](#non-identifier-characters).
A handful of patterns are disallowed, to avoid confusion with other values:
* idents that appear to start with a [Number](#number)
(like `1.0v2` or `-1em`)
or the "almost a number" pattern of a decimal point without a leading digit
(like `.1`)
* idents that are the language keywords (`true`, `false`, and `null`) without their leading `#`
Identifiers that match these patterns _MUST_ be treated as a syntax error;
such values can only be written as quoted or raw strings.
The precise details of the identifier syntax are specified in the [Full Grammar](#full-grammar) below.
Identifier Strings are terminated by [Whitespace](#whitespace) or
[Newlines](#newline).
#### Non-initial characters
The following characters cannot be the first character in an
[Identifier String](#identifier-string):
* Any decimal digit (0-9)
* Any [non-identifier characters](#non-identifier-characters)
Additionally, the `-` character can only be used as an initial character if
the second character is *not* a digit. This allows identifiers to look like
`--this`, and removes the ambiguity of having an identifier look like a
negative number.
#### Non-identifier characters
The following characters cannot be used anywhere in an [Identifier String](#identifier-string):
* Any of `(){}[]/\"#;`
* Any [Equals Sign](#equals-sign)
* Any [Whitespace](#whitespace) or [Newline](#newline).
* Any [disallowed literal code points](#disallowed-literal-code-points) in KDL
documents.
### Quoted String
A Quoted String is delimited by `"` on either side of any number of literal
string characters except unescaped `"` and `\`. This includes literal
[Newline](#newline) characters, which means a String Value can encompass
multiple lines without behaving like a Newline for [Node](#node) parsing
purposes.
Like Identifier Strings, Quoted Strings _MUST NOT_ include any of the [disallowed literal
code-points](#disallowed-literal-code-points) as code points in their body.
Quoted Strings also follow the Multi-line rules specified in [Multi-line
String](#multi-line-strings).
#### Escapes
In addition to literal code points, a number of "escapes" are supported in Quoted Strings.
"Escapes" are the character `\` followed by another character, and are
interpreted as described in the following table:
| Name | Escape | Code Pt |
|-------------------------------|--------|----------|
| Line Feed | `\n` | `U+000A` |
| Carriage Return | `\r` | `U+000D` |
| Character Tabulation (Tab) | `\t` | `U+0009` |
| Reverse Solidus (Backslash) | `\\` | `U+005C` |
| Quotation Mark (Double Quote) | `\"` | `U+0022` |
| Backspace | `\b` | `U+0008` |
| Form Feed | `\f` | `U+000C` |
| Space | `\s` | `U+0020` |
| Unicode Escape | `\u{(1-6 hex chars)}` | Code point described by hex characters, as long as it represents a [Unicode Scalar Value](https://unicode.org/glossary/#unicode_scalar_value) |
| Whitespace Escape | See below | N/A |
##### Escaped Whitespace
In addition to escaping individual characters, `\` can also escape whitespace.
When a `\` is followed by one or more literal whitespace characters, the `\`
and all of that whitespace are discarded. For example, `"Hello World"` and
`"Hello \ World"` are semantically identical. See [whitespace](#whitespace)
and [newlines](#newline) for how whitespace is defined.
Note that only literal whitespace is escaped; whitespace escapes (`\n` and
such) are retained. For example, these strings are all semantically identical:
```kdl
"Hello\ \nWorld"
"Hello\n\
World"
"Hello\nWorld"
"
Hello
World
"
```
##### Invalid escapes
Except as described in the escapes table, above, `\` *MUST NOT* precede any
other characters in a string.
### Raw String
Raw Strings in KDL are much like [Quoted Strings](#quoted-string), except they
do not support `\`-escapes. They otherwise share the same properties as far as
literal [Newline](#newline) characters go, multi-line rules, and the requirement
of UTF-8 representation.
Raw String literals are represented with one or more `#` characters, followed
by `"`, followed by any number of UTF-8 literals. The string is then closed by
a `"` followed by a _matching_ number of `#` characters. This means that the
string sequence `"` or `"#` and such must not match the closing `"` with the
same or more `#` characters as the opening `#`, in the body of the string.
Like other Strings, Raw Strings _MUST NOT_ include any of the [disallowed
literal code-points](#disallowed-literal-code-points) as code points in their
body. Unlike with Quoted Strings, these cannot simply be escaped, and are thus
unrepresentable when using Raw Strings.
#### Example
```kdl
just-escapes #"\n will be literal"#
```
The string contains the literal characters `\n will be literal`.
```kdl
quotes-and-escapes ##"hello\n\r\asd"#world"##
```
The string contains the literal characters `hello\n\r\asd"#world`
### Multi-line Strings
When a Quoted or Raw String spans multiple lines with literal, non-escaped Newlines,
it follows a special multi-line syntax
that automatically "dedents" the string,
allowing its value to be indented to a visually matching level if desired.
A Multi-line string _MUST_ start with a [Newline](#newline)
immediately following its opening `"`.
Its final line, preceding the closing `"`,
_MUST_ contain only whitespace.
All in-between lines that contain non-whitespace characters
_MUST_ start with the exact same whitespace as the final line
(precisely matching codepoints, not merely counting characters).
The value of the Multi-line String omits the first and last Newline,
the Whitespace of the last line,
the matching Whitespace prefix on all intermediate lines,
and all Whitespace on intermediate Whitespace-only lines.
The first and last Newline can be the same character
(that is, empty multi-line strings are legal).
Strings with literal Newlines that do not immediately start with a Newline and
whose final `"` is not preceded by optional whitespace and a Newline are illegal.
In other words, the final line specifies the whitespace prefix that will be removed from all other lines.
#### Example
```kdl
multi-line "
foo
This is the base indentation
bar
"
```
The last example's string value will be:
```
foo
This is the base indentation
bar
```
Equivalent to `" foo\nThis is the base indentation\n bar"`.
---------
If the last line wasn't indented as far,
it won't dedent the rest of the lines as much:
```kdl
multi-line "
foo
This is no longer on the left edge
bar
"
```
This example's string value will be:
```
foo
This is no longer on the left edge
bar
```
Equivalent to `" foo\n This is no longer on the left edge\n bar"`.
-----------
Empty lines can contain any whitespace, or none at all, and will be reflected as empty in the value:
```kdl
multi-line "
Indented a bit.
A second indented paragraph.
"
```
This example's string value will be:
```
Indented a bit.
A second indented paragraph.
```
Equivalent to `"Indented a bit.\n\nA second indented paragraph."`
### Number
Numbers in KDL represent numerical [Values](#value). There is no logical distinction in KDL
between real numbers, integers, and floating point numbers. It's up to
individual implementations to determine how to represent KDL numbers.
There are four syntaxes for Numbers: Decimal, Hexadecimal, Octal, and Binary.
* All numbers may optionally start with one of `-` or `+`, which determine whether they'll be negative or positive.
* Binary numbers start with `0b` and only allow `0` and `1` as digits, which may be separated by `_`. They represent numbers in radix 2.
* Octal numbers start with `0o` and only allow digits between `0` and `7`, which may be separated by `_`. They represent numbers in radix 8.
* Hexadecimal numbers start with `0x` and allow digits between `0` and `9`, as well as letters `A` through `F`, in either lower or upper case, which may be separated by `_`. They represent numbers in radix 16.
* Decimal numbers are a bit more special:
* They have no radix prefix.
* They use digits `0` through `9`, which may be separated by `_`.
* They may optionally include a decimal separator `.`, followed by more digits, which may again be separated by `_`.
* They may optionally be followed by `E` or `e`, an optional `-` or `+`, and more digits, to represent an exponent value.
Note that, similar to JSON and some other languages,
numbers without an integer digit (such as `.1`) are illegal.
They must be written with at least one integer digit, like `0.1`.
(These patterns are also disallowed from [Identifier Strings](#identifier-string), to avoid confusion.)
### Boolean
A boolean [Value](#value) is either the symbol `#true` or `#false`. These
_SHOULD_ be represented by implementations as boolean logical values, or some
approximation thereof.
#### Example
```kdl
my-node #true value=#false
```
### Null
The symbol `#null` represents a null [Value](#value). It's up to the
implementation to decide how to represent this, but it generally signals the
"absence" of a value.
#### Example
```kdl
my-node #null key=#null
```
### Whitespace
The following characters should be treated as non-[Newline](#newline) [white
space](https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt):
| Name | Code Pt |
|----------------------|---------|
| Character Tabulation | `U+0009` |
| Line Tabulation | `U+000B` |
| Space | `U+0020` |
| No-Break Space | `U+00A0` |
| Ogham Space Mark | `U+1680` |
| En Quad | `U+2000` |
| Em Quad | `U+2001` |
| En Space | `U+2002` |
| Em Space | `U+2003` |
| Three-Per-Em Space | `U+2004` |
| Four-Per-Em Space | `U+2005` |
| Six-Per-Em Space | `U+2006` |
| Figure Space | `U+2007` |
| Punctuation Space | `U+2008` |
| Thin Space | `U+2009` |
| Hair Space | `U+200A` |
| Narrow No-Break Space| `U+202F` |
| Medium Mathematical Space | `U+205F` |
| Ideographic Space | `U+3000` |
#### Single-line comments
Any text after `//`, until the next literal [Newline](#newline) is "commented
out", and is considered to be [Whitespace](#whitespace).
#### Multi-line comments
In addition to single-line comments using `//`, comments can also be started
with `/*` and ended with `*/`. These comments can span multiple lines. They
are allowed in all positions where [Whitespace](#whitespace) is allowed and
can be nested.
#### Slashdash comments
Finally, a special kind of comment called a "slashdash", denoted by `/-`, can
be used to comment out entire _components_ of a KDL document logically, and
have those elements be treated as whitespace.
Slashdash comments can be used before:
* A [Node](#node) name (or its type annotation): the entire Node is
treated as Whitespace, including all props, args, and children.
* A node [Argument](#argument) (or its type annotation), in which case
the Argument value is treated as Whitespace.
* A [Property](#property) key, in which case the entire property, both
key and value, is treated as Whitespace.
* A [Children Block](#children-block), in which case the entire block,
including all children within, is treated as Whitespace.
### Newline
The following characters [should be treated as new
lines](https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf):
| Acronym | Name | Code Pt |
|---------|-----------------|---------|
| CR | Carriage Return | `U+000D` |
| LF | Line Feed | `U+000A` |
| CRLF | Carriage Return and Line Feed | `U+000D` + `U+000A` |
| NEL | Next Line | `U+0085` |
| FF | Form Feed | `U+000C` |
| LS | Line Separator | `U+2028` |
| PS | Paragraph Separator | `U+2029` |
Note that for the purpose of new lines, CRLF is considered _a single newline_.
### Disallowed Literal Code Points
The following code points may not appear literally anywhere in the document.
They may be represented in Strings (but not Raw Strings) using `\u{}`.
* The codepoints `U+0000-0008`
or the codepoints `U+000E-001F` (various control characters).
* `U+007F` (the Delete control character).
* Any codepoint that is not a [Unicode Scalar
Value](https://unicode.org/glossary/#unicode_scalar_value).
* `U+2066-2069`, `U+202A-202E`, `U+200E`, and `U+200F`, the [unicode
"direction control"
characters](https://www.w3.org/International/questions/qa-bidi-unicode-controls)
## Full Grammar
This is the full official grammar for KDL and should be considered
authoritative if something seems to disagree with the text above. The [grammar
language syntax](#grammar-language) is defined below.
```
document := bom? nodes
nodes := (line-space* node)* line-space*
plain-line-space := newline | ws | single-line-comment
plain-node-space := ws* escline ws* | ws+
line-space := plain-line-space+ ('/-' plain-node-space* node)?
node-space := plain-node-space+ ('/-' plain-node-space* (node-prop-or-arg | node-children))?
required-node-space := node-space* plain-node-space+
optional-node-space := node-space*
base-node := type? optional-node-space string (required-node-space node-prop-or-arg)* (required-node-space node-children)?
node := base-node optional-node-space node-terminator
final-node := base-node optional-node-space node-terminator?
node-prop-or-arg := prop | value
node-children := '{' nodes final-node? '}'
node-terminator := single-line-comment | newline | ';' | eof
prop := string optional-node-space equals-sign optional-node-space value
value := type? optional-node-space (string | number | keyword)
type := '(' optional-node-space string optional-node-space ')'
equals-sign := See Table (Equals Sign)
string := identifier-string | quoted-string | raw-string
identifier-string := unambiguous-ident | signed-ident | dotted-ident
unambiguous-ident := ((identifier-char - digit - sign - '.') identifier-char*) - 'true' - 'false' - 'null'
signed-ident := sign ((identifier-char - digit - '.') identifier-char*)?
dotted-ident := sign? '.' ((identifier-char - digit) identifier-char*)?
identifier-char := unicode - line-space - [\\/(){};\[\]="#] - disallowed-literal-code-points
quoted-string := '"' (single-line-string-body | newline multi-line-string-body newline ws*) '"'
single-line-string-body := (string-character - newline)*
multi-line-string-body := string-character*
string-character := '\' escape | [^\\"] - disallowed-literal-code-points
escape := ["\\bfnrt] | 'u{' hex-digit{1, 6} '}' | (unicode-space | newline)+
hex-digit := [0-9a-fA-F]
raw-string := '#' raw-string-quotes '#' | '#' raw-string '#'
raw-string-quotes := '"' (single-line-raw-string-body | newline multi-line-raw-string-body newline ws*) '"'
single-line-raw-string-body := (unicode - newline - disallowed-literal-code-points)*
multi-line-raw-string-body := (unicode - disallowed-literal-code-points)*
number := hex | octal | binary | decimal
decimal := sign? integer ('.' integer)? exponent?
exponent := ('e' | 'E') sign? integer
integer := digit (digit | '_')*
digit := [0-9]
sign := '+' | '-'
hex := sign? '0x' hex-digit (hex-digit | '_')*
octal := sign? '0o' [0-7] [0-7_]*
binary := sign? '0b' ('0' | '1') ('0' | '1' | '_')*
keyword := boolean | '#null'
boolean := '#true' | '#false'
escline := '\\' ws* (single-line-comment | newline | eof)
newline := See Table (All line-break white_space)
ws := unicode-space | multi-line-comment
bom := '\u{FEFF}'
disallowed-literal-code-points := See Table (Disallowed Literal Code Points)
unicode-space := See Table (All White_Space unicode characters which are not `newline`)
single-line-comment := '//' ^newline* (newline | eof)
multi-line-comment := '/*' commented-block
commented-block := '*/' | (multi-line-comment | '*' | '/' | [^*/]+) commented-block
```
### Grammar language
The grammar language syntax is a combination of ABNF with some regex spice thrown in.
Specifically:
* Single quotes (`'`) are used to denote literal text. `\` within a literal
string is used for escaping other single-quotes, for initiating unicode
characters using hex values (`\u{FEFF}`), and for escaping `\` itself
(`\\`).
* `*` is used for "zero or more", `+` is used for "one or more", and `?` is
used for "zero or one".
* `()` can be used to group matches that must be matched together.
* `a | b` means `a or b`, whichever matches first. If multiple items appear before
a `|`, they form a single group. `a b c | d` is equivalent to `(a b c) | d`.
* `[]` are used for regex-style character matches, where any character between
the brackets will be a single match. `\` is used to escape `\`, `[`, and
`]`. They also support character ranges (`0-9`), and negation (`^`).
* `-` is used for "except for" or "minus" whatever follows it. For example,
`a - 'x'` means "any `a`, except something that matches the literal `'x'`".
* The prefix `^` means "something that does not match" whatever follows it.
For example, `^foo` means "must not match `foo`".

View File

@ -1,544 +0,0 @@
# KDL v1 Spec
This is the semi-formal specification for the legacy version of KDL, including
the intended data model and the grammar.
This document describes KDL version `1.0.0`. It was released on September 11, 2021.
Information in this spec is intended as both an accessible historical record,
and a reference for KDL implementors who are interested in supporting both major
versions of the language.
The v1 spec will not receive further updates outside of minor, inconsequential
rewordings or other superficial fixes and is considered a "legacy" version.
## Compatibility
KDL v2 is designed such that for any given KDL document written as KDL
1.0 or [KDL 2.0](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html),
the parse will either fail completely, or, if the
parse succeeds, the data represented by a v1 or v2 parser will be identical.
This means that it's safe to use a fallback parsing strategy in order to support
both v1 and v2 simultaneously. For example, `node "foo"` is a valid node in both
versions, and should be represented identically by parsers.
A version marker `/- kdl-version 1` (or `2`) _MAY_ be added to the beginning of
a KDL document, optionally preceded by the BOM, and parsers _MAY_ use that as a
hint as to which version to parse the document as.
## Introduction
KDL is a node-oriented document language. Its niche and purpose overlap with
XML, as do many of its semantics. You can use KDL both as a configuration
language, and a data exchange or storage format, if you so choose.
The bulk of this document is dedicated to a long-form description of all
[Components](#components) of a KDL document. There is also a much more terse
[Grammar](#full-grammar) at the end of the document that covers most of the
rules, with some semantic exceptions involving the data model.
KDL is designed to be easy to read _and_ easy to implement.
In this document, references to "left" or "right" refer to directions in the
*data stream* towards the beginning or end, respectively; in other words,
the directions if the data stream were only ASCII text. They do not refer
to the writing direction of text, which can flow in either direction,
depending on the characters used.
## Components
### Document
The toplevel concept of KDL is a Document. A Document is composed of zero or
more [Nodes](#node), separated by newlines and whitespace, and eventually
terminated by an EOF.
All KDL documents should be UTF-8 encoded and conform to the specifications in
this document.
#### Example
The following is a document composed of two toplevel nodes:
```kdl
foo {
bar
}
baz
```
### Node
Being a node-oriented language means that the real core component of any KDL
document is the "node". Every node must have a name, which is an
[Identifier](#identifier).
The name may be preceded by a [Type Annotation](#type-annotation) to further
clarify its type, particularly in relation to its parent node. (For example,
clarifying that a particular `date` child node is for the _publication_ date,
rather than the last-modified date, with `(published)date`.)
Following the name are zero or more [Arguments](#argument) or
[Properties](#property), separated by either [whitespace](#whitespace) or [a
slash-escaped line continuation](#line-continuation). Arguments and Properties
may be interspersed in any order, much like is common with positional
arguments vs options in command line tools.
[Children](#children-block) can be placed after the name and the optional
Arguments and Properties, possibly separated by either whitespace or a
slash-escaped line continuation.
Arguments are ordered relative to each other (but not relative to Properties)
and that order must be preserved in order to maintain the semantics.
By contrast, Property order _SHOULD NOT_ matter to implementations.
[Children](#children-block) should be used if an order-sensitive key/value
data structure must be represented in KDL.
Nodes _MAY_ be prefixed with `/-` to "comment out" the entire node, including
its properties, arguments, and children, and make it act as plain whitespace,
even if it spreads across multiple lines.
Finally, a node is terminated by either a [Newline](#newline), a semicolon (`;`)
or the end of the file/stream (an `EOF`).
#### Example
```kdl
foo 1 key="val" 3 {
bar
(role)baz 1 2
}
```
### Identifier
An Identifier is either a [Bare Identifier](#bare-identifier), which is an
unquoted string like `node` or `item`, or a [String](#string), which is quoted,
like `"node"` or `"two words"`. There's no semantic difference between the
kinds of identifier; this simply allows for the use of quotes to have unusual
identifiers that are inexpressible as bare identifiers.
### Bare Identifier
A Bare Identifier is composed of any Unicode codepoint other than [non-initial
characters](#non-initial-characters), followed by any number of Unicode
codepoints other than [non-identifier characters](#non-identifier-characters),
so long as this doesn't produce something confusable for a [Number](#number),
[Boolean](#boolean), or [Null](#null). For example, both a [Number](#number)
and an Identifier can start with `-`, but when an Identifier starts with `-`
the second character cannot be a digit. This is precisely specified in the
[Full Grammar](#full-grammar) below.
Identifiers are terminated by [Whitespace](#whitespace) or
[Newlines](#newline).
### Non-initial characters
The following characters cannot be the first character in a
[Bare Identifier](#bare-identifier):
* Any decimal digit (0-9)
* Any [non-identifier characters](#non-identifier-characters)
Be aware that the `-` character can only be used as an initial
character if the second character is not a digit. This allows
identifiers to look like `--this`, and removes the ambiguity
of having an identifier look like a negative number.
### Non-identifier characters
The following characters cannot be used anywhere in a [Bare Identifier](#bare-identifier):
* Any codepoint with hexadecimal value `0x20` or below.
* Any codepoint with hexadecimal value higher than `0x10FFFF`.
* Any of `\/(){}<>;[]=,"`
### Line Continuation
Line continuations allow [Nodes](#node) to be spread across multiple lines.
A line continuation is a `\` character followed by zero or more whitespace
characters and an optional single-line comment. It must be terminated by a
[Newline](#newline) (including the Newline that is part of single-line comments).
Following a line continuation, processing of a Node can continue as usual.
#### Example
```kdl
my-node 1 2 \ // comments are ok after \
3 4 // This is the actual end of the Node.
```
### Property
A Property is a key/value pair attached to a [Node](#node). A Property is
composed of an [Identifier](#identifier), followed immediately by a `=`, and then a [Value](#value).
Properties should be interpreted left-to-right, with rightmost properties with
identical names overriding earlier properties. That is:
```kdl
node a=1 a=2
```
In this example, the node's `a` value must be `2`, not `1`.
No other guarantees about order should be expected by implementers.
Deserialized representations may iterate over properties in any order and
still be spec-compliant.
Properties _MAY_ be prefixed with `/-` to "comment out" the entire token and
make it act as plain whitespace, even if it spreads across multiple lines.
### Argument
An Argument is a bare [Value](#value) attached to a [Node](#node), with no
associated key. It shares the same space as [Properties](#property), and may be interleaved with them.
A Node may have any number of Arguments, which should be evaluated left to
right. KDL implementations _MUST_ preserve the order of Arguments relative to
each other (not counting Properties).
Arguments _MAY_ be prefixed with `/-` to "comment out" the entire token and
make it act as plain whitespace, even if it spreads across multiple lines.
#### Example
```kdl
my-node 1 2 3 "a" "b" "c"
```
### Children Block
A children block is a block of [Nodes](#node), surrounded by `{` and `}`. They
are an optional part of nodes, and create a hierarchy of KDL nodes.
Regular node termination rules apply, which means multiple nodes can be
included in a single-line children block, as long as they're all terminated by
`;`.
#### Example
```kdl
parent {
child1
child2
}
parent { child1; child2; }
```
### Value
A value is either: a [String](#string), a [Number](#number), a
[Boolean](#boolean), or [Null](#null).
Values _MUST_ be either [Arguments](#argument) or values of
[Properties](#property).
Values (both as arguments and as properties) _MAY_ be prefixed by a single
[Type Annotation](#type-annotation).
### Type Annotation
A type annotation is a prefix to any [Node Name](#node) or [Value](#value) that
includes a _suggestion_ of what type the value is _intended_ to be treated as,
or as a _context-specific elaboration_ of the more generic type the node name
indicates.
Type annotations are written as a set of `(` and `)` with an
[Identifier](#identifier) in it. Any valid identifier is considered a valid
type annotation. There must be no whitespace between a type annotation and its
associated Node Name or Value.
KDL does not specify any restrictions on what implementations might do with
these annotations. They are free to ignore them, or use them to make decisions
about how to interpret a value.
Additionally, the following type annotations MAY be recognized by KDL parsers
and, if used, SHOULD interpret these types as follows:
#### Reserved Type Annotations for Numbers Without Decimals:
Signed integers of various sizes (the number is the bit size):
* `i8`
* `i16`
* `i32`
* `i64`
Unsigned integers of various sizes (the number is the bit size):
* `u8`
* `u16`
* `u32`
* `u64`
Platform-dependent integer types, both signed and unsigned:
* `isize`
* `usize`
#### Reserved Type Annotations for Numbers With Decimals:
IEEE 754 floating point numbers, both single (32) and double (64) precision:
* `f32`
* `f64`
IEEE 754-2008 decimal floating point numbers
* `decimal64`
* `decimal128`
#### Reserved Type Annotations for Strings:
* `date-time`: ISO8601 date/time format.
* `time`: "Time" section of ISO8601.
* `date`: "Date" section of ISO8601.
* `duration`: ISO8601 duration format.
* `decimal`: IEEE 754-2008 decimal string format.
* `currency`: ISO 4217 currency code.
* `country-2`: ISO 3166-1 alpha-2 country code.
* `country-3`: ISO 3166-1 alpha-3 country code.
* `country-subdivision`: ISO 3166-2 country subdivision code.
* `email`: RFC5322 email address.
* `idn-email`: RFC6531 internationalized email address.
* `hostname`: RFC1123 internet hostname (only ASCII segments)
* `idn-hostname`: RFC5890 internationalized internet hostname (only `xn--`-prefixed ASCII "punycode" segments, or non-ASCII segments)
* `ipv4`: RFC2673 dotted-quad IPv4 address.
* `ipv6`: RFC2373 IPv6 address.
* `url`: RFC3986 URI.
* `url-reference`: RFC3986 URI Reference.
* `irl`: RFC3987 Internationalized Resource Identifier.
* `irl-reference`: RFC3987 Internationalized Resource Identifier Reference.
* `url-template`: RFC6570 URI Template.
* `uuid`: RFC4122 UUID.
* `regex`: Regular expression. Specific patterns may be implementation-dependent.
* `base64`: A Base64-encoded string, denoting arbitrary binary data.
#### Examples
```kdl
node (u8)123
node prop=(regex)".*"
(published)date "1970-01-01"
(contributor)person name="Foo McBar"
```
### String
Strings in KDL represent textual [Values](#value), or unusual identifiers. A
String is either a [Quoted String](#quoted-string) or a
[Raw String](#raw-string). Quoted Strings may include escaped characters, while
Raw Strings always contain only the literal characters that are present.
### Quoted String
A Quoted String is delimited by `"` on either side of any number of literal
string characters except unescaped `"` and `\`. This includes literal
[Newline](#newline) characters, which means a String Value can encompass
multiple lines without behaving like a Newline for [Node](#node) parsing
purposes.
Strings _MUST_ be represented as UTF-8 values.
In addition to literal code points, a number of "escapes" are supported.
"Escapes" are the character `\` followed by another character, and are
interpreted as described in the following table:
| Name | Escape | Code Pt |
|-------------------------------|--------|----------|
| Line Feed | `\n` | `U+000A` |
| Carriage Return | `\r` | `U+000D` |
| Character Tabulation (Tab) | `\t` | `U+0009` |
| Reverse Solidus (Backslash) | `\\` | `U+005C` |
| Solidus (Forwardslash) | `\/` | `U+002F` |
| Quotation Mark (Double Quote) | `\"` | `U+0022` |
| Backspace | `\b` | `U+0008` |
| Form Feed | `\f` | `U+000C` |
| Unicode Escape | `\u{(1-6 hex chars)}` | Code point described by hex characters, up to `10FFFF` |
### Raw String
Raw Strings in KDL are much like [Quoted Strings](#quoted-string), except they
do not support `\`-escapes. They otherwise share the same properties as far as
literal [Newline](#newline) characters go, and the requirement of UTF-8
representation.
Raw String literals are represented as `r`, followed by zero or more `#`
characters, followed by `"`, followed by any number of UTF-8 literals. The
string is then closed by a `"` followed by a _matching_ number of `#`
characters. This allows them to contain raw `"` or `#` characters; only the
precise terminator (resembling `"##`, for example) ends the raw string. This
means that the string sequence `"` or `"#` and such must not match the closing
`"` with the same or more `#` characters as the opening `r`.
#### Example
```kdl
just-escapes r"\n will be literal"
quotes-and-escapes r#"hello\n\r\asd"world"#
```
### Number
Numbers in KDL represent numerical [Values](#value). There is no logical distinction in KDL
between real numbers, integers, and floating point numbers. It's up to
individual implementations to determine how to represent KDL numbers.
There are four syntaxes for Numbers: Decimal, Hexadecimal, Octal, and Binary.
* All numbers may optionally start with one of `-` or `+`, which determine whether they'll be positive or negative.
* Binary numbers start with `0b` and only allow `0` and `1` as digits, which may be separated by `_`. They represent numbers in radix 2.
* Octal numbers start with `0o` and only allow digits between `0` and `7`, which may be separated by `_`. They represent numbers in radix 8.
* Hexadecimal numbers start with `0x` and allow digits between `0` and `9`, as well as letters `A` through `F`, in either lower or upper case, which may be separated by `_`. They represent numbers in radix 16.
* Decimal numbers are a bit more special:
* They have no radix prefix.
* They use digits `0` through `9`, which may be separated by `_`.
* They may optionally include a decimal separator `.`, followed by more digits, which may again be separated by `_`.
* They may optionally be followed by `E` or `e`, an optional `-` or `+`, and more digits, to represent an exponent value.
### Boolean
A boolean [Value](#value) is either the symbol `true` or `false`. These
_SHOULD_ be represented by implementations as boolean logical values, or some
approximation thereof.
#### Example
```kdl
my-node true value=false
```
### Null
The symbol `null` represents a null [Value](#value). It's up to the
implementation to decide how to represent this, but it generally signals the
"absence" of a value. It is reasonable for an implementation to ignore null
values altogether when deserializing.
#### Example
```kdl
my-node null key=null
```
### Whitespace
The following characters should be treated as non-[Newline](#newline) [white
space](https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt):
| Name | Code Pt |
|----------------------|---------|
| Character Tabulation | `U+0009` |
| Space | `U+0020` |
| No-Break Space | `U+00A0` |
| Ogham Space Mark | `U+1680` |
| En Quad | `U+2000` |
| Em Quad | `U+2001` |
| En Space | `U+2002` |
| Em Space | `U+2003` |
| Three-Per-Em Space | `U+2004` |
| Four-Per-Em Space | `U+2005` |
| Six-Per-Em Space | `U+2006` |
| Figure Space | `U+2007` |
| Punctuation Space | `U+2008` |
| Thin Space | `U+2009` |
| Hair Space | `U+200A` |
| Narrow No-Break Space| `U+202F` |
| Medium Mathematical Space | `U+205F` |
| Ideographic Space | `U+3000` |
#### Multi-line comments
In addition to single-line comments using `//`, comments can also be started
with `/*` and ended with `*/`. These comments can span multiple lines. They
are allowed in all positions where [Whitespace](#whitespace) is allowed and
can be nested.
### Newline
The following characters [should be treated as new
lines](https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-5/#G41643):
| Acronym | Name | Code Pt |
|---------|-----------------|---------|
| CRLF | Carriage Return and Line Feed | `U+000D` + `U+000A` |
| CR | Carriage Return | `U+000D` |
| LF | Line Feed | `U+000A` |
| NEL | Next Line | `U+0085` |
| FF | Form Feed | `U+000C` |
| LS | Line Separator | `U+2028` |
| PS | Paragraph Separator | `U+2029` |
Note that for the purpose of new lines, CRLF is considered _a single newline_. `VT` (Vertical Tab, `U+000B`) was mistakenly excluded, but the v1 spec is frozen, so it's left unchanged.
## Full Grammar
```
nodes := linespace* (node nodes?)? linespace*
node := ('/-' node-space*)? type? identifier (node-space+ node-prop-or-arg)* (node-space* node-children ws*)? node-space* node-terminator
node-prop-or-arg := ('/-' node-space*)? (prop | value)
node-children := ('/-' node-space*)? '{' nodes '}'
node-space := ws* escline ws* | ws+
node-terminator := single-line-comment | newline | ';' | eof
identifier := string | bare-identifier
bare-identifier := ((identifier-char - digit - sign) identifier-char* | sign ((identifier-char - digit) identifier-char*)?) - keyword
identifier-char := unicode - linespace - [\/(){}<>;[]=,"]
keyword := boolean | 'null'
prop := identifier '=' value
value := type? (string | number | keyword)
type := '(' identifier ')'
string := raw-string | escaped-string
escaped-string := '"' character* '"'
character := '\' escape | [^\"]
escape := ["\\/bfnrt] | 'u{' hex-digit{1, 6} '}'
hex-digit := [0-9a-fA-F]
raw-string := 'r' raw-string-hash
raw-string-hash := '#' raw-string-hash '#' | raw-string-quotes
raw-string-quotes := '"' .* '"'
number := hex | octal | binary | decimal
decimal := sign? integer ('.' integer)? exponent?
exponent := ('e' | 'E') sign? integer
integer := digit (digit | '_')*
digit := [0-9]
sign := '+' | '-'
hex := sign? '0x' hex-digit (hex-digit | '_')*
octal := sign? '0o' [0-7] [0-7_]*
binary := sign? '0b' ('0' | '1') ('0' | '1' | '_')*
boolean := 'true' | 'false'
escline := '\\' ws* (single-line-comment | newline)
linespace := newline | ws | single-line-comment
newline := See Table (All line-break white_space)
ws := bom | unicode-space | multi-line-comment
bom := '\u{FEFF}'
unicode-space := See Table (All White_Space unicode characters which are not `newline`)
single-line-comment := '//' ^newline+ (newline | eof)
multi-line-comment := '/*' commented-block
commented-block := '*/' | (multi-line-comment | '*' | '/' | [^*/]+) commented-block
```

View File

@ -25,7 +25,7 @@ XML elements and KDL nodes have a direct correspondence. In XiK, an XML element
* making the attributes into KDL properties * making the attributes into KDL properties
* making the child nodes as KDL child nodes * making the child nodes as KDL child nodes
For example, the XML `<element foo="bar"><child baz="quux" /></element>` is encoded into XiK as `element foo=bar { child baz=quux }`. For example, the XML `<element foo="bar"><child baz="qux" /></element>` is encoded into XiK as `element foo=bar { child baz=quux }`.
XML namespaces are encoded the same as XML: the node name simply contains a `:` character. Note that KDL identifier syntax allows `:` directly in an ident, so a name like `xml:space` or `xlink:href` is a valid node or property name. XML namespaces are encoded the same as XML: the node name simply contains a `:` character. Note that KDL identifier syntax allows `:` directly in an ident, so a name like `xml:space` or `xlink:href` is a valid node or property name.

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,7 @@
package { package {
name kdl name kdl
version "0.0.0" version "0.0.0"
description "The kdl document language" description "kat's document language"
authors "Kat Marchán <kzm@zkat.tech>" authors "Kat Marchán <kzm@zkat.tech>"
license-file LICENSE.md license-file LICENSE.md
edition "2018" edition "2018"

View File

@ -19,8 +19,8 @@ jobs {
components rustfmt components rustfmt
override #true override #true
} }
step rustfmt { run cargo fmt --all -- --check } step rustfmt run="cargo fmt --all -- --check"
step docs { run cargo doc --no-deps } step docs run="cargo doc --no-deps"
} }
} }
build_and_test "Build & Test" { build_and_test "Build & Test" {
@ -40,13 +40,13 @@ jobs {
components clippy components clippy
override #true override #true
} }
step Clippy { run cargo clippy --all -- -D warnings } step Clippy run="cargo clippy --all -- -D warnings"
step "Run tests" { run cargo test --all --verbose } step "Run tests" run="cargo test --all --verbose"
step "Other Stuff" run=""" step "Other Stuff" run="
echo foo echo foo
echo bar echo bar
echo baz echo baz
""" "
} }
} }
} }

View File

@ -290,7 +290,7 @@ document {
type number type number
} }
} }
node ">=" description="Only used for numeric values. Constrains them to be greater than or equal to the given number(s)" { node >= description="Only used for numeric values. Constrains them to be greater than or equal to the given number(s)" {
max 1 max 1
value { value {
min 1 min 1
@ -306,7 +306,7 @@ document {
type number type number
} }
} }
node "<=" description="Only used for numeric values. Constrains them to be less than or equal to the given number(s)" { node <= description="Only used for numeric values. Constrains them to be less than or equal to the given number(s)" {
max 1 max 1
value { value {
min 1 min 1

View File

@ -6,13 +6,13 @@ html lang=en {
meta \ meta \
name=description \ name=description \
content="kdl is a document language, mostly based on SDLang, with xml-like semantics that looks like you're invoking a bunch of CLI commands!" content="kdl is a document language, mostly based on SDLang, with xml-like semantics that looks like you're invoking a bunch of CLI commands!"
title "kdl - The KDL Document Language" title "kdl - Kat's Document Language"
link rel=stylesheet href="/styles/global.css" link rel=stylesheet href="/styles/global.css"
} }
body { body {
main { main {
header class="py-10 bg-gray-300" { header class="py-10 bg-gray-300" {
h1 class="text-4xl text-center" "kdl - The KDL Document Language" h1 class="text-4xl text-center" "kdl - Kat's Document Language"
} }
section class=kdl-section id=description { section class=kdl-section id=description {
p { p {

View File

@ -2,11 +2,9 @@
The `input` folder contains test cases for KDL parsers. The `expected_kdl` The `input` folder contains test cases for KDL parsers. The `expected_kdl`
folder contains files with the same name as those in `input` with the expected folder contains files with the same name as those in `input` with the expected
output after being run through the parser and printed out again. output after being run through the parser and printed out again. If there's no
file in `expected_kdl` with a name corresponding to one in `input` it
If a testcase is intended to fail parsing, indicates that parsing for that case should fail.
the `input` file _MUST_ have a `_fail` suffix,
and there must be no corresponding file in `expected_kdl`.
## Translation Rules ## Translation Rules
@ -54,7 +52,3 @@ please send a PR.
If you think the disagreement is due to a genuine error or oversight in the If you think the disagreement is due to a genuine error or oversight in the
KDL specification, please open an issue explaining the matter and the change KDL specification, please open an issue explaining the matter and the change
will be considered for the next version of the KDL spec. will be considered for the next version of the KDL spec.
# Benchmarks
The `benchmarks` folder contains some large or gnarly documents intended to be used to stress-test your parser and help with profiling. They are intentionally not part of the testsuite, and just provided for your own personal benefit.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,3 +0,0 @@
foo123 {
bar
}

View File

@ -1 +0,0 @@
node "12"

View File

@ -1,2 +0,0 @@
node
node

View File

@ -1,4 +0,0 @@
parent {
child
child
}

View File

@ -1 +0,0 @@
(type)node

View File

@ -1 +0,0 @@
floats #inf #-inf #nan

View File

@ -1 +0,0 @@
another-node

View File

@ -1 +0,0 @@
node "\"\"\"triple-quote\"\"\"\n##\"too few quotes\"##\n#\"\"\"too few #\"\"\"#"

View File

@ -1 +0,0 @@
node " hey\n everyone\n how goes?"

View File

@ -1 +0,0 @@
node "this string contains \"quotes\", twice\"\""

View File

@ -1 +0,0 @@
node "a\\ b\na\\b"

View File

@ -1 +0,0 @@
node " hey\n everyone\n how goes?"

View File

@ -1 +0,0 @@
node "" "" "" "\n\n " "\n"

View File

@ -1 +0,0 @@
node foo bar

View File

@ -1,3 +0,0 @@
node foo {
three
}

View File

@ -0,0 +1 @@
node p1=val1 p2=val2 p3=val3

View File

@ -1 +0,0 @@
ノード お名前=ฅ^•ﻌ•^ฅ

View File

@ -1 +1 @@
foo123~!@$%^&*.:'|?+<>,`-_ weeee foo123~!@$%^&*.:'|?+<>, weeee

View File

@ -1 +1 @@
foo123~!@$%^&*.:'|?+<>,`-_ weeee foo123~!@$%^&*.:'|?+<>, weeee

View File

@ -1,2 +1 @@
node arg node arg
node2 arg2

View File

@ -0,0 +1 @@
node 0

View File

@ -1,2 +0,0 @@
node string
node string

View File

@ -1 +0,0 @@
foo123{bar}

View File

@ -1,3 +1,2 @@
/- node_1 /- node_1
node_2 node_2
/- node_3

View File

@ -1,4 +0,0 @@
node "1\
2"

View File

@ -1,10 +1,10 @@
// All of these strings are the same // All of these strings are the same
node \ node \
"Hello\n\tWorld" \ "Hello\n\tWorld" \
""" "
Hello Hello
World World
""" \ " \
"Hello\n\ \tWorld" \ "Hello\n\ \tWorld" \
"Hello\n\ "Hello\n\
\tWorld" \ \tWorld" \

View File

@ -1,2 +0,0 @@
node; \
node

View File

@ -1 +0,0 @@
\

View File

@ -1,3 +0,0 @@
\
node

View File

@ -1,3 +0,0 @@
a \
b

View File

@ -1,5 +0,0 @@
parent {
child
\ // comment
child
}

View File

@ -1,3 +1,2 @@
node1 node1
\
node2 node2

View File

@ -1,2 +0,0 @@
\
(type)node

View File

@ -1,4 +0,0 @@
node
\
/-
node

Some files were not shown because too many files have changed in this diff Show More