From 4263a9de3f6d457fe562906b436bd7f4a2728a4c Mon Sep 17 00:00:00 2001 From: Jeffrey Yasskin Date: Fri, 17 Jan 2025 21:06:17 -0800 Subject: [PATCH] Convert the specification into RFC format (#466) Fixes: https://github.com/kdl-org/kdl/issues/461 * Initial conversion to Internet-Draft. * Setup repository for draft-marchan-kdl2 using https://github.com/martinthomson/i-d-template * Remove unnecessary changes. * Tweak source to make the I-D build. * Fix zkat's name. Guess at their preferred initials. * Separate links where linking text equals the anchor. This works around https://github.com/cabo/kramdown-rfc/issues/249. * Replace horizontal rules with subsections. RFCXML v3 doesn't include horizontal rules. * Use ~~~kdl instead of `` for strings with significant whitespace. That compiles to which preserves sequences of spaces. * Give breadcrumbs to the RFC format definitions. * Point contributors to https://author-tools.ietf.org/. For if installing the dependencies is inconvenient. * Separate a code block from adjacent text. * Remove a workflow that overwrites customized files. * Remove the 'archive' workflow. * Use "Term ({{term}})" uniformly instead of just when the term matches the anchor. * Fix the instructions for preserving the RFCXML. * Remove the default copyright notice until the spec is relicensed. * Take the IETF licensing details out of CONTRIBUTING.md. 
--- .editorconfig | 8 + .github/workflows/ghpages.yml | 60 ++++ .github/workflows/publish.yml | 57 ++++ .gitignore | 23 ++ .note.xml | 4 + CONTRIBUTING.md | 22 ++ Makefile | 15 + QUERY-SPEC.md | 2 +- README.md | 8 +- SPEC_v1.md | 5 +- SPEC.md => draft-marchan-kdl2.md | 455 ++++++++++++++++++------------- 11 files changed, 466 insertions(+), 193 deletions(-) create mode 100644 .editorconfig create mode 100644 .github/workflows/ghpages.yml create mode 100644 .github/workflows/publish.yml create mode 100644 .note.xml create mode 100644 CONTRIBUTING.md create mode 100644 Makefile rename SPEC.md => draft-marchan-kdl2.md (77%) diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..8682023 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,8 @@ +# See http://editorconfig.org + +root = true + +[*.{md,xml,org}] +charset = utf-8 +insert_final_newline = true +trim_trailing_whitespace = true diff --git a/.github/workflows/ghpages.yml b/.github/workflows/ghpages.yml new file mode 100644 index 0000000..f6cb64d --- /dev/null +++ b/.github/workflows/ghpages.yml @@ -0,0 +1,60 @@ +name: "Update Editor's Copy" + +on: + push: + paths-ignore: + - README.md + - CONTRIBUTING.md + - LICENSE.md + - .gitignore + pull_request: + paths-ignore: + - README.md + - CONTRIBUTING.md + - LICENSE.md + - .gitignore + +jobs: + build: + name: "Update Editor's Copy" + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - name: "Checkout" + uses: actions/checkout@v4 + + - name: "Setup" + id: setup + run: date -u "+date=%FT%T" >>"$GITHUB_OUTPUT" + + - name: "Caching" + uses: actions/cache@v4 + with: + path: | + .refcache + .venv + .gems + node_modules + .targets.mk + key: i-d-${{ steps.setup.outputs.date }} + restore-keys: i-d- + + - name: "Build Drafts" + uses: martinthomson/i-d-template@v1 + with: + token: ${{ github.token }} + + - name: "Update GitHub Pages" + uses: martinthomson/i-d-template@v1 + if: ${{ github.event_name == 'push' }} + with: + make: gh-pages + token: 
${{ github.token }} + + - name: "Archive Built Drafts" + uses: actions/upload-artifact@v4 + with: + path: | + draft-*.html + draft-*.txt diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..94d885f --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,57 @@ +name: "Publish New Draft Version" + +on: + push: + tags: + - "draft-*" + workflow_dispatch: + inputs: + email: + description: "Submitter email" + default: "" + type: string + +jobs: + build: + name: "Publish New Draft Version" + runs-on: ubuntu-latest + steps: + - name: "Checkout" + uses: actions/checkout@v4 + + # See https://github.com/actions/checkout/issues/290 + - name: "Get Tag Annotations" + run: git fetch -f origin ${{ github.ref }}:${{ github.ref }} + + - name: "Setup" + id: setup + run: date -u "+date=%FT%T" >>"$GITHUB_OUTPUT" + + - name: "Caching" + uses: actions/cache@v4 + with: + path: | + .refcache + .venv + .gems + node_modules + .targets.mk + key: i-d-${{ steps.setup.outputs.date }} + restore-keys: i-d- + + - name: "Build Drafts" + uses: martinthomson/i-d-template@v1 + with: + token: ${{ github.token }} + + - name: "Upload to Datatracker" + uses: martinthomson/i-d-template@v1 + with: + make: upload + env: + UPLOAD_EMAIL: ${{ inputs.email }} + + - name: "Archive Submitted Drafts" + uses: actions/upload-artifact@v4 + with: + path: "versioned/draft-*-[0-9][0-9].*" diff --git a/.gitignore b/.gitignore index 96ef6c0..6562a43 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,25 @@ /target Cargo.lock +*.html +*.pdf +*.redxml +*.swp +*.txt +*.upload +*~ +.tags +/*-[0-9][0-9].xml +/.*.mk +/.gems/ +/.refcache +/.venv/ +/.vscode/ +/lib +/node_modules/ +/versioned/ +Gemfile.lock +archive.json +draft-marchan-kdl2.xml +package-lock.json +report.xml +!requirements.txt diff --git a/.note.xml b/.note.xml new file mode 100644 index 0000000..e52406d --- /dev/null +++ b/.note.xml @@ -0,0 +1,4 @@ + +Source for this draft and an issue tracker can be 
found at + . + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..2785acf --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,22 @@ +# Contributing + +## Mechanics + +Contributions can be made by creating pull requests. +The GitHub interface supports creating pull requests using the Edit (✏) button. + + +## Building the Specification + +The specification is written in +[kramdown-rfc](https://github.com/cabo/kramdown-rfc/wiki/Syntax2), which +compiles via [RFCXML](https://authors.ietf.org/rfcxml-vocabulary) to text and +HTML. + +You can build the formatted versions or the intermediate RFCXML file using +https://author-tools.ietf.org/ or locally by running `make`. To preserve the +intermediate RFCXML form in a local build, run `make draft-marchan-kdl2.xml` +once. + +Command line usage requires that you have the necessary software installed. See +[the instructions](https://github.com/martinthomson/i-d-template/blob/main/doc/SETUP.md). diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..9a5d15b --- /dev/null +++ b/Makefile @@ -0,0 +1,15 @@ +LIBDIR := lib +include $(LIBDIR)/main.mk + +$(LIBDIR)/main.mk: +ifneq (,$(shell grep "path *= *$(LIBDIR)" .gitmodules 2>/dev/null)) + git submodule sync + git submodule update --init +else +ifneq (,$(wildcard $(ID_TEMPLATE_HOME))) + ln -s "$(ID_TEMPLATE_HOME)" $(LIBDIR) +else + git clone -q --depth 10 -b main \ + https://github.com/martinthomson/i-d-template $(LIBDIR) +endif +endif diff --git a/QUERY-SPEC.md b/QUERY-SPEC.md index 2997370..36501b3 100644 --- a/QUERY-SPEC.md +++ b/QUERY-SPEC.md @@ -110,7 +110,7 @@ Then the following queries are valid: ## Full Grammar Rules that are not defined in this grammar are prefixed with `$`, see [the KDL -grammar](https://github.com/kdl-org/kdl/blob/main/SPEC.md#full-grammar) for +grammar](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html#full-grammar) for what they expand to. 
``` diff --git a/README.md b/README.md index c818334..08bf35e 100644 --- a/README.md +++ b/README.md @@ -46,16 +46,16 @@ package { For more details, see the [overview below](#overview). -There's a living [specification](SPEC.md), as well as various +There's a living [specification](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html), as well as various [implementations](#implementations). You can also check out the [FAQ](#faq) to answer all your burning questions! The current version of the KDL spec is -[KDL 2.0.0](https://github.com/kdl-org/kdl/blob/2.0.0/SPEC.md). For legacy KDL, +[KDL 2.0.0](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html). For legacy KDL, please refer to the [KDL 1.0.0 spec](https://github.com/kdl-org/kdl/blob/2.0.0/SPEC_v1.md). All users are encouraged to migrate. [Migration is forward-and-backward-compatible and -safe](https://github.com/kdl-org/kdl/blob/2.0.0/SPEC.md#compatibility), and can +safe](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html#compatibility), and can be automated. In addition to a spec for KDL itself, there are specifications for [a KDL Query @@ -400,7 +400,7 @@ SDLang is an excellent base, but I wanted some details ironed out, and some things removed that only really made sense for SDLang's current use-cases, including some restrictions about data representation. KDL is very similar in many ways, except: -* The grammar and expected semantics are [well-defined and specified](SPEC.md). +* The grammar and expected semantics are [well-defined and specified](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html). This was the original impetus for working on KDL, followed by details that seemed like they could be improved. * There is only one "number" type. KDL does not prescribe representations, but diff --git a/SPEC_v1.md b/SPEC_v1.md index 43436da..ed74ca0 100644 --- a/SPEC_v1.md +++ b/SPEC_v1.md @@ -22,7 +22,8 @@ simultaneously. 
For example, `node "foo"` is a valid node in both versions, and should be represented identically by parsers. KDL v2 is designed such that for any given KDL document written as KDL -1.0 or [KDL 2.0](./SPEC.md), the parse will either fail completely, or, if the +1.0 or [KDL 2.0](https://kdl-org.github.io/kdl/#go.draft-marchan-kdl2.html), +the parse will either fail completely, or, if the parse succeeds, the data represented by a v1 or v2 parser will be identical. This means that it's safe to use a fallback parsing strategy in order to support both v1 and v2 simultaneously. For example, `node "foo"` is a valid node in both @@ -540,4 +541,4 @@ unicode-space := See Table (All White_Space unicode characters which are not `ne single-line-comment := '//' ^newline+ (newline | eof) multi-line-comment := '/*' commented-block commented-block := '*/' | (multi-line-comment | '*' | '/' | [^*/]+) commented-block -``` \ No newline at end of file +``` diff --git a/SPEC.md b/draft-marchan-kdl2.md similarity index 77% rename from SPEC.md rename to draft-marchan-kdl2.md index 333349d..0d8171a 100644 --- a/SPEC.md +++ b/draft-marchan-kdl2.md @@ -1,4 +1,43 @@ -# KDL Spec +--- +title: "The KDL Document Language" +abbrev: "KDL" +docname: draft-marchan-kdl2-latest +submissionType: independent +category: exp + +ipr: none +area: General +venue: + github: kdl-org/kdl + home: https://kdl.dev/ +workgroup: KDL Community +keyword: + - Document-Language + - Configuration + +stand_alone: yes +smart_quotes: no +pi: [toc, sortrefs, symrefs] + +author: + - + name: Katerina Zoé Marchán Salvá + ins: K. Marchán + organization: Microsoft + - + name: The KDL Contributors + ins: KDL Contributors + +normative: + +informative: + + +--- abstract + +KDL is a node-oriented document language. Its niche and purpose overlap with +XML, as do many of its semantics. You can use KDL both as a configuration +language, and a data exchange or storage format, if you so choose.
This is the formal specification for KDL, including the intended data model and the grammar. @@ -7,7 +46,17 @@ This document describes KDL version KDL 2.0.0. It was released on 2024-12-21. It is the latest stable version of the language, and will only be edited for minor copyedits or major errata. -## Compatibility + +--- note_License + +This work is licensed under Creative Commons Attribution-ShareAlike 4.0 +International. To view a copy of this license, visit +https://creativecommons.org/licenses/by-sa/4.0/ + + +--- middle + +# Compatibility KDL 2.0 is designed such that for any given KDL document written as [KDL 1.0](./SPEC_v1.md) or KDL 2.0, the parse will either fail completely, or, if the @@ -20,15 +69,16 @@ A version marker `/- kdl-version 2` (or `1`) _MAY_ be added to the beginning of a KDL document, optionally preceded by the BOM, and parsers _MAY_ use that as a hint as to which version to parse the document as. -## Introduction +# Introduction KDL is a node-oriented document language. Its niche and purpose overlaps with XML, and as do many of its semantics. You can use KDL both as a configuration language, and a data exchange or storage format, if you so choose. The bulk of this document is dedicated to a long-form description of all -[Components](#components) of a KDL document. There is also a much more terse -[Grammar](#full-grammar) at the end of the document that covers most of the +Components ({{components}}) of a KDL document. +There is also a much more terse +Grammar ({{full-grammar}}) at the end of the document that covers most of the rules, with some semantic exceptions involving the data model. KDL is designed to be easy to read _and_ easy to implement. @@ -39,47 +89,47 @@ the directions if the data stream were only ASCII text. They do not refer to the writing direction of text, which can flow in either direction, depending on the characters used. -## Components +# Components -### Document +## Document The toplevel concept of KDL is a Document. 
A Document is composed of zero or -more [Nodes](#node), separated by newlines and whitespace, and eventually +more Nodes ({{node}}), separated by newlines and whitespace, and eventually terminated by an EOF. All KDL documents should be UTF-8 encoded and conform to the specifications in this document. -#### Example +### Example The following is a document composed of two toplevel nodes: -```kdl +~~~kdl foo { bar } baz -``` +~~~ -### Node +## Node Being a node-oriented language means that the real core component of any KDL document is the "node". Every node must have a name, which must be a -[String](#string). +String ({{string}}). -The name may be preceded by a [Type Annotation](#type-annotation) to further +The name may be preceded by a Type Annotation ({{type-annotation}}) to further clarify its type, particularly in relation to its parent node. (For example, clarifying that a particular `date` child node is for the _publication_ date, rather than the last-modified date, with `(published)date`.) -Following the name are zero or more [Arguments](#argument) or -[Properties](#property), separated by either [whitespace](#whitespace) or [a -slash-escaped line continuation](#line-continuation). Arguments and Properties +Following the name are zero or more Arguments ({{argument}}) or +Properties ({{property}}), separated by either whitespace ({{whitespace}}) or a +slash-escaped line continuation ({{line-continuation}}). Arguments and Properties may be interspersed in any order, much like is common with positional arguments vs options in command line tools. Collectively, Arguments and Properties may be referred to as "Entries". -[Children](#children-block) can be placed after the name and the optional +Children ({{children-block}}) can be placed after the name and the optional Entries, possibly separated by either whitespace or a slash-escaped line continuation. @@ -88,57 +138,57 @@ order to maintain the semantics. 
Properties between Arguments do not affect Argument ordering. By contrast, Properties _SHOULD NOT_ be assumed to be presented in a given -order. [Children](#children-block) should be used if an order-sensitive +order. Children ({{children-block}}) should be used if an order-sensitive key/value data structure must be represented in KDL. Cf. JSON objects preserving key order. -Nodes _MAY_ be prefixed with [Slashdash](#slashdash-comments) to "comment out" +Nodes _MAY_ be prefixed with Slashdash ({{slashdash-comments}}) to "comment out" the entire node, including its properties, arguments, and children, and make it act as plain whitespace, even if it spreads across multiple lines. -Finally, a node is terminated by either a [Newline](#newline), a semicolon +Finally, a node is terminated by either a Newline ({{newline}}), a semicolon (`;`), the end of a child block (`}`) or the end of the file/stream (an `EOF`). -#### Example +### Example -```kdl +~~~kdl // `foo` will have an Argument value list like `[1, 3]`. foo 1 key=val 3 { bar (role)baz 1 2 } -``` +~~~ -### Line Continuation +## Line Continuation -Line continuations allow [Nodes](#node) to be spread across multiple lines. +Line continuations allow Nodes ({{node}}) to be spread across multiple lines. A line continuation is a `\` character followed by zero or more whitespace items (including multiline comments) and an optional single-line comment. It -must be terminated by a [Newline](#newline) (including the Newline that is +must be terminated by a Newline ({{newline}}) (including the Newline that is part of single-line comments). Following a line continuation, processing of a Node can continue as usual. -#### Example +### Example -```kdl +~~~kdl my-node 1 2 \ // comments are ok after \ 3 4 // This is the actual end of the Node. -``` +~~~ -### Property +## Property -A Property is a key/value pair attached to a [Node](#node). 
A Property is -composed of a [String](#string), followed immediately by an equals sign (`=`, `U+003D`), -and then a [Value](#value). +A Property is a key/value pair attached to a Node ({{node}}). A Property is +composed of a String ({{string}}), followed immediately by an equals sign (`=`, `U+003D`), +and then a Value ({{value}}). Properties should be interpreted left-to-right, with rightmost properties with identical names overriding earlier properties. That is: -```kdl +~~~kdl node a=1 a=2 -``` +~~~ In this example, the node's `a` value must be `2`, not `1`. @@ -149,10 +199,10 @@ still be spec-compliant. Properties _MAY_ be prefixed with `/-` to "comment out" the entire token and make it act as plain whitespace, even if it spreads across multiple lines. -### Argument +## Argument -An Argument is a bare [Value](#value) attached to a [Node](#node), with no -associated key. It shares the same space as [Properties](#properties), and may be interleaved with them. +An Argument is a bare Value ({{value}}) attached to a Node ({{node}}), with no +associated key. It shares the same space as Properties ({{property}}), and may be interleaved with them. A Node may have any number of Arguments, which should be evaluated left to right. KDL implementations _MUST_ preserve the order of Arguments relative to @@ -161,53 +211,53 @@ each other (not counting Properties). Arguments _MAY_ be prefixed with `/-` to "comment out" the entire token and make it act as plain whitespace, even if it spreads across multiple lines. -#### Example +### Example -```kdl +~~~kdl my-node 1 2 3 a b c -``` +~~~ -### Children Block +## Children Block -A children block is a block of [Nodes](#node), surrounded by `{` and `}`. They +A children block is a block of Nodes ({{node}}), surrounded by `{` and `}`. They are an optional part of nodes, and create a hierarchy of KDL nodes. 
Regular node termination rules apply, which means multiple nodes can be included in a single-line children block, as long as they're all terminated by `;`. -#### Example +### Example -```kdl +~~~kdl parent { child1 child2 } parent { child1; child2; } -``` +~~~ -### Value +## Value -A value is either: a [String](#string), a [Number](#number), a -[Boolean](#boolean), or [Null](#null). +A value is either: a String ({{string}}), a Number ({{number}}), a +Boolean ({{boolean}}), or Null ({{null}}). -Values _MUST_ be either [Arguments](#argument) or values of -[Properties](#property). Only [String](#string) values may be used as -[Node](#node) names or [Property](#property) keys. +Values _MUST_ be either Arguments ({{argument}}) or values of +Properties ({{property}}). Only String ({{string}}) values may be used as +Node ({{node}}) names or Property ({{property}}) keys. Values (both as arguments and in properties) _MAY_ be prefixed by a single -[Type Annotation](#type-annotation). +Type Annotation ({{type-annotation}}). -### Type Annotation +## Type Annotation -A type annotation is a prefix to any [Node Name](#node) or [Value](#value) that +A type annotation is a prefix to any Node Name ({{node}}) or Value ({{value}}) that includes a _suggestion_ of what type the value is _intended_ to be treated as, or as a _context-specific elaboration_ of the more generic type the node name indicates. Type annotations are written as a set of `(` and `)` with a single -[String](#string) in it. It may contain Whitespace after the `(` and before +String ({{string}}) in it. It may contain Whitespace after the `(` and before the `)`, and may be separated from its target by Whitespace. KDL does not specify any restrictions on what implementations might do with @@ -217,7 +267,7 @@ about how to interpret a value. 
Additionally, the following type annotations MAY be recognized by KDL parsers and, if used, SHOULD interpret these types as follows: -#### Reserved Type Annotations for Numbers Without Decimals: +### Reserved Type Annotations for Numbers Without Decimals: Signed integers of various sizes (the number is the bit size): @@ -240,7 +290,7 @@ Platform-dependent integer types, both signed and unsigned: * `isize` * `usize` -#### Reserved Type Annotations for Numbers With Decimals: +### Reserved Type Annotations for Numbers With Decimals: IEEE 754 floating point numbers, both single (32) and double (64) precision: @@ -252,7 +302,7 @@ IEEE 754-2008 decimal floating point numbers * `decimal64` * `decimal128` -#### Reserved Type Annotations for Strings: +### Reserved Type Annotations for Strings: * `date-time`: ISO8601 date/time format. * `time`: "Time" section of ISO8601. @@ -279,23 +329,23 @@ IEEE 754-2008 decimal floating point numbers * `regex`: Regular expression. Specific patterns may be implementation-dependent. * `base64`: A Base64-encoded string, denoting arbitrary binary data. -#### Examples +### Examples -```kdl +~~~kdl node (u8)123 node prop=(regex).* (published)date "1970-01-01" (contributor)person name="Foo McBar" -``` +~~~ -### String +## String -Strings in KDL represent textual UTF-8 [Values](#value). A String is either an -[Identifier String](#identifier-string) (like `foo`), a -[Quoted String](#quoted-string) (like `"foo"`) -or a [Multi-Line String](#multi-line-string). +Strings in KDL represent textual UTF-8 Values ({{value}}). A String is either an +Identifier String ({{identifier-string}}) (like `foo`), a +Quoted String ({{quoted-string}}) (like `"foo"`) +or a Multi-Line String ({{multi-line-string}}). 
Both Quoted and Multiline strings come in normal -and [Raw String](#raw-string) variants (like `#"foo"#`): +and Raw String ({{raw-string}}) variants (like `#"foo"#`): * Identifier Strings let you write short, "single-word" strings with a minimum of syntax @@ -309,22 +359,22 @@ and [Raw String](#raw-string) variants (like `#"foo"#`): Strings _MUST_ be represented as UTF-8 values. Strings _MUST NOT_ include the code points for -[disallowed literal code points](#disallowed-literal-code-points) directly. +disallowed literal code points ({{disallowed-literal-code-points}}) directly. Quoted and Multi-Line Strings may include these code points as _values_ by representing them with their corresponding `\u{...}` escape. -### Identifier String +## Identifier String An Identifier String (sometimes referred to as just an "identifier") is composed of any [Unicode Scalar Value](https://unicode.org/glossary/#unicode_scalar_value) other than -[non-initial characters](#non-initial-characters), followed by any number of -Unicode Scalar Values other than [non-identifier -characters](#non-identifier-characters). +non-initial characters ({{non-initial-characters}}), followed by any number of +Unicode Scalar Values other than non-identifier +characters ({{non-identifier-characters}}). A handful of patterns are disallowed, to avoid confusion with other values: -* idents that appear to start with a [Number](#number) (like `1.0v2` or +* idents that appear to start with a Number ({{number}}) (like `1.0v2` or `-1em`) or the "almost a number" pattern of a decimal point without a leading digit (like `.1`). * idents that are the language keywords (`inf`, `-inf`, `nan`, `true`, @@ -332,15 +382,15 @@ A handful of patterns are disallowed, to avoid confusion with other values: Identifiers that match these patterns _MUST_ be treated as a syntax error; such values can only be written as quoted or raw strings. 
The precise details of the -identifier syntax is specified in the [Full Grammar](#full-grammar) below. +identifier syntax are specified in the Full Grammar in {{full-grammar}}. -#### Non-initial characters +### Non-initial characters The following characters cannot be the first character in an -[Identifier String](#identifier-string): +Identifier String ({{identifier-string}}): * Any decimal digit (0-9) -* Any [non-identifier characters](#non-identifier-characters) +* Any non-identifier characters ({{non-identifier-characters}}) Additionally, the following initial characters impose limitations on subsequent characters: @@ -354,36 +404,36 @@ characters: This allows identifiers to look like `--this` or `.md`, and removes the ambiguity of having an identifier look like a number. -#### Non-identifier characters +### Non-identifier characters -The following characters cannot be used anywhere in a [Identifier String](#identifier-string): +The following characters cannot be used anywhere in an Identifier String ({{identifier-string}}): * Any of `(){}[]/\"#;=` -* Any [Whitespace](#whitespace) or [Newline](#newline). -* Any [disallowed literal code points](#disallowed-literal-code-points) in KDL +* Any Whitespace ({{whitespace}}) or Newline ({{newline}}). +* Any disallowed literal code points ({{disallowed-literal-code-points}}) in KDL documents. -### Quoted String +## Quoted String A Quoted String is delimited by `"` on either side of any number of literal string characters except unescaped `"` and `\`. -Literal [Newline](#newline) characters can only be included -if they are [Escaped Whitespace](#escaped-whitespace), +Literal Newline ({{newline}}) characters can only be included +if they are Escaped Whitespace ({{escaped-whitespace}}), which discards them from the string value.
Actually including a newline in the value requires using a newline escape sequence, like `\n`, -or using a [Multi-Line String](#multi-line-string) +or using a Multi-Line String ({{multi-line-string}}) which is actually designed for strings stretching across multiple lines. Like Identifier Strings, Quoted Strings _MUST NOT_ include any of the -[disallowed literal code-points](#disallowed-literal-code-points) as code +disallowed literal code-points ({{disallowed-literal-code-points}}) as code points in their body. -Quoted Strings have a [Raw String](#raw-string) variant, +Quoted Strings have a Raw String ({{raw-string}}) variant, which disallows escapes. -#### Escapes +### Escapes In addition to literal code points, a number of "escapes" are supported in Quoted Strings. "Escapes" are the character `\` followed by another character, and are @@ -402,18 +452,29 @@ interpreted as described in the following table: | Unicode Escape | `\u{(1-6 hex chars)}` | Code point described by hex characters, as long as it represents a [Unicode Scalar Value](https://unicode.org/glossary/#unicode_scalar_value) | | Whitespace Escape | See below | N/A | -##### Escaped Whitespace +#### Escaped Whitespace In addition to escaping individual characters, `\` can also escape whitespace. When a `\` is followed by one or more literal whitespace characters, the `\` -and all of that whitespace are discarded. For example, `"Hello World"` and -`"Hello \ World"` are semantically identical. See [whitespace](#whitespace) -and [newlines](#newline) for how whitespace is defined. +and all of that whitespace are discarded. For example, + +~~~kdl +"Hello World" +~~~ + +and + +~~~kdl +"Hello \ World" +~~~ + +are semantically identical. See whitespace ({{whitespace}}) +and newlines ({{newline}}) for how whitespace is defined. Note that only literal whitespace is escaped; whitespace escapes (`\n` and such) are retained. 
For example, these strings are all semantically identical: -```kdl +~~~kdl "Hello\ \nWorld" "Hello\n\ @@ -425,14 +486,14 @@ such) are retained. For example, these strings are all semantically identical: Hello World """ -``` +~~~ -##### Invalid escapes +#### Invalid escapes Except as described in the escapes table, above, `\` *MUST NOT* precede any other characters in a string. -### Multi-line String +## Multi-line String Multi-Line Strings support multiple lines with literal, non-escaped Newlines. They must use a special multi-line syntax, and they automatically @@ -441,7 +502,7 @@ level as desired. A Multi-Line String is opened and closed by *three* double-quote characters, like `"""`. -Its first line _MUST_ immediately start with a [Newline](#newline) +Its first line _MUST_ immediately start with a Newline ({{newline}}) after its opening `"""`. Its final line _MUST_ contain only whitespace before the closing `"""`. @@ -465,7 +526,7 @@ Multi-line Strings that do not immediately start with a Newline and whose final also means that `"""` may not be used for a single-line String (e.g. `"""foo"""`). -#### Newline Normalization +### Newline Normalization Literal Newline sequences in Multi-line Strings must be normalized to a single `U+000A` (`LF`) during deserialization. This means, for example, that `CR LF` @@ -474,116 +535,131 @@ becomes a single `LF` during parsing. This normalization does not apply to non-literal Newlines entered using escape sequences. That is: -```kdl +~~~kdl multi-line """ \r\n[CRLF] foo[CRLF] """ -``` +~~~ becomes: -```kdl +~~~kdl single-line "\r\n\nfoo" -``` +~~~ For clarity: this normalization applies to each individual Newline sequence. That is, the literal sequence `CRLF CRLF` becomes `LF LF`, not `LF`. 
-#### Example +### Examples -```kdl +#### Indented multi-line string + +~~~kdl multi-line """ foo This is the base indentation bar """ -``` +~~~ This example's string value will be: -``` +~~~ foo This is the base indentation bar -``` +~~~ + +which is equivalent to + +~~~kdl +" foo\nThis is the base indentation\n bar" +~~~ -which is equivalent to `" foo\nThis is the base indentation\n bar"` when written as a single-line string. ---------- +#### Shorter last-line indent If the last line wasn't indented as far, it won't dedent the rest of the lines as much: -```kdl +~~~kdl multi-line """ foo This is no longer on the left edge bar """ -``` +~~~ This example's string value will be: -``` +~~~ foo This is no longer on the left edge bar -``` +~~~ -Equivalent to `" foo\n This is no longer on the left edge\n bar"`. +Equivalent to ------------ +~~~kdl +" foo\n This is no longer on the left edge\n bar" +~~~ + +#### Empty lines Empty lines can contain any whitespace, or none at all, and will be reflected as empty in the value: -```kdl +~~~kdl multi-line """ Indented a bit A second indented paragraph. """ -``` +~~~ This example's string value will be: -``` +~~~ Indented a bit. A second indented paragraph. -``` +~~~ -Equivalent to `"Indented a bit.\n\nA second indented paragraph."` +Equivalent to ------------ +~~~kdl +"Indented a bit.\n\nA second indented paragraph." +~~~ + +#### Syntax errors The following yield **syntax errors**: -```kdl +~~~kdl multi-line """can't be single line""" -``` +~~~ -```kdl +~~~kdl multi-line """ closing quote with non-whitespace prefix""" -``` +~~~ -```kdl +~~~kdl multi-line """stuff """ -``` +~~~ -```kdl +~~~kdl // Every line must share the exact same prefix as the closing line. multi-line """[\n] [tab]a[\n] [space][space]b[\n] [space][tab][\n] [tab]""" -``` +~~~ -#### Interaction with Whitespace Escapes +### Interaction with Whitespace Escapes Multi-line strings support the same mechanism for escaping whitespace as Quoted Strings. 
@@ -597,7 +673,7 @@ is invalid. For example, the following example is illegal: -```kdl +~~~kdl """ foo bar\ @@ -607,10 +683,11 @@ For example, the following example is illegal: """ foo bar""" -``` +~~~ while the following example is allowed -```kdl + +~~~kdl """ foo \ bar @@ -622,15 +699,15 @@ bar foo bar baz """ -``` +~~~ -### Raw String +## Raw String -Both [Quoted](#quoted-string) and [Multi-Line Strings](#multi-line-string) have +Both Quoted ({{quoted-string}}) and Multi-Line Strings ({{multi-line-string}}) have Raw String variants, which are identical in syntax except they do not support `\`-escapes. This includes line-continuation escapes (`\` + `ws` collapsing to nothing). They otherwise share the same properties as far as literal -[Newline](#newline) characters go, multi-line rules, and the requirement of +Newline ({{newline}}) characters go, multi-line rules, and the requirement of UTF-8 representation. The Raw String variants are indicated by preceding the strings's opening quotes @@ -640,33 +717,33 @@ string may contain any combination of `"` and `#` characters other than its closing delimiter (e.g., if a raw string starts with `##"`, it can contain `"` or `"#`, but not `"##` or `"###`). -Like other Strings, Raw Strings _MUST NOT_ include any of the [disallowed -literal code-points](#disallowed-literal-code-points) as code points in their +Like other Strings, Raw Strings _MUST NOT_ include any of the disallowed +literal code-points ({{disallowed-literal-code-points}}) as code points in their body. Unlike with Quoted Strings, these cannot simply be escaped, and are thus unrepresentable when using Raw Strings. -#### Example +### Example -```kdl +~~~kdl just-escapes #"\n will be literal"# -``` +~~~ The string contains the literal characters `\n will be literal`. 
-```kdl +~~~kdl quotes-and-escapes ##"hello\n\r\asd"#world"## -``` +~~~ The string contains the literal characters `hello\n\r\asd"#world` -```kdl +~~~kdl raw-multi-line #""" Here's a """ multiline string """ without escapes. """# -``` +~~~ The string contains the value @@ -677,17 +754,23 @@ Here's a """ without escapes. ~~~ -or equivalently, `"Here's a \"\"\"\n multiline string\n \"\"\"\nwithout escapes."` as a Quoted String. +or equivalently, -### Number +~~~kdl +"Here's a \"\"\"\n multiline string\n \"\"\"\nwithout escapes." +~~~ -Numbers in KDL represent numerical [Values](#value). There is no logical distinction in KDL +as a Quoted String. + +## Number + +Numbers in KDL represent numerical Values ({{value}}). There is no logical distinction in KDL between real numbers, integers, and floating point numbers. It's up to individual implementations to determine how to represent KDL numbers. There are five syntaxes for Numbers: Keywords, Decimal, Hexadecimal, Octal, and Binary. -* All non-[Keyword](#keyword-numbers) numbers may optionally start with one of `-` or `+`, which determine whether they'll be positive or negative. +* All non-Keyword ({{keyword-numbers}}) numbers may optionally start with one of `-` or `+`, which determine whether they'll be negative or positive. * Binary numbers start with `0b` and only allow `0` and `1` as digits, which may be separated by `_`. They represent numbers in radix 2. * Octal numbers start with `0o` and only allow digits between `0` and `7`, which may be separated by `_`. They represent numbers in radix 8. * Hexadecimal numbers start with `0x` and allow digits between `0` and `9`, as well as letters `A` through `F`, in either lower or upper case, which may be separated by `_`. They represent numbers in radix 16. @@ -700,9 +783,9 @@ There are five syntaxes for Numbers: Keywords, Decimal, Hexadecimal, Octal, and Note that, similar to JSON and some other languages, numbers without an integer digit (such as `.1`) are illegal.
They must be written with at least one integer digit, like `0.1`. -(These patterns are also disallowed from [Identifier Strings](#identifier-string), to avoid confusion.) +(These patterns are also disallowed from Identifier Strings ({{identifier-string}}), to avoid confusion.) -#### Keyword Numbers +### Keyword Numbers There are three special "keyword" numbers included in KDL to accomodate the widespread use of [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754) floats: @@ -711,41 +794,41 @@ widespread use of [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754) floats: * `#-inf` - floating point negative infinity. * `#nan` - floating point NaN/Not a Number. -To go along with this and prevent foot guns, the bare [Identifier -Strings](#identifier-string) `inf`, `-inf`, and `nan` are considered illegal +To go along with this and prevent foot guns, the bare Identifier +Strings ({{identifier-string}}) `inf`, `-inf`, and `nan` are considered illegal identifiers and should yield a syntax error. The existence of these keywords does not imply that any numbers be represented as IEEE 754 floats. These are simply for clarity and convenience for any implementation that chooses to represent their numbers in this way. -### Boolean +## Boolean -A boolean [Value](#value) is either the symbol `#true` or `#false`. These +A boolean Value ({{value}}) is either the symbol `#true` or `#false`. These _SHOULD_ be represented by implementation as boolean logical values, or some approximation thereof. -#### Example +### Example -```kdl +~~~kdl my-node #true value=#false -``` +~~~ -### Null +## Null -The symbol `#null` represents a null [Value](#value). It's up to the +The symbol `#null` represents a null Value ({{value}}). It's up to the implementation to decide how to represent this, but it generally signals the "absence" of a value. 
-#### Example +### Example -```kdl +~~~kdl my-node #null key=#null -``` +~~~ -### Whitespace +## Whitespace -The following characters should be treated as non-[Newline](#newline) [white +The following characters should be treated as non-Newline ({{newline}}) [white space](https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt): | Name | Code Pt | @@ -769,19 +852,19 @@ space](https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt): | Medium Mathematical Space | `U+205F` | | Ideographic Space | `U+3000` | -#### Single-line comments +### Single-line comments -Any text after `//`, until the next literal [Newline](#newline) is "commented -out", and is considered to be [Whitespace](#whitespace). +Any text after `//`, until the next literal Newline ({{newline}}), is "commented +out", and is considered to be Whitespace ({{whitespace}}). -#### Multi-line comments +### Multi-line comments In addition to single-line comments using `//`, comments can also be started with `/*` and ended with `*/`. These comments can span multiple lines. They -are allowed in all positions where [Whitespace](#whitespace) is allowed and +are allowed in all positions where Whitespace ({{whitespace}}) is allowed and can be nested. -#### Slashdash comments +### Slashdash comments Finally, a special kind of comment called a "slashdash", denoted by `/-`, can be used to comment out entire _components_ of a KDL document logically, and @@ -790,19 +873,19 @@ have those elements not be included as part of the parsed document data. Slashdash comments can be used before the following, including before their type annotations, if present: -* A [Node](#node): the entire Node is treated as Whitespace, including all +* A Node ({{node}}): the entire Node is treated as Whitespace, including all props, args, and children. -* An [Argument](#argument): the Argument value is treated as Whitespace.
-* A [Property](#property) key: the entire property, including both key and value, +* An Argument ({{argument}}): the Argument value is treated as Whitespace. +* A Property ({{property}}) key: the entire property, including both key and value, is treated as Whitespace. A slashdash of just the property value is not allowed. -* A [Children Block](#children-block): the entire block, including all +* A Children Block ({{children-block}}): the entire block, including all children within, is treated as Whitespace. Only other children blocks, whether slashdashed or not, may follow a slashdashed children block. A slashdash may be be followed by any amount of whitespace, including newlines and comments (other than other slashdashes), before the element that it comments out. -### Newline +## Newline The following character sequences [should be treated as new lines](https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-5/#G41643): @@ -821,10 +904,10 @@ lines](https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-5/#G4164 Note that for the purpose of new lines, the specific sequence `CRLF` is considered _a single newline_. -### Disallowed Literal Code Points +## Disallowed Literal Code Points The following code points may not appear literally anywhere in the document. -They may be represented in Strings (but not Raw Strings) using [Unicode Escapes](#escapes) (`\u{...}`, +They may be represented in Strings (but not Raw Strings) using Unicode Escapes ({{escapes}}) (`\u{...}`, except for non Unicode Scalar Value, which can't be represented even as escapes). * The codepoints `U+0000-0008` or the codepoints `U+000E-001F` (various @@ -838,13 +921,13 @@ except for non Unicode Scalar Value, which can't be represented even as escapes) * `U+FEFF`, aka Zero-width Non-breaking Space (ZWNBSP)/Byte Order Mark (BOM), except as the first code point in a document. 
-## Full Grammar +# Full Grammar This is the full official grammar for KDL and should be considered -authoritative if something seems to disagree with the text above. The [grammar -language syntax](#grammar-language) is defined below. +authoritative if something seems to disagree with the text above. The grammar +language syntax is defined in {{grammar-language}}. -``` +~~~abnf document := bom? version? nodes // Nodes @@ -971,7 +1054,7 @@ version := unicode-space* newline ~~~ -### Grammar language +## Grammar language The grammar language syntax is a combination of ABNF with some regex spice thrown in. Specifically: