From 80dec07c378823bce41779704b15bdedb53caa45 Mon Sep 17 00:00:00 2001 From: Lars Willighagen Date: Thu, 2 Sep 2021 07:07:09 +0200 Subject: [PATCH 1/6] Add schema specs for info and definitions (#120) Fixes: https://github.com/kdl-org/kdl/issues/113 --- SCHEMA-SPEC.md | 137 +++++++++++++++++++++++++++++++++++++++- examples/kdl-schema.kdl | 135 +++++++++++++++++++++++++++++++++++---- 2 files changed, 259 insertions(+), 13 deletions(-) diff --git a/SCHEMA-SPEC.md b/SCHEMA-SPEC.md index cc4700b..801fe4b 100644 --- a/SCHEMA-SPEC.md +++ b/SCHEMA-SPEC.md @@ -24,15 +24,129 @@ None. #### Properties -* `description` (optional): An informational description of the purpose of this schema. -* `schema-url` (optional): A URL where someone may go to find more information about this schema. It is not meant for mechanical processing. +None. #### Children +* [`info`](#info-node) - one info node that describes the schema itself. * [`node`](#node-node) - zero or more toplevel nodes for the KDL document this schema describes. +* [`definitions`](#definitions-node) (optional): Definitions of nodes, values, props, and children blocks to reference in the toplevel nodes. * `node-names` (optional): [Validations](#validation-nodes) to apply to the _names_ of child nodes. * `other-nodes-allowed` (optional): Whether to allow nodes other than the ones explicitly listed here. Defaults to `false`. +### `info` node + +The `info` node describes the schema itself. + +#### Values + +None. + +#### Properties + +None. 
+ +#### Children + +* [`title`](#title-node) (optional): zero or more titles +* [`description`](#description-node) (optional): zero or more descriptions +* [`author`](#author-and-contributor-nodes) (optional): zero or more authors +* [`contributor`](#author-and-contributor-nodes) (optional): zero or more contributors +* [`link`](#link-node) (optional): zero or more URLs +* [`license`](#license-node) (optional): zero or more licenses +* [`published`](#published-and-modified-nodes) (optional): a publication date +* [`modified`](#published-and-modified-nodes) (optional): a modification date +* [`version`](#version-node) (optional): a [SemVer](https://semver.org/) version number + +### `title` node + +The title of the schema or the format it describes. + +#### Values + +* Title + +#### Properties + +* `lang` (optional): An IETF BCP 47 language tag + +### `description` node + +A description of the schema or the format it describes. + +#### Values + +* Description + +#### Properties + +* `lang` (optional): An IETF BCP 47 language tag + +### `author` and `contributor` nodes + +Author(s) of the schema. + +#### Values + +* Author name + +#### Properties + +* `orcid` (optional): The [ORCID](https://orcid.org/) of the author. + +#### Children + +* [`link`](#link-node) (optional): zero or more URLs + +### `link` node + +Links to the schema itself, and to sources about the schema. + +#### Values + +* URI/IRI - A URI/IRI that the link points to + +#### Properties + +* `rel`: what the link is for (`"self"` or `"documentation"`) +* `lang` (optional): An IETF BCP 47 language tag + +### `license` node + +The license(s) that the schema is licensed under. + +#### Values + +* License name - Name of the used license + +#### Properties + +* `spdx` (optional): an [SPDX license identifier](https://spdx.dev/ids/) + +#### Children + +* [`link`](#link-node): one or more URLs + +### `published` and `modified` nodes + +When the schema was published or last modified respectively. 
+ +#### Values + +* Publication or modification date - As an ISO8601 date + +#### Properties + +* `time` (optional): an ISO8601 Time to accompany the date + +### `version` node + +The version number of this version of the schema. + +#### Values + +* Version - Semver version specification + ### `node` node The `node` node describes node instances in a document. These may either be at @@ -160,3 +274,22 @@ and property names when the `node-names` or `prop-names` options are activated. * `>=`: Greater than or equal to. * `<`: Less than. * `<=`: Less than or equal to. + +### `definitions` node + +Definitions to reference in parts of the top-level `node`s. + +#### Values + +None. + +#### Properties + +None. + +#### Children + +* [`node`](#node-node) - zero or more node definitions. +* [`prop`](#prop-node) - zero or more property definitions. +* [`value`](#value-node) - zero or more value definitions. +* [`children`](#children-node) - zero or more definitions of children blocks. diff --git a/examples/kdl-schema.kdl b/examples/kdl-schema.kdl index e6c8b59..b514b39 100644 --- a/examples/kdl-schema.kdl +++ b/examples/kdl-schema.kdl @@ -1,13 +1,23 @@ -document description="KDL Schema KDL schema in KDL" schema-url="https://github.com/zkat/kdl" { +document { + info { + title "KDL Schema" lang="en" + description "KDL Schema KDL schema in KDL" lang="en" + author "Kat Marchán" { + link "https://github.com/zkat" rel="self" + } + contributor "Lars Willighagen" { + link "https://github.com/larsgw" rel="self" + } + link "https://github.com/zkat/kdl" rel="documentation" + license "Creative Commons Attribution-ShareAlike 4.0 International License" spdx="CC-BY-SA-4.0" { + link "https://creativecommons.org/licenses/by-sa/4.0/" lang="en" + } + published "2021-08-31" + modified "2021-09-01" + } node "document" { min 1 max 1 - prop "schema-url" description="URL where you can find this schema. Informational only." 
{ - type "url" - } - prop "description" description="General purpose and description for this document schema." { - type "string" - } children id="node-children" { node "node-names" description="Validations to apply specifically to arbitrary node names" { children ref="#validations" @@ -20,7 +30,102 @@ document description="KDL Schema KDL schema in KDL" schema-url="https://github.c type "boolean" } } - node "node" description="A child node belonging either to `document` or to another `node`. Nodes may be anonymous." { + node "info" description="A child node that describes the schema itself." { + children { + node "title" description="The title of the schema or the format it describes" { + value description="The title text" { + type "string" + min 1 + max 1 + } + prop "lang" id="info-lang" description="The language of the text" { + type "string" + } + } + node "description" description="A description of the schema or the format it describes" { + value description="The description text" { + type "string" + min 1 + max 1 + } + prop ref="#info-lang" + } + node "author" description="Author of the schema" { + value id="info-person-name" description="Person name" { + type "string" + min 1 + max 1 + } + prop "orcid" id="info-orcid" description="The ORCID of the person" { + type "string" + pattern r"\d{4}-\d{4}-\d{4}-\d{3}[\dX]" + } + children { + node ref="#info-link" + } + } + node "contributor" description="Contributor to the schema" { + value ref="#info-person-name" + prop ref="#info-orcid" + } + node "link" id="info-link" description="Links to itself, and to sources describing it" { + value description="A URL that the link points to" { + type "string" + format "uri" "iri" + min 1 + max 1 + } + prop "rel" description="The relation between the current entity and the URL" { + type "string" + enum "self" "documentation" + } + prop ref="#info-lang" + } + node "license" description="The license(s) that the schema is licensed under" { + value description="Name of the used license" 
{ + type "string" + min 1 + max 1 + } + prop "spdx" description="An SPDX license identifier" { + type "string" + } + children { + node ref="#info-link" + } + } + node "published" description="When the schema was published" { + value description="Publication date" { + type "string" + format "date" + min 1 + max 1 + } + prop "time" id="info-time" description="A time to accompany the date" { + type "string" + format "time" + } + } + node "modified" description="When the schema was last modified" { + value description="Modification date" { + type "string" + format "date" + min 1 + max 1 + } + prop ref="#info-time" + } + node "version" description="The version number of this version of the schema" { + value description="Semver version number" { + type "string" + pattern r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$" + min 1 + max 1 + } + } + } + } + node "node" id="node-node" description="A child node belonging either to `document` or to another `node`. Nodes may be anonymous." { value description="The name of the node. If a node name is not supplied, the node rules apply to _all_ nodes belonging to the parent." { type "string" max 1 @@ -62,7 +167,7 @@ document description="KDL Schema KDL schema in KDL" schema-url="https://github.c type "number" } } - node "prop" description="A node property key/value pair." { + node "prop" id="prop-node" description="A node property key/value pair." { value description="The property key." { type "string" } @@ -169,7 +274,7 @@ document description="KDL Schema KDL schema in KDL" schema-url="https://github.c } } } - node "value" description="one or more direct node values" { + node "value" id="value-node" description="one or more direct node values" { prop "id" description="A globally-unique ID of this value." 
{ type "string" } @@ -199,7 +304,7 @@ document description="KDL Schema KDL schema in KDL" schema-url="https://github.c } } } - node "children" { + node "children" id="children-node" { prop "id" description="A globally-unique ID of this children node." { type "string" } @@ -213,6 +318,14 @@ document description="KDL Schema KDL schema in KDL" schema-url="https://github.c } } } + node "definitions" description="Definitions to reference in parts of the top-level nodes" { + children { + node ref="#node-node" + node ref="#value-node" + node ref="#prop-node" + node ref="#children-node" + } + } } } } From a44fcbb1d48c4efe71a8bd61895dd6c562bfa78f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Thu, 2 Sep 2021 09:23:51 -0700 Subject: [PATCH 2/6] add type annotation info to spec (#123) Fixes: https://github.com/kdl-org/kdl/issues/106 --- SPEC.md | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) diff --git a/SPEC.md b/SPEC.md index cfb65cf..4cfaea0 100644 --- a/SPEC.md +++ b/SPEC.md @@ -189,6 +189,78 @@ A value is either: a [String](#string), a [Raw String](#raw-string), a Values _MUST_ be either [Arguments](#argument) or values of [Properties](#property). +Values _MAY_ be prefixed by a single [Type Annotation](#type-annotation). + +### Type Annotation + +A type annotation is a prefix to any [Value](#value) that includes a +_suggestion_ of what type the value is _intended_ to be treated as. + +Type annotations are written as a set of `(` and `)` with a single +[Identifier](#identifier) in it. Any valid identifier is considered a valid +type annotation. There must be no whitespace between a type annotation and its +associated Value. + +KDL does not specify any restrictions on what implementations might do with +these annotations. They are free to ignore them, or use them to make decisions +about how to interpret a value. 
+ +Additionally, the following type annotations MAY be recognized by KDL parsers +and, if recognized, SHOULD be interpreted as follows: + +#### Reserved Type Annotations for Numbers Without Decimals: + +Signed integers of various sizes (the number is the bit size): + +* `i8` +* `i16` +* `i32` +* `i64` + +Unsigned integers of various sizes (the number is the bit size): + +* `u8` +* `u16` +* `u32` +* `u64` + +Platform-dependent integer types, both signed and unsigned: + +* `isize` +* `usize` + +IEEE 754 floating point numbers, both single (32) and double (64) precision: + +* `f32` +* `f64` + +#### Reserved Type Annotations for Strings: + +* `date-time`: ISO8601 date/time format. +* `time`: "Time" section of ISO8601. +* `date`: "Date" section of ISO8601. +* `email`: RFC5322 email address. +* `idn-email`: RFC6531 internationalized email address. +* `hostname`: RFC1123 internet hostname. +* `idn-hostname`: RFC5890 internationalized internet hostname. +* `ipv4`: RFC2673 dotted-quad IPv4 address. +* `ipv6`: RFC2373 IPv6 address. +* `uri`: RFC3986 URI. +* `uri-reference`: RFC3986 URI Reference. +* `iri`: RFC3987 Internationalized Resource Identifier. +* `iri-reference`: RFC3987 Internationalized Resource Identifier Reference. +* `uri-template`: RFC6570 URI Template. +* `uuid`: RFC4122 UUID. +* `regex`: Regular expression. Specific patterns may be implementation-dependent. +* `base64`: A Base64-encoded string, denoting arbitrary binary data. + +#### Examples + +```kdl +node (u8)123 +node prop=(regex)".*" +``` + ### String Strings in KDL represent textual [Values](#value). They are delimited by `"` @@ -338,7 +410,8 @@ bare-identifier := ((identifier-char - digit - sign) identifier-char* | sign ((i identifier-char := unicode - linespace - [\/(){}<>;[]=,"] keyword := boolean | 'null' prop := identifier '=' value -value := string | number | keyword +value := type? 
(string | number | keyword) +type := '(' identifier ')' string := raw-string | escaped-string escaped-string := '"' character* '"' From 2e42c4c0ecc54c92c22c570a8c97d3e09052e949 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Thu, 2 Sep 2021 15:10:03 -0700 Subject: [PATCH 3/6] Change URIs to URLs for clarity (#125) --- SCHEMA-SPEC.md | 10 +++++----- SPEC.md | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/SCHEMA-SPEC.md b/SCHEMA-SPEC.md index 801fe4b..6a6e20d 100644 --- a/SCHEMA-SPEC.md +++ b/SCHEMA-SPEC.md @@ -258,11 +258,11 @@ and property names when the `node-names` or `prop-names` options are activated. * `idn-hostname`: RFC5890 internationalized internet hostname. * `ipv4`: RFC2673 dotted-quad IPv4 address. * `ipv6`: RFC2373 IPv6 address. - * `uri`: RFC3986 URI. - * `uri-reference`: RFC3986 URI Reference. - * `iri`: RFC3987 Internationalized Resource Identifier. - * `iri-reference`: RFC3987 Internationalized Resource Identifier Reference. - * `uri-template`: RFC6570 URI Template. + * `url`: RFC3986 URI. + * `url-reference`: RFC3986 URI Reference. + * `irl`: RFC3987 Internationalized Resource Identifier. + * `irl-reference`: RFC3987 Internationalized Resource Identifier Reference. + * `url-template`: RFC6570 URI Template. * `uuid`: RFC4122 UUID. * `regex`: Regular expression. Specific patterns may be implementation-dependent. * `base64`: A Base64-encoded string, denoting arbitrary binary data. diff --git a/SPEC.md b/SPEC.md index 4cfaea0..dc99682 100644 --- a/SPEC.md +++ b/SPEC.md @@ -245,11 +245,11 @@ IEEE 754 floating point numbers, both single (32) and double (64) precision: * `idn-hostname`: RFC5890 internationalized internet hostname. * `ipv4`: RFC2673 dotted-quad IPv4 address. * `ipv6`: RFC2373 IPv6 address. -* `uri`: RFC3986 URI. -* `uri-reference`: RFC3986 URI Reference. -* `iri`: RFC3987 Internationalized Resource Identifier. -* `iri-reference`: RFC3987 Internationalized Resource Identifier Reference. 
-* `uri-template`: RFC6570 URI Template. +* `url`: RFC3986 URI. +* `url-reference`: RFC3986 URI Reference. +* `irl`: RFC3987 Internationalized Resource Identifier. +* `irl-reference`: RFC3987 Internationalized Resource Identifier Reference. +* `url-template`: RFC6570 URI Template. * `uuid`: RFC4122 UUID. * `regex`: Regular expression. Specific patterns may be implementation-dependent. * `base64`: A Base64-encoded string, denoting arbitrary binary data. From 6b4d0ea0300c8fdf28e84be48616b0f6749df2b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Thu, 2 Sep 2021 15:25:13 -0700 Subject: [PATCH 4/6] allow eof termination for single line comments (#126) --- SPEC.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SPEC.md b/SPEC.md index dc99682..3d342f5 100644 --- a/SPEC.md +++ b/SPEC.md @@ -448,7 +448,7 @@ bom := '\u{FFEF}' unicode-space := See Table (All White_Space unicode characters which are not `newline`) -single-line-comment := '//' ^newline+ newline +single-line-comment := '//' ^newline+ (newline | eof) multi-line-comment := '/*' (commented-block | multi-line-comment) '*/' commented-block := ('*' [^\/] | [^*])* ``` From cbb500a4ac5e9b6e97c70b80c4f376b10f17d344 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Thu, 2 Sep 2021 15:29:25 -0700 Subject: [PATCH 5/6] allow /- to cross linespaces (#127) Ref: https://github.com/kdl-org/kdl/issues/121 --- SPEC.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SPEC.md b/SPEC.md index 3d342f5..9ee4976 100644 --- a/SPEC.md +++ b/SPEC.md @@ -399,9 +399,9 @@ Note that for the purpose of new lines, CRLF is considered _a single newline_. ``` nodes := linespace* (node nodes?)? linespace* -node := ('/-' ws*)? identifier (node-space node-space* node-props-and-args)* (node-space* node-children ws*)? node-space* node-terminator -node-props-and-args := ('/-' ws*)? (prop | value) -node-children := ('/-' ws*)? '{' nodes '}' +node := ('/-' node-space*)? 
identifier (node-space node-space* node-props-and-args)* (node-space* node-children ws*)? node-space* node-terminator +node-props-and-args := ('/-' node-space*)? (prop | value) +node-children := ('/-' node-space*)? '{' nodes '}' node-space := ws* escline ws* | ws+ node-terminator := single-line-comment | newline | ';' | eof From dec2565a17333cba03eadd6a294b28f61d6aa357 Mon Sep 17 00:00:00 2001 From: Jo Liss Date: Fri, 3 Sep 2021 16:02:49 +0100 Subject: [PATCH 6/6] fix typos (#129) --- SPEC.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SPEC.md b/SPEC.md index 9ee4976..425d531 100644 --- a/SPEC.md +++ b/SPEC.md @@ -78,8 +78,8 @@ foo 1 key="val" 3 { ### Identifier -A bare Identifier is composed of any unicode codepoint other than [non-initial -characters](#non-inidital-characters), followed by any number of unicode +A bare Identifier is composed of any Unicode codepoint other than [non-initial +characters](#non-initial-characters), followed by any number of Unicode codepoints other than [non-identifier characters](#non-identifier-characters), so long as this doesn't produce something confusable for a [Number](#number), [Boolean](#boolean), or [Null](#null).