diff --git a/Cargo.toml b/Cargo.toml
index f56c59c..a49c92f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,3 +5,7 @@ description = "kat's document language"
 authors = ["Kat Marchán "]
 license-file = "LICENSE.md"
 edition = "2018"
+
+[dependencies]
+nom = "6.0.1"
+thiserror = "1.0.22"
diff --git a/README.md b/README.md
index 5560cb0..696a013 100644
--- a/README.md
+++ b/README.md
@@ -106,11 +106,11 @@ to jump in and give us your 2 cents!
 ## Grammar
 
 ```
-document := linespace* (node (newline document)?)?
+document := linespace* (node (newline document)? linespace*)?
 
-node := identifier (node-space node-argument)* (node-space node-document)?
+node := identifier (node-space node-argument)* (node-space node-document)? single-line-comment?
 node-argument := prop | value
-node-document := '{' linespace* document linespace* '}'
+node-document := '{' document '}'
 node-space := ws* escline ws* | ws+
 
 identifier := [a-zA-Z] [a-zA-Z0-9!#$%&'*+\-./:<>?@\^_|~]* | string
@@ -136,7 +136,7 @@ binary := '0b' ('0' | '1') ('0' | '1' | '_')*
 
 boolean := 'true' | 'false'
 
-escline := '\\' newline
+escline := '\\' ws* (single-line-comment | newline)
 
 linespace := newline | ws | single-line-comment
 
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..dfa3de3
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,61 @@
+use nom::error::{ContextError, ParseError};
+
+use thiserror::Error;
+
+/// An error encountered while parsing a document.
+#[derive(Debug, Clone, Error)]
+#[error("Error parsing document. {kind}")]
+pub struct Error {
+    /// The full input that was being parsed.
+    pub input: String,
+    /// Byte offset into `input` at which the failure was reported.
+    pub offset: usize,
+    /// What went wrong.
+    pub kind: ErrorKind,
+}
+
+/// The specific kind of failure behind an [`Error`].
+#[derive(Debug, Clone, Error)]
+pub enum ErrorKind {
+    /// A parser labelled with a context string failed.
+    #[error("Failed to parse {0} component of document.")]
+    Context(&'static str),
+    /// The parser reported that it needed more input.
+    #[error("Incomplete input to document parser.")]
+    IncompleteInput,
+    /// A failure with no more specific kind or context attached.
+    #[error("An unspecified error occurred.")]
+    Other,
+}
+
+/// Internal nom error type: the input position of the failure plus an
+/// optional context label and an optional [`ErrorKind`].
+#[derive(Debug)]
+pub(crate) struct NodeParseError<I> {
+    pub(crate) input: I,
+    pub(crate) context: Option<&'static str>,
+    pub(crate) kind: Option<ErrorKind>,
+}
+
+impl<I> ParseError<I> for NodeParseError<I> {
+    fn from_error_kind(input: I, _kind: nom::error::ErrorKind) -> Self {
+        Self {
+            input,
+            context: None,
+            kind: None,
+        }
+    }
+
+    // Keeps the error already recorded instead of accumulating a chain.
+    fn append(_input: I, _kind: nom::error::ErrorKind, other: Self) -> Self {
+        other
+    }
+}
+
+impl<I> ContextError<I> for NodeParseError<I> {
+    // Each call overwrites any previously recorded context label.
+    fn add_context(_input: I, ctx: &'static str, mut other: Self) -> Self {
+        other.context = Some(ctx);
+        other
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 31e1bb2..30d1d30 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,7 +1,43 @@
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn it_works() {
-        assert_eq!(2 + 2, 4);
-    }
-}
+use nom::combinator::all_consuming;
+use nom::Err;
+
+pub use crate::error::{Error, ErrorKind};
+pub use crate::node::Node;
+
+mod error;
+mod node;
+mod parser;
+
+/// Parses `input` as a complete document and returns its top-level nodes.
+///
+/// # Errors
+///
+/// Returns an [`Error`] holding a copy of the input, the byte offset at which
+/// parsing failed and an [`ErrorKind`] if the input is not a valid document or
+/// is not consumed entirely.
+pub fn parse_document<I>(input: I) -> Result<Vec<Node>, Error>
+where
+    I: AsRef<str>,
+{
+    let input = input.as_ref();
+    match all_consuming(parser::nodes)(input) {
+        Ok((_, nodes)) => Ok(nodes),
+        Err(Err::Error(e)) | Err(Err::Failure(e)) => Err(Error {
+            input: input.into(),
+            // nom hands the error the remaining slice of `input`, so the distance
+            // between the two start pointers is the failure's byte offset.
+            offset: e.input.as_ptr() as usize - input.as_ptr() as usize,
+            kind: match (e.kind, e.context) {
+                (Some(kind), _) => kind,
+                (None, Some(ctx)) => ErrorKind::Context(ctx),
+                (None, None) => ErrorKind::Other,
+            },
+        }),
+        Err(Err::Incomplete(_)) => Err(Error {
+            input: input.into(),
+            // `saturating_sub` keeps empty input from underflowing the offset.
+            offset: input.len().saturating_sub(1),
+            kind: ErrorKind::IncompleteInput,
+        }),
+    }
+}
diff --git a/src/node.rs b/src/node.rs
new file mode 100644
index 0000000..a47f03d
--- /dev/null
+++ b/src/node.rs
@@ -0,0 +1,3 @@
+/// A single node of a parsed document (placeholder: carries no data yet).
+#[derive(Debug, Clone, PartialEq)]
+pub struct Node;
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..0941362
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,79 @@
+use nom::branch::alt;
+use nom::bytes::complete::tag;
+use nom::combinator::{map, opt};
+use nom::multi::{many0, many1};
+use nom::sequence::{delimited, preceded};
+use nom::IResult;
+
+use crate::error::{ErrorKind, NodeParseError};
+use crate::node::Node;
+
+/// `document := linespace* (node (newline document)? linespace*)?`
+pub(crate) fn nodes(input: &str) -> IResult<&str, Vec<Node>, NodeParseError<&str>> {
+    // TODO: this does not follow the grammar yet: leading/trailing
+    // `linespace` and the `newline` between nodes are not consumed.
+    many0(node)(input)
+}
+
+/// `node := identifier (node-space node-argument)* (node-space node-document)? single-line-comment?`
+pub(crate) fn node(input: &str) -> IResult<&str, Node, NodeParseError<&str>> {
+    // Bound as `name`, not `tag`, so the imported `tag` combinator is not shadowed.
+    let (input, name) = identifier(input)?;
+    let (input, args) = many0(preceded(node_space, node_arg))(input)?;
+    let (input, children) = opt(preceded(node_space, node_children))(input)?;
+    // TODO: parse the optional trailing `single-line-comment` and build the `Node`.
+    todo!()
+}
+
+/// `node-argument := prop | value`
+fn node_arg(input: &str) -> IResult<&str, (), NodeParseError<&str>> {
+    // TODO: `()` is a placeholder output until props/values have a type.
+    todo!()
+}
+
+/// `node-document := '{' document '}'`
+fn node_children(input: &str) -> IResult<&str, Vec<Node>, NodeParseError<&str>> {
+    todo!()
+}
+
+/// `identifier := [a-zA-Z] [a-zA-Z0-9!#$%&'*+\-./:<>?@\^_|~]* | string`
+fn identifier(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
+    todo!()
+}
+
+/// `node-space := ws* escline ws* | ws+`
+fn node_space(input: &str) -> IResult<&str, (), NodeParseError<&str>> {
+    alt((
+        delimited(many0(whitespace), escline, many0(whitespace)),
+        map(many1(whitespace), |_| ()),
+    ))(input)
+}
+
+/// `single-line-comment := '//' ('\r' [^\n] | [^\r\n])* newline`
+fn single_line_comment(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
+    todo!()
+}
+
+/// `multi-line-comment := '/*' ('*' [^\/] | [^*])* '*/'`
+fn multi_line_comment(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
+    todo!()
+}
+
+/// `escline := '\\' ws* (single-line-comment | newline)`
+fn escline(input: &str) -> IResult<&str, (), NodeParseError<&str>> {
+    let (input, _) = tag("\\")(input)?;
+    let (input, _) = many0(whitespace)(input)?;
+    let (input, _) = alt((single_line_comment, newline))(input)?;
+    Ok((input, ()))
+}
+
+/// `ws := bom | ' ' | '\t' | multi-line-comment`
+fn whitespace(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
+    // U+FEFF is the Unicode byte-order mark (`bom` in the grammar above).
+    alt((tag("\u{FEFF}"), tag(" "), tag("\t"), multi_line_comment))(input)
+}
+
+/// `newline := ('\r' '\n') | '\n'`
+fn newline(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
+    alt((tag("\r\n"), tag("\n")))(input)
+}