wip parser

This commit is contained in:
Kat Marchán 2020-12-12 11:50:34 -08:00
parent e6eb4f342b
commit 3296cc83a0
No known key found for this signature in database
GPG Key ID: AEB529C08A3C7E9E
6 changed files with 159 additions and 11 deletions

View File

@ -5,3 +5,7 @@ description = "kat's document language"
authors = ["Kat Marchán <kzm@zkat.tech>"]
license-file = "LICENSE.md"
edition = "2018"
[dependencies]
nom = "6.0.1"
thiserror = "1.0.22"

View File

@ -106,11 +106,11 @@ to jump in and give us your 2 cents!
## Grammar
```
document := linespace* (node (newline document)?)?
document := linespace* (node (newline document)? linespace*)?
node := identifier (node-space node-argument)* (node-space node-document)?
node := identifier (node-space node-argument)* (node-space node-document)? single-line-comment?
node-argument := prop | value
node-document := '{' linespace* document linespace* '}'
node-document := '{' document '}'
node-space := ws* escline ws* | ws+
identifier := [a-zA-Z] [a-zA-Z0-9!#$%&'*+\-./:<>?@\^_|~]* | string
@ -136,7 +136,7 @@ binary := '0b' ('0' | '1') ('0' | '1' | '_')*
boolean := 'true' | 'false'
escline := '\\' newline
escline := '\\' ws* (single-line-comment | newline)
linespace := newline | ws | single-line-comment

48
src/error.rs Normal file
View File

@ -0,0 +1,48 @@
use nom::error::{ContextError, ParseError};
use thiserror::Error;
/// Error returned to callers when a document fails to parse.
///
/// Its `Display` output is a fixed prefix followed by the message of `kind`.
#[derive(Debug, Clone, Error)]
#[error("Error parsing document. {kind}")]
pub struct Error {
/// Owned copy of the full text that was handed to the parser.
pub input: String,
/// Byte offset into `input` at which the failure was reported.
pub offset: usize,
/// The specific reason parsing failed.
pub kind: ErrorKind,
}
#[derive(Debug, Clone, Error)]
pub enum ErrorKind {
#[error("Failed to parse {0} component of semver string.")]
Context(&'static str),
#[error("Incomplete input to semver parser.")]
IncompleteInput,
#[error("An unspecified error occurred.")]
Other,
}
/// Internal error type threaded through the nom parsers.
///
/// It is converted into the public `Error` by `parse_document`.
#[derive(Debug)]
pub(crate) struct NodeParseError<I> {
/// The input as seen by the parser that produced this error.
pub(crate) input: I,
/// Most recently attached context label, if any.
pub(crate) context: Option<&'static str>,
/// A specific error kind, if one was recorded; takes priority over
/// `context` when the public error is built.
pub(crate) kind: Option<ErrorKind>,
}
impl<I> ParseError<I> for NodeParseError<I> {
fn from_error_kind(input: I, _kind: nom::error::ErrorKind) -> Self {
Self {
input,
context: None,
kind: None,
}
}
fn append(_input: I, _kind: nom::error::ErrorKind, other: Self) -> Self {
other
}
}
impl<I> ContextError<I> for NodeParseError<I> {
    /// Returns `other` with its context label replaced by `ctx`; the input
    /// position and kind recorded in `other` are carried over untouched.
    fn add_context(_input: I, ctx: &'static str, other: Self) -> Self {
        Self {
            context: Some(ctx),
            ..other
        }
    }
}

View File

@ -1,7 +1,37 @@
// Placeholder test module; it exercises no code from this crate.
#[cfg(test)]
mod tests {
// Trivial arithmetic check that only proves the test harness runs.
#[test]
fn it_works() {
assert_eq!(2 + 2, 4);
}
}
use nom::combinator::all_consuming;
use nom::Err;
pub use crate::error::{Error, ErrorKind};
pub use crate::node::Node;
mod error;
mod node;
mod parser;
pub fn parse_document<I>(input: I) -> Result<Vec<Node>, Error>
where
I: AsRef<str>,
{
let input = &input.as_ref()[..];
match all_consuming(parser::nodes)(input) {
Ok((_, arg)) => Ok(arg),
Err(err) => Err(match err {
Err::Error(e) | Err::Failure(e) => Error {
input: input.into(),
offset: e.input.as_ptr() as usize - input.as_ptr() as usize,
kind: if let Some(kind) = e.kind {
kind
} else if let Some(ctx) = e.context {
ErrorKind::Context(ctx)
} else {
ErrorKind::Other
},
},
Err::Incomplete(_) => Error {
input: input.into(),
offset: input.len() - 1,
kind: ErrorKind::IncompleteInput,
},
}),
}
}

1
src/node.rs Normal file
View File

@ -0,0 +1 @@
/// A single parsed node of a document.
///
/// Currently a field-less placeholder. The common traits are derived so that
/// parse results can be debug-printed, cloned and compared by callers.
#[derive(Debug, Clone, PartialEq)]
pub struct Node;

65
src/parser.rs Normal file
View File

@ -0,0 +1,65 @@
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::combinator::{map, opt};
use nom::multi::{many0, many1};
use nom::sequence::{delimited, preceded};
use nom::IResult;
use crate::error::{ErrorKind, NodeParseError};
use crate::node::Node;
/// Entry point for `document := linespace* (node (newline document)?)?`.
///
/// At present this only applies `node` zero or more times back to back; the
/// `linespace` and `newline` parts of the rule are not handled.
pub(crate) fn nodes(input: &str) -> IResult<&str, Vec<Node>, NodeParseError<&str>> {
    // TODO: this is wrong
    let mut repeated_nodes = many0(node);
    repeated_nodes(input)
}
/// `node := identifier (node-space node-argument)* (node-space node-document)?`
///
/// Parses the identifier, then any number of space-separated arguments, then
/// an optional space-separated children block. Building the resulting `Node`
/// is not implemented yet, so a successful parse of those parts ends in a
/// `todo!()` panic.
pub(crate) fn node(input: &str) -> IResult<&str, Node, NodeParseError<&str>> {
    let (rest, name) = identifier(input)?;
    let (rest, arguments) = many0(preceded(node_space, node_arg))(rest)?;
    let (rest, child_nodes) = opt(preceded(node_space, node_children))(rest)?;
    todo!();
}
/// `identifier := [a-zA-Z] [a-zA-Z0-9!#$%&'*+\-./:<>?@\^_|~]* | string`
///
/// Not implemented yet: calling this panics via `todo!()`.
fn identifier(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
todo!()
}
/// `node-space := ws* escline ws* | ws+`
///
/// Tries an escaped line break with optional whitespace on either side first,
/// and falls back to a run of one or more whitespace items. The matched text
/// is discarded.
fn node_space(input: &str) -> IResult<&str, (), NodeParseError<&str>> {
    let escaped_break = delimited(many0(whitespace), escline, many0(whitespace));
    let whitespace_run = map(many1(whitespace), |_| ());
    alt((escaped_break, whitespace_run))(input)
}
/// `single-line-comment := '//' ('\r' [^\n] | [^\r\n])* newline`
///
/// Not implemented yet: calling this panics via `todo!()`.
fn single_line_comment(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
todo!()
}
/// `multi-line-comment := '/*' ('*' [^\/] | [^*])* '*/'`
///
/// Not implemented yet: calling this panics via `todo!()`.
fn multi_line_comment(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
todo!()
}
/// `escline := '\\' ws* (single-line-comment | newline)`
fn escline(input: &str) -> IResult<&str, (), NodeParseError<&str>> {
let (input, _) = tag("\\")(input)?;
let (input, _) = many0(whitespace)(input)?;
let (input, _) = alt((single_line_comment, newline))(input)?;
Ok((input, ()))
}
/// `ws := bom | ' ' | '\t' | multi-line-comment`
///
/// Matches exactly one whitespace item and returns the matched text.
fn whitespace(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
    // `bom` is taken to be the Unicode byte order mark, U+FEFF.
    // NOTE(review): confirm against the grammar's `bom` rule.
    alt((
        tag("\u{FEFF}"),
        tag(" "),
        tag("\t"),
        multi_line_comment,
    ))(input)
}
/// `newline := ('\r' '\n') | '\n'`
///
/// Matches a CRLF pair or a lone LF and returns the matched text. CRLF is
/// tried first.
fn newline(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
    let crlf = tag("\r\n");
    let lf = tag("\n");
    alt((crlf, lf))(input)
}