diff --git a/src/node.rs b/src/node.rs
index a47f03d..8e6d80b 100644
--- a/src/node.rs
+++ b/src/node.rs
@@ -1 +1,18 @@
-pub struct Node;
+use std::collections::HashMap;
+
+/// A single parsed node: a name plus its values, properties and child nodes.
+#[derive(Debug, Clone)]
+pub struct Node {
+    /// Identifier the node was declared with.
+    pub name: String,
+    /// Value arguments, in the order they were parsed.
+    pub values: Vec<NodeValue>,
+    /// Keyed arguments; a repeated key keeps the last value parsed.
+    pub properties: HashMap<String, NodeValue>,
+    /// Child nodes; empty when the node has none.
+    pub children: Vec<Node>,
+}
+
+/// A value attached to a [`Node`]; no variants are defined yet.
+#[derive(Debug, Clone)]
+pub enum NodeValue {}
diff --git a/src/parser.rs b/src/parser.rs
index 0941362..58df343 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1,12 +1,15 @@
+use std::collections::HashMap;
+
 use nom::branch::alt;
 use nom::bytes::complete::tag;
-use nom::combinator::{map, opt};
+use nom::character::complete::{alpha1, alphanumeric1, anychar, one_of};
+use nom::combinator::{map, not, opt, recognize};
 use nom::multi::{many0, many1};
-use nom::sequence::{delimited, preceded};
+use nom::sequence::{delimited, pair, preceded};
 use nom::IResult;
 
 use crate::error::{ErrorKind, NodeParseError};
-use crate::node::Node;
+use crate::node::{Node, NodeValue};
 
 /// `document := linespace* (node (newline document)?)?`
 pub(crate) fn nodes(input: &str) -> IResult<&str, Vec<Node>, NodeParseError<&str>> {
@@ -14,16 +17,69 @@ pub(crate) fn nodes(input: &str) -> IResult<&str, Vec<Node>, NodeParseError<&str
     many0(node)(input)
 }
 
+/// One parsed node argument: either a bare value or a keyed property.
+#[derive(Clone)]
+enum NodeArg<'a> {
+    Value(NodeValue),
+    Property(&'a str, NodeValue),
+}
+
 /// `node := identifier (node-space node-argument)* (node-space node-document)?`
 pub(crate) fn node(input: &str) -> IResult<&str, Node, NodeParseError<&str>> {
     let (input, tag) = identifier(input)?;
     let (input, args) = many0(preceded(node_space, node_arg))(input)?;
     let (input, children) = opt(preceded(node_space, node_children))(input)?;
-    todo!();
+    // Split the arguments in one pass: values keep their parse order, and a
+    // repeated property key keeps the last value parsed.
+    let mut values = Vec::new();
+    let mut properties = HashMap::new();
+    for arg in args {
+        match arg {
+            NodeArg::Value(value) => values.push(value),
+            NodeArg::Property(key, value) => {
+                properties.insert(String::from(key), value);
+            }
+        }
+    }
+    Ok((
+        input,
+        Node {
+            name: tag.into(),
+            values,
+            properties,
+            children: children.unwrap_or_default(),
+        },
+    ))
 }
 
-/// `identifier := [a-zA-Z] [a-zA-Z0-9!#$%&'*+\-./:<>?@\^_|~]* | string`
+/// `identifier := [a-zA-Z_] [a-zA-Z0-9!#$%&'*+\-./:<>?@\^_|~]* | string`
 fn identifier(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
+    alt((
+        recognize(pair(
+            alt((alpha1, tag("_"))),
+            many0(alt((
+                alphanumeric1,
+                // Every symbol in the grammar's trailing character class.
+                recognize(one_of("!#$%&'*+-./:<>?@^_|~")),
+            ))),
+        )),
+        string,
+    ))(input)
+}
+
+/// Parses one node argument (not implemented yet).
+fn node_arg(input: &str) -> IResult<&str, NodeArg, NodeParseError<&str>> {
+    todo!()
+}
+
+/// Parses a node's child nodes (not implemented yet).
+fn node_children(input: &str) -> IResult<&str, Vec<Node>, NodeParseError<&str>> {
+    todo!()
+}
+
+// TODO: This should be much more specific about what escapes are allowed.
+/// `string := '"' ('\\' ["\\] | [^"])* '"'`
+fn string(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
     todo!()
 }
 
@@ -37,12 +93,25 @@ fn node_space(input: &str) -> IResult<&str, (), NodeParseError<&str>> {
 
 /// `single-line-comment := '//' ('\r' [^\n] | [^\r\n])* newline`
 fn single_line_comment(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
-    todo!()
+    let (input, _) = tag("//")(input)?;
+    // `not` is a zero-width lookahead, so each alternative pairs it with
+    // `anychar`; `many0` rejects an inner parser that consumes nothing.
+    let (input, comment) = recognize(many0(alt((
+        preceded(tag("\r"), preceded(not(tag("\n")), anychar)),
+        preceded(not(one_of("\r\n")), anychar),
+    ))))(input)?;
+    let (input, _) = newline(input)?;
+    Ok((input, comment))
 }
 
 /// `multi-line-comment := '/*' ('*' [^\/] | [^*])* '*/'`
 fn multi_line_comment(input: &str) -> IResult<&str, &str, NodeParseError<&str>> {
-    todo!()
+    // Consume one character at a time until the closing `*/` is next.
+    delimited(
+        tag("/*"),
+        recognize(many0(preceded(not(tag("*/")), anychar))),
+        tag("*/"),
+    )(input)
 }
 
 /// `escline := '\\' ws* (single-line-comment | newline)`