From 6d1a516eb92415f99f7a5170ac61ce3252d6a4b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Sun, 4 Dec 2022 19:06:50 -0800 Subject: [PATCH] feat(kql): implement KQL query engine (#61) Fixes: https://github.com/kdl-org/kdl-rs/issues/32 This implements a proposed draft of a KQL engine, allowing CSS selector-style querying of KDL documents (and nodes) in a variety of different ways. --- Cargo.toml | 8 +- examples/insert-node.rs | 2 +- src/document.rs | 77 +++++- src/entry.rs | 14 +- src/lib.rs | 3 + src/node.rs | 59 +++- src/parser.rs | 8 +- src/query.rs | 334 +++++++++++++++++++++++ src/query_parser.rs | 591 ++++++++++++++++++++++++++++++++++++++++ src/value.rs | 37 ++- tests/query_api.rs | 182 +++++++++++++ tests/query_matchers.rs | 314 +++++++++++++++++++++ tests/query_ops.rs | 321 ++++++++++++++++++++++ tests/query_syntax.rs | 56 ++++ 14 files changed, 1993 insertions(+), 13 deletions(-) create mode 100644 src/query.rs create mode 100644 src/query_parser.rs create mode 100644 tests/query_api.rs create mode 100644 tests/query_matchers.rs create mode 100644 tests/query_ops.rs create mode 100644 tests/query_syntax.rs diff --git a/Cargo.toml b/Cargo.toml index 00108bf..1b5a292 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,10 @@ default = ["span"] span = [] [dependencies] -miette = "5.3.0" -nom = { version = "7.1.1", default-features = false } +miette = "5.5.0" +nom = "7.1.1" thiserror = "1.0.30" + +[dev-dependencies] +miette = { version = "5.5.0", features = ["fancy"] } +pretty_assertions = "1.3.0" diff --git a/examples/insert-node.rs b/examples/insert-node.rs index fe008bf..d744ce4 100644 --- a/examples/insert-node.rs +++ b/examples/insert-node.rs @@ -29,7 +29,7 @@ words { word_nodes.sort_by(sort_by_name); words_section.fmt(); - println!("{}", doc.to_string()); + println!("{}", doc); // output: // words { diff --git a/src/document.rs b/src/document.rs index ad5b9ad..85f1b46 100644 --- a/src/document.rs +++ b/src/document.rs @@ -2,7 +2,7 
@@ use miette::SourceSpan; use std::{fmt::Display, str::FromStr}; -use crate::{parser, KdlError, KdlNode, KdlValue}; +use crate::{parser, IntoKdlQuery, KdlError, KdlNode, KdlQueryIterator, KdlValue, NodeKey}; /// Represents a KDL /// [`Document`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#document). @@ -18,7 +18,7 @@ use crate::{parser, KdlError, KdlNode, KdlValue}; /// # use kdl::KdlDocument; /// let kdl: KdlDocument = "foo 1 2 3\nbar 4 5 6".parse().expect("parse failed"); /// ``` -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Eq)] pub struct KdlDocument { pub(crate) leading: Option, pub(crate) nodes: Vec, @@ -36,6 +36,15 @@ impl PartialEq for KdlDocument { } } +impl std::hash::Hash for KdlDocument { + fn hash(&self, state: &mut H) { + self.leading.hash(state); + self.nodes.hash(state); + self.trailing.hash(state); + // Intentionally omitted: self.span.hash(state) + } +} + impl Default for KdlDocument { fn default() -> Self { Self { @@ -241,6 +250,70 @@ impl KdlDocument { pub fn fmt_no_comments(&mut self) { self.fmt_impl(0, true); } + + /// Queries this Document's children according to the KQL query language, + /// returning an iterator over all matching nodes. + /// + /// # NOTE + /// + /// Any query selectors that try to select the toplevel `scope()` will + /// fail to match when using this method, since there's no [`KdlNode`] to + /// return in this case. + pub fn query_all(&self, query: impl IntoKdlQuery) -> Result, KdlError> { + let parsed = query.into_query()?; + Ok(KdlQueryIterator::new(None, Some(self), parsed)) + } + + /// Queries this Document's children according to the KQL query language, + /// returning the first match, if any. + /// + /// # NOTE + /// + /// Any query selectors that try to select the toplevel `scope()` will + /// fail to match when using this method, since there's no [`KdlNode`] to + /// return in this case. 
+ pub fn query(&self, query: impl IntoKdlQuery) -> Result, KdlError> { + let mut iter = self.query_all(query)?; + Ok(iter.next()) + } + + /// Queries this Document's children according to the KQL query language, + /// picking the first match, and calling `.get(key)` on it, if the query + /// succeeded. + /// + /// # NOTE + /// + /// Any query selectors that try to select the toplevel `scope()` will + /// fail to match when using this method, since there's no [`KdlNode`] to + /// return in this case. + pub fn query_get( + &self, + query: impl IntoKdlQuery, + key: impl Into, + ) -> Result, KdlError> { + Ok(self.query(query)?.and_then(|node| node.get(key))) + } + + /// Queries this Document's children according to the KQL query language, + /// returning an iterator over all matching nodes, returning the requested + /// field from each of those nodes and filtering out nodes that don't have + /// it. + /// + /// # NOTE + /// + /// Any query selectors that try to select the toplevel `scope()` will + /// fail to match when using this method, since there's no [`KdlNode`] to + /// return in this case. + pub fn query_get_all( + &self, + query: impl IntoKdlQuery, + key: impl Into, + ) -> Result, KdlError> { + let key: NodeKey = key.into(); + Ok(self + .query_all(query)? 
+ .filter_map(move |node| node.get(key.clone()))) + } } impl Display for KdlDocument { diff --git a/src/entry.rs b/src/entry.rs index 7d8e43d..e867afe 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -8,7 +8,7 @@ use crate::{parser, KdlError, KdlIdentifier, KdlValue}; /// [`Argument`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#argument) or /// a (key/value) /// [`Property`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#property) -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Eq)] pub struct KdlEntry { pub(crate) leading: Option, pub(crate) ty: Option, @@ -32,6 +32,18 @@ impl PartialEq for KdlEntry { } } +impl std::hash::Hash for KdlEntry { + fn hash(&self, state: &mut H) { + self.leading.hash(state); + self.ty.hash(state); + self.value.hash(state); + self.value_repr.hash(state); + self.name.hash(state); + self.trailing.hash(state); + // intentionally omitted: self.span.hash(state) + } +} + impl KdlEntry { /// Creates a new Argument (positional) KdlEntry. pub fn new(value: impl Into) -> Self { diff --git a/src/lib.rs b/src/lib.rs index dbcb4b0..30c6af0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -136,6 +136,7 @@ pub use entry::*; pub use error::*; pub use identifier::*; pub use node::*; +pub use query::*; pub use value::*; mod document; @@ -146,4 +147,6 @@ mod identifier; mod node; mod nom_compat; mod parser; +mod query; +mod query_parser; mod value; diff --git a/src/node.rs b/src/node.rs index b478bc9..a499ffa 100644 --- a/src/node.rs +++ b/src/node.rs @@ -7,12 +7,15 @@ use std::{ #[cfg(feature = "span")] use miette::SourceSpan; -use crate::{parser, KdlDocument, KdlEntry, KdlError, KdlIdentifier, KdlValue}; +use crate::{ + parser, IntoKdlQuery, KdlDocument, KdlEntry, KdlError, KdlIdentifier, KdlQueryIterator, + KdlValue, +}; /// Represents an individual KDL /// [`Node`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#node) inside a /// KDL Document. 
-#[derive(Debug, Clone)] +#[derive(Debug, Clone, Eq)] pub struct KdlNode { pub(crate) leading: Option, pub(crate) ty: Option, @@ -39,6 +42,19 @@ impl PartialEq for KdlNode { } } +impl std::hash::Hash for KdlNode { + fn hash(&self, state: &mut H) { + self.leading.hash(state); + self.ty.hash(state); + self.name.hash(state); + self.entries.hash(state); + self.before_children.hash(state); + self.children.hash(state); + self.trailing.hash(state); + // Intentionally omitted: self.span.hash(state); + } +} + impl KdlNode { /// Creates a new KdlNode with a given name. pub fn new(name: impl Into) -> Self { @@ -414,6 +430,45 @@ impl KdlNode { pub fn fmt_no_comments(&mut self) { self.fmt_impl(0, true); } + + /// Queries this Node according to the KQL query language, + /// returning an iterator over all matching nodes. + pub fn query_all(&self, query: impl IntoKdlQuery) -> Result, KdlError> { + let q = query.into_query()?; + Ok(KdlQueryIterator::new(Some(self), None, q)) + } + + /// Queries this Node according to the KQL query language, + /// returning the first match, if any. + pub fn query(&self, query: impl IntoKdlQuery) -> Result, KdlError> { + Ok(self.query_all(query)?.next()) + } + + /// Queries this Node according to the KQL query language, + /// picking the first match, and calling `.get(key)` on it, if the query + /// succeeded. + pub fn query_get( + &self, + query: impl IntoKdlQuery, + key: impl Into, + ) -> Result, KdlError> { + Ok(self.query(query)?.and_then(|node| node.get(key))) + } + + /// Queries this Node according to the KQL query language, + /// returning an iterator over all matching nodes, returning the requested + /// field from each of those nodes and filtering out nodes that don't have + /// it. + pub fn query_get_all( + &self, + query: impl IntoKdlQuery, + key: impl Into, + ) -> Result, KdlError> { + let key: NodeKey = key.into(); + Ok(self + .query_all(query)? 
+ .filter_map(move |node| node.get(key.clone()))) + } } /// Represents a [`KdlNode`]'s entry key. diff --git a/src/parser.rs b/src/parser.rs index 719c241..74be3ed 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -89,7 +89,7 @@ impl<'a> KdlParser<'a> { /// /// Note that substr must be a literal substring, as in it must be /// a pointer into the same string! - fn span_from_substr(&self, substr: &str) -> SourceSpan { + pub(crate) fn span_from_substr(&self, substr: &str) -> SourceSpan { let base_addr = self.full_input.as_ptr() as usize; let substr_addr = substr.as_ptr() as usize; assert!( @@ -388,7 +388,7 @@ fn argument<'a: 'b, 'b>( } } -fn value(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> { +pub(crate) fn value(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> { alt(( null, boolean, @@ -416,7 +416,7 @@ fn children<'a: 'b, 'b>( } } -fn annotation<'a: 'b, 'b>( +pub(crate) fn annotation<'a: 'b, 'b>( kdl_parser: &'b KdlParser<'a>, ) -> impl Fn(&'a str) -> IResult<&'a str, KdlIdentifier, KdlParseError<&'a str>> + 'b { move |input| { @@ -471,7 +471,7 @@ fn escline(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { ))(input).map_err(|e| set_details(e, input, Some("line escape starts here"), Some("line escapes can only be followed by whitespace plus a newline (or single-line comment)."))) } -fn unicode_space(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { +pub(crate) fn unicode_space(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { alt(( tag(" "), tag("\t"), diff --git a/src/query.rs b/src/query.rs new file mode 100644 index 0000000..c41a35a --- /dev/null +++ b/src/query.rs @@ -0,0 +1,334 @@ +use std::{collections::VecDeque, str::FromStr, sync::Arc}; + +use crate::{query_parser::KdlQueryParser, KdlDocument, KdlError, KdlNode, KdlValue}; + +/// A parsed KQL query. For details on the syntax, see the [KQL +/// spec](https://github.com/kdl-org/kdl/blob/main/QUERY-SPEC.md). 
+#[derive(Debug, Clone, PartialEq)] +pub struct KdlQuery(pub(crate) Vec); + +impl FromStr for KdlQuery { + type Err = KdlError; + + fn from_str(s: &str) -> Result { + let parser = KdlQueryParser::new(s); + parser.parse(crate::query_parser::query(&parser)) + } +} + +/// A trait that tries to convert something into a [`KdlQuery`]. +pub trait IntoKdlQuery: IntoQuerySealed {} + +impl IntoKdlQuery for KdlQuery {} +impl IntoKdlQuery for String {} +impl<'a> IntoKdlQuery for &'a str {} +impl<'a> IntoKdlQuery for &'a String {} + +#[doc(hidden)] +pub trait IntoQuerySealed { + fn into_query(self) -> Result; +} + +impl IntoQuerySealed for KdlQuery { + fn into_query(self) -> Result { + Ok(self) + } +} + +impl IntoQuerySealed for &str { + fn into_query(self) -> Result { + self.parse() + } +} + +impl IntoQuerySealed for String { + fn into_query(self) -> Result { + self.parse() + } +} + +impl IntoQuerySealed for &String { + fn into_query(self) -> Result { + self.parse() + } +} + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct KdlQuerySelector(pub(crate) Vec); + +impl KdlQuerySelector { + fn matches(&self, crumb: Arc>, scope: Option<&KdlNode>) -> bool { + if self.0.is_empty() { + // I don't think this is possible, but just in case. + return false; + } + + let mut segments = self.0.iter().rev(); + let end = segments + .next() + .expect("This should've had at least one item."); + + // When doing a query from a node, instead of a document, we have to + // skip matching on the node itself, unless the query is just + // `scope()`. + if let Some(scope) = &scope { + // We're in node-query mode. + if crumb.next.is_none() { + // We're almost definitely looking at the scope node itself, + // but just check. We'll do no further processing. + if end.is_scope() { + return crumb.node == *scope; + } else { + return false; + } + } + } + + if !end.matcher.matches(crumb.node) { + // If the final segment doesn't even match the node, don't bother + // looking any further. 
+ return false; + } + + let mut node = crumb.node; + let mut next = crumb.next.clone(); + let mut parent_doc = crumb.parent_doc; + 'segments: for segment in segments { + use KdlSegmentCombinator::*; + match segment.op.as_ref().expect("This should've had an op.") { + Child | Descendant => { + while let Some(crumb) = next.clone() { + if segment.matcher.matches(crumb.node) { + continue 'segments; + } + + // We only loop once if the op is `Child`. Otherwise, + // we keep going up the tree! + if segment.op == Some(Child) { + break; + } + + next = crumb.next.clone(); + if let Some(crumb) = &next { + node = crumb.node; + } + parent_doc = crumb.parent_doc; + } + + if segment.is_scope() { + return next.map(|crumb| crumb.node) == scope; + } + + return false; + } + Neighbor | Sibling => { + if let Some(parent) = &parent_doc { + for n in parent + .nodes() + .iter() + .rev() + .skip_while(|n| !std::ptr::eq(*n, node)) + .skip(1) + { + if segment.matcher.matches(n) { + node = n; + continue 'segments; + } + if segment.op == Some(Neighbor) { + break; + } + } + } + return false; + } + } + } + + true + } +} + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct KdlQuerySelectorSegment { + pub(crate) op: Option, + pub(crate) matcher: KdlQueryMatcher, +} + +impl KdlQuerySelectorSegment { + fn is_scope(&self) -> bool { + self.matcher.0.len() == 1 && self.matcher.0[0].accessor == KdlQueryMatcherAccessor::Scope + } +} + +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum KdlSegmentCombinator { + Child, + Descendant, + Neighbor, + Sibling, +} + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct KdlQueryMatcher(pub(crate) Vec); + +impl KdlQueryMatcher { + pub(crate) fn matches(&self, node: &KdlNode) -> bool { + self.0.iter().all(|m| m.matches(node)) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct KdlQueryMatcherDetails { + pub(crate) accessor: KdlQueryMatcherAccessor, + pub(crate) op: KdlQueryAttributeOp, + pub(crate) value: Option, +} + +impl KdlQueryMatcherDetails { 
+    /// Tests a single matcher component against `node`.
+    ///
+    /// * `scope()` never matches here and always yields `false`.
+    /// * Node-name and type-annotation matchers compare as strings; a
+    ///   non-string comparison value never matches. Without a value, an
+    ///   annotation matcher requires that the node has some type annotation
+    ///   and a node matcher accepts every node.
+    /// * Argument/property matchers compare the entry returned by
+    ///   `node.get(..)` (`arg()` without an index reads index `0`). Without a
+    ///   value they only check that the entry exists.
+    ///
+    /// A missing left-hand side (no annotation, argument or property) only
+    /// satisfies `!=`; it never satisfies an ordering or string operator.
+    pub(crate) fn matches(&self, node: &KdlNode) -> bool {
+        use KdlQueryAttributeOp::*;
+        use KdlQueryMatcherAccessor::*;
+
+        match (&self.accessor, &self.op, &self.value) {
+            (Scope, _, _) => false,
+            (Annotation | Node, op, Some(KdlValue::String(s) | KdlValue::RawString(s))) => {
+                let lhs = match &self.accessor {
+                    Annotation => node.ty().map(|ty| ty.value()),
+                    Node => Some(node.name().value()),
+                    _ => unreachable!(),
+                };
+                let ss = Some(&s[..]);
+                match op {
+                    Equal => lhs == ss,
+                    NotEqual => lhs != ss,
+                    Gt => lhs > ss,
+                    Gte => lhs >= ss,
+                    // `None` orders below every `Some(_)`, so without the
+                    // `is_some()` guard a node with no annotation would
+                    // satisfy `<`/`<=` (but never `>`/`>=`).
+                    Lt => lhs.is_some() && lhs < ss,
+                    Lte => lhs.is_some() && lhs <= ss,
+                    StartsWith => lhs.map(|lhs| lhs.starts_with(s)).unwrap_or(false),
+                    EndsWith => lhs.map(|lhs| lhs.ends_with(s)).unwrap_or(false),
+                    Contains => lhs.map(|lhs| lhs.contains(s)).unwrap_or(false),
+                }
+            }
+            (Annotation | Node, _op, Some(_)) => false,
+            // This is `()blah`.
+            (Annotation, _, None) => node.ty().is_some(),
+            // This is `[]`.
+            (Node, _, None) => true,
+            (Arg(_) | Prop(_), op, val @ Some(_)) => {
+                let val = val.as_ref();
+                let lhs = match &self.accessor {
+                    Arg(Some(idx)) => node.get(*idx),
+                    Arg(None) => node.get(0),
+                    Prop(name) => node.get(&name[..]),
+                    _ => unreachable!(),
+                };
+                // String operators only apply when both sides are strings.
+                // The query parser accepts them for arg/prop accessors as
+                // long as the right-hand side is a string, so this arm is
+                // reachable and must not panic.
+                let strs = match (lhs, val) {
+                    (
+                        Some(KdlValue::String(l) | KdlValue::RawString(l)),
+                        Some(KdlValue::String(r) | KdlValue::RawString(r)),
+                    ) => Some((&l[..], &r[..])),
+                    _ => None,
+                };
+                match &op {
+                    Equal => lhs == val,
+                    NotEqual => lhs != val,
+                    Gt => lhs > val,
+                    Gte => lhs >= val,
+                    // Same `None < Some(_)` guard as for names/annotations:
+                    // a missing argument or property is not "less than"
+                    // anything.
+                    Lt => lhs.is_some() && lhs < val,
+                    Lte => lhs.is_some() && lhs <= val,
+                    StartsWith => strs.map(|(l, r)| l.starts_with(r)).unwrap_or(false),
+                    EndsWith => strs.map(|(l, r)| l.ends_with(r)).unwrap_or(false),
+                    Contains => strs.map(|(l, r)| l.contains(r)).unwrap_or(false),
+                }
+            }
+            (Arg(_) | Prop(_), _op, None) => match &self.accessor {
+                Arg(Some(idx)) => node.get(*idx).is_some(),
+                Arg(None) => node.get(0).is_some(),
+                Prop(name) => node.get(&name[..]).is_some(),
+                _ => unreachable!(),
+            },
+        }
+    }
+}
+
+/// Comparison operator of an attribute matcher (`=`, `!=`, `>`, `>=`, `<`,
+/// `<=`, `^=`, `$=`, `*=` in query syntax).
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) enum KdlQueryAttributeOp {
+    Equal,
+    NotEqual,
+    Gt,
+    Gte,
+    Lt,
+    Lte,
+    StartsWith,
+    EndsWith,
+    Contains,
+}
+
+/// The part of a node that a matcher inspects: the query scope, the node
+/// name, the type annotation, a positional argument, or a named property.
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) enum KdlQueryMatcherAccessor {
+    Scope,
+    Node,
+    Annotation,
+    Arg(Option),
+    Prop(String),
+}
+#[derive(Debug, Clone)] +struct Breadcrumb<'a> { + node: &'a KdlNode, + parent_doc: Option<&'a KdlDocument>, + next: Option>>, +} + +/// Iterator for results of a KDL query over a [`KdlDocument`]. +#[derive(Debug, Clone)] +pub struct KdlQueryIterator<'a> { + scope: Option<&'a KdlNode>, + query: KdlQuery, + q: VecDeque>>, +} + +impl<'a> KdlQueryIterator<'a> { + pub(crate) fn new( + scope: Option<&'a KdlNode>, + ctx_doc: Option<&'a KdlDocument>, + query: KdlQuery, + ) -> Self { + let mut q = VecDeque::new(); + if let Some(scope) = scope { + q.push_back(Arc::new(Breadcrumb { + node: scope, + parent_doc: None, + next: None, + })); + } else if let Some(doc) = ctx_doc { + for node in doc.nodes() { + q.push_front(Arc::new(Breadcrumb { + node, + parent_doc: Some(doc), + next: None, + })); + } + } + Self { scope, query, q } + } +} + +impl<'a> Iterator for KdlQueryIterator<'a> { + type Item = &'a KdlNode; + + fn next(&mut self) -> Option { + while let Some(crumb) = self.q.pop_back() { + if let Some(children) = crumb.node.children() { + for node in children.nodes().iter().rev() { + self.q.push_back(Arc::new(Breadcrumb { + node, + parent_doc: Some(children), + next: Some(crumb.clone()), + })); + } + } + for selector in &self.query.0 { + if selector.matches(crumb.clone(), self.scope) { + return Some(crumb.node); + } + } + } + + // Otherwise, we're done! Just return None and the iterator's done. 
+ None + } +} diff --git a/src/query_parser.rs b/src/query_parser.rs new file mode 100644 index 0000000..23d482d --- /dev/null +++ b/src/query_parser.rs @@ -0,0 +1,591 @@ +use crate::nom_compat::many0; +use crate::parser::{value, KdlParser}; +use crate::query::{ + KdlQuery, KdlQueryAttributeOp, KdlQueryMatcher, KdlQueryMatcherAccessor, + KdlQueryMatcherDetails, KdlQuerySelector, KdlQuerySelectorSegment, KdlSegmentCombinator, +}; +use crate::{KdlError, KdlErrorKind, KdlParseError, KdlValue}; +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::combinator::{all_consuming, cut, map, opt, recognize}; +use nom::error::context; +use nom::multi::separated_list1; +use nom::sequence::{delimited, preceded, terminated}; +use nom::{Finish, IResult, Offset, Parser}; + +pub(crate) struct KdlQueryParser<'a>(KdlParser<'a>); + +impl<'a> KdlQueryParser<'a> { + pub(crate) fn new(full_input: &'a str) -> Self { + Self(KdlParser::new(full_input)) + } + + pub(crate) fn parse(&self, parser: P) -> Result + where + P: Parser<&'a str, T, KdlParseError<&'a str>>, + { + all_consuming(parser)(self.0.full_input) + .finish() + .map(|(_, arg)| arg) + .map_err(|e| { + let span_substr = &e.input[..e.len]; + KdlError { + input: self.0.full_input.into(), + span: self.0.span_from_substr(span_substr), + help: if let Some(help) = e.help { + Some(help) + } else if e.kind.is_none() && e.context.is_none() { + Some("The general syntax for queries is '(type)nodename[prop=value], anothernode, etc'. 
For more details, please see https://github.com/kdl-org/kdl/blob/main/QUERY-SPEC.md") + } else { + None + }, + label: e.label, + kind: if let Some(kind) = e.kind { + kind + } else if let Some(ctx) = e.context { + KdlErrorKind::Context(ctx) + } else { + KdlErrorKind::Context("a valid KQL query") + }, + } + }) + } +} + +fn set_details<'a>( + mut err: nom::Err>, + start: &'a str, + label: Option<&'static str>, + help: Option<&'static str>, +) -> nom::Err> { + match &mut err { + nom::Err::Error(e) | nom::Err::Failure(e) => { + if !e.touched { + e.len = start.offset(e.input); + e.input = start; + e.label = label; + e.help = help; + e.touched = true; + } + } + _ => {} + } + err +} + +pub(crate) fn query<'a: 'b, 'b>( + kdl_parser: &'b KdlQueryParser<'a>, +) -> impl FnMut(&'a str) -> IResult<&'a str, KdlQuery, KdlParseError<&'a str>> + 'b { + map( + separated_list1( + delimited(whitespace, tag(","), whitespace), + query_selector(kdl_parser), + ), + KdlQuery, + ) +} + +fn query_selector<'a: 'b, 'b>( + kdl_parser: &'b KdlQueryParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlQuerySelector, KdlParseError<&'a str>> + 'b { + move |input| { + let mut segments = Vec::new(); + let mut is_scope = true; + let mut input = input; + loop { + let (inp, _) = whitespace(input)?; + input = inp; + let (inp, matchers) = node_matchers(kdl_parser, is_scope)(input)?; + input = inp; + let (inp, _) = whitespace(input)?; + input = inp; + let (inp, op) = opt(segment_combinator)(input)?; + input = inp; + let is_last = op.is_none(); + segments.push(KdlQuerySelectorSegment { + op, + matcher: KdlQueryMatcher(matchers), + }); + if is_last { + break; + } + is_scope = false; + } + let (input, _) = whitespace(input)?; + Ok((input, KdlQuerySelector(segments))) + } +} + +fn segment_combinator(input: &str) -> IResult<&str, KdlSegmentCombinator, KdlParseError<&str>> { + alt(( + map(tag(">>"), |_| KdlSegmentCombinator::Descendant), + map(tag(">"), |_| KdlSegmentCombinator::Child), + map(tag("++"), |_| 
KdlSegmentCombinator::Sibling), + map(tag("+"), |_| KdlSegmentCombinator::Neighbor), + ))(input) +} + +fn node_matchers<'a: 'b, 'b>( + kdl_parser: &'b KdlQueryParser<'a>, + is_scope: bool, +) -> impl Fn(&'a str) -> IResult<&'a str, Vec, KdlParseError<&'a str>> + 'b +{ + move |input| { + let mut matchers = Vec::new(); + + let (input, _) = whitespace(input)?; + + let start = input; + let (input, scope) = opt(scope_accessor)(input)?; + if let Some(xsr) = scope { + if is_scope { + matchers.push(KdlQueryMatcherDetails { + op: KdlQueryAttributeOp::Equal, + accessor: xsr, + value: None, + }); + return Ok((input, matchers)); + } else { + return Err(nom::Err::Error(KdlParseError { + input: start, + len: start.len() - input.len(), + kind: None, + label: Some("scope()"), + help: Some("Make sure scope() precedes any other items within a (comma-separated) selector."), + touched: false, + context: Some("scope() to be the first item in this selector"), + })); + } + } + + let (input, details) = opt(annotation_matcher(kdl_parser))(input)?; + if let Some(details) = details { + matchers.push(details); + let start = input; + let (input, typed) = opt(annotation_matcher(kdl_parser))(input)?; + if typed.is_some() { + return Err(nom::Err::Error(KdlParseError { + input: start, + len: start.len() - input.len(), + kind: None, + label: Some("type annotation"), + help: Some("The syntax for node selectors is (type)node[attribute=value]."), + touched: false, + context: Some("only one type annotation per selector"), + })); + } + } + + let (input, node) = opt(crate::parser::identifier(&kdl_parser.0))(input)?; + if let Some(node) = node { + matchers.push(KdlQueryMatcherDetails { + op: KdlQueryAttributeOp::Equal, + value: Some(KdlValue::String(node.value().to_owned())), + accessor: KdlQueryMatcherAccessor::Node, + }); + } + + let start = input; + let (input, typed) = opt(annotation_matcher(kdl_parser))(input)?; + if typed.is_some() { + return Err(nom::Err::Error(KdlParseError { + input: start, + 
len: start.len() - input.len(), + kind: None, + label: Some("type annotation"), + help: Some("The syntax for node selectors is (type)node[attribute=value]."), + touched: false, + context: Some("type annotation to not be used after a node name"), + })); + } + + let start = input; + let (input, mut attribute_matchers) = many0(attribute_matcher(kdl_parser))(input)?; + matchers.append(&mut attribute_matchers); + + if matchers.is_empty() { + Err(nom::Err::Error(KdlParseError { + input: start, + len: 0, + kind: None, + label: Some("node matcher"), + help: Some("node matcher must not be empty"), + touched: false, + context: Some("a valid node matcher"), + })) + } else { + // Check for trailing type annotations. + let start = input; + let (end, typed) = opt(annotation_matcher(kdl_parser))(input)?; + if typed.is_some() { + return Err(nom::Err::Error(KdlParseError { + input: start, + len: start.len() - end.len(), + kind: None, + label: Some("type annotation"), + help: Some("The syntax for node selectors is (type)node[attribute=value]."), + touched: false, + context: Some("type annotation to come before attribute matcher(s)"), + })); + } + + // Check for trailing node name matcher. 
+ let (end, ident) = opt(crate::parser::identifier(&kdl_parser.0))(input)?; + if ident.is_some() { + return Err(nom::Err::Error(KdlParseError { + input: start, + len: start.len() - end.len(), + kind: None, + label: Some("node name"), + help: Some("The syntax for node selectors is (type)node[attribute=value]."), + touched: false, + context: Some("node name to come before attribute matcher(s)"), + })); + } + + Ok((input, matchers)) + } + } +} + +fn attribute_matcher<'a: 'b, 'b>( + kdl_parser: &'b KdlQueryParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlQueryMatcherDetails, KdlParseError<&'a str>> + 'b { + move |input| { + let start = input; + let (input, _) = tag("[")(input)?; + let (input, _) = whitespace(input)?; + let (input, matcher) = attribute_matcher_inner(kdl_parser)(input)?; + let (input, _) = whitespace(input)?; + let (input, _) = context("a closing ']' for this attribute matcher", cut(tag("]")))(input) + .map_err(|e| set_details(e, start, Some("partial attribute matcher"), None))?; + + Ok((input, matcher)) + } +} + +fn attribute_matcher_inner<'a: 'b, 'b>( + kdl_parser: &'b KdlQueryParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlQueryMatcherDetails, KdlParseError<&'a str>> + 'b { + move |input| { + let (input, xsr) = opt(accessor(kdl_parser))(input)?; + if let Some(xsr) = xsr { + let (input, _) = whitespace(input)?; + let (input, op) = opt(attribute_op)(input)?; + let (input, _) = whitespace(input)?; + if let Some(op) = op { + let prev = input; + let (input, val) = opt(crate::parser::value)(input)?; + // Make sure it's a syntax error to try and use string + // operators with non-string arguments. 
+ if let Some((_, value)) = val { + if matches!( + op, + KdlQueryAttributeOp::StartsWith + | KdlQueryAttributeOp::EndsWith + | KdlQueryAttributeOp::Contains + ) { + if value.is_string() { + Ok(( + input, + KdlQueryMatcherDetails { + op, + value: Some(value), + accessor: xsr, + }, + )) + } else { + Err(nom::Err::Failure(KdlParseError { + input: prev, + len: prev.len() - input.len(), + kind: None, + label: Some("non-string operator value"), + help: Some("Only strings can be used as arguments for string-related operators (*=, ^=, $=)."), + touched: false, + context: Some("a string as an operator value"), + })) + } + } else { + Ok(( + input, + KdlQueryMatcherDetails { + op, + value: Some(value), + accessor: xsr, + }, + )) + } + } else { + Err(nom::Err::Failure(KdlParseError { + input: prev, + len: 0, + kind: None, + label: Some("operator value"), + help: Some("Only valid KDL values can be used on the right hand side of attribute matcher operators."), + touched: false, + context: Some("a valid operator argument"), + })) + } + } else { + Ok(( + input, + KdlQueryMatcherDetails { + op: KdlQueryAttributeOp::Equal, + value: None, + accessor: xsr, + }, + )) + } + } else { + Ok(( + input, + KdlQueryMatcherDetails { + op: KdlQueryAttributeOp::Equal, + value: None, + accessor: KdlQueryMatcherAccessor::Node, + }, + )) + } + } +}
+
+/// Parses one attribute-matcher comparison operator.
+///
+/// `alt` returns the first alternative that succeeds, so `>=` and `<=` must
+/// be tried before `>` and `<`. Otherwise `>=` is consumed as `>` and a stray
+/// `=` is left in front of the operator value.
+fn attribute_op(input: &str) -> IResult<&str, KdlQueryAttributeOp, KdlParseError<&str>> {
+    alt((
+        map(tag("="), |_| KdlQueryAttributeOp::Equal),
+        map(tag("!="), |_| KdlQueryAttributeOp::NotEqual),
+        // Longer operators first: `>` is a prefix of `>=`, `<` of `<=`.
+        map(tag(">="), |_| KdlQueryAttributeOp::Gte),
+        map(tag(">"), |_| KdlQueryAttributeOp::Gt),
+        map(tag("<="), |_| KdlQueryAttributeOp::Lte),
+        map(tag("<"), |_| KdlQueryAttributeOp::Lt),
+        map(tag("^="), |_| KdlQueryAttributeOp::StartsWith),
+        map(tag("$="), |_| KdlQueryAttributeOp::EndsWith),
+        map(tag("*="), |_| KdlQueryAttributeOp::Contains),
+    ))(input)
+}
+
+fn annotation_matcher<'a: 'b, 'b>( + kdl_parser: &'b KdlQueryParser<'a>, +) -> impl Fn(&'a
str) -> IResult<&'a str, KdlQueryMatcherDetails, KdlParseError<&'a str>> + 'b { + move |input| { + let start = input; + let (input, _) = tag("(")(input)?; + let (input, _) = whitespace(input)?; + let (input, ty) = opt(crate::parser::identifier(&kdl_parser.0))(input)?; + let (input, _) = context("closing ')' for type annotation", cut(tag(")")))(input) + .map_err(|e| set_details(e, start, Some("annotation"), Some("annotations can only be KDL identifiers (including string identifiers), and can't have any space inside the parentheses.")))?; + Ok(( + input, + KdlQueryMatcherDetails { + op: KdlQueryAttributeOp::Equal, + value: ty.map(|ident| KdlValue::String(ident.value().to_owned())), + accessor: KdlQueryMatcherAccessor::Annotation, + }, + )) + } +} + +fn scope_accessor(input: &str) -> IResult<&str, KdlQueryMatcherAccessor, KdlParseError<&str>> { + let start = input; + let (input, _) = tag("scope(")(input)?; + let (input, _) = context( + "a valid scope accessor", + cut(preceded(whitespace, tag(")"))), + )(input) + .map_err(|e| set_details(e, start, Some("partial scope accessor"), None))?; + Ok((input, KdlQueryMatcherAccessor::Scope)) +} + +fn accessor<'a: 'b, 'b>( + kdl_parser: &'b KdlQueryParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlQueryMatcherAccessor, KdlParseError<&'a str>> + 'b { + move |input| { + let (input, accessor) = alt(( + type_accessor, + arg_accessor, + prop_accessor(kdl_parser), + prop_name_accessor(kdl_parser), + bad_accessor(kdl_parser), + ))(input)?; + + Ok((input, accessor)) + } +} + +fn type_accessor(input: &str) -> IResult<&str, KdlQueryMatcherAccessor, KdlParseError<&str>> { + let start = input; + let (input, _) = tag("type")(input)?; + let (input, _) = context( + "an opening '(' for a 'type()' accessor", + preceded(whitespace, tag("(")), + )(input) + .map_err(|e| set_details(e, start, Some("partial type accessor"), None))?; + let (input, _) = context( + "a closing ')' for this 'type()' accessor", + cut(preceded(whitespace, 
tag(")"))), + )(input) + .map_err(|e| { + set_details( + e, + start, + Some("partial type accessor"), + Some("type() accessors don't take any arguments. Use e.g. [type() = \"foo\"] instead."), + ) + })?; + Ok((input, KdlQueryMatcherAccessor::Annotation)) +} + +fn arg_accessor(input: &str) -> IResult<&str, KdlQueryMatcherAccessor, KdlParseError<&str>> { + let (input, _) = tag("arg")(input)?; + let (input, arg) = parenthesized_arg(input)?; + if let Some(arg) = arg { + if let Some(index) = arg + .as_i64() + .and_then(|arg| -> Option { arg.try_into().ok() }) + { + Ok((input, KdlQueryMatcherAccessor::Arg(Some(index)))) + } else { + Err(nom::Err::Error(KdlParseError { + input, + len: 0, + kind: None, + label: Some("arg accessor"), + help: Some("arg accessor must be an integer"), + touched: false, + context: Some("a valid arg accessor"), + })) + } + } else { + Ok((input, KdlQueryMatcherAccessor::Arg(None))) + } +} + +fn prop_name_accessor<'a: 'b, 'b>( + kdl_parser: &'b KdlQueryParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlQueryMatcherAccessor, KdlParseError<&'a str>> + 'b { + move |input| { + let start = input; + let (input, prop_name) = crate::parser::identifier(&kdl_parser.0)(input)?; + let (_, paren) = opt(preceded(whitespace, tag("(")))(input)?; + if paren.is_some() { + Err(nom::Err::Error(KdlParseError { + input: start, + len: 0, + kind: None, + label: Some("accessor"), + help: Some("accessor must be one of: type(), arg(), prop(), propname"), + touched: false, + context: Some("a valid accessor"), + })) + } else { + Ok(( + input, + KdlQueryMatcherAccessor::Prop(prop_name.value().to_owned()), + )) + } + } +} + +fn prop_accessor<'a: 'b, 'b>( + kdl_parser: &'b KdlQueryParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlQueryMatcherAccessor, KdlParseError<&'a str>> + 'b { + move |input| { + let (input, _) = tag("prop")(input)?; + let (input, val) = parenthesized_prop(kdl_parser)(input)?; + Ok((input, KdlQueryMatcherAccessor::Prop(val))) + } +} + +fn 
parenthesized_arg(input: &str) -> IResult<&str, Option, KdlParseError<&str>> { + let (input, _) = tag("(")(input)?; + let (input, maybe_value) = opt(value)(input)?; + let (input, _) = tag(")")(input)?; + + if let Some((_, val)) = maybe_value { + Ok((input, Some(val))) + } else { + Ok((input, None)) + } +} + +fn parenthesized_prop<'a: 'b, 'b>( + kdl_parser: &'b KdlQueryParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, String, KdlParseError<&'a str>> + 'b { + move |input| { + let (input, _) = tag("(")(input)?; + let (input, prop) = crate::parser::identifier(&kdl_parser.0)(input)?; + let (input, _) = tag(")")(input)?; + Ok((input, prop.value().to_owned())) + } +} + +fn bad_accessor<'a: 'b, 'b>( + kdl_parser: &'b KdlQueryParser<'a>, +) -> impl Fn(&'a str) -> IResult<&'a str, KdlQueryMatcherAccessor, KdlParseError<&'a str>> + 'b { + move |input| { + let start = input; + + let (input, scope) = opt(preceded( + tag("scope"), + preceded( + whitespace, + opt(terminated(tag("("), opt(preceded(whitespace, tag(")"))))), + ), + ))(input)?; + + if scope.is_some() { + return Err(nom::Err::Failure(KdlParseError { + input: start, + len: start.len() - input.len(), + kind: None, + label: Some("incorrect scope() accessor"), + help: Some("Accessors must be one of: type(), arg(), prop(), propname"), + touched: false, + context: Some( + "'scope()' to be the first item only at the top level of the query selector", + ), + })); + } + + let (input, ident) = opt(terminated( + crate::parser::identifier(&kdl_parser.0), + preceded( + whitespace, + terminated(tag("("), opt(preceded(whitespace, tag(")")))), + ), + ))(input)?; + + if let Some(ident) = ident { + match ident.value() { + "type" | "arg" | "prop" | "val" => {} + _ => { + return Err(nom::Err::Failure(KdlParseError { + input: start, + len: start.len() - input.len(), + kind: None, + label: Some("invalid attribute accessor"), + help: Some("Accessors must be one of: type(), arg(), prop(), propname"), + touched: false, + context: Some("a 
valid attribute accessor"), + })); + } + } + } + + Err(nom::Err::Error(KdlParseError { + input: start, + len: 0, + kind: None, + label: Some("accessor"), + help: Some("accessor must be one of: type(), arg(), prop(), propname"), + touched: false, + context: Some("a valid accessor"), + })) + } +} + +fn whitespace(input: &str) -> IResult<&str, &str, KdlParseError<&str>> { + recognize(many0(alt(( + crate::parser::unicode_space, + crate::parser::newline, + ))))(input) +} diff --git a/src/value.rs b/src/value.rs index 929c047..b0eee6a 100644 --- a/src/value.rs +++ b/src/value.rs @@ -1,7 +1,7 @@ use std::fmt::Display; /// A specific [KDL Value](https://github.com/kdl-org/kdl/blob/main/SPEC.md#value). -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, PartialOrd)] pub enum KdlValue { /// A [KDL Raw String](https://github.com/kdl-org/kdl/blob/main/SPEC.md#raw-string). RawString(String), @@ -41,6 +41,41 @@ pub enum KdlValue { Null, } +impl Eq for KdlValue {} + +// NOTE: I know, I know. This is terrible and I shouldn't do it, but it's +// better than not being able to hash KdlValue at all. +#[allow(clippy::derive_hash_xor_eq)] +impl std::hash::Hash for KdlValue { + fn hash(&self, state: &mut H) { + match self { + KdlValue::RawString(val) => val.hash(state), + KdlValue::String(val) => val.hash(state), + KdlValue::Base2(val) => val.hash(state), + KdlValue::Base8(val) => val.hash(state), + KdlValue::Base10(val) => val.hash(state), + KdlValue::Base10Float(val) => { + let val = if val == &f64::INFINITY { + f64::MAX + } else if val == &f64::NEG_INFINITY { + -f64::MAX + } else if val.is_nan() { + // We collapse NaN to 0.0 because we're evil like that. + 0.0 + } else { + *val + }; + // Good enough to be close-ish for our purposes. 
+ (val.trunc() as i64).hash(state); + (val.fract() as i64).hash(state); + } + KdlValue::Base16(val) => val.hash(state), + KdlValue::Bool(val) => val.hash(state), + KdlValue::Null => core::mem::discriminant(self).hash(state), + } + } +} + impl KdlValue { /// Returns `true` if the value is a [`KdlValue::RawString`]. pub fn is_raw_string(&self) -> bool { diff --git a/tests/query_api.rs b/tests/query_api.rs new file mode 100644 index 0000000..755ac86 --- /dev/null +++ b/tests/query_api.rs @@ -0,0 +1,182 @@ +use kdl::{KdlDocument, KdlQuery}; +use miette::Result; + +#[test] +fn document_query_all() -> Result<()> { + let doc = "foo\nbar\nbaz".parse::()?; + let results = doc.query_all("bar")?; + assert_eq!(results.count(), 1); + let results = doc.query_all(String::from("bar"))?; + assert_eq!(results.count(), 1); + let results = doc.query_all(&String::from("bar"))?; + assert_eq!(results.count(), 1); + let results = doc.query_all("bar".parse::()?)?; + assert_eq!(results.count(), 1); + + let results = doc.query_all("scope()")?; + assert_eq!( + results.count(), + 0, + "scope() on its own doesn't return anything if querying from a doc." 
+ ); + + Ok(()) +} + +#[test] +fn document_query() -> Result<()> { + let doc = "foo\nbar\nbaz".parse::()?; + + assert!(doc.query("bar")?.is_some()); + assert!(doc.query(String::from("bar"))?.is_some()); + assert!(doc.query(&String::from("bar"))?.is_some()); + assert!(doc.query("bar".parse::()?)?.is_some()); + + assert!(doc.query("scope()")?.is_none()); + + Ok(()) +} + +#[test] +fn document_query_get() -> Result<()> { + let doc = "foo\nbar true\nbaz".parse::()?; + + assert_eq!(doc.query_get("bar", 0)?, Some(&true.into())); + assert_eq!(doc.query_get(String::from("bar"), 0)?, Some(&true.into())); + assert_eq!(doc.query_get(&String::from("bar"), 0)?, Some(&true.into())); + assert_eq!( + doc.query_get("bar".parse::()?, 0)?, + Some(&true.into()) + ); + + Ok(()) +} + +#[test] +fn document_query_get_all() -> Result<()> { + let doc = "foo\nbar true\nbaz false".parse::()?; + + assert_eq!( + doc.query_get_all("[]", 0)?.collect::>(), + vec![&true.into(), &false.into()] + ); + assert_eq!(doc.query_get_all(String::from("[]"), 0)?.count(), 2); + assert_eq!(doc.query_get_all(&String::from("[]"), 0)?.count(), 2); + assert_eq!(doc.query_get_all("[]".parse::()?, 0)?.count(), 2); + + Ok(()) +} + +#[test] +fn node_query_all() -> Result<()> { + let doc = r#" + foo + bar { + a { + b + } + } + baz + "# + .parse::()?; + let node = doc.query("bar")?.unwrap(); + + let results = node.query_all("b")?; + assert_eq!(results.count(), 1); + let results = node.query_all(String::from("b"))?; + assert_eq!(results.count(), 1); + let results = node.query_all(&String::from("b"))?; + assert_eq!(results.count(), 1); + let results = node.query_all("b".parse::()?)?; + assert_eq!(results.count(), 1); + + let results = node.query_all("scope()")?.collect::>(); + assert_eq!(results[0], node); + + let results = node.query_all("scope() > a".parse::()?)?; + assert_eq!(results.count(), 1); + + let results = node.query_all("scope() > b".parse::()?)?; + assert_eq!(results.count(), 0); + + Ok(()) +} + +#[test] +fn 
node_query() -> Result<()> { + let doc = r#" + foo + bar { + a { + b + } + } + baz + "# + .parse::()?; + let node = doc.query("bar")?.unwrap(); + + assert!(node.query("b")?.is_some()); + assert!(node.query(String::from("b"))?.is_some()); + assert!(node.query(&String::from("b"))?.is_some()); + assert!(node.query("b".parse::()?)?.is_some()); + + assert_eq!(node.query("scope()")?, Some(node)); + assert!(node.query("scope() > a")?.is_some()); + assert!(node.query("scope() > b")?.is_none()); + + Ok(()) +} + +#[test] +fn node_query_get() -> Result<()> { + let doc = r#" + foo + bar 1 2 3 { + a false { + b true + } + } + baz + "# + .parse::()?; + let node = doc.query("bar")?.unwrap(); + + assert_eq!(node.query_get("b", 0)?, Some(&true.into())); + assert_eq!(node.query_get(String::from("b"), 0)?, Some(&true.into())); + assert_eq!(node.query_get(&String::from("b"), 0)?, Some(&true.into())); + assert_eq!( + node.query_get("b".parse::()?, 0)?, + Some(&true.into()) + ); + + assert_eq!(node.query_get("scope()", 0)?, Some(&1.into())); + assert_eq!(node.query_get("scope() > a", 0)?, Some(&false.into())); + assert!(node.query_get("scope() > b", "prop")?.is_none()); + Ok(()) +} + +#[test] +fn node_query_get_all() -> Result<()> { + let doc = r#" + foo + bar 1 2 3 { + a false { + b true + } + } + baz + "# + .parse::()?; + let node = doc.query("bar")?.unwrap(); + + assert_eq!( + node.query_get_all("[]", 0)?.collect::>(), + vec![&false.into(), &true.into()] + ); + assert_eq!(node.query_get_all(String::from("[]"), 0)?.count(), 2); + assert_eq!(node.query_get_all(&String::from("[]"), 0)?.count(), 2); + assert_eq!(node.query_get_all("[]".parse::()?, 0)?.count(), 2); + + Ok(()) +} diff --git a/tests/query_matchers.rs b/tests/query_matchers.rs new file mode 100644 index 0000000..28a3ce4 --- /dev/null +++ b/tests/query_matchers.rs @@ -0,0 +1,314 @@ +use kdl::{KdlDocument, KdlNode}; +use miette::Result; +use pretty_assertions::assert_eq; + +#[test] +fn scope_alone() -> Result<()> { + let doc: 
KdlDocument = r#" + foo { + bar + baz + } + bar + baz + "# + .parse()?; + + let results = doc.query_all("scope()")?.collect::>(); + + assert_eq!(results, Vec::<&KdlNode>::new()); + + let results = doc.nodes()[0] + .query_all("scope()")? + .collect::>(); + + assert_eq!(results, vec![&doc.nodes()[0]]); + + Ok(()) +} + +#[test] +fn scope_only_at_top() -> Result<()> { + let doc: KdlDocument = r#" + foo { + bar + } + "# + .parse()?; + + assert!( + doc.query_all("foo >> scope()").is_err(), + "scope() must be at the top level" + ); + + Ok(()) +} + +#[test] +fn any_descendants() -> Result<()> { + let doc: KdlDocument = r#" + foo + bar + baz + "# + .parse()?; + + let results = doc.query_all("bar")?.collect::>(); + + assert_eq!(results, vec![&doc.nodes()[1]]); + Ok(()) +} + +#[test] +fn prop_matcher() -> Result<()> { + let doc: KdlDocument = r#" + foo { + bar p=1 + baz + } + bar p=1 + baz p=2 { + foo { + bar p=1 { + bar p=2 + } + } + } + "# + .parse()?; + + let results = doc.query_all("[p = 2]")?.collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[2], + &doc.nodes()[2].children().unwrap().nodes()[0] + .children() + .unwrap() + .nodes()[0] + .children() + .unwrap() + .nodes()[0] + ] + ); + + let results = doc.query_all("[p = 1]")?.collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[0].children().unwrap().nodes()[0], + &doc.nodes()[1], + &doc.nodes()[2].children().unwrap().nodes()[0] + .children() + .unwrap() + .nodes()[0] + ] + ); + + assert_eq!( + doc.query_all("[prop(p) = 1]")?.collect::>(), + results + ); + + let results = doc.query_all("[p]")?.collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[0].children().unwrap().nodes()[0], + &doc.nodes()[1], + &doc.nodes()[2], + &doc.nodes()[2].children().unwrap().nodes()[0] + .children() + .unwrap() + .nodes()[0], + &doc.nodes()[2].children().unwrap().nodes()[0] + .children() + .unwrap() + .nodes()[0] + .children() + .unwrap() + .nodes()[0] + ] + ); + + assert_eq!( + 
doc.query_all("[prop(p)]")?.collect::>(), + results + ); + + Ok(()) +} + +#[test] +fn empty_arg_matcher() -> Result<()> { + let doc: KdlDocument = r#" + foo { + bar 1 + baz + } + bar 2 + baz { + foo { + bar 1 { + bar + } + } + } + "# + .parse()?; + + let results = doc.query_all("[arg() = 1]")?.collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[0].children().unwrap().nodes()[0], + &doc.nodes()[2].children().unwrap().nodes()[0] + .children() + .unwrap() + .nodes()[0] + ] + ); + + let results = doc.query_all("[arg()]")?.collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[0].children().unwrap().nodes()[0], + &doc.nodes()[1], + &doc.nodes()[2].children().unwrap().nodes()[0] + .children() + .unwrap() + .nodes()[0] + ] + ); + + Ok(()) +} + +#[test] +fn indexed_arg_matcher() -> Result<()> { + let doc: KdlDocument = r#" + foo { + bar 1 2 + baz + } + bar 2 1 + baz { + foo { + bar 1 2 { + bar 1 3 2 + } + } + } + "# + .parse()?; + + let results = doc.query_all("[arg(1) = 2]")?.collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[0].children().unwrap().nodes()[0], + &doc.nodes()[2].children().unwrap().nodes()[0] + .children() + .unwrap() + .nodes()[0] + ] + ); + + let results = doc.query_all("[arg(2) = 2]")?.collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[2].children().unwrap().nodes()[0] + .children() + .unwrap() + .nodes()[0] + .children() + .unwrap() + .nodes()[0] + ] + ); + + Ok(()) +} + +#[test] +fn type_annotation_matcher() -> Result<()> { + let doc: KdlDocument = r#" + foo { + (here)bar + baz + } + bar + baz { + (here)foo { + bar { + (here)bar + } + } + } + "# + .parse()?; + + let results = doc.query_all("(here)")?.collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[0].children().unwrap().nodes()[0], + &doc.nodes()[2].children().unwrap().nodes()[0], + &doc.nodes()[2].children().unwrap().nodes()[0] + .children() + .unwrap() + .nodes()[0] + .children() + .unwrap() + .nodes()[0] + ] + ); + + let results = doc + 
.query_all("[type() = \"here\"]")? + .collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[0].children().unwrap().nodes()[0], + &doc.nodes()[2].children().unwrap().nodes()[0], + &doc.nodes()[2].children().unwrap().nodes()[0] + .children() + .unwrap() + .nodes()[0] + .children() + .unwrap() + .nodes()[0] + ] + ); + + let results = doc.query_all("()")?.collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[0].children().unwrap().nodes()[0], + &doc.nodes()[2].children().unwrap().nodes()[0], + &doc.nodes()[2].children().unwrap().nodes()[0] + .children() + .unwrap() + .nodes()[0] + .children() + .unwrap() + .nodes()[0] + ] + ); + + Ok(()) +} diff --git a/tests/query_ops.rs b/tests/query_ops.rs new file mode 100644 index 0000000..39dda49 --- /dev/null +++ b/tests/query_ops.rs @@ -0,0 +1,321 @@ +use kdl::{KdlDocument, KdlNode}; +use miette::Result; +use pretty_assertions::assert_eq; + +#[test] +fn scope_with_all_children() -> Result<()> { + let doc: KdlDocument = r#" + foo { + bar + baz + } + bar + baz + "# + .parse()?; + + let results = doc.query_all("scope() > []")?.collect::>(); + + assert_eq!(&results, &doc.nodes().iter().collect::>()); + Ok(()) +} + +#[test] +fn scope_child_by_name() -> Result<()> { + let doc: KdlDocument = r#" + foo { + bar + baz + } + bar { + a + b + } + baz + "# + .parse()?; + + let results = doc.query_all("scope() > bar")?.collect::>(); + + assert_eq!(results, vec![&doc.nodes()[1]]); + + // Scope from a specific node. + let results = results[0] + .query_all("scope() > a")? 
+ .collect::>(); + + assert_eq!( + results, + vec![&doc.nodes()[1].children().unwrap().nodes()[0]] + ); + + Ok(()) +} + +#[test] +fn scope_descendants() -> Result<()> { + let doc: KdlDocument = r#" + foo { + bar + baz + } + bar + baz + "# + .parse()?; + + let results = doc.query_all("scope() >> bar")?.collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[0].children().unwrap().nodes()[0], + &doc.nodes()[1] + ] + ); + Ok(()) +} + +#[test] +fn scope_only_at_top() -> Result<()> { + let doc: KdlDocument = r#" + foo { + bar + } + "# + .parse()?; + + assert!( + doc.query_all("foo >> scope()").is_err(), + "scope() must be at the top level" + ); + + Ok(()) +} + +#[test] +fn any_descendants() -> Result<()> { + let doc: KdlDocument = r#" + foo { + bar + baz + } + bar + baz + "# + .parse()?; + + let results = doc.query_all("bar")?.collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[0].children().unwrap().nodes()[0], + &doc.nodes()[1] + ] + ); + Ok(()) +} + +#[test] +fn node_descendants() -> Result<()> { + let doc: KdlDocument = r#" + foo { + bar + baz + } + bar + baz { + foo { + bar { + bar + } + } + } + "# + .parse()?; + + let results = doc.query_all("foo >> bar")?.collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[0].children().unwrap().nodes()[0], + &doc.nodes()[2].children().unwrap().nodes()[0] + .children() + .unwrap() + .nodes()[0], + &doc.nodes()[2].children().unwrap().nodes()[0] + .children() + .unwrap() + .nodes()[0] + .children() + .unwrap() + .nodes()[0] + ] + ); + Ok(()) +} + +#[test] +fn node_children() -> Result<()> { + let doc: KdlDocument = r#" + foo { + bar + baz + } + bar + baz { + foo { + bar { + bar + } + } + } + "# + .parse()?; + + let results = doc.query_all("foo > bar")?.collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[0].children().unwrap().nodes()[0], + &doc.nodes()[2].children().unwrap().nodes()[0] + .children() + .unwrap() + .nodes()[0] + ] + ); + Ok(()) +} + +#[test] +fn node_neighbor() -> 
Result<()> { + let doc: KdlDocument = r#" + foo { + bar + baz + } + bar + baz + "# + .parse()?; + + let results = doc.query_all("foo + bar")?.collect::>(); + + assert_eq!(results, vec![&doc.nodes()[1]]); + + let results = doc.query_all("foo + bar + baz")?.collect::>(); + + assert_eq!(results, vec![&doc.nodes()[2]]); + + Ok(()) +} + +#[test] +fn node_sibling() -> Result<()> { + let doc: KdlDocument = r#" + foo { + bar + baz + } + bar + baz + quux + other + "# + .parse()?; + + let results = doc.query_all("foo ++ bar")?.collect::>(); + + assert_eq!(results, vec![&doc.nodes()[1]]); + + let results = doc.query_all("foo ++ baz")?.collect::>(); + + assert_eq!(results, vec![&doc.nodes()[2]]); + + let results = doc + .query_all("foo ++ bar ++ other")? + .collect::>(); + + assert_eq!(results, vec![&doc.nodes()[4]]); + + Ok(()) +} + +#[test] +fn multiple_selectors() -> Result<()> { + let doc: KdlDocument = r#" + foo { + bar + baz + } + bar + baz { + foo { + bar { + bar + } + } + } + "# + .parse()?; + + let results = doc.query_all("foo, baz")?.collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[0], + &doc.nodes()[0].children().unwrap().nodes()[1], + &doc.nodes()[2], + &doc.nodes()[2].children().unwrap().nodes()[0] + ], + "First match all the `foo`s, then all the `baz`s." + ); + + Ok(()) +} + +#[test] +fn all_combined() -> Result<()> { + let doc: KdlDocument = r#" + foo { + bar { + baz { + foo { + bar { + bar + } + } + bar + baz + quux + other + } + } + } + bar + baz + "# + .parse()?; + + let results = doc + .query_all("foo >> baz > foo + bar ++ other")? 
+ .collect::>(); + + assert_eq!( + results, + vec![ + &doc.nodes()[0].children().unwrap().nodes()[0] + .children() + .unwrap() + .nodes()[0] + .children() + .unwrap() + .nodes()[4] + ] + ); + Ok(()) +} diff --git a/tests/query_syntax.rs b/tests/query_syntax.rs new file mode 100644 index 0000000..4d6ebbf --- /dev/null +++ b/tests/query_syntax.rs @@ -0,0 +1,56 @@ +use kdl::KdlDocument; +use miette::Result; + +#[test] +fn syntax_errors() -> Result<()> { + macro_rules! assert_syntax_errors { + ($(($input:expr, $msg:expr, ($offset:expr, $len:expr))),*) => { + $( + let err = "node".parse::() + .unwrap() + .query_all($input) + .expect_err("query parse should've failed."); + assert_eq!(err.to_string(), $msg, "unexpected error message"); + assert_eq!(err.span.offset(), $offset, "unexpected span offset"); + assert_eq!(err.span.len(), $len, "unexpected span length"); + )* + } + } + + assert_syntax_errors! { + ("", "Expected a valid node matcher.", (0, 0)), + (" scope(", "Expected a valid scope accessor.", (1, 6)), + ("(", "Expected closing ')' for type annotation.", (0, 1)), + (")", "Expected a valid node matcher.", (0, 0)), + ("[", "Expected a closing ']' for this attribute matcher.", (0, 1)), + ("]", "Expected a valid node matcher.", (0, 0)), + ("a b", "Expected a valid KQL query.", (2, 0)), + ("a\nb", "Expected a valid KQL query.", (2, 0)), + (",", "Expected a valid node matcher.", (0, 0)), + ("[] > scope( )", "Expected scope() to be the first item in this selector.", (5, 8)), + ("()(type)", "Expected only one type annotation per selector.", (2, 6)), + ("(type)()", "Expected only one type annotation per selector.", (6, 2)), + ("name(type)", "Expected type annotation to not be used after a node name.", (4, 6)), + ("[]name", "Expected node name to come before attribute matcher(s).", (2, 4)), + ("[]()", "Expected type annotation to come before attribute matcher(s).", (2, 2)), + ("[type(blah)]", "Expected a closing ')' for this 'type()' accessor.", (1, 5)), + ("[scope()]", 
"Expected 'scope()' to be the first item only at the top level of the query selector.", (1, 7)), + ("[scope ( )]", "Expected 'scope()' to be the first item only at the top level of the query selector.", (1, 9)), + ("[other()]", "Expected a valid attribute accessor.", (1, 7)), + ("[arg()1]", "Expected a closing ']' for this attribute matcher.", (0, 6)), + ("[arg() 1]", "Expected a closing ']' for this attribute matcher.", (0, 7)), + ("[arg()=identifier]", "Expected a valid operator argument.", (7, 0)), + // // Only string values are allowed here. + ("[arg()*=1]", "Expected a string as an operator value.", (8, 1)), + ("[arg()^=1]", "Expected a string as an operator value.", (8, 1)), + ("[arg()$=1]", "Expected a string as an operator value.", (8, 1)), + ("[arg()*=null]", "Expected a string as an operator value.", (8, 4)), + ("[arg()^=null]", "Expected a string as an operator value.", (8, 4)), + ("[arg()$=null]", "Expected a string as an operator value.", (8, 4)), + ("[arg()*=true]", "Expected a string as an operator value.", (8, 4)), + ("[arg()^=true]", "Expected a string as an operator value.", (8, 4)), + ("[arg()$=true]", "Expected a string as an operator value.", (8, 4)) + } + + Ok(()) +}