From d7670e17b4609008b0069a182b08c12a00f91393 Mon Sep 17 00:00:00 2001 From: Lily Ballard Date: Wed, 16 Dec 2020 17:43:13 -0800 Subject: [PATCH] Add line, column to KdlError (#3) * Add line and column numbers to KdlError These numbers are calculated using our `newline` rule, as I'm not aware of any standard for how to calculate line numbers in error messages. The error retains the original `offset` field as well, but it's now expressed in characters rather than bytes. * Clean up error handling in parse_document() We don't need to worry about `Incomplete` errors, the `complete` family of parsers (which we're using) doesn't ever produce those. We can use the `.finish()` function to unwrap the error instead, so we don't have to worry about the underlying error types. --- src/error.rs | 9 +++-- src/lib.rs | 36 ++++++++++++-------- src/parser.rs | 91 ++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 115 insertions(+), 21 deletions(-) diff --git a/src/error.rs b/src/error.rs index 7fd6345..d217570 100644 --- a/src/error.rs +++ b/src/error.rs @@ -4,10 +4,15 @@ use nom::error::{ContextError, ErrorKind, FromExternalError, ParseError}; use thiserror::Error; #[derive(Debug, Clone, Eq, PartialEq, Error)] -#[error("Error parsing document. {kind}")] +#[error("Error parsing document at line {line} column {column}. {kind}")] pub struct KdlError { pub input: String, + /// Offset in chars of the error. pub offset: usize, + /// 1-based line number of the error. + pub line: usize, + /// 1-based column number (in chars) of the error. + pub column: usize, pub kind: KdlErrorKind, } @@ -19,8 +24,6 @@ pub enum KdlErrorKind { ParseFloatError(ParseFloatError), #[error("Failed to parse {0} component of semver string.")] Context(&'static str), - #[error("Incomplete input to semver parser.")] - IncompleteInput, #[error("An unspecified error occurred.")] Other, } diff --git a/src/lib.rs b/src/lib.rs index 40063a7..83952fb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ use nom::combinator::all_consuming; -use nom::Err; +use nom::Finish; pub use crate::error::{KdlError, KdlErrorKind}; pub use crate::node::{KdlNode, KdlNodeValue}; @@ -13,12 +13,17 @@ where I: AsRef, { let input = &input.as_ref()[..]; - match all_consuming(parser::nodes)(input) { - Ok((_, arg)) => Ok(arg), - Err(err) => Err(match err { - Err::Error(e) | Err::Failure(e) => KdlError { + all_consuming(parser::nodes)(input) + .finish() + .map(|(_, arg)| arg) + .map_err(|e| { + let prefix = &input[..(input.len() - e.input.len())]; + let (line, column) = calculate_line_column(prefix); + KdlError { input: input.into(), - offset: e.input.as_ptr() as usize - input.as_ptr() as usize, + offset: prefix.chars().count(), + line, + column, kind: if let Some(kind) = e.kind { kind } else if let Some(ctx) = e.context { @@ -26,12 +31,15 @@ where } else { KdlErrorKind::Other }, - }, - Err::Incomplete(_) => KdlError { - input: input.into(), - offset: input.len() - 1, - kind: KdlErrorKind::IncompleteInput, - }, - }), - } + } + }) +} + +/// Calculates the line and column of the end of a `&str`. +/// +/// If the line ends on a newline, the (line, column) pair is placed on the previous line instead. +fn calculate_line_column(input: &str) -> (usize, usize) { + let (input, skipped_lines) = parser::count_leading_lines(input); + let input = parser::strip_trailing_newline(input); + (skipped_lines + 1, input.len() + 1) // +1 as we're 1-based } diff --git a/src/parser.rs b/src/parser.rs index 61e9987..43dd15a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,12 +1,15 @@ -use std::collections::HashMap; +use std::{collections::HashMap, iter::from_fn}; +use nom::branch::alt; use nom::bytes::complete::{tag, take_until, take_while_m_n}; use nom::character::complete::{alpha1, alphanumeric1, anychar, char, none_of, one_of}; -use nom::combinator::{eof, map, map_opt, map_res, opt, recognize, value}; -use nom::multi::{fold_many0, many0, many1}; +use nom::combinator::{ + all_consuming, eof, iterator, map, map_opt, map_res, not, opt, recognize, value, +}; +use nom::multi::{fold_many0, many0, many1, many_till}; use nom::sequence::{delimited, pair, preceded, terminated, tuple}; +use nom::Finish; use nom::IResult; -use nom::{branch::alt, multi::many_till}; use crate::error::KdlParseError; use crate::node::{KdlNode, KdlNodeValue}; @@ -16,6 +19,44 @@ pub(crate) fn nodes(input: &str) -> IResult<&str, Vec, KdlParseError<&s many0(delimited(many0(linespace), node, newline))(input) } +// The following two functions exist for the purposes of translating offsets into line/column pairs +// for error reporting. We're doing this here so we can make use of our `newline` definition, to +// ensure line/column information is reported accurately based on our definition of newlines, even +// if we update our definition of newlines later. + +/// Counts all lines in the input up to the final line. +/// +/// This counts and skips past all lines terminated in `newline` with the exception of the final +/// line, regardless of whether it's newline-terminated. If the input only contains a single line, +/// the input will be returned unmodified with a count of `0`. +pub(crate) fn count_leading_lines(input: &str) -> (&str, usize) { + let mut iter = iterator( + input, + terminated(many_till(value((), anychar), newline), not(eof)), + ); + let count = (&mut iter).count(); + match iter.finish().finish() { + Ok((input, _)) => (input, count), + // I don't believe this particular parser can error, but we need to handle it anyway + Err(e) => (e.input, count), + } +} + +/// Strips a single trailing `newline`, if present, from the input. +pub(crate) fn strip_trailing_newline(input: &str) -> &str { + // Nom doesn't support parsing in reverse, but we want to reuse our newline definition. The + // longest newline sequence is 2 characters, so we can just test the last char, and the + // second-to-last char, and validate that the parser consumes the full input. + let mut idx_iter = input.char_indices().map(|(idx, _)| idx); + let mut last = idx_iter.next_back(); + let mut second_last = idx_iter.next_back(); + // Start with the second-to-last, otherwise \r\n will be parsed as just the \n. + from_fn(|| second_last.take().or_else(|| last.take())) + .find(|&idx| all_consuming(newline)(&input[idx..]).is_ok()) + .map(|idx| &input[..idx]) + .unwrap_or(input) +} + #[derive(Clone)] enum NodeArg { Value(KdlNodeValue), @@ -473,4 +514,46 @@ mod tests { assert!(newline("\r").is_err()); assert!(newline("blah").is_err()); } + + #[test] + fn test_count_leading_lines() { + assert_eq!(count_leading_lines(""), ("", 0)); + assert_eq!(count_leading_lines("foo"), ("foo", 0)); + assert_eq!(count_leading_lines("foo\n"), ("foo\n", 0)); + assert_eq!(count_leading_lines("foo\nbar"), ("bar", 1)); + assert_eq!(count_leading_lines("foo\nbar\n"), ("bar\n", 1)); + assert_eq!(count_leading_lines("\nfoo\n\nbar\n"), ("bar\n", 3)); + assert_eq!(count_leading_lines("foo\r\nbar\r\n"), ("bar\r\n", 1)); + assert_eq!(count_leading_lines("foo\nbar\rbaz"), ("bar\rbaz", 1)); + assert_eq!(count_leading_lines("foo\nbar\n\n"), ("\n", 2)); + + assert_eq!( + count_leading_lines( + r#"// This example is a GitHub Action if it used KDL syntax. +// See .github/workflows/ci.yml for the file this was based on. +name "CI" + +on "push" "pull_request" + +env { + RUSTFLAGS "-Dwarnings" +"# + ), + (" RUSTFLAGS \"-Dwarnings\"\n", 7) + ); + } + + #[test] + fn test_strip_trailing_newline() { + assert_eq!(strip_trailing_newline(""), ""); + assert_eq!(strip_trailing_newline("foo"), "foo"); + assert_eq!(strip_trailing_newline("foo\n"), "foo"); + assert_eq!(strip_trailing_newline("foo\n\n"), "foo\n"); + assert_eq!(strip_trailing_newline("foo\nbar"), "foo\nbar"); + assert_eq!(strip_trailing_newline("foo\nbar\n"), "foo\nbar"); + assert_eq!(strip_trailing_newline("foo\r\n"), "foo"); + assert_eq!(strip_trailing_newline("\n"), ""); + assert_eq!(strip_trailing_newline("foo\r\n\r"), "foo\r\n\r"); + assert_eq!(strip_trailing_newline("foo\nx"), "foo\nx"); + } }