From f7fa50d8de219d21d479d0658922e10fea905b93 Mon Sep 17 00:00:00 2001 From: Nahor Date: Sun, 3 Mar 2024 12:02:41 -0800 Subject: [PATCH] fix(SourceCode): Fix and improve default `SourceCode` implementation - Fix a panic when a 0 length span covers the end of a document - Fix incorrect `line_count` - Add new unit tests and complete existing ones - Improve readability --- src/source_impls.rs | 265 +++++++++++++++++++++++++++++++------------- 1 file changed, 189 insertions(+), 76 deletions(-) diff --git a/src/source_impls.rs b/src/source_impls.rs index e362b4a..61a7ffb 100644 --- a/src/source_impls.rs +++ b/src/source_impls.rs @@ -5,91 +5,103 @@ use std::{borrow::Cow, collections::VecDeque, fmt::Debug, sync::Arc}; use crate::{MietteError, MietteSpanContents, SourceCode, SourceSpan, SpanContents}; +#[derive(Clone, Copy)] +struct LineInfo { + line_no: usize, + start: usize, + end: usize, +} + fn context_info<'a>( input: &'a [u8], span: &SourceSpan, context_lines_before: usize, context_lines_after: usize, ) -> Result, MietteError> { - let mut offset = 0usize; - let mut line_count = 0usize; - let mut start_line = 0usize; - let mut start_column = 0usize; - let mut before_lines_starts = VecDeque::new(); - let mut current_line_start = 0usize; - let mut end_lines = 0usize; - let mut post_span = false; - let mut post_span_got_newline = false; - let mut iter = input.iter().copied().peekable(); - while let Some(char) = iter.next() { - if matches!(char, b'\r' | b'\n') { - line_count += 1; - if char == b'\r' && iter.next_if_eq(&b'\n').is_some() { - offset += 1; - } - if offset < span.offset() { - // We're before the start of the span. - start_column = 0; - before_lines_starts.push_back(current_line_start); - if before_lines_starts.len() > context_lines_before { - start_line += 1; - before_lines_starts.pop_front(); - } - } else if offset >= span.offset() + span.len().saturating_sub(1) { - // We're after the end of the span, but haven't necessarily - // started collecting end lines yet (we might still be - // collecting context lines). - if post_span { - start_column = 0; - if post_span_got_newline { - end_lines += 1; - } else { - post_span_got_newline = true; - } - if end_lines >= context_lines_after { - offset += 1; - break; - } - } - } - current_line_start = offset + 1; - } else if offset < span.offset() { - start_column += 1; - } - - if offset >= (span.offset() + span.len()).saturating_sub(1) { - post_span = true; - if end_lines >= context_lines_after { - offset += 1; - break; - } - } - - offset += 1; - } - - if offset >= (span.offset() + span.len()).saturating_sub(1) { - let starting_offset = before_lines_starts.front().copied().unwrap_or_else(|| { - if context_lines_before == 0 { - span.offset() - } else { - 0 + let mut iter = input + .split_inclusive(|b| *b == b'\n') + .enumerate() + .map(|(line_no, line)| { + // SAFETY: + // - it is safe to use `offset_from` on slices of an array per Rust design (max array size) + // (https://doc.rust-lang.org/stable/reference/types/numeric.html#machine-dependent-integer-types) + // - since `line` is a slice of `input, the offset cannot be negative either + let offset = unsafe { line.as_ptr().offset_from(input.as_ptr()) } as usize; + LineInfo { + line_no, + start: offset, + end: offset + line.len(), } }); - Ok(MietteSpanContents::new( - &input[starting_offset..offset], - (starting_offset, offset - starting_offset).into(), - start_line, - if context_lines_before == 0 { - start_column - } else { - 0 - }, - line_count, - )) - } else { - Err(MietteError::OutOfBounds) + + // First line handled separately because otherwise, we can't distinguish + // between `None` from an empty document (which we still want to handle as + // a "single empty line"), and `None` from the end of the document. + let mut line_starts = VecDeque::new(); + let mut line_info = match iter.next() { + None => LineInfo { + line_no: 0, + start: 0, + end: 0, + }, + Some(info) => info, + }; + line_starts.push_back(line_info); + + // Get the "before" lines (including the line containing the start + // of the span) + while span.offset() >= line_info.end { + line_info = match iter.next() { + None => break, + Some(info) => info, + }; + + if line_starts.len() > context_lines_before { + line_starts.pop_front(); + } + line_starts.push_back(line_info); } + let (start_lineno, start_offset, start_column) = { + let start_info = line_starts.pop_front().unwrap(); + if context_lines_before > 0 { + (start_info.line_no, start_info.start, 0) + } else { + ( + start_info.line_no, + span.offset(), + span.offset() - start_info.start, + ) + } + }; + + // Find the end of the span + while span.offset() + span.len() > line_info.end { + line_info = match iter.next() { + None => break, + Some(info) => info, + }; + } + + // Get the "after" lines + if let Some(last) = iter.take(context_lines_after).last() { + line_info = last; + } + if span.offset() + span.len() > line_info.end { + return Err(MietteError::OutOfBounds); + } + let (end_lineno, end_offset) = if context_lines_after > 0 { + (line_info.line_no, line_info.end) + } else { + (line_info.line_no, span.offset() + span.len()) + }; + + Ok(MietteSpanContents::new( + &input[start_offset..end_offset], + (start_offset..end_offset).into(), + start_lineno, + start_column, + end_lineno - start_lineno + 1, + )) } impl SourceCode for [u8] { @@ -206,8 +218,10 @@ mod tests { let src = String::from("foo\n"); let contents = src.read_span(&(0, 4).into(), 0, 0)?; assert_eq!("foo\n", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((0, 4)), *contents.span()); assert_eq!(0, contents.line()); assert_eq!(0, contents.column()); + assert_eq!(1, contents.line_count()); Ok(()) } @@ -216,8 +230,10 @@ mod tests { let src = String::from("foobar"); let contents = src.read_span(&(3, 3).into(), 1, 1)?; assert_eq!("foobar", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((0, 6)), *contents.span()); assert_eq!(0, contents.line()); assert_eq!(0, contents.column()); + assert_eq!(1, contents.line_count()); Ok(()) } @@ -226,8 +242,10 @@ mod tests { let src = String::from("foo\nbar\nbaz\n"); let contents = src.read_span(&(4, 4).into(), 0, 0)?; assert_eq!("bar\n", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((4, 4)), *contents.span()); assert_eq!(1, contents.line()); assert_eq!(0, contents.column()); + assert_eq!(1, contents.line_count()); Ok(()) } @@ -236,8 +254,58 @@ mod tests { let src = String::from("foo\nbarbar\nbaz\n"); let contents = src.read_span(&(7, 4).into(), 0, 0)?; assert_eq!("bar\n", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((7, 4)), *contents.span()); assert_eq!(1, contents.line()); assert_eq!(3, contents.column()); + assert_eq!(1, contents.line_count()); + Ok(()) + } + + #[test] + fn end_of_line_before_newline() -> Result<(), MietteError> { + let src = String::from("foo\nbar\nbaz\n"); + let contents = src.read_span(&(7, 0).into(), 0, 0)?; + assert_eq!("", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((7, 0)), *contents.span()); + assert_eq!(1, contents.line()); + assert_eq!(3, contents.column()); + assert_eq!(1, contents.line_count()); + Ok(()) + } + + #[test] + fn end_of_line_after_newline() -> Result<(), MietteError> { + let src = String::from("foo\nbar\nbaz\n"); + let contents = src.read_span(&(8, 0).into(), 0, 0)?; + assert_eq!("", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((8, 0)), *contents.span()); + assert_eq!(2, contents.line()); + assert_eq!(0, contents.column()); + assert_eq!(1, contents.line_count()); + Ok(()) + } + + #[test] + fn end_of_file_with_newline() -> Result<(), MietteError> { + let src = String::from("foo\nbar\nbaz\n"); + let contents = src.read_span(&(12, 0).into(), 0, 0)?; + assert_eq!("", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((12, 0)), *contents.span()); + assert_eq!(2, contents.line()); + assert_eq!(4, contents.column()); + assert_eq!(1, contents.line_count()); + Ok(()) + } + + #[test] + fn end_of_file_without_newline() -> Result<(), MietteError> { + let src = String::from("foo\nbar\nbaz"); + let contents = src.read_span(&(11, 0).into(), 0, 0)?; + assert_eq!("", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((11, 0)), *contents.span()); + assert_eq!(2, contents.line()); + assert_eq!(3, contents.column()); + assert_eq!(1, contents.line_count()); Ok(()) } @@ -246,8 +314,10 @@ mod tests { let src = String::from("foo\r\nbar\r\nbaz\r\n"); let contents = src.read_span(&(5, 5).into(), 0, 0)?; assert_eq!("bar\r\n", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((5, 5)), *contents.span()); assert_eq!(1, contents.line()); assert_eq!(0, contents.column()); + assert_eq!(1, contents.line_count()); Ok(()) } @@ -259,8 +329,10 @@ mod tests { "foo\nbar\nbaz\n", std::str::from_utf8(contents.data()).unwrap() ); + assert_eq!(SourceSpan::from((4, 12)), *contents.span()); assert_eq!(1, contents.line()); assert_eq!(0, contents.column()); + assert_eq!(3, contents.line_count()); Ok(()) } @@ -272,8 +344,10 @@ mod tests { "\nfoo\nbar\nbaz\n\n", std::str::from_utf8(contents.data()).unwrap() ); + assert_eq!(SourceSpan::from((8, 14)), *contents.span()); assert_eq!(2, contents.line()); assert_eq!(0, contents.column()); + assert_eq!(5, contents.line_count()); let span: SourceSpan = (8, 14).into(); assert_eq!(&span, contents.span()); Ok(()) @@ -287,10 +361,49 @@ mod tests { "one\ntwo\n\n", std::str::from_utf8(contents.data()).unwrap() ); + assert_eq!(SourceSpan::from((0, 9)), *contents.span()); assert_eq!(0, contents.line()); assert_eq!(0, contents.column()); let span: SourceSpan = (0, 9).into(); assert_eq!(&span, contents.span()); + assert_eq!(3, contents.line_count()); Ok(()) } + + #[test] + fn empty_source() -> Result<(), MietteError> { + let src = String::from(""); + + let contents = src.read_span(&(0, 0).into(), 0, 0)?; + assert_eq!("", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((0, 0)), *contents.span()); + assert_eq!(0, contents.line()); + assert_eq!(0, contents.column()); + assert_eq!(1, contents.line_count()); + + Ok(()) + } + + #[test] + fn empty_source_out_of_bounds() { + let src = String::from(""); + + let contents = src.read_span(&(0, 1).into(), 0, 0); + assert!(matches!(contents, Err(MietteError::OutOfBounds))); + + let contents = src.read_span(&(0, 2).into(), 0, 0); + assert!(matches!(contents, Err(MietteError::OutOfBounds))); + + let contents = src.read_span(&(1, 0).into(), 0, 0); + assert!(matches!(contents, Err(MietteError::OutOfBounds))); + + let contents = src.read_span(&(1, 1).into(), 0, 0); + assert!(matches!(contents, Err(MietteError::OutOfBounds))); + + let contents = src.read_span(&(2, 0).into(), 0, 0); + assert!(matches!(contents, Err(MietteError::OutOfBounds))); + + let contents = src.read_span(&(2, 1).into(), 0, 0); + assert!(matches!(contents, Err(MietteError::OutOfBounds))); + } }