From 6f8fbf836d33f51e4f02cde19dc25d85a6ae4503 Mon Sep 17 00:00:00 2001 From: Nahor Date: Mon, 4 Mar 2024 13:03:02 -0800 Subject: [PATCH 1/3] refactor(get_lines): Simplify `get_lines` --- src/handlers/graphical.rs | 68 ++++++++++++----------------------- src/handlers/narratable.rs | 72 ++++++++++++++------------------------ 2 files changed, 48 insertions(+), 92 deletions(-) diff --git a/src/handlers/graphical.rs b/src/handlers/graphical.rs index a4474f9..1eb915e 100644 --- a/src/handlers/graphical.rs +++ b/src/handlers/graphical.rs @@ -1189,54 +1189,30 @@ impl GraphicalReportHandler { .read_span(context_span, self.context_lines, self.context_lines) .map_err(|_| fmt::Error)?; let context = std::str::from_utf8(context_data.data()).expect("Bad utf8 detected"); - let mut line = context_data.line(); - let mut column = context_data.column(); - let mut offset = context_data.span().offset(); - let mut line_offset = offset; - let mut iter = context.chars().peekable(); - let mut line_str = String::new(); - let mut lines = Vec::new(); - while let Some(char) = iter.next() { - offset += char.len_utf8(); - let mut at_end_of_file = false; - match char { - '\r' => { - if iter.next_if_eq(&'\n').is_some() { - offset += 1; - line += 1; - column = 0; - } else { - line_str.push(char); - column += 1; - } - at_end_of_file = iter.peek().is_none(); + let lines = context + .split_inclusive('\n') + .enumerate() + .map(|(line_number, line)| { + let length = line.len(); + // Strip the newline chars + let line = line + .strip_suffix('\n') + .and_then(|line| line.strip_suffix('\r').or(Some(line))) + .unwrap_or(line); + // SAFETY: + // - it is safe to use `offset_from` on slices of an array per Rus design (max array size) + // (https://doc.rust-lang.org/stable/reference/types/numeric.html#machine-dependent-integer-types) + // - since `line` is a slice of `context`, the offset cannot be negative either + Line { + line_number: context_data.line() + line_number + 1, + offset: context_data.span().offset() + + unsafe { line.as_ptr().offset_from(context.as_ptr()) } as usize, + length, + text: line.to_string(), } - '\n' => { - at_end_of_file = iter.peek().is_none(); - line += 1; - column = 0; - } - _ => { - line_str.push(char); - column += 1; - } - } + }) + .collect::>(); - if iter.peek().is_none() && !at_end_of_file { - line += 1; - } - - if column == 0 || iter.peek().is_none() { - lines.push(Line { - line_number: line, - offset: line_offset, - length: offset - line_offset, - text: line_str.clone(), - }); - line_str.clear(); - line_offset = offset; - } - } Ok((context_data, lines)) } } diff --git a/src/handlers/narratable.rs b/src/handlers/narratable.rs index c809124..418c899 100644 --- a/src/handlers/narratable.rs +++ b/src/handlers/narratable.rs @@ -291,54 +291,34 @@ impl NarratableReportHandler { .read_span(context_span, self.context_lines, self.context_lines) .map_err(|_| fmt::Error)?; let context = std::str::from_utf8(context_data.data()).expect("Bad utf8 detected"); - let mut line = context_data.line(); - let mut column = context_data.column(); - let mut offset = context_data.span().offset(); - let mut line_offset = offset; - let mut iter = context.chars().peekable(); - let mut line_str = String::new(); - let mut lines = Vec::new(); - while let Some(char) = iter.next() { - offset += char.len_utf8(); - let mut at_end_of_file = false; - match char { - '\r' => { - if iter.next_if_eq(&'\n').is_some() { - offset += 1; - line += 1; - column = 0; - } else { - line_str.push(char); - column += 1; - } - at_end_of_file = iter.peek().is_none(); - } - '\n' => { - at_end_of_file = iter.peek().is_none(); - line += 1; - column = 0; - } - _ => { - line_str.push(char); - column += 1; - } - } - if iter.peek().is_none() && !at_end_of_file { - line += 1; - } - - if column == 0 || iter.peek().is_none() { - lines.push(Line { - line_number: line, - offset: line_offset, - text: line_str.clone(), + let lines = context + .split_inclusive('\n') + .enumerate() + .map(|(line_number, line)| { + // SAFETY: + // - it is safe to use `offset_from` on slices of an array per Rus design (max array size) + // (https://doc.rust-lang.org/stable/reference/types/numeric.html#machine-dependent-integer-types) + // - since `line` is a slice of `context`, the offset cannot be negative either + let offset = unsafe { line.as_ptr().offset_from(context.as_ptr()) } as usize; + let length = line.len(); + // Strip the newline chars + let line = line + .strip_suffix('\n') + .and_then(|line| line.strip_suffix('\r').or(Some(line))) + .unwrap_or(line); + // End of the "file" if the end of the line is also the end of + // the context and we removed some characters (newline) + let at_end_of_file = (offset + length == context.len()) && (length != line.len()); + Line { + line_number: context_data.line() + line_number + 1, + offset: context_data.span().offset() + offset, + text: line.to_string(), at_end_of_file, - }); - line_str.clear(); - line_offset = offset; - } - } + } + }) + .collect::>(); + Ok((context_data, lines)) } } From a37328a29982d497919486817e89acddd1a155f0 Mon Sep 17 00:00:00 2001 From: Nahor Date: Mon, 4 Mar 2024 13:49:15 -0800 Subject: [PATCH 2/3] refactor(span): Simplify checks how a span applies on a line --- src/handlers/graphical.rs | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/src/handlers/graphical.rs b/src/handlers/graphical.rs index 1eb915e..f3561de 100644 --- a/src/handlers/graphical.rs +++ b/src/handlers/graphical.rs @@ -1257,28 +1257,27 @@ impl Line { /// Returns whether `span` should be visible on this line, either in the gutter or under the /// text on this line fn span_applies(&self, span: &FancySpan) -> bool { - let spanlen = if span.len() == 0 { 1 } else { span.len() }; - // Span starts in this line - - (span.offset() >= self.offset && span.offset() < self.offset + self.length) - // Span passes through this line - || (span.offset() < self.offset && span.offset() + spanlen > self.offset + self.length) //todo - // Span ends on this line - || (span.offset() + spanlen > self.offset && span.offset() + spanlen <= self.offset + self.length) + // A span applies if its start is strictly before the line's end, + // i.e. the span is not after the line, and its end is strictly after + // the line's start, i.e. the span is not before the line. + // + // One corner case: if the span length is 0, then the span also applies + // if its end is *at* the line's start, not just strictly after. + (span.offset() < self.offset + self.length) + && match span.len() == 0 { + true => (span.offset() + span.len()) >= self.offset, + false => (span.offset() + span.len()) > self.offset, + } } /// Returns whether `span` should be visible on this line in the gutter (so this excludes spans /// that are only visible on this line and do not span multiple lines) fn span_applies_gutter(&self, span: &FancySpan) -> bool { - let spanlen = if span.len() == 0 { 1 } else { span.len() }; - // Span starts in this line + // The span must covers this line and at least one of its ends must be + // on another line self.span_applies(span) - && !( - // as long as it doesn't start *and* end on this line - (span.offset() >= self.offset && span.offset() < self.offset + self.length) - && (span.offset() + spanlen > self.offset - && span.offset() + spanlen <= self.offset + self.length) - ) + && ((span.offset() < self.offset) + || ((span.offset() + span.len()) >= (self.offset + self.length))) } // A 'flyby' is a multi-line span that technically covers this line, but @@ -1301,8 +1300,7 @@ impl Line { // Does this line contain the *end* of this multiline span? // This assumes self.span_applies() is true already. fn span_ends(&self, span: &FancySpan) -> bool { - span.offset() + span.len() >= self.offset - && span.offset() + span.len() <= self.offset + self.length + span.offset() + span.len() <= self.offset + self.length } } From f7fa50d8de219d21d479d0658922e10fea905b93 Mon Sep 17 00:00:00 2001 From: Nahor Date: Sun, 3 Mar 2024 12:02:41 -0800 Subject: [PATCH 3/3] fix(SourceCode): Fix and improve default `SourceCode` implementation - Fix a panic when a 0 length span covers the end of a document - Fix incorrect `line_count` - Add new unit tests and complete existing ones - Improve readability --- src/source_impls.rs | 265 +++++++++++++++++++++++++++++++------------- 1 file changed, 189 insertions(+), 76 deletions(-) diff --git a/src/source_impls.rs b/src/source_impls.rs index e362b4a..61a7ffb 100644 --- a/src/source_impls.rs +++ b/src/source_impls.rs @@ -5,91 +5,103 @@ use std::{borrow::Cow, collections::VecDeque, fmt::Debug, sync::Arc}; use crate::{MietteError, MietteSpanContents, SourceCode, SourceSpan, SpanContents}; +#[derive(Clone, Copy)] +struct LineInfo { + line_no: usize, + start: usize, + end: usize, +} + fn context_info<'a>( input: &'a [u8], span: &SourceSpan, context_lines_before: usize, context_lines_after: usize, ) -> Result, MietteError> { - let mut offset = 0usize; - let mut line_count = 0usize; - let mut start_line = 0usize; - let mut start_column = 0usize; - let mut before_lines_starts = VecDeque::new(); - let mut current_line_start = 0usize; - let mut end_lines = 0usize; - let mut post_span = false; - let mut post_span_got_newline = false; - let mut iter = input.iter().copied().peekable(); - while let Some(char) = iter.next() { - if matches!(char, b'\r' | b'\n') { - line_count += 1; - if char == b'\r' && iter.next_if_eq(&b'\n').is_some() { - offset += 1; - } - if offset < span.offset() { - // We're before the start of the span. - start_column = 0; - before_lines_starts.push_back(current_line_start); - if before_lines_starts.len() > context_lines_before { - start_line += 1; - before_lines_starts.pop_front(); - } - } else if offset >= span.offset() + span.len().saturating_sub(1) { - // We're after the end of the span, but haven't necessarily - // started collecting end lines yet (we might still be - // collecting context lines). - if post_span { - start_column = 0; - if post_span_got_newline { - end_lines += 1; - } else { - post_span_got_newline = true; - } - if end_lines >= context_lines_after { - offset += 1; - break; - } - } - } - current_line_start = offset + 1; - } else if offset < span.offset() { - start_column += 1; - } - - if offset >= (span.offset() + span.len()).saturating_sub(1) { - post_span = true; - if end_lines >= context_lines_after { - offset += 1; - break; - } - } - - offset += 1; - } - - if offset >= (span.offset() + span.len()).saturating_sub(1) { - let starting_offset = before_lines_starts.front().copied().unwrap_or_else(|| { - if context_lines_before == 0 { - span.offset() - } else { - 0 + let mut iter = input + .split_inclusive(|b| *b == b'\n') + .enumerate() + .map(|(line_no, line)| { + // SAFETY: + // - it is safe to use `offset_from` on slices of an array per Rust design (max array size) + // (https://doc.rust-lang.org/stable/reference/types/numeric.html#machine-dependent-integer-types) + // - since `line` is a slice of `input, the offset cannot be negative either + let offset = unsafe { line.as_ptr().offset_from(input.as_ptr()) } as usize; + LineInfo { + line_no, + start: offset, + end: offset + line.len(), } }); - Ok(MietteSpanContents::new( - &input[starting_offset..offset], - (starting_offset, offset - starting_offset).into(), - start_line, - if context_lines_before == 0 { - start_column - } else { - 0 - }, - line_count, - )) - } else { - Err(MietteError::OutOfBounds) + + // First line handled separately because otherwise, we can't distinguish + // between `None` from an empty document (which we still want to handle as + // a "single empty line"), and `None` from the end of the document. + let mut line_starts = VecDeque::new(); + let mut line_info = match iter.next() { + None => LineInfo { + line_no: 0, + start: 0, + end: 0, + }, + Some(info) => info, + }; + line_starts.push_back(line_info); + + // Get the "before" lines (including the line containing the start + // of the span) + while span.offset() >= line_info.end { + line_info = match iter.next() { + None => break, + Some(info) => info, + }; + + if line_starts.len() > context_lines_before { + line_starts.pop_front(); + } + line_starts.push_back(line_info); } + let (start_lineno, start_offset, start_column) = { + let start_info = line_starts.pop_front().unwrap(); + if context_lines_before > 0 { + (start_info.line_no, start_info.start, 0) + } else { + ( + start_info.line_no, + span.offset(), + span.offset() - start_info.start, + ) + } + }; + + // Find the end of the span + while span.offset() + span.len() > line_info.end { + line_info = match iter.next() { + None => break, + Some(info) => info, + }; + } + + // Get the "after" lines + if let Some(last) = iter.take(context_lines_after).last() { + line_info = last; + } + if span.offset() + span.len() > line_info.end { + return Err(MietteError::OutOfBounds); + } + let (end_lineno, end_offset) = if context_lines_after > 0 { + (line_info.line_no, line_info.end) + } else { + (line_info.line_no, span.offset() + span.len()) + }; + + Ok(MietteSpanContents::new( + &input[start_offset..end_offset], + (start_offset..end_offset).into(), + start_lineno, + start_column, + end_lineno - start_lineno + 1, + )) } impl SourceCode for [u8] { @@ -206,8 +218,10 @@ mod tests { let src = String::from("foo\n"); let contents = src.read_span(&(0, 4).into(), 0, 0)?; assert_eq!("foo\n", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((0, 4)), *contents.span()); assert_eq!(0, contents.line()); assert_eq!(0, contents.column()); + assert_eq!(1, contents.line_count()); Ok(()) } @@ -216,8 +230,10 @@ mod tests { let src = String::from("foobar"); let contents = src.read_span(&(3, 3).into(), 1, 1)?; assert_eq!("foobar", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((0, 6)), *contents.span()); assert_eq!(0, contents.line()); assert_eq!(0, contents.column()); + assert_eq!(1, contents.line_count()); Ok(()) } @@ -226,8 +242,10 @@ mod tests { let src = String::from("foo\nbar\nbaz\n"); let contents = src.read_span(&(4, 4).into(), 0, 0)?; assert_eq!("bar\n", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((4, 4)), *contents.span()); assert_eq!(1, contents.line()); assert_eq!(0, contents.column()); + assert_eq!(1, contents.line_count()); Ok(()) } @@ -236,8 +254,58 @@ mod tests { let src = String::from("foo\nbarbar\nbaz\n"); let contents = src.read_span(&(7, 4).into(), 0, 0)?; assert_eq!("bar\n", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((7, 4)), *contents.span()); assert_eq!(1, contents.line()); assert_eq!(3, contents.column()); + assert_eq!(1, contents.line_count()); + Ok(()) + } + + #[test] + fn end_of_line_before_newline() -> Result<(), MietteError> { + let src = String::from("foo\nbar\nbaz\n"); + let contents = src.read_span(&(7, 0).into(), 0, 0)?; + assert_eq!("", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((7, 0)), *contents.span()); + assert_eq!(1, contents.line()); + assert_eq!(3, contents.column()); + assert_eq!(1, contents.line_count()); + Ok(()) + } + + #[test] + fn end_of_line_after_newline() -> Result<(), MietteError> { + let src = String::from("foo\nbar\nbaz\n"); + let contents = src.read_span(&(8, 0).into(), 0, 0)?; + assert_eq!("", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((8, 0)), *contents.span()); + assert_eq!(2, contents.line()); + assert_eq!(0, contents.column()); + assert_eq!(1, contents.line_count()); + Ok(()) + } + + #[test] + fn end_of_file_with_newline() -> Result<(), MietteError> { + let src = String::from("foo\nbar\nbaz\n"); + let contents = src.read_span(&(12, 0).into(), 0, 0)?; + assert_eq!("", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((12, 0)), *contents.span()); + assert_eq!(2, contents.line()); + assert_eq!(4, contents.column()); + assert_eq!(1, contents.line_count()); + Ok(()) + } + + #[test] + fn end_of_file_without_newline() -> Result<(), MietteError> { + let src = String::from("foo\nbar\nbaz"); + let contents = src.read_span(&(11, 0).into(), 0, 0)?; + assert_eq!("", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((11, 0)), *contents.span()); + assert_eq!(2, contents.line()); + assert_eq!(3, contents.column()); + assert_eq!(1, contents.line_count()); Ok(()) } @@ -246,8 +314,10 @@ mod tests { let src = String::from("foo\r\nbar\r\nbaz\r\n"); let contents = src.read_span(&(5, 5).into(), 0, 0)?; assert_eq!("bar\r\n", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((5, 5)), *contents.span()); assert_eq!(1, contents.line()); assert_eq!(0, contents.column()); + assert_eq!(1, contents.line_count()); Ok(()) } @@ -259,8 +329,10 @@ mod tests { "foo\nbar\nbaz\n", std::str::from_utf8(contents.data()).unwrap() ); + assert_eq!(SourceSpan::from((4, 12)), *contents.span()); assert_eq!(1, contents.line()); assert_eq!(0, contents.column()); + assert_eq!(3, contents.line_count()); Ok(()) } @@ -272,8 +344,10 @@ mod tests { "\nfoo\nbar\nbaz\n\n", std::str::from_utf8(contents.data()).unwrap() ); + assert_eq!(SourceSpan::from((8, 14)), *contents.span()); assert_eq!(2, contents.line()); assert_eq!(0, contents.column()); + assert_eq!(5, contents.line_count()); let span: SourceSpan = (8, 14).into(); assert_eq!(&span, contents.span()); Ok(()) @@ -287,10 +361,49 @@ mod tests { "one\ntwo\n\n", std::str::from_utf8(contents.data()).unwrap() ); + assert_eq!(SourceSpan::from((0, 9)), *contents.span()); assert_eq!(0, contents.line()); assert_eq!(0, contents.column()); let span: SourceSpan = (0, 9).into(); assert_eq!(&span, contents.span()); + assert_eq!(3, contents.line_count()); Ok(()) } + + #[test] + fn empty_source() -> Result<(), MietteError> { + let src = String::from(""); + + let contents = src.read_span(&(0, 0).into(), 0, 0)?; + assert_eq!("", std::str::from_utf8(contents.data()).unwrap()); + assert_eq!(SourceSpan::from((0, 0)), *contents.span()); + assert_eq!(0, contents.line()); + assert_eq!(0, contents.column()); + assert_eq!(1, contents.line_count()); + + Ok(()) + } + + #[test] + fn empty_source_out_of_bounds() { + let src = String::from(""); + + let contents = src.read_span(&(0, 1).into(), 0, 0); + assert!(matches!(contents, Err(MietteError::OutOfBounds))); + + let contents = src.read_span(&(0, 2).into(), 0, 0); + assert!(matches!(contents, Err(MietteError::OutOfBounds))); + + let contents = src.read_span(&(1, 0).into(), 0, 0); + assert!(matches!(contents, Err(MietteError::OutOfBounds))); + + let contents = src.read_span(&(1, 1).into(), 0, 0); + assert!(matches!(contents, Err(MietteError::OutOfBounds))); + + let contents = src.read_span(&(2, 0).into(), 0, 0); + assert!(matches!(contents, Err(MietteError::OutOfBounds))); + + let contents = src.read_span(&(2, 1).into(), 0, 0); + assert!(matches!(contents, Err(MietteError::OutOfBounds))); + } }