Merge f7fa50d8de into 813232ba79

2024-06-13 16:18:44 +00:00 · 2024-06-13 16:18:44 +00:00 · 76d58e98e1
parent 813232ba79 f7fa50d8de
commit 76d58e98e1
3 changed files with 253 additions and 186 deletions
--- a/src/handlers/graphical.rs
+++ b/src/handlers/graphical.rs
@ -1192,54 +1192,30 @@ impl GraphicalReportHandler {
            .read_span(context_span, self.context_lines, self.context_lines)
            .map_err(|_| fmt::Error)?;
        let context = std::str::from_utf8(context_data.data()).expect("Bad utf8 detected");
-        let mut line = context_data.line();
-        let mut column = context_data.column();
-        let mut offset = context_data.span().offset();
-        let mut line_offset = offset;
-        let mut iter = context.chars().peekable();
-        let mut line_str = String::new();
-        let mut lines = Vec::new();
-        while let Some(char) = iter.next() {
-            offset += char.len_utf8();
-            let mut at_end_of_file = false;
-            match char {
-                '\r' => {
-                    if iter.next_if_eq(&'\n').is_some() {
-                        offset += 1;
-                        line += 1;
-                        column = 0;
-                    } else {
-                        line_str.push(char);
-                        column += 1;
-                    }
-                    at_end_of_file = iter.peek().is_none();
+        let lines = context
+            .split_inclusive('\n')
+            .enumerate()
+            .map(|(line_number, line)| {
+                let length = line.len();
+                // Strip the newline chars
+                let line = line
+                    .strip_suffix('\n')
+                    .and_then(|line| line.strip_suffix('\r').or(Some(line)))
+                    .unwrap_or(line);
+                // SAFETY:
+                // - it is safe to use `offset_from` on slices of an array per Rus design (max array size)
+                //   (https://doc.rust-lang.org/stable/reference/types/numeric.html#machine-dependent-integer-types)
+                // - since `line` is a slice of `context`, the offset cannot be negative either
+                Line {
+                    line_number: context_data.line() + line_number + 1,
+                    offset: context_data.span().offset()
+                        + unsafe { line.as_ptr().offset_from(context.as_ptr()) } as usize,
+                    length,
+                    text: line.to_string(),
                }
-                '\n' => {
-                    at_end_of_file = iter.peek().is_none();
-                    line += 1;
-                    column = 0;
-                }
-                _ => {
-                    line_str.push(char);
-                    column += 1;
-                }
-            }
+            })
+            .collect::<Vec<_>>();

-            if iter.peek().is_none() && !at_end_of_file {
-                line += 1;
-            }
-
-            if column == 0 || iter.peek().is_none() {
-                lines.push(Line {
-                    line_number: line,
-                    offset: line_offset,
-                    length: offset - line_offset,
-                    text: line_str.clone(),
-                });
-                line_str.clear();
-                line_offset = offset;
-            }
-        }
        Ok((context_data, lines))
    }
 }
@ -1284,28 +1260,27 @@ impl Line {
    /// Returns whether `span` should be visible on this line, either in the gutter or under the
    /// text on this line
    fn span_applies(&self, span: &FancySpan) -> bool {
-        let spanlen = if span.len() == 0 { 1 } else { span.len() };
-        // Span starts in this line
-
-        (span.offset() >= self.offset && span.offset() < self.offset + self.length)
-            // Span passes through this line
-            || (span.offset() < self.offset && span.offset() + spanlen > self.offset + self.length) //todo
-            // Span ends on this line
-            || (span.offset() + spanlen > self.offset && span.offset() + spanlen <= self.offset + self.length)
+        // A span applies if its start is strictly before the line's end,
+        // i.e. the span is not after the line, and its end is strictly after
+        // the line's start, i.e. the span is not before the line.
+        //
+        // One corner case: if the span length is 0, then the span also applies
+        // if its end is *at* the line's start, not just strictly after.
+        (span.offset() < self.offset + self.length)
+            && match span.len() == 0 {
+                true => (span.offset() + span.len()) >= self.offset,
+                false => (span.offset() + span.len()) > self.offset,
+            }
    }

    /// Returns whether `span` should be visible on this line in the gutter (so this excludes spans
    /// that are only visible on this line and do not span multiple lines)
    fn span_applies_gutter(&self, span: &FancySpan) -> bool {
-        let spanlen = if span.len() == 0 { 1 } else { span.len() };
-        // Span starts in this line
+        // The span must covers this line and at least one of its ends must be
+        // on another line
        self.span_applies(span)
-            && !(
-                // as long as it doesn't start *and* end on this line
-                (span.offset() >= self.offset && span.offset() < self.offset + self.length)
-                    && (span.offset() + spanlen > self.offset
-                        && span.offset() + spanlen <= self.offset + self.length)
-            )
+            && ((span.offset() < self.offset)
+                || ((span.offset() + span.len()) >= (self.offset + self.length)))
    }

    // A 'flyby' is a multi-line span that technically covers this line, but
@ -1328,8 +1303,7 @@ impl Line {
    // Does this line contain the *end* of this multiline span?
    // This assumes self.span_applies() is true already.
    fn span_ends(&self, span: &FancySpan) -> bool {
-        span.offset() + span.len() >= self.offset
-            && span.offset() + span.len() <= self.offset + self.length
+        span.offset() + span.len() <= self.offset + self.length
    }
 }

--- a/src/handlers/narratable.rs
+++ b/src/handlers/narratable.rs
@ -291,54 +291,34 @@ impl NarratableReportHandler {
            .read_span(context_span, self.context_lines, self.context_lines)
            .map_err(|_| fmt::Error)?;
        let context = std::str::from_utf8(context_data.data()).expect("Bad utf8 detected");
-        let mut line = context_data.line();
-        let mut column = context_data.column();
-        let mut offset = context_data.span().offset();
-        let mut line_offset = offset;
-        let mut iter = context.chars().peekable();
-        let mut line_str = String::new();
-        let mut lines = Vec::new();
-        while let Some(char) = iter.next() {
-            offset += char.len_utf8();
-            let mut at_end_of_file = false;
-            match char {
-                '\r' => {
-                    if iter.next_if_eq(&'\n').is_some() {
-                        offset += 1;
-                        line += 1;
-                        column = 0;
-                    } else {
-                        line_str.push(char);
-                        column += 1;
-                    }
-                    at_end_of_file = iter.peek().is_none();
-                }
-                '\n' => {
-                    at_end_of_file = iter.peek().is_none();
-                    line += 1;
-                    column = 0;
-                }
-                _ => {
-                    line_str.push(char);
-                    column += 1;
-                }
-            }

-            if iter.peek().is_none() && !at_end_of_file {
-                line += 1;
-            }
-
-            if column == 0 || iter.peek().is_none() {
-                lines.push(Line {
-                    line_number: line,
-                    offset: line_offset,
-                    text: line_str.clone(),
+        let lines = context
+            .split_inclusive('\n')
+            .enumerate()
+            .map(|(line_number, line)| {
+                // SAFETY:
+                // - it is safe to use `offset_from` on slices of an array per Rus design (max array size)
+                //   (https://doc.rust-lang.org/stable/reference/types/numeric.html#machine-dependent-integer-types)
+                // - since `line` is a slice of `context`, the offset cannot be negative either
+                let offset = unsafe { line.as_ptr().offset_from(context.as_ptr()) } as usize;
+                let length = line.len();
+                // Strip the newline chars
+                let line = line
+                    .strip_suffix('\n')
+                    .and_then(|line| line.strip_suffix('\r').or(Some(line)))
+                    .unwrap_or(line);
+                // End of the "file" if the end of the line is also the end of
+                // the context and we removed some characters (newline)
+                let at_end_of_file = (offset + length == context.len()) && (length != line.len());
+                Line {
+                    line_number: context_data.line() + line_number + 1,
+                    offset: context_data.span().offset() + offset,
+                    text: line.to_string(),
                    at_end_of_file,
-                });
-                line_str.clear();
-                line_offset = offset;
-            }
-        }
+                }
+            })
+            .collect::<Vec<_>>();
+
        Ok((context_data, lines))
    }
 }
--- a/src/source_impls.rs
+++ b/src/source_impls.rs
@ -5,91 +5,103 @@ use std::{borrow::Cow, collections::VecDeque, fmt::Debug, sync::Arc};

 use crate::{MietteError, MietteSpanContents, SourceCode, SourceSpan, SpanContents};

+#[derive(Clone, Copy)]
+struct LineInfo {
+    line_no: usize,
+    start: usize,
+    end: usize,
+}
+
 fn context_info<'a>(
    input: &'a [u8],
    span: &SourceSpan,
    context_lines_before: usize,
    context_lines_after: usize,
 ) -> Result<MietteSpanContents<'a>, MietteError> {
-    let mut offset = 0usize;
-    let mut line_count = 0usize;
-    let mut start_line = 0usize;
-    let mut start_column = 0usize;
-    let mut before_lines_starts = VecDeque::new();
-    let mut current_line_start = 0usize;
-    let mut end_lines = 0usize;
-    let mut post_span = false;
-    let mut post_span_got_newline = false;
-    let mut iter = input.iter().copied().peekable();
-    while let Some(char) = iter.next() {
-        if matches!(char, b'\r' | b'\n') {
-            line_count += 1;
-            if char == b'\r' && iter.next_if_eq(&b'\n').is_some() {
-                offset += 1;
-            }
-            if offset < span.offset() {
-                // We're before the start of the span.
-                start_column = 0;
-                before_lines_starts.push_back(current_line_start);
-                if before_lines_starts.len() > context_lines_before {
-                    start_line += 1;
-                    before_lines_starts.pop_front();
-                }
-            } else if offset >= span.offset() + span.len().saturating_sub(1) {
-                // We're after the end of the span, but haven't necessarily
-                // started collecting end lines yet (we might still be
-                // collecting context lines).
-                if post_span {
-                    start_column = 0;
-                    if post_span_got_newline {
-                        end_lines += 1;
-                    } else {
-                        post_span_got_newline = true;
-                    }
-                    if end_lines >= context_lines_after {
-                        offset += 1;
-                        break;
-                    }
-                }
-            }
-            current_line_start = offset + 1;
-        } else if offset < span.offset() {
-            start_column += 1;
-        }
-
-        if offset >= (span.offset() + span.len()).saturating_sub(1) {
-            post_span = true;
-            if end_lines >= context_lines_after {
-                offset += 1;
-                break;
-            }
-        }
-
-        offset += 1;
-    }
-
-    if offset >= (span.offset() + span.len()).saturating_sub(1) {
-        let starting_offset = before_lines_starts.front().copied().unwrap_or_else(|| {
-            if context_lines_before == 0 {
-                span.offset()
-            } else {
-                0
+    let mut iter = input
+        .split_inclusive(|b| *b == b'\n')
+        .enumerate()
+        .map(|(line_no, line)| {
+            // SAFETY:
+            // - it is safe to use `offset_from` on slices of an array per Rust design (max array size)
+            //   (https://doc.rust-lang.org/stable/reference/types/numeric.html#machine-dependent-integer-types)
+            // - since `line` is a slice of `input, the offset cannot be negative either
+            let offset = unsafe { line.as_ptr().offset_from(input.as_ptr()) } as usize;
+            LineInfo {
+                line_no,
+                start: offset,
+                end: offset + line.len(),
            }
        });
-        Ok(MietteSpanContents::new(
-            &input[starting_offset..offset],
-            (starting_offset, offset - starting_offset).into(),
-            start_line,
-            if context_lines_before == 0 {
-                start_column
-            } else {
-                0
-            },
-            line_count,
-        ))
-    } else {
-        Err(MietteError::OutOfBounds)
+
+    // First line handled separately because otherwise, we can't distinguish
+    // between `None` from an empty document (which we still want to handle as
+    // a "single empty line"), and `None` from the end of the document.
+    let mut line_starts = VecDeque::new();
+    let mut line_info = match iter.next() {
+        None => LineInfo {
+            line_no: 0,
+            start: 0,
+            end: 0,
+        },
+        Some(info) => info,
+    };
+    line_starts.push_back(line_info);
+
+    // Get the "before" lines (including the line containing the start
+    // of the span)
+    while span.offset() >= line_info.end {
+        line_info = match iter.next() {
+            None => break,
+            Some(info) => info,
+        };
+
+        if line_starts.len() > context_lines_before {
+            line_starts.pop_front();
+        }
+        line_starts.push_back(line_info);
    }
+    let (start_lineno, start_offset, start_column) = {
+        let start_info = line_starts.pop_front().unwrap();
+        if context_lines_before > 0 {
+            (start_info.line_no, start_info.start, 0)
+        } else {
+            (
+                start_info.line_no,
+                span.offset(),
+                span.offset() - start_info.start,
+            )
+        }
+    };
+
+    // Find the end of the span
+    while span.offset() + span.len() > line_info.end {
+        line_info = match iter.next() {
+            None => break,
+            Some(info) => info,
+        };
+    }
+
+    // Get the "after" lines
+    if let Some(last) = iter.take(context_lines_after).last() {
+        line_info = last;
+    }
+    if span.offset() + span.len() > line_info.end {
+        return Err(MietteError::OutOfBounds);
+    }
+    let (end_lineno, end_offset) = if context_lines_after > 0 {
+        (line_info.line_no, line_info.end)
+    } else {
+        (line_info.line_no, span.offset() + span.len())
+    };
+
+    Ok(MietteSpanContents::new(
+        &input[start_offset..end_offset],
+        (start_offset..end_offset).into(),
+        start_lineno,
+        start_column,
+        end_lineno - start_lineno + 1,
+    ))
 }

 impl SourceCode for [u8] {
@ -206,8 +218,10 @@ mod tests {
        let src = String::from("foo\n");
        let contents = src.read_span(&(0, 4).into(), 0, 0)?;
        assert_eq!("foo\n", std::str::from_utf8(contents.data()).unwrap());
+        assert_eq!(SourceSpan::from((0, 4)), *contents.span());
        assert_eq!(0, contents.line());
        assert_eq!(0, contents.column());
+        assert_eq!(1, contents.line_count());
        Ok(())
    }

@ -216,8 +230,10 @@ mod tests {
        let src = String::from("foobar");
        let contents = src.read_span(&(3, 3).into(), 1, 1)?;
        assert_eq!("foobar", std::str::from_utf8(contents.data()).unwrap());
+        assert_eq!(SourceSpan::from((0, 6)), *contents.span());
        assert_eq!(0, contents.line());
        assert_eq!(0, contents.column());
+        assert_eq!(1, contents.line_count());
        Ok(())
    }

@ -226,8 +242,10 @@ mod tests {
        let src = String::from("foo\nbar\nbaz\n");
        let contents = src.read_span(&(4, 4).into(), 0, 0)?;
        assert_eq!("bar\n", std::str::from_utf8(contents.data()).unwrap());
+        assert_eq!(SourceSpan::from((4, 4)), *contents.span());
        assert_eq!(1, contents.line());
        assert_eq!(0, contents.column());
+        assert_eq!(1, contents.line_count());
        Ok(())
    }

@ -236,8 +254,58 @@ mod tests {
        let src = String::from("foo\nbarbar\nbaz\n");
        let contents = src.read_span(&(7, 4).into(), 0, 0)?;
        assert_eq!("bar\n", std::str::from_utf8(contents.data()).unwrap());
+        assert_eq!(SourceSpan::from((7, 4)), *contents.span());
        assert_eq!(1, contents.line());
        assert_eq!(3, contents.column());
+        assert_eq!(1, contents.line_count());
+        Ok(())
+    }
+
+    #[test]
+    fn end_of_line_before_newline() -> Result<(), MietteError> {
+        let src = String::from("foo\nbar\nbaz\n");
+        let contents = src.read_span(&(7, 0).into(), 0, 0)?;
+        assert_eq!("", std::str::from_utf8(contents.data()).unwrap());
+        assert_eq!(SourceSpan::from((7, 0)), *contents.span());
+        assert_eq!(1, contents.line());
+        assert_eq!(3, contents.column());
+        assert_eq!(1, contents.line_count());
+        Ok(())
+    }
+
+    #[test]
+    fn end_of_line_after_newline() -> Result<(), MietteError> {
+        let src = String::from("foo\nbar\nbaz\n");
+        let contents = src.read_span(&(8, 0).into(), 0, 0)?;
+        assert_eq!("", std::str::from_utf8(contents.data()).unwrap());
+        assert_eq!(SourceSpan::from((8, 0)), *contents.span());
+        assert_eq!(2, contents.line());
+        assert_eq!(0, contents.column());
+        assert_eq!(1, contents.line_count());
+        Ok(())
+    }
+
+    #[test]
+    fn end_of_file_with_newline() -> Result<(), MietteError> {
+        let src = String::from("foo\nbar\nbaz\n");
+        let contents = src.read_span(&(12, 0).into(), 0, 0)?;
+        assert_eq!("", std::str::from_utf8(contents.data()).unwrap());
+        assert_eq!(SourceSpan::from((12, 0)), *contents.span());
+        assert_eq!(2, contents.line());
+        assert_eq!(4, contents.column());
+        assert_eq!(1, contents.line_count());
+        Ok(())
+    }
+
+    #[test]
+    fn end_of_file_without_newline() -> Result<(), MietteError> {
+        let src = String::from("foo\nbar\nbaz");
+        let contents = src.read_span(&(11, 0).into(), 0, 0)?;
+        assert_eq!("", std::str::from_utf8(contents.data()).unwrap());
+        assert_eq!(SourceSpan::from((11, 0)), *contents.span());
+        assert_eq!(2, contents.line());
+        assert_eq!(3, contents.column());
+        assert_eq!(1, contents.line_count());
        Ok(())
    }

@ -246,8 +314,10 @@ mod tests {
        let src = String::from("foo\r\nbar\r\nbaz\r\n");
        let contents = src.read_span(&(5, 5).into(), 0, 0)?;
        assert_eq!("bar\r\n", std::str::from_utf8(contents.data()).unwrap());
+        assert_eq!(SourceSpan::from((5, 5)), *contents.span());
        assert_eq!(1, contents.line());
        assert_eq!(0, contents.column());
+        assert_eq!(1, contents.line_count());
        Ok(())
    }

@ -259,8 +329,10 @@ mod tests {
            "foo\nbar\nbaz\n",
            std::str::from_utf8(contents.data()).unwrap()
        );
+        assert_eq!(SourceSpan::from((4, 12)), *contents.span());
        assert_eq!(1, contents.line());
        assert_eq!(0, contents.column());
+        assert_eq!(3, contents.line_count());
        Ok(())
    }

@ -272,8 +344,10 @@ mod tests {
            "\nfoo\nbar\nbaz\n\n",
            std::str::from_utf8(contents.data()).unwrap()
        );
+        assert_eq!(SourceSpan::from((8, 14)), *contents.span());
        assert_eq!(2, contents.line());
        assert_eq!(0, contents.column());
+        assert_eq!(5, contents.line_count());
        let span: SourceSpan = (8, 14).into();
        assert_eq!(&span, contents.span());
        Ok(())
@ -287,10 +361,49 @@ mod tests {
            "one\ntwo\n\n",
            std::str::from_utf8(contents.data()).unwrap()
        );
+        assert_eq!(SourceSpan::from((0, 9)), *contents.span());
        assert_eq!(0, contents.line());
        assert_eq!(0, contents.column());
        let span: SourceSpan = (0, 9).into();
        assert_eq!(&span, contents.span());
+        assert_eq!(3, contents.line_count());
        Ok(())
    }
+
+    #[test]
+    fn empty_source() -> Result<(), MietteError> {
+        let src = String::from("");
+
+        let contents = src.read_span(&(0, 0).into(), 0, 0)?;
+        assert_eq!("", std::str::from_utf8(contents.data()).unwrap());
+        assert_eq!(SourceSpan::from((0, 0)), *contents.span());
+        assert_eq!(0, contents.line());
+        assert_eq!(0, contents.column());
+        assert_eq!(1, contents.line_count());
+
+        Ok(())
+    }
+
+    #[test]
+    fn empty_source_out_of_bounds() {
+        let src = String::from("");
+
+        let contents = src.read_span(&(0, 1).into(), 0, 0);
+        assert!(matches!(contents, Err(MietteError::OutOfBounds)));
+
+        let contents = src.read_span(&(0, 2).into(), 0, 0);
+        assert!(matches!(contents, Err(MietteError::OutOfBounds)));
+
+        let contents = src.read_span(&(1, 0).into(), 0, 0);
+        assert!(matches!(contents, Err(MietteError::OutOfBounds)));
+
+        let contents = src.read_span(&(1, 1).into(), 0, 0);
+        assert!(matches!(contents, Err(MietteError::OutOfBounds)));
+
+        let contents = src.read_span(&(2, 0).into(), 0, 0);
+        assert!(matches!(contents, Err(MietteError::OutOfBounds)));
+
+        let contents = src.read_span(&(2, 1).into(), 0, 0);
+        assert!(matches!(contents, Err(MietteError::OutOfBounds)));
+    }
 }