From 2af391efc58028aea56c202306e147f5a755f9f6 Mon Sep 17 00:00:00 2001 From: Rob Ede Date: Sat, 23 Nov 2024 23:41:15 +0000 Subject: [PATCH] feat(bytestring): split_at method --- bytestring/CHANGES.md | 1 + bytestring/Cargo.toml | 2 +- bytestring/src/lib.rs | 55 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 55 insertions(+), 3 deletions(-) diff --git a/bytestring/CHANGES.md b/bytestring/CHANGES.md index 8a24680f..d0bfff33 100644 --- a/bytestring/CHANGES.md +++ b/bytestring/CHANGES.md @@ -2,6 +2,7 @@ ## Unreleased +- Add `ByteString::split_at()` method. - Minimum supported Rust version (MSRV) is now 1.70. ## 1.3.1 diff --git a/bytestring/Cargo.toml b/bytestring/Cargo.toml index 63472205..2b0d719a 100644 --- a/bytestring/Cargo.toml +++ b/bytestring/Cargo.toml @@ -6,7 +6,7 @@ authors = [ "Nikolay Kim ", "Rob Ede ", ] -keywords = ["string", "bytes", "utf8", "web", "actix"] +keywords = ["string", "bytes", "utf8", "web", "bytestring"] categories = ["no-std", "web-programming"] homepage = "https://actix.rs" repository = "https://github.com/actix/actix-net" diff --git a/bytestring/src/lib.rs b/bytestring/src/lib.rs index 6cb164dc..4040401b 100644 --- a/bytestring/src/lib.rs +++ b/bytestring/src/lib.rs @@ -17,7 +17,7 @@ use core::{borrow::Borrow, fmt, hash, ops, str}; use bytes::Bytes; -/// An immutable UTF-8 encoded string with [`Bytes`] as a storage. +/// An immutable UTF-8 encoded string using [`Bytes`] as the storage. #[derive(Clone, Default, Eq, PartialOrd, Ord)] pub struct ByteString(Bytes); @@ -53,7 +53,29 @@ impl ByteString { Self(src) } - /// Returns a new byte string that is equivalent to the given `subset`. + /// Divides one bytestring into two at an index, returning both parts. + /// + /// # Panics + /// + /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is past the end of the last + /// code point of the bytestring. 
+ pub fn split_at(&self, mid: usize) -> (ByteString, ByteString) { + let this: &str = self.as_ref(); + let _valid_midpoint_check = this.split_at(mid); + + let mut bytes = self.0.clone(); + let first = bytes.split_to(mid); + let last = bytes; + + unsafe { + ( + ByteString::from_bytes_unchecked(first), + ByteString::from_bytes_unchecked(last), + ) + } + } + + /// Returns a new `ByteString` that is equivalent to the given `subset`. /// /// When processing a `ByteString` buffer with other tools, one often gets a `&str` which is in /// fact a slice of the original `ByteString`; i.e., a subset of it. This function turns that @@ -465,4 +487,33 @@ mod test { // being a logical subset of the string ByteString::from_static("foo bar").slice_ref("foo"); } + + #[test] + fn split_at() { + let buf = ByteString::from_static("foo bar"); + + let (first, last) = buf.split_at(0); + assert_eq!(ByteString::from_static(""), first); + assert_eq!(ByteString::from_static("foo bar"), last); + + let (first, last) = buf.split_at(4); + assert_eq!(ByteString::from_static("foo "), first); + assert_eq!(ByteString::from_static("bar"), last); + + let (first, last) = buf.split_at(7); + assert_eq!(ByteString::from_static("foo bar"), first); + assert_eq!(ByteString::from_static(""), last); + } + + #[test] + #[should_panic] + fn split_at_invalid_code_point() { + ByteString::from_static("µ").split_at(1); + } + + #[test] + #[should_panic] + fn split_at_outside_string() { + ByteString::from_static("foo").split_at(9); + } }