diff --git a/Cargo.toml b/Cargo.toml index 702a559..ec5ae2d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ serde_derive = "1.0.130" serde_json = "1.0.68" sha1 = "0.10.5" sha2 = "0.10.6" -ssri = "8.1.0" +ssri = "9.0.0" tempfile = "3.4.0" thiserror = "1.0.40" tokio = { version = "1.12.0", features = [ diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index c27c9b1..aa84f68 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -104,6 +104,17 @@ fn read_hash_sync(c: &mut Criterion) { }); } +fn read_hash_sync_xxh3(c: &mut Criterion) { + let tmp = tempfile::tempdir().unwrap(); + let cache = tmp.path().to_owned(); + let data = b"hello world".to_vec(); + let sri = + cacache::write_sync_with_algo(cacache::Algorithm::Xxh3, &cache, "hello", data).unwrap(); + c.bench_function("get::data_hash_sync::xxh3", move |b| { + b.iter(|| cacache::read_hash_sync(black_box(&cache), black_box(&sri)).unwrap()) + }); +} + fn read_hash_many_sync(c: &mut Criterion) { let tmp = tempfile::tempdir().unwrap(); let cache = tmp.path().to_owned(); @@ -124,6 +135,28 @@ fn read_hash_many_sync(c: &mut Criterion) { }); } +fn read_hash_many_sync_xxh3(c: &mut Criterion) { + let tmp = tempfile::tempdir().unwrap(); + let cache = tmp.path().to_owned(); + let data: Vec<_> = (0..) 
+ .take(NUM_REPEATS) + .map(|i| format!("test_file_{i}")) + .collect(); + let sris: Vec<_> = data + .iter() + .map(|datum| { + cacache::write_sync_with_algo(cacache::Algorithm::Xxh3, &cache, "hello", datum).unwrap() + }) + .collect(); + c.bench_function("get::data_hash_many_sync::xxh3", move |b| { + b.iter(|| { + for sri in sris.iter() { + cacache::read_hash_sync(black_box(&cache), black_box(sri)).unwrap(); + } + }) + }); +} + fn read_sync(c: &mut Criterion) { let tmp = tempfile::tempdir().unwrap(); let cache = tmp.path().to_owned(); @@ -144,6 +177,17 @@ fn read_hash_sync_big_data(c: &mut Criterion) { }); } +fn read_hash_sync_big_data_xxh3(c: &mut Criterion) { + let tmp = tempfile::tempdir().unwrap(); + let cache = tmp.path().to_owned(); + let data = vec![1; 1024 * 1024 * 5]; + let sri = + cacache::write_sync_with_algo(cacache::Algorithm::Xxh3, &cache, "hello", data).unwrap(); + c.bench_function("get_hash_big_data::xxh3", move |b| { + b.iter(|| cacache::read_hash_sync(black_box(&cache), black_box(&sri)).unwrap()) + }); +} + fn read_hash_many_async(c: &mut Criterion) { let tmp = tempfile::tempdir().unwrap(); let cache = tmp.path().to_owned(); @@ -195,6 +239,38 @@ fn read_hash_async_big_data(c: &mut Criterion) { }); } +fn write_hash(c: &mut Criterion) { + let tmp = tempfile::tempdir().unwrap(); + let cache = tmp.path().to_owned(); + c.bench_function("put::data::sync", move |b| { + b.iter_custom(|iters| { + let start = std::time::Instant::now(); + for i in 0..iters { + cacache::write_hash_sync(&cache, format!("hello world{i}")).unwrap(); + } + start.elapsed() + }) + }); +} + +fn write_hash_xxh3(c: &mut Criterion) { + let tmp = tempfile::tempdir().unwrap(); + let cache = tmp.path().to_owned(); + c.bench_function("put::data::sync::xxh3", move |b| { + b.iter_custom(|iters| { + let start = std::time::Instant::now(); + for i in 0..iters { + cacache::write_hash_sync_with_algo( + cacache::Algorithm::Xxh3, + &cache, + format!("hello world{i}"), + ) + .unwrap(); + } + 
start.elapsed() + }) + }); +} fn write_hash_async(c: &mut Criterion) { let tmp = tempfile::tempdir().unwrap(); let cache = tmp.path().to_owned(); @@ -209,6 +285,25 @@ fn write_hash_async(c: &mut Criterion) { }); } +fn write_hash_async_xxh3(c: &mut Criterion) { + let tmp = tempfile::tempdir().unwrap(); + let cache = tmp.path().to_owned(); + c.bench_function("put::data::xxh3", move |b| { + b.iter_custom(|iters| { + let start = std::time::Instant::now(); + for i in 0..iters { + block_on(cacache::write_hash_with_algo( + cacache::Algorithm::Xxh3, + &cache, + format!("hello world{i}"), + )) + .unwrap(); + } + start.elapsed() + }) + }); +} + #[cfg(feature = "link_to")] fn create_tmpfile(tmp: &tempfile::TempDir, buf: &[u8]) -> PathBuf { let dir = tmp.path().to_owned(); @@ -294,12 +389,18 @@ criterion_group!( read_hash_async, read_hash_many_async, read_async, + write_hash, + write_hash_xxh3, write_hash_async, + write_hash_async_xxh3, read_hash_sync, + read_hash_sync_xxh3, read_hash_many_sync, + read_hash_many_sync_xxh3, read_sync, read_hash_async_big_data, - read_hash_sync_big_data + read_hash_sync_big_data, + read_hash_sync_big_data_xxh3, ); #[cfg(feature = "link_to")] diff --git a/src/put.rs b/src/put.rs index 15a3d2f..2da3004 100644 --- a/src/put.rs +++ b/src/put.rs @@ -31,9 +31,36 @@ where D: AsRef<[u8]>, K: AsRef, { - async fn inner(cache: &Path, key: &str, data: &[u8]) -> Result { + write_with_algo(Algorithm::Sha256, cache, key, data).await +} + +/// Writes `data` to the `cache`, indexing it under `key`. Use this function +/// to customize the hashing algorithm. 
+/// +/// ## Example +/// ```no_run +/// use async_attributes; +/// +/// #[async_attributes::main] +/// async fn main() -> cacache::Result<()> { +/// cacache::write_with_algo(cacache::Algorithm::Xxh3, "./my-cache", "my-key", b"hello").await?; +/// Ok(()) +/// } +/// ``` +pub async fn write_with_algo( + algo: Algorithm, + cache: P, + key: K, + data: D, +) -> Result +where + P: AsRef, + D: AsRef<[u8]>, + K: AsRef, +{ + async fn inner(algo: Algorithm, cache: &Path, key: &str, data: &[u8]) -> Result { let mut writer = WriteOpts::new() - .algorithm(Algorithm::Sha256) + .algorithm(algo) .size(data.len()) .open(cache, key) .await?; @@ -42,9 +69,8 @@ where })?; writer.commit().await } - inner(cache.as_ref(), key.as_ref(), data.as_ref()).await + inner(algo, cache.as_ref(), key.as_ref(), data.as_ref()).await } - /// Writes `data` to the `cache`, skipping associating an index key with it. /// /// ## Example @@ -62,9 +88,30 @@ where P: AsRef, D: AsRef<[u8]>, { - async fn inner(cache: &Path, data: &[u8]) -> Result { + write_hash_with_algo(Algorithm::Sha256, cache, data).await +} + +/// Writes `data` to the `cache`, skipping associating an index key with it. +/// Use this to customize the hashing algorithm. 
+/// +/// ## Example +/// ```no_run +/// use async_attributes; +/// +/// #[async_attributes::main] +/// async fn main() -> cacache::Result<()> { +/// cacache::write_hash_with_algo(cacache::Algorithm::Xxh3, "./my-cache", b"hello").await?; +/// Ok(()) +/// } +/// ``` +pub async fn write_hash_with_algo(algo: Algorithm, cache: P, data: D) -> Result +where + P: AsRef, + D: AsRef<[u8]>, +{ + async fn inner(algo: Algorithm, cache: &Path, data: &[u8]) -> Result { let mut writer = WriteOpts::new() - .algorithm(Algorithm::Sha256) + .algorithm(algo) .size(data.len()) .open_hash(cache) .await?; @@ -74,9 +121,8 @@ where .with_context(|| format!("Failed to write to cache data for cache at {cache:?}"))?; writer.commit().await } - inner(cache.as_ref(), data.as_ref()).await + inner(algo, cache.as_ref(), data.as_ref()).await } - /// A reference to an open file writing to the cache. pub struct Writer { cache: PathBuf, @@ -137,13 +183,35 @@ impl Writer { P: AsRef, K: AsRef, { - async fn inner(cache: &Path, key: &str) -> Result { - WriteOpts::new() - .algorithm(Algorithm::Sha256) - .open(cache, key) - .await + Self::create_with_algo(Algorithm::Sha256, cache, key).await + } + + /// Creates a new writable file handle into the cache. Use this to + /// customize the algorithm used for hashing. + /// + /// ## Example + /// ```no_run + /// use async_attributes; + /// use async_std::prelude::*; + /// + /// #[async_attributes::main] + /// async fn main() -> cacache::Result<()> { + /// let mut fd = cacache::Writer::create_with_algo(cacache::Algorithm::Xxh3, "./my-cache", "my-key").await?; + /// fd.write_all(b"hello world").await.expect("Failed to write to cache"); + /// // Data is not saved into the cache until you commit it. 
+ /// fd.commit().await?; + /// Ok(()) + /// } + /// ``` + pub async fn create_with_algo(algo: Algorithm, cache: P, key: K) -> Result + where + P: AsRef, + K: AsRef, + { + async fn inner(algo: Algorithm, cache: &Path, key: &str) -> Result { + WriteOpts::new().algorithm(algo).open(cache, key).await } - inner(cache.as_ref(), key.as_ref()).await + inner(algo, cache.as_ref(), key.as_ref()).await } /// Closes the Writer handle and writes content and index entries. Also @@ -190,15 +258,41 @@ where D: AsRef<[u8]>, K: AsRef, { - fn inner(cache: &Path, key: &str, data: &[u8]) -> Result { - let mut writer = SyncWriter::create(cache, key)?; + write_sync_with_algo(Algorithm::Sha256, cache, key, data) +} + +/// Writes `data` to the `cache` synchronously, indexing it under `key`. Use +/// this to customize the hashing algorithm. +/// +/// ## Example +/// ```no_run +/// use std::io::Read; +/// +/// fn main() -> cacache::Result<()> { +/// let data = cacache::write_sync_with_algo(cacache::Algorithm::Xxh3, "./my-cache", "my-key", b"hello")?; +/// Ok(()) +/// } +/// ``` +pub fn write_sync_with_algo( + algo: Algorithm, + cache: P, + key: K, + data: D, +) -> Result +where + P: AsRef, + D: AsRef<[u8]>, + K: AsRef, +{ + fn inner(algo: Algorithm, cache: &Path, key: &str, data: &[u8]) -> Result { + let mut writer = SyncWriter::create_with_algo(algo, cache, key)?; writer.write_all(data).with_context(|| { format!("Failed to write to cache data for key {key} for cache at {cache:?}") })?; writer.written = data.as_ref().len(); writer.commit() } - inner(cache.as_ref(), key.as_ref(), data.as_ref()) + inner(algo, cache.as_ref(), key.as_ref(), data.as_ref()) } /// Writes `data` to the `cache` synchronously, skipping associating a key with it. @@ -217,9 +311,28 @@ where P: AsRef, D: AsRef<[u8]>, { - fn inner(cache: &Path, data: &[u8]) -> Result { + write_hash_sync_with_algo(Algorithm::Sha256, cache, data) +} + +/// Writes `data` to the `cache` synchronously, skipping associating a key with it. Use this to customize the hashing algorithm. 
+/// +/// ## Example +/// ```no_run +/// use std::io::Read; +/// +/// fn main() -> cacache::Result<()> { +/// let data = cacache::write_hash_sync_with_algo(cacache::Algorithm::Xxh3, "./my-cache", b"hello")?; +/// Ok(()) +/// } +/// ``` +pub fn write_hash_sync_with_algo(algo: Algorithm, cache: P, data: D) -> Result +where + P: AsRef, + D: AsRef<[u8]>, +{ + fn inner(algo: Algorithm, cache: &Path, data: &[u8]) -> Result { let mut writer = WriteOpts::new() - .algorithm(Algorithm::Sha256) + .algorithm(algo) .size(data.len()) .open_hash_sync(cache)?; writer @@ -228,9 +341,8 @@ where writer.written = data.len(); writer.commit() } - inner(cache.as_ref(), data.as_ref()) + inner(algo, cache.as_ref(), data.as_ref()) } - /// Builder for options and flags for opening a new cache file to write data into. #[derive(Clone, Default)] pub struct WriteOpts { @@ -426,6 +538,31 @@ impl SyncWriter { inner(cache.as_ref(), key.as_ref()) } + /// Creates a new writable file handle into the cache. Use this to + /// customize the hashing algorithm. + /// + /// ## Example + /// ```no_run + /// use std::io::prelude::*; + /// + /// fn main() -> cacache::Result<()> { + /// let mut fd = cacache::SyncWriter::create_with_algo(cacache::Algorithm::Xxh3, "./my-cache", "my-key")?; + /// fd.write_all(b"hello world").expect("Failed to write to cache"); + /// // Data is not saved into the cache until you commit it. + /// fd.commit()?; + /// Ok(()) + /// } + /// ``` + pub fn create_with_algo(algo: Algorithm, cache: P, key: K) -> Result + where + P: AsRef, + K: AsRef, + { + fn inner(algo: Algorithm, cache: &Path, key: &str) -> Result { + WriteOpts::new().algorithm(algo).open_sync(cache, key) + } + inner(algo, cache.as_ref(), key.as_ref()) + } /// Closes the Writer handle and writes content and index entries. Also /// verifies data against `size` and `integrity` options, if provided. /// Must be called manually in order to complete the writing process,