fix(write): set tmpfile length in async writer (#35)

Fixes: https://github.com/zkat/cacache-rs/issues/34

The async `poll_write()` implementation was creating a tempfile as
the backing for its inner mmap, but it was failing to set the file's
length to the expected size of the incoming data before mapping it.
Compare with the sync implementation!

This bug was exposed when the `memmap2` crate was swapped in
for `memmap`. The older crate was likely more lax about this.

Added a pair of new tests for `cacache::write_hash_sync` and
`cacache::write_hash`. Without this change the async test fails, as
do any benchmarks that are run. Everything passes with it.

BREAKING CHANGE: This commit also bumps the MSRV for cacache to 1.66.1.
This commit is contained in:
C J Silverio 2023-01-23 17:37:34 -08:00 committed by GitHub
parent 1b29e7435c
commit 6d84ff0aed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 46 additions and 15 deletions

View File

@ -28,7 +28,7 @@ jobs:
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
matrix: matrix:
rust: [1.54.0, stable] rust: [1.66.1, stable]
os: [ubuntu-latest, macOS-latest, windows-latest] os: [ubuntu-latest, macOS-latest, windows-latest]
steps: steps:

View File

@ -2,7 +2,7 @@
name = "cacache" name = "cacache"
version = "10.0.2-alpha.0" version = "10.0.2-alpha.0"
authors = ["Kat Marchán <kzm@zkat.tech>"] authors = ["Kat Marchán <kzm@zkat.tech>"]
edition = "2018" edition = "2021"
description = "Content-addressable, key-value, high-performance, on-disk cache." description = "Content-addressable, key-value, high-performance, on-disk cache."
license = "Apache-2.0" license = "Apache-2.0"
repository = "https://github.com/zkat/cacache-rs" repository = "https://github.com/zkat/cacache-rs"

View File

@ -26,7 +26,7 @@ fn baseline_read_many_sync(c: &mut Criterion) {
.collect(); .collect();
let data = b"hello world"; let data = b"hello world";
for path in paths.iter() { for path in paths.iter() {
let mut fd = File::create(&path).unwrap(); let mut fd = File::create(path).unwrap();
fd.write_all(data).unwrap(); fd.write_all(data).unwrap();
drop(fd); drop(fd);
} }
@ -59,7 +59,7 @@ fn baseline_read_many_async(c: &mut Criterion) {
.collect(); .collect();
let data = b"hello world"; let data = b"hello world";
for path in paths.iter() { for path in paths.iter() {
let mut fd = File::create(&path).unwrap(); let mut fd = File::create(path).unwrap();
fd.write_all(data).unwrap(); fd.write_all(data).unwrap();
drop(fd); drop(fd);
} }

View File

@ -70,7 +70,7 @@ pub async fn open_async(cache: &Path, sri: Integrity) -> Result<AsyncReader> {
pub fn read(cache: &Path, sri: &Integrity) -> Result<Vec<u8>> { pub fn read(cache: &Path, sri: &Integrity) -> Result<Vec<u8>> {
let cpath = path::content_path(cache, sri); let cpath = path::content_path(cache, sri);
let ret = fs::read(&cpath).to_internal()?; let ret = fs::read(cpath).to_internal()?;
sri.check(&ret)?; sri.check(&ret)?;
Ok(ret) Ok(ret)
} }

View File

@ -121,11 +121,12 @@ impl AsyncWriter {
.create(&tmp_path) .create(&tmp_path)
.await .await
.to_internal()?; .to_internal()?;
let tmpfile = task::spawn_blocking(|| NamedTempFile::new_in(tmp_path)) let mut tmpfile = task::spawn_blocking(|| NamedTempFile::new_in(tmp_path))
.await .await
.to_internal()?; .to_internal()?;
let mmap = if let Some(size) = size { let mmap = if let Some(size) = size {
if size <= MAX_MMAP_SIZE { if size <= MAX_MMAP_SIZE {
tmpfile.as_file_mut().set_len(size as u64).to_internal()?;
unsafe { MmapMut::map_mut(tmpfile.as_file()).ok() } unsafe { MmapMut::map_mut(tmpfile.as_file()).ok() }
} else { } else {
None None

View File

@ -169,7 +169,7 @@ pub async fn read_hash<P>(cache: P, sri: &Integrity) -> Result<Vec<u8>>
where where
P: AsRef<Path>, P: AsRef<Path>,
{ {
Ok(read::read_async(cache.as_ref(), sri).await?) read::read_async(cache.as_ref(), sri).await
} }
/// Copies cache data to a specified location. Returns the number of bytes /// Copies cache data to a specified location. Returns the number of bytes
@ -235,7 +235,7 @@ where
P: AsRef<Path>, P: AsRef<Path>,
K: AsRef<str>, K: AsRef<str>,
{ {
Ok(index::find_async(cache.as_ref(), key.as_ref()).await?) index::find_async(cache.as_ref(), key.as_ref()).await
} }
/// Returns true if the given hash exists in the cache. /// Returns true if the given hash exists in the cache.
@ -584,9 +584,9 @@ mod tests {
let tmp = tempfile::tempdir().unwrap(); let tmp = tempfile::tempdir().unwrap();
let dir = tmp.path(); let dir = tmp.path();
let dest = dir.join("data"); let dest = dir.join("data");
crate::write_sync(&dir, "my-key", b"hello world").unwrap(); crate::write_sync(dir, "my-key", b"hello world").unwrap();
crate::copy_sync(&dir, "my-key", &dest).unwrap(); crate::copy_sync(dir, "my-key", &dest).unwrap();
let data = fs::read(&dest).unwrap(); let data = fs::read(&dest).unwrap();
assert_eq!(data, b"hello world"); assert_eq!(data, b"hello world");
} }
@ -596,9 +596,9 @@ mod tests {
let tmp = tempfile::tempdir().unwrap(); let tmp = tempfile::tempdir().unwrap();
let dir = tmp.path(); let dir = tmp.path();
let dest = dir.join("data"); let dest = dir.join("data");
let sri = crate::write_sync(&dir, "my-key", b"hello world").unwrap(); let sri = crate::write_sync(dir, "my-key", b"hello world").unwrap();
crate::copy_hash_sync(&dir, &sri, &dest).unwrap(); crate::copy_hash_sync(dir, &sri, &dest).unwrap();
let data = fs::read(&dest).unwrap(); let data = fs::read(&dest).unwrap();
assert_eq!(data, b"hello world"); assert_eq!(data, b"hello world");
} }

View File

@ -267,13 +267,13 @@ fn bucket_path(cache: &Path, key: &str) -> PathBuf {
fn hash_key(key: &str) -> String { fn hash_key(key: &str) -> String {
let mut hasher = Sha1::new(); let mut hasher = Sha1::new();
hasher.update(&key); hasher.update(key);
hex::encode(hasher.finalize()) hex::encode(hasher.finalize())
} }
fn hash_entry(key: &str) -> String { fn hash_entry(key: &str) -> String {
let mut hasher = Sha256::new(); let mut hasher = Sha256::new();
hasher.update(&key); hasher.update(key);
hex::encode(hasher.finalize()) hex::encode(hasher.finalize())
} }

View File

@ -440,4 +440,34 @@ mod tests {
let data = crate::read_sync(&dir, "hello").unwrap(); let data = crate::read_sync(&dir, "hello").unwrap();
assert_eq!(data, b"hello"); assert_eq!(data, b"hello");
} }
/// Writes a string with `write_hash_sync`, reads it back with
/// `read_hash_sync` using the returned integrity value, and checks that the
/// bytes decode to the original string.
#[test]
fn hash_write_sync() {
    // Keep the tempdir guard bound for the whole test.
    let scratch = tempfile::tempdir().unwrap();
    let cache_dir = scratch.path().to_owned();
    let payload = format!("hello world{}", 5);

    let sri = crate::write_hash_sync(&cache_dir, &payload)
        .expect("should be able to write a hash synchronously");
    let raw = crate::read_hash_sync(&cache_dir, &sri)
        .expect("should be able to read the data we just wrote");

    let round_tripped =
        String::from_utf8(raw).expect("we wrote valid utf8 but did not read valid utf8 back");
    assert_eq!(round_tripped, payload, "we did not read back what we wrote");
}
/// Async counterpart of the synchronous round-trip test: writes a string with
/// `write_hash`, reads it back with `read_hash` using the returned integrity
/// value, and checks that the bytes decode to the original string.
#[async_attributes::test]
async fn hash_write_async() {
    // Keep the tempdir guard bound for the whole test.
    let scratch = tempfile::tempdir().unwrap();
    let cache_dir = scratch.path().to_owned();
    let payload = format!("hello world{}", 12);

    let write_outcome = crate::write_hash(&cache_dir, &payload).await;
    let sri = write_outcome.expect("should be able to write a hash asynchronously");

    let read_outcome = crate::read_hash(&cache_dir, &sri).await;
    let raw = read_outcome.expect("should be able to read back what we wrote");

    let round_tripped =
        String::from_utf8(raw).expect("we wrote valid utf8 but did not read valid utf8 back");
    assert_eq!(round_tripped, payload, "we did not read back what we wrote");
}
} }

View File

@ -66,7 +66,7 @@ where
/// } /// }
/// ``` /// ```
pub async fn remove_hash<P: AsRef<Path>>(cache: P, sri: &Integrity) -> Result<()> { pub async fn remove_hash<P: AsRef<Path>>(cache: P, sri: &Integrity) -> Result<()> {
Ok(rm::rm_async(cache.as_ref(), sri).await?) rm::rm_async(cache.as_ref(), sri).await
} }
/// Removes entire contents of the cache, including temporary files, the entry /// Removes entire contents of the cache, including temporary files, the entry