fix(write): set tmpfile length in async writer (#35)

Fixes: https://github.com/zkat/cacache-rs/issues/34

The async `poll_write()` implementation was creating a tempfile as
a backing for its inner mmap, but it was failing to set the length
on the file to match the incoming data. Compare with the sync
implementation!

This bug was exposed when the `memmap2` crate was swapped in
for `memmap`. The older crate was likely more lax about this.

Wrote a pair of new tests for `cacache::write_hash_sync` and
`cacache::write_hash`. The async test fails without this change, as
does any benchmarks run. Everything passes with it.

BREAKING CHANGE: This commit also bumps the MSRV for cacache to 1.66.1.
This commit is contained in:
C J Silverio 2023-01-23 17:37:34 -08:00 committed by GitHub
parent 1b29e7435c
commit 6d84ff0aed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 46 additions and 15 deletions

View File

@ -28,7 +28,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
rust: [1.54.0, stable]
rust: [1.66.1, stable]
os: [ubuntu-latest, macOS-latest, windows-latest]
steps:

View File

@ -2,7 +2,7 @@
name = "cacache"
version = "10.0.2-alpha.0"
authors = ["Kat Marchán <kzm@zkat.tech>"]
edition = "2018"
edition = "2021"
description = "Content-addressable, key-value, high-performance, on-disk cache."
license = "Apache-2.0"
repository = "https://github.com/zkat/cacache-rs"

View File

@ -26,7 +26,7 @@ fn baseline_read_many_sync(c: &mut Criterion) {
.collect();
let data = b"hello world";
for path in paths.iter() {
let mut fd = File::create(&path).unwrap();
let mut fd = File::create(path).unwrap();
fd.write_all(data).unwrap();
drop(fd);
}
@ -59,7 +59,7 @@ fn baseline_read_many_async(c: &mut Criterion) {
.collect();
let data = b"hello world";
for path in paths.iter() {
let mut fd = File::create(&path).unwrap();
let mut fd = File::create(path).unwrap();
fd.write_all(data).unwrap();
drop(fd);
}

View File

@ -70,7 +70,7 @@ pub async fn open_async(cache: &Path, sri: Integrity) -> Result<AsyncReader> {
pub fn read(cache: &Path, sri: &Integrity) -> Result<Vec<u8>> {
let cpath = path::content_path(cache, sri);
let ret = fs::read(&cpath).to_internal()?;
let ret = fs::read(cpath).to_internal()?;
sri.check(&ret)?;
Ok(ret)
}

View File

@ -121,11 +121,12 @@ impl AsyncWriter {
.create(&tmp_path)
.await
.to_internal()?;
let tmpfile = task::spawn_blocking(|| NamedTempFile::new_in(tmp_path))
let mut tmpfile = task::spawn_blocking(|| NamedTempFile::new_in(tmp_path))
.await
.to_internal()?;
let mmap = if let Some(size) = size {
if size <= MAX_MMAP_SIZE {
tmpfile.as_file_mut().set_len(size as u64).to_internal()?;
unsafe { MmapMut::map_mut(tmpfile.as_file()).ok() }
} else {
None

View File

@ -169,7 +169,7 @@ pub async fn read_hash<P>(cache: P, sri: &Integrity) -> Result<Vec<u8>>
where
P: AsRef<Path>,
{
Ok(read::read_async(cache.as_ref(), sri).await?)
read::read_async(cache.as_ref(), sri).await
}
/// Copies cache data to a specified location. Returns the number of bytes
@ -235,7 +235,7 @@ where
P: AsRef<Path>,
K: AsRef<str>,
{
Ok(index::find_async(cache.as_ref(), key.as_ref()).await?)
index::find_async(cache.as_ref(), key.as_ref()).await
}
/// Returns true if the given hash exists in the cache.
@ -584,9 +584,9 @@ mod tests {
let tmp = tempfile::tempdir().unwrap();
let dir = tmp.path();
let dest = dir.join("data");
crate::write_sync(&dir, "my-key", b"hello world").unwrap();
crate::write_sync(dir, "my-key", b"hello world").unwrap();
crate::copy_sync(&dir, "my-key", &dest).unwrap();
crate::copy_sync(dir, "my-key", &dest).unwrap();
let data = fs::read(&dest).unwrap();
assert_eq!(data, b"hello world");
}
@ -596,9 +596,9 @@ mod tests {
let tmp = tempfile::tempdir().unwrap();
let dir = tmp.path();
let dest = dir.join("data");
let sri = crate::write_sync(&dir, "my-key", b"hello world").unwrap();
let sri = crate::write_sync(dir, "my-key", b"hello world").unwrap();
crate::copy_hash_sync(&dir, &sri, &dest).unwrap();
crate::copy_hash_sync(dir, &sri, &dest).unwrap();
let data = fs::read(&dest).unwrap();
assert_eq!(data, b"hello world");
}

View File

@ -267,13 +267,13 @@ fn bucket_path(cache: &Path, key: &str) -> PathBuf {
fn hash_key(key: &str) -> String {
let mut hasher = Sha1::new();
hasher.update(&key);
hasher.update(key);
hex::encode(hasher.finalize())
}
fn hash_entry(key: &str) -> String {
let mut hasher = Sha256::new();
hasher.update(&key);
hasher.update(key);
hex::encode(hasher.finalize())
}

View File

@ -440,4 +440,34 @@ mod tests {
let data = crate::read_sync(&dir, "hello").unwrap();
assert_eq!(data, b"hello");
}
#[test]
fn hash_write_sync() {
let tmp = tempfile::tempdir().unwrap();
let dir = tmp.path().to_owned();
let original = format!("hello world{}", 5);
let integrity = crate::write_hash_sync(&dir, &original)
.expect("should be able to write a hash synchronously");
let bytes = crate::read_hash_sync(&dir, &integrity)
.expect("should be able to read the data we just wrote");
let result =
String::from_utf8(bytes).expect("we wrote valid utf8 but did not read valid utf8 back");
assert_eq!(result, original, "we did not read back what we wrote");
}
#[async_attributes::test]
async fn hash_write_async() {
let tmp = tempfile::tempdir().unwrap();
let dir = tmp.path().to_owned();
let original = format!("hello world{}", 12);
let integrity = crate::write_hash(&dir, &original)
.await
.expect("should be able to write a hash asynchronously");
let bytes = crate::read_hash(&dir, &integrity)
.await
.expect("should be able to read back what we wrote");
let result =
String::from_utf8(bytes).expect("we wrote valid utf8 but did not read valid utf8 back");
assert_eq!(result, original, "we did not read back what we wrote");
}
}

View File

@ -66,7 +66,7 @@ where
/// }
/// ```
pub async fn remove_hash<P: AsRef<Path>>(cache: P, sri: &Integrity) -> Result<()> {
Ok(rm::rm_async(cache.as_ref(), sri).await?)
rm::rm_async(cache.as_ref(), sri).await
}
/// Removes entire contents of the cache, including temporary files, the entry