feat(reflink): Separate reflink behavior into their own functions (#58)

BREAKING CHANGE: some signatures for copy have changed, and copy no longer automatically reflinks
This commit is contained in:
Kat Marchán 2023-10-07 12:37:19 -07:00
parent 34ee679816
commit cfdf4eed4d
No known key found for this signature in database
GPG Key ID: AEB529C08A3C7E9E
6 changed files with 305 additions and 45 deletions

View File

@ -18,7 +18,7 @@ futures = { version = "0.3.17", optional = true }
hex = "0.4.3"
memmap2 = { version = "0.5.8", optional = true }
miette = "5.7.0"
reflink-copy = "0.1.5"
reflink-copy = "0.1.9"
serde = "1.0.130"
serde_derive = "1.0.130"
serde_json = "1.0.68"

View File

@ -317,7 +317,7 @@ fn write_hash_async_xxh3(c: &mut Criterion) {
fn create_tmpfile(tmp: &tempfile::TempDir, buf: &[u8]) -> PathBuf {
let dir = tmp.path().to_owned();
let target = dir.join("target-file");
std::fs::create_dir_all(target.parent().unwrap().clone()).unwrap();
std::fs::create_dir_all(&target.parent().unwrap()).unwrap();
let mut file = File::create(target.clone()).unwrap();
file.write_all(buf).unwrap();
file.flush().unwrap();

View File

@ -44,7 +44,7 @@ fn create_symlink(sri: Integrity, cache: &PathBuf, target: &PathBuf) -> Result<I
cpath.parent().unwrap().display()
)
})?;
if let Err(e) = symlink_file(target, cpath.clone()) {
if let Err(e) = symlink_file(target, &cpath) {
// If symlinking fails because there's *already* a file at the desired
// destination, that is ok -- all the cache should care about is that
// there is **some** valid file associated with the computed integrity.
@ -187,8 +187,8 @@ mod tests {
fn create_tmpfile(tmp: &tempfile::TempDir, buf: &[u8]) -> PathBuf {
let dir = tmp.path().to_owned();
let target = dir.join("target-file");
std::fs::create_dir_all(target.parent().unwrap().clone()).unwrap();
let mut file = File::create(target.clone()).unwrap();
std::fs::create_dir_all(&target.parent().unwrap()).unwrap();
let mut file = File::create(&target).unwrap();
file.write_all(buf).unwrap();
file.flush().unwrap();
target
@ -216,7 +216,7 @@ mod tests {
let cpath = path::content_path(&dir, &sri);
assert!(cpath.exists());
let metadata = std::fs::symlink_metadata(cpath.clone()).unwrap();
let metadata = std::fs::symlink_metadata(&cpath).unwrap();
let file_type = metadata.file_type();
assert!(file_type.is_symlink());
assert_eq!(std::fs::read(cpath).unwrap(), b"hello world");
@ -249,7 +249,7 @@ mod tests {
let cpath = path::content_path(&dir, &sri);
assert!(cpath.exists());
let metadata = std::fs::symlink_metadata(cpath.clone()).unwrap();
let metadata = std::fs::symlink_metadata(&cpath).unwrap();
let file_type = metadata.file_type();
assert!(file_type.is_symlink());
assert_eq!(std::fs::read(cpath).unwrap(), b"hello world");

View File

@ -11,7 +11,6 @@ use futures::io::AsyncReadExt;
#[cfg(feature = "tokio")]
use tokio::io::AsyncReadExt;
use reflink_copy as reflink;
use ssri::{Algorithm, Integrity, IntegrityChecker};
#[cfg(any(feature = "async-std", feature = "tokio"))]
@ -133,11 +132,11 @@ pub async fn read_async<'a>(cache: &'a Path, sri: &'a Integrity) -> Result<Vec<u
Ok(ret)
}
pub fn copy_unchecked(cache: &Path, sri: &Integrity, to: &Path) -> Result<()> {
pub fn reflink_unchecked(cache: &Path, sri: &Integrity, to: &Path) -> Result<()> {
let cpath = path::content_path(cache, sri);
reflink::reflink_or_copy(cpath, to).with_context(|| {
reflink_copy::reflink(cpath, to).with_context(|| {
format!(
"Failed to copy cache contents from {} to {}",
"Failed to reflink cache contents from {} to {}",
path::content_path(cache, sri).display(),
to.display()
)
@ -145,8 +144,56 @@ pub fn copy_unchecked(cache: &Path, sri: &Integrity, to: &Path) -> Result<()> {
Ok(())
}
/// Verifies the cached content addressed by `sri`, then reflinks it to `to`.
///
/// The content is streamed through the reader returned by `open` only so
/// that it can be checked against `sri`; the bytes themselves are discarded.
/// The link is created with [`reflink_unchecked`] once `check()` succeeds.
///
/// Fails if the content cannot be opened or read, if the integrity check
/// fails, or if the reflink itself fails.
pub fn reflink(cache: &Path, sri: &Integrity, to: &Path) -> Result<()> {
    let mut verifier = open(cache, sri.clone())?;
    let mut scratch = [0u8; 1024];
    loop {
        let consumed = verifier.read(&mut scratch).with_context(|| {
            format!(
                "Failed to read cache contents while verifying integrity for {}",
                path::content_path(cache, sri).display()
            )
        });
        match consumed? {
            // End of stream: everything has passed through the reader.
            0 => break,
            _ => continue,
        }
    }
    verifier.check()?;
    reflink_unchecked(cache, sri, to)
}
/// Asynchronously verifies the cached content addressed by `sri`, then
/// reflinks it to `to`.
///
/// The content is read to the end through the reader returned by
/// `open_async` so that `check()` can validate it; the bytes themselves are
/// discarded.
///
/// Only compiled when an async runtime feature is enabled, matching the other
/// `*_async` functions in this module: the `AsyncReadExt` import this function
/// relies on is itself feature-gated.
///
/// Fails if the content cannot be opened or read, if the integrity check
/// fails, or if the reflink itself fails.
#[cfg(any(feature = "async-std", feature = "tokio"))]
pub async fn reflink_async(cache: &Path, sri: &Integrity, to: &Path) -> Result<()> {
    let mut reader = open_async(cache, sri.clone()).await?;
    let mut buf = [0u8; 1024 * 8];
    loop {
        let read = AsyncReadExt::read(&mut reader, &mut buf)
            .await
            .with_context(|| {
                format!(
                    "Failed to read cache contents while verifying integrity for {}",
                    path::content_path(cache, sri).display()
                )
            })?;
        if read == 0 {
            break;
        }
    }
    reader.check()?;
    // The link itself is created with the synchronous helper; only the
    // verification read above is awaited.
    reflink_unchecked(cache, sri, to)
}
pub fn copy_unchecked(cache: &Path, sri: &Integrity, to: &Path) -> Result<u64> {
let cpath = path::content_path(cache, sri);
std::fs::copy(cpath, to).with_context(|| {
format!(
"Failed to copy cache contents from {} to {}",
path::content_path(cache, sri).display(),
to.display()
)
})
}
pub fn copy(cache: &Path, sri: &Integrity, to: &Path) -> Result<u64> {
copy_unchecked(cache, sri, to)?;
let mut reader = open(cache, sri.clone())?;
let mut buf: [u8; 1024] = [0; 1024];
let mut size = 0;
@ -163,6 +210,7 @@ pub fn copy(cache: &Path, sri: &Integrity, to: &Path) -> Result<u64> {
}
}
reader.check()?;
copy_unchecked(cache, sri, to)?;
Ok(size as u64)
}
@ -172,23 +220,19 @@ pub async fn copy_unchecked_async<'a>(
cache: &'a Path,
sri: &'a Integrity,
to: &'a Path,
) -> Result<()> {
) -> Result<u64> {
let cpath = path::content_path(cache, sri);
if reflink::reflink(&cpath, to).is_err() {
crate::async_lib::copy(&cpath, to).await.with_context(|| {
format!(
"Failed to copy cache contents from {} to {}",
path::content_path(cache, sri).display(),
to.display()
)
})?;
}
Ok(())
crate::async_lib::copy(&cpath, to).await.with_context(|| {
format!(
"Failed to copy cache contents from {} to {}",
path::content_path(cache, sri).display(),
to.display()
)
})
}
#[cfg(any(feature = "async-std", feature = "tokio"))]
pub async fn copy_async<'a>(cache: &'a Path, sri: &'a Integrity, to: &'a Path) -> Result<u64> {
copy_unchecked_async(cache, sri, to).await?;
let mut reader = open_async(cache, sri.clone()).await?;
let mut buf: [u8; 1024] = [0; 1024];
let mut size = 0;
@ -207,6 +251,7 @@ pub async fn copy_async<'a>(cache: &'a Path, sri: &'a Integrity, to: &'a Path) -
}
}
reader.check()?;
copy_unchecked_async(cache, sri, to).await?;
Ok(size as u64)
}
@ -243,7 +288,6 @@ pub fn hard_link(cache: &Path, sri: &Integrity, to: &Path) -> Result<()> {
#[cfg(any(feature = "async-std", feature = "tokio"))]
pub async fn hard_link_async(cache: &Path, sri: &Integrity, to: &Path) -> Result<()> {
hard_link_unchecked(cache, sri, to)?;
let mut reader = open_async(cache, sri.clone()).await?;
let mut buf = [0u8; 1024 * 8];
loop {
@ -260,6 +304,7 @@ pub async fn hard_link_async(cache: &Path, sri: &Integrity, to: &Path) -> Result
}
}
reader.check()?;
hard_link_unchecked(cache, sri, to)?;
Ok(())
}

View File

@ -193,9 +193,6 @@ where
/// Copies cache data to a specified location. Returns the number of bytes
/// copied.
///
/// On platforms that support it, this will create a copy-on-write "reflink"
/// with a full-copy fallback.
///
/// ## Example
/// ```no_run
/// use async_std::prelude::*;
@ -227,9 +224,6 @@ where
/// Copies cache data to a specified location. Cache data will not be checked
/// during copy.
///
/// On platforms that support it, this will create a copy-on-write "reflink"
/// with a full-copy fallback.
///
/// ## Example
/// ```no_run
/// use async_std::prelude::*;
@ -242,13 +236,13 @@ where
/// }
/// ```
#[cfg(any(feature = "async-std", feature = "tokio"))]
pub async fn copy_unchecked<P, K, Q>(cache: P, key: K, to: Q) -> Result<()>
pub async fn copy_unchecked<P, K, Q>(cache: P, key: K, to: Q) -> Result<u64>
where
P: AsRef<Path>,
K: AsRef<str>,
Q: AsRef<Path>,
{
async fn inner(cache: &Path, key: &str, to: &Path) -> Result<()> {
async fn inner(cache: &Path, key: &str, to: &Path) -> Result<u64> {
if let Some(entry) = index::find_async(cache, key).await? {
copy_hash_unchecked(cache, &entry.integrity, to).await
} else {
@ -261,9 +255,6 @@ where
/// Copies a cache data by hash to a specified location. Returns the number of
/// bytes copied.
///
/// On platforms that support it, this will create a copy-on-write "reflink"
/// with a full-copy fallback.
///
/// ## Example
/// ```no_run
/// use async_std::prelude::*;
@ -288,9 +279,6 @@ where
/// Copies a cache data by hash to a specified location. Copied data will not
/// be checked against the given hash.
///
/// On platforms that support it, this will create a copy-on-write "reflink"
/// with a full-copy fallback.
///
/// ## Example
/// ```no_run
/// use async_std::prelude::*;
@ -304,7 +292,7 @@ where
/// }
/// ```
#[cfg(any(feature = "async-std", feature = "tokio"))]
pub async fn copy_hash_unchecked<P, Q>(cache: P, sri: &Integrity, to: Q) -> Result<()>
pub async fn copy_hash_unchecked<P, Q>(cache: P, sri: &Integrity, to: Q) -> Result<u64>
where
P: AsRef<Path>,
Q: AsRef<Path>,
@ -312,6 +300,105 @@ where
read::copy_unchecked_async(cache.as_ref(), sri, to.as_ref()).await
}
/// Creates a reflink/clonefile from a cache entry to a destination path.
///
/// The entry is looked up by `key` in the cache index and then linked via
/// [`reflink_hash`], which verifies the content against the entry's integrity
/// before creating the link.
///
/// Fails if the destination is on a different filesystem or if the filesystem
/// does not support reflinks. Returns `Error::EntryNotFound` when the index
/// has no entry for `key`.
///
/// Currently, reflinks are known to work on APFS (macOS), XFS, btrfs, and
/// ReFS (Windows DevDrive)
///
/// ## Example
/// ```no_run
/// use async_std::prelude::*;
/// use async_attributes;
///
/// #[async_attributes::main]
/// async fn main() -> cacache::Result<()> {
///     cacache::reflink("./my-cache", "my-key", "./data.txt").await?;
///     Ok(())
/// }
/// ```
#[cfg(any(feature = "async-std", feature = "tokio"))]
pub async fn reflink<P, K, Q>(cache: P, key: K, to: Q) -> Result<()>
where
    P: AsRef<Path>,
    K: AsRef<str>,
    Q: AsRef<Path>,
{
    // Non-generic inner function so the body is compiled once regardless of
    // how many `P`/`K`/`Q` combinations callers instantiate.
    async fn inner(cache: &Path, key: &str, to: &Path) -> Result<()> {
        if let Some(entry) = index::find_async(cache, key).await? {
            reflink_hash(cache, &entry.integrity, to).await
        } else {
            Err(Error::EntryNotFound(cache.to_path_buf(), key.into()))
        }
    }
    inner(cache.as_ref(), key.as_ref(), to.as_ref()).await
}
/// Reflinks/clonefiles cache data to a specified location. Cache data will
/// not be checked during linking.
///
/// The entry is looked up by `key` asynchronously; the link itself is then
/// created through the synchronous [`reflink_hash_unchecked_sync`].
///
/// Fails if the destination is on a different filesystem or if the filesystem
/// does not support reflinks. Returns `Error::EntryNotFound` when the index
/// has no entry for `key`.
///
/// Currently, reflinks are known to work on APFS (macOS), XFS, btrfs, and
/// ReFS (Windows DevDrive)
///
/// ## Example
/// ```no_run
/// use async_std::prelude::*;
/// use async_attributes;
///
/// #[async_attributes::main]
/// async fn main() -> cacache::Result<()> {
///     cacache::reflink_unchecked("./my-cache", "my-key", "./data.txt").await?;
///     Ok(())
/// }
/// ```
#[cfg(any(feature = "async-std", feature = "tokio"))]
pub async fn reflink_unchecked<P, K, Q>(cache: P, key: K, to: Q) -> Result<()>
where
    P: AsRef<Path>,
    K: AsRef<str>,
    Q: AsRef<Path>,
{
    // Non-generic inner function so the body is compiled once regardless of
    // how many `P`/`K`/`Q` combinations callers instantiate.
    async fn inner(cache: &Path, key: &str, to: &Path) -> Result<()> {
        if let Some(entry) = index::find_async(cache, key).await? {
            reflink_hash_unchecked_sync(cache, &entry.integrity, to)
        } else {
            Err(Error::EntryNotFound(cache.to_path_buf(), key.into()))
        }
    }
    inner(cache.as_ref(), key.as_ref(), to.as_ref()).await
}
/// Reflinks/clonefiles cache data by hash to a specified location.
///
/// Delegates to `read::reflink_async`, which verifies the content against
/// `sri` before creating the link.
///
/// Fails if the destination is on a different filesystem or if the filesystem
/// does not support reflinks.
///
/// Currently, reflinks are known to work on APFS (macOS), XFS, btrfs, and
/// ReFS (Windows DevDrive)
///
/// ## Example
/// ```no_run
/// use async_std::prelude::*;
/// use async_attributes;
///
/// #[async_attributes::main]
/// async fn main() -> cacache::Result<()> {
///     let sri = cacache::write("./my-cache", "my-key", b"hello world").await?;
///     cacache::reflink_hash("./my-cache", &sri, "./data.txt").await?;
///     Ok(())
/// }
/// ```
#[cfg(any(feature = "async-std", feature = "tokio"))]
pub async fn reflink_hash<P, Q>(cache: P, sri: &Integrity, to: Q) -> Result<()>
where
    P: AsRef<Path>,
    Q: AsRef<Path>,
{
    read::reflink_async(cache.as_ref(), sri, to.as_ref()).await
}
/// Hard links a cache entry by key to a specified location.
#[cfg(any(feature = "async-std", feature = "tokio"))]
pub async fn hard_link<P, K, Q>(cache: P, key: K, to: Q) -> Result<()>
@ -541,13 +628,13 @@ where
/// Ok(())
/// }
/// ```
pub fn copy_unchecked_sync<P, K, Q>(cache: P, key: K, to: Q) -> Result<()>
pub fn copy_unchecked_sync<P, K, Q>(cache: P, key: K, to: Q) -> Result<u64>
where
P: AsRef<Path>,
K: AsRef<str>,
Q: AsRef<Path>,
{
fn inner(cache: &Path, key: &str, to: &Path) -> Result<()> {
fn inner(cache: &Path, key: &str, to: &Path) -> Result<u64> {
if let Some(entry) = index::find(cache, key)? {
copy_hash_unchecked_sync(cache, &entry.integrity, to)
} else {
@ -597,7 +684,7 @@ where
/// Ok(())
/// }
/// ```
pub fn copy_hash_unchecked_sync<P, Q>(cache: P, sri: &Integrity, to: Q) -> Result<()>
pub fn copy_hash_unchecked_sync<P, Q>(cache: P, sri: &Integrity, to: Q) -> Result<u64>
where
P: AsRef<Path>,
Q: AsRef<Path>,
@ -605,6 +692,134 @@ where
read::copy_unchecked(cache.as_ref(), sri, to.as_ref())
}
/// Creates a reflink/clonefile from a cache entry to a destination path.
///
/// The entry is looked up by `key` in the cache index and then linked via
/// [`reflink_hash_sync`], which verifies the content against the entry's
/// integrity before creating the link.
///
/// Fails if the destination is on a different filesystem or if the filesystem
/// does not support reflinks. Returns `Error::EntryNotFound` when the index
/// has no entry for `key`.
///
/// Currently, reflinks are known to work on APFS (macOS), XFS, btrfs, and
/// ReFS (Windows DevDrive)
///
/// ## Example
/// ```no_run
/// fn main() -> cacache::Result<()> {
///     cacache::reflink_sync("./my-cache", "my-key", "./data.txt")?;
///     Ok(())
/// }
/// ```
pub fn reflink_sync<P, K, Q>(cache: P, key: K, to: Q) -> Result<()>
where
    P: AsRef<Path>,
    K: AsRef<str>,
    Q: AsRef<Path>,
{
    // Non-generic inner function so the body is compiled once regardless of
    // how many `P`/`K`/`Q` combinations callers instantiate.
    fn inner(cache: &Path, key: &str, to: &Path) -> Result<()> {
        if let Some(entry) = index::find(cache, key)? {
            reflink_hash_sync(cache, &entry.integrity, to)
        } else {
            Err(Error::EntryNotFound(cache.to_path_buf(), key.into()))
        }
    }
    inner(cache.as_ref(), key.as_ref(), to.as_ref())
}
/// Reflinks/clonefiles cache data by hash to a specified location.
///
/// Delegates to `read::reflink`, which verifies the content against `sri`
/// before creating the link.
///
/// Fails if the destination is on a different filesystem or if the filesystem
/// does not support reflinks.
///
/// Currently, reflinks are known to work on APFS (macOS), XFS, btrfs, and
/// ReFS (Windows DevDrive)
///
/// ## Example
/// ```no_run
/// fn main() -> cacache::Result<()> {
///     let sri = cacache::write_sync("./my-cache", "my-key", b"hello world")?;
///     cacache::reflink_hash_sync("./my-cache", &sri, "./data.txt")?;
///     Ok(())
/// }
/// ```
pub fn reflink_hash_sync<P, Q>(cache: P, sri: &Integrity, to: Q) -> Result<()>
where
    P: AsRef<Path>,
    Q: AsRef<Path>,
{
    read::reflink(cache.as_ref(), sri, to.as_ref())
}
/// Reflinks/clonefiles cache data by hash to a specified location. Cache data
/// will not be checked during linking.
///
/// Delegates directly to `read::reflink_unchecked`.
///
/// Fails if the destination is on a different filesystem or if the filesystem
/// does not support reflinks.
///
/// Currently, reflinks are known to work on APFS (macOS), XFS, btrfs, and
/// ReFS (Windows DevDrive)
///
/// ## Example
/// ```no_run
/// fn main() -> cacache::Result<()> {
///     let sri = cacache::write_sync("./my-cache", "my-key", b"hello world")?;
///     cacache::reflink_hash_unchecked_sync("./my-cache", &sri, "./data.txt")?;
///     Ok(())
/// }
/// ```
pub fn reflink_hash_unchecked_sync<P, Q>(cache: P, sri: &Integrity, to: Q) -> Result<()>
where
    P: AsRef<Path>,
    Q: AsRef<Path>,
{
    read::reflink_unchecked(cache.as_ref(), sri, to.as_ref())
}
/// Reflinks/clonefiles cache data to a specified location. Cache data will
/// not be checked during linking.
///
/// The entry is looked up by `key` in the cache index and then linked via
/// [`reflink_hash_unchecked_sync`].
///
/// Fails if the destination is on a different filesystem or if the filesystem
/// does not support reflinks. Returns `Error::EntryNotFound` when the index
/// has no entry for `key`.
///
/// Currently, reflinks are known to work on APFS (macOS), XFS, btrfs, and
/// ReFS (Windows DevDrive)
///
/// ## Example
/// ```no_run
/// fn main() -> cacache::Result<()> {
///     cacache::reflink_unchecked_sync("./my-cache", "my-key", "./data.txt")?;
///     Ok(())
/// }
/// ```
pub fn reflink_unchecked_sync<P, K, Q>(cache: P, key: K, to: Q) -> Result<()>
where
    P: AsRef<Path>,
    K: AsRef<str>,
    Q: AsRef<Path>,
{
    // Non-generic inner function so the body is compiled once regardless of
    // how many `P`/`K`/`Q` combinations callers instantiate.
    fn inner(cache: &Path, key: &str, to: &Path) -> Result<()> {
        if let Some(entry) = index::find(cache, key)? {
            reflink_hash_unchecked_sync(cache, &entry.integrity, to)
        } else {
            Err(Error::EntryNotFound(cache.to_path_buf(), key.into()))
        }
    }
    inner(cache.as_ref(), key.as_ref(), to.as_ref())
}
/// Hard links a cache entry by key to a specified location. The cache entry
/// contents will not be checked, and all the usual caveats of hard links
/// apply: The potentially-shared cache might be corrupted if the hard link is

View File

@ -502,8 +502,8 @@ mod tests {
fn create_tmpfile(tmp: &tempfile::TempDir, buf: &[u8]) -> PathBuf {
let dir = tmp.path().to_owned();
let target = dir.join("target-file");
std::fs::create_dir_all(target.parent().unwrap().clone()).unwrap();
let mut file = File::create(target.clone()).unwrap();
std::fs::create_dir_all(target.parent().unwrap()).unwrap();
let mut file = File::create(&target).unwrap();
file.write_all(buf).unwrap();
file.flush().unwrap();
target