From 7e8c1e50cc1601d24660ea4c46979f683a26e9f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Sat, 25 Feb 2023 23:49:41 -0800 Subject: [PATCH] feat(copy): add support for reflink-copy and unsafe-copy --- Cargo.toml | 1 + src/content/read.rs | 89 ++++++++++++++++++++----------- src/get.rs | 126 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 185 insertions(+), 31 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2489400..e29af6c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ futures = "0.3.17" hex = "0.4.3" memmap2 = "0.5.8" miette = "5.5.0" +reflink = "0.1.3" serde = "1.0.130" serde_derive = "1.0.130" serde_json = "1.0.68" diff --git a/src/content/read.rs b/src/content/read.rs index 68a032b..c1a1c9a 100644 --- a/src/content/read.rs +++ b/src/content/read.rs @@ -124,52 +124,79 @@ pub async fn read_async<'a>(cache: &'a Path, sri: &'a Integrity) -> Result Result { +pub fn copy_unchecked(cache: &Path, sri: &Integrity, to: &Path) -> Result<()> { let cpath = path::content_path(cache, sri); - let ret = fs::copy(&cpath, to).with_context(|| { + reflink::reflink_or_copy(cpath, to).with_context(|| { format!( "Failed to copy cache contents from {} to {}", path::content_path(cache, sri).display(), to.display() ) })?; - let mut reader = open(cache, sri.clone())?; - let mut buf: [u8; 1024] = [0; 1024]; - while reader.read(&mut buf).with_context(|| { - format!( - "Failed to read cache contents while verifying integrity for {}", - path::content_path(cache, sri).display() - ) - })? > 0 - {} - reader.check()?; - - Ok(ret) + Ok(()) } -pub async fn copy_async<'a>(cache: &'a Path, sri: &'a Integrity, to: &'a Path) -> Result { - let cpath = path::content_path(cache, sri); - let ret = crate::async_lib::copy(&cpath, to).await.with_context(|| { - format!( - "Failed to copy cache contents from {} to {}", - path::content_path(cache, sri).display(), - to.display() - ) - })?; - let mut reader = open_async(cache, sri.clone()).await?; +pub fn copy(cache: &Path, sri: &Integrity, to: &Path) -> Result { + copy_unchecked(cache, sri, to)?; + let mut reader = open(cache, sri.clone())?; let mut buf: [u8; 1024] = [0; 1024]; - while AsyncReadExt::read(&mut reader, &mut buf) - .await - .with_context(|| { + let mut size = 0; + loop { + let read = reader.read(&mut buf).with_context(|| { format!( "Failed to read cache contents while verifying integrity for {}", path::content_path(cache, sri).display() ) - })? - > 0 - {} + })?; + size += read; + if read == 0 { + break; + } + } reader.check()?; - Ok(ret) + + Ok(size as u64) +} + +pub async fn copy_unchecked_async<'a>( + cache: &'a Path, + sri: &'a Integrity, + to: &'a Path, +) -> Result<()> { + let cpath = path::content_path(cache, sri); + if reflink::reflink(&cpath, to).is_err() { + crate::async_lib::copy(&cpath, to).await.with_context(|| { + format!( + "Failed to copy cache contents from {} to {}", + path::content_path(cache, sri).display(), + to.display() + ) + })?; + } + Ok(()) +} + +pub async fn copy_async<'a>(cache: &'a Path, sri: &'a Integrity, to: &'a Path) -> Result { + copy_unchecked_async(cache, sri, to).await?; + let mut reader = open_async(cache, sri.clone()).await?; + let mut buf: [u8; 1024] = [0; 1024]; + let mut size = 0; + loop { + let read = AsyncReadExt::read(&mut reader, &mut buf) + .await + .with_context(|| { + format!( + "Failed to read cache contents while verifying integrity for {}", + path::content_path(cache, sri).display() + ) + })?; + size += read; + if read == 0 { + break; + } + } + reader.check()?; + Ok(size as u64) } pub fn has_content(cache: &Path, sri: &Integrity) -> Option { diff --git a/src/get.rs b/src/get.rs index a058807..d9b70f6 100644 --- a/src/get.rs +++ b/src/get.rs @@ -185,6 +185,9 @@ where /// Copies cache data to a specified location. Returns the number of bytes /// copied. /// +/// On platforms that support it, this will create a copy-on-write "reflink" +/// with a full-copy fallback. +/// /// ## Example /// ```no_run /// use async_std::prelude::*; @@ -212,9 +215,45 @@ where inner(cache.as_ref(), key.as_ref(), to.as_ref()).await } +/// Copies cache data to a specified location. Cache data will not be checked +/// during copy. +/// +/// On platforms that support it, this will create a copy-on-write "reflink" +/// with a full-copy fallback. +/// +/// ## Example +/// ```no_run +/// use async_std::prelude::*; +/// use async_attributes; +/// +/// #[async_attributes::main] +/// async fn main() -> cacache::Result<()> { +/// cacache::copy_unchecked("./my-cache", "my-key", "./data.txt").await?; +/// Ok(()) +/// } +/// ``` +pub async fn copy_unchecked(cache: P, key: K, to: Q) -> Result<()> +where + P: AsRef, + K: AsRef, + Q: AsRef, +{ + async fn inner(cache: &Path, key: &str, to: &Path) -> Result<()> { + if let Some(entry) = index::find_async(cache, key).await? { + copy_hash_unchecked(cache, &entry.integrity, to).await + } else { + Err(Error::EntryNotFound(cache.to_path_buf(), key.into())) + } + } + inner(cache.as_ref(), key.as_ref(), to.as_ref()).await +} + /// Copies a cache data by hash to a specified location. Returns the number of /// bytes copied. /// +/// On platforms that support it, this will create a copy-on-write "reflink" +/// with a full-copy fallback. +/// /// ## Example /// ```no_run /// use async_std::prelude::*; @@ -235,6 +274,32 @@ where read::copy_async(cache.as_ref(), sri, to.as_ref()).await } +/// Copies a cache data by hash to a specified location. Copied data will not +/// be checked against the given hash. +/// +/// On platforms that support it, this will create a copy-on-write "reflink" +/// with a full-copy fallback. +/// +/// ## Example +/// ```no_run +/// use async_std::prelude::*; +/// use async_attributes; +/// +/// #[async_attributes::main] +/// async fn main() -> cacache::Result<()> { +/// let sri = cacache::write("./my-cache", "my-key", b"hello world").await?; +/// cacache::copy_hash_unchecked("./my-cache", &sri, "./data.txt").await?; +/// Ok(()) +/// } +/// ``` +pub async fn copy_hash_unchecked(cache: P, sri: &Integrity, to: Q) -> Result<()> +where + P: AsRef, + Q: AsRef, +{ + read::copy_unchecked_async(cache.as_ref(), sri, to.as_ref()).await +} + /// Gets the metadata entry for a certain key. /// /// Note that the existence of a metadata entry is not a guarantee that the @@ -401,6 +466,9 @@ where /// Copies a cache entry by key to a specified location. Returns the number of /// bytes copied. /// +/// On platforms that support it, this will create a copy-on-write "reflink" +/// with a full-copy fallback. +/// /// ## Example /// ```no_run /// use std::io::Read; @@ -426,9 +494,43 @@ where inner(cache.as_ref(), key.as_ref(), to.as_ref()) } +/// Copies a cache entry by key to a specified location. Does not verify cache +/// contents while copying. +/// +/// On platforms that support it, this will create a copy-on-write "reflink" +/// with a full-copy fallback. +/// +/// ## Example +/// ```no_run +/// use std::io::Read; +/// +/// fn main() -> cacache::Result<()> { +/// cacache::copy_unchecked_sync("./my-cache", "my-key", "./my-hello.txt")?; +/// Ok(()) +/// } +/// ``` +pub fn copy_unchecked_sync(cache: P, key: K, to: Q) -> Result<()> +where + P: AsRef, + K: AsRef, + Q: AsRef, +{ + fn inner(cache: &Path, key: &str, to: &Path) -> Result<()> { + if let Some(entry) = index::find(cache, key)? { + copy_hash_unchecked_sync(cache, &entry.integrity, to) + } else { + Err(Error::EntryNotFound(cache.to_path_buf(), key.into())) + } + } + inner(cache.as_ref(), key.as_ref(), to.as_ref()) +} + /// Copies a cache entry by integrity address to a specified location. Returns /// the number of bytes copied. /// +/// On platforms that support it, this will create a copy-on-write "reflink" +/// with a full-copy fallback. +/// /// ## Example /// ```no_run /// use std::io::Read; @@ -447,6 +549,30 @@ where read::copy(cache.as_ref(), sri, to.as_ref()) } +/// Copies a cache entry by integrity address to a specified location. Does +/// not verify cache contents while copying. +/// +/// On platforms that support it, this will create a copy-on-write "reflink" +/// with a full-copy fallback. +/// +/// ## Example +/// ```no_run +/// use std::io::Read; +/// +/// fn main() -> cacache::Result<()> { +/// let sri = cacache::write_sync("./my-cache", "my-key", b"hello")?; +/// cacache::copy_hash_unchecked_sync("./my-cache", &sri, "./my-hello.txt")?; +/// Ok(()) +/// } +/// ``` +pub fn copy_hash_unchecked_sync(cache: P, sri: &Integrity, to: Q) -> Result<()> +where + P: AsRef, + Q: AsRef, +{ + read::copy_unchecked(cache.as_ref(), sri, to.as_ref()) +} + /// Gets metadata for a certain key. /// /// Note that the existence of a metadata entry is not a guarantee that the