From e6d1eadb7a79e538326f46e13763908010f322ab Mon Sep 17 00:00:00 2001 From: voidlizard Date: Tue, 14 Jan 2025 12:24:41 +0300 Subject: [PATCH] wip, git packs --- hbs2-git3/app/Main.hs | 134 ++++++++++++++++++++++------ hbs2-git3/lib/HBS2/Git3/Git.hs | 3 + hbs2-git3/lib/HBS2/Git3/Git/Pack.hs | 9 ++ 3 files changed, 121 insertions(+), 25 deletions(-) diff --git a/hbs2-git3/app/Main.hs b/hbs2-git3/app/Main.hs index 36c5d9cd..eeec45be 100644 --- a/hbs2-git3/app/Main.hs +++ b/hbs2-git3/app/Main.hs @@ -11,6 +11,7 @@ module Main where import HBS2.Git3.Prelude import HBS2.Git3.State.Index +import HBS2.Git3.Git.Pack import HBS2.Peer.CLI.Detect import HBS2.Peer.RPC.API.LWWRef @@ -95,6 +96,8 @@ import Control.Monad.ST import Data.BloomFilter qualified as Bloom import Data.BloomFilter.Mutable qualified as MBloom +import Crypto.Hash qualified as C + {- HLINT ignore "Functor law" -} {- HLINT ignore "Eta reduce" -} @@ -861,49 +864,130 @@ theDict = do _ -> throwIO (BadFormException @C nil) - entry $ bindMatch "test:git:zstd:packed:import" $ nil_ $ \syn -> lift $ flip runContT pure do + entry $ bindMatch "test:segment:dump" $ nil_ $ \syn -> lift do + sto <- getStorage + let (_, argz) = splitOpts [] syn + tree <- headMay [ x | HashLike x <- argz ] & orThrowUser "tree hash required" + + lbs <- runExceptT (getTreeContents sto tree) >>= orThrowPassIO + + runConsumeLBS (ZstdL.decompress lbs) $ readLogFileLBS () $ \h s obs -> do + let (t, body) = LBS.splitAt 1 obs + + let tp = fromStringMay @(Short GitObjectType) (LBS8.unpack t) + & maybe Blob coerce + + liftIO $ print $ pretty h <+> fill 8 (viaShow tp) <+> pretty s + + + entry $ bindMatch "test:segment:dump:pack" $ nil_ $ \syn -> lift do + sto <- getStorage + let (_, argz) = splitOpts [] syn + + let trees = [ x | HashLike x <- argz ] + + for_ trees $ \tree -> do + + notice $ "running" <+> pretty tree + + lbs <- runExceptT (getTreeContents sto tree) >>= orThrowPassIO + + file <- liftIO $ Temp.emptyTempFile "" (show (pretty tree) <> ".pack") + + liftIO $ UIO.withBinaryFileAtomic file ReadWriteMode $ \fh -> do + + let header = BS.concat [ "PACK", N.bytestring32 2, N.bytestring32 0 ] + + BS.hPutStr fh header + + no_ <- newTVarIO 0 + seen_ <- newTVarIO (mempty :: HashSet GitHash) + + runConsumeLBS (ZstdL.decompress lbs) $ readLogFileLBS () $ \h s obs -> do + seen <- readTVarIO seen_ <&> HS.member h + unless seen do + + let (t, body) = LBS.splitAt 1 obs + + let tp = fromStringMay @(Short GitObjectType) (LBS8.unpack t) + & maybe Blob coerce + + let params = Zlib.defaultCompressParams { Zlib.compressMethod = Zlib.deflateMethod } + + let packed = Zlib.compressWith params body + + let preamble = encodeObjectSize (gitPackTypeOf tp) (fromIntegral $ LBS.length body) + + liftIO do + atomically $ modifyTVar seen_ (HS.insert h) + BS.hPutStr fh preamble + LBS.hPutStr fh packed + + atomically $ modifyTVar no_ succ + + no <- readTVarIO no_ + hSeek fh AbsoluteSeek 8 + BS.hPutStr fh (N.bytestring32 no) + hFlush fh + + sz <- hFileSize fh + hSeek fh AbsoluteSeek 0 + + sha <- LBS.hGetNonBlocking fh (fromIntegral sz) <&> sha1lazy + + hSeek fh SeekFromEnd 0 + + BS.hPutStr fh sha + + entry $ bindMatch "test:segment:import:loose" $ nil_ $ \syn -> lift $ connectedDo do let (opts, argz) = splitOpts [] syn let logs = [ x| StringLike x <- argz ] d <- findGitDir >>= orThrowUser "not a git directory" - gitCatCheck <- contWorkerPool 8 do - che <- ContT withGitCatCheck - pure $ gitCheckObjectFromHandle che + sto <- getStorage - lift $ forConcurrently_ logs $ \lfn -> do + flip runContT pure do - debug $ pretty lfn + gitCatCheck <- contWorkerPool 8 do + che <- ContT withGitCatCheck + pure $ gitCheckObjectFromHandle che - lbs <- liftIO $ LBS.readFile lfn + let trees = [ x | HashLike x <- argz ] - runConsumeLBS (ZstdL.decompress lbs) $ readLogFileLBS () $ \h s lbs -> do - let (t, body) = LBS.splitAt 1 lbs + lift $ for_ trees $ \tree -> do - let tp = fromStringMay @(Short GitObjectType) (LBS8.unpack t) - & maybe Blob coerce + notice $ pretty "running" <+> pretty tree - here <- isJust <$> lift (gitCatCheck h) + lbs <- runExceptT (getTreeContents sto tree) >>= orThrowPassIO - let gitHash = show $ pretty h - let (prefix,name) = L.splitAt 2 gitHash - let path = joinPath [d, "objects", prefix, name] + runConsumeLBS (ZstdL.decompress lbs) $ readLogFileLBS () $ \h s lbs -> do + let (t, body) = LBS.splitAt 1 lbs - let signature = [qc|{pretty tp} {pretty $ LBS.length body}|] <> "\x00" :: LBS8.ByteString - let o = signature <> body + let tp = fromStringMay @(Short GitObjectType) (LBS8.unpack t) + & maybe Blob coerce - unless here $ liftIO do + here <- lift $ isJust <$> gitCatCheck h - debug $ "FUCKING IMPORT OBJECT" <+> pretty here <+> pretty h <+> pretty tp + let gitHash = show $ pretty h + let (prefix,name) = L.splitAt 2 gitHash + let path = joinPath [d, "objects", prefix, name] - touch path + let signature = [qc|{pretty tp} {pretty $ LBS.length body}|] <> "\x00" :: LBS8.ByteString + let o = signature <> body - debug $ pretty tp <+> pretty s <+> pretty h <+> pretty path + unless here $ liftIO do - let params = Zlib.defaultCompressParams { Zlib.compressMethod = Zlib.deflateMethod } - UIO.withBinaryFileAtomic path WriteMode $ \fh -> do - let contents = Zlib.compressWith params o - LBS.hPutStr fh contents + notice $ "FUCKING IMPORT OBJECT" <+> pretty here <+> pretty h <+> pretty tp + + touch path + + debug $ pretty tp <+> pretty s <+> pretty h <+> pretty path + + let params = Zlib.defaultCompressParams { Zlib.compressMethod = Zlib.deflateMethod } + UIO.withBinaryFileAtomic path WriteMode $ \fh -> do + let contents = Zlib.compressWith params o + LBS.hPutStr fh contents entry $ bindMatch "reflog:index:count:missed" $ nil_ $ const $ lift $ flip runContT pure do diff --git a/hbs2-git3/lib/HBS2/Git3/Git.hs b/hbs2-git3/lib/HBS2/Git3/Git.hs index abe0b020..8c2f690e 100644 --- a/hbs2-git3/lib/HBS2/Git3/Git.hs +++ b/hbs2-git3/lib/HBS2/Git3/Git.hs @@ -256,6 +256,9 @@ instance GitObjectReader (Process Handle Handle ()) where sortGitTreeEntries :: [GitTreeEntry] -> [GitTreeEntry] sortGitTreeEntries = sortOn (\e -> (gitEntryType e, gitEntrySize e)) +sha1lazy :: ByteString -> BS.ByteString +sha1lazy lbs = BS.pack $ BA.unpack $ hashlazy @Crypton.SHA1 lbs + gitHashBlobPure :: ByteString -> GitHash gitHashBlobPure body = do let preamble = [qc|{pretty Blob} {pretty $ LBS.length body}|] <> "\x00" :: LBS8.ByteString diff --git a/hbs2-git3/lib/HBS2/Git3/Git/Pack.hs b/hbs2-git3/lib/HBS2/Git3/Git/Pack.hs index bbfc850f..f6efe678 100644 --- a/hbs2-git3/lib/HBS2/Git3/Git/Pack.hs +++ b/hbs2-git3/lib/HBS2/Git3/Git/Pack.hs @@ -42,6 +42,15 @@ instance Enum PackFileObjectType where toEnum 7 = OBJ_REF_DELTA toEnum n = error $ "Invalid PackFileObjectType: " ++ show n +class HasGitPackType a where + gitPackTypeOf :: a -> PackFileObjectType + + +instance HasGitPackType GitObjectType where + gitPackTypeOf = \case + Commit -> OBJ_COMMIT + Tree -> OBJ_TREE + Blob -> OBJ_BLOB encodeObjectSize :: PackFileObjectType -> Natural -> ByteString encodeObjectSize objType size =