wip, git packs

This commit is contained in:
voidlizard 2025-01-14 12:24:41 +03:00
parent 33cec9f40f
commit e6d1eadb7a
3 changed files with 121 additions and 25 deletions

View File

@ -11,6 +11,7 @@ module Main where
import HBS2.Git3.Prelude import HBS2.Git3.Prelude
import HBS2.Git3.State.Index import HBS2.Git3.State.Index
import HBS2.Git3.Git.Pack
import HBS2.Peer.CLI.Detect import HBS2.Peer.CLI.Detect
import HBS2.Peer.RPC.API.LWWRef import HBS2.Peer.RPC.API.LWWRef
@ -95,6 +96,8 @@ import Control.Monad.ST
import Data.BloomFilter qualified as Bloom import Data.BloomFilter qualified as Bloom
import Data.BloomFilter.Mutable qualified as MBloom import Data.BloomFilter.Mutable qualified as MBloom
import Crypto.Hash qualified as C
{- HLINT ignore "Functor law" -} {- HLINT ignore "Functor law" -}
{- HLINT ignore "Eta reduce" -} {- HLINT ignore "Eta reduce" -}
@ -861,21 +864,102 @@ theDict = do
_ -> throwIO (BadFormException @C nil) _ -> throwIO (BadFormException @C nil)
entry $ bindMatch "test:git:zstd:packed:import" $ nil_ $ \syn -> lift $ flip runContT pure do entry $ bindMatch "test:segment:dump" $ nil_ $ \syn -> lift do
sto <- getStorage
let (_, argz) = splitOpts [] syn
tree <- headMay [ x | HashLike x <- argz ] & orThrowUser "tree hash required"
lbs <- runExceptT (getTreeContents sto tree) >>= orThrowPassIO
runConsumeLBS (ZstdL.decompress lbs) $ readLogFileLBS () $ \h s obs -> do
let (t, body) = LBS.splitAt 1 obs
let tp = fromStringMay @(Short GitObjectType) (LBS8.unpack t)
& maybe Blob coerce
liftIO $ print $ pretty h <+> fill 8 (viaShow tp) <+> pretty s
entry $ bindMatch "test:segment:dump:pack" $ nil_ $ \syn -> lift do
sto <- getStorage
let (_, argz) = splitOpts [] syn
let trees = [ x | HashLike x <- argz ]
for_ trees $ \tree -> do
notice $ "running" <+> pretty tree
lbs <- runExceptT (getTreeContents sto tree) >>= orThrowPassIO
file <- liftIO $ Temp.emptyTempFile "" (show (pretty tree) <> ".pack")
liftIO $ UIO.withBinaryFileAtomic file ReadWriteMode $ \fh -> do
let header = BS.concat [ "PACK", N.bytestring32 2, N.bytestring32 0 ]
BS.hPutStr fh header
no_ <- newTVarIO 0
seen_ <- newTVarIO (mempty :: HashSet GitHash)
runConsumeLBS (ZstdL.decompress lbs) $ readLogFileLBS () $ \h s obs -> do
seen <- readTVarIO seen_ <&> HS.member h
unless seen do
let (t, body) = LBS.splitAt 1 obs
let tp = fromStringMay @(Short GitObjectType) (LBS8.unpack t)
& maybe Blob coerce
let params = Zlib.defaultCompressParams { Zlib.compressMethod = Zlib.deflateMethod }
let packed = Zlib.compressWith params body
let preamble = encodeObjectSize (gitPackTypeOf tp) (fromIntegral $ LBS.length body)
liftIO do
atomically $ modifyTVar seen_ (HS.insert h)
BS.hPutStr fh preamble
LBS.hPutStr fh packed
atomically $ modifyTVar no_ succ
no <- readTVarIO no_
hSeek fh AbsoluteSeek 8
BS.hPutStr fh (N.bytestring32 no)
hFlush fh
sz <- hFileSize fh
hSeek fh AbsoluteSeek 0
sha <- LBS.hGetNonBlocking fh (fromIntegral sz) <&> sha1lazy
hSeek fh SeekFromEnd 0
BS.hPutStr fh sha
entry $ bindMatch "test:segment:import:loose" $ nil_ $ \syn -> lift $ connectedDo do
let (opts, argz) = splitOpts [] syn let (opts, argz) = splitOpts [] syn
let logs = [ x| StringLike x <- argz ] let logs = [ x| StringLike x <- argz ]
d <- findGitDir >>= orThrowUser "not a git directory" d <- findGitDir >>= orThrowUser "not a git directory"
sto <- getStorage
flip runContT pure do
gitCatCheck <- contWorkerPool 8 do gitCatCheck <- contWorkerPool 8 do
che <- ContT withGitCatCheck che <- ContT withGitCatCheck
pure $ gitCheckObjectFromHandle che pure $ gitCheckObjectFromHandle che
lift $ forConcurrently_ logs $ \lfn -> do let trees = [ x | HashLike x <- argz ]
debug $ pretty lfn lift $ for_ trees $ \tree -> do
lbs <- liftIO $ LBS.readFile lfn notice $ pretty "running" <+> pretty tree
lbs <- runExceptT (getTreeContents sto tree) >>= orThrowPassIO
runConsumeLBS (ZstdL.decompress lbs) $ readLogFileLBS () $ \h s lbs -> do runConsumeLBS (ZstdL.decompress lbs) $ readLogFileLBS () $ \h s lbs -> do
let (t, body) = LBS.splitAt 1 lbs let (t, body) = LBS.splitAt 1 lbs
@ -883,7 +967,7 @@ theDict = do
let tp = fromStringMay @(Short GitObjectType) (LBS8.unpack t) let tp = fromStringMay @(Short GitObjectType) (LBS8.unpack t)
& maybe Blob coerce & maybe Blob coerce
here <- isJust <$> lift (gitCatCheck h) here <- lift $ isJust <$> gitCatCheck h
let gitHash = show $ pretty h let gitHash = show $ pretty h
let (prefix,name) = L.splitAt 2 gitHash let (prefix,name) = L.splitAt 2 gitHash
@ -894,7 +978,7 @@ theDict = do
unless here $ liftIO do unless here $ liftIO do
debug $ "FUCKING IMPORT OBJECT" <+> pretty here <+> pretty h <+> pretty tp notice $ "FUCKING IMPORT OBJECT" <+> pretty here <+> pretty h <+> pretty tp
touch path touch path

View File

@ -256,6 +256,9 @@ instance GitObjectReader (Process Handle Handle ()) where
sortGitTreeEntries :: [GitTreeEntry] -> [GitTreeEntry] sortGitTreeEntries :: [GitTreeEntry] -> [GitTreeEntry]
sortGitTreeEntries = sortOn (\e -> (gitEntryType e, gitEntrySize e)) sortGitTreeEntries = sortOn (\e -> (gitEntryType e, gitEntrySize e))
sha1lazy :: ByteString -> BS.ByteString
sha1lazy lbs = BS.pack $ BA.unpack $ hashlazy @Crypton.SHA1 lbs
gitHashBlobPure :: ByteString -> GitHash gitHashBlobPure :: ByteString -> GitHash
gitHashBlobPure body = do gitHashBlobPure body = do
let preamble = [qc|{pretty Blob} {pretty $ LBS.length body}|] <> "\x00" :: LBS8.ByteString let preamble = [qc|{pretty Blob} {pretty $ LBS.length body}|] <> "\x00" :: LBS8.ByteString

View File

@ -42,6 +42,15 @@ instance Enum PackFileObjectType where
toEnum 7 = OBJ_REF_DELTA toEnum 7 = OBJ_REF_DELTA
toEnum n = error $ "Invalid PackFileObjectType: " ++ show n toEnum n = error $ "Invalid PackFileObjectType: " ++ show n
class HasGitPackType a where
gitPackTypeOf :: a -> PackFileObjectType
instance HasGitPackType GitObjectType where
gitPackTypeOf = \case
Commit -> OBJ_COMMIT
Tree -> OBJ_TREE
Blob -> OBJ_BLOB
encodeObjectSize :: PackFileObjectType -> Natural -> ByteString encodeObjectSize :: PackFileObjectType -> Natural -> ByteString
encodeObjectSize objType size = encodeObjectSize objType size =