From fca0786356cb3c39877402f6a7e6d97ea6cd036a Mon Sep 17 00:00:00 2001 From: voidlizard Date: Sun, 12 Jan 2025 10:39:41 +0300 Subject: [PATCH] wip, works but not dedupes --- hbs2-git3/app/Main.hs | 4 ++-- hbs2-git3/lib/HBS2/Data/Log/Structured.hs | 1 + hbs2-git3/lib/HBS2/Git3/State/Index.hs | 26 +++++++++++------------ 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/hbs2-git3/app/Main.hs b/hbs2-git3/app/Main.hs index dfcb6412..1f7d6d70 100644 --- a/hbs2-git3/app/Main.hs +++ b/hbs2-git3/app/Main.hs @@ -900,11 +900,11 @@ theDict = do LBS.hPutStr fh contents - entry $ bindMatch "test:git:reflog:index:list:fast" $ nil_ $ const $ lift do + entry $ bindMatch "reflog:index:list:fast" $ nil_ $ const $ lift do files <- listObjectIndexFiles forConcurrently_ files $ \(f,_) -> do bs <- liftIO $ mmapFileByteString f Nothing - scanBS bs $ \segment -> do + for_ (toSectionList bs) $ \segment -> do let (sha1,blake) = BS.splitAt 20 segment & over _1 (coerce @_ @GitHash) & over _2 (coerce @_ @HashRef) diff --git a/hbs2-git3/lib/HBS2/Data/Log/Structured.hs b/hbs2-git3/lib/HBS2/Data/Log/Structured.hs index 51bd4e99..3640bda9 100644 --- a/hbs2-git3/lib/HBS2/Data/Log/Structured.hs +++ b/hbs2-git3/lib/HBS2/Data/Log/Structured.hs @@ -122,6 +122,7 @@ validateSorted bs = do (Just _, x:xs, e) -> next (Just x, xs, e) r == 0 + scanBS :: Monad m => BS.ByteString -> ( BS.ByteString -> m () ) -> m () scanBS bs action = do let hsz = 4 diff --git a/hbs2-git3/lib/HBS2/Git3/State/Index.hs b/hbs2-git3/lib/HBS2/Git3/State/Index.hs index 35f145eb..4e8f757e 100644 --- a/hbs2-git3/lib/HBS2/Git3/State/Index.hs +++ b/hbs2-git3/lib/HBS2/Git3/State/Index.hs @@ -107,25 +107,25 @@ mergeSortedFilesN getKey inputFiles outFile = do liftIO $ UIO.withBinaryFileAtomic outFile WriteMode $ \hOut -> do - let seed = HPSQ.empty @BS.ByteString @BS.ByteString @BS.ByteString + let seed = HPSQ.fromList $ mapMaybe mkState mmaped - flip fix (mmaped, seed) $ \next (files, win) -> do - - let (vals,rests) = unzip (mapMaybe L.uncons files) - - let newWin = flip fix (win, vals) $ \l -> \case - (w, []) -> w - (w, e:es) -> let k = getKey e in l (HPSQ.insert k k e w, es) - - maybe1 (HPSQ.minView newWin) none $ \(k,_,e, nextWin) -> do - liftIO $ writeSection (LBS.fromStrict e) (LBS.hPutStr hOut) - next (L.filter (not . L.null) $ fmap (dropDupes k) rests, nextWin) + flip fix seed $ \next heap -> do + let h0 = HPSQ.minView heap + maybe1 h0 none $ \case + (_,_,[],rest) -> next rest + (_,_,e:xs,rest) -> do + liftIO $ writeSection (LBS.fromStrict e) (LBS.hPutStr hOut) + let new = maybe rest (\(a,b,c) -> HPSQ.insert a b c rest) (mkState xs) + next new + -- mapMaybe mkState (xs : fmap (view _3) (HPSQ.toList rest)) + -- next (HPSQ.fromList new) mapM_ rm inputFiles where dropDupes k = L.dropWhile ( (== k) . getKey ) - + mkState [] = Nothing + mkState (x:xs) = Just (getKey x, getKey x, x:xs) compactIndex :: forall m . (Git3Perks m, MonadReader Git3Env m) => Natural -> m () compactIndex maxSize = do