wip, works but not dedupes

This commit is contained in:
voidlizard 2025-01-12 10:39:41 +03:00
parent 13133148dc
commit fca0786356
3 changed files with 16 additions and 15 deletions

View File

@ -900,11 +900,11 @@ theDict = do
LBS.hPutStr fh contents LBS.hPutStr fh contents
entry $ bindMatch "test:git:reflog:index:list:fast" $ nil_ $ const $ lift do entry $ bindMatch "reflog:index:list:fast" $ nil_ $ const $ lift do
files <- listObjectIndexFiles files <- listObjectIndexFiles
forConcurrently_ files $ \(f,_) -> do forConcurrently_ files $ \(f,_) -> do
bs <- liftIO $ mmapFileByteString f Nothing bs <- liftIO $ mmapFileByteString f Nothing
scanBS bs $ \segment -> do for_ (toSectionList bs) $ \segment -> do
let (sha1,blake) = BS.splitAt 20 segment let (sha1,blake) = BS.splitAt 20 segment
& over _1 (coerce @_ @GitHash) & over _1 (coerce @_ @GitHash)
& over _2 (coerce @_ @HashRef) & over _2 (coerce @_ @HashRef)

View File

@ -122,6 +122,7 @@ validateSorted bs = do
(Just _, x:xs, e) -> next (Just x, xs, e) (Just _, x:xs, e) -> next (Just x, xs, e)
r == 0 r == 0
scanBS :: Monad m => BS.ByteString -> ( BS.ByteString -> m () ) -> m () scanBS :: Monad m => BS.ByteString -> ( BS.ByteString -> m () ) -> m ()
scanBS bs action = do scanBS bs action = do
let hsz = 4 let hsz = 4

View File

@ -107,25 +107,25 @@ mergeSortedFilesN getKey inputFiles outFile = do
liftIO $ UIO.withBinaryFileAtomic outFile WriteMode $ \hOut -> do liftIO $ UIO.withBinaryFileAtomic outFile WriteMode $ \hOut -> do
let seed = HPSQ.empty @BS.ByteString @BS.ByteString @BS.ByteString let seed = HPSQ.fromList $ mapMaybe mkState mmaped
flip fix (mmaped, seed) $ \next (files, win) -> do flip fix seed $ \next heap -> do
let h0 = HPSQ.minView heap
let (vals,rests) = unzip (mapMaybe L.uncons files) maybe1 h0 none $ \case
(_,_,[],rest) -> next rest
let newWin = flip fix (win, vals) $ \l -> \case (_,_,e:xs,rest) -> do
(w, []) -> w liftIO $ writeSection (LBS.fromStrict e) (LBS.hPutStr hOut)
(w, e:es) -> let k = getKey e in l (HPSQ.insert k k e w, es) let new = maybe rest (\(a,b,c) -> HPSQ.insert a b c rest) (mkState xs)
next new
maybe1 (HPSQ.minView newWin) none $ \(k,_,e, nextWin) -> do -- mapMaybe mkState (xs : fmap (view _3) (HPSQ.toList rest))
liftIO $ writeSection (LBS.fromStrict e) (LBS.hPutStr hOut) -- next (HPSQ.fromList new)
next (L.filter (not . L.null) $ fmap (dropDupes k) rests, nextWin)
mapM_ rm inputFiles mapM_ rm inputFiles
where where
dropDupes k = L.dropWhile ( (== k) . getKey ) dropDupes k = L.dropWhile ( (== k) . getKey )
mkState [] = Nothing
mkState (x:xs) = Just (getKey x, getKey x, x:xs)
compactIndex :: forall m . (Git3Perks m, MonadReader Git3Env m) => Natural -> m () compactIndex :: forall m . (Git3Perks m, MonadReader Git3Env m) => Natural -> m ()
compactIndex maxSize = do compactIndex maxSize = do