From 13133148dc5943db1cd8beb2987abfd6397d0b03 Mon Sep 17 00:00:00 2001 From: voidlizard Date: Sun, 12 Jan 2025 09:32:47 +0300 Subject: [PATCH] wip, works but mem O(k) --- hbs2-git3/app/Main.hs | 9 ++++++ hbs2-git3/lib/HBS2/Data/Log/Structured.hs | 25 ++++++++++++++++ hbs2-git3/lib/HBS2/Git3/State/Index.hs | 36 +++++++++++------------ 3 files changed, 51 insertions(+), 19 deletions(-) diff --git a/hbs2-git3/app/Main.hs b/hbs2-git3/app/Main.hs index 78aa80dd..dfcb6412 100644 --- a/hbs2-git3/app/Main.hs +++ b/hbs2-git3/app/Main.hs @@ -924,6 +924,15 @@ theDict = do liftIO $ hPrint stdout $ pretty sha1 <+> pretty blake + entry $ bindMatch "test:reflog:file:check" $ nil_ $ \case + [ StringLike fn ] -> lift do + bs <- liftIO $ mmapFileByteString fn Nothing + + unless (validateSorted bs) do + error "malformed" + + _ -> throwIO (BadFormException @C nil) + entry $ bindMatch "test:git:reflog:index:sqlite" $ nil_ $ \syn -> lift $ connectedDo do reflog <- getGitRemoteKey >>= orThrowUser "reflog not set" diff --git a/hbs2-git3/lib/HBS2/Data/Log/Structured.hs b/hbs2-git3/lib/HBS2/Data/Log/Structured.hs index 049fc483..51bd4e99 100644 --- a/hbs2-git3/lib/HBS2/Data/Log/Structured.hs +++ b/hbs2-git3/lib/HBS2/Data/Log/Structured.hs @@ -17,6 +17,8 @@ import Codec.Compression.Zstd.Streaming qualified as Zstd import Codec.Compression.Zstd.Streaming (Result(..)) import Control.Exception +import Control.Monad.Trans.Maybe +import Lens.Micro.Platform -- import UnliftIO @@ -97,6 +99,29 @@ instance Monad m => BytesReader (ConsumeBS m) where put $! b pure (LBS.fromStrict a) +{- HLINT ignore "Eta reduce"-} +toSectionList :: BS.ByteString -> [BS.ByteString] +toSectionList source = go source + where + go bs | BS.length bs < 4 = [] + | otherwise = go1 (BS.splitAt 4 bs & over _1 (fromIntegral . N.word32)) + + go1 (len,rest) | BS.length rest < len = [] + + go1 (len,rest) = do + let (sect, rest1) = BS.splitAt len rest + sect : go rest1 + +validateSorted :: BS.ByteString -> Bool +validateSorted bs = do + let sections = toSectionList bs + let r = flip fix (Nothing, sections, 0) $ \next -> \case + (_, [], e) -> e + (Nothing, x:xs, e) -> next (Just x, xs, e) + (Just v, x:_, e) | v > x -> (e+1) + (Just _, x:xs, e) -> next (Just x, xs, e) + r == 0 + scanBS :: Monad m => BS.ByteString -> ( BS.ByteString -> m () ) -> m () scanBS bs action = do let hsz = 4 diff --git a/hbs2-git3/lib/HBS2/Git3/State/Index.hs b/hbs2-git3/lib/HBS2/Git3/State/Index.hs index e65b541f..35f145eb 100644 --- a/hbs2-git3/lib/HBS2/Git3/State/Index.hs +++ b/hbs2-git3/lib/HBS2/Git3/State/Index.hs @@ -102,32 +102,30 @@ mergeSortedFilesN _ [_] out = rm out mergeSortedFilesN getKey inputFiles outFile = do mmaped <- for inputFiles $ \fn -> do - liftIO (mmapFileByteString fn Nothing) + bs <- liftIO (mmapFileByteString fn Nothing) + pure $ toSectionList bs liftIO $ UIO.withBinaryFileAtomic outFile WriteMode $ \hOut -> do - flip fix (mmaped, Heap.empty) $ \next (mmf, win) -> do - let (entries, files) = fmap readEntry mmf & unzip - let values = [ Entry (getKey e) e | e <- catMaybes entries ] - let e' = (win <> Heap.fromList values) & Heap.uncons - maybe1 e' none $ \(Entry _ e, newWin) -> do + + let seed = HPSQ.empty @BS.ByteString @BS.ByteString @BS.ByteString + + flip fix (mmaped, seed) $ \next (files, win) -> do + + let (vals,rests) = unzip (mapMaybe L.uncons files) + + let newWin = flip fix (win, vals) $ \l -> \case + (w, []) -> w + (w, e:es) -> let k = getKey e in l (HPSQ.insert k k e w, es) + + maybe1 (HPSQ.minView newWin) none $ \(k,_,e, nextWin) -> do liftIO $ writeSection (LBS.fromStrict e) (LBS.hPutStr hOut) - next (catMaybes files, newWin) + next (L.filter (not . L.null) $ fmap (dropDupes k) rests, nextWin) mapM_ rm inputFiles where - readEntry :: BS.ByteString -> (Maybe BS.ByteString, Maybe BS.ByteString) + dropDupes k = L.dropWhile ( (== k) . getKey ) - readEntry src | BS.length src < 4 = (mzero, mzero) - - readEntry src = do - let (size, rest) = BS.splitAt 4 src & over _1 ( fromIntegral . N.word32 ) - let (e, rest2) = BS.splitAt size rest - - if BS.length e < size then - (mzero, mzero) - else - (Just e, Just rest2) compactIndex :: forall m . (Git3Perks m, MonadReader Git3Env m) => Natural -> m () compactIndex maxSize = do @@ -356,5 +354,5 @@ updateReflogIndex = do -- notice $ pretty sha1 <+> pretty tx writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh) - lift $ compactIndex ( 32 * 1024 * 1024 ) + -- lift $ compactIndex ( 32 * 1024 * 1024 )