wip, works but mem O(k)

This commit is contained in:
voidlizard 2025-01-12 09:32:47 +03:00
parent 340bb9aaa8
commit 13133148dc
3 changed files with 51 additions and 19 deletions

View File

@ -924,6 +924,15 @@ theDict = do
liftIO $ hPrint stdout $ pretty sha1 <+> pretty blake liftIO $ hPrint stdout $ pretty sha1 <+> pretty blake
entry $ bindMatch "test:reflog:file:check" $ nil_ $ \case
[ StringLike fn ] -> lift do
bs <- liftIO $ mmapFileByteString fn Nothing
unless (validateSorted bs) do
error "malformed"
_ -> throwIO (BadFormException @C nil)
entry $ bindMatch "test:git:reflog:index:sqlite" $ nil_ $ \syn -> lift $ connectedDo do entry $ bindMatch "test:git:reflog:index:sqlite" $ nil_ $ \syn -> lift $ connectedDo do
reflog <- getGitRemoteKey >>= orThrowUser "reflog not set" reflog <- getGitRemoteKey >>= orThrowUser "reflog not set"

View File

@ -17,6 +17,8 @@ import Codec.Compression.Zstd.Streaming qualified as Zstd
import Codec.Compression.Zstd.Streaming (Result(..)) import Codec.Compression.Zstd.Streaming (Result(..))
import Control.Exception import Control.Exception
import Control.Monad.Trans.Maybe
import Lens.Micro.Platform
-- import UnliftIO -- import UnliftIO
@ -97,6 +99,29 @@ instance Monad m => BytesReader (ConsumeBS m) where
put $! b put $! b
pure (LBS.fromStrict a) pure (LBS.fromStrict a)
{- HLINT ignore "Eta reduce"-}
toSectionList :: BS.ByteString -> [BS.ByteString]
toSectionList source = go source
where
go bs | BS.length bs < 4 = []
| otherwise = go1 (BS.splitAt 4 bs & over _1 (fromIntegral . N.word32))
go1 (len,rest) | BS.length rest < len = []
go1 (len,rest) = do
let (sect, rest1) = BS.splitAt len rest
sect : go rest1
validateSorted :: BS.ByteString -> Bool
validateSorted bs = do
let sections = toSectionList bs
let r = flip fix (Nothing, sections, 0) $ \next -> \case
(_, [], e) -> e
(Nothing, x:xs, e) -> next (Just x, xs, e)
(Just v, x:_, e) | v > x -> (e+1)
(Just _, x:xs, e) -> next (Just x, xs, e)
r == 0
scanBS :: Monad m => BS.ByteString -> ( BS.ByteString -> m () ) -> m () scanBS :: Monad m => BS.ByteString -> ( BS.ByteString -> m () ) -> m ()
scanBS bs action = do scanBS bs action = do
let hsz = 4 let hsz = 4

View File

@ -102,32 +102,30 @@ mergeSortedFilesN _ [_] out = rm out
mergeSortedFilesN getKey inputFiles outFile = do mergeSortedFilesN getKey inputFiles outFile = do
mmaped <- for inputFiles $ \fn -> do mmaped <- for inputFiles $ \fn -> do
liftIO (mmapFileByteString fn Nothing) bs <- liftIO (mmapFileByteString fn Nothing)
pure $ toSectionList bs
liftIO $ UIO.withBinaryFileAtomic outFile WriteMode $ \hOut -> do liftIO $ UIO.withBinaryFileAtomic outFile WriteMode $ \hOut -> do
flip fix (mmaped, Heap.empty) $ \next (mmf, win) -> do
let (entries, files) = fmap readEntry mmf & unzip let seed = HPSQ.empty @BS.ByteString @BS.ByteString @BS.ByteString
let values = [ Entry (getKey e) e | e <- catMaybes entries ]
let e' = (win <> Heap.fromList values) & Heap.uncons flip fix (mmaped, seed) $ \next (files, win) -> do
maybe1 e' none $ \(Entry _ e, newWin) -> do
let (vals,rests) = unzip (mapMaybe L.uncons files)
let newWin = flip fix (win, vals) $ \l -> \case
(w, []) -> w
(w, e:es) -> let k = getKey e in l (HPSQ.insert k k e w, es)
maybe1 (HPSQ.minView newWin) none $ \(k,_,e, nextWin) -> do
liftIO $ writeSection (LBS.fromStrict e) (LBS.hPutStr hOut) liftIO $ writeSection (LBS.fromStrict e) (LBS.hPutStr hOut)
next (catMaybes files, newWin) next (L.filter (not . L.null) $ fmap (dropDupes k) rests, nextWin)
mapM_ rm inputFiles mapM_ rm inputFiles
where where
readEntry :: BS.ByteString -> (Maybe BS.ByteString, Maybe BS.ByteString) dropDupes k = L.dropWhile ( (== k) . getKey )
readEntry src | BS.length src < 4 = (mzero, mzero)
readEntry src = do
let (size, rest) = BS.splitAt 4 src & over _1 ( fromIntegral . N.word32 )
let (e, rest2) = BS.splitAt size rest
if BS.length e < size then
(mzero, mzero)
else
(Just e, Just rest2)
compactIndex :: forall m . (Git3Perks m, MonadReader Git3Env m) => Natural -> m () compactIndex :: forall m . (Git3Perks m, MonadReader Git3Env m) => Natural -> m ()
compactIndex maxSize = do compactIndex maxSize = do
@ -356,5 +354,5 @@ updateReflogIndex = do
-- notice $ pretty sha1 <+> pretty tx -- notice $ pretty sha1 <+> pretty tx
writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh) writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh)
lift $ compactIndex ( 32 * 1024 * 1024 ) -- lift $ compactIndex ( 32 * 1024 * 1024 )