mirror of https://github.com/voidlizard/hbs2
wip, works but mem O(k)
This commit is contained in:
parent
340bb9aaa8
commit
13133148dc
|
@ -924,6 +924,15 @@ theDict = do
|
||||||
|
|
||||||
liftIO $ hPrint stdout $ pretty sha1 <+> pretty blake
|
liftIO $ hPrint stdout $ pretty sha1 <+> pretty blake
|
||||||
|
|
||||||
|
entry $ bindMatch "test:reflog:file:check" $ nil_ $ \case
|
||||||
|
[ StringLike fn ] -> lift do
|
||||||
|
bs <- liftIO $ mmapFileByteString fn Nothing
|
||||||
|
|
||||||
|
unless (validateSorted bs) do
|
||||||
|
error "malformed"
|
||||||
|
|
||||||
|
_ -> throwIO (BadFormException @C nil)
|
||||||
|
|
||||||
entry $ bindMatch "test:git:reflog:index:sqlite" $ nil_ $ \syn -> lift $ connectedDo do
|
entry $ bindMatch "test:git:reflog:index:sqlite" $ nil_ $ \syn -> lift $ connectedDo do
|
||||||
|
|
||||||
reflog <- getGitRemoteKey >>= orThrowUser "reflog not set"
|
reflog <- getGitRemoteKey >>= orThrowUser "reflog not set"
|
||||||
|
|
|
@ -17,6 +17,8 @@ import Codec.Compression.Zstd.Streaming qualified as Zstd
|
||||||
import Codec.Compression.Zstd.Streaming (Result(..))
|
import Codec.Compression.Zstd.Streaming (Result(..))
|
||||||
|
|
||||||
import Control.Exception
|
import Control.Exception
|
||||||
|
import Control.Monad.Trans.Maybe
|
||||||
|
import Lens.Micro.Platform
|
||||||
|
|
||||||
-- import UnliftIO
|
-- import UnliftIO
|
||||||
|
|
||||||
|
@ -97,6 +99,29 @@ instance Monad m => BytesReader (ConsumeBS m) where
|
||||||
put $! b
|
put $! b
|
||||||
pure (LBS.fromStrict a)
|
pure (LBS.fromStrict a)
|
||||||
|
|
||||||
|
{- HLINT ignore "Eta reduce"-}
|
||||||
|
toSectionList :: BS.ByteString -> [BS.ByteString]
|
||||||
|
toSectionList source = go source
|
||||||
|
where
|
||||||
|
go bs | BS.length bs < 4 = []
|
||||||
|
| otherwise = go1 (BS.splitAt 4 bs & over _1 (fromIntegral . N.word32))
|
||||||
|
|
||||||
|
go1 (len,rest) | BS.length rest < len = []
|
||||||
|
|
||||||
|
go1 (len,rest) = do
|
||||||
|
let (sect, rest1) = BS.splitAt len rest
|
||||||
|
sect : go rest1
|
||||||
|
|
||||||
|
validateSorted :: BS.ByteString -> Bool
|
||||||
|
validateSorted bs = do
|
||||||
|
let sections = toSectionList bs
|
||||||
|
let r = flip fix (Nothing, sections, 0) $ \next -> \case
|
||||||
|
(_, [], e) -> e
|
||||||
|
(Nothing, x:xs, e) -> next (Just x, xs, e)
|
||||||
|
(Just v, x:_, e) | v > x -> (e+1)
|
||||||
|
(Just _, x:xs, e) -> next (Just x, xs, e)
|
||||||
|
r == 0
|
||||||
|
|
||||||
scanBS :: Monad m => BS.ByteString -> ( BS.ByteString -> m () ) -> m ()
|
scanBS :: Monad m => BS.ByteString -> ( BS.ByteString -> m () ) -> m ()
|
||||||
scanBS bs action = do
|
scanBS bs action = do
|
||||||
let hsz = 4
|
let hsz = 4
|
||||||
|
|
|
@ -102,32 +102,30 @@ mergeSortedFilesN _ [_] out = rm out
|
||||||
mergeSortedFilesN getKey inputFiles outFile = do
|
mergeSortedFilesN getKey inputFiles outFile = do
|
||||||
|
|
||||||
mmaped <- for inputFiles $ \fn -> do
|
mmaped <- for inputFiles $ \fn -> do
|
||||||
liftIO (mmapFileByteString fn Nothing)
|
bs <- liftIO (mmapFileByteString fn Nothing)
|
||||||
|
pure $ toSectionList bs
|
||||||
|
|
||||||
liftIO $ UIO.withBinaryFileAtomic outFile WriteMode $ \hOut -> do
|
liftIO $ UIO.withBinaryFileAtomic outFile WriteMode $ \hOut -> do
|
||||||
flip fix (mmaped, Heap.empty) $ \next (mmf, win) -> do
|
|
||||||
let (entries, files) = fmap readEntry mmf & unzip
|
let seed = HPSQ.empty @BS.ByteString @BS.ByteString @BS.ByteString
|
||||||
let values = [ Entry (getKey e) e | e <- catMaybes entries ]
|
|
||||||
let e' = (win <> Heap.fromList values) & Heap.uncons
|
flip fix (mmaped, seed) $ \next (files, win) -> do
|
||||||
maybe1 e' none $ \(Entry _ e, newWin) -> do
|
|
||||||
|
let (vals,rests) = unzip (mapMaybe L.uncons files)
|
||||||
|
|
||||||
|
let newWin = flip fix (win, vals) $ \l -> \case
|
||||||
|
(w, []) -> w
|
||||||
|
(w, e:es) -> let k = getKey e in l (HPSQ.insert k k e w, es)
|
||||||
|
|
||||||
|
maybe1 (HPSQ.minView newWin) none $ \(k,_,e, nextWin) -> do
|
||||||
liftIO $ writeSection (LBS.fromStrict e) (LBS.hPutStr hOut)
|
liftIO $ writeSection (LBS.fromStrict e) (LBS.hPutStr hOut)
|
||||||
next (catMaybes files, newWin)
|
next (L.filter (not . L.null) $ fmap (dropDupes k) rests, nextWin)
|
||||||
|
|
||||||
mapM_ rm inputFiles
|
mapM_ rm inputFiles
|
||||||
|
|
||||||
where
|
where
|
||||||
readEntry :: BS.ByteString -> (Maybe BS.ByteString, Maybe BS.ByteString)
|
dropDupes k = L.dropWhile ( (== k) . getKey )
|
||||||
|
|
||||||
readEntry src | BS.length src < 4 = (mzero, mzero)
|
|
||||||
|
|
||||||
readEntry src = do
|
|
||||||
let (size, rest) = BS.splitAt 4 src & over _1 ( fromIntegral . N.word32 )
|
|
||||||
let (e, rest2) = BS.splitAt size rest
|
|
||||||
|
|
||||||
if BS.length e < size then
|
|
||||||
(mzero, mzero)
|
|
||||||
else
|
|
||||||
(Just e, Just rest2)
|
|
||||||
|
|
||||||
compactIndex :: forall m . (Git3Perks m, MonadReader Git3Env m) => Natural -> m ()
|
compactIndex :: forall m . (Git3Perks m, MonadReader Git3Env m) => Natural -> m ()
|
||||||
compactIndex maxSize = do
|
compactIndex maxSize = do
|
||||||
|
@ -356,5 +354,5 @@ updateReflogIndex = do
|
||||||
-- notice $ pretty sha1 <+> pretty tx
|
-- notice $ pretty sha1 <+> pretty tx
|
||||||
writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh)
|
writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh)
|
||||||
|
|
||||||
lift $ compactIndex ( 32 * 1024 * 1024 )
|
-- lift $ compactIndex ( 32 * 1024 * 1024 )
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue