mirror of https://github.com/voidlizard/hbs2
wip
This commit is contained in:
parent
693e650b93
commit
33d0493edc
|
@ -87,6 +87,9 @@ import Data.Generics.Labels
|
||||||
import Data.Generics.Product
|
import Data.Generics.Product
|
||||||
import Lens.Micro.Platform
|
import Lens.Micro.Platform
|
||||||
|
|
||||||
|
import Data.Heap (Entry(..))
|
||||||
|
import Data.Heap qualified as Heap
|
||||||
|
|
||||||
import Streaming.Prelude qualified as S
|
import Streaming.Prelude qualified as S
|
||||||
import Streaming hiding (run,chunksOf)
|
import Streaming hiding (run,chunksOf)
|
||||||
|
|
||||||
|
@ -589,52 +592,45 @@ mergeSortedFiles getKey file1 file2 outFile = do
|
||||||
|
|
||||||
|
|
||||||
mergeSortedFilesN :: forall m . MonadUnliftIO m
|
mergeSortedFilesN :: forall m . MonadUnliftIO m
|
||||||
=> (ByteString -> ByteString) -- ^ Функция извлечения ключа
|
=> (N.ByteString -> N.ByteString) -- ^ Функция извлечения ключа
|
||||||
-> [FilePath] -- ^ Входные файлы
|
-> [FilePath] -- ^ Входные файлы
|
||||||
-> FilePath -- ^ Выходной файл
|
-> FilePath -- ^ Выходной файл
|
||||||
-> m ()
|
-> m ()
|
||||||
|
|
||||||
|
mergeSortedFilesN _ [] out = rm out
|
||||||
|
|
||||||
|
mergeSortedFilesN _ [_] out = rm out
|
||||||
|
|
||||||
mergeSortedFilesN getKey inputFiles outFile = do
|
mergeSortedFilesN getKey inputFiles outFile = do
|
||||||
-- Парсим все файлы
|
|
||||||
lists <- traverse parseFile inputFiles
|
|
||||||
|
|
||||||
-- Используем ленивые списки для обработки
|
mmaped <- for inputFiles $ \fn -> do
|
||||||
UIO.withBinaryFileAtomic outFile WriteMode $ \hOut ->
|
liftIO (mmapFileByteString fn Nothing)
|
||||||
mergeEntriesN lists getKey (\s -> writeSection s (liftIO . LBS.hPutStr hOut))
|
|
||||||
|
liftIO $ UIO.withBinaryFileAtomic outFile WriteMode $ \hOut ->
|
||||||
|
|
||||||
|
flip fix (mmaped, Heap.empty) $ \next (mmf, win) -> do
|
||||||
|
let (entries, files) = fmap readEntry mmf & unzip
|
||||||
|
let values = [ Entry (getKey e) e | e <- catMaybes entries ]
|
||||||
|
let e' = (win <> Heap.fromList values) & Heap.uncons
|
||||||
|
maybe1 e' none $ \(Entry _ e, newWin) -> do
|
||||||
|
liftIO $ writeSection (LBS.fromStrict e) (LBS.hPutStr hOut)
|
||||||
|
next (catMaybes files, newWin)
|
||||||
|
|
||||||
-- Удаляем исходные файлы
|
|
||||||
mapM_ rm inputFiles
|
mapM_ rm inputFiles
|
||||||
|
|
||||||
where
|
where
|
||||||
parseFile :: FilePath -> m [ByteString]
|
readEntry :: BS.ByteString -> (Maybe BS.ByteString, Maybe BS.ByteString)
|
||||||
parseFile path = do
|
|
||||||
lbs <- liftIO $ LBS.readFile path
|
|
||||||
S.toList_ $ runConsumeLBS lbs $ readSections $ \_ sdata -> lift $ S.yield sdata
|
|
||||||
|
|
||||||
mergeEntriesN :: [[ByteString]] -- ^ Списки данных
|
readEntry src | BS.length src < 4 = (mzero, mzero)
|
||||||
-> (ByteString -> ByteString) -- ^ Функция извлечения ключа
|
|
||||||
-> (ByteString -> m ()) -- ^ Функция записи
|
|
||||||
-> m ()
|
|
||||||
mergeEntriesN lists extractKey write = do
|
|
||||||
let initialQueue = buildQueue lists extractKey
|
|
||||||
mergeQueue initialQueue extractKey write
|
|
||||||
|
|
||||||
buildQueue :: [[ByteString]]
|
readEntry src = do
|
||||||
-> (ByteString -> ByteString)
|
let (size, rest) = BS.splitAt 4 src & over _1 ( fromIntegral . N.word32 )
|
||||||
-> PSQ.OrdPSQ ByteString Int [ByteString]
|
let (e, rest2) = BS.splitAt size rest
|
||||||
buildQueue xs extractKey =
|
|
||||||
foldr (\(i, x:xs') queue -> PSQ.insert (extractKey x) i (x:xs') queue) PSQ.empty (zip [0..] xs)
|
|
||||||
|
|
||||||
mergeQueue :: PSQ.OrdPSQ ByteString Int [ByteString]
|
if BS.length e < size then
|
||||||
-> (ByteString -> ByteString)
|
(mzero, mzero)
|
||||||
-> (ByteString -> m ())
|
else
|
||||||
-> m ()
|
(Just e, Just rest2)
|
||||||
mergeQueue queue extractKey write = unless (PSQ.null queue) $ do
|
|
||||||
let Just (key, _, x:xs', queue') = PSQ.minView queue
|
|
||||||
write x
|
|
||||||
let updatedQueue = if null xs'
|
|
||||||
then queue'
|
|
||||||
else PSQ.insert (extractKey (head xs')) 0 xs' queue'
|
|
||||||
mergeQueue updatedQueue extractKey write
|
|
||||||
|
|
||||||
|
|
||||||
theDict :: forall m . ( HBS2GitPerks m
|
theDict :: forall m . ( HBS2GitPerks m
|
||||||
|
@ -1123,20 +1119,15 @@ theDict = do
|
||||||
LBS.hPutStr fh contents
|
LBS.hPutStr fh contents
|
||||||
|
|
||||||
|
|
||||||
entry $ bindMatch "test:git:reflog:index:list:fucked" $ nil_ $ \case
|
entry $ bindMatch "test:git:reflog:index:list:fast" $ nil_ $ \case
|
||||||
[ StringLike f ] -> lift do
|
[ StringLike f ] -> lift do
|
||||||
bs <- liftIO $ mmapFileByteString f Nothing
|
bs <- liftIO $ mmapFileByteString f Nothing
|
||||||
let len = BS.length bs
|
scanBS bs $ \segment -> do
|
||||||
|
let (sha1,blake) = BS.splitAt 20 segment
|
||||||
|
& over _1 (coerce @_ @GitHash)
|
||||||
|
& over _2 (coerce @_ @HashRef)
|
||||||
|
|
||||||
let pnum = 20 + 32 + 4
|
liftIO $ hPrint stdout $ pretty sha1 <+> pretty blake
|
||||||
|
|
||||||
num <- flip fix (0 :: Int,len,bs) $ \next (n,l,bss) -> do
|
|
||||||
if l < pnum then pure n
|
|
||||||
else do
|
|
||||||
let (_,rest) = BS.splitAt pnum bss
|
|
||||||
next (n+1,l-pnum,rest)
|
|
||||||
|
|
||||||
liftIO $ print $ "okay" <+> pretty num
|
|
||||||
|
|
||||||
_ -> throwIO (BadFormException @C nil)
|
_ -> throwIO (BadFormException @C nil)
|
||||||
|
|
||||||
|
@ -1151,8 +1142,7 @@ theDict = do
|
||||||
& over _1 (coerce @_ @GitHash . LBS.toStrict)
|
& over _1 (coerce @_ @GitHash . LBS.toStrict)
|
||||||
& over _2 (coerce @_ @HashRef . LBS.toStrict)
|
& over _2 (coerce @_ @HashRef . LBS.toStrict)
|
||||||
|
|
||||||
-- liftIO $ hPrint stdout $ pretty sha1 <+> pretty blake
|
liftIO $ hPrint stdout $ pretty sha1 <+> pretty blake
|
||||||
void $ pure ()
|
|
||||||
|
|
||||||
entry $ bindMatch "test:git:reflog:index:sqlite" $ nil_ $ \syn -> lift $ connectedDo do
|
entry $ bindMatch "test:git:reflog:index:sqlite" $ nil_ $ \syn -> lift $ connectedDo do
|
||||||
|
|
||||||
|
@ -1232,7 +1222,7 @@ theDict = do
|
||||||
|
|
||||||
out <- liftIO $ emptyTempFile idxPath "objects-.idx"
|
out <- liftIO $ emptyTempFile idxPath "objects-.idx"
|
||||||
|
|
||||||
mergeSortedFilesN (LBS.take 20) files out
|
mergeSortedFilesN (BS.take 20) files out
|
||||||
|
|
||||||
-- let entriesListOf lbs = S.toList_ $ runConsumeLBS lbs $ readSections $ \s ss -> do
|
-- let entriesListOf lbs = S.toList_ $ runConsumeLBS lbs $ readSections $ \s ss -> do
|
||||||
entry $ bindMatch "test:git:reflog:index:files" $ nil_ $ \syn -> lift do
|
entry $ bindMatch "test:git:reflog:index:files" $ nil_ $ \syn -> lift do
|
||||||
|
@ -1296,12 +1286,10 @@ theDict = do
|
||||||
-- notice $ pretty sha1 <+> pretty tx
|
-- notice $ pretty sha1 <+> pretty tx
|
||||||
writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh)
|
writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh)
|
||||||
|
|
||||||
files <- dirFiles idxPath
|
-- files <- dirFiles idxPath
|
||||||
<&> filter ((== ".idx") . takeExtension)
|
-- <&> filter ((== ".idx") . takeExtension)
|
||||||
|
-- out <- liftIO $ emptyTempFile idxPath "objects-.idx"
|
||||||
out <- liftIO $ emptyTempFile idxPath "objects-.idx"
|
-- liftIO $ mergeSortedFilesN (LBS.take 20) files out
|
||||||
|
|
||||||
liftIO $ mergeSortedFilesN (LBS.take 20) files out
|
|
||||||
|
|
||||||
entry $ bindMatch "test:git:export" $ nil_ $ \syn -> lift $ connectedDo do
|
entry $ bindMatch "test:git:export" $ nil_ $ \syn -> lift $ connectedDo do
|
||||||
let (opts, argz) = splitOpts [("--index",1),("--ref",1)] syn
|
let (opts, argz) = splitOpts [("--index",1),("--ref",1)] syn
|
||||||
|
|
|
@ -80,6 +80,7 @@ common shared-properties
|
||||||
, filepattern
|
, filepattern
|
||||||
, generic-lens
|
, generic-lens
|
||||||
, generic-deriving
|
, generic-deriving
|
||||||
|
, heaps
|
||||||
, interpolatedstring-perl6
|
, interpolatedstring-perl6
|
||||||
, memory
|
, memory
|
||||||
, mmap
|
, mmap
|
||||||
|
|
|
@ -97,6 +97,21 @@ instance Monad m => BytesReader (ConsumeBS m) where
|
||||||
put $! b
|
put $! b
|
||||||
pure (LBS.fromStrict a)
|
pure (LBS.fromStrict a)
|
||||||
|
|
||||||
|
scanBS :: Monad m => BS.ByteString -> ( BS.ByteString -> m () ) -> m ()
|
||||||
|
scanBS bs action = do
|
||||||
|
let hsz = 4
|
||||||
|
flip fix bs $ \next bss -> do
|
||||||
|
if BS.length bss < hsz then pure ()
|
||||||
|
else do
|
||||||
|
let (ssize, rest) = BS.splitAt hsz bss
|
||||||
|
let size = N.word32 ssize & fromIntegral
|
||||||
|
let (sdata, rest2) = BS.splitAt size rest
|
||||||
|
if BS.length sdata < size then
|
||||||
|
pure ()
|
||||||
|
else do
|
||||||
|
action sdata
|
||||||
|
next rest2
|
||||||
|
|
||||||
runConsumeBS :: Monad m => BS.ByteString -> ConsumeBS m a -> m a
|
runConsumeBS :: Monad m => BS.ByteString -> ConsumeBS m a -> m a
|
||||||
runConsumeBS s m = evalStateT (fromConsumeBS m) s
|
runConsumeBS s m = evalStateT (fromConsumeBS m) s
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue