mirror of https://github.com/voidlizard/hbs2
wip
This commit is contained in:
parent
83bcba17ae
commit
3773c7857b
|
@ -2090,7 +2090,7 @@ theDict = do
|
||||||
for_ fnames $ \f -> do
|
for_ fnames $ \f -> do
|
||||||
theLog <- liftIO $ LBS.readFile f
|
theLog <- liftIO $ LBS.readFile f
|
||||||
|
|
||||||
void $ runConsumeLBS theLog $ readLogFileLBS () $ \h s lbs -> do
|
void $ runConsumeLBS (ZstdL.decompress theLog) $ readLogFileLBS () $ \h s lbs -> do
|
||||||
lift $ S.yield (coerce @_ @BS.ByteString h)
|
lift $ S.yield (coerce @_ @BS.ByteString h)
|
||||||
debug $ "object" <+> pretty h
|
debug $ "object" <+> pretty h
|
||||||
|
|
||||||
|
@ -2102,6 +2102,20 @@ theDict = do
|
||||||
BS.hPutStr fh entrySize
|
BS.hPutStr fh entrySize
|
||||||
BS.hPutStr fh ghs
|
BS.hPutStr fh ghs
|
||||||
|
|
||||||
|
|
||||||
|
entry $ bindMatch "test:sqlite" $ nil_ $ \case
|
||||||
|
[ StringLike fn ] -> lift do
|
||||||
|
db <- newDBPipeEnv dbPipeOptsDef fn
|
||||||
|
withDB db do
|
||||||
|
all <- select_ @_ @(Only Text) [qc|select hash from githash|]
|
||||||
|
for_ all $ \x -> do
|
||||||
|
n <- select @(Only Int) [qc|select 1 from githash where hash = ?|] (Only (fromOnly x))
|
||||||
|
<&> L.null
|
||||||
|
unless n do
|
||||||
|
liftIO $ print $ pretty (fromOnly x)
|
||||||
|
|
||||||
|
_ -> throwIO (BadFormException @C nil)
|
||||||
|
|
||||||
entry $ bindMatch "test:git:export-commit-dfs" $ nil_ $ \syn -> lift do
|
entry $ bindMatch "test:git:export-commit-dfs" $ nil_ $ \syn -> lift do
|
||||||
let (opts, argz) = splitOpts [("--index",1)] syn
|
let (opts, argz) = splitOpts [("--index",1)] syn
|
||||||
let hd = headDef "HEAD" [ x | StringLike x <- argz]
|
let hd = headDef "HEAD" [ x | StringLike x <- argz]
|
||||||
|
@ -2136,44 +2150,69 @@ theDict = do
|
||||||
|
|
||||||
liftIO $ flip runContT pure do
|
liftIO $ flip runContT pure do
|
||||||
|
|
||||||
sourceQ <- newTBQueueIO 1000
|
tn <- getNumCapabilities
|
||||||
|
|
||||||
theReader <- ContT $ withGitCat
|
sourceQ <- newTBQueueIO (fromIntegral tn * 100)
|
||||||
|
|
||||||
seed <- randomIO @Word16
|
seed <- randomIO @Word16
|
||||||
logFile <- ContT $ withBinaryFile (show $ "export-" <> pretty seed <> ".log") AppendMode
|
logFile <- ContT $ withBinaryFile (show $ "export-" <> pretty seed <> ".log") AppendMode
|
||||||
|
|
||||||
l <- lift $ async $ do
|
l <- lift $ async $ do
|
||||||
stream <- ZstdS.compress maxCLevel
|
zstd <- ZstdS.compress maxCLevel
|
||||||
q <- newTQueueIO
|
flip fix zstd \jerk sn -> do
|
||||||
writeSections (atomically (readTBQueue sourceQ)) (atomically . writeTQueue q . Just)
|
atomically (readTBQueue sourceQ) >>= \case
|
||||||
atomically $ writeTQueue q Nothing
|
Nothing -> writeCompressedChunkZstd (LBS.hPutStr logFile) sn Nothing
|
||||||
writeCompressedStreamZstd stream (atomically $ readTQueue q) $ \shit -> do
|
Just s -> do
|
||||||
liftIO $ LBS.hPutStr logFile shit
|
lbs <- S.toList_ (writeSection s $ S.yield) <&> mconcat
|
||||||
|
writeCompressedChunkZstd (LBS.hPutStr logFile) sn (Just lbs) >>= jerk
|
||||||
|
|
||||||
link l
|
link l
|
||||||
|
|
||||||
for_ r $ \commit -> do
|
let commitz = chunksOf (total `div` tn) r
|
||||||
hashes <- gitReadTreeObjectsOnly commit
|
|
||||||
<&> (commit:)
|
|
||||||
>>= filterM notWrittenYet
|
|
||||||
|
|
||||||
for_ hashes $ \gh -> do
|
progress_ <- newTVarIO 0
|
||||||
(_t,lbs) <- gitReadObjectMaybe theReader gh
|
|
||||||
>>= orThrow (GitReadError (show $ pretty gh))
|
|
||||||
|
|
||||||
let section = [ Builder.byteString (coerce gh)
|
workers <- lift $ forM (zip [0..] commitz) $ \(i,chunk) -> async $ flip runContT pure do
|
||||||
, Builder.lazyByteString lbs
|
theReader <- ContT withGitCat
|
||||||
] & Builder.toLazyByteString . mconcat
|
|
||||||
|
|
||||||
atomically do
|
for_ chunk \commit -> do
|
||||||
modifyTVar _already (HS.insert gh)
|
|
||||||
writeTBQueue sourceQ (Just section)
|
atomically $ modifyTVar progress_ succ
|
||||||
|
|
||||||
|
hashes <- gitReadTreeObjectsOnly commit
|
||||||
|
<&> (commit:)
|
||||||
|
>>= filterM notWrittenYet
|
||||||
|
|
||||||
|
for_ hashes $ \gh -> do
|
||||||
|
(_t,lbs) <- gitReadObjectMaybe theReader gh
|
||||||
|
>>= orThrow (GitReadError (show $ pretty gh))
|
||||||
|
|
||||||
|
let e = [ Builder.byteString (coerce gh)
|
||||||
|
, Builder.lazyByteString lbs
|
||||||
|
] & Builder.toLazyByteString . mconcat
|
||||||
|
|
||||||
|
atomically do
|
||||||
|
modifyTVar _already (HS.insert gh)
|
||||||
|
writeTBQueue sourceQ (Just e)
|
||||||
|
|
||||||
|
ContT $ withAsync $ forever do
|
||||||
|
pause @'Seconds 1
|
||||||
|
p <- readTVarIO progress_
|
||||||
|
|
||||||
|
let pp = fromIntegral p / (fromIntegral total :: Double) * 100
|
||||||
|
& realToFrac @_ @(Fixed E2)
|
||||||
|
|
||||||
|
liftIO $ IO.hPutStr stderr $ show $ " \r" <> pretty pp <> "%"
|
||||||
|
pure ()
|
||||||
|
|
||||||
|
mapM_ link workers
|
||||||
|
mapM_ wait workers
|
||||||
|
|
||||||
atomically $ writeTBQueue sourceQ Nothing
|
atomically $ writeTBQueue sourceQ Nothing
|
||||||
|
|
||||||
wait l
|
wait l
|
||||||
|
|
||||||
|
|
||||||
linearSearchLBS hash lbs = do
|
linearSearchLBS hash lbs = do
|
||||||
|
|
||||||
found <- S.toList_ $ runConsumeLBS lbs $ flip fix 0 \go n -> do
|
found <- S.toList_ $ runConsumeLBS lbs $ flip fix 0 \go n -> do
|
||||||
|
|
|
@ -2,14 +2,15 @@ module HBS2.Data.Log.Structured where
|
||||||
|
|
||||||
import HBS2.Prelude.Plated
|
import HBS2.Prelude.Plated
|
||||||
|
|
||||||
|
import Data.ByteString.Builder qualified as B
|
||||||
import Data.ByteString.Lazy (ByteString)
|
import Data.ByteString.Lazy (ByteString)
|
||||||
import Data.ByteString.Lazy qualified as LBS
|
import Data.ByteString.Lazy qualified as LBS
|
||||||
import Data.ByteString qualified as BS
|
import Data.ByteString qualified as BS
|
||||||
import Data.ByteString.Builder qualified as B
|
import Data.Maybe
|
||||||
import Network.ByteOrder hiding (ByteString)
|
import Network.ByteOrder hiding (ByteString)
|
||||||
|
|
||||||
import Codec.Compression.Zstd.Streaming qualified as Zstd
|
|
||||||
import Codec.Compression.Zstd qualified as Zstd
|
import Codec.Compression.Zstd qualified as Zstd
|
||||||
|
import Codec.Compression.Zstd.Streaming qualified as Zstd
|
||||||
import Codec.Compression.Zstd.Streaming (Result(..))
|
import Codec.Compression.Zstd.Streaming (Result(..))
|
||||||
|
|
||||||
import Control.Exception
|
import Control.Exception
|
||||||
|
@ -42,33 +43,48 @@ data CompressedStreamError =
|
||||||
|
|
||||||
instance Exception CompressedStreamError
|
instance Exception CompressedStreamError
|
||||||
|
|
||||||
|
writeCompressedChunkZstd :: forall m . MonadIO m
|
||||||
|
=> ( ByteString -> m () )
|
||||||
|
-> Result
|
||||||
|
-> Maybe ByteString
|
||||||
|
-> m Result
|
||||||
|
|
||||||
|
writeCompressedChunkZstd sink stream mlbs = do
|
||||||
|
flip fix ( LBS.toChunks lbs, stream) $ \next -> \case
|
||||||
|
|
||||||
|
([], r@(Done s)) -> sink (LBS.fromStrict s) >> pure r
|
||||||
|
|
||||||
|
(_, Done{}) -> liftIO (throwIO CompressedStreamWriteError)
|
||||||
|
|
||||||
|
(_, Error{})-> liftIO (throwIO CompressedStreamWriteError)
|
||||||
|
|
||||||
|
(w, Produce s continue) -> do
|
||||||
|
sink (LBS.fromStrict s)
|
||||||
|
c <- liftIO continue
|
||||||
|
next (w, c)
|
||||||
|
|
||||||
|
(_, Consume consume) | isNothing mlbs -> do
|
||||||
|
r <- liftIO (consume mempty)
|
||||||
|
next ([], r)
|
||||||
|
|
||||||
|
([], r@(Consume{})) -> pure r
|
||||||
|
|
||||||
|
(x:xs, r@(Consume consume)) -> do
|
||||||
|
what <- liftIO (consume x)
|
||||||
|
next (xs, what)
|
||||||
|
|
||||||
|
where
|
||||||
|
lbs = fromMaybe mempty mlbs
|
||||||
|
|
||||||
|
|
||||||
writeCompressedStreamZstd :: forall m . MonadIO m
|
writeCompressedStreamZstd :: forall m . MonadIO m
|
||||||
=> Result
|
=> Result
|
||||||
-> m (Maybe ByteString)
|
-> m (Maybe ByteString)
|
||||||
-> ( ByteString -> m () )
|
-> ( ByteString -> m () )
|
||||||
-> m ()
|
-> m ()
|
||||||
writeCompressedStreamZstd stream source sink = do
|
writeCompressedStreamZstd stream source sink = do
|
||||||
|
flip fix stream $ \next sn -> do
|
||||||
flip fix (mempty,stream) $ \next -> \case
|
source >>= \case
|
||||||
(_, Done s) -> sink (LBS.fromStrict s)
|
Nothing -> writeCompressedChunkZstd sink sn Nothing >> none
|
||||||
|
Just lbs -> writeCompressedChunkZstd sink sn (Just lbs) >>= next
|
||||||
(_,Error _ _) -> liftIO (throwIO CompressedStreamWriteError)
|
|
||||||
|
|
||||||
(some, Produce s continue) -> do
|
|
||||||
sink (LBS.fromStrict s)
|
|
||||||
c <- liftIO continue
|
|
||||||
next (some, c)
|
|
||||||
|
|
||||||
([], w@(Consume consume)) -> do
|
|
||||||
source >>= \case
|
|
||||||
Just piece -> do
|
|
||||||
next (LBS.toChunks piece, w)
|
|
||||||
|
|
||||||
Nothing -> do
|
|
||||||
c <- liftIO (consume mempty)
|
|
||||||
next ([], c)
|
|
||||||
|
|
||||||
(x:xs, Consume consume) -> do
|
|
||||||
c <- liftIO (consume x)
|
|
||||||
next (xs, c)
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue