mirror of https://github.com/voidlizard/hbs2
wip
This commit is contained in:
parent
ff7abbefc7
commit
218ff12000
|
@ -93,7 +93,9 @@ import Data.Vector.Algorithms.Search qualified as MV
|
||||||
import UnliftIO.Concurrent
|
import UnliftIO.Concurrent
|
||||||
import UnliftIO.IO.File qualified as UIO
|
import UnliftIO.IO.File qualified as UIO
|
||||||
|
|
||||||
import Data.BloomFilter.Easy qualified as Bloom
|
import Control.Monad.ST
|
||||||
|
import Data.BloomFilter qualified as Bloom
|
||||||
|
import Data.BloomFilter.Mutable qualified as MBloom
|
||||||
|
|
||||||
{- HLINT ignore "Functor law" -}
|
{- HLINT ignore "Functor law" -}
|
||||||
{- HLINT ignore "Eta reduce" -}
|
{- HLINT ignore "Eta reduce" -}
|
||||||
|
@ -146,6 +148,7 @@ recover m = fix \again -> do
|
||||||
|
|
||||||
liftIO $ withGit3Env connected again
|
liftIO $ withGit3Env connected again
|
||||||
|
|
||||||
|
e -> throwIO e
|
||||||
---
|
---
|
||||||
|
|
||||||
data TreeReadState = TreeReadState
|
data TreeReadState = TreeReadState
|
||||||
|
@ -578,14 +581,6 @@ theDict = do
|
||||||
entry $ bindMatch "zlib:deflate" $ nil_ $ const $ liftIO do
|
entry $ bindMatch "zlib:deflate" $ nil_ $ const $ liftIO do
|
||||||
LBS.hGetContents stdin <&> Zlib.decompress >>= LBS.hPutStr stdout
|
LBS.hGetContents stdin <&> Zlib.decompress >>= LBS.hPutStr stdout
|
||||||
|
|
||||||
|
|
||||||
entry $ bindMatch "test:reflog:bloom:create" $ nil_ $ \syn -> lift do
|
|
||||||
r <- newTQueueIO
|
|
||||||
enumEntries $ \e -> do
|
|
||||||
atomically $ writeTQueue r (BS.take 20 e)
|
|
||||||
atomically (STM.flushTQueue r) <&> (Bloom.easyList 0.01)
|
|
||||||
notice $ "bloom filter build done"
|
|
||||||
|
|
||||||
entry $ bindMatch "test:git:read-commit-chain" $ nil_ $ \syn -> lift do
|
entry $ bindMatch "test:git:read-commit-chain" $ nil_ $ \syn -> lift do
|
||||||
(mpath, hss) <- case syn of
|
(mpath, hss) <- case syn of
|
||||||
[ HashLike s ] -> pure (Nothing, s)
|
[ HashLike s ] -> pure (Nothing, s)
|
||||||
|
@ -705,7 +700,8 @@ theDict = do
|
||||||
|
|
||||||
entry $ bindMatch "test:reflog:index:search:binary:test:2" $ nil_ $ const $ lift do
|
entry $ bindMatch "test:reflog:index:search:binary:test:2" $ nil_ $ const $ lift do
|
||||||
r <- newTQueueIO
|
r <- newTQueueIO
|
||||||
enumEntries $ \e -> do
|
idx <- openIndex
|
||||||
|
enumEntries idx $ \e -> do
|
||||||
let ha = GitHash $ coerce $ BS.take 20 e
|
let ha = GitHash $ coerce $ BS.take 20 e
|
||||||
atomically $ writeTQueue r ha
|
atomically $ writeTQueue r ha
|
||||||
|
|
||||||
|
@ -958,19 +954,16 @@ theDict = do
|
||||||
LBS.hPutStr fh contents
|
LBS.hPutStr fh contents
|
||||||
|
|
||||||
|
|
||||||
entry $ bindMatch "test:git:reflog:index:list:fast" $ nil_ $ \case
|
entry $ bindMatch "test:git:reflog:index:list:fast" $ nil_ $ const $ lift do
|
||||||
[ StringLike f ] -> lift do
|
files <- listObjectIndexFiles
|
||||||
|
forConcurrently_ files $ \(f,_) -> do
|
||||||
bs <- liftIO $ mmapFileByteString f Nothing
|
bs <- liftIO $ mmapFileByteString f Nothing
|
||||||
scanBS bs $ \segment -> do
|
scanBS bs $ \segment -> do
|
||||||
none
|
let (sha1,blake) = BS.splitAt 20 segment
|
||||||
-- let (sha1,blake) = BS.splitAt 20 segment
|
& over _1 (coerce @_ @GitHash)
|
||||||
-- & over _1 (coerce @_ @GitHash)
|
& over _2 (coerce @_ @HashRef)
|
||||||
-- & over _2 (coerce @_ @HashRef)
|
|
||||||
|
|
||||||
-- notice $ pretty sha1 <+> pretty blake
|
notice $ pretty sha1 <+> pretty blake
|
||||||
liftIO $ print $ pretty "okay"
|
|
||||||
|
|
||||||
_ -> throwIO (BadFormException @C nil)
|
|
||||||
|
|
||||||
entry $ bindMatch "reflog:index:list" $ nil_ $ const $ lift do
|
entry $ bindMatch "reflog:index:list" $ nil_ $ const $ lift do
|
||||||
files <- listObjectIndexFiles
|
files <- listObjectIndexFiles
|
||||||
|
@ -1068,7 +1061,6 @@ theDict = do
|
||||||
entry $ bindMatch "reflog:index:path" $ nil_ $ const $ lift do
|
entry $ bindMatch "reflog:index:path" $ nil_ $ const $ lift do
|
||||||
indexPath >>= liftIO . print . pretty
|
indexPath >>= liftIO . print . pretty
|
||||||
|
|
||||||
|
|
||||||
-- let entriesListOf lbs = S.toList_ $ runConsumeLBS lbs $ readSections $ \s ss -> do
|
-- let entriesListOf lbs = S.toList_ $ runConsumeLBS lbs $ readSections $ \s ss -> do
|
||||||
entry $ bindMatch "reflog:index:files" $ nil_ $ \syn -> lift do
|
entry $ bindMatch "reflog:index:files" $ nil_ $ \syn -> lift do
|
||||||
files <- listObjectIndexFiles
|
files <- listObjectIndexFiles
|
||||||
|
@ -1079,7 +1071,8 @@ theDict = do
|
||||||
|
|
||||||
entry $ bindMatch "reflog:index:list:tx" $ nil_ $ const $ lift do
|
entry $ bindMatch "reflog:index:list:tx" $ nil_ $ const $ lift do
|
||||||
r <- newTVarIO ( mempty :: HashSet HashRef )
|
r <- newTVarIO ( mempty :: HashSet HashRef )
|
||||||
enumEntries $ \bs -> do
|
index <- openIndex
|
||||||
|
enumEntries index $ \bs -> do
|
||||||
atomically $ modifyTVar r (HS.insert (coerce $ BS.take 32 $ BS.drop 20 bs))
|
atomically $ modifyTVar r (HS.insert (coerce $ BS.take 32 $ BS.drop 20 bs))
|
||||||
z <- readTVarIO r <&> HS.toList
|
z <- readTVarIO r <&> HS.toList
|
||||||
liftIO $ mapM_ ( print . pretty ) z
|
liftIO $ mapM_ ( print . pretty ) z
|
||||||
|
@ -1087,57 +1080,24 @@ theDict = do
|
||||||
entry $ bindMatch "reflog:index:build" $ nil_ $ const $ lift $ connectedDo do
|
entry $ bindMatch "reflog:index:build" $ nil_ $ const $ lift $ connectedDo do
|
||||||
writeReflogIndex
|
writeReflogIndex
|
||||||
|
|
||||||
entry $ bindMatch "git:list:objects:new" $ nil_ $ \syn -> lift do
|
entry $ bindMatch "test:reflog:index:lookup" $ nil_ \case
|
||||||
let (opts,argz) = splitOpts [] syn
|
[ GitHashLike h ] -> lift do
|
||||||
|
idx <- openIndex
|
||||||
|
what <- indexEntryLookup idx h >>= orThrowUser "object not found"
|
||||||
|
liftIO $ print $ pretty ( coerce @_ @HashRef what )
|
||||||
|
|
||||||
let what = headDef "HEAD" [ x | StringLike x <- argz ]
|
_ -> throwIO (BadFormException @C nil)
|
||||||
h0 <- gitRevParseThrow what
|
|
||||||
|
|
||||||
void $ flip runContT pure do
|
entry $ bindMatch "git:commit:list:objects:new" $ nil_ $ \case
|
||||||
|
[ StringLike what ] -> lift do
|
||||||
|
|
||||||
rq <- newTQueueIO
|
commit <- gitRevParseThrow what
|
||||||
-- ContT $ withAsync (startReflogIndexQueryQueue rq)
|
|
||||||
|
|
||||||
-- let req h = do
|
idx <- openIndex
|
||||||
-- let bs = coerce @GitHash @N.ByteString h
|
|
||||||
-- let tr = const True
|
|
||||||
-- w <- newEmptyTMVarIO
|
|
||||||
-- atomically $ writeTQueue rq (bs, tr, w)
|
|
||||||
-- r <- atomically $ readTMVar w
|
|
||||||
-- pure $ isNothing r
|
|
||||||
|
|
||||||
cache <- newTVarIO ( mempty :: HashSet GitHash )
|
let req h = lift $ indexEntryLookup idx h <&> isNothing
|
||||||
|
|
||||||
-- читаем вообще всё из индекса в память и строим HashSet
|
|
||||||
-- получается, что вообще никакого профита, что это индекс,
|
|
||||||
-- это фуллскан в любом случае.
|
|
||||||
-- Индекс это сортированная последовательность [(GitHash, HashRef)]
|
|
||||||
-- в виде байстроки формата "SD", D ~ GitHash <> HashRef
|
|
||||||
--
|
|
||||||
--
|
|
||||||
|
|
||||||
(t,_) <- timeItT do
|
|
||||||
lift $ enumEntries $ \e -> do
|
|
||||||
atomically do
|
|
||||||
modifyTVar cache ( HS.insert (coerce $ BS.take 20 e) )
|
|
||||||
|
|
||||||
s <- readTVarIO cache <&> HS.size
|
|
||||||
notice $ pretty s <+> " records read at" <+> pretty t
|
|
||||||
|
|
||||||
l_ <- newTVarIO 0
|
|
||||||
h_ <- newTVarIO ( mempty :: HashSet GitHash )
|
|
||||||
s_ <- newTVarIO ( mempty :: HashMap GitHash Int )
|
|
||||||
|
|
||||||
let req h = do
|
|
||||||
atomically do
|
|
||||||
modifyTVar l_ succ
|
|
||||||
readTVar cache <&> not . HS.member h
|
|
||||||
|
|
||||||
-- читаем только те коммиты, которые не в индексе
|
|
||||||
-- очень быстро, пушо относительно мало объектов
|
|
||||||
(t1,r) <- timeItT (lift $ readCommitChainHPSQ req Nothing h0 dontHandle)
|
|
||||||
notice $ pretty s <+> " new commits read at" <+> pretty (realToFrac @_ @(Fixed E3) t1)
|
|
||||||
|
|
||||||
|
flip runContT pure do
|
||||||
cap <- liftIO getNumCapabilities
|
cap <- liftIO getNumCapabilities
|
||||||
gitCatBatchQ <- contWorkerPool cap do
|
gitCatBatchQ <- contWorkerPool cap do
|
||||||
che <- ContT withGitCat
|
che <- ContT withGitCat
|
||||||
|
@ -1145,9 +1105,8 @@ theDict = do
|
||||||
|
|
||||||
new_ <- newTQueueIO
|
new_ <- newTQueueIO
|
||||||
c1 <- newCacheFixedHPSQ 1000
|
c1 <- newCacheFixedHPSQ 1000
|
||||||
(t3, _) <- timeItT $ lift $ forConcurrently_ (HPSQ.toList r) $ \(commit,_,_) -> do
|
|
||||||
|
|
||||||
(_,self) <- gitCatBatchQ commit
|
(_,self) <- lift $ gitCatBatchQ commit
|
||||||
>>= orThrow (GitReadError (show $ pretty commit))
|
>>= orThrow (GitReadError (show $ pretty commit))
|
||||||
|
|
||||||
tree <- gitReadCommitTree self
|
tree <- gitReadCommitTree self
|
||||||
|
@ -1157,23 +1116,121 @@ theDict = do
|
||||||
<&> ([commit,tree]<>)
|
<&> ([commit,tree]<>)
|
||||||
>>= filterM req
|
>>= filterM req
|
||||||
--
|
--
|
||||||
atomically $ modifyTVar s_ (HM.insertWith (\old new -> max old new) commit (length hashes))
|
|
||||||
|
|
||||||
atomically $ modifyTVar h_ (HS.union (HS.fromList hashes))
|
|
||||||
|
|
||||||
atomically $ mapM_ (writeTQueue new_) hashes
|
atomically $ mapM_ (writeTQueue new_) hashes
|
||||||
|
|
||||||
-- 1.8 секунд и заметно растёт от числа коммитов, сука
|
|
||||||
atomically (STM.flushTQueue new_) >>= liftIO . print . pretty . length
|
atomically (STM.flushTQueue new_) >>= liftIO . print . pretty . length
|
||||||
|
|
||||||
l <- readTVarIO l_
|
_ -> throwIO (BadFormException @C nil)
|
||||||
n <- readTVarIO h_
|
|
||||||
w <- readTVarIO s_ <&> HM.elems
|
|
||||||
|
|
||||||
let a = realToFrac (sum w) / realToFrac (length w)
|
entry $ bindMatch "git:list:objects:new" $ nil_ $ \syn -> lift do
|
||||||
|
let (opts,argz) = splitOpts [] syn
|
||||||
|
|
||||||
notice $ pretty l <+> pretty (HS.size n) <+> "done in " <+> pretty (realToFrac @_ @(Fixed E3) t3)
|
let what = headDef "HEAD" [ x | StringLike x <- argz ]
|
||||||
notice $ "avg per commit" <+> pretty a
|
h0 <- gitRevParseThrow what
|
||||||
|
|
||||||
|
no_ <- newTVarIO 0
|
||||||
|
|
||||||
|
void $ flip runContT pure do
|
||||||
|
|
||||||
|
-- cache <- newTVarIO ( mempty :: HashSet GitHash )
|
||||||
|
|
||||||
|
-- читаем вообще всё из индекса в память и строим HashSet
|
||||||
|
-- получается, что вообще никакого профита, что это индекс,
|
||||||
|
-- это фуллскан в любом случае.
|
||||||
|
-- Индекс это сортированная последовательность [(GitHash, HashRef)]
|
||||||
|
-- в виде байстроки формата "SD", D ~ GitHash <> HashRef
|
||||||
|
|
||||||
|
-- let blm = runST (MBloom.new undefined 1000000)
|
||||||
|
--
|
||||||
|
-- bloom <- liftIO $ stToIO $ MBloom.new 10000
|
||||||
|
--
|
||||||
|
|
||||||
|
index <- lift openIndex
|
||||||
|
|
||||||
|
-- let req h = do
|
||||||
|
-- atomically do
|
||||||
|
-- readTVar cache <&> not . HS.member h
|
||||||
|
|
||||||
|
-- let
|
||||||
|
-- req2 :: GitHash -> Git3 m Bool
|
||||||
|
-- req2 h = liftIO do
|
||||||
|
-- here <- liftIO $ stToIO $ MBloom.elem h bloom
|
||||||
|
|
||||||
|
-- if not here then pure True else do
|
||||||
|
-- atomically $ modifyTVar blmn_ succ
|
||||||
|
-- forConcurrently_ files $ \f -> do
|
||||||
|
-- found <- binarySearchBS 56 ( BS.take 20. BS.drop 4 ) (coerce h) f
|
||||||
|
-- when (isJust found) do
|
||||||
|
-- atomically $ modifyTVar excl_ (HS.insert h)
|
||||||
|
-- readTVarIO excl_ <&> not . HS.member h
|
||||||
|
|
||||||
|
-- req3 :: HashSet GitHash -> Git3 m (HashSet GitHash)
|
||||||
|
-- req3 hs = liftIO do
|
||||||
|
|
||||||
|
-- forConcurrently_ files $ \f -> do
|
||||||
|
-- flip fix (HS.toList hs) $ \next -> \case
|
||||||
|
-- [] -> none
|
||||||
|
-- (x:xs) -> do
|
||||||
|
-- already <- readTVarIO excl_ <&> HS.member x
|
||||||
|
-- inBloom <- liftIO $ stToIO $ MBloom.elem x bloom
|
||||||
|
|
||||||
|
-- when inBloom do
|
||||||
|
-- atomically $ modifyTVar blmn_ succ
|
||||||
|
|
||||||
|
-- when (not already || inBloom) do
|
||||||
|
-- found <- binarySearchBS 56 ( BS.take 20. BS.drop 4 ) (coerce x) f
|
||||||
|
-- when (isJust found) do
|
||||||
|
-- atomically $ modifyTVar excl_ (HS.insert x)
|
||||||
|
-- next xs
|
||||||
|
|
||||||
|
-- found <- readTVarIO excl_
|
||||||
|
-- pure ( hs `HS.difference` found)
|
||||||
|
|
||||||
|
-- читаем только те коммиты, которые не в индексе
|
||||||
|
-- очень быстро, пушо относительно мало объектов
|
||||||
|
|
||||||
|
idx <- lift openIndex
|
||||||
|
let req h = lift $ indexEntryLookup idx h <&> isNothing
|
||||||
|
|
||||||
|
(t1,r) <- timeItT (lift $ readCommitChainHPSQ req Nothing h0 dontHandle)
|
||||||
|
|
||||||
|
let s = HPSQ.size r
|
||||||
|
notice $ pretty s <+> "new commits read at" <+> pretty (realToFrac @_ @(Fixed E3) t1)
|
||||||
|
|
||||||
|
cap <- liftIO getNumCapabilities
|
||||||
|
gitCatBatchQ <- contWorkerPool cap do
|
||||||
|
che <- ContT withGitCat
|
||||||
|
pure $ gitReadObjectMaybe che
|
||||||
|
|
||||||
|
uniq_ <- newTVarIO mempty
|
||||||
|
-- c1 <- newCacheFixedHPSQ 1000
|
||||||
|
(t3, _) <- timeItT $ lift $ forConcurrently_ (HPSQ.toList r) $ \(commit,_,_) -> do
|
||||||
|
|
||||||
|
(_,self) <- gitCatBatchQ commit
|
||||||
|
>>= orThrow (GitReadError (show $ pretty commit))
|
||||||
|
|
||||||
|
tree <- gitReadCommitTree self
|
||||||
|
|
||||||
|
-- читаем только те объекты, которые не в индексе
|
||||||
|
gitReadTreeObjectsOnly commit
|
||||||
|
<&> ([commit,tree]<>)
|
||||||
|
>>= \hs -> atomically (for_ hs (modifyTVar uniq_ . HS.insert))
|
||||||
|
|
||||||
|
notice $ "all shit read" <+> pretty (realToFrac @_ @(Fixed E2) t3)
|
||||||
|
|
||||||
|
(t4,new) <- lift $ timeItT $ readTVarIO uniq_ >>= filterM req . HS.toList
|
||||||
|
|
||||||
|
notice $ pretty (length new) <+> "new objects" <+> "at" <+> pretty (realToFrac @_ @(Fixed E2) t4)
|
||||||
|
|
||||||
|
-- x <- readTVarIO uniq_ <&> HS.size
|
||||||
|
|
||||||
|
-- blmn <- readTVarIO blmn_
|
||||||
|
-- notice $ "all shit filter" <+> parens (pretty x) <+> brackets (pretty blmn) <+> pretty (realToFrac @_ @(Fixed E2) t4)
|
||||||
|
|
||||||
|
-- notice $ pretty (length new)
|
||||||
|
|
||||||
|
|
||||||
|
-- notice $ "total objects" <+> pretty
|
||||||
|
-- notice $ "present" <+> pretty nhere
|
||||||
|
|
||||||
-- liftIO $ print $ pretty (HS
|
-- liftIO $ print $ pretty (HS
|
||||||
|
|
||||||
|
|
|
@ -17,9 +17,21 @@ import Data.ByteString.Lazy qualified as LBS
|
||||||
import Data.Maybe
|
import Data.Maybe
|
||||||
import Data.HashMap.Strict (HashMap)
|
import Data.HashMap.Strict (HashMap)
|
||||||
import Data.HashMap.Strict qualified as HM
|
import Data.HashMap.Strict qualified as HM
|
||||||
|
import Data.Word
|
||||||
|
import Data.Vector (Vector)
|
||||||
|
import Data.Vector qualified as V
|
||||||
|
import Data.Kind
|
||||||
|
|
||||||
|
|
||||||
|
import Data.BloomFilter qualified as Bloom
|
||||||
|
import Data.BloomFilter (Bloom(..))
|
||||||
|
import Data.BloomFilter.Mutable qualified as MBloom
|
||||||
|
|
||||||
|
import Control.Monad.ST
|
||||||
|
|
||||||
import Control.Concurrent.STM qualified as STM
|
import Control.Concurrent.STM qualified as STM
|
||||||
import Codec.Compression.Zstd.Lazy qualified as ZstdL
|
import Codec.Compression.Zstd.Lazy qualified as ZstdL
|
||||||
|
import Codec.Serialise
|
||||||
import Streaming.Prelude qualified as S
|
import Streaming.Prelude qualified as S
|
||||||
import Streaming hiding (run,chunksOf)
|
import Streaming hiding (run,chunksOf)
|
||||||
|
|
||||||
|
@ -59,6 +71,46 @@ indexPath = do
|
||||||
reflog <- getGitRemoteKey >>= orThrow Git3ReflogNotSet
|
reflog <- getGitRemoteKey >>= orThrow Git3ReflogNotSet
|
||||||
getStatePath (AsBase58 reflog) <&> (</> "index")
|
getStatePath (AsBase58 reflog) <&> (</> "index")
|
||||||
|
|
||||||
|
data IndexEntry =
|
||||||
|
IndexEntry
|
||||||
|
{ entryFile :: FilePath
|
||||||
|
, entryBS :: N.ByteString
|
||||||
|
}
|
||||||
|
|
||||||
|
data Index a =
|
||||||
|
Index { entries :: [IndexEntry]
|
||||||
|
, bitmap :: MBloom.MBloom RealWorld GitHash
|
||||||
|
}
|
||||||
|
|
||||||
|
openIndex :: forall a m . (Git3Perks m, MonadReader Git3Env m)
|
||||||
|
=> m (Index a)
|
||||||
|
|
||||||
|
openIndex = do
|
||||||
|
files <- listObjectIndexFiles
|
||||||
|
bss <- liftIO $ for files $ \(f,_) -> (f,) <$> mmapFileByteString f Nothing
|
||||||
|
let entries = [ IndexEntry f bs | (f,bs) <- bss ]
|
||||||
|
bloom <- liftIO $ stToIO $ MBloom.new bloomHash 10
|
||||||
|
pure $ Index entries bloom
|
||||||
|
|
||||||
|
indexEntryLookup :: forall a m . (Git3Perks m)
|
||||||
|
=> Index a
|
||||||
|
-> GitHash
|
||||||
|
-> m (Maybe N.ByteString)
|
||||||
|
|
||||||
|
indexEntryLookup Index{..} h = do
|
||||||
|
already_ <- newTVarIO ( mempty :: HashMap GitHash N.ByteString )
|
||||||
|
forConcurrently_ entries $ \IndexEntry{..} -> do
|
||||||
|
what <- readTVarIO already_ <&> HM.lookup h
|
||||||
|
case what of
|
||||||
|
Just{} -> none
|
||||||
|
Nothing -> do
|
||||||
|
offset' <- binarySearchBS 56 ( BS.take 20 . BS.drop 4 ) (coerce h) entryBS
|
||||||
|
maybe1 offset' none $ \offset -> do
|
||||||
|
let ebs = BS.take 32 $ BS.drop (offset + 4 + 20) entryBS
|
||||||
|
atomically $ modifyTVar already_ (HM.insert h ebs)
|
||||||
|
|
||||||
|
readTVarIO already_ <&> headMay . HM.elems
|
||||||
|
|
||||||
listObjectIndexFiles :: forall m . ( Git3Perks m
|
listObjectIndexFiles :: forall m . ( Git3Perks m
|
||||||
, MonadReader Git3Env m
|
, MonadReader Git3Env m
|
||||||
) => m [(FilePath, Natural)]
|
) => m [(FilePath, Natural)]
|
||||||
|
@ -72,16 +124,23 @@ listObjectIndexFiles = do
|
||||||
pure (f,z)
|
pure (f,z)
|
||||||
|
|
||||||
|
|
||||||
|
enumEntries :: forall a m . ( Git3Perks m
|
||||||
enumEntries :: forall m . ( Git3Perks m
|
|
||||||
, MonadReader Git3Env m
|
, MonadReader Git3Env m
|
||||||
) => ( BS.ByteString -> m () ) -> m ()
|
) => Index a -> ( BS.ByteString -> m () ) -> m ()
|
||||||
|
|
||||||
enumEntries action = do
|
enumEntries Index{..} action = do
|
||||||
files <- listObjectIndexFiles <&> fmap fst
|
forConcurrently_ entries $ \IndexEntry{..} -> do
|
||||||
forConcurrently_ files $ \f -> do
|
scanBS entryBS action
|
||||||
bs <- liftIO $ mmapFileByteString f Nothing
|
|
||||||
scanBS bs action
|
bloomHash :: GitHash -> [Word32]
|
||||||
|
bloomHash gh = [a,b,c,d,e]
|
||||||
|
where
|
||||||
|
bs = coerce gh
|
||||||
|
a = N.word32 (BS.take 4 bs)
|
||||||
|
b = N.word32 (BS.take 4 $ BS.drop 4 bs)
|
||||||
|
c = N.word32 (BS.take 4 $ BS.drop 8 bs)
|
||||||
|
d = N.word32 (BS.take 4 $ BS.drop 12 bs)
|
||||||
|
e = N.word32 (BS.take 4 $ BS.drop 16 bs)
|
||||||
|
|
||||||
startReflogIndexQueryQueue :: forall a m . ( Git3Perks m
|
startReflogIndexQueryQueue :: forall a m . ( Git3Perks m
|
||||||
, MonadReader Git3Env m
|
, MonadReader Git3Env m
|
||||||
|
@ -134,6 +193,13 @@ startReflogIndexQueryQueue rq = flip runContT pure do
|
||||||
rest <- readTVar answQ
|
rest <- readTVar answQ
|
||||||
for_ rest $ \x -> writeTMVar x Nothing
|
for_ rest $ \x -> writeTMVar x Nothing
|
||||||
|
|
||||||
|
bloomFilterSize :: Natural -> Natural -> Double -> Natural
|
||||||
|
bloomFilterSize n k p
|
||||||
|
| p <= 0 || p >= 1 = 0
|
||||||
|
| otherwise = rnd $ negate (fromIntegral n * fromIntegral k) / log (1 - p ** (1 / fromIntegral k))
|
||||||
|
where
|
||||||
|
rnd x = 2 ** realToFrac (ceiling (logBase 2 x)) & round
|
||||||
|
|
||||||
writeReflogIndex :: forall m . ( Git3Perks m
|
writeReflogIndex :: forall m . ( Git3Perks m
|
||||||
, MonadReader Git3Env m
|
, MonadReader Git3Env m
|
||||||
, HasClientAPI PeerAPI UNIX m
|
, HasClientAPI PeerAPI UNIX m
|
||||||
|
@ -195,4 +261,27 @@ writeReflogIndex = do
|
||||||
-- notice $ pretty sha1 <+> pretty tx
|
-- notice $ pretty sha1 <+> pretty tx
|
||||||
writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh)
|
writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh)
|
||||||
|
|
||||||
|
-- files <- lift listObjectIndexFiles
|
||||||
|
-- let num = sum (fmap snd files) `div` 56
|
||||||
|
-- let size = bloomFilterSize num 5 0.01
|
||||||
|
|
||||||
|
-- bloom <- liftIO $ stToIO (MBloom.new bloomHash (fromIntegral size))
|
||||||
|
|
||||||
|
-- lift $ enumEntries $ \bs -> do
|
||||||
|
-- liftIO $ stToIO $ MBloom.insert bloom (coerce bs)
|
||||||
|
|
||||||
|
-- let bloomIdxName = idxPath </> "filter"
|
||||||
|
-- bytes <- liftIO $ stToIO $ Bloom.freeze bloom
|
||||||
|
|
||||||
|
-- liftIO $ UIO.withBinaryFileAtomic bloomIdxName WriteMode $ \wh -> do
|
||||||
|
-- LBS.hPutStr wh "puk"
|
||||||
|
-- LBS.hPutStr wh (serialise bytes)
|
||||||
|
-- LBS.writeFile (serialise b
|
||||||
|
-- for_ ss $ \sha1 -> do
|
||||||
|
-- let key = coerce @_ @N.ByteString sha1
|
||||||
|
-- let value = coerce @_ @N.ByteString tx
|
||||||
|
-- -- notice $ pretty sha1 <+> pretty tx
|
||||||
|
-- writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue