This commit is contained in:
voidlizard 2025-01-03 12:46:30 +03:00
parent 772ea4235e
commit 01de679589
2 changed files with 52 additions and 36 deletions

View File

@ -601,7 +601,6 @@ theDict = do
ContT $ withAsync (startReflogIndexQueryQueue rq) ContT $ withAsync (startReflogIndexQueryQueue rq)
let req h = do let req h = do
debug $ "AAAAA" <+> pretty h
let bs = coerce @GitHash @N.ByteString h let bs = coerce @GitHash @N.ByteString h
let tr = const True let tr = const True
w <- newEmptyTMVarIO w <- newEmptyTMVarIO
@ -1096,16 +1095,31 @@ theDict = do
void $ flip runContT pure do void $ flip runContT pure do
rq <- newTQueueIO rq <- newTQueueIO
ContT $ withAsync (startReflogIndexQueryQueue rq) -- ContT $ withAsync (startReflogIndexQueryQueue rq)
-- let req h = do
-- let bs = coerce @GitHash @N.ByteString h
-- let tr = const True
-- w <- newEmptyTMVarIO
-- atomically $ writeTQueue rq (bs, tr, w)
-- r <- atomically $ readTMVar w
-- pure $ isNothing r
cache <- newTVarIO ( mempty :: HashSet GitHash )
-- читаем вообще всё из индекса в память и строим HashSet
-- получается, что вообще никакого профита, что это индекс,
-- это фуллскан в любом случае.
-- Индекс это сортированная последовательность [(GitHash, HashRef)]
-- в виде байстроки формата "SD", D ~ GitHash <> HashRef
lift $ enumEntries $ \e -> do
atomically $ modifyTVar cache ( HS.insert (coerce $ BS.take 20 e) )
let req h = do let req h = do
let bs = coerce @GitHash @N.ByteString h readTVarIO cache <&> not . HS.member h
let tr = const True
w <- newEmptyTMVarIO
atomically $ writeTQueue rq (bs, tr, w)
r <- atomically $ readTMVar w
pure $ isNothing r
-- читаем только те коммиты, которые не в индексе
-- очень быстро, пушо относительно мало объектов
r <- lift $ readCommitChainHPSQ req Nothing h0 dontHandle r <- lift $ readCommitChainHPSQ req Nothing h0 dontHandle
cap <- liftIO getNumCapabilities cap <- liftIO getNumCapabilities
@ -1113,8 +1127,7 @@ theDict = do
che <- ContT withGitCat che <- ContT withGitCat
pure $ gitReadObjectMaybe che pure $ gitReadObjectMaybe che
new_ <- newTQueueIO
new_ <- newTVarIO mempty
lift $ forConcurrently_ (HPSQ.toList r) $ \(commit,_,_) -> do lift $ forConcurrently_ (HPSQ.toList r) $ \(commit,_,_) -> do
(_,self) <- gitCatBatchQ commit (_,self) <- gitCatBatchQ commit
@ -1122,26 +1135,15 @@ theDict = do
tree <- gitReadCommitTree self tree <- gitReadCommitTree self
-- читаем только те объекты, которые не в индексе
hashes <- gitReadTreeObjectsOnly commit hashes <- gitReadTreeObjectsOnly commit
<&> ([commit,tree]<>) <&> ([commit,tree]<>)
-- >>= filterM req >>= filterM req
atomically $ modifyTVar new_ (HS.union (HS.fromList hashes)) atomically $ mapM_ (writeTQueue new_) hashes
-- 1.8 секунд и заметно растёт от числа коммитов, сука
fps <- lift $ listObjectIndexFiles atomically (STM.flushTQueue new_) >>= liftIO . print . pretty . length
<&> fmap fst
>>= liftIO . mapM (`mmapFileByteString` Nothing)
allHashes <- readTVarIO new_
newHashes <- newTVarIO mempty
for_ fps $ \mmaped -> do
for_ allHashes $ \ha -> do
found <- binarySearchBS 56 ( BS.take 20 . BS.drop 4) (coerce ha) mmaped
when (isNothing found) do
atomically $ modifyTVar newHashes (HS.insert ha)
readTVarIO newHashes >>= liftIO . print . pretty . HS.size
-- liftIO $ print $ pretty (HS -- liftIO $ print $ pretty (HS
-- fix \next -> do -- fix \next -> do

View File

@ -26,6 +26,7 @@ import Streaming hiding (run,chunksOf)
import UnliftIO import UnliftIO
import UnliftIO.IO.File qualified as UIO import UnliftIO.IO.File qualified as UIO
import Data.HashPSQ qualified as HPSQ
readLogFileLBS :: forall opts m . ( MonadIO m, ReadLogOpts opts, BytesReader m ) readLogFileLBS :: forall opts m . ( MonadIO m, ReadLogOpts opts, BytesReader m )
@ -102,23 +103,36 @@ startReflogIndexQueryQueue rq = flip runContT pure do
mmaped <- liftIO $ for files (liftIO . flip mmapFileByteString Nothing) mmaped <- liftIO $ for files (liftIO . flip mmapFileByteString Nothing)
answQ <- newTVarIO mempty
forever $ liftIO do forever $ liftIO do
requests <- atomically do requests <- atomically do
_ <- peekTQueue rq _ <- peekTQueue rq
STM.flushTQueue rq w <- STM.flushTQueue rq
for_ w $ \(k,_,a) -> do
modifyTVar answQ (HM.insert k a)
pure w
forConcurrently_ mmaped \bs -> do
for requests $ \(s,f,answ) -> runMaybeT do
still <- readTVarIO answQ <&> HM.member s
guard still
for_ requests $ \(s,f,answ) -> do
found <- for mmaped $ \bs -> runMaybeT do
-- FIXME: size-hardcodes -- FIXME: size-hardcodes
w <- binarySearchBS 56 ( BS.take 20 . BS.drop 4 ) s bs w <- binarySearchBS 56 ( BS.take 20 . BS.drop 4 ) s bs
>>= toMPlus >>= toMPlus
let v = BS.drop ( w * 56 ) bs let r = f (BS.drop (w * 56) bs)
pure $ f v atomically do
writeTMVar answ (Just r)
modifyTVar answQ (HM.delete bs)
let what = headMay (catMaybes found) atomically do
atomically $ writeTMVar answ what rest <- readTVar answQ
for_ rest $ \x -> writeTMVar x Nothing
writeReflogIndex :: forall m . ( Git3Perks m writeReflogIndex :: forall m . ( Git3Perks m
, MonadReader Git3Env m , MonadReader Git3Env m