From 01de679589f076de6542efc74bd1900317692c0a Mon Sep 17 00:00:00 2001 From: voidlizard Date: Fri, 3 Jan 2025 12:46:30 +0300 Subject: [PATCH] wip --- hbs2-git3/app/Main.hs | 54 +++++++++++++------------- hbs2-git3/lib/HBS2/Git3/State/Index.hs | 34 +++++++++++----- 2 files changed, 52 insertions(+), 36 deletions(-) diff --git a/hbs2-git3/app/Main.hs b/hbs2-git3/app/Main.hs index 9f09f8e3..90600f86 100644 --- a/hbs2-git3/app/Main.hs +++ b/hbs2-git3/app/Main.hs @@ -601,7 +601,6 @@ theDict = do ContT $ withAsync (startReflogIndexQueryQueue rq) let req h = do - debug $ "AAAAA" <+> pretty h let bs = coerce @GitHash @N.ByteString h let tr = const True w <- newEmptyTMVarIO @@ -1096,16 +1095,31 @@ theDict = do void $ flip runContT pure do rq <- newTQueueIO - ContT $ withAsync (startReflogIndexQueryQueue rq) + -- ContT $ withAsync (startReflogIndexQueryQueue rq) + + -- let req h = do + -- let bs = coerce @GitHash @N.ByteString h + -- let tr = const True + -- w <- newEmptyTMVarIO + -- atomically $ writeTQueue rq (bs, tr, w) + -- r <- atomically $ readTMVar w + -- pure $ isNothing r + + cache <- newTVarIO ( mempty :: HashSet GitHash ) + + -- читаем вообще всё из индекса в память и строим HashSet + -- получается, что вообще никакого профита, что это индекс, + -- это фуллскан в любом случае. + -- Индекс это сортированная последовательность [(GitHash, HashRef)] + -- в виде байстроки формата "SD", D ~ GitHash <> HashRef + lift $ enumEntries $ \e -> do + atomically $ modifyTVar cache ( HS.insert (coerce $ BS.take 20 e) ) let req h = do - let bs = coerce @GitHash @N.ByteString h - let tr = const True - w <- newEmptyTMVarIO - atomically $ writeTQueue rq (bs, tr, w) - r <- atomically $ readTMVar w - pure $ isNothing r + readTVarIO cache <&> not . HS.member h + -- читаем только те коммиты, которые не в индексе + -- очень быстро, пушо относительно мало объектов r <- lift $ readCommitChainHPSQ req Nothing h0 dontHandle cap <- liftIO getNumCapabilities @@ -1113,8 +1127,7 @@ theDict = do che <- ContT withGitCat pure $ gitReadObjectMaybe che - - new_ <- newTVarIO mempty + new_ <- newTQueueIO lift $ forConcurrently_ (HPSQ.toList r) $ \(commit,_,_) -> do (_,self) <- gitCatBatchQ commit @@ -1122,26 +1135,15 @@ theDict = do tree <- gitReadCommitTree self + -- читаем только те объекты, которые не в индексе hashes <- gitReadTreeObjectsOnly commit <&> ([commit,tree]<>) - -- >>= filterM req + >>= filterM req - atomically $ modifyTVar new_ (HS.union (HS.fromList hashes)) + atomically $ mapM_ (writeTQueue new_) hashes - - fps <- lift $ listObjectIndexFiles - <&> fmap fst - >>= liftIO . mapM (`mmapFileByteString` Nothing) - - allHashes <- readTVarIO new_ - newHashes <- newTVarIO mempty - for_ fps $ \mmaped -> do - for_ allHashes $ \ha -> do - found <- binarySearchBS 56 ( BS.take 20 . BS.drop 4) (coerce ha) mmaped - when (isNothing found) do - atomically $ modifyTVar newHashes (HS.insert ha) - - readTVarIO newHashes >>= liftIO . print . pretty . HS.size + -- 1.8 секунд и заметно растёт от числа коммитов, сука + atomically (STM.flushTQueue new_) >>= liftIO . print . pretty . length -- liftIO $ print $ pretty (HS -- fix \next -> do diff --git a/hbs2-git3/lib/HBS2/Git3/State/Index.hs b/hbs2-git3/lib/HBS2/Git3/State/Index.hs index 7ea8c871..a3f1570b 100644 --- a/hbs2-git3/lib/HBS2/Git3/State/Index.hs +++ b/hbs2-git3/lib/HBS2/Git3/State/Index.hs @@ -26,6 +26,7 @@ import Streaming hiding (run,chunksOf) import UnliftIO import UnliftIO.IO.File qualified as UIO +import Data.HashPSQ qualified as HPSQ readLogFileLBS :: forall opts m . ( MonadIO m, ReadLogOpts opts, BytesReader m ) @@ -102,23 +103,36 @@ startReflogIndexQueryQueue rq = flip runContT pure do mmaped <- liftIO $ for files (liftIO . flip mmapFileByteString Nothing) + answQ <- newTVarIO mempty + forever $ liftIO do requests <- atomically do _ <- peekTQueue rq - STM.flushTQueue rq + w <- STM.flushTQueue rq + for_ w $ \(k,_,a) -> do + modifyTVar answQ (HM.insert k a) + pure w - for_ requests $ \(s,f,answ) -> do - found <- for mmaped $ \bs -> runMaybeT do - -- FIXME: size-hardcodes - w <- binarySearchBS 56 ( BS.take 20 . BS.drop 4 ) s bs - >>= toMPlus + forConcurrently_ mmaped \bs -> do + for requests $ \(s,f,answ) -> runMaybeT do - let v = BS.drop ( w * 56 ) bs + still <- readTVarIO answQ <&> HM.member s - pure $ f v + guard still - let what = headMay (catMaybes found) - atomically $ writeTMVar answ what + -- FIXME: size-hardcodes + w <- binarySearchBS 56 ( BS.take 20 . BS.drop 4 ) s bs + >>= toMPlus + + let r = f (BS.drop (w * 56) bs) + + atomically do + writeTMVar answ (Just r) + modifyTVar answQ (HM.delete bs) + + atomically do + rest <- readTVar answQ + for_ rest $ \x -> writeTMVar x Nothing writeReflogIndex :: forall m . ( Git3Perks m , MonadReader Git3Env m