mirror of https://github.com/voidlizard/hbs2
wip
This commit is contained in:
parent
218ff12000
commit
568fb735a5
|
@ -593,16 +593,9 @@ theDict = do
|
|||
|
||||
liftIO $ mapM_ setCurrentDirectory mpath
|
||||
|
||||
rq <- newTQueueIO
|
||||
ContT $ withAsync (startReflogIndexQueryQueue rq)
|
||||
idx <- lift openIndex
|
||||
|
||||
let req h = do
|
||||
let bs = coerce @GitHash @N.ByteString h
|
||||
let tr = const True
|
||||
w <- newEmptyTMVarIO
|
||||
atomically $ writeTQueue rq (bs, tr, w)
|
||||
r <- atomically $ readTMVar w
|
||||
pure $ isNothing r
|
||||
let req h = lift $ indexEntryLookup idx h <&> isNothing
|
||||
|
||||
-- let hss = headDef "HEAD" [ x | StringLike x <- snd (splitOpts [] syn) ]
|
||||
h <- gitRevParseThrow hss
|
||||
|
@ -611,7 +604,6 @@ theDict = do
|
|||
for_ (HPSQ.toList r) $ \(k,_,_) -> do
|
||||
liftIO $ print $ pretty k
|
||||
|
||||
|
||||
entry $ bindMatch "test:git:log:cat" $ nil_ $ \syn -> lift do
|
||||
|
||||
let (opts, argz) = splitOpts [("--git",0),("--packed",0),("--import",1)] syn
|
||||
|
@ -754,23 +746,18 @@ theDict = do
|
|||
found <- liftIO $ binarySearchBS 56 (BS.take 20 . BS.drop 4) (coerce h) file
|
||||
liftIO $ notice $ pretty h <+> pretty (isJust found)
|
||||
|
||||
entry $ bindMatch "reflog:index:search" $ nil_ $ \syn -> lift $ flip runContT pure do
|
||||
entry $ bindMatch "reflog:index:search" $ nil_ $ \syn -> lift do
|
||||
|
||||
let (_, argz) = splitOpts [] syn
|
||||
|
||||
hash <- headMay [ x | GitHashLike x <- argz ] & orThrowUser "need sha1"
|
||||
|
||||
rq <- newTQueueIO
|
||||
idx <- openIndex
|
||||
|
||||
ContT $ withAsync $ startReflogIndexQueryQueue rq
|
||||
answ <- indexEntryLookup idx hash
|
||||
|
||||
answ_ <- newEmptyTMVarIO
|
||||
|
||||
atomically $ writeTQueue rq (coerce hash, const True, answ_)
|
||||
|
||||
answ <- atomically $ readTMVar answ_
|
||||
|
||||
for_ answ $ \a -> do
|
||||
for_ answ $ \bs -> do
|
||||
let a = coerce (BS.take 32 bs) :: HashRef
|
||||
liftIO $ print $ pretty a
|
||||
|
||||
entry $ bindMatch "test:git:log:index:flat:search:linear:test" $ nil_ $ \case
|
||||
|
@ -1095,7 +1082,7 @@ theDict = do
|
|||
|
||||
idx <- openIndex
|
||||
|
||||
let req h = lift $ indexEntryLookup idx h <&> isNothing
|
||||
-- let req h = lift $ indexEntryLookup idx h <&> isNothing
|
||||
|
||||
flip runContT pure do
|
||||
cap <- liftIO getNumCapabilities
|
||||
|
@ -1114,7 +1101,7 @@ theDict = do
|
|||
-- читаем только те объекты, которые не в индексе
|
||||
hashes <- gitReadTreeObjectsOnly commit
|
||||
<&> ([commit,tree]<>)
|
||||
>>= filterM req
|
||||
>>= lift . indexFilterNewObjects idx . HS.fromList
|
||||
--
|
||||
atomically $ mapM_ (writeTQueue new_) hashes
|
||||
atomically (STM.flushTQueue new_) >>= liftIO . print . pretty . length
|
||||
|
@ -1131,63 +1118,6 @@ theDict = do
|
|||
|
||||
void $ flip runContT pure do
|
||||
|
||||
-- cache <- newTVarIO ( mempty :: HashSet GitHash )
|
||||
|
||||
-- читаем вообще всё из индекса в память и строим HashSet
|
||||
-- получается, что вообще никакого профита, что это индекс,
|
||||
-- это фуллскан в любом случае.
|
||||
-- Индекс это сортированная последовательность [(GitHash, HashRef)]
|
||||
-- в виде байстроки формата "SD", D ~ GitHash <> HashRef
|
||||
|
||||
-- let blm = runST (MBloom.new undefined 1000000)
|
||||
--
|
||||
-- bloom <- liftIO $ stToIO $ MBloom.new 10000
|
||||
--
|
||||
|
||||
index <- lift openIndex
|
||||
|
||||
-- let req h = do
|
||||
-- atomically do
|
||||
-- readTVar cache <&> not . HS.member h
|
||||
|
||||
-- let
|
||||
-- req2 :: GitHash -> Git3 m Bool
|
||||
-- req2 h = liftIO do
|
||||
-- here <- liftIO $ stToIO $ MBloom.elem h bloom
|
||||
|
||||
-- if not here then pure True else do
|
||||
-- atomically $ modifyTVar blmn_ succ
|
||||
-- forConcurrently_ files $ \f -> do
|
||||
-- found <- binarySearchBS 56 ( BS.take 20. BS.drop 4 ) (coerce h) f
|
||||
-- when (isJust found) do
|
||||
-- atomically $ modifyTVar excl_ (HS.insert h)
|
||||
-- readTVarIO excl_ <&> not . HS.member h
|
||||
|
||||
-- req3 :: HashSet GitHash -> Git3 m (HashSet GitHash)
|
||||
-- req3 hs = liftIO do
|
||||
|
||||
-- forConcurrently_ files $ \f -> do
|
||||
-- flip fix (HS.toList hs) $ \next -> \case
|
||||
-- [] -> none
|
||||
-- (x:xs) -> do
|
||||
-- already <- readTVarIO excl_ <&> HS.member x
|
||||
-- inBloom <- liftIO $ stToIO $ MBloom.elem x bloom
|
||||
|
||||
-- when inBloom do
|
||||
-- atomically $ modifyTVar blmn_ succ
|
||||
|
||||
-- when (not already || inBloom) do
|
||||
-- found <- binarySearchBS 56 ( BS.take 20. BS.drop 4 ) (coerce x) f
|
||||
-- when (isJust found) do
|
||||
-- atomically $ modifyTVar excl_ (HS.insert x)
|
||||
-- next xs
|
||||
|
||||
-- found <- readTVarIO excl_
|
||||
-- pure ( hs `HS.difference` found)
|
||||
|
||||
-- читаем только те коммиты, которые не в индексе
|
||||
-- очень быстро, пушо относительно мало объектов
|
||||
|
||||
idx <- lift openIndex
|
||||
let req h = lift $ indexEntryLookup idx h <&> isNothing
|
||||
|
||||
|
@ -1217,7 +1147,7 @@ theDict = do
|
|||
|
||||
notice $ "all shit read" <+> pretty (realToFrac @_ @(Fixed E2) t3)
|
||||
|
||||
(t4,new) <- lift $ timeItT $ readTVarIO uniq_ >>= filterM req . HS.toList
|
||||
(t4,new) <- lift $ timeItT $ readTVarIO uniq_ >>= indexFilterNewObjects idx
|
||||
|
||||
notice $ pretty (length new) <+> "new objects" <+> "at" <+> pretty (realToFrac @_ @(Fixed E2) t4)
|
||||
|
||||
|
|
|
@ -17,6 +17,8 @@ import Data.ByteString.Lazy qualified as LBS
|
|||
import Data.Maybe
|
||||
import Data.HashMap.Strict (HashMap)
|
||||
import Data.HashMap.Strict qualified as HM
|
||||
import Data.HashSet (HashSet)
|
||||
import Data.HashSet qualified as HS
|
||||
import Data.Word
|
||||
import Data.Vector (Vector)
|
||||
import Data.Vector qualified as V
|
||||
|
@ -79,7 +81,7 @@ data IndexEntry =
|
|||
|
||||
data Index a =
|
||||
Index { entries :: [IndexEntry]
|
||||
, bitmap :: MBloom.MBloom RealWorld GitHash
|
||||
, bitmap :: Bloom GitHash
|
||||
}
|
||||
|
||||
openIndex :: forall a m . (Git3Perks m, MonadReader Git3Env m)
|
||||
|
@ -89,8 +91,20 @@ openIndex = do
|
|||
files <- listObjectIndexFiles
|
||||
bss <- liftIO $ for files $ \(f,_) -> (f,) <$> mmapFileByteString f Nothing
|
||||
let entries = [ IndexEntry f bs | (f,bs) <- bss ]
|
||||
bloom <- liftIO $ stToIO $ MBloom.new bloomHash 10
|
||||
pure $ Index entries bloom
|
||||
let n = sum (fmap snd files)
|
||||
let bss = bloomFilterSize n 5 0.01 & fromIntegral
|
||||
|
||||
bloom <- liftIO $ stToIO $ MBloom.new bloomHash bss
|
||||
|
||||
let idx = Index entries undefined
|
||||
|
||||
enumEntries idx $ \bs -> do
|
||||
let h = coerce (BS.take 20 bs) :: GitHash
|
||||
liftIO $ stToIO (MBloom.insert bloom h)
|
||||
|
||||
bm <- liftIO $ stToIO $ Bloom.freeze bloom
|
||||
|
||||
pure $ idx { bitmap = bm }
|
||||
|
||||
indexEntryLookup :: forall a m . (Git3Perks m)
|
||||
=> Index a
|
||||
|
@ -101,9 +115,11 @@ indexEntryLookup Index{..} h = do
|
|||
already_ <- newTVarIO ( mempty :: HashMap GitHash N.ByteString )
|
||||
forConcurrently_ entries $ \IndexEntry{..} -> do
|
||||
what <- readTVarIO already_ <&> HM.lookup h
|
||||
case what of
|
||||
Just{} -> none
|
||||
Nothing -> do
|
||||
let inBloom = Bloom.elem h bitmap
|
||||
case (inBloom,what) of
|
||||
(False,_) -> none
|
||||
(_,Just{}) -> none
|
||||
(_,Nothing) -> do
|
||||
offset' <- binarySearchBS 56 ( BS.take 20 . BS.drop 4 ) (coerce h) entryBS
|
||||
maybe1 offset' none $ \offset -> do
|
||||
let ebs = BS.take 32 $ BS.drop (offset + 4 + 20) entryBS
|
||||
|
@ -111,6 +127,33 @@ indexEntryLookup Index{..} h = do
|
|||
|
||||
readTVarIO already_ <&> headMay . HM.elems
|
||||
|
||||
indexFilterNewObjects :: forall a m . (Git3Perks m)
|
||||
=> Index a
|
||||
-> HashSet GitHash
|
||||
-> m [GitHash]
|
||||
|
||||
indexFilterNewObjects Index{..} hashes = do
|
||||
old_ <- newTVarIO ( mempty :: HashSet GitHash )
|
||||
forConcurrently_ entries $ \IndexEntry{..} -> do
|
||||
flip fix (HS.toList hashes) $ \next -> \case
|
||||
[] -> none
|
||||
(x:xs) -> do
|
||||
let inBloom = Bloom.elem x bitmap
|
||||
if not inBloom then
|
||||
next xs
|
||||
else do
|
||||
old <- readTVarIO old_ <&> HS.member x
|
||||
if old then
|
||||
next xs
|
||||
else do
|
||||
off <- binarySearchBS 56 ( BS.take 20 . BS.drop 4 ) (coerce x) entryBS
|
||||
when (isJust off) do
|
||||
atomically $ modifyTVar old_ (HS.insert x)
|
||||
next xs
|
||||
|
||||
old <- readTVarIO old_
|
||||
pure $ HS.toList (hashes `HS.difference` old)
|
||||
|
||||
listObjectIndexFiles :: forall m . ( Git3Perks m
|
||||
, MonadReader Git3Env m
|
||||
) => m [(FilePath, Natural)]
|
||||
|
@ -142,58 +185,10 @@ bloomHash gh = [a,b,c,d,e]
|
|||
d = N.word32 (BS.take 4 $ BS.drop 12 bs)
|
||||
e = N.word32 (BS.take 4 $ BS.drop 16 bs)
|
||||
|
||||
startReflogIndexQueryQueue :: forall a m . ( Git3Perks m
|
||||
, MonadReader Git3Env m
|
||||
, HasClientAPI PeerAPI UNIX m
|
||||
, HasClientAPI RefLogAPI UNIX m
|
||||
, HasStorage m
|
||||
)
|
||||
=> TQueue (BS.ByteString, BS.ByteString -> a, TMVar (Maybe a))
|
||||
-> m ()
|
||||
|
||||
startReflogIndexQueryQueue rq = flip runContT pure do
|
||||
files <- lift $ listObjectIndexFiles <&> fmap fst
|
||||
|
||||
-- один файл - не более, чем один поток
|
||||
-- мапим файлы
|
||||
-- возвращаем функцию запроса?
|
||||
-- для каждого файла -- мы создаём отдельную очередь,
|
||||
-- нам надо искать во всех файлах
|
||||
|
||||
mmaped <- liftIO $ for files (liftIO . flip mmapFileByteString Nothing)
|
||||
|
||||
answQ <- newTVarIO mempty
|
||||
|
||||
forever $ liftIO do
|
||||
requests <- atomically do
|
||||
_ <- peekTQueue rq
|
||||
w <- STM.flushTQueue rq
|
||||
for_ w $ \(k,_,a) -> do
|
||||
modifyTVar answQ (HM.insert k a)
|
||||
pure w
|
||||
|
||||
forConcurrently_ mmaped \bs -> do
|
||||
for requests $ \(s,f,answ) -> runMaybeT do
|
||||
|
||||
still <- readTVarIO answQ <&> HM.member s
|
||||
|
||||
guard still
|
||||
|
||||
-- FIXME: size-hardcodes
|
||||
w <- binarySearchBS 56 ( BS.take 20 . BS.drop 4 ) s bs
|
||||
>>= toMPlus
|
||||
|
||||
let r = f (BS.drop (w * 56) bs)
|
||||
|
||||
atomically do
|
||||
writeTMVar answ (Just r)
|
||||
modifyTVar answQ (HM.delete bs)
|
||||
|
||||
atomically do
|
||||
rest <- readTVar answQ
|
||||
for_ rest $ \x -> writeTMVar x Nothing
|
||||
|
||||
bloomFilterSize :: Natural -> Natural -> Double -> Natural
|
||||
bloomFilterSize :: Natural -- ^ elems?
|
||||
-> Natural -- ^ hash functions
|
||||
-> Double -- ^ error probability
|
||||
-> Natural
|
||||
bloomFilterSize n k p
|
||||
| p <= 0 || p >= 1 = 0
|
||||
| otherwise = rnd $ negate (fromIntegral n * fromIntegral k) / log (1 - p ** (1 / fromIntegral k))
|
||||
|
|
Loading…
Reference in New Issue