From 62eb5ca49f5bf735c064ef27dbcdc8fff9261f92 Mon Sep 17 00:00:00 2001 From: Dmitry Zuikov Date: Sun, 28 Jan 2024 06:49:18 +0300 Subject: [PATCH] fixed UuoMCa4gxd --- .fixme/log | 2 +- cabal.project | 2 +- flake.lock | 18 ++ flake.nix | 6 + hbs2-peer/app/RefLog.hs | 21 +-- .../lib/HBS2/Storage/Simple.hs | 3 +- hbs2-tests/hbs2-tests.cabal | 6 + hbs2-tests/repo-export/RepoExportMain.hs | 165 +++++++++++++++--- 8 files changed, 184 insertions(+), 39 deletions(-) diff --git a/.fixme/log b/.fixme/log index 941499a2..b39442b4 100644 --- a/.fixme/log +++ b/.fixme/log @@ -1,2 +1,2 @@ -(fixme-set "workflow" "done" "GPidfZYrFx") \ No newline at end of file +(fixme-set "workflow" "done" "UuoMCa4gxd") \ No newline at end of file diff --git a/cabal.project b/cabal.project index d37766c9..42ba3ea8 100644 --- a/cabal.project +++ b/cabal.project @@ -1,7 +1,7 @@ packages: **/*.cabal examples/*/*.cabal --- allow-newer: all +allow-newer: all -- executable-static: True -- profiling: True diff --git a/flake.lock b/flake.lock index 48a3106b..4ceca234 100644 --- a/flake.lock +++ b/flake.lock @@ -1,5 +1,22 @@ { "nodes": { + "bloomfilter": { + "flake": false, + "locked": { + "lastModified": 1691177623, + "narHash": "sha256-B/q0JxkARnZRUPe2CIN11QUQbum9nT0+jL3sn5tdYFE=", + "owner": "haskell-pkg-janitors", + "repo": "bloomfilter", + "rev": "0838caf5301da25830a7ff4ca4b4b7ce3bf9d441", + "type": "github" + }, + "original": { + "owner": "haskell-pkg-janitors", + "repo": "bloomfilter", + "rev": "0838caf5301da25830a7ff4ca4b4b7ce3bf9d441", + "type": "github" + } + }, "db-pipe": { "inputs": { "haskell-flake-utils": "haskell-flake-utils", @@ -282,6 +299,7 @@ }, "root": { "inputs": { + "bloomfilter": "bloomfilter", "db-pipe": "db-pipe", "fixme": "fixme", "haskell-flake-utils": "haskell-flake-utils_4", diff --git a/flake.nix b/flake.nix index 79c04e86..fc3e48ad 100644 --- a/flake.nix +++ b/flake.nix @@ -23,6 +23,11 @@ inputs = { flake = false; }; + bloomfilter = { + url = "github:haskell-pkg-janitors/bloomfilter/0838caf5301da25830a7ff4ca4b4b7ce3bf9d441"; + flake = false; + }; + }; outputs = { self, nixpkgs, haskell-flake-utils, ... }@inputs: @@ -60,6 +65,7 @@ outputs = { self, nixpkgs, haskell-flake-utils, ... }@inputs: hpPreOverrides = {pkgs, ...}: final: prev: with pkgs; { saltine = prev.callCabal2nix "saltine" inputs.saltine { inherit (pkgs) libsodium; }; + # bloomfilter = prev.callCabal2nix "bloomfilter" inputs.bloomfilter { }; }; packagePostOverrides = { pkgs }: with pkgs; with haskell.lib; [ diff --git a/hbs2-peer/app/RefLog.hs b/hbs2-peer/app/RefLog.hs index 0c992403..a5fffaea 100644 --- a/hbs2-peer/app/RefLog.hs +++ b/hbs2-peer/app/RefLog.hs @@ -69,23 +69,24 @@ mkRefLogRequestAdapter :: forall e s m . ( MonadIO m => SomeBrains e -> m (RefLogRequestI e (ResponseM e m )) mkRefLogRequestAdapter brains = do sto <- getStorage - pure $ RefLogRequestI (doOnRefLogRequest sto) dontHandle (isPolledRef @e brains) + pure $ RefLogRequestI (doOnRefLogRequest brains sto) dontHandle (isPolledRef @e brains) - --- FIXME: check-if-subscribed --- не дергать диск для неизвестных ссылок --- должно уменьшить дергание диска и флуд --- в логе doOnRefLogRequest :: forall e s m . ( MonadIO m , MyPeer e , s ~ Encryption e , IsRefPubKey s ) - => AnyStorage -> (Peer e, PubKey 'Sign s) -> m (Maybe (Hash HbSync)) - -doOnRefLogRequest sto (_,pk) = do - liftIO $ getRef sto (RefLogKey @s pk) + => SomeBrains e + -> AnyStorage + -> (Peer e, PubKey 'Sign s) + -> m (Maybe (Hash HbSync)) +doOnRefLogRequest brains sto (_,pk) = runMaybeT do + isPolledRef @e brains pk >>= guard + ref <- liftIO $ getRef sto (RefLogKey @s pk) + when (isNothing ref) do + warn $ "missed reflog value" <+> pretty ref + toMPlus ref data RefLogWorkerAdapter e = RefLogWorkerAdapter diff --git a/hbs2-storage-simple/lib/HBS2/Storage/Simple.hs b/hbs2-storage-simple/lib/HBS2/Storage/Simple.hs index fb350229..073ef397 100644 --- a/hbs2-storage-simple/lib/HBS2/Storage/Simple.hs +++ b/hbs2-storage-simple/lib/HBS2/Storage/Simple.hs @@ -439,7 +439,7 @@ simpleReadLinkVal ss hash = do let fn = simpleRefFileName ss hash rs <- spawnAndWait ss $ do (Just <$> BS.readFile fn) `catchAny` \e -> do - err $ "simpleReadLinkVal" <+> pretty hash <+> pretty fn <+> viaShow e + trace $ "simpleReadLinkVal" <+> pretty hash <+> pretty fn <+> viaShow e pure Nothing runMaybeT do @@ -465,7 +465,6 @@ instance ( MonadIO m, IsKey hash updateRef ss ref v = do let refHash = hashObject @hash ref let meta = refMetaData ref - debug $ "updateRef:" <+> pretty refHash void $ liftIO $ simpleWriteLinkRawRef ss meta refHash v getRef ss ref = do diff --git a/hbs2-tests/hbs2-tests.cabal b/hbs2-tests/hbs2-tests.cabal index 2bf68124..ba9793ba 100644 --- a/hbs2-tests/hbs2-tests.cabal +++ b/hbs2-tests/hbs2-tests.cabal @@ -981,5 +981,11 @@ executable test-repo-export , temporary , unliftio , unordered-containers + , bloomfilter >=2.0.1.2 + , timeit + , memory + , deepseq + , xxhash-ffi + diff --git a/hbs2-tests/repo-export/RepoExportMain.hs b/hbs2-tests/repo-export/RepoExportMain.hs index c662deab..ced13b51 100644 --- a/hbs2-tests/repo-export/RepoExportMain.hs +++ b/hbs2-tests/repo-export/RepoExportMain.hs @@ -24,11 +24,40 @@ import Control.Monad.Catch import Control.Monad.Trans.Cont import Control.Monad.Trans.Maybe import Data.ByteString.Lazy qualified as LBS +import Data.ByteString.Lazy.Char8 qualified as LBS8 import Data.ByteString.Lazy (ByteString) import Codec.Serialise import Data.Maybe import Data.HashSet qualified as HS +import Data.ByteArray.Hash (SipHash(..), SipKey(..)) +import Data.ByteArray.Hash qualified as BA + +import System.TimeIt + +-- import Data.BloomFilter.Easy qualified as B +import Data.BloomFilter qualified as B +import Data.BloomFilter.Easy qualified as B +import Data.BloomFilter.Hash qualified as B +import Control.Concurrent.STM (flushTQueue) +import Control.DeepSeq (deepseq) +import Data.Bits +import Data.HashMap.Strict (HashMap) +import Data.HashMap.Strict qualified as HashMap +import Data.Word +import Data.Bits + +import Data.Vector.Mutable qualified as V +import Data.IntMap qualified as IntMap +import Data.IntMap (IntMap) + +import Data.Hashable +import Data.Digest.XXHash.FFI + +import Streaming.Prelude qualified as S + +-- import Control.Concurrent.BloomFilter qualified as U + data RPCEndpoints = RPCEndpoints { rpcPeer :: ServiceCaller PeerAPI UNIX @@ -73,44 +102,130 @@ runWithRPC action = do void $ waitAnyCatchCancel [messaging, c1] + +doAlter :: (Bits a1, Integral a2, Num a1) => a2 -> Maybe a1 -> Maybe a1 +doAlter j = \case + Nothing -> Just (setBit 0 (fromIntegral j)) + Just x -> Just (setBit x (fromIntegral j)) +{-# INLINE doAlter #-} + main :: IO () main = do - dir <- findGitDir "." >>= orThrowUser "not a git dir" + ls <- LBS8.readFile "input.txt" <&> LBS8.lines - flip runContT pure do + z <- newIORef 0 - o <- gitListAllObjects + let (sz, hn) = B.suggestSizing 5000000 0.01 - ep <- ContT runWithRPC + let bloom = B.fromList (\s -> [ xxh32 (LBS.toStrict s) x | x <- [1 .. fromIntegral hn] ]) sz ls + -- let bloom = B.fromList (\s -> [ xxh32 (LBS.toStrict s) x | x <- [1 .. fromIntegral 2] ]) sz ls + -- let bloom = B.fromList (\s -> [ fromIntegral (hashWithSalt x ls) | x <- [1 .. hn] ]) sz ls - let sto = StorageClient (rpcStorage ep) + print $ B.length bloom - cat <- startGitCatFile + -- v <- V.new @_ @Word8 (sz `div` 8) - -- h <- gitGetHash "HEAD" >>= orThrowUser "wtf1" - -- rvl <- gitRevList Nothing h + -- thm <- newIORef (HashMap.empty @Word64 @Word64) + -- thi <- newIORef (IntMap.empty @Word64) + -- tvm <- newTVarIO (HashMap.empty @Word64 @Word64) - liftIO do - allShit' <- for o $ \r@(o,h) -> runMaybeT do - GitObject t lbs <- toMPlus =<< gitReadFromCatFileBatch cat h - liftIO $ print $ pretty (t, h) - ght <- writeAsMerkle sto lbs + -- tq <- newTQueueIO - tt <- getBlock sto ght - >>= toMPlus - >>= orThrowUser "FUCK" . (deserialiseOrFail @(MTree [HashRef])) + -- haha <- for ls $ \s -> do + -- let hashes = [ xxh32 s x `mod` fromIntegral sz | x <- [1 .. 7] ] + -- vals <- for hashes $ \h -> do + -- let (w,b) = h `divMod` 64 + -- pure (w, setBit 0 (fromIntegral b) :: Word64) + -- pure $ HashMap.fromListWith (.|.) vals - let txt = fromString (show $ pretty t) - let ann = MTreeAnn (ShortMetadata txt) NullEncryption tt - putBlock sto (serialise ann) >>= toMPlus + -- let result = HashMap.unions haha - let pt = HS.fromList (HashRef <$> catMaybes allShit') - & HS.toList - & toPTree (MaxSize 256) (MaxNum 256) + -- print $ length result - ht <- makeMerkle 0 pt $ \(_,_,bss) -> do - void $ putBlock sto bss + -- for_ hashes $ \i -> do + -- let (w,b) = i `divMod` 64 + -- pure + -- pure () - print $ pretty (HashRef ht) + -- atomically $ mapM_ (writeTQueue tq) hashes + + -- print "FUCK!" + + -- pure () + -- modifyIORef thm (HashMap.alter (doAlter j) (fromIntegral i)) + + -- w <- readIORef z + -- print w + + -- let bloom = B.easyList 0.01 ls + + -- print $ B.length bloom + + --dir <- findGitDir "." >>= orThrowUser "not a git dir" + + --flip runContT pure do + + -- o <- gitListAllObjects + + -- ep <- ContT runWithRPC + + -- let sto = StorageClient (rpcStorage ep) + + -- cat <- startGitCatFile + + -- -- h <- gitGetHash "HEAD" >>= orThrowUser "wtf1" + -- -- rvl <- gitRevList Nothing h + -- -- + + -- items <- for o $ \(a,GitHash b) -> do + -- pure b + + -- liftIO $ print $ "bloom params" <+> pretty (B.suggestSizing (length items) 0.01) + + -- timeItNamed (show $ "build bloom filter" <+> pretty (length items)) do + -- let bloom = B.easyList 0.01 items + + -- liftIO $ print $ "bloom filter size" <+> pretty (B.length bloom) <> line + -- <> "data size" <+> pretty (LBS.length (serialise items)) + + -- timeItNamed "calc siphashes" do + + -- let w = 67108864 + -- tvm <- newTVarIO (HashMap.empty @Word64 @Bool) + -- -- q <- newTQueueIO + + -- for_ items $ \it -> do + -- for_ (B.cheapHashes 7 it) $ \hx -> do + -- let k = fromIntegral (hx `mod` w) + -- atomically $ modifyTVar tvm (HashMap.insert k True) + + + -- wtf <- liftIO $ readTVarIO tvm + -- liftIO $ print $ length wtf + + -- liftIO $ print $ LBS.length $ serialise bloom + + -- liftIO do + -- allShit' <- for o $ \r@(o,h) -> runMaybeT do + -- GitObject t lbs <- toMPlus =<< gitReadFromCatFileBatch cat h + -- liftIO $ print $ pretty (t, h) + -- ght <- writeAsMerkle sto lbs + + -- tt <- getBlock sto ght + -- >>= toMPlus + -- >>= orThrowUser "FUCK" . (deserialiseOrFail @(MTree [HashRef])) + + -- let txt = fromString (show $ pretty t) + -- let ann = MTreeAnn (ShortMetadata txt) NullEncryption tt + -- putBlock sto (serialise ann) >>= toMPlus + + -- let pt = HS.fromList (HashRef <$> catMaybes allShit') + -- & HS.toList + -- & toPTree (MaxSize 256) (MaxNum 256) + + -- ht <- makeMerkle 0 pt $ \(_,_,bss) -> do + -- void $ putBlock sto bss + + -- print $ pretty (HashRef ht)