fixed UuoMCa4gxd

This commit is contained in:
Dmitry Zuikov 2024-01-28 06:49:18 +03:00
parent 34868173ed
commit 62eb5ca49f
8 changed files with 184 additions and 39 deletions

View File

@ -1,2 +1,2 @@
(fixme-set "workflow" "done" "GPidfZYrFx")
(fixme-set "workflow" "done" "UuoMCa4gxd")

View File

@ -1,7 +1,7 @@
packages: **/*.cabal
examples/*/*.cabal
-- allow-newer: all
allow-newer: all
-- executable-static: True
-- profiling: True

View File

@ -1,5 +1,22 @@
{
"nodes": {
"bloomfilter": {
"flake": false,
"locked": {
"lastModified": 1691177623,
"narHash": "sha256-B/q0JxkARnZRUPe2CIN11QUQbum9nT0+jL3sn5tdYFE=",
"owner": "haskell-pkg-janitors",
"repo": "bloomfilter",
"rev": "0838caf5301da25830a7ff4ca4b4b7ce3bf9d441",
"type": "github"
},
"original": {
"owner": "haskell-pkg-janitors",
"repo": "bloomfilter",
"rev": "0838caf5301da25830a7ff4ca4b4b7ce3bf9d441",
"type": "github"
}
},
"db-pipe": {
"inputs": {
"haskell-flake-utils": "haskell-flake-utils",
@ -282,6 +299,7 @@
},
"root": {
"inputs": {
"bloomfilter": "bloomfilter",
"db-pipe": "db-pipe",
"fixme": "fixme",
"haskell-flake-utils": "haskell-flake-utils_4",

View File

@ -23,6 +23,11 @@ inputs = {
flake = false;
};
bloomfilter = {
url = "github:haskell-pkg-janitors/bloomfilter/0838caf5301da25830a7ff4ca4b4b7ce3bf9d441";
flake = false;
};
};
outputs = { self, nixpkgs, haskell-flake-utils, ... }@inputs:
@ -60,6 +65,7 @@ outputs = { self, nixpkgs, haskell-flake-utils, ... }@inputs:
hpPreOverrides = {pkgs, ...}: final: prev: with pkgs; {
saltine = prev.callCabal2nix "saltine" inputs.saltine { inherit (pkgs) libsodium; };
# bloomfilter = prev.callCabal2nix "bloomfilter" inputs.bloomfilter { };
};
packagePostOverrides = { pkgs }: with pkgs; with haskell.lib; [

View File

@ -69,23 +69,24 @@ mkRefLogRequestAdapter :: forall e s m . ( MonadIO m
=> SomeBrains e -> m (RefLogRequestI e (ResponseM e m ))
mkRefLogRequestAdapter brains = do
sto <- getStorage
pure $ RefLogRequestI (doOnRefLogRequest sto) dontHandle (isPolledRef @e brains)
pure $ RefLogRequestI (doOnRefLogRequest brains sto) dontHandle (isPolledRef @e brains)
-- FIXME: check-if-subscribed
-- не дергать диск для неизвестных ссылок
-- должно уменьшить дергание диска и флуд
-- в логе
doOnRefLogRequest :: forall e s m . ( MonadIO m
, MyPeer e
, s ~ Encryption e
, IsRefPubKey s
)
=> AnyStorage -> (Peer e, PubKey 'Sign s) -> m (Maybe (Hash HbSync))
doOnRefLogRequest sto (_,pk) = do
liftIO $ getRef sto (RefLogKey @s pk)
=> SomeBrains e
-> AnyStorage
-> (Peer e, PubKey 'Sign s)
-> m (Maybe (Hash HbSync))
doOnRefLogRequest brains sto (_,pk) = runMaybeT do
isPolledRef @e brains pk >>= guard
ref <- liftIO $ getRef sto (RefLogKey @s pk)
when (isNothing ref) do
warn $ "missed reflog value" <+> pretty ref
toMPlus ref
data RefLogWorkerAdapter e =
RefLogWorkerAdapter

View File

@ -439,7 +439,7 @@ simpleReadLinkVal ss hash = do
let fn = simpleRefFileName ss hash
rs <- spawnAndWait ss $ do
(Just <$> BS.readFile fn) `catchAny` \e -> do
err $ "simpleReadLinkVal" <+> pretty hash <+> pretty fn <+> viaShow e
trace $ "simpleReadLinkVal" <+> pretty hash <+> pretty fn <+> viaShow e
pure Nothing
runMaybeT do
@ -465,7 +465,6 @@ instance ( MonadIO m, IsKey hash
updateRef ss ref v = do
let refHash = hashObject @hash ref
let meta = refMetaData ref
debug $ "updateRef:" <+> pretty refHash
void $ liftIO $ simpleWriteLinkRawRef ss meta refHash v
getRef ss ref = do

View File

@ -981,5 +981,11 @@ executable test-repo-export
, temporary
, unliftio
, unordered-containers
, bloomfilter >=2.0.1.2
, timeit
, memory
, deepseq
, xxhash-ffi

View File

@ -24,11 +24,40 @@ import Control.Monad.Catch
import Control.Monad.Trans.Cont
import Control.Monad.Trans.Maybe
import Data.ByteString.Lazy qualified as LBS
import Data.ByteString.Lazy.Char8 qualified as LBS8
import Data.ByteString.Lazy (ByteString)
import Codec.Serialise
import Data.Maybe
import Data.HashSet qualified as HS
import Data.ByteArray.Hash (SipHash(..), SipKey(..))
import Data.ByteArray.Hash qualified as BA
import System.TimeIt
-- import Data.BloomFilter.Easy qualified as B
import Data.BloomFilter qualified as B
import Data.BloomFilter.Easy qualified as B
import Data.BloomFilter.Hash qualified as B
import Control.Concurrent.STM (flushTQueue)
import Control.DeepSeq (deepseq)
import Data.Bits
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.Word
import Data.Bits
import Data.Vector.Mutable qualified as V
import Data.IntMap qualified as IntMap
import Data.IntMap (IntMap)
import Data.Hashable
import Data.Digest.XXHash.FFI
import Streaming.Prelude qualified as S
-- import Control.Concurrent.BloomFilter qualified as U
data RPCEndpoints =
RPCEndpoints
{ rpcPeer :: ServiceCaller PeerAPI UNIX
@ -73,44 +102,130 @@ runWithRPC action = do
void $ waitAnyCatchCancel [messaging, c1]
doAlter :: (Bits a1, Integral a2, Num a1) => a2 -> Maybe a1 -> Maybe a1
doAlter j = \case
Nothing -> Just (setBit 0 (fromIntegral j))
Just x -> Just (setBit x (fromIntegral j))
{-# INLINE doAlter #-}
main :: IO ()
main = do
dir <- findGitDir "." >>= orThrowUser "not a git dir"
ls <- LBS8.readFile "input.txt" <&> LBS8.lines
flip runContT pure do
z <- newIORef 0
o <- gitListAllObjects
let (sz, hn) = B.suggestSizing 5000000 0.01
ep <- ContT runWithRPC
let bloom = B.fromList (\s -> [ xxh32 (LBS.toStrict s) x | x <- [1 .. fromIntegral hn] ]) sz ls
-- let bloom = B.fromList (\s -> [ xxh32 (LBS.toStrict s) x | x <- [1 .. fromIntegral 2] ]) sz ls
-- let bloom = B.fromList (\s -> [ fromIntegral (hashWithSalt x ls) | x <- [1 .. hn] ]) sz ls
let sto = StorageClient (rpcStorage ep)
print $ B.length bloom
cat <- startGitCatFile
-- v <- V.new @_ @Word8 (sz `div` 8)
-- h <- gitGetHash "HEAD" >>= orThrowUser "wtf1"
-- rvl <- gitRevList Nothing h
-- thm <- newIORef (HashMap.empty @Word64 @Word64)
-- thi <- newIORef (IntMap.empty @Word64)
-- tvm <- newTVarIO (HashMap.empty @Word64 @Word64)
liftIO do
allShit' <- for o $ \r@(o,h) -> runMaybeT do
GitObject t lbs <- toMPlus =<< gitReadFromCatFileBatch cat h
liftIO $ print $ pretty (t, h)
ght <- writeAsMerkle sto lbs
-- tq <- newTQueueIO
tt <- getBlock sto ght
>>= toMPlus
>>= orThrowUser "FUCK" . (deserialiseOrFail @(MTree [HashRef]))
-- haha <- for ls $ \s -> do
-- let hashes = [ xxh32 s x `mod` fromIntegral sz | x <- [1 .. 7] ]
-- vals <- for hashes $ \h -> do
-- let (w,b) = h `divMod` 64
-- pure (w, setBit 0 (fromIntegral b) :: Word64)
-- pure $ HashMap.fromListWith (.|.) vals
let txt = fromString (show $ pretty t)
let ann = MTreeAnn (ShortMetadata txt) NullEncryption tt
putBlock sto (serialise ann) >>= toMPlus
-- let result = HashMap.unions haha
let pt = HS.fromList (HashRef <$> catMaybes allShit')
& HS.toList
& toPTree (MaxSize 256) (MaxNum 256)
-- print $ length result
ht <- makeMerkle 0 pt $ \(_,_,bss) -> do
void $ putBlock sto bss
-- for_ hashes $ \i -> do
-- let (w,b) = i `divMod` 64
-- pure
-- pure ()
print $ pretty (HashRef ht)
-- atomically $ mapM_ (writeTQueue tq) hashes
-- print "FUCK!"
-- pure ()
-- modifyIORef thm (HashMap.alter (doAlter j) (fromIntegral i))
-- w <- readIORef z
-- print w
-- let bloom = B.easyList 0.01 ls
-- print $ B.length bloom
--dir <- findGitDir "." >>= orThrowUser "not a git dir"
--flip runContT pure do
-- o <- gitListAllObjects
-- ep <- ContT runWithRPC
-- let sto = StorageClient (rpcStorage ep)
-- cat <- startGitCatFile
-- -- h <- gitGetHash "HEAD" >>= orThrowUser "wtf1"
-- -- rvl <- gitRevList Nothing h
-- --
-- items <- for o $ \(a,GitHash b) -> do
-- pure b
-- liftIO $ print $ "bloom params" <+> pretty (B.suggestSizing (length items) 0.01)
-- timeItNamed (show $ "build bloom filter" <+> pretty (length items)) do
-- let bloom = B.easyList 0.01 items
-- liftIO $ print $ "bloom filter size" <+> pretty (B.length bloom) <> line
-- <> "data size" <+> pretty (LBS.length (serialise items))
-- timeItNamed "calc siphashes" do
-- let w = 67108864
-- tvm <- newTVarIO (HashMap.empty @Word64 @Bool)
-- -- q <- newTQueueIO
-- for_ items $ \it -> do
-- for_ (B.cheapHashes 7 it) $ \hx -> do
-- let k = fromIntegral (hx `mod` w)
-- atomically $ modifyTVar tvm (HashMap.insert k True)
-- wtf <- liftIO $ readTVarIO tvm
-- liftIO $ print $ length wtf
-- liftIO $ print $ LBS.length $ serialise bloom
-- liftIO do
-- allShit' <- for o $ \r@(o,h) -> runMaybeT do
-- GitObject t lbs <- toMPlus =<< gitReadFromCatFileBatch cat h
-- liftIO $ print $ pretty (t, h)
-- ght <- writeAsMerkle sto lbs
-- tt <- getBlock sto ght
-- >>= toMPlus
-- >>= orThrowUser "FUCK" . (deserialiseOrFail @(MTree [HashRef]))
-- let txt = fromString (show $ pretty t)
-- let ann = MTreeAnn (ShortMetadata txt) NullEncryption tt
-- putBlock sto (serialise ann) >>= toMPlus
-- let pt = HS.fromList (HashRef <$> catMaybes allShit')
-- & HS.toList
-- & toPTree (MaxSize 256) (MaxNum 256)
-- ht <- makeMerkle 0 pt $ \(_,_,bss) -> do
-- void $ putBlock sto bss
-- print $ pretty (HashRef ht)