From 955fb65dcea4095e6e628607a9a70f8ee15e1594 Mon Sep 17 00:00:00 2001 From: voidlizard Date: Thu, 10 Jul 2025 19:07:40 +0300 Subject: [PATCH] wip --- hbs2-tests/hbs2-tests.cabal | 1 + hbs2-tests/test/TestNCQ.hs | 87 +++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/hbs2-tests/hbs2-tests.cabal b/hbs2-tests/hbs2-tests.cabal index 0070cde1..906f73ed 100644 --- a/hbs2-tests/hbs2-tests.cabal +++ b/hbs2-tests/hbs2-tests.cabal @@ -22,6 +22,7 @@ common common-deps , fuzzy-parse , async , bytestring + , bloomfilter , cache , containers , data-default diff --git a/hbs2-tests/test/TestNCQ.hs b/hbs2-tests/test/TestNCQ.hs index 6487d627..8dfc8586 100644 --- a/hbs2-tests/test/TestNCQ.hs +++ b/hbs2-tests/test/TestNCQ.hs @@ -40,6 +40,7 @@ import Data.ByteString.Lazy qualified as LBS import Data.Text.Encoding qualified as TE import Data.ByteString.Char8 qualified as BS8 import Data.ByteString.Builder +import Data.Hashable (hash) import Data.Maybe import Data.Either import Data.Word @@ -58,6 +59,8 @@ import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict qualified as HM import Data.IntMap qualified as IntMap import Data.IntMap (IntMap) +import Data.IntSet (IntSet) +import Data.IntSet qualified as IntSet import Data.Fixed import System.Environment import System.FilePath.Posix @@ -88,6 +91,8 @@ import System.TimeIt import System.IO.Unsafe (unsafePerformIO) +import Data.BloomFilter.Easy as Bloom + {- HLINT ignore "Functor law" -} setupLogger :: MonadIO m => m () @@ -616,6 +621,83 @@ testNCQ2Simple1 TestEnv{..} = do -- debug $ fill 44 (pretty ha) <+> fill 8 (pretty found) + +testFilterEmulate1 :: MonadUnliftIO m + => Int + -> TestEnv + -> m () + +testFilterEmulate1 n TestEnv{..} = do + let tmp = testEnvDir + let ncqDir = tmp + + g <- liftIO MWC.createSystemRandom + + bz <- replicateM n $ liftIO do + n <- (`mod` (64*1024)) <$> uniformM @Int g + uniformByteStringM n g + + + hs' <- newTVarIO (mempty :: HashSet HashRef) + noHs' <- newTVarIO (mempty :: HashSet HashRef) + + ncqWithStorage ncqDir $ \sto -> liftIO do + for bz $ \z -> do + h <- ncqPutBS sto (Just B) Nothing z + atomically $ modifyTVar' hs' (HS.insert h) + + replicateM_ (max 100 (n `div` 3)) $ liftIO do + n <- (`mod` (64*1024)) <$> uniformM @Int g + fake <- HashRef . hashObject @HbSync <$> uniformByteStringM n g + atomically $ modifyTVar' noHs' (HS.insert fake) + + hs <- readTVarIO hs' + noHs <- readTVarIO noHs' + + let allShit = HS.toList hs <> HS.toList noHs + + let bloom = easyList 0.01 (fmap (coerce @_ @ByteString) (HS.toList hs)) + + let bucno e = hash e `mod` 4096 + + let dumb = IntSet.fromList [ bucno k | k <- HS.toList hs ] + + ncqWithStorage ncqDir $ \sto -> liftIO do + + for_ [1..4] $ \i -> do + + notice $ "-- pass" <+> pretty i <+> "--" + + (t1,_) <- timeItT do + for_ allShit $ \ha -> do + ncqSearchBS sto ha <&> maybe (-1) BS.length + + notice $ "lookup-no-filter" <+> pretty (realToFrac @_ @(Fixed E3) t1) + + (t2,_) <- timeItT do + for_ allShit $ \ha -> do + unless (HS.member ha noHs) do + void $ ncqSearchBS sto ha <&> maybe (-1) BS.length + + notice $ "lookup-fake-filter" <+> pretty (realToFrac @_ @(Fixed E3) t2) + + (t3,_) <- timeItT do + for_ allShit $ \ha -> do + let here = IntSet.member (bucno ha) dumb + when here do + void $ ncqSearchBS sto ha <&> maybe (-1) BS.length + + notice $ "lookup-dumb-filter" <+> pretty (realToFrac @_ @(Fixed E3) t3) + + (t4,_) <- timeItT do + for_ allShit $ \ha -> do + let here = Bloom.elem (coerce ha) bloom + when here do + void $ ncqSearchBS sto ha <&> maybe (-1) BS.length + + notice $ "lookup-simple-bloom-filter" <+> pretty (realToFrac @_ @(Fixed E3) t4) + + testNCQ2Repair1:: MonadUnliftIO m => TestEnv -> m () @@ -924,6 +1006,11 @@ main = do e -> throwIO $ BadFormException @C (mkList e) + + entry $ bindMatch "test:filter:emulate-1" $ nil_ $ \case + [ LitIntVal n ] -> runTest $ testFilterEmulate1 (fromIntegral n) + e -> throwIO $ BadFormException @C (mkList e) + hidden do internalEntries entry $ bindMatch "#!" $ nil_ $ const none