This commit is contained in:
Dmitry Zuikov 2024-06-03 06:47:57 +03:00
parent 1a9258ee38
commit 0834a9a142
3 changed files with 64 additions and 15 deletions

View File

@ -119,7 +119,7 @@ pattern Tomb e <- e@(Entry _ ( isTomb -> True ))
pattern Existed :: Entry -> IndexEntry -> Entry pattern Existed :: Entry -> IndexEntry -> Entry
pattern Existed e w <- e@(Entry _ (existed -> Just w)) pattern Existed e w <- e@(Entry _ (existed -> Just w))
{-# COMPLETE Existed #-} -- {-# COMPLETE Existed #-}
isAlive :: Entry -> Bool isAlive :: Entry -> Bool
isAlive = \case isAlive = \case
@ -277,7 +277,10 @@ compactStorageCommit sto = liftIO do
kv <- atomically do kv <- atomically do
mapM readTVar (csKeys sto) <&> mconcat . V.toList . fmap HM.toList mapM readTVar (csKeys sto) <&> mconcat . V.toList . fmap HM.toList
let items = [ (k, v) | (k, v@Fresh{}) <- kv ] let items = [ (k, e)
| (k, e@Fresh{}) <- kv
, reallyUpdated mma e
]
unless (List.null items) do unless (List.null items) do
@ -295,14 +298,7 @@ compactStorageCommit sto = liftIO do
((_,Entry i (Del e)):rest) | not (idxEntryTomb e) -> do ((_,Entry i (Del e)):rest) | not (idxEntryTomb e) -> do
next (off + 0, rest, (e { idxEntryTomb = True },i) : idx) next (off + 0, rest, (e { idxEntryTomb = True },i) : idx)
-- NOTE: this-might-be-slow
-- но это правильно, поскольку
-- у нас **compact** storage и мы не хотим,
-- что бы его раздувало одинаковыми значениями
((k,Entry i (Upd v e)):rest) -> do ((k,Entry i (Upd v e)):rest) -> do
if readValue mma e == v then do
next (off + 0, rest, idx)
else do
BS.hPut ha v BS.hPut ha v
let sz = fromIntegral $ BS.length v let sz = fromIntegral $ BS.length v
next (off + sz, rest, (IndexEntry (fromIntegral off) (fromIntegral sz) 0 False k,i) : idx) next (off + sz, rest, (IndexEntry (fromIntegral off) (fromIntegral sz) 0 False k,i) : idx)
@ -375,6 +371,19 @@ compactStorageCommit sto = liftIO do
mmapped <- liftIO (mmapFileForeignPtr fp ReadOnly Nothing) mmapped <- liftIO (mmapFileForeignPtr fp ReadOnly Nothing)
atomically (writeTVar (csMMapped sto) mmapped) atomically (writeTVar (csMMapped sto) mmapped)
-- NOTE: this-might-be-slow
-- но это правильно, поскольку
-- у нас **compact** storage и мы не хотим,
-- что бы его раздувало одинаковыми значениями
-- Можно попробовать использовать siphash
-- при загрузке (?)... да ну нахрен, капец долго
-- будет. если только его не хранить (это можно)
reallyUpdated mma = \case
Entry _ (Upd v e) -> readValue mma e /= v
_ -> True
compactStorageDel :: ForCompactStorage m => CompactStorage k -> ByteString -> m () compactStorageDel :: ForCompactStorage m => CompactStorage k -> ByteString -> m ()
compactStorageDel sto key = do compactStorageDel sto key = do
@ -398,6 +407,9 @@ compactStorageDel sto key = do
modifyTVar tvar (HM.insert key (Entry j (Del what))) modifyTVar tvar (HM.insert key (Entry j (Del what)))
succUncommitedSTM sto 1 succUncommitedSTM sto 1
-- FIXME: fix-incomplete-pattern-warning
_ -> pure ()
newSequenceSTM :: CompactStorage k -> STM Integer newSequenceSTM :: CompactStorage k -> STM Integer
newSequenceSTM sto = stateTVar (csSeq sto) (\n -> (n+1,n)) newSequenceSTM sto = stateTVar (csSeq sto) (\n -> (n+1,n))
@ -420,6 +432,7 @@ compactStoragePut sto k v = do
where where
check (Entry i (New v1)) (Entry _ (Off e)) = Entry i (Upd v1 e) check (Entry i (New v1)) (Entry _ (Off e)) = Entry i (Upd v1 e)
check (Entry i (New v1)) (Entry _ (Upd v0 e)) = Entry i (Upd v1 e)
check x _ = x check x _ = x
readValue :: MMaped -> IndexEntry -> ByteString readValue :: MMaped -> IndexEntry -> ByteString
@ -450,6 +463,7 @@ compactStorageExists sto key = do
case val of case val of
Just (Entry _ (New s)) -> pure (Just (fromIntegral (BS.length s))) Just (Entry _ (New s)) -> pure (Just (fromIntegral (BS.length s)))
Just (Entry _ (Off e)) -> pure (Just (fromIntegral $ idxEntrySize e)) Just (Entry _ (Off e)) -> pure (Just (fromIntegral $ idxEntrySize e))
Just (Entry _ (Upd v e)) -> pure (Just (fromIntegral $ BS.length v))
_ -> pure Nothing _ -> pure Nothing
unmapFile :: ForCompactStorage m => CompactStorage sto -> m () unmapFile :: ForCompactStorage m => CompactStorage sto -> m ()

View File

@ -17,9 +17,7 @@ main =
, testCase "testSimpleStorageBundles" testSimpleStorageBundles , testCase "testSimpleStorageBundles" testSimpleStorageBundles
, testCase "testSimpleStorageSymmEncryption" testSimpleStorageSymmEncryption , testCase "testSimpleStorageSymmEncryption" testSimpleStorageSymmEncryption
, testCase "testCompactStorage" testCompactStorageBasic , testCase "testCompactStorage" testCompactStorageBasic
, testCase "testCompactStorageNoDupes" testCompactStorageNoDupes
] ]
-- testGroup "compact"
-- [ testCase "testCompactStorage" testCompactStorageBasic
-- ]

View File

@ -35,12 +35,12 @@ import Test.Tasty.HUnit
testCompactStorageBasic :: IO () testCompactStorageBasic :: IO ()
testCompactStorageBasic = do testCompactStorageBasic = do
let elems = [ 0 .. 10_000 :: Int ] let elems = [ 0 .. 100_000 :: Int ]
let pt = toPTree (MaxSize 1000) (MaxNum 256) elems let pt = toPTree (MaxSize 1000) (MaxNum 256) elems
withSystemTempDirectory "simpleStorageTest1" $ \dir -> do withSystemTempDirectory "simpleStorageTest1" $ \dir -> do
let db = "storage" let db = dir </> "storage"
sto <- compactStorageOpen @HbSync mempty db sto <- compactStorageOpen @HbSync mempty db
root <- makeMerkle 0 pt $ \(_,_,bss) -> do root <- makeMerkle 0 pt $ \(_,_,bss) -> do
@ -56,3 +56,40 @@ testCompactStorageBasic = do
assertEqual "elems-read-from-storage" elems elems2 assertEqual "elems-read-from-storage" elems elems2
testCompactStorageNoDupes :: IO ()
testCompactStorageNoDupes = do
let elems = [ 0 .. 1_000 :: Int ]
withSystemTempDirectory "simpleStorageTest2" $ \dir -> do
let db = dir </> "storage"
sto <- compactStorageOpen @HbSync mempty db
for_ elems $ \k -> do
put sto (LBS.toStrict $ serialise k) (LBS.toStrict $ serialise $ show $ pretty k)
commit sto
size1 <- compactStorageSize sto
here <- for elems $ \e -> do
let k = LBS.toStrict $ serialise e
member sto k
assertBool "all-members-here" (and here)
for_ elems $ \k -> do
put sto (LBS.toStrict $ serialise k) (LBS.toStrict $ serialise $ show $ pretty k)
commit sto
size2 <- compactStorageSize sto
assertEqual "no-dupes" size1 size2
here2 <- for elems $ \e -> do
let k = LBS.toStrict $ serialise e
member sto k
assertBool "all-members-here" (and here2)