From 91a25631343ab1378c45571eadc191deba6247b6 Mon Sep 17 00:00:00 2001 From: voidlizard Date: Sun, 24 Aug 2025 08:44:19 +0300 Subject: [PATCH] some kludges? in order file gc to work faster --- hbs2-peer/app/RPC2.hs | 1 + .../lib/HBS2/Storage/NCQ3/Internal.hs | 1 + .../lib/HBS2/Storage/NCQ3/Internal/Fossil.hs | 3 ++ .../lib/HBS2/Storage/NCQ3/Internal/Index.hs | 7 ++++- .../lib/HBS2/Storage/NCQ3/Internal/Run.hs | 27 ++++++++++------- .../lib/HBS2/Storage/NCQ3/Internal/State.hs | 18 +++++++++-- .../lib/HBS2/Storage/NCQ3/Internal/Sweep.hs | 30 +++++++++++++++++++ .../lib/HBS2/Storage/NCQ3/Internal/Types.hs | 1 + 8 files changed, 75 insertions(+), 13 deletions(-) diff --git a/hbs2-peer/app/RPC2.hs b/hbs2-peer/app/RPC2.hs index 23c5145b..ef0e04ce 100644 --- a/hbs2-peer/app/RPC2.hs +++ b/hbs2-peer/app/RPC2.hs @@ -278,3 +278,4 @@ instance (e ~ L4Proto, MonadUnliftIO m, HasRpcContext PeerAPI RPC2Context m) => pure $ mkList r + diff --git a/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal.hs b/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal.hs index dc302bb3..9499db44 100644 --- a/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal.hs +++ b/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal.hs @@ -68,6 +68,7 @@ ncqStorageOpen fp upd = do ncqSweepReq <- newTVarIO False ncqMergeReq <- newTVarIO False ncqCompactReq <- newTVarIO False + ncqStateDumpReq <- newTVarIO False ncqOnRunWriteIdle <- newTVarIO none ncqSyncNo <- newTVarIO 0 ncqState <- newTVarIO mempty diff --git a/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Fossil.hs b/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Fossil.hs index 2a9399cc..b47157a3 100644 --- a/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Fossil.hs +++ b/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Fossil.hs @@ -257,6 +257,9 @@ fileTailRecord w = do ncqMakeSectionBS (Just M) h paylo {-# INLINE fileTailRecord #-} +typicalFileTailRecordLen :: Integral a => a +typicalFileTailRecordLen = fromIntegral (BS.length (fileTailRecord @Integer 0)) + appendSection :: forall m . MonadUnliftIO m => Fd -> ByteString diff --git a/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Index.hs b/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Index.hs index 332984aa..6e47f333 100644 --- a/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Index.hs +++ b/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Index.hs @@ -5,6 +5,7 @@ import HBS2.Storage.NCQ3.Internal.Types import HBS2.Storage.NCQ3.Internal.State import HBS2.Storage.NCQ3.Internal.Memtable import HBS2.Storage.NCQ3.Internal.Files +import HBS2.Storage.NCQ3.Internal.Flags import System.Posix.Files qualified as PFS import Streaming.Prelude qualified as S @@ -158,7 +159,7 @@ ncqIndexCompactStep :: MonadUnliftIO m -> m Bool ncqIndexCompactStep me@NCQStorage{..} = flip runContT pure $ callCC \exit -> do - debug "ncqIndexCompactStep" + debug $ red "ncqIndexCompactStep" idx <- readTVarIO ncqState <&> fmap (IndexFile . snd) . ncqStateIndex @@ -210,6 +211,10 @@ ncqIndexCompactStep me@NCQStorage{..} = flip runContT pure $ callCC \exit -> do ncqStateDelIndexFile (coerce a) ncqStateDelIndexFile (coerce b) + -- FIXME: crutch + -- костыль! + ncqSetFlag ncqStateDumpReq + pure True ncqStorageScanDataFile :: MonadIO m diff --git a/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Run.hs b/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Run.hs index 1f364d00..49f1cd87 100644 --- a/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Run.hs +++ b/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Run.hs @@ -93,21 +93,27 @@ ncqTryLoadState me@NCQStorage{..} = do let corrupted = isLeft good - if not corrupted then do - debug $ yellow "indexing" <+> pretty dataFile - ncqIndexFile me Nothing dataFile - else do + if | not corrupted && realSize <= typicalFileTailRecordLen -> do - o <- ncqFileTryRecover path - warn $ "ncqFileTryRecover" <+> pretty path <+> pretty o <+> parens (pretty realSize) + warn $ "skip indexing" <+> pretty realSize <+> pretty (takeFileName path) - let best = if i < 1 then max s o else s + | not corrupted -> do - warn $ red "trim" <+> pretty s <+> pretty best <+> red (pretty (fromIntegral best - realSize)) <+> pretty (takeFileName path) + debug $ "indexing" <+> pretty dataFile + void $ ncqIndexFile me Nothing dataFile - liftIO $ PFS.setFileSize path (fromIntegral best) + | otherwise -> do - if i <= 1 then again (succ i) else pure Nothing + o <- ncqFileTryRecover path + warn $ "ncqFileTryRecover" <+> pretty path <+> pretty o <+> parens (pretty realSize) + + let best = if i < 1 then max s o else s + + warn $ red "trim" <+> pretty s <+> pretty best <+> red (pretty (fromIntegral best - realSize)) <+> pretty (takeFileName path) + + liftIO $ PFS.setFileSize path (fromIntegral best) + + if i <= 1 then again (succ i) else none for_ (bad <> fmap snd rest) $ \f -> do @@ -235,6 +241,7 @@ ncqStorageRun ncq@NCQStorage{..} = withSem ncqRunSem $ flip runContT pure do debug $ "EMA" <+> pretty (realToFrac @_ @(Fixed E3) ema) spawnActivity $ postponed ncqPostponeService $ forever do + ncqRemoveEmptyFossils ncq ncqSweepObsoleteStates ncq ncqSweepFiles ncq void $ race (pause @'Seconds ncqSweepTime) do diff --git a/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/State.hs b/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/State.hs index 0d68652d..c317d076 100644 --- a/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/State.hs +++ b/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/State.hs @@ -4,6 +4,7 @@ module HBS2.Storage.NCQ3.Internal.State where import HBS2.Storage.NCQ3.Internal.Prelude import HBS2.Storage.NCQ3.Internal.Types import HBS2.Storage.NCQ3.Internal.Files +import HBS2.Storage.NCQ3.Internal.Flags import HBS2.Storage.NCQ3.Internal.MMapCache import Data.Config.Suckless.Script @@ -34,8 +35,15 @@ ncqStateDump ncq@NCQStorage{..} = do state <- readTVarIO ncqState key <- ncqGetNewFileKey ncq StateFile let snkFile = ncqGetFileName ncq (StateFile key) + liftIO $ withBinaryFileDurableAtomic snkFile WriteMode $ \fh -> do IO.hPrint fh (pretty state) + + atomically do + writeTVar ncqStateKey key + ncqClearFlagSTM ncqStateDumpReq + + debug $ yellow "ncqStateDump" <+> pretty key <+> pretty (toFileName (StateFile key)) pure key ncqStateUpdateLoop :: MonadIO m @@ -46,18 +54,19 @@ ncqStateUpdateLoop ncq@NCQStorage{..} = do debug $ red "ncqStateUpdateLoop" + sInit <- readTVarIO ncqState flip fix sInit $ \next s0 -> do state <- atomically do s1 <- readTVar ncqState stop <- readTVar ncqStopReq - if s1 == s0 && not stop then STM.retry else pure s1 + dump <- readTVar ncqStateDumpReq + if s1 == s0 && not stop && not dump then STM.retry else pure s1 key <- ncqStateDump ncq done <- atomically do - writeTVar ncqStateKey key modifyTVar ncqWrites succ readTVar ncqStopReq @@ -71,7 +80,12 @@ ncqStateUpdate :: MonadIO m ncqStateUpdate ncq@NCQStorage{..} action = do atomically do + s0 <- readTVar ncqState void $ runReaderT (fromStateOp action) ncq + s1 <- readTVar ncqState + when (s0 /= s1) do + modifyTVar ncqState (over #ncqStateVersion succ) + ncqSetFlagSTM ncqStateDumpReq ncqStateAddDataFile :: FileKey -> StateOP () ncqStateAddDataFile fk = do diff --git a/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Sweep.hs b/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Sweep.hs index fd537231..f3b4ed03 100644 --- a/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Sweep.hs +++ b/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Sweep.hs @@ -4,6 +4,7 @@ module HBS2.Storage.NCQ3.Internal.Sweep where import HBS2.Storage.NCQ3.Internal.Prelude import HBS2.Storage.NCQ3.Internal.Types import HBS2.Storage.NCQ3.Internal.Files +import HBS2.Storage.NCQ3.Internal.Fossil import HBS2.Storage.NCQ3.Internal.State import Control.Monad.Trans.Cont @@ -13,6 +14,7 @@ import Data.HashMap.Strict qualified as HM import Data.HashSet qualified as HS import Data.List qualified as List import System.Posix.Files qualified as PFS +import Streaming.Prelude qualified as S ncqLiveKeysSTM :: NCQStorage -> STM (HashSet FileKey) ncqLiveKeysSTM NCQStorage{..} = do @@ -28,6 +30,34 @@ ncqLiveKeys ncq = atomically $ ncqLiveKeysSTM ncq {- HLINT ignore "Functor law"-} +ncqRemoveEmptyFossils :: forall m . MonadUnliftIO m => NCQStorage -> m () +ncqRemoveEmptyFossils me@NCQStorage{..} = flip runContT pure $ callCC \exit -> do + + s@NCQState{..} <- readTVarIO ncqState + debug $ red "CURRENT STATE" <+> pretty ncqStateVersion <> line <> pretty s + + check <- atomically do + NCQState{..} <- readTVar ncqState + current <- readTVar ncqCurrentFossils + let fks = HS.fromList [ coerce fk | P (PData fk _) <- universeBi ncqStateFacts ] + pure $ HS.toList (fks `HS.difference` current) + + current <- readTVarIO ncqCurrentFossils + debug $ "ncqRemoveEmptyFossils" <+> pretty (HS.toList current) <+> pretty check + + loosers <- S.toList_ $ for_ check $ \fk -> do + let path = ncqGetFileName me (toFileName (DataFile fk)) + s <- fileSize path + when (s <= typicalFileTailRecordLen) $ S.yield fk + + debug $ "ncqRemoveEmptyFossils" <+> pretty loosers + + when (List.null loosers) $ exit () + + ncqStateUpdate me $ for_ loosers $ \fk -> do + ncqStateDelDataFile fk + ncqStateDelFact (P (PData (DataFile fk) 0)) + ncqSweepFiles :: forall m . MonadUnliftIO m => NCQStorage -> m () ncqSweepFiles me@NCQStorage{..} = do diff --git a/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Types.hs b/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Types.hs index 7858ca71..24248bf1 100644 --- a/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Types.hs +++ b/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Types.hs @@ -120,6 +120,7 @@ data NCQStorage = , ncqSyncReq :: TVar Bool , ncqSweepReq :: TVar Bool , ncqMergeReq :: TVar Bool + , ncqStateDumpReq :: TVar Bool , ncqCompactReq :: TVar Bool , ncqOnRunWriteIdle :: TVar (IO ()) , ncqSyncNo :: TVar Int