This commit is contained in:
voidlizard 2025-07-23 11:35:57 +03:00
parent 39ac3e8832
commit e8d019eaa2
3 changed files with 118 additions and 52 deletions

View File

@ -108,6 +108,9 @@ class Expires a where
-- FIXME: dangerous!
expiresIn _ = Nothing
timeSpecDeltaSeconds :: RealFrac a => TimeSpec -> TimeSpec -> a
timeSpecDeltaSeconds a b = realToFrac . (*1e-9) . realToFrac $ toNanoSecs (max a b - min a b)
getEpoch :: MonadIO m => m Word64
getEpoch = liftIO getPOSIXTime <&> floor

View File

@ -368,13 +368,18 @@ ncqPutBS ncq@NCQStorage2{..} mtp mhref bs' = do
ncqEntryUnwrap :: NCQStorage2
-> ByteString
-> (ByteString, Either ByteString (NCQSectionType, ByteString))
ncqEntryUnwrap _ source = do
ncqEntryUnwrap n source = do
let (k,v) = BS.splitAt ncqKeyLen (BS.drop 4 source)
case ncqIsMeta v of
Just meta -> (k, Right (meta, BS.drop ncqPrefixLen v))
Nothing -> (k, Left v)
(k, ncqEntryUnwrapValue n v)
{-# INLINE ncqEntryUnwrap #-}
ncqEntryUnwrapValue :: NCQStorage2
-> ByteString
-> Either ByteString (NCQSectionType, ByteString)
ncqEntryUnwrapValue _ v = case ncqIsMeta v of
Just meta -> Right (meta, BS.drop ncqPrefixLen v)
Nothing -> Left v
{-# INLINE ncqEntryUnwrapValue #-}
ncqIdxIsTombSize :: NCQIdxEntry -> Bool
ncqIdxIsTombSize (NCQIdxEntry _ s) = s == ncqSLen + ncqKeyLen + ncqPrefixLen
@ -535,12 +540,6 @@ decodeEntry entryBs = do
NCQIdxEntry off size
{-# INLINE decodeEntry #-}
ncqLocateActually :: MonadUnliftIO m => NCQStorage2 -> HashRef -> m (Maybe Location)
ncqLocateActually ncq href = do
inMem <- ncqLookupEntry ncq href <&> fmap (InMemory . ncqEntryData)
inFo <- listToMaybe <$> ncqSeekInFossils ncq href \loc -> pure (SeekStop [loc])
pure $ inMem <|> inFo
ncqLocate2 :: MonadUnliftIO m => NCQStorage2 -> HashRef -> m (Maybe Location)
ncqLocate2 NCQStorage2{..} href = do
answ <- newEmptyTMVarIO
@ -578,7 +577,6 @@ ncqStorageRun2 ncq@NCQStorage2{..} = flip runContT pure do
debug $ yellow "indexing" <+> pretty fk
idx <- ncqRunTaskNoMatterWhat ncq (ncqIndexFile ncq (DataFile fk))
ncqRunTaskNoMatterWhat ncq $ ncqStateUpdate ncq [F 0 fk]
debug $ "REMOVE ALL SHIT" <+> pretty idx
nwayHashMMapReadOnly idx >>= \case
Nothing -> err $ "can't open index" <+> pretty idx
Just (bs,nway) -> do
@ -1253,22 +1251,36 @@ ncqMergeStep ncq@NCQStorage2{..} = do
debug $ "merge: okay, good to go" <+> pretty (takeFileName mfile)
(mmIdx, nway) <- nwayHashMMapReadOnly fIndexNameA
idxA <- nwayHashMMapReadOnly fIndexNameA
>>= orThrow (NCQMergeInvariantFailed (show $ "can't mmap" <+> pretty fIndexNameA))
idxB <- nwayHashMMapReadOnly fIndexNameB
>>= orThrow (NCQMergeInvariantFailed (show $ "can't mmap" <+> pretty fIndexNameB))
debug $ "SCAN FILE A" <+> pretty fDataNameA
writeFiltered ncq fDataNameA fwh $ \_ _ _ v -> do
-- we write only record from A, that last in index(A) and not meta
writeFiltered ncq fDataNameA fwh $ \o _ k v -> do
let meta = Just M == ncqIsMeta v
pure $ not meta
liftIO (ncqLookupIndex (coerce k) idxA ) >>= \case
Just (NCQIdxEntry o1 _) | o1 == fromIntegral o -> pure $ not meta
_ -> pure $ False
-- we write only record from B, that last in index(B)
-- and not meta and not already written 'A' pass
debug $ "SCAN FILE B" <+> pretty fDataNameA
writeFiltered ncq fDataNameB fwh $ \_ _ k v -> do
writeFiltered ncq fDataNameB fwh $ \o _ k v -> do
let meta = Just M == ncqIsMeta v
foundInA <- liftIO (nwayHashLookup nway mmIdx (coerce k)) <&> isJust
let skip = foundInA || meta
pure $ not skip
foundInA <- liftIO (ncqLookupIndex (coerce k) idxA) <&> isJust
actual <- liftIO (ncqLookupIndex (coerce k) idxB ) >>= \case
Just (NCQIdxEntry o1 _) | o1 == fromIntegral o -> pure $ not meta
_ -> pure $ False
pure $ not ( foundInA || meta || not actual )
appendTailSection =<< handleToFd fwh
@ -1305,15 +1317,30 @@ ncqCompactStep me@NCQStorage2{..} = withSem ncqMergeSem $ flip runContT pure $ c
<&> zip [0 :: Int ..]
<&> IntMap.fromList
for_ (IntMap.elems files) $ \fk -> do
(i,fkA,tombsA) <- lift (findFileA files) >>= maybe (exit ()) pure
let datF = ncqGetFileName me (toFileName (DataFile fk))
dataSize <- liftIO (fileSize datF)
garbage <- lift $ getGargabeSlow fk mempty
let (_,_,rest) = IntMap.splitLookup i files
let realProfit = sum (HM.elems garbage)
let pfl = (realToFrac realProfit / realToFrac dataSize) & realToFrac @_ @(Fixed E6)
notice $ "profit" <+> pretty fk <+> pretty dataSize <+> pretty realProfit <+> pretty pfl
garbage0 <- lift $ getGarbageSlow fkA mempty
-- FIXME: hardcode
(j,fkB,tombsB) <- lift (findClosestAmongst rest (HM.keysSet garbage0) 0.15)
>>= maybe (exit ()) pure
notice $ "found" <+> pretty fkA <+> pretty fkB
-- for_ (IntMap.elems rest) $ \fk -> do
-- let datF = ncqGetFileName me (toFileName (DataFile fk))
-- dataSize <- liftIO (fileSize datF)
-- garbage <- lift $ getGarbageSlow fk tombsA
-- let realProfit = sum (HM.elems garbage)
-- let kUse = realToFrac realProfit / (1 + realToFrac dataSize) :: Fixed E3
-- notice $ "profit" <+> pretty fk <+> pretty dataSize <+> pretty realProfit <+> pretty kUse
-- (aIdx, fileA, nTombs) <- findFileA files >>= maybe (exit ()) pure
@ -1352,9 +1379,24 @@ ncqCompactStep me@NCQStorage2{..} = withSem ncqMergeSem $ flip runContT pure $ c
where
-- findFileA files = lift do
-- tnums <- for (IntMap.toList files) $ \(i, fk) -> (i, fk,) . HS.size <$> (getTombsInIndex =<< viewIndex fk)
-- pure $ listToMaybe ( List.sortOn ( Down . view _3 ) tnums )
findFileA files = do
tnums <- for (IntMap.toList files) $ \(i, fk) -> (i, fk,) <$> (getTombsInIndex =<< viewIndex fk)
pure $ listToMaybe ( List.sortOn ( Down . HS.size . view _3 ) tnums )
findClosestAmongst rest tombs ratio = flip runContT pure $ callCC \exit -> do
for_ (IntMap.toList rest) $ \(i,fk) -> do
let datF = ncqGetFileName me (toFileName (DataFile fk))
dataSize <- liftIO (fileSize datF)
garbage <- lift (getGarbageSlow fk tombs)
let realProfit = sum (HM.elems garbage)
let kUse = realToFrac realProfit / (1 + realToFrac dataSize)
when (kUse >= ratio) $ exit $ Just (i, fk, HM.keysSet garbage)
pure Nothing
viewIndex fk = do
let idxf = ncqGetFileName me $ toFileName (IndexFile fk)
@ -1374,21 +1416,28 @@ ncqCompactStep me@NCQStorage2{..} = withSem ncqMergeSem $ flip runContT pure $ c
when (HS.member (coerce k) tombs) $ S.yield $ let (NCQIdxEntry _ s) = decodeEntry v in s
pure (sum r)
getGargabeSlow :: MonadIO m => FileKey -> HashSet HashRef -> m (HashMap HashRef NCQSize)
getGargabeSlow fk tombs = do
getGarbageSlow :: MonadUnliftIO m => FileKey -> HashSet HashRef -> m (HashMap HashRef NCQSize)
getGarbageSlow fk tombs = do
let datFile = ncqGetFileName me (toFileName $ DataFile fk)
idx <- viewIndex fk
mmaped <- liftIO (mmapFileByteString datFile Nothing)
r <- newTVarIO mempty
runConsumeBS mmaped do
readSections $ \size bs -> do
let k = coerce @_ @HashRef $ fst (ncqEntryUnwrap me (LBS.toStrict bs))
found <- isJust <$> lift (ncqLookupIndex k idx)
let garbage = HS.member k tombs || not found
when garbage $ atomically do
modifyTVar' r (HM.insertWith (+) k (fromIntegral size))
ncqStorageScanDataFile me datFile $ \o s k v -> do
case ncqEntryUnwrapValue me v of
Left bs -> atomically $ modifyTVar' r (HM.insertWith (+) k (fromIntegral s))
Right (t, bs) -> do
ncqLookupIndex k idx >>= \case
Nothing -> do
-- notice $ "not found in index" <+> pretty k
atomically $ modifyTVar' r (HM.insertWith (+) k (fromIntegral s))
Just (NCQIdxEntry oi _) -> do
let garbage = HS.member k tombs || oi /= fromIntegral o
when garbage do
-- notice $ "offset mismatch or tomb" <+> pretty o <+> pretty oi <+> pretty k
when garbage $ atomically do
modifyTVar' r (HM.insertWith (+) k (fromIntegral s))
readTVarIO r

View File

@ -792,17 +792,17 @@ testNCQ2Simple1 syn TestEnv{..} = do
notice $ "merge data"
-- ncqWithStorage ncqDir $ \sto -> liftIO do
-- notice "perform merge"
-- ncqMergeFull sto
-- ncqSweepStates sto
-- ncqSweepFossils sto
ncqWithStorage ncqDir $ \sto -> liftIO do
notice "perform merge"
ncqMergeFull sto
ncqSweepStates sto
ncqSweepFossils sto
-- notice $ "full sweep unused states"
notice $ "full sweep unused states"
-- ncqWithStorage ncqDir $ \sto -> liftIO do
-- ncqSweepStates sto
-- ncqSweepFossils sto
ncqWithStorage ncqDir $ \sto -> liftIO do
ncqSweepStates sto
ncqSweepFossils sto
notice $ "lookup" <+> pretty n <+> "blocks"
@ -1663,17 +1663,30 @@ main = do
notice $ "should be deleted" <+> pretty (HS.size deleted) <+> "/" <+> pretty tnum
t0 <- getTimeCoarse
ncqCompactStep sto
t1 <- getTimeCoarse
let dt = timeSpecDeltaSeconds @(Fixed E6) t0 t1
notice $ "ncqCompactStep time" <+> pretty dt
none
entry $ bindMatch "test:ncq2:del1" $ nil_ $ \syn -> do
runTest $ \TestEnv{..} -> do
g <- liftIO MWC.createSystemRandom
let dir = testEnvDir
let (_, argz) = splitOpts [] syn
let (opts, argz) = splitOpts [("-m",0)] syn
let n = headDef 10000 [ fromIntegral x | LitIntVal x <- argz ]
let merge = or [ True | ListVal [StringLike "-m"] <- opts ]
thashes <- newTVarIO mempty
ncqWithStorage dir $ \sto@NCQStorage2{..} -> do
@ -1692,9 +1705,6 @@ main = do
pure h
pause @'Seconds 5
atomically $ writeTVar thashes (HS.fromList hashes)
flip runContT pure $ callCC \exit -> do
@ -1717,6 +1727,10 @@ main = do
exit ()
when merge do
ncqWithStorage dir \sto -> do
ncqMergeFull sto
ncqWithStorage dir $ \sto -> do
-- notice "check deleted"
hashes <- readTVarIO thashes