hbs2/hbs2-storage-ncq/lib/HBS2/Storage/NCQ3/Internal/Index.hs

216 lines
6.5 KiB
Haskell

module HBS2.Storage.NCQ3.Internal.Index where
import HBS2.Storage.NCQ3.Internal.Prelude
import HBS2.Storage.NCQ3.Internal.Types
import HBS2.Storage.NCQ3.Internal.State
import HBS2.Storage.NCQ3.Internal.Memtable
import HBS2.Storage.NCQ3.Internal.Files
import System.Posix.Files qualified as PFS
import Streaming.Prelude qualified as S
import Network.ByteOrder qualified as N
import Control.Monad.Trans.Cont
import Control.Monad.Trans.Maybe
import Data.ByteString qualified as BS
import System.IO.MMap
import System.IO.Temp as Temp
import Streaming.Prelude qualified as S
data IndexEntry = IndexEntry {-# UNPACK #-} !FileKey !Word64 !Word32
deriving stock (Eq,Show)
unpackIndexEntry :: ByteString -> IndexEntry
unpackIndexEntry entryBs = do
let (fks,rest1) = BS.splitAt 4 entryBs
let (offs,rest2) = BS.splitAt 8 rest1
let ss = BS.take 4 rest2
let fk = FileKey (N.word32 fks)
let off = N.word64 offs
let size = N.word32 ss
IndexEntry fk off size
{-# INLINE unpackIndexEntry #-}
emptyKey :: ByteString
emptyKey = BS.replicate 32 0
ncqIndexAlloc :: NWayHashAlloc
ncqIndexAlloc = nwayAllocDef 1.10 32 8 16
ncqIndexAllocForMerge :: NWayHashAlloc
ncqIndexAllocForMerge = nwayAllocDef 0.8 32 8 16
ncqLookupIndex :: MonadUnliftIO m
=> HashRef
-> (ByteString, NWayHash)
-> m (Maybe IndexEntry )
ncqLookupIndex hx (mmaped, nway) = do
fmap unpackIndexEntry <$> nwayHashLookup nway mmaped (coerce hx)
{-# INLINE ncqLookupIndex #-}
ncqLocate_ :: MonadUnliftIO m => Bool -> NCQStorage3 -> HashRef -> m (Maybe Location)
ncqLocate_ f me@NCQStorage3{..} href = ncqOperation me (pure Nothing) do
answ <- newEmptyTMVarIO
atomically do
when f $ modifyTVar ncqWrites succ
writeTQueue ncqReadReq (href, answ)
atomically $ takeTMVar answ
ncqLocate :: MonadUnliftIO m => NCQStorage3 -> HashRef -> m (Maybe Location)
ncqLocate me href = ncqOperation me (pure Nothing) do
ncqLocate_ True me href
ncqIndexFile :: MonadUnliftIO m => NCQStorage3 -> DataFile FileKey -> m (Maybe FilePath)
ncqIndexFile n fk = runMaybeT do
let fp = toFileName fk & ncqGetFileName n
fki <- ncqGetNewFileKey n IndexFile
let dest = ncqGetFileName n (IndexFile fki)
debug $ "INDEX" <+> pretty fp <+> pretty dest
items <- S.toList_ do
ncqStorageScanDataFile n fp $ \offset w key s -> case ncqIsMeta s of
Just M -> none
_ -> do
-- we need size in order to return block size faster
-- w/o search in fossil
let fks = N.bytestring32 (coerce fk)
let rs = (w + ncqSLen) & fromIntegral @_ @Word32 & N.bytestring32
let os = fromIntegral @_ @Word64 offset & N.bytestring64
let record = fks <> os <> rs
-- debug $ "WRITE INDEX ENTRY" <+> pretty (BS.length record)
S.yield (coerce key, record)
let (dir,name) = splitFileName fp
let idxTemp = (dropExtension name <> "-") `addExtension` ".cq$"
result <- lift $ nwayWriteBatch ncqIndexAlloc dir idxTemp items
mv result dest
stat <- liftIO $ PFS.getFileStatus dest
let ts = PFS.modificationTimeHiRes stat
midx <- liftIO (nwayHashMMapReadOnly dest)
unless (isJust midx) do
err $ "can't mmap index" <+> pretty dest
ncqStateUpdate n do
ncqStateAddIndexFile ts fki
ncqStateAddDataFile (coerce fk)
ncqStateDelFact (P (PData fk 0))
(bs,nw) <- toMPlus midx
nwayHashScanAll nw bs $ \_ k _ -> do
unless (k == emptyKey) $ atomically $ void $ runMaybeT do
NCQEntry _ tfk <- MaybeT $ ncqLookupEntrySTM n (coerce k)
fk' <- MaybeT $ readTVar tfk
guard (coerce fk == fk') -- remove only own stuff
lift $ ncqAlterEntrySTM n (coerce k) (const Nothing)
pure dest
{-HLINT ignore "Functor law"-}
ncqIndexCompactFull :: MonadUnliftIO m
=> NCQStorage3
-> m ()
ncqIndexCompactFull ncq = fix \again ->
ncqIndexCompactStep ncq >>= \case
True -> again
False -> none
ncqIndexCompactStep :: MonadUnliftIO m
=> NCQStorage3
-> m Bool
ncqIndexCompactStep me@NCQStorage3{..} = withSem ncqServiceSem $ flip runContT pure $ callCC \exit -> do
debug "ncqIndexCompactStep"
idx <- readTVarIO ncqState
<&> fmap (IndexFile . snd) . ncqStateIndex
r' <- lift $ ncqFindMinPairOf me idx
(_, a, b) <- ContT $ maybe1 r' (pure False)
let idx1Name = ncqGetFileName me a
let idx2Name = ncqGetFileName me b
(bs1, nw1) <- lift (nwayHashMMapReadOnly idx1Name) >>= \case
Nothing -> err ("missed file" <+> pretty idx1Name) >> exit False
Just e -> pure e
(bs2, nw2) <- lift (nwayHashMMapReadOnly idx2Name) >>= \case
Nothing -> err ("missed file" <+> pretty idx2Name) >> exit False
Just e -> pure e
e <- S.toList_ do
nwayHashScanAll nw1 bs1 $ \_ k v -> unless (k == emptyKey) do
S.yield (k,v)
nwayHashScanAll nw2 bs2 \_ k v -> unless (k == emptyKey) do
r <- liftIO (nwayHashLookup nw1 bs1 k)
unless (isJust r) do
S.yield (k,v)
let dir = ncqGetWorkDir me
ts <- liftIO (PFS.getFileStatus idx1Name) <&> PFS.modificationTimeHiRes
-- result <- lift $ nwayWriteBatch ncqIndexAllocForMerge dir "merged-.cq$" e
result <- lift $ nwayWriteBatch ncqIndexAlloc dir "merged-.cq$" e
liftIO $ PFS.setFileTimesHiRes result ts ts
fki <- ncqGetNewFileKey me IndexFile
mv result (ncqGetFileName me (IndexFile fki))
debug $ "state update" <+> pretty a <+> pretty b <+> "=>" <+> pretty fki
ncqStateUpdate me do
ncqStateDelIndexFile (coerce a)
ncqStateDelIndexFile (coerce b)
ncqStateAddIndexFile ts fki
pure True
ncqStorageScanDataFile :: MonadIO m
=> NCQStorage3
-> FilePath
-> ( Integer -> Integer -> HashRef -> ByteString -> m () )
-> m ()
ncqStorageScanDataFile ncq fp' action = do
let fp = ncqGetFileName ncq fp'
mmaped <- liftIO (mmapFileByteString fp Nothing)
flip runContT pure $ callCC \exit -> do
flip fix (0,mmaped) $ \next (o,bs) -> do
when (BS.length bs < ncqSLen) $ exit ()
let w = BS.take ncqSLen bs & N.word32 & fromIntegral
when (BS.length bs < ncqSLen + w) $ exit ()
let kv = BS.drop ncqSLen bs
let k = BS.take ncqKeyLen kv & coerce @_ @HashRef
let v = BS.take (ncqFullDataLen (NCQFullRecordLen w)) $ BS.drop ncqKeyLen kv
lift (action o (fromIntegral w) k v)
next (ncqSLen + o + fromIntegral w, BS.drop (w+ncqSLen) bs)