hbs2/hbs2-storage-simple/lib/HBS2/Storage/Compact.hs

446 lines
13 KiB
Haskell

{-# LANGUAGE PatternSynonyms #-}
{-# Language ViewPatterns #-}
module HBS2.Storage.Compact where
import Data.Word
import Data.ByteString (ByteString)
import Data.ByteString qualified as BS
import Data.ByteString.Lazy qualified as LBS
import Data.ByteString.Builder as B
import Data.Binary.Get
import Data.Coerce
import Data.Function
import Data.List qualified as List
import Data.Maybe
import Data.Map (Map)
import Data.Map qualified as Map
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HM
import Data.Foldable
import Data.Traversable
import Data.Vector (Vector,(!))
import Data.Vector qualified as V
import Codec.Serialise
import GHC.Generics
-- import System.IO
import Lens.Micro.Platform
import Control.Monad.Except
import Control.Monad.Trans.Maybe
import Control.Monad
import Control.Concurrent.STM.TSem
import Safe
import UnliftIO
import Debug.Trace
-- compact storage
-- for the off-tree data representation
-- may be it will be faster, than Simple storage
-- who knows
newtype EntryOffset = EntryOffset Word64
deriving newtype (Ord,Eq,Num,Enum,Real,Integral,Show)
deriving stock Generic
newtype FwdEntryOffset = FwdEntryOffset Word64
deriving newtype (Ord,Eq,Num,Enum,Real,Integral,Show)
deriving stock Generic
newtype EntrySize = EntrySize Word64
deriving newtype (Ord,Eq,Num,Enum,Real,Integral,Show)
deriving stock Generic
newtype EntryNum = EntryNum Word32
deriving newtype (Ord,Eq,Num,Enum,Real,Integral,Show)
deriving stock Generic
data IndexEntry =
IndexEntry
{ idxEntryOffset :: !EntryOffset
, idxEntrySize :: !EntrySize
, idxEntrySeq :: !Word64
, idxEntryTomb :: !Bool
, idxEntryKey :: !ByteString
}
deriving stock (Show,Generic)
instance Serialise EntryOffset
instance Serialise EntrySize
instance Serialise EntryNum
instance Serialise IndexEntry
data Header =
Header
{ hdrMagic :: Word16
, hdrVersion :: Word16
, hdrFwdOffset :: FwdEntryOffset
, hdrIndexOffset :: EntryOffset
, hdrIndexEntries :: EntryNum
, hdrPrev :: EntryOffset
}
deriving stock (Show,Generic)
data E = New ByteString
| Off IndexEntry
| Del IndexEntry
data Entry = Entry Integer E
pattern Fresh :: Entry -> Entry
pattern Fresh e <- e@(Entry _ ( isFresh -> True ))
pattern Tomb :: Entry -> Entry
pattern Tomb e <- e@(Entry _ ( isTomb -> True ))
isTomb :: E -> Bool
isTomb (Off e) = idxEntryTomb e
isTomb _ = False
isFresh :: E -> Bool
isFresh e = case e of
New{} -> True
Del{} -> True
_ -> False
data CompactStorage =
CompactStorage
{ csHandle :: MVar Handle
, csHeaderOff :: IORef EntryOffset
, csSeq :: TVar Integer
, csKeys :: Vector (TVar (HashMap ByteString Entry))
}
type ForCompactStorage m = MonadIO m
data CompactStorageOpenOpt
data CompactStorageOpenError =
InvalidHeader
| BrokenIndex
| InvalidFwdSection
deriving stock (Typeable,Show)
instance Exception CompactStorageOpenError
buckets :: Int
buckets = 8
-- FIXME: buckets-hardcode
getKeyPrefix :: ByteString -> Int
getKeyPrefix bs = maybe 0 (fromIntegral.fst) (BS.uncons bs) `mod` buckets
{-# INLINE getKeyPrefix #-}
compactStorageOpen :: ForCompactStorage m
=> [CompactStorageOpenOpt]
-> FilePath
-> m CompactStorage
compactStorageOpen _ fp = do
ha <- openFile fp ReadWriteMode
sz <- hFileSize ha
mha <- newMVar ha
hoff0 <- newIORef 0
keys0 <- replicateM buckets (newTVarIO mempty) <&> V.fromList
-- ss <- newIORef 0
ss <- newTVarIO 0
if sz == 0 then
pure $ CompactStorage mha hoff0 ss keys0
else do
(p,header) <- readHeader mha Nothing >>= maybe (throwIO InvalidHeader) pure
hoff <- newIORef p
let sto = CompactStorage mha hoff ss keys0
readIndex sto (hdrIndexOffset header) (hdrIndexEntries header)
flip fix (hdrPrev header) $ \next -> \case
0 -> pure ()
off -> do
(_,pHeader) <- readHeader mha (Just off) >>= maybe (throwIO InvalidHeader) pure
readIndex sto (hdrIndexOffset pHeader) (hdrIndexEntries pHeader)
next (hdrPrev pHeader)
pure sto
readIndex :: ForCompactStorage m
=> CompactStorage
-> EntryOffset
-> EntryNum
-> m ()
readIndex sto offset num = liftIO do
withMVar (csHandle sto) $ \ha -> do
hSeek ha AbsoluteSeek (fromIntegral offset)
(rn,entries) <- flip fix (num, mempty, 0) $ \next left -> do
case left of
(0,acc,n) -> pure (n,acc)
(n,acc,rn) -> do
what <- runMaybeT do
slen <- liftIO (try @_ @IOException (LBS.hGet ha 2))
<&> either (const Nothing) Just
& MaybeT
len <- either (const Nothing) (Just . view _3) (runGetOrFail getWord16be slen)
& MaybeT . pure
sIdx <- liftIO (try @_ @IOException (LBS.hGet ha (fromIntegral len)))
>>= either (const mzero) pure
deserialiseOrFail @IndexEntry sIdx
& either (const mzero) pure
case what of
Nothing -> pure (0,mempty :: [IndexEntry])
Just idx -> next (pred n, idx : acc, succ rn)
when (rn /= num) do
throwIO BrokenIndex
let new = [ (idxEntryKey e,Entry 0 (Off e)) | e <- entries ]
-- readIndex from newer to older
-- so we keep only the newer values in map
atomically do
for_ new $ \(k,v) -> do
let tv = csKeys sto ! getKeyPrefix k
modifyTVar tv (HM.insertWith (\_ o -> o) k v)
compactStorageCommit :: ForCompactStorage m => CompactStorage -> m ()
compactStorageCommit sto = liftIO do
withMVar (csHandle sto) $ \ha -> do
hSeek ha SeekFromEnd 0
kv <- atomically do
mapM readTVar (csKeys sto) <&> mconcat . V.toList . fmap HM.toList
let items = [ (k, v) | (k, v@Fresh{}) <- kv ]
unless (List.null items) do
-- write fwd
offFwd <- hTell ha
LBS.hPut ha (toLazyByteString $ word64BE 0)
let off0 = offFwd + 8
-- write data
idxEntries <- flip fix (off0, items, mempty) $ \next (off, what, idx) -> do
case what of
[] -> pure idx
((_,Entry i (Del e)):rest) | not (idxEntryTomb e) -> do
next (off + 0, rest, (e { idxEntryTomb = True },i) : idx)
((k,Entry i (New v)):rest) -> do
BS.hPut ha v
let sz = fromIntegral $ BS.length v
next (off + sz, rest, (IndexEntry (fromIntegral off) (fromIntegral sz) 0 False k,i) : idx)
((_,Entry _ _):rest) -> do
next (off + 0, rest, idx)
offIdx0 <- hTell ha <&> fromIntegral
-- write index
for_ idxEntries $ \(e,_) -> do
let lbs = serialise e
LBS.hPut ha (B.toLazyByteString $
word16BE (fromIntegral $ LBS.length lbs)
<> B.lazyByteString lbs)
offPrev <- readIORef (csHeaderOff sto)
offCommitHead <- hTell ha
-- FIXME: maybe-slow-length-calc
appendHeader ha (fromIntegral offFwd) (Just offPrev) offIdx0 (fromIntegral $ length idxEntries)
hSeek ha AbsoluteSeek offFwd
LBS.hPut ha (toLazyByteString $ word64BE (fromIntegral offCommitHead))
hFlush ha
hSeek ha SeekFromEnd 0
offLast <- hTell ha <&> fromIntegral
-- atomically do
atomicWriteIORef (csHeaderOff sto) (offLast - headerSize 1)
atomically do
for_ idxEntries $ \(e,i) -> do
let k = idxEntryKey e
let tv = csKeys sto ! getKeyPrefix k
modifyTVar tv (HM.alter (doAlter (Entry i (Off e))) k)
where
doAlter y@(Entry i (Off e)) v0 = case v0 of
-- deleted-during-commit
Nothing -> Just (Entry i (Del e))
Just x | getSeq x > getSeq y -> Just x
| otherwise -> Just y
doAlter _ v = v
getSeq = \case
Entry i _ -> i
compactStorageDel :: ForCompactStorage m => CompactStorage -> ByteString -> m ()
compactStorageDel sto key = do
let tvar = csKeys sto ! getKeyPrefix key
val <- readTVarIO tvar <&> HM.lookup key
case val of
Nothing -> pure ()
Just (Entry i (Del _)) -> pure ()
Just (Entry _ (Off e)) -> do
atomically do
j <- newSequenceSTM sto
modifyTVar tvar (HM.insert key (Entry j (Del e)))
Just (Entry i (New v)) -> do
-- FIXME: if-commit-in-progress-then-put-tomb
atomically $ modifyTVar tvar (HM.delete key)
newSequenceSTM :: CompactStorage -> STM Integer
newSequenceSTM sto = stateTVar (csSeq sto) (\n -> (n+1,n))
compactStoragePut :: ForCompactStorage m => CompactStorage -> ByteString -> ByteString -> m ()
compactStoragePut sto k v = do
-- TODO: ASAP-do-not-write-value-if-not-changed
let tvar = csKeys sto ! getKeyPrefix k
atomically $ do
c <- newSequenceSTM sto
modifyTVar tvar (HM.insert k (Entry c (New v)))
compactStorageGet :: ForCompactStorage m => CompactStorage -> ByteString -> m (Maybe ByteString)
compactStorageGet sto key = do
let tvar = csKeys sto ! getKeyPrefix key
val <- readTVarIO tvar <&> HM.lookup key
case val of
Nothing -> pure Nothing
Just (Tomb{}) -> pure Nothing
Just (Entry _ (Del _)) -> pure Nothing
Just (Entry _ (New s)) -> pure (Just s)
Just (Entry _ (Off e)) -> liftIO do
r <- withMVar (csHandle sto) $ \ha -> do
try @_ @IOException do
hSeek ha AbsoluteSeek (fromIntegral $ idxEntryOffset e)
BS.hGet ha (fromIntegral $ idxEntrySize e)
either throwIO (pure . Just) r
compactStorageClose :: ForCompactStorage m => CompactStorage -> m ()
compactStorageClose sto = do
compactStorageCommit sto
-- FIXME: hangs-forever-on-io-exception
liftIO $ withMVar (csHandle sto) hClose
compactStorageFindLiveHeads :: ForCompactStorage m
=> FilePath
-> m [(EntryOffset, Header)]
compactStorageFindLiveHeads path = liftIO do
withFile path ReadMode $ \ha -> do
mv <- newMVar ha
flip fix (mempty :: [(EntryOffset, Header)]) $ \next acc -> do
what <- runMaybeT do
fwdOff <- hTell ha
-- fwd section
fwd <- lift (LBS.hGet ha 8)
<&> runGetOrFail getWord64be
>>= either (const mzero) pure
<&> view _3
h@(o,header) <- MaybeT $ readHeader mv (Just $ fromIntegral fwd)
let magicOk = hdrMagic header == headerMagic
let fwdOk = hdrFwdOffset header == fromIntegral fwdOff
if magicOk && fwdOk then
pure h
else
mzero
maybe (pure acc) (\h -> next ( h : acc) ) what
appendHeader :: ForCompactStorage m
=> Handle
-> FwdEntryOffset -- fwd section offset
-> Maybe EntryOffset -- prev. header
-> EntryOffset
-> EntryNum
-> m ()
appendHeader ha fwdOff poffset ioffset num = do
let bs = word16BE headerMagic -- 2
<> word16BE headerVersion -- 4
<> word64BE (coerce fwdOff) -- 12
<> word64BE (coerce ioffset) -- 20
<> word32BE (coerce num) -- 24
<> word64BE (coerce $ fromMaybe 0 poffset) -- 32
liftIO $ LBS.hPut ha (B.toLazyByteString bs)
readHeader :: ForCompactStorage m
=> MVar Handle
-> Maybe EntryOffset
-> m (Maybe (EntryOffset, Header))
readHeader mha moff = do
(off,bs) <- liftIO $ withMVar mha $ \ha -> do
case moff of
Nothing -> do
hSeek ha SeekFromEnd (negate $ headerSize 1)
Just off -> do
hSeek ha AbsoluteSeek (fromIntegral off)
p <- hTell ha <&> fromIntegral
(p,) <$> LBS.hGet ha (headerSize 1)
let what = flip runGetOrFail bs do
Header <$> getWord16be
<*> getWord16be
<*> getFwdOffset
<*> getOffset
<*> getNum
<*> getOffset
pure $ either (const Nothing) (fmap (off,) . Just . view _3) what
where
getOffset = EntryOffset <$> getWord64be
getNum = EntryNum <$> getWord32be
getFwdOffset = FwdEntryOffset <$> getWord64be
headerMagic :: Word16
headerMagic = 32264
headerVersion :: Word16
headerVersion = 1
headerSize :: Integral a => Word16 -> a
headerSize 1 = fromIntegral (32 :: Integer)
headerSize _ = error "unsupported header version"