HPoqtobDAT implement-create-bundle

This commit is contained in:
Dmitry Zuikov 2023-09-14 11:04:56 +03:00
parent 5ffb44ac49
commit ed279f1d34
7 changed files with 321 additions and 1 deletions

View File

@ -1,2 +1,3 @@
(fixme-set "workflow" "test" "CG2C18TK8v")
(fixme-set "assigned" "HPoqtobDAT" "voidlizard")
(fixme-set "workflow" "test" "HPoqtobDAT")

View File

@ -81,6 +81,7 @@ library
, HBS2.Data.Types.Crypto
, HBS2.Data.Types.Peer
, HBS2.Data.Types.Refs
, HBS2.Data.Bundle
, HBS2.Defaults
, HBS2.Events
, HBS2.Hash
@ -115,6 +116,7 @@ library
, HBS2.Prelude
, HBS2.Prelude.Plated
, HBS2.Storage
, HBS2.Storage.Operations
, HBS2.System.Logger.Simple
, HBS2.System.Logger.Simple.Class
, Dialog.Core
@ -181,6 +183,7 @@ library
, unordered-containers
, unliftio
, unliftio-core
, zlib
hs-source-dirs: lib

View File

@ -0,0 +1,171 @@
{-# Language AllowAmbiguousTypes #-}
{-# Language UndecidableInstances #-}
module HBS2.Data.Bundle where
import HBS2.Prelude
import HBS2.Storage
import HBS2.Storage.Operations
import HBS2.Hash
import HBS2.Data.Types.Refs
import HBS2.Data.Detect
import Data.Word
import Codec.Compression.GZip as GZip
import Codec.Serialise
import Control.Monad
import Control.Monad.Trans.Maybe
import Data.ByteString.Lazy.Char8 (ByteString)
import Data.ByteString.Lazy.Char8 qualified as LBS
import Data.Functor
import Data.List qualified as List
import Data.Either
import Streaming.Prelude qualified as S
import Streaming()
{- HLINT ignore "Use newtype instead of data" -}
-- у нас может быть много способов хранить данные:
-- сжимать целиком (эффективно, но медленно)
-- сжимать по секциям (быстрее, но менее эффективно)
data BundleHead =
BundleHeadSimple
{ bundleHeadSectionSize :: Word16
}
deriving stock (Generic,Show)
instance Serialise BundleHead
bundleHeadSize :: Integral a => a
bundleHeadSize = 64
data BundleSection =
BundleSection
{ _bundleSectionSize :: Word32
, _bundleSectionHash :: Maybe HashRef
}
deriving stock (Generic,Show)
instance Serialise BundleSection
sectionHeadSize :: Integral a => a
sectionHeadSize = 64
-- TODO: implement-create-bundle
-- создаёт bundle в том же самом хранилище,
-- допустим.
-- Возвращает HashRef
-- Если нам нужен сам бандл - всегда сможем
-- сделать hbs2 cat.
-- t HashRef -- это что-то, что даёт нам список хэшей?
-- для каждого хэша надо бы уметь считать
-- его данные, и куда-то их записать в лог.
-- лог может быть очень большим. как его
-- писать-то? допустим, через tmp файл -- тогда
-- довольно прямолинейно, но функция требует
-- файловой системы.
-- можно поблочно, но это прямо довольно-таки сложно
-- и неатомарно. Как же сделать?
-- непрямолинейное решение:
-- пишем в буфер (какой?)
-- как понаписали достат. кол - то что? меркл
-- дерево у нас создаётся через toPtree
createBundle :: ( MonadIO m
, Storage sto HbSync ByteString IO
)
=> sto
-> [HashRef]
-> m (Maybe HashRef)
createBundle sto refs = runMaybeT do
-- читать блок из сторейджа
blocks <- forM refs $ \href -> do
blk <- MaybeT $ liftIO $ getBlock sto (fromHashRef href)
let compressed = compressWith params blk
let size = LBS.length compressed
let section = BundleSection (fromIntegral size) (Just href)
let sbs = serialise section
let pad = sectionHeadSize - LBS.length sbs
let pads = LBS.replicate pad '\x0'
pure (sbs <> pads <> compressed)
let buHead = serialise (BundleHeadSimple sectionHeadSize)
let buPadded = buHead <> LBS.replicate (bundleHeadSize - LBS.length buHead) '\x0'
let blob = buPadded <> mconcat blocks
wtf <- liftIO $ writeAsMerkle sto blob
pure (HashRef wtf)
where
params = defaultCompressParams { compressLevel = bestSpeed }
data ImportError =
ImportBlocksMissed [HashRef]
| ImportBadJournal
deriving stock (Eq,Show)
missedBlocks :: ImportError -> [HashRef]
missedBlocks = \case
ImportBlocksMissed blk -> blk
_ -> mempty
class MonadIO m => ImportBundle bundle m where
importBundle :: forall sto . Storage sto HbSync ByteString m
=> sto
-> ( (Maybe HashRef, ByteString) -> m () )
-> bundle
-> m (Either ImportError ())
instance MonadIO m => ImportBundle HashRef m where
-- можем только целиком считать в память, потом пройтись по секциям и записать объекты
-- по памяти капец, что может быть
importBundle sto action bundle = do
let h = fromHashRef bundle
res <- S.toList_ $
deepScan ScanDeep yieldMissed h (lift . getBlock sto) $ \ha -> do
-- FIXME: annoying-shit
when (ha /= h) do
lift (getBlock sto ha) >>= maybe (yieldMissed ha) (S.yield . Right)
let missed = lefts res
if not (null missed) then
pure $ Left $ ImportBlocksMissed $ foldMap missedBlocks missed
else do
let bss = mconcat (rights res)
let (hs, bs) = LBS.splitAt bundleHeadSize bss
-- FIXME: fix-possible-memory-exhausting
case deserialiseOrFail @BundleHead hs of
Left{} -> pure $ Left ImportBadJournal
Right hd -> go hd bs
where
yieldMissed = S.yield . Left . ImportBlocksMissed . pure . HashRef
go hd bs
| LBS.null bs = pure $ Right ()
| otherwise = do
let ss = bundleHeadSectionSize hd
let (bsh, allBsRest) = LBS.splitAt sectionHeadSize bs
case deserialiseOrFail @BundleSection bsh of
Left{} -> do
pure $ Left ImportBadJournal
Right header -> do
let sz = fromIntegral $ _bundleSectionSize header
let (blk, rest) = LBS.splitAt sz allBsRest
if LBS.length blk /= sz then do
pure $ Left ImportBadJournal
else do
action (_bundleSectionHash header, GZip.decompress blk)
go hd rest

View File

@ -0,0 +1,38 @@
{-# Language AllowAmbiguousTypes #-}
{-# Language UndecidableInstances #-}
module HBS2.Storage.Operations where
import HBS2.Prelude.Plated
import HBS2.Hash
import HBS2.Storage
import HBS2.Merkle
import HBS2.Data.Types.Refs
import HBS2.Defaults
import Streaming.Prelude qualified as S
import Streaming qualified as S
import Data.Functor
import Data.Function
import Data.Bifunctor
import Data.ByteString.Lazy (ByteString)
import Data.ByteString.Lazy qualified as B
class (MonadIO m, Storage storage hash block m) => MerkleWriter block hash storage m where
writeAsMerkle :: storage -> block -> m (Hash hash)
instance (MonadIO m, h ~ HbSync, Storage s h ByteString m) => MerkleWriter ByteString h s m where
writeAsMerkle sto bs = do
hashes <- S.each (B.unpack bs)
& S.chunksOf (fromIntegral defBlockSize)
& S.mapped (fmap (first B.pack) . S.toList)
& S.mapM (\blk -> enqueueBlock sto blk >> pure blk)
& S.map (HashRef . hashObject)
& S.toList_
-- FIXME: handle-hardcode
let pt = toPTree (MaxSize 256) (MaxNum 256) hashes -- FIXME: settings
makeMerkle 0 pt $ \(_,_,bss) -> void $ putBlock sto bss

View File

@ -15,6 +15,7 @@ main =
testCase "testSimpleStorageRandomReadWrite" testSimpleStorageRandomReadWrite
, testCase "testSimpleStorageNoKeys" testSimpleStorageNoKeys
, testCase "testSimpleStorageRefs" testSimpleStorageRefs
, testCase "testSimpleStorageBundles" testSimpleStorageBundles
]

View File

@ -4,8 +4,10 @@ import HBS2.OrDie
import HBS2.Hash
import HBS2.Clock
import HBS2.Prelude.Plated
import HBS2.Data.Types.Refs
import HBS2.Storage
import HBS2.Storage.Simple
import HBS2.Data.Bundle
import Control.Monad.Except
import Control.Monad
@ -23,6 +25,7 @@ import System.FilePath.Posix
import System.IO.Temp
import Test.QuickCheck
import System.TimeIt
import System.IO
import Test.Tasty.HUnit
@ -211,3 +214,44 @@ testSimpleStorageRefs = do
pure ()
testSimpleStorageBundles :: IO ()
testSimpleStorageBundles = do
withSystemTempDirectory "simpleStorageTest" $ \dir -> do
let opts = [ StoragePrefix (dir </> ".storage")
]
storage <- simpleStorageInit opts :: IO (SimpleStorage HbSync)
worker <- async (simpleStorageWorker storage)
link worker
hPrint stderr "HERE I TEST BUNDLES"
-- тут я хочу сгенерить 100 рандомных байтстрок
bss <- generate $ replicateM 100 $ do
n <- choose (1, 1024)
LBS.pack <$> vectorOf n (choose (32, 126 :: Word8))
-- записать их при помощи putBlock
-- сохранить их хэши
hashes <- catMaybes <$> mapM (putBlock storage) bss
-- сделать bundle
bundle <- createBundle storage (fmap HashRef hashes) `orDie` "Can't create bundle"
-- удалить их
mapM_ (delBlock storage) hashes
-- убедиться, что реально удалены
here <- mapM (hasBlock storage) hashes
assertBool "all-blocks-deleted" (null (catMaybes here))
-- импортировать bundle
result <- importBundle storage (void . putBlock storage . snd) bundle
hereWeGoAgain <- mapM (hasBlock storage) hashes
assertBool "all-blocks-here-again" (not (null (catMaybes hereWeGoAgain)))

View File

@ -13,6 +13,7 @@ import HBS2.Net.Proto.RefLog(RefLogKey(..))
import HBS2.Prelude.Plated
import HBS2.Storage.Simple
import HBS2.Storage.Simple.Extra
import HBS2.Data.Bundle
import HBS2.OrDie
@ -410,6 +411,7 @@ main = join . customExecParser (prefs showHelpOnError) $
<> command "show-peer-key" (info pShowPeerKey (progDesc "show peer key from credential file"))
<> command "groupkey-new" (info pNewGroupkey (progDesc "generates a new groupkey"))
<> command "reflog" (info pReflog (progDesc "reflog commands"))
<> command "bundle" (info pBundle(progDesc "bundle commands"))
)
common = do
@ -528,3 +530,63 @@ main = join . customExecParser (prefs showHelpOnError) $
print $ "deleted" <+> pretty d
hFlush stdout
pBundle = hsubparser ( command "create" (info pBundleCreate (progDesc "create bundle"))
<> command "list" (info pBundleList (progDesc "list bundle"))
<> command "import" (info pBundleImport (progDesc "import objects from bundle"))
)
pBundleCreate = do
o <- common
fname <- optional $ strOption (long "file" <> short 'f' <> help "hash list file (plaintext)")
pure $ withStore o $ \sto -> do
handle <- maybe (pure stdin) (flip openFile ReadMode . unOptFile) fname
ls <- hGetContents handle <&> lines
let hashes = mapMaybe (fromStringMay @HashRef) ls
when (length ls /= length hashes) do
die "Invalid hashref found"
bundle <- createBundle sto hashes `orDie` "can't create bundle"
print $ pretty bundle
pBundleImport = do
o <- common
mbHref <- strArgument (metavar "HASHREF")
pure $ withStore o $ \sto -> do
href <- pure (fromStringMay @HashRef mbHref) `orDie` "invalid hashref"
r <- importBundle sto (void . putBlock sto . snd) href
case r of
Right{} -> pure ()
Left e -> die (show e)
pBundleList = do
o <- common
mbHref <- strArgument (metavar "HASHREF")
doCheck <- optional (flag' False ( long "check" <> help "check hashes" )) <&> fromMaybe False
pure $ withStore o $ \sto -> do
href <- pure (fromStringMay @HashRef mbHref) `orDie` "invalid hashref"
r <- importBundle sto (outSection doCheck) href
case r of
Right{} -> pure ()
Left e -> die (show e)
where
outSection :: Bool -> (Maybe HashRef, ByteString) -> IO ()
outSection True x@(Just h, bs) = do
let hh = HashRef $ hashObject @HbSync bs
unless (hh == h) do
die $ "hash mismatch:" <> show (pretty h <+> pretty hh)
printHash x
outSection False x = printHash x
outSection _ x@(Nothing, _) = printHash x
printHash = void . print . pretty . fst