diff --git a/.fixme/log b/.fixme/log index d93a4ec8..bf7ca57e 100644 --- a/.fixme/log +++ b/.fixme/log @@ -1,2 +1,3 @@ -(fixme-set "workflow" "test" "CG2C18TK8v") \ No newline at end of file +(fixme-set "assigned" "HPoqtobDAT" "voidlizard") +(fixme-set "workflow" "test" "HPoqtobDAT") \ No newline at end of file diff --git a/hbs2-core/hbs2-core.cabal b/hbs2-core/hbs2-core.cabal index b337620d..e6c2ddaa 100644 --- a/hbs2-core/hbs2-core.cabal +++ b/hbs2-core/hbs2-core.cabal @@ -81,6 +81,7 @@ library , HBS2.Data.Types.Crypto , HBS2.Data.Types.Peer , HBS2.Data.Types.Refs + , HBS2.Data.Bundle , HBS2.Defaults , HBS2.Events , HBS2.Hash @@ -115,6 +116,7 @@ library , HBS2.Prelude , HBS2.Prelude.Plated , HBS2.Storage + , HBS2.Storage.Operations , HBS2.System.Logger.Simple , HBS2.System.Logger.Simple.Class , Dialog.Core @@ -181,6 +183,7 @@ library , unordered-containers , unliftio , unliftio-core + , zlib hs-source-dirs: lib diff --git a/hbs2-core/lib/HBS2/Data/Bundle.hs b/hbs2-core/lib/HBS2/Data/Bundle.hs new file mode 100644 index 00000000..74261431 --- /dev/null +++ b/hbs2-core/lib/HBS2/Data/Bundle.hs @@ -0,0 +1,171 @@ +{-# Language AllowAmbiguousTypes #-} +{-# Language UndecidableInstances #-} +module HBS2.Data.Bundle where + +import HBS2.Prelude +import HBS2.Storage +import HBS2.Storage.Operations +import HBS2.Hash +import HBS2.Data.Types.Refs +import HBS2.Data.Detect + +import Data.Word + +import Codec.Compression.GZip as GZip +import Codec.Serialise +import Control.Monad +import Control.Monad.Trans.Maybe +import Data.ByteString.Lazy.Char8 (ByteString) +import Data.ByteString.Lazy.Char8 qualified as LBS +import Data.Functor +import Data.List qualified as List +import Data.Either + +import Streaming.Prelude qualified as S +import Streaming() + +{- HLINT ignore "Use newtype instead of data" -} + +-- у нас может быть много способов хранить данные: +-- сжимать целиком (эффективно, но медленно) +-- сжимать по секциям (быстрее, но менее эффективно) +data BundleHead = + BundleHeadSimple + { bundleHeadSectionSize :: Word16 + } + deriving stock (Generic,Show) + +instance Serialise BundleHead + +bundleHeadSize :: Integral a => a +bundleHeadSize = 64 + +data BundleSection = + BundleSection + { _bundleSectionSize :: Word32 + , _bundleSectionHash :: Maybe HashRef + } + deriving stock (Generic,Show) + +instance Serialise BundleSection + +sectionHeadSize :: Integral a => a +sectionHeadSize = 64 + +-- TODO: implement-create-bundle +-- создаёт bundle в том же самом хранилище, +-- допустим. +-- Возвращает HashRef +-- Если нам нужен сам бандл - всегда сможем +-- сделать hbs2 cat. +-- t HashRef -- это что-то, что даёт нам список хэшей? +-- для каждого хэша надо бы уметь считать +-- его данные, и куда-то их записать в лог. +-- лог может быть очень большим. как его +-- писать-то? допустим, через tmp файл -- тогда +-- довольно прямолинейно, но функция требует +-- файловой системы. +-- можно поблочно, но это прямо довольно-таки сложно +-- и неатомарно. Как же сделать? +-- непрямолинейное решение: +-- пишем в буфер (какой?) +-- как понаписали достат. кол - то что? меркл +-- дерево у нас создаётся через toPtree + +createBundle :: ( MonadIO m + , Storage sto HbSync ByteString IO + ) + => sto + -> [HashRef] + -> m (Maybe HashRef) + +createBundle sto refs = runMaybeT do + -- читать блок из сторейджа + blocks <- forM refs $ \href -> do + blk <- MaybeT $ liftIO $ getBlock sto (fromHashRef href) + + let compressed = compressWith params blk + let size = LBS.length compressed + let section = BundleSection (fromIntegral size) (Just href) + + let sbs = serialise section + let pad = sectionHeadSize - LBS.length sbs + let pads = LBS.replicate pad '\x0' + + pure (sbs <> pads <> compressed) + + let buHead = serialise (BundleHeadSimple sectionHeadSize) + let buPadded = buHead <> LBS.replicate (bundleHeadSize - LBS.length buHead) '\x0' + + let blob = buPadded <> mconcat blocks + + wtf <- liftIO $ writeAsMerkle sto blob + + pure (HashRef wtf) + + where + params = defaultCompressParams { compressLevel = bestSpeed } + +data ImportError = + ImportBlocksMissed [HashRef] + | ImportBadJournal + deriving stock (Eq,Show) + +missedBlocks :: ImportError -> [HashRef] +missedBlocks = \case + ImportBlocksMissed blk -> blk + _ -> mempty + +class MonadIO m => ImportBundle bundle m where + importBundle :: forall sto . Storage sto HbSync ByteString m + => sto + -> ( (Maybe HashRef, ByteString) -> m () ) + -> bundle + -> m (Either ImportError ()) + + +instance MonadIO m => ImportBundle HashRef m where + -- можем только целиком считать в память, потом пройтись по секциям и записать объекты + -- по памяти капец, что может быть + importBundle sto action bundle = do + let h = fromHashRef bundle + + res <- S.toList_ $ + deepScan ScanDeep yieldMissed h (lift . getBlock sto) $ \ha -> do + -- FIXME: annoying-shit + when (ha /= h) do + lift (getBlock sto ha) >>= maybe (yieldMissed ha) (S.yield . Right) + + let missed = lefts res + + if not (null missed) then + pure $ Left $ ImportBlocksMissed $ foldMap missedBlocks missed + else do + let bss = mconcat (rights res) + let (hs, bs) = LBS.splitAt bundleHeadSize bss + -- FIXME: fix-possible-memory-exhausting + case deserialiseOrFail @BundleHead hs of + Left{} -> pure $ Left ImportBadJournal + Right hd -> go hd bs + + where + yieldMissed = S.yield . Left . ImportBlocksMissed . pure . HashRef + + go hd bs + | LBS.null bs = pure $ Right () + | otherwise = do + let ss = bundleHeadSectionSize hd + let (bsh, allBsRest) = LBS.splitAt sectionHeadSize bs + case deserialiseOrFail @BundleSection bsh of + Left{} -> do + pure $ Left ImportBadJournal + + Right header -> do + let sz = fromIntegral $ _bundleSectionSize header + let (blk, rest) = LBS.splitAt sz allBsRest + if LBS.length blk /= sz then do + pure $ Left ImportBadJournal + else do + action (_bundleSectionHash header, GZip.decompress blk) + go hd rest + diff --git a/hbs2-core/lib/HBS2/Storage/Operations.hs b/hbs2-core/lib/HBS2/Storage/Operations.hs new file mode 100644 index 00000000..2d746cd1 --- /dev/null +++ b/hbs2-core/lib/HBS2/Storage/Operations.hs @@ -0,0 +1,38 @@ +{-# Language AllowAmbiguousTypes #-} +{-# Language UndecidableInstances #-} +module HBS2.Storage.Operations where + +import HBS2.Prelude.Plated +import HBS2.Hash +import HBS2.Storage +import HBS2.Merkle +import HBS2.Data.Types.Refs +import HBS2.Defaults + +import Streaming.Prelude qualified as S +import Streaming qualified as S +import Data.Functor +import Data.Function + +import Data.Bifunctor +import Data.ByteString.Lazy (ByteString) +import Data.ByteString.Lazy qualified as B + + +class (MonadIO m, Storage storage hash block m) => MerkleWriter block hash storage m where + writeAsMerkle :: storage -> block -> m (Hash hash) + +instance (MonadIO m, h ~ HbSync, Storage s h ByteString m) => MerkleWriter ByteString h s m where + writeAsMerkle sto bs = do + + hashes <- S.each (B.unpack bs) + & S.chunksOf (fromIntegral defBlockSize) + & S.mapped (fmap (first B.pack) . S.toList) + & S.mapM (\blk -> enqueueBlock sto blk >> pure blk) + & S.map (HashRef . hashObject) + & S.toList_ + + -- FIXME: handle-hardcode + let pt = toPTree (MaxSize 256) (MaxNum 256) hashes -- FIXME: settings + makeMerkle 0 pt $ \(_,_,bss) -> void $ putBlock sto bss + diff --git a/hbs2-storage-simple/test/Main.hs b/hbs2-storage-simple/test/Main.hs index c98199d2..c820789a 100644 --- a/hbs2-storage-simple/test/Main.hs +++ b/hbs2-storage-simple/test/Main.hs @@ -15,6 +15,7 @@ main = testCase "testSimpleStorageRandomReadWrite" testSimpleStorageRandomReadWrite , testCase "testSimpleStorageNoKeys" testSimpleStorageNoKeys , testCase "testSimpleStorageRefs" testSimpleStorageRefs + , testCase "testSimpleStorageBundles" testSimpleStorageBundles ] diff --git a/hbs2-storage-simple/test/TestSimpleStorage.hs b/hbs2-storage-simple/test/TestSimpleStorage.hs index fadda6e4..f4c97dcc 100644 --- a/hbs2-storage-simple/test/TestSimpleStorage.hs +++ b/hbs2-storage-simple/test/TestSimpleStorage.hs @@ -4,8 +4,10 @@ import HBS2.OrDie import HBS2.Hash import HBS2.Clock import HBS2.Prelude.Plated +import HBS2.Data.Types.Refs import HBS2.Storage import HBS2.Storage.Simple +import HBS2.Data.Bundle import Control.Monad.Except import Control.Monad @@ -23,6 +25,7 @@ import System.FilePath.Posix import System.IO.Temp import Test.QuickCheck import System.TimeIt +import System.IO import Test.Tasty.HUnit @@ -211,3 +214,44 @@ testSimpleStorageRefs = do pure () +testSimpleStorageBundles :: IO () +testSimpleStorageBundles = do + withSystemTempDirectory "simpleStorageTest" $ \dir -> do + + let opts = [ StoragePrefix (dir ".storage") + ] + + storage <- simpleStorageInit opts :: IO (SimpleStorage HbSync) + + worker <- async (simpleStorageWorker storage) + + link worker + + hPrint stderr "HERE I TEST BUNDLES" + + -- тут я хочу сгенерить 100 рандомных байтстрок + bss <- generate $ replicateM 100 $ do + n <- choose (1, 1024) + LBS.pack <$> vectorOf n (choose (32, 126 :: Word8)) + + -- записать их при помощи putBlock + -- сохранить их хэши + hashes <- catMaybes <$> mapM (putBlock storage) bss + + -- сделать bundle + bundle <- createBundle storage (fmap HashRef hashes) `orDie` "Can't create bundle" + + -- удалить их + mapM_ (delBlock storage) hashes + + -- убедиться, что реально удалены + here <- mapM (hasBlock storage) hashes + assertBool "all-blocks-deleted" (null (catMaybes here)) + + -- импортировать bundle + result <- importBundle storage (void . putBlock storage . snd) bundle + + hereWeGoAgain <- mapM (hasBlock storage) hashes + assertBool "all-blocks-here-again" (not (null (catMaybes hereWeGoAgain))) + + diff --git a/hbs2/Main.hs b/hbs2/Main.hs index afd5d721..7537fe9d 100644 --- a/hbs2/Main.hs +++ b/hbs2/Main.hs @@ -13,6 +13,7 @@ import HBS2.Net.Proto.RefLog(RefLogKey(..)) import HBS2.Prelude.Plated import HBS2.Storage.Simple import HBS2.Storage.Simple.Extra +import HBS2.Data.Bundle import HBS2.OrDie @@ -410,6 +411,7 @@ main = join . customExecParser (prefs showHelpOnError) $ <> command "show-peer-key" (info pShowPeerKey (progDesc "show peer key from credential file")) <> command "groupkey-new" (info pNewGroupkey (progDesc "generates a new groupkey")) <> command "reflog" (info pReflog (progDesc "reflog commands")) + <> command "bundle" (info pBundle(progDesc "bundle commands")) ) common = do @@ -528,3 +530,63 @@ main = join . customExecParser (prefs showHelpOnError) $ print $ "deleted" <+> pretty d hFlush stdout + pBundle = hsubparser ( command "create" (info pBundleCreate (progDesc "create bundle")) + <> command "list" (info pBundleList (progDesc "list bundle")) + <> command "import" (info pBundleImport (progDesc "import objects from bundle")) + ) + + pBundleCreate = do + o <- common + fname <- optional $ strOption (long "file" <> short 'f' <> help "hash list file (plaintext)") + pure $ withStore o $ \sto -> do + handle <- maybe (pure stdin) (flip openFile ReadMode . unOptFile) fname + + ls <- hGetContents handle <&> lines + let hashes = mapMaybe (fromStringMay @HashRef) ls + + when (length ls /= length hashes) do + die "Invalid hashref found" + + bundle <- createBundle sto hashes `orDie` "can't create bundle" + + print $ pretty bundle + + + pBundleImport = do + o <- common + mbHref <- strArgument (metavar "HASHREF") + pure $ withStore o $ \sto -> do + href <- pure (fromStringMay @HashRef mbHref) `orDie` "invalid hashref" + r <- importBundle sto (void . putBlock sto . snd) href + case r of + Right{} -> pure () + Left e -> die (show e) + + pBundleList = do + o <- common + mbHref <- strArgument (metavar "HASHREF") + doCheck <- optional (flag' False ( long "check" <> help "check hashes" )) <&> fromMaybe False + pure $ withStore o $ \sto -> do + href <- pure (fromStringMay @HashRef mbHref) `orDie` "invalid hashref" + r <- importBundle sto (outSection doCheck) href + case r of + Right{} -> pure () + Left e -> die (show e) + + where + outSection :: Bool -> (Maybe HashRef, ByteString) -> IO () + + outSection True x@(Just h, bs) = do + let hh = HashRef $ hashObject @HbSync bs + + unless (hh == h) do + die $ "hash mismatch:" <> show (pretty h <+> pretty hh) + + printHash x + + outSection False x = printHash x + + outSection _ x@(Nothing, _) = printHash x + + printHash = void . print . pretty . fst +