This commit is contained in:
voidlizard 2025-01-01 10:46:10 +03:00
parent 2dd26b3050
commit 98be2b056b
5 changed files with 192 additions and 159 deletions

View File

@ -10,6 +10,7 @@
module Main where module Main where
import HBS2.Git3.Prelude import HBS2.Git3.Prelude
import HBS2.Git3.State.Index
import HBS2.Peer.CLI.Detect import HBS2.Peer.CLI.Detect
import HBS2.Peer.RPC.API.LWWRef import HBS2.Peer.RPC.API.LWWRef
@ -306,29 +307,6 @@ readCommitChainHPSQ filt _ h0 action = flip runContT pure $ callCC \_ -> do
Nothing -> (a, Just (n, HS.fromList p)) Nothing -> (a, Just (n, HS.fromList p))
Just (l,s) -> (a, Just (min l n, s <> HS.fromList p)) Just (l,s) -> (a, Just (min l n, s <> HS.fromList p))
readLogFileLBS :: forall opts m . ( MonadIO m, ReadLogOpts opts, BytesReader m )
=> opts
-> ( GitHash -> Int -> ByteString -> m () )
-> m Int
readLogFileLBS _ action = flip fix 0 \go n -> do
done <- noBytesLeft
if done then pure n
else do
ssize <- readBytesMaybe 4
>>= orThrow SomeReadLogError
<&> fromIntegral . N.word32 . LBS.toStrict
hash <- readBytesMaybe 20
>>= orThrow SomeReadLogError
<&> GitHash . BS.copy . LBS.toStrict
sdata <- readBytesMaybe ( ssize - 20 )
>>= orThrow SomeReadLogError
void $ action hash (fromIntegral ssize) sdata
go (succ n)
readIndexFromFile :: forall m . MonadIO m readIndexFromFile :: forall m . MonadIO m
=> FilePath => FilePath
@ -747,22 +725,24 @@ theDict = do
_ -> throwIO (BadFormException @C nil) _ -> throwIO (BadFormException @C nil)
entry $ bindMatch "test:git:log:index:flat:search:binary" $ nil_ $ \syn -> lift do entry $ bindMatch "reflog:index:search" $ nil_ $ \syn -> lift $ flip runContT pure do
let (_, argz) = splitOpts [] syn let (_, argz) = splitOpts [] syn
let argzz = [ x | StringLike x <- argz ] hash <- headMay [ x | GitHashLike x <- argz ] & orThrowUser "need sha1"
hash <- headMay argzz rq <- newTQueueIO
>>= fromStringMay @GitHash
& orThrowUser "no hash specified"
idxName <- headMay (tail argzz) ContT $ withAsync $ startReflogIndexQueryQueue rq
& orThrowUser "no index specified"
file <- liftIO $ mmapFileByteString idxName Nothing answ_ <- newEmptyTMVarIO
r <- liftIO $ binarySearchBS 56 (BS.take 20 . BS.drop 4) (coerce hash) file
liftIO $ print $ pretty r atomically $ writeTQueue rq (hash,answ_)
answ <- atomically $ readTMVar answ_
for_ answ $ \a -> do
liftIO $ print $ pretty a
entry $ bindMatch "test:git:log:index:flat:search:linear:test" $ nil_ $ \case entry $ bindMatch "test:git:log:index:flat:search:linear:test" $ nil_ $ \case
[ StringLike fn ] -> do [ StringLike fn ] -> do
@ -1052,72 +1032,19 @@ theDict = do
mergeSortedFilesN (BS.take 20) files out mergeSortedFilesN (BS.take 20) files out
entry $ bindMatch "reflog:index:path" $ nil_ $ const $ lift do
indexPath >>= liftIO . print . pretty
-- let entriesListOf lbs = S.toList_ $ runConsumeLBS lbs $ readSections $ \s ss -> do -- let entriesListOf lbs = S.toList_ $ runConsumeLBS lbs $ readSections $ \s ss -> do
entry $ bindMatch "test:git:reflog:index:files" $ nil_ $ \syn -> lift do entry $ bindMatch "reflog:index:files" $ nil_ $ \syn -> lift do
reflog <- getGitRemoteKey >>= orThrowUser "reflog not set" files <- listObjectIndexFiles
idxPath <- getStatePath (AsBase58 reflog) <&> (</> "index") cur <- pwd
mkdir idxPath for_ files $ \(f',s) -> do
idx <- dirFiles idxPath let f = makeRelative cur f'
for_ idx $ \f -> liftIO $ print $ pretty f liftIO $ print $ fill 10 (pretty s) <+> pretty f
entry $ bindMatch "test:git:reflog:index" $ nil_ $ \syn -> lift $ connectedDo do entry $ bindMatch "reflog:index:build" $ nil_ $ const $ lift $ connectedDo do
reflog <- getGitRemoteKey >>= orThrowUser "reflog not set" writeReflogIndex
api <- getClientAPI @RefLogAPI @UNIX
sto <- getStorage
flip runContT pure do
what' <- lift $ callRpcWaitMay @RpcRefLogGet (TimeoutSec 2) api reflog
>>= orThrowUser "rpc timeout"
what <- ContT $ maybe1 what' none
idxPath <- getStatePath (AsBase58 reflog) <&> (</> "index")
mkdir idxPath
notice $ "STATE" <+> pretty idxPath
sink <- S.toList_ do
walkMerkle (coerce what) (getBlock sto) $ \case
Left{} -> throwIO MissedBlockError
Right (hs :: [HashRef]) -> do
for_ hs $ \h -> void $ runMaybeT do
tx <- getBlock sto (coerce h)
>>= toMPlus
RefLogUpdate{..} <- deserialiseOrFail @(RefLogUpdate L4Proto) tx
& toMPlus
AnnotatedHashRef _ href <- deserialiseOrFail @AnnotatedHashRef (LBS.fromStrict _refLogUpdData)
& toMPlus
-- FIXME: error logging
lbs <- liftIO (runExceptT (getTreeContents sto href))
>>= orThrow MissedBlockError
pieces <- S.toList_ do
void $ runConsumeLBS (ZstdL.decompress lbs) $ readLogFileLBS () $ \o s _ -> do
lift $ S.yield o
lift $ S.yield (h, pieces)
liftIO $ forConcurrently_ sink $ \(tx, pieces) -> do
idxName <- emptyTempFile idxPath "objects-.idx"
let ss = L.sort pieces
UIO.withBinaryFileAtomic idxName WriteMode $ \wh -> do
for_ ss $ \sha1 -> do
let key = coerce @_ @N.ByteString sha1
let value = coerce @_ @N.ByteString tx
-- notice $ pretty sha1 <+> pretty tx
writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh)
-- files <- dirFiles idxPath
-- <&> filter ((== ".idx") . takeExtension)
-- out <- liftIO $ emptyTempFile idxPath "objects-.idx"
-- liftIO $ mergeSortedFilesN (LBS.take 20) files out
entry $ bindMatch "test:git:export" $ nil_ $ \syn -> lift $ connectedDo do entry $ bindMatch "test:git:export" $ nil_ $ \syn -> lift $ connectedDo do
let (opts, argz) = splitOpts [("--index",1),("--ref",1)] syn let (opts, argz) = splitOpts [("--index",1),("--ref",1)] syn
@ -1438,26 +1365,6 @@ linearSearchLBS hash lbs = do
pure $ listToMaybe found pure $ listToMaybe found
binarySearchBS :: Monad m
=> Int -- ^ record size
-> ( BS.ByteString -> BS.ByteString ) -- ^ key extractor
-> BS.ByteString -- ^ key
-> BS.ByteString -- ^ source
-> m (Maybe Int)
binarySearchBS rs getKey s source = do
let maxn = BS.length source `div` rs
loop 0 maxn
where
loop l u | u <= l = pure Nothing
| otherwise = do
let e = getKey (BS.drop ( k * rs ) source)
case compare e s of
EQ -> pure $ Just (k * rs)
LT -> loop (k+1) u
GT -> loop l k
where k = (l + u) `div` 2
-- debugPrefix :: LoggerEntry -> LoggerEntry -- debugPrefix :: LoggerEntry -> LoggerEntry
debugPrefix = toStderr . logPrefix "[debug] " debugPrefix = toStderr . logPrefix "[debug] "

View File

@ -206,3 +206,25 @@ writeCompressedStreamZstd stream source sink = do
Nothing -> writeCompressedChunkZstd sink sn Nothing >> none Nothing -> writeCompressedChunkZstd sink sn Nothing >> none
Just lbs -> writeCompressedChunkZstd sink sn (Just lbs) >>= next Just lbs -> writeCompressedChunkZstd sink sn (Just lbs) >>= next
binarySearchBS :: Monad m
=> Int -- ^ record size
-> ( BS.ByteString -> BS.ByteString ) -- ^ key extractor
-> BS.ByteString -- ^ key
-> BS.ByteString -- ^ source
-> m (Maybe Int)
binarySearchBS rs getKey s source = do
let maxn = BS.length source `div` rs
loop 0 maxn
where
loop l u | u <= l = pure Nothing
| otherwise = do
let e = getKey (BS.drop ( k * rs ) source)
case compare e s of
EQ -> pure $ Just (k * rs)
LT -> loop (k+1) u
GT -> loop l k
where k = (l + u) `div` 2

View File

@ -26,7 +26,7 @@ import HBS2.Storage as Exported
import HBS2.Storage.Operations.Class as Exported import HBS2.Storage.Operations.Class as Exported
import HBS2.System.Logger.Simple.ANSI as Exported import HBS2.System.Logger.Simple.ANSI as Exported
import HBS2.Git3.Types import HBS2.Git3.Types as Exported
-- TODO: about-to-remove -- TODO: about-to-remove
import DBPipe.SQLite import DBPipe.SQLite
@ -42,7 +42,9 @@ import Data.HashSet (HashSet(..))
import Data.HashSet qualified as HS import Data.HashSet qualified as HS
import Data.Kind import Data.Kind
import System.Exit qualified as Q import System.Exit qualified as Q
import System.IO.MMap as Exported
import GHC.Natural as Exported
import UnliftIO as Exported import UnliftIO as Exported
@ -75,7 +77,9 @@ instance GitWritePacksOpts (HashSet GitWritePacksOptVal) where
excludeParents o = not $ HS.member WriteFullPack o excludeParents o = not $ HS.member WriteFullPack o
data Git3Exception = data Git3Exception =
Git3PeerNotConnected Git3PeerNotConnected
| Git3ReflogNotSet
| Git3RpcTimeout
deriving (Show,Typeable,Generic) deriving (Show,Typeable,Generic)
instance Exception Git3Exception instance Exception Git3Exception

View File

@ -7,10 +7,14 @@ import HBS2.Git3.State.Types
import HBS2.Data.Log.Structured import HBS2.Data.Log.Structured
import Data.ByteString qualified as BS
import Data.ByteString.Lazy ( ByteString )
import Data.ByteString.Lazy qualified as LBS
import Data.List qualified as L import Data.List qualified as L
import Network.ByteOrder qualified as N import Network.ByteOrder qualified as N
import System.IO.Temp as Temp import System.IO.Temp as Temp
import Data.ByteString.Lazy qualified as LBS import Data.ByteString.Lazy qualified as LBS
import Data.Maybe
import Codec.Compression.Zstd.Lazy qualified as ZstdL import Codec.Compression.Zstd.Lazy qualified as ZstdL
import Streaming.Prelude qualified as S import Streaming.Prelude qualified as S
@ -20,58 +24,151 @@ import UnliftIO
import UnliftIO.IO.File qualified as UIO import UnliftIO.IO.File qualified as UIO
-- writeReflogIndex = do
-- reflog <- getGitRemoteKey >>= orThrowUser "reflog not set" readLogFileLBS :: forall opts m . ( MonadIO m, ReadLogOpts opts, BytesReader m )
=> opts
-> ( GitHash -> Int -> ByteString -> m () )
-> m Int
-- api <- getClientAPI @RefLogAPI @UNIX readLogFileLBS _ action = flip fix 0 \go n -> do
done <- noBytesLeft
if done then pure n
else do
ssize <- readBytesMaybe 4
>>= orThrow SomeReadLogError
<&> fromIntegral . N.word32 . LBS.toStrict
-- sto <- getStorage hash <- readBytesMaybe 20
>>= orThrow SomeReadLogError
<&> GitHash . BS.copy . LBS.toStrict
-- flip runContT pure do sdata <- readBytesMaybe ( ssize - 20 )
>>= orThrow SomeReadLogError
-- what' <- lift $ callRpcWaitMay @RpcRefLogGet (TimeoutSec 2) api reflog void $ action hash (fromIntegral ssize) sdata
-- >>= orThrowUser "rpc timeout" go (succ n)
-- what <- ContT $ maybe1 what' none indexPath :: forall m . ( Git3Perks m
, MonadReader Git3Env m
, HasClientAPI PeerAPI UNIX m
, HasClientAPI RefLogAPI UNIX m
, HasStorage m
) => m FilePath
indexPath = do
reflog <- getGitRemoteKey >>= orThrow Git3ReflogNotSet
getStatePath (AsBase58 reflog) <&> (</> "index")
-- idxPath <- getStatePath (AsBase58 reflog) <&> (</> "index") listObjectIndexFiles :: forall m . ( Git3Perks m
-- mkdir idxPath , MonadReader Git3Env m
, HasClientAPI PeerAPI UNIX m
, HasClientAPI RefLogAPI UNIX m
, HasStorage m
) => m [(FilePath, Natural)]
-- notice $ "STATE" <+> pretty idxPath listObjectIndexFiles = do
path <- indexPath
dirFiles path
<&> filter ( ("objects*.idx" ?==) . takeFileName )
>>= \fs -> for fs $ \f -> do
z <- fileSize f <&> fromIntegral
pure (f,z)
-- sink <- S.toList_ do startReflogIndexQueryQueue :: forall m . ( Git3Perks m
-- walkMerkle (coerce what) (getBlock sto) $ \case , MonadReader Git3Env m
-- Left{} -> throwIO MissedBlockError , HasClientAPI PeerAPI UNIX m
-- Right (hs :: [HashRef]) -> do , HasClientAPI RefLogAPI UNIX m
-- for_ hs $ \h -> void $ runMaybeT do , HasStorage m
)
=> TQueue (GitHash, TMVar [HashRef])
-> m ()
-- tx <- getBlock sto (coerce h) startReflogIndexQueryQueue rq = flip runContT pure do
-- >>= toMPlus files <- lift $ listObjectIndexFiles <&> fmap fst
-- RefLogUpdate{..} <- deserialiseOrFail @(RefLogUpdate L4Proto) tx -- один файл - не более, чем один поток
-- & toMPlus -- мапим файлы
-- возвращаем функцию запроса?
-- для каждого файла -- мы создаём отдельную очередь,
-- нам надо искать во всех файлах
-- AnnotatedHashRef _ href <- deserialiseOrFail @AnnotatedHashRef (LBS.fromStrict _refLogUpdData) mmaped <- liftIO $ for files (liftIO . flip mmapFileByteString Nothing)
-- & toMPlus
-- -- FIXME: error logging forever $ liftIO do
-- lbs <- liftIO (runExceptT (getTreeContents sto href)) (githash, answ) <- atomically $ readTQueue rq
-- >>= orThrow MissedBlockError
-- pieces <- S.toList_ do let s = coerce githash
-- void $ runConsumeLBS (ZstdL.decompress lbs) $ readLogFileLBS () $ \o s _ -> do
-- lift $ S.yield o
-- lift $ S.yield (h, pieces) found <- forConcurrently mmaped $ \bs -> runMaybeT do
-- FIXME: size-hardcodes
w <- binarySearchBS 56 ( BS.take 20 . BS.drop 4 ) s bs
>>= toMPlus
let v = BS.drop ( w * 56 ) bs & BS.take 32
pure $ coerce @_ @HashRef v
atomically $ writeTMVar answ ( catMaybes found )
writeReflogIndex :: forall m . ( Git3Perks m
, MonadReader Git3Env m
, HasClientAPI PeerAPI UNIX m
, HasClientAPI RefLogAPI UNIX m
, HasStorage m
) => m ()
writeReflogIndex = do
reflog <- getGitRemoteKey >>= orThrow Git3ReflogNotSet
api <- getClientAPI @RefLogAPI @UNIX
sto <- getStorage
flip runContT pure do
what' <- lift $ callRpcWaitMay @RpcRefLogGet (TimeoutSec 2) api reflog
>>= orThrow Git3RpcTimeout
what <- ContT $ maybe1 what' none
idxPath <- lift indexPath
mkdir idxPath
notice $ "STATE" <+> pretty idxPath
sink <- S.toList_ do
walkMerkle (coerce what) (getBlock sto) $ \case
Left{} -> throwIO MissedBlockError
Right (hs :: [HashRef]) -> do
for_ hs $ \h -> void $ runMaybeT do
tx <- getBlock sto (coerce h)
>>= toMPlus
RefLogUpdate{..} <- deserialiseOrFail @(RefLogUpdate L4Proto) tx
& toMPlus
AnnotatedHashRef _ href <- deserialiseOrFail @AnnotatedHashRef (LBS.fromStrict _refLogUpdData)
& toMPlus
-- FIXME: error logging
lbs <- liftIO (runExceptT (getTreeContents sto href))
>>= orThrow MissedBlockError
pieces <- S.toList_ do
void $ runConsumeLBS (ZstdL.decompress lbs) $ readLogFileLBS () $ \o s _ -> do
lift $ S.yield o
lift $ S.yield (h, pieces)
liftIO $ forConcurrently_ sink $ \(tx, pieces) -> do
idxName <- emptyTempFile idxPath "objects-.idx"
let ss = L.sort pieces
UIO.withBinaryFileAtomic idxName WriteMode $ \wh -> do
for_ ss $ \sha1 -> do
let key = coerce @_ @N.ByteString sha1
let value = coerce @_ @N.ByteString tx
-- notice $ pretty sha1 <+> pretty tx
writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh)
-- liftIO $ forConcurrently_ sink $ \(tx, pieces) -> do
-- idxName <- emptyTempFile idxPath "objects-.idx"
-- let ss = L.sort pieces
-- UIO.withBinaryFileAtomic idxName WriteMode $ \wh -> do
-- for_ ss $ \sha1 -> do
-- let key = coerce @_ @N.ByteString sha1
-- let value = coerce @_ @N.ByteString tx
-- -- notice $ pretty sha1 <+> pretty tx
-- writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh)

View File

@ -1,8 +1,11 @@
module HBS2.Git3.Types where module HBS2.Git3.Types
( module HBS2.Git3.Types
, module Exported
) where
import HBS2.Prelude.Plated import HBS2.Prelude.Plated
import HBS2.Net.Auth.Credentials import HBS2.Net.Auth.Credentials
import HBS2.Git.Local import HBS2.Git.Local as Exported
type GitRemoteKey = PubKey 'Sign 'HBS2Basic type GitRemoteKey = PubKey 'Sign 'HBS2Basic