This commit is contained in:
voidlizard 2024-12-30 13:32:57 +03:00
parent b1836d2081
commit b9e9d4799f
2 changed files with 152 additions and 1 deletions

View File

@ -1,3 +1,4 @@
{-# Language MultiWayIf #-}
module HBS2.System.Dir module HBS2.System.Dir
( module HBS2.System.Dir ( module HBS2.System.Dir
, module System.FilePath , module System.FilePath
@ -5,13 +6,16 @@ module HBS2.System.Dir
, module UnliftIO , module UnliftIO
) where ) where
import HBS2.Prelude.Plated
import System.FilePath import System.FilePath
import System.FilePattern import System.FilePattern
import System.Directory qualified as D import System.Directory qualified as D
import Data.ByteString.Lazy qualified as LBS import Data.ByteString.Lazy qualified as LBS
import UnliftIO import UnliftIO
import Control.Exception qualified as E import Control.Exception qualified as E
import Control.Monad
import Streaming.Prelude qualified as S
data MkDirOpt = MkDirOptNone data MkDirOpt = MkDirOptNone
@ -86,4 +90,30 @@ rm fn = liftIO $ D.removePathForcibly fn
home :: MonadIO m => m FilePath home :: MonadIO m => m FilePath
home = liftIO D.getHomeDirectory home = liftIO D.getHomeDirectory
data DirEntry = EntryFile FilePath | EntryDir FilePath | EntryOther FilePath
dirFiles :: MonadIO m => FilePath -> m [FilePath]
dirFiles d = S.toList_ $ do
dirEntries d $ \case
EntryFile f -> S.yield f >> pure True
_ -> pure True
dirEntries :: MonadIO m => FilePath -> ( DirEntry -> m Bool ) -> m ()
dirEntries dir what = do
es <- liftIO $ D.listDirectory dir
flip fix es $ \next -> \case
[] -> pure ()
(x:xs) -> do
let entry = dir </> x
isFile <- liftIO (D.doesFileExist entry)
isDir <- liftIO (D.doesDirectoryExist entry)
if | isFile -> continueThen (what (EntryFile entry)) (next xs)
| isDir -> continueThen (what (EntryDir entry)) (next xs)
| otherwise -> continueThen (what (EntryOther entry)) (next xs)
where
continueThen a b = do
r <- a
when r b

View File

@ -61,6 +61,8 @@ import Codec.Compression.Zlib qualified as Zlib
import Data.HashPSQ qualified as HPSQ import Data.HashPSQ qualified as HPSQ
import Data.HashPSQ (HashPSQ) import Data.HashPSQ (HashPSQ)
import qualified Data.OrdPSQ as PSQ
import Data.Maybe import Data.Maybe
import Data.List qualified as L import Data.List qualified as L
import Data.List (sortBy) import Data.List (sortBy)
@ -86,6 +88,8 @@ import Data.Generics.Product
import Lens.Micro.Platform import Lens.Micro.Platform
import Streaming.Prelude qualified as S import Streaming.Prelude qualified as S
import Streaming hiding (run,chunksOf)
import System.Exit qualified as Q import System.Exit qualified as Q
import System.Environment qualified as E import System.Environment qualified as E
import System.Process.Typed import System.Process.Typed
@ -549,6 +553,90 @@ class HasExportOpts m where
setCompressionLevel :: Int -> m () setCompressionLevel :: Int -> m ()
mergeSortedFiles :: forall m . MonadUnliftIO m
=> (ByteString -> ByteString)
-> FilePath
-> FilePath
-> FilePath
-> m ()
mergeSortedFiles getKey file1 file2 outFile = do
l1 <- parseFile file1
l2 <- parseFile file2
UIO.withBinaryFileAtomic outFile WriteMode $ \hOut ->
mergeEntries l1 l2 getKey (\s -> writeSection s (liftIO . LBS.hPutStr hOut))
mapM_ rm [file1, file2]
where
parseFile :: FilePath -> m [ByteString]
parseFile path = do
lbs <- liftIO $ LBS.readFile path
S.toList_ $ runConsumeLBS lbs $ readSections $ \_ sdata -> lift $ S.yield sdata
mergeEntries :: [ByteString]
-> [ByteString]
-> (ByteString -> ByteString)
-> (ByteString -> m ()) -> m ()
mergeEntries [] ys _ write = mapM_ write ys
mergeEntries xs [] _ write = mapM_ write xs
mergeEntries (x:xs) (y:ys) extractKey write
| extractKey x <= extractKey y = write x >> mergeEntries xs (y:ys) extractKey write
| otherwise = write y >> mergeEntries (x:xs) ys extractKey write
mergeSortedFilesN :: forall m . MonadUnliftIO m
=> (ByteString -> ByteString) -- ^ Функция извлечения ключа
-> [FilePath] -- ^ Входные файлы
-> FilePath -- ^ Выходной файл
-> m ()
mergeSortedFilesN getKey inputFiles outFile = do
-- Парсим все файлы
lists <- traverse parseFile inputFiles
-- Используем ленивые списки для обработки
UIO.withBinaryFileAtomic outFile WriteMode $ \hOut ->
mergeEntriesN lists getKey (\s -> writeSection s (liftIO . LBS.hPutStr hOut))
-- Удаляем исходные файлы
mapM_ rm inputFiles
where
parseFile :: FilePath -> m [ByteString]
parseFile path = do
lbs <- liftIO $ LBS.readFile path
S.toList_ $ runConsumeLBS lbs $ readSections $ \_ sdata -> lift $ S.yield sdata
mergeEntriesN :: [[ByteString]] -- ^ Списки данных
-> (ByteString -> ByteString) -- ^ Функция извлечения ключа
-> (ByteString -> m ()) -- ^ Функция записи
-> m ()
mergeEntriesN lists extractKey write = do
let initialQueue = buildQueue lists extractKey
mergeQueue initialQueue extractKey write
buildQueue :: [[ByteString]]
-> (ByteString -> ByteString)
-> PSQ.OrdPSQ ByteString Int [ByteString]
buildQueue xs extractKey =
foldr (\(i, x:xs') queue -> PSQ.insert (extractKey x) i (x:xs') queue) PSQ.empty (zip [0..] xs)
mergeQueue :: PSQ.OrdPSQ ByteString Int [ByteString]
-> (ByteString -> ByteString)
-> (ByteString -> m ())
-> m ()
mergeQueue queue extractKey write = unless (PSQ.null queue) $ do
let Just (key, _, x:xs', queue') = PSQ.minView queue
write x
let updatedQueue = if null xs'
then queue'
else PSQ.insert (extractKey (head xs')) 0 xs' queue'
mergeQueue updatedQueue extractKey write
theDict :: forall m . ( HBS2GitPerks m theDict :: forall m . ( HBS2GitPerks m
-- , HasClientAPI PeerAPI UNIX m -- , HasClientAPI PeerAPI UNIX m
-- , HasStorage m -- , HasStorage m
@ -1109,6 +1197,32 @@ theDict = do
on conflict (sha1) on conflict (sha1)
do update set tx = excluded.tx|] (p,h) do update set tx = excluded.tx|] (p,h)
entry $ bindMatch "test:git:reflog:index:merge" $ nil_ $ \case
[ StringLike f1, StringLike f2] -> lift do
mergeSortedFiles (LBS.take 20) f1 f2 "jopakita"
_ -> throwIO (BadFormException @C nil)
entry $ bindMatch "test:git:reflog:index:compact" $ nil_ $ \syn -> lift do
reflog <- getGitRemoteKey >>= orThrowUser "reflog not set"
idxPath <- getStatePath (AsBase58 reflog) <&> (</> "index")
mkdir idxPath
files <- dirFiles idxPath
<&> filter ((== ".idx") . takeExtension)
out <- liftIO $ emptyTempFile idxPath "objects-.idx"
mergeSortedFilesN (LBS.take 20) files out
-- let entriesListOf lbs = S.toList_ $ runConsumeLBS lbs $ readSections $ \s ss -> do
entry $ bindMatch "test:git:reflog:index:files" $ nil_ $ \syn -> lift do
reflog <- getGitRemoteKey >>= orThrowUser "reflog not set"
idxPath <- getStatePath (AsBase58 reflog) <&> (</> "index")
mkdir idxPath
idx <- dirFiles idxPath
for_ idx $ \f -> liftIO $ print $ pretty f
entry $ bindMatch "test:git:reflog:index" $ nil_ $ \syn -> lift $ connectedDo do entry $ bindMatch "test:git:reflog:index" $ nil_ $ \syn -> lift $ connectedDo do
reflog <- getGitRemoteKey >>= orThrowUser "reflog not set" reflog <- getGitRemoteKey >>= orThrowUser "reflog not set"
@ -1163,6 +1277,13 @@ theDict = do
-- notice $ pretty sha1 <+> pretty tx -- notice $ pretty sha1 <+> pretty tx
writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh) writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh)
files <- dirFiles idxPath
<&> filter ((== ".idx") . takeExtension)
out <- liftIO $ emptyTempFile idxPath "objects-.idx"
liftIO $ mergeSortedFilesN (LBS.take 20) files out
entry $ bindMatch "test:git:export" $ nil_ $ \syn -> lift $ connectedDo do entry $ bindMatch "test:git:export" $ nil_ $ \syn -> lift $ connectedDo do
let (opts, argz) = splitOpts [("--index",1),("--ref",1)] syn let (opts, argz) = splitOpts [("--index",1),("--ref",1)] syn