mirror of https://github.com/voidlizard/hbs2
compact-index
This commit is contained in:
parent
48132864a6
commit
8a5ffe2e7b
|
@ -68,9 +68,6 @@ import Data.Generics.Labels
|
|||
import Data.Generics.Product
|
||||
import Lens.Micro.Platform
|
||||
|
||||
import Data.Heap (Entry(..))
|
||||
import Data.Heap qualified as Heap
|
||||
|
||||
import Streaming.Prelude qualified as S
|
||||
|
||||
import System.Exit qualified as Q
|
||||
|
@ -400,46 +397,6 @@ mergeSortedFiles getKey file1 file2 outFile = do
|
|||
| otherwise = write y >> mergeEntries (x:xs) ys extractKey write
|
||||
|
||||
|
||||
mergeSortedFilesN :: forall m . MonadUnliftIO m
|
||||
=> (N.ByteString -> N.ByteString) -- ^ Функция извлечения ключа
|
||||
-> [FilePath] -- ^ Входные файлы
|
||||
-> FilePath -- ^ Выходной файл
|
||||
-> m ()
|
||||
|
||||
mergeSortedFilesN _ [] out = rm out
|
||||
|
||||
mergeSortedFilesN _ [_] out = rm out
|
||||
|
||||
mergeSortedFilesN getKey inputFiles outFile = do
|
||||
|
||||
mmaped <- for inputFiles $ \fn -> do
|
||||
liftIO (mmapFileByteString fn Nothing)
|
||||
|
||||
liftIO $ UIO.withBinaryFileAtomic outFile WriteMode $ \hOut -> do
|
||||
flip fix (mmaped, Heap.empty) $ \next (mmf, win) -> do
|
||||
let (entries, files) = fmap readEntry mmf & unzip
|
||||
let values = [ Entry (getKey e) e | e <- catMaybes entries ]
|
||||
let e' = (win <> Heap.fromList values) & Heap.uncons
|
||||
maybe1 e' none $ \(Entry _ e, newWin) -> do
|
||||
liftIO $ writeSection (LBS.fromStrict e) (LBS.hPutStr hOut)
|
||||
next (catMaybes files, newWin)
|
||||
|
||||
mapM_ rm inputFiles
|
||||
|
||||
where
|
||||
readEntry :: BS.ByteString -> (Maybe BS.ByteString, Maybe BS.ByteString)
|
||||
|
||||
readEntry src | BS.length src < 4 = (mzero, mzero)
|
||||
|
||||
readEntry src = do
|
||||
let (size, rest) = BS.splitAt 4 src & over _1 ( fromIntegral . N.word32 )
|
||||
let (e, rest2) = BS.splitAt size rest
|
||||
|
||||
if BS.length e < size then
|
||||
(mzero, mzero)
|
||||
else
|
||||
(Just e, Just rest2)
|
||||
|
||||
|
||||
theDict :: forall m . ( HBS2GitPerks m
|
||||
-- , HasClientAPI PeerAPI UNIX m
|
||||
|
@ -476,6 +433,8 @@ theDict = do
|
|||
|
||||
_ -> throwIO (BadFormException @C nil)
|
||||
|
||||
entry $ bindValue "index-block-size" (mkInt $ 32 * 1024 * 1024)
|
||||
|
||||
entry $ bindMatch "git:tree:ls" $ nil_ $ const do
|
||||
r <- gitReadTree "HEAD"
|
||||
for_ r $ \GitTreeEntry{..} -> do
|
||||
|
@ -1027,17 +986,12 @@ theDict = do
|
|||
on conflict (sha1)
|
||||
do update set tx = excluded.tx|] (p,h)
|
||||
|
||||
entry $ bindMatch "reflog:index:compact" $ nil_ $ \syn -> lift do
|
||||
reflog <- getGitRemoteKey >>= orThrowUser "reflog not set"
|
||||
idxPath <- getStatePath (AsBase58 reflog) <&> (</> "index")
|
||||
mkdir idxPath
|
||||
entry $ bindMatch "reflog:index:compact" $ nil_ $ \_ -> do
|
||||
size <- lookupValue "index-block-size" >>= \case
|
||||
LitIntVal n -> pure (fromIntegral n)
|
||||
_ -> pure 33554432
|
||||
|
||||
files <- dirFiles idxPath
|
||||
<&> filter ((== ".idx") . takeExtension)
|
||||
|
||||
out <- liftIO $ emptyTempFile idxPath "objects-.idx"
|
||||
|
||||
mergeSortedFilesN (BS.take 20) files out
|
||||
lift $ compactIndex size
|
||||
|
||||
entry $ bindMatch "reflog:index:path" $ nil_ $ const $ lift do
|
||||
indexPath >>= liftIO . print . pretty
|
||||
|
|
|
@ -106,7 +106,6 @@ class HasExportOpts m where
|
|||
getCompressionLevel :: m Int
|
||||
setCompressionLevel :: Int -> m ()
|
||||
|
||||
|
||||
class HasGitRemoteKey m where
|
||||
getGitRemoteKey :: m (Maybe GitRemoteKey)
|
||||
setGitRemoteKey :: GitRemoteKey -> m ()
|
||||
|
|
|
@ -13,6 +13,8 @@ import Data.ByteString.Lazy qualified as LBS
|
|||
import Data.List qualified as L
|
||||
import Network.ByteOrder qualified as N
|
||||
import System.IO.Temp as Temp
|
||||
import Data.Heap (Entry(..))
|
||||
import Data.Heap qualified as Heap
|
||||
import Data.ByteString.Lazy qualified as LBS
|
||||
import Data.Fixed
|
||||
import Data.Maybe
|
||||
|
@ -38,6 +40,7 @@ import Codec.Serialise
|
|||
import Streaming.Prelude qualified as S
|
||||
import Streaming hiding (run,chunksOf)
|
||||
import System.TimeIt
|
||||
import Lens.Micro.Platform
|
||||
|
||||
import UnliftIO
|
||||
import UnliftIO.IO.File qualified as UIO
|
||||
|
@ -85,6 +88,65 @@ data Index a =
|
|||
Index { entries :: [IndexEntry]
|
||||
}
|
||||
|
||||
|
||||
mergeSortedFilesN :: forall m . MonadUnliftIO m
|
||||
=> (N.ByteString -> N.ByteString) -- ^ Функция извлечения ключа
|
||||
-> [FilePath] -- ^ Входные файлы
|
||||
-> FilePath -- ^ Выходной файл
|
||||
-> m ()
|
||||
|
||||
mergeSortedFilesN _ [] out = rm out
|
||||
|
||||
mergeSortedFilesN _ [_] out = rm out
|
||||
|
||||
mergeSortedFilesN getKey inputFiles outFile = do
|
||||
|
||||
mmaped <- for inputFiles $ \fn -> do
|
||||
liftIO (mmapFileByteString fn Nothing)
|
||||
|
||||
liftIO $ UIO.withBinaryFileAtomic outFile WriteMode $ \hOut -> do
|
||||
flip fix (mmaped, Heap.empty) $ \next (mmf, win) -> do
|
||||
let (entries, files) = fmap readEntry mmf & unzip
|
||||
let values = [ Entry (getKey e) e | e <- catMaybes entries ]
|
||||
let e' = (win <> Heap.fromList values) & Heap.uncons
|
||||
maybe1 e' none $ \(Entry _ e, newWin) -> do
|
||||
liftIO $ writeSection (LBS.fromStrict e) (LBS.hPutStr hOut)
|
||||
next (catMaybes files, newWin)
|
||||
|
||||
mapM_ rm inputFiles
|
||||
|
||||
where
|
||||
readEntry :: BS.ByteString -> (Maybe BS.ByteString, Maybe BS.ByteString)
|
||||
|
||||
readEntry src | BS.length src < 4 = (mzero, mzero)
|
||||
|
||||
readEntry src = do
|
||||
let (size, rest) = BS.splitAt 4 src & over _1 ( fromIntegral . N.word32 )
|
||||
let (e, rest2) = BS.splitAt size rest
|
||||
|
||||
if BS.length e < size then
|
||||
(mzero, mzero)
|
||||
else
|
||||
(Just e, Just rest2)
|
||||
|
||||
compactIndex :: forall m . (Git3Perks m, MonadReader Git3Env m) => Natural -> m ()
|
||||
compactIndex maxSize = do
|
||||
reflog <- getGitRemoteKey >>= orThrowUser "reflog not set"
|
||||
idxPath <- getStatePath (AsBase58 reflog) <&> (</> "index")
|
||||
mkdir idxPath
|
||||
files <- listObjectIndexFiles <&> L.sortOn snd
|
||||
|
||||
let blocks = fix (\next (acc, group, remaining) ->
|
||||
case remaining of
|
||||
[] -> [reverse group | not (null group)]
|
||||
((file, size) : rest)
|
||||
| acc + size > maxSize -> reverse group : next (size, [(file, size)], rest)
|
||||
| otherwise -> next (acc + size, (file, size) : group, rest))
|
||||
|
||||
forM_ (blocks (0, [], files)) $ \block -> do
|
||||
out <- liftIO $ emptyTempFile idxPath "objects-.idx"
|
||||
mergeSortedFilesN (BS.take 20) (map fst block) out
|
||||
|
||||
openIndex :: forall a m . (Git3Perks m, MonadReader Git3Env m)
|
||||
=> m (Index a)
|
||||
|
||||
|
@ -294,3 +356,5 @@ updateReflogIndex = do
|
|||
-- notice $ pretty sha1 <+> pretty tx
|
||||
writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh)
|
||||
|
||||
lift $ compactIndex ( 32 * 1024 * 1024 )
|
||||
|
||||
|
|
Loading…
Reference in New Issue