mirror of https://github.com/voidlizard/hbs2

compact-index

This commit is contained in:
  parent 48132864a6
  commit 8a5ffe2e7b
@@ -68,9 +68,6 @@ import Data.Generics.Labels
 import Data.Generics.Product
 import Lens.Micro.Platform

-import Data.Heap (Entry(..))
-import Data.Heap qualified as Heap
-
 import Streaming.Prelude qualified as S

 import System.Exit qualified as Q
@@ -400,46 +397,6 @@ mergeSortedFiles getKey file1 file2 outFile = do
       | otherwise = write y >> mergeEntries (x:xs) ys extractKey write

-
-mergeSortedFilesN :: forall m . MonadUnliftIO m
-                  => (N.ByteString -> N.ByteString) -- ^ key extraction function
-                  -> [FilePath]                     -- ^ input files
-                  -> FilePath                       -- ^ output file
-                  -> m ()
-
-mergeSortedFilesN _ [] out = rm out
-
-mergeSortedFilesN _ [_] out = rm out
-
-mergeSortedFilesN getKey inputFiles outFile = do
-
-  mmaped <- for inputFiles $ \fn -> do
-    liftIO (mmapFileByteString fn Nothing)
-
-  liftIO $ UIO.withBinaryFileAtomic outFile WriteMode $ \hOut -> do
-    flip fix (mmaped, Heap.empty) $ \next (mmf, win) -> do
-      let (entries, files) = fmap readEntry mmf & unzip
-      let values = [ Entry (getKey e) e | e <- catMaybes entries ]
-      let e' = (win <> Heap.fromList values) & Heap.uncons
-      maybe1 e' none $ \(Entry _ e, newWin) -> do
-        liftIO $ writeSection (LBS.fromStrict e) (LBS.hPutStr hOut)
-        next (catMaybes files, newWin)
-
-  mapM_ rm inputFiles
-
-  where
-    readEntry :: BS.ByteString -> (Maybe BS.ByteString, Maybe BS.ByteString)
-
-    readEntry src | BS.length src < 4 = (mzero, mzero)
-
-    readEntry src = do
-      let (size, rest) = BS.splitAt 4 src & over _1 ( fromIntegral . N.word32 )
-      let (e, rest2) = BS.splitAt size rest
-
-      if BS.length e < size then
-        (mzero, mzero)
-      else
-        (Just e, Just rest2)


 theDict :: forall m . ( HBS2GitPerks m
                       -- , HasClientAPI PeerAPI UNIX m
@@ -476,6 +433,8 @@ theDict = do

         _ -> throwIO (BadFormException @C nil)

+    entry $ bindValue "index-block-size" (mkInt $ 32 * 1024 * 1024)
+
     entry $ bindMatch "git:tree:ls" $ nil_ $ const do
       r <- gitReadTree "HEAD"
       for_ r $ \GitTreeEntry{..} -> do
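Note: the new "index-block-size" binding defaults to mkInt (32 * 1024 * 1024) = 33554432 bytes, i.e. 32 MiB, which is the same literal the reworked "reflog:index:compact" handler in the next hunk falls back to when the binding is missing or not an integer.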
@@ -1027,17 +986,12 @@ theDict = do
                       on conflict (sha1)
                       do update set tx = excluded.tx|] (p,h)

-    entry $ bindMatch "reflog:index:compact" $ nil_ $ \syn -> lift do
-      reflog <- getGitRemoteKey >>= orThrowUser "reflog not set"
-      idxPath <- getStatePath (AsBase58 reflog) <&> (</> "index")
-      mkdir idxPath
-
-      files <- dirFiles idxPath
-                 <&> filter ((== ".idx") . takeExtension)
-
-      out <- liftIO $ emptyTempFile idxPath "objects-.idx"
-
-      mergeSortedFilesN (BS.take 20) files out
+    entry $ bindMatch "reflog:index:compact" $ nil_ $ \_ -> do
+      size <- lookupValue "index-block-size" >>= \case
+        LitIntVal n -> pure (fromIntegral n)
+        _ -> pure 33554432
+
+      lift $ compactIndex size

     entry $ bindMatch "reflog:index:path" $ nil_ $ const $ lift do
       indexPath >>= liftIO . print . pretty
@@ -106,7 +106,6 @@ class HasExportOpts m where
   getCompressionLevel :: m Int
   setCompressionLevel :: Int -> m ()

-
 class HasGitRemoteKey m where
   getGitRemoteKey :: m (Maybe GitRemoteKey)
   setGitRemoteKey :: GitRemoteKey -> m ()
@@ -13,6 +13,8 @@ import Data.ByteString.Lazy qualified as LBS
 import Data.List qualified as L
 import Network.ByteOrder qualified as N
 import System.IO.Temp as Temp
+import Data.Heap (Entry(..))
+import Data.Heap qualified as Heap
 import Data.ByteString.Lazy qualified as LBS
 import Data.Fixed
 import Data.Maybe
@@ -38,6 +40,7 @@ import Codec.Serialise
 import Streaming.Prelude qualified as S
 import Streaming hiding (run,chunksOf)
 import System.TimeIt
+import Lens.Micro.Platform

 import UnliftIO
 import UnliftIO.IO.File qualified as UIO
@@ -85,6 +88,65 @@ data Index a =
   Index { entries :: [IndexEntry]
         }

+
+mergeSortedFilesN :: forall m . MonadUnliftIO m
+                  => (N.ByteString -> N.ByteString) -- ^ key extraction function
+                  -> [FilePath]                     -- ^ input files
+                  -> FilePath                       -- ^ output file
+                  -> m ()
+
+mergeSortedFilesN _ [] out = rm out
+
+mergeSortedFilesN _ [_] out = rm out
+
+mergeSortedFilesN getKey inputFiles outFile = do
+
+  mmaped <- for inputFiles $ \fn -> do
+    liftIO (mmapFileByteString fn Nothing)
+
+  liftIO $ UIO.withBinaryFileAtomic outFile WriteMode $ \hOut -> do
+    flip fix (mmaped, Heap.empty) $ \next (mmf, win) -> do
+      let (entries, files) = fmap readEntry mmf & unzip
+      let values = [ Entry (getKey e) e | e <- catMaybes entries ]
+      let e' = (win <> Heap.fromList values) & Heap.uncons
+      maybe1 e' none $ \(Entry _ e, newWin) -> do
+        liftIO $ writeSection (LBS.fromStrict e) (LBS.hPutStr hOut)
+        next (catMaybes files, newWin)
+
+  mapM_ rm inputFiles
+
+  where
+    readEntry :: BS.ByteString -> (Maybe BS.ByteString, Maybe BS.ByteString)
+
+    readEntry src | BS.length src < 4 = (mzero, mzero)
+
+    readEntry src = do
+      let (size, rest) = BS.splitAt 4 src & over _1 ( fromIntegral . N.word32 )
+      let (e, rest2) = BS.splitAt size rest
+
+      if BS.length e < size then
+        (mzero, mzero)
+      else
+        (Just e, Just rest2)
+
+
+compactIndex :: forall m . (Git3Perks m, MonadReader Git3Env m) => Natural -> m ()
+compactIndex maxSize = do
+  reflog <- getGitRemoteKey >>= orThrowUser "reflog not set"
+  idxPath <- getStatePath (AsBase58 reflog) <&> (</> "index")
+  mkdir idxPath
+  files <- listObjectIndexFiles <&> L.sortOn snd
+
+  let blocks = fix (\next (acc, group, remaining) ->
+        case remaining of
+          [] -> [reverse group | not (null group)]
+          ((file, size) : rest)
+            | acc + size > maxSize -> reverse group : next (size, [(file, size)], rest)
+            | otherwise -> next (acc + size, (file, size) : group, rest))
+
+  forM_ (blocks (0, [], files)) $ \block -> do
+    out <- liftIO $ emptyTempFile idxPath "objects-.idx"
+    mergeSortedFilesN (BS.take 20) (map fst block) out
+
 openIndex :: forall a m . (Git3Perks m, MonadReader Git3Env m)
           => m (Index a)
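For readers skimming the diff: mergeSortedFilesN above is a heap-driven k-way merge over length-prefixed sections read from mmapped .idx files. Below is a minimal standalone sketch of the same idea on plain in-memory lists, assuming the heaps package for Data.Heap; mergeSortedN, the module and the sample inputs are illustrative names, not part of this commit.

{-# LANGUAGE ImportQualifiedPost #-}
module Main where

import Data.Heap (Entry(..))
import Data.Heap qualified as Heap

-- Merge several individually sorted lists into one sorted stream.
-- Mirrors the loop in mergeSortedFilesN: every round, take the head of each
-- remaining input, buffer it in the heap ("window"), and emit the minimum.
mergeSortedN :: Ord k => (a -> k) -> [[a]] -> [a]
mergeSortedN getKey = go Heap.empty
  where
    go win lists =
      let heads = [ Entry (getKey x) x | (x:_) <- lists ]  -- one head per non-empty input
          rests = [ xs | (_:xs) <- lists ]                 -- their tails
      in case Heap.uncons (win <> Heap.fromList heads) of
           Nothing                -> []                    -- nothing buffered, nothing left
           Just (Entry _ x, win') -> x : go win' rests

main :: IO ()
main = print (mergeSortedN id [[1,4,7], [2,5,8], [3,6,9 :: Int]])
-- prints [1,2,3,4,5,6,7,8,9]

Because only the smallest buffered element is emitted each round, the result is sorted exactly when every input is already sorted by the chosen key, which in the real code is the 20-byte prefix selected by BS.take 20.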
@@ -294,3 +356,5 @@ updateReflogIndex = do
        -- notice $ pretty sha1 <+> pretty tx
        writeSection ( LBS.fromChunks [key,value] ) (LBS.hPutStr wh)

+
+  lift $ compactIndex ( 32 * 1024 * 1024 )
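compactIndex, defined in the hunk above and invoked here with a 32 MiB limit, sorts the reflog's .idx files by size, packs them into batches whose total size stays under the limit, and merges each batch into one new index file. A small standalone sketch of just that grouping step; groupBySize, main and the sample sizes are illustrative (sizes shown in arbitrary units, while the real call passes bytes):

module Main where

import Numeric.Natural (Natural)

-- Greedily pack (file, size) pairs into batches of total size <= maxSize,
-- mirroring the local `blocks` helper inside compactIndex.
groupBySize :: Natural -> [(FilePath, Natural)] -> [[(FilePath, Natural)]]
groupBySize maxSize = go 0 []
  where
    go _   grp []            = [reverse grp | not (null grp)]
    go acc grp ((f, sz) : rest)
      | acc + sz > maxSize   = reverse grp : go sz [(f, sz)] rest
      | otherwise            = go (acc + sz) ((f, sz) : grp) rest

main :: IO ()
main = mapM_ print (groupBySize 32 [("a.idx",20), ("b.idx",20), ("c.idx",20), ("d.idx",5)])
-- [("a.idx",20)]
-- [("b.idx",20)]
-- [("c.idx",20),("d.idx",5)]

Note that mergeSortedFilesN only removes its inputs in the two-or-more case and simply deletes the fresh temp output for empty or single-file batches, so batches like the first two above leave the existing .idx file in place.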