This commit is contained in:
voidlizard 2024-12-23 07:45:06 +03:00
parent 0fcbfcc635
commit 050603f82b
1 changed files with 81 additions and 237 deletions

View File

@ -1242,6 +1242,17 @@ instance Exception ReadLogError
instance ReadLogOpts ()
type NumBytes = Int
class Monad m => BytesReader m where
noBytesLeft :: m Bool
readBytes :: NumBytes -> m ByteString
readBytesMaybe :: NumBytes -> m (Maybe ByteString)
readBytesMaybe n = do
bs <- readBytes n
if LBS.length bs == fromIntegral n then pure (Just bs) else pure Nothing
newtype ConsumeLBS m a = ConsumeLBS { fromConsumeLBS :: StateT ByteString m a }
deriving newtype ( Applicative
, Functor
@ -1261,6 +1272,13 @@ readChunkThrow n = do
put $! that
pure this
readChunkSimple :: Monad m => Int -> ConsumeLBS m ByteString
readChunkSimple n = do
lbs <- get
let (this, that) = LBS.splitAt (fromIntegral n) lbs
put $! that
pure this
reminds :: Monad m => ConsumeLBS m Int
reminds = gets (fromIntegral . LBS.length)
@ -1270,6 +1288,10 @@ consumed = gets LBS.null
runConsumeLBS :: Monad m => ByteString -> ConsumeLBS m a -> m a
runConsumeLBS s m = evalStateT (fromConsumeLBS m) s
instance Monad m => BytesReader (ConsumeLBS m) where
readBytes n = readChunkSimple n
noBytesLeft = consumed
readLogFileLBS :: forall opts m . ( MonadIO m, ReadLogOpts opts )
=> opts
-> ByteString
@ -1277,12 +1299,20 @@ readLogFileLBS :: forall opts m . ( MonadIO m, ReadLogOpts opts )
-> m Int
readLogFileLBS _ lbs action = runConsumeLBS lbs $ flip fix 0 \go n -> do
done <- consumed
done <- noBytesLeft
if done then pure n
else do
ssize <- readChunkThrow 4 <&> fromIntegral . N.word32 . LBS.toStrict
hash <- readChunkThrow 20 <&> GitHash . LBS.toStrict
sdata <- readChunkThrow ( ssize - 20 )
ssize <- readBytesMaybe 4
>>= orThrow SomeReadLogError
<&> fromIntegral . N.word32 . LBS.toStrict
hash <- readBytesMaybe 20
>>= orThrow SomeReadLogError
<&> GitHash . LBS.toStrict
sdata <- readBytesMaybe ( ssize - 20 )
>>= orThrow SomeReadLogError
void $ lift $ action hash (fromIntegral ssize) sdata
go (succ n)
@ -1775,6 +1805,12 @@ theDict = do
for_ (HS.fromList r) $ \x -> do
liftIO $ print x
entry $ bindMatch "test:git:zstd:train" $ nil_ $ \case
[ StringLike fn ] -> do
file <- liftIO $ mmapFileByteString fn Nothing
pure ()
_ -> throwIO (BadFormException @C nil)
entry $ bindMatch "test:git:read-log-file" $ nil_ $ \syn -> lift do
let (_, argz) = splitOpts [] syn
@ -1811,8 +1847,14 @@ theDict = do
done <- consumed
if done then pure ()
else do
ssize <- readChunkThrow 4 <&> fromIntegral . N.word32 . LBS.toStrict
hash <- readChunkThrow 20 <&> GitHash . LBS.toStrict
ssize <- readBytesMaybe 4
>>= orThrow SomeReadLogError
<&> fromIntegral . N.word32 . LBS.toStrict
hash <- readBytesMaybe 20
>>= orThrow SomeReadLogError
<&> GitHash . LBS.toStrict
liftIO $ print $ pretty hash <+> pretty ssize
go (succ n)
@ -1825,8 +1867,14 @@ theDict = do
done <- consumed
if done then pure ()
else do
ssize <- readChunkThrow 4 <&> fromIntegral . N.word32 . LBS.toStrict
hash <- readChunkThrow 20 <&> GitHash . LBS.toStrict
ssize <- readBytesMaybe 4
>>= orThrow SomeReadLogError
<&> fromIntegral . N.word32 . LBS.toStrict
hash <- readBytesMaybe 20
>>= orThrow SomeReadLogError
<&> GitHash . LBS.toStrict
lift $ S.yield hash
go (succ n)
@ -1834,7 +1882,7 @@ theDict = do
for_ hashes $ \h -> do
-- found <- binSearchBS 24 (BS.take 20 . BS.drop 4) ( show . pretty . GitHash ) (coerce h) file
found <- liftIO $ binSearchWTF 24 (BS.take 20 . BS.drop 4) (coerce h) file
found <- liftIO $ binarySearchBS 24 (BS.take 20 . BS.drop 4) (coerce h) file
liftIO $ print $ pretty h <+> pretty (isJust found)
_ -> throwIO (BadFormException @C nil)
@ -1852,7 +1900,7 @@ theDict = do
& orThrowUser "no index specified"
file <- liftIO $ mmapFileByteString idxName Nothing
r <- liftIO $ binSearchWTF 24 (BS.take 20 . BS.drop 4) (coerce hash) file
r <- liftIO $ binarySearchBS 24 (BS.take 20 . BS.drop 4) (coerce hash) file
liftIO $ print $ pretty r
@ -1865,13 +1913,19 @@ theDict = do
done <- consumed
if done then pure ()
else do
ssize <- readChunkThrow 4 <&> fromIntegral . N.word32 . LBS.toStrict
hash <- readChunkThrow 20 <&> GitHash . LBS.toStrict
ssize <- readBytesMaybe 4
>>= orThrow SomeReadLogError
<&> fromIntegral . N.word32 . LBS.toStrict
hash <- readBytesMaybe 20
>>= orThrow SomeReadLogError
<&> GitHash . LBS.toStrict
lift $ S.yield hash
go (succ n)
for_ hashes $ \h ->do
found <- linearSearch h lbs
found <- linearSearchLBS h lbs
liftIO $ print $ pretty h <+> pretty (isJust found)
_ -> throwIO (BadFormException @C nil)
@ -1887,7 +1941,7 @@ theDict = do
done <- consumed
if done then pure ()
else do
shit <- LBS.toStrict <$> readChunkThrow 24
shit <- LBS.toStrict <$> (readBytesMaybe 24 >>= orThrow SomeReadLogError)
lift $ S.yield shit
go (succ n)
@ -1902,48 +1956,12 @@ theDict = do
_ -> throwIO (BadFormException @C nil)
-- let m = HS.fromList (fmap (coerce @_ @BS.ByteString) hashes)
-- for_ hashes $ \h -> do
-- let found = HS.member (coerce h) m
-- liftIO $ print $ pretty h <+> pretty found
entry $ bindMatch "test:git:log:index:naive:search:map:test" $ nil_ $ \case
[ StringLike fn ] -> do
lbs <- liftIO $ LBS.readFile fn
hashes <- S.toList_ $ runConsumeLBS lbs $ flip fix 0 \go n -> do
done <- consumed
if done then pure ()
else do
ssize <- readChunkThrow 4 <&> fromIntegral . N.word32 . LBS.toStrict
hash <- readChunkThrow 20 <&> GitHash . LBS.toStrict
lift $ S.yield hash
go (succ n)
let m = HS.fromList (fmap (coerce @_ @BS.ByteString) hashes)
for_ hashes $ \h -> do
let found = HS.member (coerce h) m
liftIO $ print $ pretty h <+> pretty found
-- for_ hashes $ \h ->do
-- found <- linearSearch h lbs
-- liftIO $ print $ pretty h <+> pretty (isJust found)
_ -> throwIO (BadFormException @C nil)
entry $ bindMatch "test:git:log:index:naive:search:linear" $ nil_ $ \case
[ StringLike ha, StringLike fn ] -> lift do
hash <- fromStringMay @GitHash ha & orThrowUser "not a git hash"
lbs <- liftIO $ LBS.readFile fn
found <- linearSearch hash lbs
found <- linearSearchLBS hash lbs
liftIO $ print $ pretty found
_ -> throwIO (BadFormException @C nil)
@ -1969,26 +1987,6 @@ theDict = do
_ -> throwIO (BadFormException @C nil)
-- let (_, argz) = splitOpts [] syn
-- let fnames = [ x | StringLike x <- argz]
-- s <- randomIO @Word16
-- liftIO $ withBinaryFile (show ("index" <> pretty s <> ".idx")) AppendMode $ \fh -> do
-- for_ fnames $ \f -> do
-- theLog <- liftIO $ LBS.readFile f
-- all <- S.toList_ $ void $ readLogFileLBS () theLog $ \h s lbs -> do
-- S.yield (coerce @_ @BS.ByteString h)
-- debug $ "object" <+> pretty h
-- let sorted = L.sort all
-- for_ sorted $ \ghs -> do
-- let ks = BS.length ghs
-- let entrySize = N.bytestring32 (fromIntegral ks)
-- BS.hPutStr fh entrySize
-- BS.hPutStr fh ghs
entry $ bindMatch "test:git:log:index:entry" $ nil_ $ \case
[LitIntVal i, StringLike fn] -> lift do
@ -2098,14 +2096,19 @@ theDict = do
BS.hPutStr ofile kbs
LBS.hPutStr ofile lbs
linearSearch hash lbs = do
linearSearchLBS hash lbs = do
found <- S.toList_ $ runConsumeLBS lbs $ flip fix 0 \go n -> do
done <- consumed
if done then pure ()
else do
ssize <- readChunkThrow 4 <&> fromIntegral . N.word32 . LBS.toStrict
hash1 <- readChunkThrow 20 <&> LBS.toStrict
ssize <- readBytesMaybe 4
>>= orThrow SomeReadLogError
<&> fromIntegral . N.word32 . LBS.toStrict
hash1 <- readBytesMaybe 20
>>= orThrow SomeReadLogError
<&> LBS.toStrict
case (compare hash1 (coerce hash)) of
EQ -> lift $ S.yield n
@ -2113,53 +2116,15 @@ linearSearch hash lbs = do
pure $ listToMaybe found
-- binarySearch :: BS.ByteString -> Int -> BS.ByteString -> Maybe Int
-- binarySearch file recordSize targetKey = go 0 (BS.length file `div` recordSize - 1)
-- where
-- go lo hi
-- | lo > hi = do
-- -- putStrLn $ "Key not found: " ++ BSC.unpack targetKey
-- pure Nothing
-- | otherwise = do
-- let mid = (lo + hi) `div` 2
-- let offset = mid * recordSize
-- let record = BS.take recordSize (BS.drop offset file)
-- let key = getKey record
-- -- putStrLn $ "lo: " ++ show lo ++ ", hi: " ++ show hi ++ ", mid: " ++ show mid ++ ", offset: " ++ show offset
-- -- putStrLn $ "Key at offset: " ++ BSC.unpack key ++ ", Target key: " ++ BSC.unpack targetKey
-- case compare key targetKey of
-- EQ -> do
-- -- putStrLn $ "Key found at offset: " ++ show offset
-- pure (Just offset)
-- LT -> go (mid + 1) hi
-- GT -> go lo (mid - 1)
-- getKey :: BS.ByteString -> BS.ByteString
-- getKey bs = BS.take 20 $ BS.drop 4 bs
-- binarySearchByBounds :: (PrimMonad m, MVector v e)
-- => Comparison e -> v (PrimState m) e -> e -> Int -> Int -> m Int
-- binarySearchByBounds cmp vec e = loop
-- where
-- loop !l !u
-- | u <= l = return l
-- | otherwise = do e' <- unsafeRead vec k
-- case cmp e' e of
-- LT -> loop (k+1) u
-- EQ -> return k
-- GT -> loop l k
-- where k = midPoint u l
-- {-# INLINE binarySearchByBounds #-}
binSearchWTF :: MonadIO m
=> Int -- ^ record size
binarySearchBS :: MonadIO m
=> Int -- ^ record size
-> ( BS.ByteString -> BS.ByteString ) -- ^ key extractor
-> BS.ByteString -- ^ key
-> BS.ByteString -- ^ key
-> BS.ByteString -- ^ source
-> m (Maybe Int)
binSearchWTF rs getKey s source = do
binarySearchBS rs getKey s source = do
let maxn = BS.length source `div` rs
loop 0 maxn
where
@ -2173,127 +2138,6 @@ binSearchWTF rs getKey s source = do
where k = (l + u) `div` 2
binSearchBS :: MonadIO m
=> Int
-> ( BS.ByteString -> BS.ByteString )
-> ( BS.ByteString -> String )
-> BS.ByteString
-> BS.ByteString
-> m (Maybe Int)
binSearchBS rlen getKey nice s source = do
flip fix (0,source) $ \next (i,bs) -> do
let num = BS.length bs `quot` rlen
let n = BS.length bs `mod` rlen
when (n /= 0 ) $ error "FUCKED!"
if | num == 0 -> pure Nothing
| num == 1 && getKey bs == s -> pure (Just i)
| num == 1 -> pure Nothing
| otherwise -> do
let iMid = (num `div` 2)
let iOff = iMid * rlen
let (sa,sb) = BS.splitAt iOff bs
let el = getKey sb
if | s < el -> next (i, sa)
| s > el -> next (i, sb)
| otherwise -> pure $ Just i
-- case compare s el of
-- EQ -> pure (Just iOff)
-- LT -> next (i, sa)
-- GT -> next (i+iMid, BS.drop rlen sb)
-- binarySearchIO :: BS.ByteString -> Int -> BS.ByteString -> IO (Maybe Int)
-- binarySearchIO file recordSize targetKey = go 0 (BS.length file `div` recordSize - 1)
-- where
-- go lo hi
-- | lo > hi = do
-- print $ "Key not found: " <+> pretty (GitHash targetKey)
-- pure Nothing
-- | lo == hi = do
-- let offset = lo * recordSize
-- let record = BS.take recordSize (BS.drop offset file)
-- let key = getKey record
-- print $ "Final check at offset: " <+> pretty offset
-- print $ "Key at offset: " <+> pretty (GitHash key) <+> "Target key: " <+> pretty (GitHash targetKey)
-- if key == targetKey
-- then do
-- print $ "Key found at offset: " <+> pretty offset
-- pure (Just lo)
-- else do
-- print $ "Key not found in final check: " <+> pretty (GitHash targetKey)
-- pure Nothing
-- | otherwise = do
-- let mid = (lo + hi) `div` 2
-- let offset = mid * recordSize
-- let record = BS.take recordSize (BS.drop offset file)
-- let key = getKey record
-- print $ "lo: " <+> pretty lo <+> ", hi: " <+> pretty hi <+> ", mid: " <+> pretty mid <+> ", offset: " <+> pretty offset
-- print $ "Key at offset: " <+> pretty (GitHash key) <+> "Target key: " <+> pretty (GitHash targetKey)
-- case compare key targetKey of
-- EQ -> do
-- print $ "Key found at mid: " <+> pretty mid <+> ", offset: " <+> pretty offset
-- pure (Just mid)
-- LT -> do
-- print $ "Moving right"
-- go (mid + 1) hi
-- GT -> do
-- print $ "Moving left"
-- go lo (mid - 1)
-- getKey :: BS.ByteString -> BS.ByteString
-- getKey bs = BS.take 20 $ BS.drop 4 bs
binarySearchIO :: BS.ByteString -> Int -> BS.ByteString -> IO (Maybe Int)
binarySearchIO file recordSize targetKey = go 0 (BS.length file `div` recordSize - 1)
where
go lo hi
| lo > hi = do
pure Nothing
| otherwise = do
let mid = (lo + hi) `div` 2
let offset = mid * recordSize
let record = BS.take recordSize (BS.drop offset file)
let key = getKey record
case compare key targetKey of
EQ -> do
pure (Just mid)
LT -> do
go (mid + 1) hi
GT -> do
go lo (mid - 1)
getKey :: BS.ByteString -> BS.ByteString
getKey bs = BS.take 20 $ BS.drop 4 bs
-- binarySearch :: BS.ByteString -> Int -> BS.ByteString -> Maybe Int
-- binarySearch file recordSize targetKey = go 0 (BS.length file `div` recordSize - 1)
-- where
-- go lo hi
-- | lo > hi = Nothing
-- | otherwise =
-- let mid = (lo + hi) `div` 2
-- offset = mid * recordSize
-- record = BS.take recordSize (BS.drop offset file)
-- key = BS.take 20 $ BS.drop 4 record
-- in case compare key targetKey of
-- EQ -> Just mid
-- LT -> go (mid + 1) hi
-- GT -> go lo (mid - 1)
-- debugPrefix :: LoggerEntry -> LoggerEntry
debugPrefix = toStderr . logPrefix "[debug] "