From 050603f82bf3c156fcaba6c295e72b76c0b1fb42 Mon Sep 17 00:00:00 2001 From: voidlizard Date: Mon, 23 Dec 2024 07:45:06 +0300 Subject: [PATCH] wip --- hbs2-git3/app/Main.hs | 318 +++++++++++------------------------------- 1 file changed, 81 insertions(+), 237 deletions(-) diff --git a/hbs2-git3/app/Main.hs b/hbs2-git3/app/Main.hs index 9f26a8ee..3baebe24 100644 --- a/hbs2-git3/app/Main.hs +++ b/hbs2-git3/app/Main.hs @@ -1242,6 +1242,17 @@ instance Exception ReadLogError instance ReadLogOpts () +type NumBytes = Int + +class Monad m => BytesReader m where + noBytesLeft :: m Bool + readBytes :: NumBytes -> m ByteString + + readBytesMaybe :: NumBytes -> m (Maybe ByteString) + readBytesMaybe n = do + bs <- readBytes n + if LBS.length bs == fromIntegral n then pure (Just bs) else pure Nothing + newtype ConsumeLBS m a = ConsumeLBS { fromConsumeLBS :: StateT ByteString m a } deriving newtype ( Applicative , Functor @@ -1261,6 +1272,13 @@ readChunkThrow n = do put $! that pure this +readChunkSimple :: Monad m => Int -> ConsumeLBS m ByteString +readChunkSimple n = do + lbs <- get + let (this, that) = LBS.splitAt (fromIntegral n) lbs + put $! that + pure this + reminds :: Monad m => ConsumeLBS m Int reminds = gets (fromIntegral . LBS.length) @@ -1270,6 +1288,10 @@ consumed = gets LBS.null runConsumeLBS :: Monad m => ByteString -> ConsumeLBS m a -> m a runConsumeLBS s m = evalStateT (fromConsumeLBS m) s +instance Monad m => BytesReader (ConsumeLBS m) where + readBytes n = readChunkSimple n + noBytesLeft = consumed + readLogFileLBS :: forall opts m . ( MonadIO m, ReadLogOpts opts ) => opts -> ByteString @@ -1277,12 +1299,20 @@ readLogFileLBS :: forall opts m . ( MonadIO m, ReadLogOpts opts ) -> m Int readLogFileLBS _ lbs action = runConsumeLBS lbs $ flip fix 0 \go n -> do - done <- consumed + done <- noBytesLeft if done then pure n else do - ssize <- readChunkThrow 4 <&> fromIntegral . N.word32 . LBS.toStrict - hash <- readChunkThrow 20 <&> GitHash . LBS.toStrict - sdata <- readChunkThrow ( ssize - 20 ) + ssize <- readBytesMaybe 4 + >>= orThrow SomeReadLogError + <&> fromIntegral . N.word32 . LBS.toStrict + + hash <- readBytesMaybe 20 + >>= orThrow SomeReadLogError + <&> GitHash . LBS.toStrict + + sdata <- readBytesMaybe ( ssize - 20 ) + >>= orThrow SomeReadLogError + void $ lift $ action hash (fromIntegral ssize) sdata go (succ n) @@ -1775,6 +1805,12 @@ theDict = do for_ (HS.fromList r) $ \x -> do liftIO $ print x + entry $ bindMatch "test:git:zstd:train" $ nil_ $ \case + [ StringLike fn ] -> do + file <- liftIO $ mmapFileByteString fn Nothing + pure () + + _ -> throwIO (BadFormException @C nil) entry $ bindMatch "test:git:read-log-file" $ nil_ $ \syn -> lift do let (_, argz) = splitOpts [] syn @@ -1811,8 +1847,14 @@ theDict = do done <- consumed if done then pure () else do - ssize <- readChunkThrow 4 <&> fromIntegral . N.word32 . LBS.toStrict - hash <- readChunkThrow 20 <&> GitHash . LBS.toStrict + ssize <- readBytesMaybe 4 + >>= orThrow SomeReadLogError + <&> fromIntegral . N.word32 . LBS.toStrict + + hash <- readBytesMaybe 20 + >>= orThrow SomeReadLogError + <&> GitHash . LBS.toStrict + liftIO $ print $ pretty hash <+> pretty ssize go (succ n) @@ -1825,8 +1867,14 @@ theDict = do done <- consumed if done then pure () else do - ssize <- readChunkThrow 4 <&> fromIntegral . N.word32 . LBS.toStrict - hash <- readChunkThrow 20 <&> GitHash . LBS.toStrict + ssize <- readBytesMaybe 4 + >>= orThrow SomeReadLogError + <&> fromIntegral . N.word32 . LBS.toStrict + + hash <- readBytesMaybe 20 + >>= orThrow SomeReadLogError + <&> GitHash . LBS.toStrict + lift $ S.yield hash go (succ n) @@ -1834,7 +1882,7 @@ theDict = do for_ hashes $ \h -> do -- found <- binSearchBS 24 (BS.take 20 . BS.drop 4) ( show . pretty . GitHash ) (coerce h) file - found <- liftIO $ binSearchWTF 24 (BS.take 20 . BS.drop 4) (coerce h) file + found <- liftIO $ binarySearchBS 24 (BS.take 20 . BS.drop 4) (coerce h) file liftIO $ print $ pretty h <+> pretty (isJust found) _ -> throwIO (BadFormException @C nil) @@ -1852,7 +1900,7 @@ theDict = do & orThrowUser "no index specified" file <- liftIO $ mmapFileByteString idxName Nothing - r <- liftIO $ binSearchWTF 24 (BS.take 20 . BS.drop 4) (coerce hash) file + r <- liftIO $ binarySearchBS 24 (BS.take 20 . BS.drop 4) (coerce hash) file liftIO $ print $ pretty r @@ -1865,13 +1913,19 @@ theDict = do done <- consumed if done then pure () else do - ssize <- readChunkThrow 4 <&> fromIntegral . N.word32 . LBS.toStrict - hash <- readChunkThrow 20 <&> GitHash . LBS.toStrict + ssize <- readBytesMaybe 4 + >>= orThrow SomeReadLogError + <&> fromIntegral . N.word32 . LBS.toStrict + + hash <- readBytesMaybe 20 + >>= orThrow SomeReadLogError + <&> GitHash . LBS.toStrict + lift $ S.yield hash go (succ n) for_ hashes $ \h ->do - found <- linearSearch h lbs + found <- linearSearchLBS h lbs liftIO $ print $ pretty h <+> pretty (isJust found) _ -> throwIO (BadFormException @C nil) @@ -1887,7 +1941,7 @@ theDict = do done <- consumed if done then pure () else do - shit <- LBS.toStrict <$> readChunkThrow 24 + shit <- LBS.toStrict <$> (readBytesMaybe 24 >>= orThrow SomeReadLogError) lift $ S.yield shit go (succ n) @@ -1902,48 +1956,12 @@ theDict = do _ -> throwIO (BadFormException @C nil) - -- let m = HS.fromList (fmap (coerce @_ @BS.ByteString) hashes) - - - -- for_ hashes $ \h -> do - -- let found = HS.member (coerce h) m - -- liftIO $ print $ pretty h <+> pretty found - - - entry $ bindMatch "test:git:log:index:naive:search:map:test" $ nil_ $ \case - [ StringLike fn ] -> do - - lbs <- liftIO $ LBS.readFile fn - - hashes <- S.toList_ $ runConsumeLBS lbs $ flip fix 0 \go n -> do - done <- consumed - if done then pure () - else do - ssize <- readChunkThrow 4 <&> fromIntegral . N.word32 . LBS.toStrict - hash <- readChunkThrow 20 <&> GitHash . LBS.toStrict - lift $ S.yield hash - go (succ n) - - let m = HS.fromList (fmap (coerce @_ @BS.ByteString) hashes) - - - for_ hashes $ \h -> do - let found = HS.member (coerce h) m - liftIO $ print $ pretty h <+> pretty found - --- for_ hashes $ \h ->do --- found <- linearSearch h lbs --- liftIO $ print $ pretty h <+> pretty (isJust found) - - _ -> throwIO (BadFormException @C nil) - - entry $ bindMatch "test:git:log:index:naive:search:linear" $ nil_ $ \case [ StringLike ha, StringLike fn ] -> lift do hash <- fromStringMay @GitHash ha & orThrowUser "not a git hash" lbs <- liftIO $ LBS.readFile fn - found <- linearSearch hash lbs + found <- linearSearchLBS hash lbs liftIO $ print $ pretty found _ -> throwIO (BadFormException @C nil) @@ -1969,26 +1987,6 @@ theDict = do _ -> throwIO (BadFormException @C nil) - -- let (_, argz) = splitOpts [] syn - -- let fnames = [ x | StringLike x <- argz] - - -- s <- randomIO @Word16 - -- liftIO $ withBinaryFile (show ("index" <> pretty s <> ".idx")) AppendMode $ \fh -> do - - -- for_ fnames $ \f -> do - -- theLog <- liftIO $ LBS.readFile f - - -- all <- S.toList_ $ void $ readLogFileLBS () theLog $ \h s lbs -> do - -- S.yield (coerce @_ @BS.ByteString h) - -- debug $ "object" <+> pretty h - - -- let sorted = L.sort all - - -- for_ sorted $ \ghs -> do - -- let ks = BS.length ghs - -- let entrySize = N.bytestring32 (fromIntegral ks) - -- BS.hPutStr fh entrySize - -- BS.hPutStr fh ghs entry $ bindMatch "test:git:log:index:entry" $ nil_ $ \case [LitIntVal i, StringLike fn] -> lift do @@ -2098,14 +2096,19 @@ theDict = do BS.hPutStr ofile kbs LBS.hPutStr ofile lbs -linearSearch hash lbs = do +linearSearchLBS hash lbs = do found <- S.toList_ $ runConsumeLBS lbs $ flip fix 0 \go n -> do done <- consumed if done then pure () else do - ssize <- readChunkThrow 4 <&> fromIntegral . N.word32 . LBS.toStrict - hash1 <- readChunkThrow 20 <&> LBS.toStrict + ssize <- readBytesMaybe 4 + >>= orThrow SomeReadLogError + <&> fromIntegral . N.word32 . LBS.toStrict + + hash1 <- readBytesMaybe 20 + >>= orThrow SomeReadLogError + <&> LBS.toStrict case (compare hash1 (coerce hash)) of EQ -> lift $ S.yield n @@ -2113,53 +2116,15 @@ linearSearch hash lbs = do pure $ listToMaybe found --- binarySearch :: BS.ByteString -> Int -> BS.ByteString -> Maybe Int --- binarySearch file recordSize targetKey = go 0 (BS.length file `div` recordSize - 1) --- where --- go lo hi --- | lo > hi = do --- -- putStrLn $ "Key not found: " ++ BSC.unpack targetKey --- pure Nothing --- | otherwise = do --- let mid = (lo + hi) `div` 2 --- let offset = mid * recordSize --- let record = BS.take recordSize (BS.drop offset file) --- let key = getKey record --- -- putStrLn $ "lo: " ++ show lo ++ ", hi: " ++ show hi ++ ", mid: " ++ show mid ++ ", offset: " ++ show offset --- -- putStrLn $ "Key at offset: " ++ BSC.unpack key ++ ", Target key: " ++ BSC.unpack targetKey --- case compare key targetKey of --- EQ -> do --- -- putStrLn $ "Key found at offset: " ++ show offset --- pure (Just offset) --- LT -> go (mid + 1) hi --- GT -> go lo (mid - 1) --- getKey :: BS.ByteString -> BS.ByteString --- getKey bs = BS.take 20 $ BS.drop 4 bs - --- binarySearchByBounds :: (PrimMonad m, MVector v e) --- => Comparison e -> v (PrimState m) e -> e -> Int -> Int -> m Int --- binarySearchByBounds cmp vec e = loop --- where --- loop !l !u --- | u <= l = return l --- | otherwise = do e' <- unsafeRead vec k --- case cmp e' e of --- LT -> loop (k+1) u --- EQ -> return k --- GT -> loop l k --- where k = midPoint u l --- {-# INLINE binarySearchByBounds #-} - - -binSearchWTF :: MonadIO m - => Int -- ^ record size +binarySearchBS :: MonadIO m + => Int -- ^ record size -> ( BS.ByteString -> BS.ByteString ) -- ^ key extractor - -> BS.ByteString -- ^ key + -> BS.ByteString -- ^ key -> BS.ByteString -- ^ source -> m (Maybe Int) -binSearchWTF rs getKey s source = do +binarySearchBS rs getKey s source = do let maxn = BS.length source `div` rs loop 0 maxn where @@ -2173,127 +2138,6 @@ binSearchWTF rs getKey s source = do where k = (l + u) `div` 2 -binSearchBS :: MonadIO m - => Int - -> ( BS.ByteString -> BS.ByteString ) - -> ( BS.ByteString -> String ) - -> BS.ByteString - -> BS.ByteString - -> m (Maybe Int) - -binSearchBS rlen getKey nice s source = do - - flip fix (0,source) $ \next (i,bs) -> do - - let num = BS.length bs `quot` rlen - - let n = BS.length bs `mod` rlen - - when (n /= 0 ) $ error "FUCKED!" - - if | num == 0 -> pure Nothing - | num == 1 && getKey bs == s -> pure (Just i) - | num == 1 -> pure Nothing - | otherwise -> do - - let iMid = (num `div` 2) - let iOff = iMid * rlen - - let (sa,sb) = BS.splitAt iOff bs - - let el = getKey sb - - if | s < el -> next (i, sa) - | s > el -> next (i, sb) - | otherwise -> pure $ Just i - - -- case compare s el of - -- EQ -> pure (Just iOff) - -- LT -> next (i, sa) - -- GT -> next (i+iMid, BS.drop rlen sb) - --- binarySearchIO :: BS.ByteString -> Int -> BS.ByteString -> IO (Maybe Int) --- binarySearchIO file recordSize targetKey = go 0 (BS.length file `div` recordSize - 1) --- where --- go lo hi --- | lo > hi = do --- print $ "Key not found: " <+> pretty (GitHash targetKey) --- pure Nothing --- | lo == hi = do --- let offset = lo * recordSize --- let record = BS.take recordSize (BS.drop offset file) --- let key = getKey record --- print $ "Final check at offset: " <+> pretty offset --- print $ "Key at offset: " <+> pretty (GitHash key) <+> "Target key: " <+> pretty (GitHash targetKey) --- if key == targetKey --- then do --- print $ "Key found at offset: " <+> pretty offset --- pure (Just lo) --- else do --- print $ "Key not found in final check: " <+> pretty (GitHash targetKey) --- pure Nothing --- | otherwise = do --- let mid = (lo + hi) `div` 2 --- let offset = mid * recordSize --- let record = BS.take recordSize (BS.drop offset file) --- let key = getKey record - --- print $ "lo: " <+> pretty lo <+> ", hi: " <+> pretty hi <+> ", mid: " <+> pretty mid <+> ", offset: " <+> pretty offset --- print $ "Key at offset: " <+> pretty (GitHash key) <+> "Target key: " <+> pretty (GitHash targetKey) - --- case compare key targetKey of --- EQ -> do --- print $ "Key found at mid: " <+> pretty mid <+> ", offset: " <+> pretty offset --- pure (Just mid) --- LT -> do --- print $ "Moving right" --- go (mid + 1) hi --- GT -> do --- print $ "Moving left" --- go lo (mid - 1) - --- getKey :: BS.ByteString -> BS.ByteString --- getKey bs = BS.take 20 $ BS.drop 4 bs - - -binarySearchIO :: BS.ByteString -> Int -> BS.ByteString -> IO (Maybe Int) -binarySearchIO file recordSize targetKey = go 0 (BS.length file `div` recordSize - 1) - where - go lo hi - | lo > hi = do - pure Nothing - | otherwise = do - let mid = (lo + hi) `div` 2 - let offset = mid * recordSize - let record = BS.take recordSize (BS.drop offset file) - let key = getKey record - case compare key targetKey of - EQ -> do - pure (Just mid) - LT -> do - go (mid + 1) hi - GT -> do - go lo (mid - 1) - - getKey :: BS.ByteString -> BS.ByteString - getKey bs = BS.take 20 $ BS.drop 4 bs - - --- binarySearch :: BS.ByteString -> Int -> BS.ByteString -> Maybe Int --- binarySearch file recordSize targetKey = go 0 (BS.length file `div` recordSize - 1) --- where --- go lo hi --- | lo > hi = Nothing --- | otherwise = --- let mid = (lo + hi) `div` 2 --- offset = mid * recordSize --- record = BS.take recordSize (BS.drop offset file) --- key = BS.take 20 $ BS.drop 4 record --- in case compare key targetKey of --- EQ -> Just mid --- LT -> go (mid + 1) hi --- GT -> go lo (mid - 1) - -- debugPrefix :: LoggerEntry -> LoggerEntry debugPrefix = toStderr . logPrefix "[debug] "