wip, index => 20 bytes of payload (key:8 offset:8 size:4) + some bf6 functions

This commit is contained in:
voidlizard 2025-07-31 07:20:03 +03:00
parent e45c507f80
commit 63ff57f54b
2 changed files with 88 additions and 14 deletions

View File

@ -17,9 +17,23 @@ import System.IO.Temp as Temp
import Streaming.Prelude qualified as S import Streaming.Prelude qualified as S
data IndexEntry = IndexEntry {-# UNPACK #-} !FileKey !Word64 !Word32 -- we need size in order to return block size faster
-- w/o search in fossil
data IndexEntry = IndexEntry {-# UNPACK #-} !FileKey !NCQOffset !NCQSize
deriving stock (Eq,Show) deriving stock (Eq,Show)
ncqIndexEntryPadding :: Int
ncqIndexEntryPadding = 0
ncqIndexPayloadSize :: Int
ncqIndexPayloadSize = fileKey + fileOffset + blockSize + padding
where
fileKey = 8
fileOffset = 8
blockSize = 4
padding = ncqIndexEntryPadding
{-# INLINE ncqIndexPayloadSize #-}
unpackIndexEntry :: ByteString -> IndexEntry unpackIndexEntry :: ByteString -> IndexEntry
unpackIndexEntry entryBs = do unpackIndexEntry entryBs = do
let (fks,rest1) = BS.splitAt 4 entryBs let (fks,rest1) = BS.splitAt 4 entryBs
@ -31,12 +45,21 @@ unpackIndexEntry entryBs = do
IndexEntry fk off size IndexEntry fk off size
{-# INLINE unpackIndexEntry #-} {-# INLINE unpackIndexEntry #-}
packIndexEntryPayload :: IndexEntry -> ByteString
packIndexEntryPayload (IndexEntry fk offset blockSize) = do
let fks = N.bytestring64 (coerce fk)
let rs = (blockSize + ncqSLen) & fromIntegral @_ @Word32 & N.bytestring32
let os = fromIntegral @_ @Word64 offset & N.bytestring64
let padding = BS.replicate ncqIndexEntryPadding 0
let record = fks <> os <> rs <> padding
record
emptyKey :: ByteString emptyKey :: ByteString
emptyKey = BS.replicate 32 0 emptyKey = BS.replicate 32 0
-- FIXME: better-hashtable-params -- FIXME: better-hashtable-params
ncqIndexAlloc :: NWayHashAlloc ncqIndexAlloc :: NWayHashAlloc
ncqIndexAlloc = nwayAllocDef 1.15 32 8 32 ncqIndexAlloc = nwayAllocDef 1.15 32 8 ncqIndexPayloadSize
ncqLookupIndex :: MonadUnliftIO m ncqLookupIndex :: MonadUnliftIO m
=> HashRef => HashRef
@ -76,14 +99,8 @@ ncqIndexFile n fk = runMaybeT do
ncqStorageScanDataFile n fp $ \offset w key s -> case ncqIsMeta s of ncqStorageScanDataFile n fp $ \offset w key s -> case ncqIsMeta s of
Just M -> none Just M -> none
_ -> do _ -> do
-- we need size in order to return block size faster let entry = IndexEntry (coerce fk) (fromIntegral offset) (fromIntegral w)
-- w/o search in fossil let record = packIndexEntryPayload entry
let fks = N.bytestring64 (coerce fk)
let rs = (w + ncqSLen) & fromIntegral @_ @Word32 & N.bytestring32
let os = fromIntegral @_ @Word64 offset & N.bytestring64
let padding = BS.replicate 12 0
let record = fks <> os <> rs <> padding
-- debug $ "WRITE INDEX ENTRY" <+> pretty (BS.length record)
S.yield (coerce key, record) S.yield (coerce key, record)
let (dir,name) = splitFileName fp let (dir,name) = splitFileName fp

View File

@ -72,6 +72,7 @@ import System.IO.Temp qualified as Temp
import System.Exit qualified as Exit import System.Exit qualified as Exit
import System.Random as R import System.Random as R
import System.Random.Shuffle (shuffleM) import System.Random.Shuffle (shuffleM)
import System.FilePattern
import Text.InterpolatedString.Perl6 (qc) import Text.InterpolatedString.Perl6 (qc)
import Lens.Micro.Platform import Lens.Micro.Platform
import UnliftIO import UnliftIO
@ -1084,6 +1085,8 @@ internalEntries = do
entry $ bindValue "chr:cr" (mkStr "\r") entry $ bindValue "chr:cr" (mkStr "\r")
entry $ bindValue "chr:tab" (mkStr "\t") entry $ bindValue "chr:tab" (mkStr "\t")
entry $ bindValue "chr:space" (mkStr " ") entry $ bindValue "chr:space" (mkStr " ")
entry $ bindValue "chr:dot" (mkStr ".")
entry $ bindValue "dot" (mkStr ".")
entry $ bindAlias "local" "define" entry $ bindAlias "local" "define"
@ -1468,15 +1471,14 @@ internalEntries = do
entry $ bindMatch "floor" $ \case entry $ bindMatch "floor" $ \case
[LitScientificVal x] -> [LitScientificVal x] ->
pure $ mkDouble (realToFrac $ floor x) pure $ mkInt (floor x)
_ -> throwIO (BadFormException @c nil) _ -> throwIO (BadFormException @c nil)
entry $ bindMatch "ceiling" $ \case entry $ bindMatch "ceiling" $ \case
[LitScientificVal x] -> [LitScientificVal x] ->
pure $ mkDouble (realToFrac $ ceiling x) pure $ mkInt (ceiling x)
_ -> throwIO (BadFormException @c nil) _ -> throwIO (BadFormException @c nil)
entry $ bindMatch "fixed" $ \case entry $ bindMatch "fixed" $ \case
[LitIntVal 1, LitScientificVal x] -> do [LitIntVal 1, LitScientificVal x] -> do
@ -2216,6 +2218,13 @@ internalEntries = do
_ -> pure nil _ -> pure nil
brief "get file size"
$ args [ arg "list" "filename" ]
$ returns "file-size" "double"
$ entry $ bindMatch "file:size" $ \case
[ StringLike p ] -> mkInt <$> fileSize p
_ -> throwIO $ BadFormException @c nil
entry $ bindMatch "path:split" $ \case entry $ bindMatch "path:split" $ \case
[ StringLike p ] -> pure $ mkList (fmap mkStr (splitPath p)) [ StringLike p ] -> pure $ mkList (fmap mkStr (splitPath p))
_ -> throwIO $ BadFormException @c nil _ -> throwIO $ BadFormException @c nil
@ -2257,9 +2266,16 @@ internalEntries = do
_ -> throwIO $ BadFormException @c nil _ -> throwIO $ BadFormException @c nil
entry $ bindMatch "dir:list:files" $ \case entry $ bindMatch "dir:list:files" $ \case
[ StringLike p, StringLike pat ] -> lift do
dirFiles p <&> mkList . fmap mkSym . filter ( (pat ?==) . takeFileName )
[ StringLike p ] -> lift do [ StringLike p ] -> lift do
dirFiles p <&> mkList . fmap mkSym dirFiles p <&> mkList . fmap mkSym
_ -> throwIO $ BadFormException @c nil
[] -> dirFiles "." <&> mkList . fmap mkSym
e -> throwIO $ BadFormException @c (mkList e)
entry $ bindMatch "dir:list:all" $ \case entry $ bindMatch "dir:list:all" $ \case
[ StringLike p ] -> lift do [ StringLike p ] -> lift do
@ -2298,6 +2314,24 @@ internalEntries = do
_ -> mkDouble <$> randomIO _ -> mkDouble <$> randomIO
brief "average of numeric list"
$ args [ arg "list" "list" ]
$ returns "double" "double"
$ entry $ bindMatch "stat:avg" \case
[ ListVal es ] -> pure $ safeAvg es
es -> pure $ safeAvg es
entry $ bindAlias "avg" "stat:avg"
brief "median of numeric list"
$ args [ arg "list" "list" ]
$ returns "double" "double"
$ entry $ bindMatch "stat:median" \case
[ ListVal es ] -> pure $ safeMedian es
es -> pure $ safeMedian es
entry $ bindAlias "median" "stat:median"
entry $ bindMatch "random:shuffle" $ \input -> do entry $ bindMatch "random:shuffle" $ \input -> do
case arglistOrList input of case arglistOrList input of
[] -> pure $ mkList [] [] -> pure $ mkList []
@ -2399,6 +2433,29 @@ internalEntries = do
safeAvg :: forall c . IsContext c => [Syntax c] -> Syntax c
safeAvg [] = mkDouble 0.0
safeAvg es = mkDouble $ sum (map asDouble es) / fromIntegral (List.length es)
safeMedian :: forall c . IsContext c => [Syntax c] -> Syntax c
safeMedian [] = mkDouble 0.0
safeMedian esSorted =
let sorted = List.sort (map asDouble esSorted)
n = length sorted
in mkDouble $
if odd n
then sorted !! (n `div` 2)
else let i = n `div` 2
in (sorted !! (i - 1) + sorted !! i) / 2
asDouble :: forall c . IsContext c => Syntax c -> Double
asDouble = \case
LitIntVal n -> realToFrac n
LitScientificVal n -> realToFrac n
LitBoolVal False -> 0.0
LitBoolVal True -> 1.0
_ -> 0.0
arglistOrList :: forall c . IsContext c => [Syntax c] -> [Syntax c] arglistOrList :: forall c . IsContext c => [Syntax c] -> [Syntax c]
arglistOrList = \case arglistOrList = \case
[ ListVal xs ] -> xs [ ListVal xs ] -> xs