From 5e8dd6cd46850db56538fb8790e0e5fec8d87558 Mon Sep 17 00:00:00 2001 From: Andrei Borzenkov Date: Fri, 4 Oct 2024 20:27:17 +0400 Subject: [PATCH] Move bytestring-mmap inside repo and add compatiblity layer --- flake.lock | 17 --- flake.nix | 18 +-- miscellaneous/bytestring-mmap/LICENSE | 27 ++++ miscellaneous/bytestring-mmap/README.md | 2 + miscellaneous/bytestring-mmap/Setup.lhs | 3 + .../bytestring-mmap/System/IO/Posix/MMap.hs | 114 +++++++++++++++ .../System/IO/Posix/MMap/Internal.hs | 54 ++++++++ .../System/IO/Posix/MMap/Lazy.hs | 130 ++++++++++++++++++ .../bytestring-mmap/System/Posix/IO/Compat.hs | 12 ++ .../bytestring-mmap/bytestring-mmap.cabal | 42 ++++++ .../cbits/hs_bytestring_mmap.c | 22 +++ .../include/hs_bytestring_mmap.h | 15 ++ .../bytestring-mmap/tests/big-lazy.hs | 22 +++ miscellaneous/bytestring-mmap/tests/big.hs | 12 ++ miscellaneous/bytestring-mmap/tests/cp.hs | 37 +++++ .../bytestring-mmap/tests/fast-cp.hs | 29 ++++ miscellaneous/bytestring-mmap/tests/files.hs | 46 +++++++ .../bytestring-mmap/tests/pressure.hs | 32 +++++ miscellaneous/bytestring-mmap/tests/small.hs | 15 ++ miscellaneous/bytestring-mmap/tests/test | 19 +++ 20 files changed, 638 insertions(+), 30 deletions(-) create mode 100644 miscellaneous/bytestring-mmap/LICENSE create mode 100644 miscellaneous/bytestring-mmap/README.md create mode 100644 miscellaneous/bytestring-mmap/Setup.lhs create mode 100644 miscellaneous/bytestring-mmap/System/IO/Posix/MMap.hs create mode 100644 miscellaneous/bytestring-mmap/System/IO/Posix/MMap/Internal.hs create mode 100644 miscellaneous/bytestring-mmap/System/IO/Posix/MMap/Lazy.hs create mode 100644 miscellaneous/bytestring-mmap/System/Posix/IO/Compat.hs create mode 100644 miscellaneous/bytestring-mmap/bytestring-mmap.cabal create mode 100644 miscellaneous/bytestring-mmap/cbits/hs_bytestring_mmap.c create mode 100644 miscellaneous/bytestring-mmap/include/hs_bytestring_mmap.h create mode 100644 miscellaneous/bytestring-mmap/tests/big-lazy.hs create mode 100644 miscellaneous/bytestring-mmap/tests/big.hs create mode 100644 miscellaneous/bytestring-mmap/tests/cp.hs create mode 100644 miscellaneous/bytestring-mmap/tests/fast-cp.hs create mode 100644 miscellaneous/bytestring-mmap/tests/files.hs create mode 100644 miscellaneous/bytestring-mmap/tests/pressure.hs create mode 100644 miscellaneous/bytestring-mmap/tests/small.hs create mode 100644 miscellaneous/bytestring-mmap/tests/test diff --git a/flake.lock b/flake.lock index 1a85dd8e..cc29d32a 100644 --- a/flake.lock +++ b/flake.lock @@ -1,21 +1,5 @@ { "nodes": { - "bytestring-mmap": { - "flake": false, - "locked": { - "lastModified": 1727193872, - "narHash": "sha256-L39kMCMry/BNJngt0+yvSIMnJJzWR9ZoyXbEyniEfwU=", - "owner": "ivanovs-4", - "repo": "bytestring-mmap", - "rev": "f43e5e06718ed904487f17e7725c12098773c12f", - "type": "github" - }, - "original": { - "owner": "ivanovs-4", - "repo": "bytestring-mmap", - "type": "github" - } - }, "db-pipe": { "inputs": { "haskell-flake-utils": [ @@ -294,7 +278,6 @@ }, "root": { "inputs": { - "bytestring-mmap": "bytestring-mmap", "db-pipe": "db-pipe", "flake-utils": "flake-utils", "fuzzy": "fuzzy", diff --git a/flake.nix b/flake.nix index 40cd1bba..2f379910 100644 --- a/flake.nix +++ b/flake.nix @@ -38,11 +38,6 @@ inputs = { flake = false; }; - bytestring-mmap = { - url = "github:ivanovs-4/bytestring-mmap"; - flake = false; - }; - }; outputs = { self, nixpkgs, flake-utils, ... }@inputs: @@ -80,8 +75,7 @@ outputs = { self, nixpkgs, flake-utils, ... }@inputs: defaultOverlay = final: prev: (prev.lib.composeManyExtensions - [ - overlay + [ overlay inputs.suckless-conf.overlays.default inputs.db-pipe.overlays.default ]) final prev; @@ -116,8 +110,8 @@ outputs = { self, nixpkgs, flake-utils, ... }@inputs: haskellPackages = pkgs.haskellPackages.override { overrides = new: old: with pkgs.haskell.lib; { - scotty = new.callHackage "scotty" "0.21" { }; - bytestring-mmap = jailbreakUnbreak old.bytestring-mmap; # old.callCabal2nix "bytestring-mmap" inputs.bytestring-mmap {}; + scotty = new.callHackage "scotty" "0.21" {}; + bytestring-mmap = old.callCabal2nix "bytestring-mmap" "${self}/miscellaneous/bytestring-mmap" {}; skylighting-lucid = new.callHackage "skylighting-lucid" "1.0.4" { }; wai-app-file-cgi = dontCoverage (dontCheck (jailbreakUnbreak old.wai-app-file-cgi)); saltine = old.callCabal2nix "saltine" inputs.saltine { inherit (pkgs) libsodium; }; @@ -134,13 +128,11 @@ outputs = { self, nixpkgs, flake-utils, ... }@inputs: (_name: packagePostOverrides) # we can't apply overrides inside our overlay because it will remove linking info (pkgs.lib.getAttrs packageNames pkgs.haskellPackages); - # dynamic packages don't work at the moment, because - # ivanovs-4/bytestring-mmap doesn't compile with ghc 9.4 - # and bytestring-mmap doesn't compire with ghc > 9.6 packagesDynamic = makePackages pkgs; packagesStatic = makePackages pkgs.pkgsStatic; in { legacyPackages = pkgs; + overlays.default = defaultOverlay; packages = packagesDynamic // @@ -157,7 +149,7 @@ outputs = { self, nixpkgs, flake-utils, ... }@inputs: }; }; - devShell.default = pkgs.haskellPackages.shellFor { + devShells.default = pkgs.haskellPackages.shellFor { packages = _: pkgs.lib.attrsets.attrVals packageNames pkgs.haskellPackages; # withHoogle = true; buildInputs = ( diff --git a/miscellaneous/bytestring-mmap/LICENSE b/miscellaneous/bytestring-mmap/LICENSE new file mode 100644 index 00000000..ff029854 --- /dev/null +++ b/miscellaneous/bytestring-mmap/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) Don Stewart + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. diff --git a/miscellaneous/bytestring-mmap/README.md b/miscellaneous/bytestring-mmap/README.md new file mode 100644 index 00000000..b3bf4cf7 --- /dev/null +++ b/miscellaneous/bytestring-mmap/README.md @@ -0,0 +1,2 @@ +Fork of https://hackage.haskell.org/package/bytestring-mmap since original repo +is not available at http://code.haskell.org/~dons/code/bytestring-mmap/ diff --git a/miscellaneous/bytestring-mmap/Setup.lhs b/miscellaneous/bytestring-mmap/Setup.lhs new file mode 100644 index 00000000..5bde0de9 --- /dev/null +++ b/miscellaneous/bytestring-mmap/Setup.lhs @@ -0,0 +1,3 @@ +#!/usr/bin/env runhaskell +> import Distribution.Simple +> main = defaultMain diff --git a/miscellaneous/bytestring-mmap/System/IO/Posix/MMap.hs b/miscellaneous/bytestring-mmap/System/IO/Posix/MMap.hs new file mode 100644 index 00000000..3ed20851 --- /dev/null +++ b/miscellaneous/bytestring-mmap/System/IO/Posix/MMap.hs @@ -0,0 +1,114 @@ +{-# LANGUAGE ForeignFunctionInterface #-} +-------------------------------------------------------------------- +-- | +-- Module : System.IO.Posix.MMap +-- Copyright : (c) Galois, Inc. 2007 +-- License : BSD3 +-- +-- Maintainer: Don Stewart +-- Stability : provisional +-- Portability: non-portable -- posix only +-- +-- mmap a file or device into memory as a strict ByteString. +-- +module System.IO.Posix.MMap ( + + -- $mmap_intro + -- $mmap_unmap + + -- * Memory mapped files + unsafeMMapFile -- :: FilePath -> IO ByteString + +-- $mmap_intro +-- +-- 'unsafeMMapFile' mmaps a file or device into memory as a strict +-- 'ByteString'. The file is not actually copied strictly into memory, +-- but instead pages from the file will be loaded into the address +-- space on demand. +-- +-- We can consider mmap as lazy IO pushed into the virtual memory +-- subsystem. +-- +-- The file is mapped using MAP_SHARED: modifications to the file +-- will be immediately shared with any other process accessing the +-- file. This has no effect from the Haskell point of view, since +-- ByteStrings are treated as immutable values. +-- +-- However, if the file is written to by any other process on the +-- system while it is in use in Haskell, those changes will be +-- immediately reflected on the Haskell side, destroying referential +-- transparency. +-- +-- It is only safe to mmap a file if you know you are the sole user. +-- +-- For more details about mmap, and its consequences, see: +-- +-- * +-- +-- * +-- + +-- $mmap_unmap +-- +-- When the entire file is out of scope, the Haskell storage manager +-- will call munmap to free the file, using a finaliser. Until then, as +-- much of the file as you access will be allocated. +-- +-- Note that the Haskell storage manager doesn't know how large a +-- resource is associated with an mmapped file. If you allocate many +-- such files, the garbage collector will only see the 'ForeignPtr's +-- that have been allocated, not the corresponding ByteArrays. The +-- result will be that the GC runs less often that you hoped, as it +-- looks like only a few bytes have been allocated on the Haskell heap. +-- +-- Use of 'performGC' or 'finalizeForeignPtr' when you know that +-- the object is going out of scope can ensure that resources are +-- released appropriately. +-- + + ) where + +import System.IO.Posix.MMap.Internal + +-- import System.IO +-- import qualified System.IO as IO +import Foreign.Ptr + +import Control.Exception +import Data.ByteString + +import System.Posix hiding (openFd) +import System.Posix.IO.Compat (openFd) + +-- | The 'unsafeMMapFile' function maps a file or device into memory, +-- returning a strict 'ByteString' that accesses the mapped file. +-- If the mmap fails for some reason, an error is thrown. +-- +-- Memory mapped files will behave as if they were read lazily -- +-- pages from the file will be loaded into memory on demand. +-- +-- The storage manager is used to free the mapped memory. When +-- the garbage collector notices there are no further references to the +-- mapped memory, a call to munmap is made. It is not necessary to do +-- this yourself. In tight memory situations, it may be profitable to +-- use 'performGC' or 'finalizeForeignPtr' to force an unmap. +-- +-- Note: this operation may break referential transparency! If +-- any other process on the system changes the file when it is mapped +-- into Haskell, the contents of your 'ByteString' will change. +-- +unsafeMMapFile :: FilePath -> IO ByteString +unsafeMMapFile f = do + fd <- openFd f ReadOnly defaultFileFlags + always (closeFd fd) $ do + stat <- getFdStatus fd + let size = fromIntegral (fileSize stat) + if size <= 0 + then return empty -- BSD mmap won't accept a length of zero + else do + ptr <- c_mmap size (fromIntegral fd) + if ptr == nullPtr + then error "System.IO.Posix.MMap.mmapFile: unable to mmap file" + else unsafePackMMapPtr ptr size + + where always = flip finally diff --git a/miscellaneous/bytestring-mmap/System/IO/Posix/MMap/Internal.hs b/miscellaneous/bytestring-mmap/System/IO/Posix/MMap/Internal.hs new file mode 100644 index 00000000..81ab08e1 --- /dev/null +++ b/miscellaneous/bytestring-mmap/System/IO/Posix/MMap/Internal.hs @@ -0,0 +1,54 @@ +{-# LANGUAGE ForeignFunctionInterface #-} +-------------------------------------------------------------------- +-- | +-- Module : System.IO.Posix.MMap.Internal +-- Copyright : (c) Galois, Inc. 2007 +-- License : BSD3 +-- +-- Maintainer: Don Stewart +-- Stability : provisional +-- Portability: non-portable -- posix only +-- +-- Low level mmap access. +-- +module System.IO.Posix.MMap.Internal ( + + -- * Converting an mmapped pointer to a 'ByteString' + unsafePackMMapPtr, -- :: Ptr Word8 -> CSize -> IO ByteString + + -- * Low level bindings + c_mmap, -- :: CSize -> CInt -> IO (Ptr Word8) + c_munmap -- :: Ptr Word8 -> CSize -> IO CInt + + ) where + +import System.IO +import qualified System.IO as IO +import Foreign.C.Types +import Foreign.Ptr +import qualified Foreign.Concurrent as FC + +import Control.Monad +import Data.Word +import Data.ByteString.Internal +-- import Data.ByteString + +-- | Create a bytestring from a memory mapped Ptr. +-- A finalizer will be associated with the resource, that will call +-- munmap when the storage manager detects that the resource is no longer +-- in use. +unsafePackMMapPtr :: Ptr Word8 -> CSize -> IO ByteString +unsafePackMMapPtr p s = do + fp <- FC.newForeignPtr p $ do + v <- c_munmap p s + when (v == -1) $ IO.hPutStrLn stderr $ + "System.IO.Posix.MMap: warning, failed to unmap " + ++ show s ++" bytes at "++show p + return (fromForeignPtr fp 0 (fromIntegral s)) +{-# INLINE unsafePackMMapPtr #-} + +foreign import ccall unsafe "hs_bytestring_mmap.h hs_bytestring_mmap" + c_mmap :: CSize -> CInt -> IO (Ptr Word8) + +foreign import ccall unsafe "hs_bytestring_mmap.h munmap" + c_munmap :: Ptr Word8 -> CSize -> IO CInt diff --git a/miscellaneous/bytestring-mmap/System/IO/Posix/MMap/Lazy.hs b/miscellaneous/bytestring-mmap/System/IO/Posix/MMap/Lazy.hs new file mode 100644 index 00000000..140419ed --- /dev/null +++ b/miscellaneous/bytestring-mmap/System/IO/Posix/MMap/Lazy.hs @@ -0,0 +1,130 @@ +{-# LANGUAGE CPP, BangPatterns, ForeignFunctionInterface #-} +-------------------------------------------------------------------- +-- | +-- Module : System.IO.Posix.MMap +-- Copyright : (c) Galois, Inc. 2007 +-- License : BSD3 +-- +-- Maintainer: Don Stewart +-- Stability : provisional +-- Portability: non-portable -- posix only +-- +-- Lazy, chunk-wise memory mapping. +-- +-- Memory map a file as a lazy ByteString. Finalisers are associated +-- cached-sized portions of the file, which will be deallocated as +-- those chunks go out of scope. +-- +-- Unlike strict Bytestrings, mmapFile for Lazy ByteStrings will +-- deallocate chunks of the file. +-- +-- The storage manager is used to free chunks of the mapped memory. When +-- the garbage collector notices there are no further references to +-- a chunk, a call to munmap is made. +-- +-- In effect, the file is mmapped once, lazily, then covered with finalizers +-- for each chunk. When any chunk goes out of scope, that part is +-- deallocated. We must allocate the spine of the structure strictly +-- though, to ensure finalizers are registered for the entire file. +-- +-- The Haskell garbage collector decides when to run based on heap +-- pressure, however the mmap stores memory outside the Haskell heap, +-- so those resources are not counted when deciding to run the garbage +-- collect. The result is that finalizers run less often than you might +-- expect, and it is possible to write a lazy bytestring mmap program +-- that never deallocates (and thus doesn't run in constant space). +-- 'performGC' or 'finalizerForeignPtr' can be used to trigger collection +-- at sensible points. +-- +-- Note: this operation may break referential transparency! If +-- any other process on the system changes the file when it is mapped +-- into Haskell, the contents of your 'ByteString' will change. +-- +module System.IO.Posix.MMap.Lazy ( + + unsafeMMapFile -- :: FilePath -> IO ByteString + + ) where + +import System.IO.Posix.MMap.Internal + +-- import System.IO +import Foreign.C.Types +import Foreign.Ptr +-- import Control.Monad + +import Control.Exception +import Data.Word +import Data.ByteString.Lazy.Internal + +import System.Posix hiding (openFd) +import System.Posix.IO.Compat (openFd) + +-- +-- | The 'unsafeMMapFile' function maps a file or device into memory as +-- a lazy ByteString, made of 64*pagesize unmappable chunks of bytes. +-- +-- Memory mapped files will behave as if they were read lazily -- +-- pages from the file will be loaded into memory on demand. +-- +-- The storage manager is used to free chunks that go out of scope, +-- and unlike strict bytestrings, memory mapped lazy ByteStrings will +-- be deallocated in chunks (so you can write traversals that run in +-- constant space). +-- +-- However, the size of the mmapped resource is not known by the Haskell +-- GC, it appears only as a small ForeignPtr. This means that the +-- Haskell GC may not not run as often as you'd like, leading to delays +-- in unmapping chunks. +-- +-- Appropriate use of performGC or finalizerForeignPtr may be required +-- to ensure deallocation, as resources allocated by mmap are not +-- tracked by the Haskell garbage collector. +-- +-- For example, when writing out a lazy bytestring allocated with mmap, +-- you may wish to finalizeForeignPtr when each chunk is written, as the +-- chunk goes out of scope, rather than relying on the garbage collector +-- to notice the chunk has gone. +-- +-- This operation is unsafe: if the file is written to by any other +-- process on the system, the 'ByteString' contents will change in +-- Haskell. +-- +unsafeMMapFile :: FilePath -> IO ByteString +unsafeMMapFile path = do + fd <- openFd path ReadOnly defaultFileFlags + always (closeFd fd) $ do + stat <- getFdStatus fd + let size = fromIntegral (fileSize stat) + ptr <- c_mmap size (fromIntegral fd) + if ptr == nullPtr + then error "System.IO.Posix.MMap.Lazy: unable to mmap file!" + else chunks chunk_size ptr (fromIntegral size) + where + always = flip finally + + -- must be page aligned. + chunk_size = 64 * fromIntegral pagesize -- empircally derived + +-- +-- Break the file up into chunks. + -- Have separate munmap finalizers for each chunk. +-- +chunks :: CSize -> Ptr Word8 -> CSize -> IO ByteString +chunks chunk_size p bytes = loop p bytes +#ifndef __HADDOCK__ + where + loop !ptr !rest + | rest <= 0 = return Empty + | otherwise = let s = min chunk_size rest + ptr' = ptr `plusPtr` fromIntegral s + rest' = rest - s + in do c <- unsafePackMMapPtr ptr s + cs <- loop ptr' rest' -- need to be strict + return (chunk c cs) -- to ensure we cover the whole file + -- with finalizers +#endif + +foreign import ccall unsafe "unistd.h getpagesize" + pagesize :: CInt + diff --git a/miscellaneous/bytestring-mmap/System/Posix/IO/Compat.hs b/miscellaneous/bytestring-mmap/System/Posix/IO/Compat.hs new file mode 100644 index 00000000..dc083a2a --- /dev/null +++ b/miscellaneous/bytestring-mmap/System/Posix/IO/Compat.hs @@ -0,0 +1,12 @@ +{-# LANGUAGE CPP #-} +module System.Posix.IO.Compat where + +import qualified System.Posix as Unix + + +openFd :: FilePath -> Unix.OpenMode -> Unix.OpenFileFlags -> IO Unix.Fd +#if MIN_VERSION_unix(2,8,0) +openFd = Unix.openFd +#else +openFd file openMode = Unix.openFd file openMode Nothing +#endif diff --git a/miscellaneous/bytestring-mmap/bytestring-mmap.cabal b/miscellaneous/bytestring-mmap/bytestring-mmap.cabal new file mode 100644 index 00000000..75732455 --- /dev/null +++ b/miscellaneous/bytestring-mmap/bytestring-mmap.cabal @@ -0,0 +1,42 @@ +name: bytestring-mmap +version: 0.2.3 +synopsis: mmap support for strict ByteStrings +description: + . + This library provides a wrapper to mmap(2), allowing files or + devices to be lazily loaded into memory as strict or lazy + ByteStrings, using the virtual memory subsystem to do on-demand + loading. + . +category: System +homepage: http://code.haskell.org/~dons/code/bytestring-mmap/ +license: BSD3 +license-file: LICENSE +author: Don Stewart +maintainer: Don Stewart +build-type: Simple +cabal-version: >= 1.2 + +flag split-base + description: Choose the new smaller, split-up base package. + +library + build-depends: unix + if flag(split-base) + build-depends: base >= 3 && < 6, bytestring >= 0.9 + else + build-depends: base < 3 + extensions: CPP, ForeignFunctionInterface, BangPatterns + + exposed-modules: System.IO.Posix.MMap + System.IO.Posix.MMap.Lazy + System.IO.Posix.MMap.Internal + + other-modules: System.Posix.IO.Compat + + ghc-options: -Wall -O2 + + c-sources: cbits/hs_bytestring_mmap.c + include-dirs: include + includes: hs_bytestring_mmap.h + install-includes: hs_bytestring_mmap.h diff --git a/miscellaneous/bytestring-mmap/cbits/hs_bytestring_mmap.c b/miscellaneous/bytestring-mmap/cbits/hs_bytestring_mmap.c new file mode 100644 index 00000000..0ec1e778 --- /dev/null +++ b/miscellaneous/bytestring-mmap/cbits/hs_bytestring_mmap.c @@ -0,0 +1,22 @@ +/* + * hs_bytestring_mmap.c + * + * License : BSD3 + * + * Copyright (C) 2003 David Roundy + * 2005-7 Don Stewart + * + * Maintainer: Don Stewart + */ +#include "hs_bytestring_mmap.h" + +/* + * mmap len bytes from fd into memory, read only. + */ +unsigned char *hs_bytestring_mmap(size_t len, int fd) { + void *result = mmap(0, len, PROT_READ, MAP_SHARED, fd, 0); + if (result == MAP_FAILED) + return (unsigned char *)0; + else + return (unsigned char *)result; +} diff --git a/miscellaneous/bytestring-mmap/include/hs_bytestring_mmap.h b/miscellaneous/bytestring-mmap/include/hs_bytestring_mmap.h new file mode 100644 index 00000000..7e45dcd7 --- /dev/null +++ b/miscellaneous/bytestring-mmap/include/hs_bytestring_mmap.h @@ -0,0 +1,15 @@ +/* + * hs_bytestring_mmap.h + * + * License : BSD3 + * + * Copyright (C) 2003 David Roundy + * 2005-7 Don Stewart + * + * Maintainer: Don Stewart + */ + +#include +#include + +unsigned char *hs_bytestring_mmap(size_t len, int fd); diff --git a/miscellaneous/bytestring-mmap/tests/big-lazy.hs b/miscellaneous/bytestring-mmap/tests/big-lazy.hs new file mode 100644 index 00000000..c9187796 --- /dev/null +++ b/miscellaneous/bytestring-mmap/tests/big-lazy.hs @@ -0,0 +1,22 @@ +import qualified Data.ByteString.Lazy as L +import System.IO.Posix.MMap.Lazy +import Control.Monad +import Text.Printf +import System.Mem + +main = do + s <- unsafeMMapFile "/usr/obj/data/1G" + go 0 s + where + go n s + | L.null s = return () + | otherwise + = do -- printf "%d\n" + L.head s `seq` return () + when (n `mod` 1000 == 0) $ do + performGC -- tune this value for when to run the GC + go (n+1) (L.drop 4096 s) + + +-- forM_ [0, (1024) .. L.length s-1] $ \n -> do + diff --git a/miscellaneous/bytestring-mmap/tests/big.hs b/miscellaneous/bytestring-mmap/tests/big.hs new file mode 100644 index 00000000..bed78e84 --- /dev/null +++ b/miscellaneous/bytestring-mmap/tests/big.hs @@ -0,0 +1,12 @@ +import qualified Data.ByteString as S +import System.IO.Posix.MMap +import Control.Monad +import Text.Printf + +main = do + s <- unsafeMMapFile "/usr/obj/data/1G" + print "This program should touch only 1 page per 100k" + + forM_ [0, (1024) .. S.length s-1] $ \n -> do + printf "n=%d := %d\n" n (S.index s n) + diff --git a/miscellaneous/bytestring-mmap/tests/cp.hs b/miscellaneous/bytestring-mmap/tests/cp.hs new file mode 100644 index 00000000..5427b5f9 --- /dev/null +++ b/miscellaneous/bytestring-mmap/tests/cp.hs @@ -0,0 +1,37 @@ +-- A non-copying cp based on mmap. + +import System.IO.Posix.MMap +import qualified Data.ByteString as S + +import Text.Printf +import Control.Exception +import System.CPUTime +import System.Cmd +import System.Directory + +import System.Environment + +time :: IO t -> IO t +time a = do + start <- getCPUTime + v <- a + v `seq` return () + end <- getCPUTime + let diff = (fromIntegral (end - start)) / (10^12) + printf "Computation time: %0.3f sec\n" (diff :: Double) + return v + +main = do + [f] <- getArgs + + putStrLn "mmap copy" + time $ S.writeFile "file-1" =<< unsafeMMapFile f + putChar '\n' + + putStrLn "lazy copy" + time $ S.writeFile "file-2" =<< S.readFile f + putChar '\n' + + system $ "diff " ++ "file-1 " ++ "file-2" + removeFile "file-1" + removeFile "file-2" diff --git a/miscellaneous/bytestring-mmap/tests/fast-cp.hs b/miscellaneous/bytestring-mmap/tests/fast-cp.hs new file mode 100644 index 00000000..d7f266ca --- /dev/null +++ b/miscellaneous/bytestring-mmap/tests/fast-cp.hs @@ -0,0 +1,29 @@ +import qualified System.IO.Posix.MMap.Lazy as L +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString.Lazy.Internal as L +import qualified Data.ByteString.Internal as S +import qualified Data.ByteString as S + +import Foreign.ForeignPtr +import System.Environment +import System.IO +import Control.Exception + +main = do + [f,g] <- getArgs + writeFile' g =<< L.unsafeMMapFile f + +-- +-- An implementation of writeFile for bytestrings that +-- that finalises chunks as they go out the door. +-- +writeFile' :: FilePath -> L.ByteString -> IO () +writeFile' f txt = bracket (openBinaryFile f WriteMode) hClose (\hdl -> hPut hdl txt) + +hPut :: Handle -> L.ByteString -> IO () +hPut h cs = L.foldrChunks (\chunk rest -> do S.hPut h chunk + unmap chunk + rest) + (return ()) cs + + where unmap c = finalizeForeignPtr fp where (fp,_,_) = S.toForeignPtr c diff --git a/miscellaneous/bytestring-mmap/tests/files.hs b/miscellaneous/bytestring-mmap/tests/files.hs new file mode 100644 index 00000000..d480caeb --- /dev/null +++ b/miscellaneous/bytestring-mmap/tests/files.hs @@ -0,0 +1,46 @@ +import qualified Data.ByteString as S +import qualified Data.ByteString.Lazy as L +import System.IO.Posix.MMap +import qualified System.IO.Posix.MMap.Lazy as LM + +import System.Directory +import System.Posix.Files +import System.IO +import System.FilePath +import Control.Monad +import Control.Applicative +import Text.Printf +import System.Cmd +import System.Exit +import System.Mem +import Control.Exception + +main = do + print "Testing Lazy.mmap == Strict.mmap == Strict.ByteString.readFile" + system "find /home/dons/ghc/ -type f > files_to_read" + always (removeFile "files_to_read") $ do + fs <- lines <$> readFile "files_to_read" + + {- + ss <- getDirectoryContents dir + fs <- filterM (\f -> do st <- getFileStatus (dir f) + return (not $ isDirectory st)) ss + -} + + printf "Comparing %d files\n" (length fs) + forM_ (zip [1..] fs) $ \(i,f) -> do + t <- eq f + if t + then when (i `mod` 1000 == 0) $ putStr "Ok. " >> hFlush stdout + else exitWith (ExitFailure 1) + + print "All good." + + where + always = flip finally + +eq f = do + m <- unsafeMMapFile f + lm <- LM.unsafeMMapFile f + s <- S.readFile f + return (m == s && L.fromChunks [m] == lm) diff --git a/miscellaneous/bytestring-mmap/tests/pressure.hs b/miscellaneous/bytestring-mmap/tests/pressure.hs new file mode 100644 index 00000000..6dd09621 --- /dev/null +++ b/miscellaneous/bytestring-mmap/tests/pressure.hs @@ -0,0 +1,32 @@ +-- A non-copying cp based on mmap. + +import System.IO.Posix.MMap +import Control.Monad +import System.Mem +import qualified Data.ByteString as S +import Text.Printf +import Control.Exception +import System.CPUTime + +main = do + + --should run in constant space, and be faster: + time $ forM_ [0..1000] $ \_ -> do + unsafeMMapFile "/usr/share/dict/words" + + putStrLn "\nShould be faster than:\n" + + --should run in constant space: + time $ forM_ [0..1000] $ \_ -> do + S.readFile "/usr/share/dict/words" + + +time :: IO t -> IO t +time a = do + start <- getCPUTime + v <- a + v `seq` return () + end <- getCPUTime + let diff = (fromIntegral (end - start)) / (10^12) + printf "Computation time: %0.3f sec\n" (diff :: Double) + return v diff --git a/miscellaneous/bytestring-mmap/tests/small.hs b/miscellaneous/bytestring-mmap/tests/small.hs new file mode 100644 index 00000000..812540fe --- /dev/null +++ b/miscellaneous/bytestring-mmap/tests/small.hs @@ -0,0 +1,15 @@ +import System.Directory +import System.IO.Posix.MMap +import System.Posix.Files +import System.FilePath +import Control.Monad +import Control.Applicative + +main = do +-- let dir = "/home/dons/lambdabot/_darcs/patches" +-- ss <- getDirectoryContents dir +-- fs <- filterM (\f -> do st <- getFileStatus (dir f) +-- return (not $ isDirectory st)) ss + + fs <- lines <$> readFile "/tmp/files" + mapM_ unsafeMMapFile fs diff --git a/miscellaneous/bytestring-mmap/tests/test b/miscellaneous/bytestring-mmap/tests/test new file mode 100644 index 00000000..9f14b843 --- /dev/null +++ b/miscellaneous/bytestring-mmap/tests/test @@ -0,0 +1,19 @@ +#!/bin/sh + +set -e + +compile="ghc -no-recomp -O --make " + +$compile files.hs && ./files +rm files + +$compile cp.hs && ./cp /usr/share/dict/cracklib-small ./words +rm cp + +$compile pressure.hs && ./pressure + +#big-lazy.hs +#big.hs +#fast-cp.hs +#pressure.hs +#small.hs