summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Bedo <cu@cua0.org>2023-08-16 15:44:40 +1000
committerJustin Bedo <cu@cua0.org>2023-08-16 15:44:40 +1000
commit5846b2de2fccd90a61713fcd1832397347a01017 (patch)
treea88dc853bfe7453d3b9e2fe2f38729ee38ee6d42
parentb1b959c2af53044b942766eb84b25c114637aee3 (diff)
switch zlib bindings
This resolves a memory leak during unparsing. Speed is faster now overall too. # Please enter the commit message for your changes. Lines starting # with '#' will be ignored, and an empty message aborts the commit. # # On branch master # Your branch is up to date with 'origin/master'. # # Changes to be committed: # modified: Data/FastQ.hs # modified: package.yaml #
-rw-r--r--Data/FastQ.hs70
-rw-r--r--package.yaml4
2 files changed, 56 insertions, 18 deletions
diff --git a/Data/FastQ.hs b/Data/FastQ.hs
index 91f0d5f..1f4f814 100644
--- a/Data/FastQ.hs
+++ b/Data/FastQ.hs
@@ -7,7 +7,7 @@
module Data.FastQ where
-import Codec.Compression.GZip
+import Codec.Zlib
import Control.Monad
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.Bifunctor (bimap)
@@ -42,19 +42,40 @@ makeLenses ''Read
type ReadPair = (Read, Read)
+gzipWindow = WindowBits 31
+
parse :: MonadIO m => FilePath -> FilePath -> Stream m ReadPair
parse l r =
S.zipWith
parseEntry
- (S.unfold streamFile l & AS.splitOn 64)
- (S.unfold streamFile r & AS.splitOn 64)
+ (S.unfold streamFile l & AS.splitOn 64 & S.drop 1)
+ (S.unfold streamFile r & AS.splitOn 64 & S.drop 1)
where
streamFile :: MonadIO m => Unfold m FilePath (Array Word8)
streamFile = Unfold step seed
where
- seed = fmap (BSL.tail . decompress) . liftIO . BSL.readFile
- step (BSL.Chunk bs bl) = pure $ Yield (SB.toArray bs) bl
- step BSL.Empty = pure Stop
+ seed path = liftIO $ do
+ h <- openFile path ReadMode
+ i <- initInflate gzipWindow
+ pure $ Just (h, i, Nothing)
+ step Nothing = pure Stop
+ step (Just (h, i, Nothing)) = liftIO $ step' h i
+ step (Just (h, i, Just popper)) = liftIO $ do
+ popper >>= \case
+ Just str -> pure $ Yield (SB.toArray str) (Just (h, i, Just popper))
+ Nothing -> step' h i
+
+ step' h i = do
+ chunk <- B.hGet h BSL.defaultChunkSize
+ if chunk == B.empty
+ then do
+ str <- finishInflate i
+ pure $ Yield (SB.toArray str) Nothing
+ else do
+ p <- feedInflate i chunk
+ p >>= \case
+ Just str -> pure $ Yield (SB.toArray str) (Just (h, i, Just p))
+ Nothing -> step' h i
parseEntry l r =
let [hdr, seq, "+", qual] = SB.fromArray l & BC.lines
@@ -63,17 +84,36 @@ parse l r =
unparse :: FilePath -> FilePath -> Stream IO ReadPair -> IO ()
unparse l r str = do
- (compress' -> left, compress' -> right) <- fmap unparse' str & toLazyBS
lh <- openFile l WriteMode
rh <- openFile r WriteMode
- writeFiles lh rh left right
+ ld <- initDeflate 0 gzipWindow
+ rd <- initDeflate 0 gzipWindow
+ fmap unparse' str & toLazyBS >>= uncurry (writeFiles lh rh ld rd)
where
- writeFiles l r (BSL.Chunk a as) (BSL.Chunk b bs) = do
- B.hPut l a
- B.hPut r b
- writeFiles l r as bs
- writeFiles l _ a BSL.Empty = BSL.hPut l a
- writeFiles _ r BSL.Empty b = BSL.hPut r b
+ writeFiles l r ld rd (BSL.Chunk a as) (BSL.Chunk b bs) = do
+ putCompressed ld l a
+ putCompressed rd r b
+ writeFiles l r ld rd as bs
+ writeFiles l r ld rd BSL.Empty BSL.Empty = do
+ flush rd r
+ flush ld l
+ writeFiles _ _ _ _ _ _ = error "unparse: stream mismatch"
+
+ putCompressed d h chunk = do
+ popper <- feedDeflate d chunk
+ writePopper popper h
+
+ flush d h =
+ finishDeflate d >>= \case
+ Just rest -> B.hPut h rest
+ Nothing -> pure ()
+
+ writePopper p h =
+ p >>= \case
+ Just str -> do
+ B.hPut h str
+ writePopper p h
+ Nothing -> pure ()
unparse' :: ReadPair -> (ByteString, ByteString)
unparse' read =
@@ -84,5 +124,3 @@ unparse l r str = do
toLazyBS :: Stream IO (ByteString, ByteString) -> IO (BSL.ByteString, BSL.ByteString)
toLazyBS =
S.foldrM (\(l, r) b -> bimap (BSL.chunk ("@" <> l)) (BSL.chunk ("@" <> r)) <$> unsafeInterleaveIO b) (pure (BSL.Empty, BSL.Empty))
-
- compress' = compressWith (defaultCompressParams {compressLevel = bestSpeed})
diff --git a/package.yaml b/package.yaml
index 27d1ca9..3085a53 100644
--- a/package.yaml
+++ b/package.yaml
@@ -3,7 +3,7 @@ name: dedumi
dependencies:
- base
- fusion-plugin
- - zlib
+ - zlib-bindings
- cuckoo
- microlens-th
- microlens
@@ -15,6 +15,6 @@ dependencies:
executables:
dedumi:
main: dedumi.hs
- ghc-options: [-O2, -fspec-constr-recursive=10, -fmax-worker-args=16, -fplugin=Fusion.Plugin, -Wall, -Wno-name-shadowing]
+ ghc-options: [-O2, -fdicts-strict, -fspec-constr-recursive=16, -fmax-worker-args=16, -fplugin=Fusion.Plugin, -Wall, -Wno-name-shadowing]
other-modules:
- Data.FastQ