summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Bedo <cu@cua0.org>2023-08-16 22:41:22 +1000
committerJustin Bedo <cu@cua0.org>2023-08-16 22:41:22 +1000
commit2765051c4533483d11b2ea3fb4c660e74300ef01 (patch)
treefb02ee59bd6973f46ee66e4947aef24c21bab925
parent5dffdecd5112d6c60c6ff285a8dd871086b7bc30 (diff)
switch to streaming-commons and handle BGZip
-rw-r--r--Data/FastQ.hs52
-rw-r--r--package.yaml2
2 files changed, 30 insertions, 24 deletions
diff --git a/Data/FastQ.hs b/Data/FastQ.hs
index 3e2f26c..158db3a 100644
--- a/Data/FastQ.hs
+++ b/Data/FastQ.hs
@@ -2,12 +2,12 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE TupleSections #-}
{-# LANGUAGE ViewPatterns #-}
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
module Data.FastQ where
-import Codec.Zlib
import Control.Monad
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.Bifunctor (bimap)
@@ -17,6 +17,7 @@ import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ByteString.Lazy.Internal as BSL
import Data.Function
+import Data.Streaming.Zlib
import Data.Word (Word8)
import Lens.Micro
import Lens.Micro.TH
@@ -29,7 +30,6 @@ import Streamly.Internal.Data.Stream.Chunked as AS
import Streamly.Internal.Data.Stream.StreamD.Type (Step (..))
import Streamly.Internal.Data.Unfold.Type (Unfold (..))
import System.IO
-import System.IO.Unsafe (unsafeInterleaveIO)
import Prelude hiding (Read)
data Read = Read
@@ -49,40 +49,45 @@ parse :: MonadIO m => FilePath -> FilePath -> Stream m ReadPair
parse l r =
S.zipWith
parseEntry
- (S.unfold streamFile l & AS.splitOn 64 & S.drop 1)
- (S.unfold streamFile r & AS.splitOn 64 & S.drop 1)
+ (S.unfold streamBGZFile l & AS.splitOn 64 & S.drop 1)
+ (S.unfold streamBGZFile r & AS.splitOn 64 & S.drop 1)
where
- streamFile :: MonadIO m => Unfold m FilePath (Array Word8)
- streamFile = Unfold step seed
+ streamBGZFile :: MonadIO m => Unfold m FilePath (Array Word8)
+ streamBGZFile = Unfold step seed
where
seed path = liftIO $ do
h <- openFile path ReadMode
i <- initInflate gzipWindow
pure $ Just (h, i, Nothing)
step Nothing = pure Stop
- step (Just (h, i, Nothing)) = liftIO $ step' h i
- step (Just (h, i, Just popper)) = liftIO $ do
- popper >>= \case
- Just str -> pure $ Yield (SB.toArray str) (Just (h, i, Just popper))
- Nothing -> step' h i
-
- step' h i = do
+ step (Just (h, i, Nothing)) = liftIO $ do
chunk <- B.hGet h BSL.defaultChunkSize
if chunk == B.empty
then do
+ hClose h
str <- finishInflate i
pure $ Yield (SB.toArray str) Nothing
else do
- p <- feedInflate i chunk
- p >>= \case
- Just str -> pure $ Yield (SB.toArray str) (Just (h, i, Just p))
- Nothing -> step' h i
+ complete <- isCompleteInflate i
+ (frag, unused, i) <-
+ if complete
+ then do
+ f <- finishInflate i
+ u <- getUnusedInflate i
+ (f,u,) <$> initInflate gzipWindow
+ else pure ("", "", i)
+ popper <- feedInflate i $ unused <> chunk
+ pure $ Yield (SB.toArray frag) (Just (h, i, Just popper))
+ step (Just (h, i, Just popper)) = liftIO $ do
+ popper >>= \case
+ PRNext str -> pure $ Yield (SB.toArray str) (Just (h, i, Just popper))
+ PRDone -> pure $ Yield (SB.toArray "") (Just (h, i, Nothing))
+ PRError e -> error $ "parse:" <> show e
parseEntry l r =
- let [hdr, seq, "+", qual] = SB.fromArray l & BC.lines
- [hdr', seq', "+", qual'] = SB.fromArray r & BC.lines
- str = error $ "parseEntry:" <> BC.unpack str
- in (Read qual seq hdr, Read qual' seq' hdr')
+ case (SB.fromArray l & BC.lines, SB.fromArray r & BC.lines) of
+ ([hdr, seq, "+", qual], [hdr', seq', "+", qual']) -> (Read qual seq hdr, Read qual' seq' hdr')
+ e -> error $ "parseEntry:" <> show e
unparse :: FilePath -> FilePath -> Stream IO ReadPair -> IO ()
unparse l r str = do
@@ -108,10 +113,11 @@ unparse l r str = do
writePopper h p =
p >>= \case
- Just str -> do
+ PRNext str -> do
B.hPut h str
writePopper h p
- Nothing -> pure ()
+ PRDone -> pure ()
+ PRError e -> error $ "parse:" <> show e
unparse' :: ReadPair -> (ByteString, ByteString)
unparse' read =
diff --git a/package.yaml b/package.yaml
index 3085a53..bf95141 100644
--- a/package.yaml
+++ b/package.yaml
@@ -3,7 +3,7 @@ name: dedumi
dependencies:
- base
- fusion-plugin
- - zlib-bindings
+ - streaming-commons
- cuckoo
- microlens-th
- microlens