summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Bedo <cu@cua0.org>2023-08-18 11:31:44 +1000
committerJustin Bedo <cu@cua0.org>2023-08-18 11:34:03 +1000
commite17f009dc5f1750f47f4ef950b2032b4bd283e67 (patch)
tree7c553834c65179739defa3230e6f30cc0d5073a5
parent2765051c4533483d11b2ea3fb4c660e74300ef01 (diff)
add tests and bugfix for small BGZip files
-rw-r--r--Data/FastQ.hs38
-rw-r--r--dedumi.hs2
-rw-r--r--package.yaml8
-rw-r--r--test.hs33
4 files changed, 62 insertions, 19 deletions
diff --git a/Data/FastQ.hs b/Data/FastQ.hs
index 158db3a..940c875 100644
--- a/Data/FastQ.hs
+++ b/Data/FastQ.hs
@@ -10,12 +10,10 @@ module Data.FastQ where
import Control.Monad
import Control.Monad.IO.Class (MonadIO, liftIO)
-import Data.Bifunctor (bimap)
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
-import qualified Data.ByteString.Lazy as BSL
-import qualified Data.ByteString.Lazy.Internal as BSL
+import Data.ByteString.Lazy.Internal (defaultChunkSize)
import Data.Function
import Data.Streaming.Zlib
import Data.Word (Word8)
@@ -43,6 +41,7 @@ makeLenses ''Read
type ReadPair = (Read, Read)
+gzipWindow :: WindowBits
gzipWindow = WindowBits 31
parse :: MonadIO m => FilePath -> FilePath -> Stream m ReadPair
@@ -61,22 +60,27 @@ parse l r =
pure $ Just (h, i, Nothing)
step Nothing = pure Stop
step (Just (h, i, Nothing)) = liftIO $ do
- chunk <- B.hGet h BSL.defaultChunkSize
- if chunk == B.empty
+ complete <- isCompleteInflate i
+ (frag, unused, i) <-
+ if complete
+ then do
+ f <- finishInflate i
+ u <- getUnusedInflate i
+ (f,u,) <$> initInflate gzipWindow
+ else pure ("", "", i)
+ if unused == ""
then do
- hClose h
- str <- finishInflate i
- pure $ Yield (SB.toArray str) Nothing
+ chunk <- B.hGet h defaultChunkSize
+ if chunk == B.empty
+ then do
+ hClose h
+ str <- finishInflate i
+ pure $ Yield (SB.toArray $ frag <> str) Nothing
+ else do
+ popper <- feedInflate i $ chunk
+ pure $ Yield (SB.toArray frag) (Just (h, i, Just popper))
else do
- complete <- isCompleteInflate i
- (frag, unused, i) <-
- if complete
- then do
- f <- finishInflate i
- u <- getUnusedInflate i
- (f,u,) <$> initInflate gzipWindow
- else pure ("", "", i)
- popper <- feedInflate i $ unused <> chunk
+ popper <- feedInflate i $ unused
pure $ Yield (SB.toArray frag) (Just (h, i, Just popper))
step (Just (h, i, Just popper)) = liftIO $ do
popper >>= \case
diff --git a/dedumi.hs b/dedumi.hs
index 4215026..5ca07a1 100644
--- a/dedumi.hs
+++ b/dedumi.hs
@@ -15,7 +15,6 @@ import Data.Function
import GHC.Prim (RealWorld)
import GHC.TypeLits
import Lens.Micro
-import qualified Streamly.Data.Fold as F
import qualified Streamly.Data.Stream as S
import System.Environment
@@ -58,5 +57,4 @@ main = do
parse p1 p2
& S.filterM (insert' f)
& fmap trim
- -- & S.fold (F.drainMapM print)
& unparse p3 p4
diff --git a/package.yaml b/package.yaml
index bf95141..f0438a7 100644
--- a/package.yaml
+++ b/package.yaml
@@ -18,3 +18,11 @@ executables:
ghc-options: [-O2, -fdicts-strict, -fspec-constr-recursive=16, -fmax-worker-args=16, -fplugin=Fusion.Plugin, -Wall, -Wno-name-shadowing]
other-modules:
- Data.FastQ
+
+tests:
+ tests:
+ main: test.hs
+ dependencies:
+ - QuickCheck
+ other-modules:
+ - Data.FastQ
diff --git a/test.hs b/test.hs
new file mode 100644
index 0000000..a8780ff
--- /dev/null
+++ b/test.hs
@@ -0,0 +1,33 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE QuasiQuotes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+module Main where
+
+import qualified Data.ByteString.Char8 as B
+import Data.FastQ
+import Data.Function
+import qualified Streamly.Data.Fold as F
+import qualified Streamly.Data.Stream as S
+import qualified Streamly.Data.Unfold as U
+import Test.QuickCheck
+import Test.QuickCheck.Arbitrary
+import Test.QuickCheck.Monadic
+import Prelude hiding (Read)
+
+instance Arbitrary Read where
+ arbitrary = Read <$> genStr <*> genStr <*> genStr
+ where
+ genBS = fmap B.pack . listOf . elements
+ genStr = genBS $ [' ' .. '*'] ++ [',' .. '?'] ++ ['A' .. '~']
+
+main :: IO ()
+main = quickCheckWith stdArgs {maxSuccess = 10000} $ \rp -> monadicIO $ do
+ rp' <- run $ do
+ S.unfold U.fromList rp & unparse "a" "b"
+ cnt <- B.readFile "a"
+ B.writeFile "a" $ cnt <> cnt
+ cnt <- B.readFile "b"
+ B.writeFile "b" $ cnt <> cnt
+ parse "a" "b" & S.fold F.toList
+ assert $ rp <> rp == rp'