diff options
| -rw-r--r-- | Data/FastQ.hs | 38 | ||||
| -rw-r--r-- | dedumi.hs | 2 | ||||
| -rw-r--r-- | package.yaml | 8 | ||||
| -rw-r--r-- | test.hs | 33 | 
4 files changed, 62 insertions, 19 deletions
| diff --git a/Data/FastQ.hs b/Data/FastQ.hs index 158db3a..940c875 100644 --- a/Data/FastQ.hs +++ b/Data/FastQ.hs @@ -10,12 +10,10 @@ module Data.FastQ where  import Control.Monad  import Control.Monad.IO.Class (MonadIO, liftIO) -import Data.Bifunctor (bimap)  import Data.ByteString (ByteString)  import qualified Data.ByteString as B  import qualified Data.ByteString.Char8 as BC -import qualified Data.ByteString.Lazy as BSL -import qualified Data.ByteString.Lazy.Internal as BSL +import Data.ByteString.Lazy.Internal (defaultChunkSize)  import Data.Function  import Data.Streaming.Zlib  import Data.Word (Word8) @@ -43,6 +41,7 @@ makeLenses ''Read  type ReadPair = (Read, Read) +gzipWindow :: WindowBits  gzipWindow = WindowBits 31  parse :: MonadIO m => FilePath -> FilePath -> Stream m ReadPair @@ -61,22 +60,27 @@ parse l r =            pure $ Just (h, i, Nothing)          step Nothing = pure Stop          step (Just (h, i, Nothing)) = liftIO $ do -          chunk <- B.hGet h BSL.defaultChunkSize -          if chunk == B.empty +          complete <- isCompleteInflate i +          (frag, unused, i) <- +            if complete +              then do +                f <- finishInflate i +                u <- getUnusedInflate i +                (f,u,) <$> initInflate gzipWindow +              else pure ("", "", i) +          if unused == ""              then do -              hClose h -              str <- finishInflate i -              pure $ Yield (SB.toArray str) Nothing +              chunk <- B.hGet h defaultChunkSize +              if chunk == B.empty +                then do +                  hClose h +                  str <- finishInflate i +                  pure $ Yield (SB.toArray $ frag <> str) Nothing +                else do +                  popper <- feedInflate i $ chunk +                  pure $ Yield (SB.toArray frag) (Just (h, i, Just popper))              else do -              complete <- isCompleteInflate i -              (frag, unused, i) <- -                if complete -                  then do -                    f <- finishInflate i -                    u <- getUnusedInflate i -                    (f,u,) <$> initInflate gzipWindow -                  else pure ("", "", i) -              popper <- feedInflate i $ unused <> chunk +              popper <- feedInflate i $ unused                pure $ Yield (SB.toArray frag) (Just (h, i, Just popper))          step (Just (h, i, Just popper)) = liftIO $ do            popper >>= \case @@ -15,7 +15,6 @@ import Data.Function  import GHC.Prim (RealWorld)  import GHC.TypeLits  import Lens.Micro -import qualified Streamly.Data.Fold as F  import qualified Streamly.Data.Stream as S  import System.Environment @@ -58,5 +57,4 @@ main = do    parse p1 p2      & S.filterM (insert' f)      & fmap trim -    -- & S.fold (F.drainMapM print)      & unparse p3 p4 diff --git a/package.yaml b/package.yaml index bf95141..f0438a7 100644 --- a/package.yaml +++ b/package.yaml @@ -18,3 +18,11 @@ executables:      ghc-options: [-O2, -fdicts-strict, -fspec-constr-recursive=16, -fmax-worker-args=16, -fplugin=Fusion.Plugin, -Wall, -Wno-name-shadowing]      other-modules:        - Data.FastQ + +tests: +  tests: +    main: test.hs +    dependencies: +      - QuickCheck +    other-modules: +      - Data.FastQ @@ -0,0 +1,33 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE ScopedTypeVariables #-} + +module Main where + +import qualified Data.ByteString.Char8 as B +import Data.FastQ +import Data.Function +import qualified Streamly.Data.Fold as F +import qualified Streamly.Data.Stream as S +import qualified Streamly.Data.Unfold as U +import Test.QuickCheck +import Test.QuickCheck.Arbitrary +import Test.QuickCheck.Monadic +import Prelude hiding (Read) + +instance Arbitrary Read where +  arbitrary = Read <$> genStr <*> genStr <*> genStr +    where +      genBS = fmap B.pack . listOf . elements +      genStr = genBS $ [' ' .. '*'] ++ [',' .. '?'] ++ ['A' .. '~'] + +main :: IO () +main = quickCheckWith stdArgs {maxSuccess = 10000} $ \rp -> monadicIO $ do +  rp' <- run $ do +    S.unfold U.fromList rp & unparse "a" "b" +    cnt <- B.readFile "a" +    B.writeFile "a" $ cnt <> cnt +    cnt <- B.readFile "b" +    B.writeFile "b" $ cnt <> cnt +    parse "a" "b" & S.fold F.toList +  assert $ rp <> rp == rp' | 
