From 5dffdecd5112d6c60c6ff285a8dd871086b7bc30 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Wed, 16 Aug 2023 16:38:39 +1000 Subject: remove lazy bytestrings --- Data/FastQ.hs | 34 ++++++++++++++-------------------- dedumi.hs | 6 ++++-- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/Data/FastQ.hs b/Data/FastQ.hs index 1f4f814..3e2f26c 100644 --- a/Data/FastQ.hs +++ b/Data/FastQ.hs @@ -21,6 +21,7 @@ import Data.Word (Word8) import Lens.Micro import Lens.Micro.TH import Streamly.Data.Array (Array) +import qualified Streamly.Data.Fold as F import Streamly.Data.Stream (Stream) import qualified Streamly.Data.Stream as S import qualified Streamly.External.ByteString as SB @@ -80,6 +81,7 @@ parse l r = parseEntry l r = let [hdr, seq, "+", qual] = SB.fromArray l & BC.lines [hdr', seq', "+", qual'] = SB.fromArray r & BC.lines + str = error $ "parseEntry:" <> BC.unpack str in (Read qual seq hdr, Read qual' seq' hdr') unparse :: FilePath -> FilePath -> Stream IO ReadPair -> IO () @@ -88,39 +90,31 @@ unparse l r str = do rh <- openFile r WriteMode ld <- initDeflate 0 gzipWindow rd <- initDeflate 0 gzipWindow - fmap unparse' str & toLazyBS >>= uncurry (writeFiles lh rh ld rd) + fmap unparse' str & S.fold (F.drainMapM $ writeFiles lh rh ld rd) + flush rd rh + flush ld lh + hClose rh + hClose lh where - writeFiles l r ld rd (BSL.Chunk a as) (BSL.Chunk b bs) = do + writeFiles l r ld rd (a, b) = do putCompressed ld l a putCompressed rd r b - writeFiles l r ld rd as bs - writeFiles l r ld rd BSL.Empty BSL.Empty = do - flush rd r - flush ld l - writeFiles _ _ _ _ _ _ = error "unparse: stream mismatch" putCompressed d h chunk = do popper <- feedDeflate d chunk - writePopper popper h + writePopper h popper - flush d h = - finishDeflate d >>= \case - Just rest -> B.hPut h rest - Nothing -> pure () + flush d h = finishDeflate d & writePopper h - writePopper p h = + writePopper h p = p >>= \case Just str -> do B.hPut h str - writePopper p h + writePopper h p Nothing -> pure () unparse' :: ReadPair -> (ByteString, ByteString) unparse' read = - ( BC.unlines [read ^. _1 . header, read ^. _1 . nucs, "+", read ^. _1 . qual], - BC.unlines [read ^. _2 . header, read ^. _2 . nucs, "+", read ^. _2 . qual] + ( BC.unlines ["@" <> read ^. _1 . header, read ^. _1 . nucs, "+", read ^. _1 . qual], + BC.unlines ["@" <> read ^. _2 . header, read ^. _2 . nucs, "+", read ^. _2 . qual] ) - - toLazyBS :: Stream IO (ByteString, ByteString) -> IO (BSL.ByteString, BSL.ByteString) - toLazyBS = - S.foldrM (\(l, r) b -> bimap (BSL.chunk ("@" <> l)) (BSL.chunk ("@" <> r)) <$> unsafeInterleaveIO b) (pure (BSL.Empty, BSL.Empty)) diff --git a/dedumi.hs b/dedumi.hs index aa682f1..4215026 100644 --- a/dedumi.hs +++ b/dedumi.hs @@ -15,6 +15,7 @@ import Data.Function import GHC.Prim (RealWorld) import GHC.TypeLits import Lens.Micro +import qualified Streamly.Data.Fold as F import qualified Streamly.Data.Stream as S import System.Environment @@ -42,10 +43,10 @@ insert' :: (KnownNat b, KnownNat f) => CuckooFilter RealWorld b f ByteString -> insert' f x = let y = B.take (umiLength + extraHashBases) (x ^. _1 . nucs) <> B.take (umiLength + extraHashBases) (x ^. _2 . nucs) in member f y >>= \case - True -> pure True + True -> pure False False -> insert f y >>= \case - True -> pure False + True -> pure True False -> error "filter full" main :: IO () @@ -57,4 +58,5 @@ main = do parse p1 p2 & S.filterM (insert' f) & fmap trim + -- & S.fold (F.drainMapM print) & unparse p3 p4 -- cgit v1.2.3