summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Bedo <cu@cua0.org>2023-08-16 16:38:39 +1000
committerJustin Bedo <cu@cua0.org>2023-08-16 16:38:39 +1000
commit5dffdecd5112d6c60c6ff285a8dd871086b7bc30 (patch)
tree9a21bea3cb90b7386eb569e34b87e99f84608159
parent664bf16afb161d9b4fb7cc3bcfb86ede1bc55e9b (diff)
remove lazy bytestrings
-rw-r--r--Data/FastQ.hs34
-rw-r--r--dedumi.hs6
2 files changed, 18 insertions, 22 deletions
diff --git a/Data/FastQ.hs b/Data/FastQ.hs
index 1f4f814..3e2f26c 100644
--- a/Data/FastQ.hs
+++ b/Data/FastQ.hs
@@ -21,6 +21,7 @@ import Data.Word (Word8)
import Lens.Micro
import Lens.Micro.TH
import Streamly.Data.Array (Array)
+import qualified Streamly.Data.Fold as F
import Streamly.Data.Stream (Stream)
import qualified Streamly.Data.Stream as S
import qualified Streamly.External.ByteString as SB
@@ -80,6 +81,7 @@ parse l r =
parseEntry l r =
let [hdr, seq, "+", qual] = SB.fromArray l & BC.lines
[hdr', seq', "+", qual'] = SB.fromArray r & BC.lines
+ str = error $ "parseEntry:" <> BC.unpack str
in (Read qual seq hdr, Read qual' seq' hdr')
unparse :: FilePath -> FilePath -> Stream IO ReadPair -> IO ()
@@ -88,39 +90,31 @@ unparse l r str = do
rh <- openFile r WriteMode
ld <- initDeflate 0 gzipWindow
rd <- initDeflate 0 gzipWindow
- fmap unparse' str & toLazyBS >>= uncurry (writeFiles lh rh ld rd)
+ fmap unparse' str & S.fold (F.drainMapM $ writeFiles lh rh ld rd)
+ flush rd rh
+ flush ld lh
+ hClose rh
+ hClose lh
where
- writeFiles l r ld rd (BSL.Chunk a as) (BSL.Chunk b bs) = do
+ writeFiles l r ld rd (a, b) = do
putCompressed ld l a
putCompressed rd r b
- writeFiles l r ld rd as bs
- writeFiles l r ld rd BSL.Empty BSL.Empty = do
- flush rd r
- flush ld l
- writeFiles _ _ _ _ _ _ = error "unparse: stream mismatch"
putCompressed d h chunk = do
popper <- feedDeflate d chunk
- writePopper popper h
+ writePopper h popper
- flush d h =
- finishDeflate d >>= \case
- Just rest -> B.hPut h rest
- Nothing -> pure ()
+ flush d h = finishDeflate d & writePopper h
- writePopper p h =
+ writePopper h p =
p >>= \case
Just str -> do
B.hPut h str
- writePopper p h
+ writePopper h p
Nothing -> pure ()
unparse' :: ReadPair -> (ByteString, ByteString)
unparse' read =
- ( BC.unlines [read ^. _1 . header, read ^. _1 . nucs, "+", read ^. _1 . qual],
- BC.unlines [read ^. _2 . header, read ^. _2 . nucs, "+", read ^. _2 . qual]
+ ( BC.unlines ["@" <> read ^. _1 . header, read ^. _1 . nucs, "+", read ^. _1 . qual],
+ BC.unlines ["@" <> read ^. _2 . header, read ^. _2 . nucs, "+", read ^. _2 . qual]
)
-
- toLazyBS :: Stream IO (ByteString, ByteString) -> IO (BSL.ByteString, BSL.ByteString)
- toLazyBS =
- S.foldrM (\(l, r) b -> bimap (BSL.chunk ("@" <> l)) (BSL.chunk ("@" <> r)) <$> unsafeInterleaveIO b) (pure (BSL.Empty, BSL.Empty))
diff --git a/dedumi.hs b/dedumi.hs
index aa682f1..4215026 100644
--- a/dedumi.hs
+++ b/dedumi.hs
@@ -15,6 +15,7 @@ import Data.Function
import GHC.Prim (RealWorld)
import GHC.TypeLits
import Lens.Micro
+import qualified Streamly.Data.Fold as F
import qualified Streamly.Data.Stream as S
import System.Environment
@@ -42,10 +43,10 @@ insert' :: (KnownNat b, KnownNat f) => CuckooFilter RealWorld b f ByteString ->
insert' f x =
let y = B.take (umiLength + extraHashBases) (x ^. _1 . nucs) <> B.take (umiLength + extraHashBases) (x ^. _2 . nucs)
in member f y >>= \case
- True -> pure True
+ True -> pure False
False ->
insert f y >>= \case
- True -> pure False
+ True -> pure True
False -> error "filter full"
main :: IO ()
@@ -57,4 +58,5 @@ main = do
parse p1 p2
& S.filterM (insert' f)
& fmap trim
+ -- & S.fold (F.drainMapM print)
& unparse p3 p4