summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Bedo <cu@cua0.org>2023-08-02 17:18:44 +1000
committerJustin Bedo <cu@cua0.org>2023-08-02 17:18:44 +1000
commit2a78ced9c247f5a059d8456617391e3e5c846c23 (patch)
tree286e6e16971c2d26a0c2cfa684c311930cd94721
parent172f54474f3e92ad4df5c193a8605d4333da33e1 (diff)
first full implementation
-rw-r--r--dedumi.hs54
-rw-r--r--flake.nix9
-rw-r--r--package.yaml5
3 files changed, 65 insertions, 3 deletions
diff --git a/dedumi.hs b/dedumi.hs
index e69de29..01d9b37 100644
--- a/dedumi.hs
+++ b/dedumi.hs
@@ -0,0 +1,54 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE NumericUnderscores #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE TypeFamilies #-}
+
+module Main where
+
+import Data.ByteString (ByteString)
+import qualified Data.ByteString as B
+import Data.Cuckoo
+import Data.FastQ
+import Data.Function
+import Data.Typeable
+import Lens.Micro
+import qualified Streamly.Data.Stream as S
+import System.Environment
+import Prelude hiding (reads)
+
+-- | Lets strict 'ByteString' values be used as cuckoo-filter keys.
+--
+-- The bucket hash is the salted FNV-1a helper and the fingerprint is the
+-- salted SipHash helper, so the two values come from different hash
+-- functions applied with the same salt.
+--
+-- NOTE(review): this looks like an orphan instance (neither the class nor
+-- 'ByteString' appears to be defined in this module) -- confirm that is
+-- acceptable for an executable-only package.
+instance CuckooFilterHash ByteString where
+  {-# INLINE cuckooHash #-}
+  cuckooHash (Salt salt) = saltedFnv1aByteString salt
+
+  {-# INLINE cuckooFingerprint #-}
+  cuckooFingerprint (Salt salt) = saltedSipHashByteString salt
+
+-- | Number of leading bytes of each read that are treated as the UMI.
+--
+-- Used both to build the de-duplication key ('B.take') and to strip the
+-- UMI from the reads and their quality strings ('B.drop').
+umiLength :: Int
+umiLength = 8
+
+-- | Remove the UMI prefix from both reads of a record.
+--
+-- The first 'umiLength' bytes are dropped from the @nucs@ and the @qual@
+-- field of the first and second read, so both fields of a read are
+-- shortened by the same amount.
+trim x = stripQual (stripNucs x)
+  where
+    -- Drop the UMI-sized prefix from a single field.
+    dropUmi = B.drop umiLength
+    -- Setters are composed right-to-left: read 1 is updated before read 2.
+    stripNucs = (reads . _2 . nucs %~ dropUmi) . (reads . _1 . nucs %~ dropUmi)
+    stripQual = (reads . _2 . qual %~ dropUmi) . (reads . _1 . qual %~ dropUmi)
+
+-- | Record the UMI key of @x@ in the filter @f@ and report whether it was
+-- already there.
+--
+-- The key is the first 'umiLength' bytes of read 1's @nucs@ followed by
+-- the first 'umiLength' bytes of read 2's @nucs@.
+--
+-- Returns 'True' when 'member' already reports the key, and 'False' when
+-- the key was absent and has just been inserted. Calls 'error' with
+-- @"filter full"@ when 'insert' reports failure.
+insert' f x = do
+  seen <- member f umi
+  if seen
+    then pure True
+    else do
+      stored <- insert f umi
+      if stored
+        then pure False
+        else error "filter full"
+  where
+    umi =
+      B.take umiLength (x ^. reads . _1 . nucs)
+        <> B.take umiLength (x ^. reads . _2 . nucs)
+
+-- | Entry point: @dedumi IN1 IN2 OUT1 OUT2@.
+--
+-- Streams the records parsed from @IN1@/@IN2@, keeps only the first record
+-- seen for each UMI key, strips the UMI prefix with 'trim', and writes the
+-- result to @OUT1@/@OUT2@.
+--
+-- Fails with a usage message (as an 'IOError') unless exactly four
+-- arguments are given.
+main :: IO ()
+main =
+  getArgs >>= \case
+    [p1, p2, p3, p4] -> do
+      -- Salt 0, sized for 20 million items; the type applications select
+      -- the filter's two type-level parameters (presumably bucket size 4
+      -- and 13-bit fingerprints -- confirm against the cuckoo package).
+      f <- newCuckooFilter @4 @13 @ByteString 0 20_000_000
+
+      parse p1 p2
+        -- insert' yields True for a key that is ALREADY in the filter,
+        -- while filterM keeps elements whose predicate is True. Negate it
+        -- so duplicates are dropped and first occurrences are kept.
+        & S.filterM (fmap not . insert' f)
+        & fmap trim
+        & unparse p3 p4
+
+      pure ()
+    _ -> ioError (userError "usage: dedumi <in1> <in2> <out1> <out2>")
diff --git a/flake.nix b/flake.nix
index e15a476..ca94e85 100644
--- a/flake.nix
+++ b/flake.nix
@@ -3,10 +3,15 @@
outputs = {self, nixpkgs}:
let
system = "x86_64-linux";
- pkgs = import nixpkgs {inherit system;};
+ pkgs = import nixpkgs {inherit system; config.allowBroken=true;};
+ hp = pkgs.haskell.packages.ghc928.override {
+ overrides = self: super: rec {
+ cuckoo = pkgs.haskell.lib.dontCheck super.cuckoo;
+ };
+ };
in
{
- packages.${system}.default = pkgs.haskellPackages.callCabal2nix "dedumi" ./. {};
+ packages.${system}.default = hp.callCabal2nix "dedumi" ./. {};
devShells.${system}.default = self.packages.${system}.default.env;
};
}
diff --git a/package.yaml b/package.yaml
index b0752ee..a923f2d 100644
--- a/package.yaml
+++ b/package.yaml
@@ -3,7 +3,9 @@ name: dedumi
dependencies:
- base
- zlib
- - microlens-platform
+ - cuckoo
+ - microlens-th
+ - microlens
- bytestring
- streamly-bytestring
- streamly
@@ -13,3 +15,4 @@ dependencies:
executables:
dedumi:
main: dedumi.hs
+ ghc-options: [-O2, -fspec-constr-recursive=10, -fmax-worker-args=16]