diff options
author | Justin Bedo <cu@cua0.org> | 2023-08-02 17:18:44 +1000 |
---|---|---|
committer | Justin Bedo <cu@cua0.org> | 2023-08-02 17:18:44 +1000 |
commit | 2a78ced9c247f5a059d8456617391e3e5c846c23 (patch) | |
tree | 286e6e16971c2d26a0c2cfa684c311930cd94721 | |
parent | 172f54474f3e92ad4df5c193a8605d4333da33e1 (diff) |
first full implementation
-rw-r--r-- | dedumi.hs | 54 | ||||
-rw-r--r-- | flake.nix | 9 | ||||
-rw-r--r-- | package.yaml | 5 |
3 files changed, 65 insertions, 3 deletions
--- /dev/null
+++ b/dedumi.hs
@@ -0,0 +1,54 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE NumericUnderscores #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE TypeFamilies #-}
+
+module Main where
+
+import Data.ByteString (ByteString)
+import qualified Data.ByteString as B
+import Data.Cuckoo
+import Data.FastQ
+import Data.Function
+import Data.Typeable
+import Lens.Micro
+import qualified Streamly.Data.Stream as S
+import System.Environment
+import Prelude hiding (reads)
+
+instance CuckooFilterHash ByteString where
+  cuckooHash (Salt s) = saltedFnv1aByteString s
+  cuckooFingerprint (Salt s) = saltedSipHashByteString s
+  {-# INLINE cuckooHash #-}
+  {-# INLINE cuckooFingerprint #-}
+
+umiLength = 8 -- bytes taken from the front of each mate's nucs to form the dedup key; trim removes them
+
+trim x = -- drop the leading umiLength bytes of nucs and qual on both mates of a record
+  x
+    & reads . _1 . nucs %~ B.drop umiLength
+    & reads . _2 . nucs %~ B.drop umiLength
+    & reads . _1 . qual %~ B.drop umiLength
+    & reads . _2 . qual %~ B.drop umiLength
+
+insert' f x = -- S.filterM predicate: True keeps the record (key newly stored), False discards it
+  let y = B.take umiLength (x ^. reads . _1 . nucs) <> B.take umiLength (x ^. reads . _2 . nucs)
+   in member f y >>= \case
+        True -> pure False -- key already present in the filter: duplicate, discard
+        False ->
+          insert f y >>= \case
+            True -> pure True -- first sighting, key now stored: keep
+            False -> error "filter full" -- insert returned False; abort instead of silently passing duplicates
+
+main = do -- expects exactly four arguments: two handed to parse, two handed to unparse
+  [p1, p2, p3, p4] <- getArgs
+
+  f <- newCuckooFilter @4 @13 @ByteString 0 20_000_000 -- NOTE(review): presumably salt 0, capacity 20M; confirm against Data.Cuckoo
+
+  parse p1 p2
+    & S.filterM (insert' f)
+    & fmap trim
+    & unparse p3 p4
+
+  pure ()
--- a/flake.nix
+++ b/flake.nix
@@ -3,10 +3,15 @@
   outputs = {self, nixpkgs}: let
     system = "x86_64-linux";
-    pkgs = import nixpkgs {inherit system;};
+    pkgs = import nixpkgs {inherit system; config.allowBroken=true;};
+    hp = pkgs.haskell.packages.ghc928.override {
+      overrides = self: super: rec {
+        cuckoo = pkgs.haskell.lib.dontCheck super.cuckoo;
+      };
+    };
   in {
-    packages.${system}.default = pkgs.haskellPackages.callCabal2nix "dedumi" ./. {};
+    packages.${system}.default = hp.callCabal2nix "dedumi" ./. {};
     devShells.${system}.default = self.packages.${system}.default.env;
   };
 }
diff --git a/package.yaml b/package.yaml
index b0752ee..a923f2d 100644
--- a/package.yaml
+++ b/package.yaml
@@ -3,7 +3,9 @@ name: dedumi
 dependencies:
   - base
   - zlib
-  - microlens-platform
+  - cuckoo
+  - microlens-th
+  - microlens
   - bytestring
   - streamly-bytestring
   - streamly
@@ -13,3 +15,4 @@ dependencies:
 executables:
   dedumi:
     main: dedumi.hs
+    ghc-options: [-O2, -fspec-constr-recursive=10, -fmax-worker-args=16]