diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/shard-regex.nix | 54 | ||||
-rw-r--r-- | lib/shard.nix | 50 |
2 files changed, 55 insertions, 49 deletions
NOTE(review): line structure restored from a whitespace-collapsed scrape; tokens are unchanged, but leading indentation is reconstructed (2-space Nix convention) -- confirm against the repository, or apply with --ignore-whitespace.
diff --git a/lib/shard-regex.nix b/lib/shard-regex.nix
new file mode 100644
index 0000000..06fba4d
--- /dev/null
+++ b/lib/shard-regex.nix
@@ -0,0 +1,54 @@
+{bionix, n}:
+
+with bionix;
+with lib.types;
+
+input:
+let
+  re = let f = matchFiletype' "shard-regex" {
+    fa = _: "^>";
+    fq = _: "^@";
+    gz = f;
+    bz2 = f;
+  };
+  in f input.filetype;
+  decompress = matchFiletype "shard-regex-decompression" {
+    fa = _: "cat";
+    fq = _: "cat";
+    gz = _: "gunzip";
+    bz2 = _: "bunzip2";
+  } input;
+  compress = matchFiletype "shard-regex-compression" {
+    fa = _: "cat";
+    fq = _: "cat";
+    gz = _: "gzip";
+    bz2 = _: "bzip2";
+  } input;
+  compressPkgs = with bionix.pkgs; matchFiletype "shard-regex-compression" {
+    fa = _: [];
+    fq = _: [];
+    gz = _: [ gzip ];
+    bz2 = _: [ bzip2 ];
+  } input;
+in stage {
+  name = "shard";
+  outputs = [ "out" ] ++ builtins.genList (i: "out" + toString (i + 2)) (n - 1);
+  buildInputs = [ pkgs.gawk ] ++ compressPkgs;
+  buildCommand = let
+    awkScript = pkgs.writeText "shard.awk" ''
+      BEGIN{cout=0}
+      FNR==NR{out[nout++] = $0;next}
+      /${re}/{cout = (cout + 1) % nout}
+      {print > out[cout]}
+    '';
+  in ''
+    for o in $outputs ; do
+      echo $(basename ''${!o}) >> outputs
+    done
+    awk -f ${awkScript} outputs <(${decompress} < ${input})
+    for o in $outputs ; do
+      ${compress} < $(basename ''${!o}) > ''${!o}
+    done
+  '';
+  passthru.filetype = input.filetype;
+}
diff --git a/lib/shard.nix b/lib/shard.nix
index 58e335f..95cf251 100644
--- a/lib/shard.nix
+++ b/lib/shard.nix
@@ -4,54 +4,6 @@ with bionix;
 with lib.types;
 
 {
-  regex = n: (exec (_: input: let
-    re = let f = matchFiletype "shard-regex" {
-      fa = _: "^>";
-      fq = _: "^@";
-      gz = f;
-      bz2 = f;
-    };
-    in f input;
-    decompress = matchFiletype "shard-regex-decompression" {
-      fa = _: "cat";
-      fq = _: "cat";
-      gz = _: "gunzip";
-      bz2 = _: "bunzip2";
-    } input;
-    compress = matchFiletype "shard-regex-compression" {
-      fa = _: "cat";
-      fq = _: "cat";
-      gz = _: "gzip";
-      bz2 = _: "bzip2";
-    } input;
-    compressPkgs = with bionix.pkgs; matchFiletype "shard-regex-compression" {
-      fa = _: [];
-      fq = _: [];
-      gz = _: [ gzip ];
-      bz2 = _: [ bzip2 ];
-    } input;
-  in outputDrvs (stage {
-    name = "shard";
-    outputs = [ "out" ] ++ builtins.genList (i: "out" + toString (i + 2)) (n - 1);
-    buildInputs = [ pkgs.gawk ] ++ compressPkgs;
-    buildCommand = let
-      awkScript = pkgs.writeText "shard.awk" ''
-        BEGIN{cout=0}
-        FNR==NR{out[nout++] = $0;next}
-        /${re}/{cout = (cout + 1) % nout}
-        {print > out[cout]}
-      '';
-    in ''
-      for o in $outputs ; do
-        echo $(basename ''${!o}) >> outputs
-      done
-      awk -f ${awkScript} outputs <(${decompress} < ${input})
-      for o in $outputs ; do
-        ${compress} < $(basename ''${!o}) > ''${!o}
-      done
-    '';
-    passthru.filetype = input.filetype;
-  }))) [];
-
+  regex = n: input: outputDrvs (callBionixE ./shard-regex.nix { n = n; } input);
   fastQPair = n: {input1, input2}: lib.zipListsWith (i: j: {input1 = i; input2 = j;}) (lib.shard.regex n input1) (lib.shard.regex n input2);
 }