aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Bedo <cu@cua0.org>2019-05-02 12:23:06 +1000
committerJustin Bedo <cu@cua0.org>2019-05-02 12:23:06 +1000
commitd03b5f29594e900fe1dbc7ba8657609d3581a7a7 (patch)
tree6dab9bde4432418b043d7cceb03b2e5d6001078f
parent11b31bdad648c9af92ac6997303bc10849a7a2ef (diff)
shard: missing file
-rw-r--r--lib/shard.nix57
1 files changed, 57 insertions, 0 deletions
diff --git a/lib/shard.nix b/lib/shard.nix
new file mode 100644
index 0000000..58e335f
--- /dev/null
+++ b/lib/shard.nix
@@ -0,0 +1,57 @@
+{bionix}:
+
+with bionix;
+with lib.types;
+
+{
+ # regex n input: distribute the records of `input` round-robin over `n`
+ # outputs ("out", "out2", ..., "out<n>"), keeping the input's compression
+ # (none / gzip / bzip2) and filetype on every shard.
+ #
+ # Record boundaries:
+ #   * fa: every line starting with ">" begins a new record.
+ #   * fq: every 4th line begins a new record. A "^@" regex is NOT safe
+ #     here because "@" is a valid quality character, so a quality line may
+ #     start with "@" and would wrongly be taken for a header. This assumes
+ #     the usual unwrapped 4-line FASTQ layout.
+ #
+ # NOTE(review): `exec`, `stage`, `outputDrvs` and `matchFiletype` come from
+ # bionix and are not visible here; presumably the result is a function of
+ # `input` yielding one derivation per output -- confirm against bionix.
+ regex = n: (exec (_: input: let
+   # Awk pattern (not a bare regex) that is true on the first line of a record.
+   recordStart = let f = matchFiletype "shard-regex" {
+       fa = _: "/^>/";
+       fq = _: "FNR % 4 == 1";
+       gz = f;
+       bz2 = f;
+     };
+     in f input;
+   # Command that turns the stored input into plain text on stdout.
+   decompress = matchFiletype "shard-regex-decompression" {
+     fa = _: "cat";
+     fq = _: "cat";
+     gz = _: "gunzip";
+     bz2 = _: "bunzip2";
+   } input;
+   # Command that re-applies the input's compression to each shard.
+   compress = matchFiletype "shard-regex-compression" {
+     fa = _: "cat";
+     fq = _: "cat";
+     gz = _: "gzip";
+     bz2 = _: "bzip2";
+   } input;
+   # Packages providing the (de)compression commands selected above.
+   compressPkgs = with bionix.pkgs; matchFiletype "shard-regex-compression" {
+     fa = _: [];
+     fq = _: [];
+     gz = _: [ gzip ];
+     bz2 = _: [ bzip2 ];
+   } input;
+ in outputDrvs (stage {
+   name = "shard";
+   # "out" plus "out2" .. "out<n>": n outputs in total.
+   outputs = [ "out" ] ++ builtins.genList (i: "out" + toString (i + 2)) (n - 1);
+   buildInputs = [ pkgs.gawk ] ++ compressPkgs;
+   buildCommand = let
+     awkScript = pkgs.writeText "shard.awk" ''
+       BEGIN{cout=0}
+       # First input file: one shard file name per line.
+       FNR==NR{out[nout++] = $0;next}
+       # Move on to the next shard whenever a new record starts.
+       ${recordStart}{cout = (cout + 1) % nout}
+       {print > out[cout]}
+     '';
+   in ''
+     for o in $outputs ; do
+       echo $(basename ''${!o}) >> outputs
+       # Pre-create every shard so that shards which receive no record
+       # (fewer records than outputs) still exist for the compress step.
+       touch $(basename ''${!o})
+     done
+     awk -f ${awkScript} outputs <(${decompress} < ${input})
+     for o in $outputs ; do
+       ${compress} < $(basename ''${!o}) > ''${!o}
+     done
+   '';
+   passthru.filetype = input.filetype;
+ }))) [];
+
+ # fastQPair n { input1, input2 }: apply `lib.shard.regex n` to each of the
+ # two inputs and zip the results element-wise, giving a list of
+ # { input1, input2 } attribute sets (one per position in both results).
+ # NOTE(review): presumably the inputs are the two mates of paired-end FASTQ
+ # data -- confirm against callers.
+ fastQPair = n: {input1, input2}:
+   let
+     shards1 = lib.shard.regex n input1;
+     shards2 = lib.shard.regex n input2;
+     mkPair = first: second: { input1 = first; input2 = second; };
+   in lib.zipListsWith mkPair shards1 shards2;
+}