diff options
author | Justin Bedo <cu@cua0.org> | 2019-05-02 14:42:30 +1000 |
---|---|---|
committer | Justin Bedo <cu@cua0.org> | 2019-05-02 14:45:46 +1000 |
commit | e1bb4233c72b0af954ec560759c408d92b2cea15 (patch) | |
tree | ebc742ff842ada49c131f9ea20a3e931bdb22da6 | |
parent | d03b5f29594e900fe1dbc7ba8657609d3581a7a7 (diff) |
shard-regex: split into separate file to fix exec issues
-rw-r--r-- | default.nix | 1 | ||||
-rw-r--r-- | lib/shard-regex.nix | 54 | ||||
-rw-r--r-- | lib/shard.nix | 50 |
3 files changed, 56 insertions, 49 deletions
```diff
diff --git a/default.nix b/default.nix
index 61f2974..25834a9 100644
--- a/default.nix
+++ b/default.nix
@@ -9,6 +9,7 @@ let
   callBionix = callBionix;
   id = x: x;
   exec = f: x: y: f x y;
+  exec' = f: exec (_: f) {};
   callBionixE = p: exec (callBionix p);
 
   types = callBionix ./lib/types.nix {};
diff --git a/lib/shard-regex.nix b/lib/shard-regex.nix
new file mode 100644
index 0000000..06fba4d
--- /dev/null
+++ b/lib/shard-regex.nix
@@ -0,0 +1,54 @@
+{bionix, n}:
+
+with bionix;
+with lib.types;
+
+input:
+let
+  re = let f = matchFiletype' "shard-regex" {
+      fa = _: "^>";
+      fq = _: "^@";
+      gz = f;
+      bz2 = f;
+    };
+    in f input.filetype;
+  decompress = matchFiletype "shard-regex-decompression" {
+    fa = _: "cat";
+    fq = _: "cat";
+    gz = _: "gunzip";
+    bz2 = _: "bunzip2";
+  } input;
+  compress = matchFiletype "shard-regex-compression" {
+    fa = _: "cat";
+    fq = _: "cat";
+    gz = _: "gzip";
+    bz2 = _: "bzip2";
+  } input;
+  compressPkgs = with bionix.pkgs; matchFiletype "shard-regex-compression" {
+    fa = _: [];
+    fq = _: [];
+    gz = _: [ gzip ];
+    bz2 = _: [ bzip2 ];
+  } input;
+in stage {
+  name = "shard";
+  outputs = [ "out" ] ++ builtins.genList (i: "out" + toString (i + 2)) (n - 1);
+  buildInputs = [ pkgs.gawk ] ++ compressPkgs;
+  buildCommand = let
+    awkScript = pkgs.writeText "shard.awk" ''
+      BEGIN{cout=0}
+      FNR==NR{out[nout++] = $0;next}
+      /${re}/{cout = (cout + 1) % nout}
+      {print > out[cout]}
+    '';
+  in ''
+    for o in $outputs ; do
+      echo $(basename ''${!o}) >> outputs
+    done
+    awk -f ${awkScript} outputs <(${decompress} < ${input})
+    for o in $outputs ; do
+      ${compress} < $(basename ''${!o}) > ''${!o}
+    done
+  '';
+  passthru.filetype = input.filetype;
+}
diff --git a/lib/shard.nix b/lib/shard.nix
index 58e335f..95cf251 100644
--- a/lib/shard.nix
+++ b/lib/shard.nix
@@ -4,54 +4,6 @@ with bionix;
 with lib.types;
 
 {
-  regex = n: (exec (_: input: let
-    re = let f = matchFiletype "shard-regex" {
-        fa = _: "^>";
-        fq = _: "^@";
-        gz = f;
-        bz2 = f;
-      };
-      in f input;
-    decompress = matchFiletype "shard-regex-decompression" {
-      fa = _: "cat";
-      fq = _: "cat";
-      gz = _: "gunzip";
-      bz2 = _: "bunzip2";
-    } input;
-    compress = matchFiletype "shard-regex-compression" {
-      fa = _: "cat";
-      fq = _: "cat";
-      gz = _: "gzip";
-      bz2 = _: "bzip2";
-    } input;
-    compressPkgs = with bionix.pkgs; matchFiletype "shard-regex-compression" {
-      fa = _: [];
-      fq = _: [];
-      gz = _: [ gzip ];
-      bz2 = _: [ bzip2 ];
-    } input;
-  in outputDrvs (stage {
-    name = "shard";
-    outputs = [ "out" ] ++ builtins.genList (i: "out" + toString (i + 2)) (n - 1);
-    buildInputs = [ pkgs.gawk ] ++ compressPkgs;
-    buildCommand = let
-      awkScript = pkgs.writeText "shard.awk" ''
-        BEGIN{cout=0}
-        FNR==NR{out[nout++] = $0;next}
-        /${re}/{cout = (cout + 1) % nout}
-        {print > out[cout]}
-      '';
-    in ''
-      for o in $outputs ; do
-        echo $(basename ''${!o}) >> outputs
-      done
-      awk -f ${awkScript} outputs <(${decompress} < ${input})
-      for o in $outputs ; do
-        ${compress} < $(basename ''${!o}) > ''${!o}
-      done
-    '';
-    passthru.filetype = input.filetype;
-  }))) [];
-
+  regex = n: input: outputDrvs (callBionixE ./shard-regex.nix { n = n; } input);
   fastQPair = n: {input1, input2}: lib.zipListsWith (i: j: {input1 = i; input2 = j;}) (lib.shard.regex n input1) (lib.shard.regex n input2);
 }
```