aboutsummaryrefslogtreecommitdiff
path: root/lib/shard-regex.nix
blob: 06fba4dc15706bc26dd04a94f0ac6ca1c59fd6ff (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Split a sequence file into `n` shards, distributing records round-robin.
#
# Arguments:
#   bionix - the bionix package set (supplies matchFiletype, stage, pkgs, ...)
#   n      - number of shards; one derivation output is produced per shard
#   input  - the file to shard; must carry a `filetype` attribute
#
# A new record starts on every line matching a filetype-specific regex.
# The result exposes outputs `out`, `out2`, ..., `out<n>` and passes the
# input's filetype through unchanged.
{bionix, n}:

with bionix;
with lib.types;

input:
let
  # Regex marking the first line of a record: ">" for fa, "@" for fq.
  # For gz/bz2 the handler is `f` itself, so the lookup recurses —
  # presumably on the wrapped (inner) filetype; confirm against matchFiletype'.
  # NOTE(review): "^@" matches any line starting with "@"; a FASTQ quality
  # line beginning with "@" would also be treated as a record start.
  re = let f = matchFiletype' "shard-regex" {
      fa = _: "^>";
      fq = _: "^@";
      gz = f;
      bz2 = f;
    };
  in f input.filetype;

  # Command that turns the input into plain text on stdout.
  decompress = matchFiletype "shard-regex-decompression" {
      fa = _: "cat";
      fq = _: "cat";
      gz = _: "gunzip";
      bz2 = _: "bunzip2";
    } input;

  # Command that re-encodes each shard the same way the input was encoded.
  compress = matchFiletype "shard-regex-compression" {
      fa = _: "cat";
      fq = _: "cat";
      gz = _: "gzip";
      bz2 = _: "bzip2";
    } input;

  # Extra build inputs required by the compression commands above.
  compressPkgs = with bionix.pkgs; matchFiletype "shard-regex-compression" {
      fa = _: [];
      fq = _: [];
      gz = _: [ gzip ];
      bz2 = _: [ bzip2 ];
    } input;
in stage {
  name = "shard";
  # "out" plus "out2" .. "out<n>", giving n outputs in total.
  outputs = [ "out" ] ++ builtins.genList (i: "out" + toString (i + 2)) (n - 1);
  buildInputs = [ pkgs.gawk ] ++ compressPkgs;
  buildCommand = let
    # The first file argument lists the shard file names (one per line) and
    # is loaded into out[]. For every following line, a regex match advances
    # the current shard index modulo the shard count before the line is
    # written, so the first record lands in the second shard when n > 1.
    awkScript = pkgs.writeText "shard.awk" ''
      BEGIN{cout=0}
      FNR==NR{out[nout++] = $0;next}
      /${re}/{cout = (cout + 1) % nout}
      {print > out[cout]}
    '';
  in ''
    for o in $outputs ; do
      echo $(basename ''${!o}) >> outputs
      # Pre-create each shard: awk only creates files it writes to, so an
      # input with fewer records than shards would otherwise leave some
      # shard files missing and break the compression loop below.
      touch $(basename ''${!o})
    done
    awk -f ${awkScript} outputs <(${decompress} < ${input})
    for o in $outputs ; do
      ${compress} < $(basename ''${!o}) > ''${!o}
    done
  '';
  passthru.filetype = input.filetype;
}