aboutsummaryrefslogtreecommitdiff
path: root/lib/shard-regex.nix
blob: 9dc17200aa8dab6fd8d7081f9aa82ccb63197ed9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Shard a sequence file into `n` pieces by record-start pattern.
#
# Arguments:
#   bionix - the bionix package set; its attributes (and `lib.types`) are
#            brought into scope below.
#   n      - number of shards; the resulting stage has `n` outputs named
#            "out", "out2", ..., "out<n>".
#   input  - the file to shard; must carry a `filetype` attribute, which is
#            used to choose the record pattern and (de)compression commands
#            and is passed through unchanged on the result.
{ bionix, n }:

with bionix;
with lib.types;

input:
let
  # Pattern spliced into the awk script as `/re/`; a line matching it moves
  # output to the next shard. "^>" is used for fa and "^@" for fq.
  # The gz and bz2 entries hand off to the same matcher again (presumably so
  # the wrapped inner filetype decides the pattern - confirm against
  # matchFiletype').
  # NOTE(review): "^@" would also match an fq quality line that happens to
  # start with '@' - confirm whether inputs can contain such lines.
  re =
    let
      patterns = {
        fa = _: "^>";
        fq = _: "^@";
        gz = resolve;
        bz2 = resolve;
      };
      resolve = matchFiletype' "shard-regex" patterns;
    in
    resolve input.filetype;
  # Command the build script uses to read `input` on stdin and emit plain
  # text: "cat" for fa/fq, "gunzip" for gz, "bunzip2" for bz2.
  decompress =
    let
      passthrough = _: "cat";
      handlers = {
        fa = passthrough;
        fq = passthrough;
        gz = _: "gunzip";
        bz2 = _: "bunzip2";
      };
    in
    matchFiletype "shard-regex-decompression" handlers input;
  # Command the build script uses to write each shard back out in the same
  # container as `input`: "cat" for fa/fq, "gzip" for gz, "bzip2" for bz2.
  compress =
    let
      passthrough = _: "cat";
      handlers = {
        fa = passthrough;
        fq = passthrough;
        gz = _: "gzip";
        bz2 = _: "bzip2";
      };
    in
    matchFiletype "shard-regex-compression" handlers input;
  # Extra packages appended to the stage's buildInputs so the chosen
  # (de)compression commands are available: none for fa/fq, gzip for gz,
  # bzip2 for bz2.
  compressPkgs =
    let
      nothing = _: [ ];
      handlers = {
        fa = nothing;
        fq = nothing;
        gz = _: [ bionix.pkgs.gzip ];
        bz2 = _: [ bionix.pkgs.bzip2 ];
      };
    in
    matchFiletype "shard-regex-compression" handlers input;
in
# Build step: decompress `input`, deal its records round-robin into one
# working file per output, then (re)compress each working file into its
# output path. The input's filetype is passed through unchanged.
stage {
  name = "shard";
  # One output per shard: "out", "out2", ..., "out<n>".
  outputs = [ "out" ] ++ builtins.genList (i: "out" + toString (i + 2)) (n - 1);
  buildInputs = [ pkgs.gawk ] ++ compressPkgs;
  buildCommand =
    let
      # awk is given two inputs: the file `outputs` (one shard file name per
      # line) followed by the decompressed data.
      #  - While reading the first file (FNR==NR) the names are collected
      #    into out[0..nout-1].
      #  - For the data, every line matching the record pattern advances the
      #    current shard index *before* the line is printed, so the first
      #    matching record lands in out[1] when nout > 1; lines seen before
      #    any match go to out[0].
      awkScript = pkgs.writeText "shard.awk" ''
        BEGIN{cout=0}
        FNR==NR{out[nout++] = $0;next}
        /${re}/{cout = (cout + 1) % nout}
        {print > out[cout]}
      '';
    in
    ''
      for o in $outputs ; do
        shard=$(basename "''${!o}")
        echo "$shard" >> outputs
        # awk only creates a shard file once a line is routed to it; create
        # every shard up front so inputs with fewer records than shards
        # still yield all (possibly empty) outputs instead of failing on a
        # missing file below.
        : > "$shard"
      done
      awk -f ${awkScript} outputs <(${decompress} < ${input})
      for o in $outputs ; do
        ${compress} < "$(basename "''${!o}")" > "''${!o}"
      done
    '';
  passthru.filetype = input.filetype;
}