blob: 06fba4dc15706bc26dd04a94f0ac6ca1c59fd6ff (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
# Split a sequence file into `n` shards, dealing records out round-robin.
#
# Arguments:
#   bionix, n - the bionix set and the number of shards to produce.
#   input     - the file to shard. Its `filetype` is dispatched over the
#               cases fa, fq, gz and bz2 (gz/bz2 recurse into the wrapped
#               type to find the record format).
#
# Result: a stage named "shard" with `n` outputs (out, out2, ..., out<n>).
# Every shard is recompressed with the codec matching the input's filetype,
# and `passthru.filetype` is copied from the input.
{bionix, n}:

with bionix;
with lib.types;

input:

let
  # awk pattern that is true on the first line of each record.
  recordStart = let f = matchFiletype' "shard-regex" {
      # FASTA: every record begins with a '>' header line.
      fa = _: "/^>/";
      # FASTQ: '@' is also a legal quality character, so a quality line can
      # start with '@' and a "^@" regex would tear a record across shards.
      # Use the line position instead (assumes unwrapped four-line records).
      fq = _: "FNR % 4 == 1";
      gz = f;
      bz2 = f;
    };
    in f input.filetype;

  # Command that turns the input into plain text on stdout.
  decompress = matchFiletype "shard-regex-decompression" {
    fa = _: "cat";
    fq = _: "cat";
    gz = _: "gunzip";
    bz2 = _: "bunzip2";
  } input;

  # Command that re-applies the input's compression to each shard.
  compress = matchFiletype "shard-regex-compression" {
    fa = _: "cat";
    fq = _: "cat";
    gz = _: "gzip";
    bz2 = _: "bzip2";
  } input;

  # Packages providing the (de)compression tools used above.
  compressPkgs = with bionix.pkgs; matchFiletype "shard-regex-compression" {
    fa = _: [];
    fq = _: [];
    gz = _: [ gzip ];
    bz2 = _: [ bzip2 ];
  } input;

in stage {
  name = "shard";
  # Output names: "out", "out2", ..., "out<n>".
  outputs = [ "out" ] ++ builtins.genList (i: "out" + toString (i + 2)) (n - 1);
  buildInputs = [ pkgs.gawk ] ++ compressPkgs;
  buildCommand = let
    # The first file operand lists the shard file names (FNR==NR is only true
    # while reading it); the second is the decompressed input. Each record
    # start advances the current shard index, wrapping at the shard count.
    awkScript = pkgs.writeText "shard.awk" ''
      BEGIN{cout=0}
      FNR==NR{out[nout++] = $0;next}
      ${recordStart}{cout = (cout + 1) % nout}
      {print > out[cout]}
    '';
  in ''
    for o in $outputs ; do
      shard=$(basename ''${!o})
      echo $shard >> outputs
      # Pre-create every shard so that an input with fewer records than
      # shards yields empty outputs instead of a missing-file failure below.
      : > $shard
    done
    awk -f ${awkScript} outputs <(${decompress} < ${input})
    for o in $outputs ; do
      ${compress} < $(basename ''${!o}) > ''${!o}
    done
  '';
  passthru.filetype = input.filetype;
}
|