blob: 9dc17200aa8dab6fd8d7081f9aa82ccb63197ed9 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
|
# Shard a sequence file into `n` pieces, cutting only at record boundaries.
#
# Arguments:
#   bionix - attribute set opened with `with`; the unqualified names used
#            below (`stage`, `pkgs`, `matchFiletype`, `matchFiletype'`) are
#            resolved through `bionix` / `bionix.lib.types`.
#   n      - number of shards; the derivation gets `n` outputs named
#            out, out2, ..., out<n>.
#   input  - the file to shard. Its filetype selects how record starts are
#            recognised and which (de)compressor is run.
#
# Result: the value of `stage { ... }`, with `passthru.filetype` copied from
# `input` so each shard is typed like the original file.
{ bionix, n }:
with bionix;
with lib.types;
input:
let
  # awk *pattern* (not a bare regex) that is true on the first line of every
  # record. gz/bz2 recurse so the pattern is chosen by the wrapped filetype.
  recordStart =
    let
      pick = matchFiletype' "shard-regex" {
        fa = _: "/^>/";
        # A FASTQ quality line may itself begin with '@', so matching "^@"
        # can cut a record in half. Count lines instead; this assumes the
        # standard 4-line-per-record FASTQ layout (no wrapped sequences).
        fq = _: "FNR % 4 == 1";
        gz = pick;
        bz2 = pick;
      };
    in
    pick input.filetype;

  # Command that turns the stored input into plain text on stdout.
  decompress = matchFiletype "shard-regex-decompression"
    {
      fa = _: "cat";
      fq = _: "cat";
      gz = _: "gunzip";
      bz2 = _: "bunzip2";
    }
    input;

  # Command that re-encodes each plain-text shard like the input was encoded.
  compress = matchFiletype "shard-regex-compression"
    {
      fa = _: "cat";
      fq = _: "cat";
      gz = _: "gzip";
      bz2 = _: "bzip2";
    }
    input;

  # Packages providing the `compress` command; none needed for plain files.
  compressPkgs = with bionix.pkgs; matchFiletype "shard-regex-compression"
    {
      fa = _: [ ];
      fq = _: [ ];
      gz = _: [ gzip ];
      bz2 = _: [ bzip2 ];
    }
    input;
in
stage {
  name = "shard";
  outputs = [ "out" ] ++ builtins.genList (i: "out" + toString (i + 2)) (n - 1);
  buildInputs = [ pkgs.gawk ] ++ compressPkgs;
  buildCommand =
    let
      # The first file read by awk ("outputs") lists the shard file names and
      # fills out[]; every later line belongs to the data stream. On each
      # record start the target index advances round-robin, then the line is
      # written to the current shard. FNR restarts for the data stream, so the
      # FASTQ line-count pattern is relative to the data, not to "outputs".
      awkScript = pkgs.writeText "shard.awk" ''
        BEGIN{cout=0}
        FNR==NR{out[nout++] = $0;next}
        ${recordStart} {cout = (cout + 1) % nout}
        {print > out[cout]}
      '';
    in
    # Shards are first written uncompressed into the working directory under
    # the basename of each output path, then compressed into the real outputs.
    # Every shard file is created up front: with fewer records than shards awk
    # never opens some of them, and the final redirect from a missing file
    # would fail before the corresponding output path is created.
    ''
      for o in $outputs ; do
        shard=$(basename "''${!o}")
        echo "$shard" >> outputs
        : > "$shard"
      done
      awk -f ${awkScript} outputs <(${decompress} < ${input})
      for o in $outputs ; do
        ${compress} < "$(basename "''${!o}")" > "''${!o}"
      done
    '';
  passthru.filetype = input.filetype;
}
|