blob: b409344e885ccfa8a5739f517aea9893a21fc25c (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
# Stage that runs Strelka's somatic workflow on a normal/tumour pair and
# exposes the resulting SNV and indel VCFs as separate outputs.
#
# Arguments:
#   bionix        - package set providing lib, types, pkgs, stage and samtools
#   indexAttrs    - attributes passed to samtools.faidx for the reference
#   bamIndexAttrs - attributes passed to samtools.index for each input
#   flags         - optional string of extra command-line options appended to
#                   configureStrelkaSomaticWorkflow.py; null (the default)
#                   leaves the command line untouched
{ bionix
, indexAttrs ? {}
, bamIndexAttrs ? {}
, flags ? null
}:

# The two inputs to compare.
{normal, tumour}:

with bionix;
with lib;
with types;

let
  # Last path component; used as the stem of the local symlink names.
  filename = path: last (splitString "/" path);

  # Reference recorded on an input. Only the `bam` filetype has a handler here.
  # NOTE(review): presumably matchFiletype rejects any other filetype - confirm.
  getref = f: matchFiletype "strelka-callSomatic" { bam = x: x.ref; } f;

  inputs = [ normal tumour ];
  refs = map getref inputs;
  ref = head refs;
in

# Both inputs must carry the same reference.
assert (length (unique refs) == 1);

let
  out = stage {
    name = "strelka-callSomatic";
    buildInputs = with pkgs; [ strelka ];
    # `out` is a symlink to `snvs` (see the end of buildCommand).
    outputs = [ "out" "indels" "snvs" ];
    # `flags` is spliced onto the `--ref` line so that the generated script is
    # unchanged when flags is null. It is deliberately unquoted so one string
    # can carry several options.
    buildCommand = ''
      ln -s ${ref} ref.fa
      ln -s ${bionix.samtools.faidx indexAttrs ref} ref.fa.fai
      ${concatMapStringsSep "\n" (p: "ln -s ${p} ${filename p}.bam") inputs}
      ${concatMapStringsSep "\n" (p: "ln -s ${bionix.samtools.index bamIndexAttrs p} ${filename p}.bai") inputs}
      configureStrelkaSomaticWorkflow.py \
      --normalBam ${filename normal}.bam \
      --tumourBam ${filename tumour}.bam \
      --ref ref.fa ${optionalString (flags != null) "${flags} "}\
      --runDir $TMPDIR
      ./runWorkflow.py \
      -m local \
      -j $NIX_BUILD_CORES
      # Strelka writes runtime stats and timestamps;
      # both have to be stripped to provide determinism
      cd results/variants
      rm *.tbi
      for f in *.vcf.gz; do
      gunzip $f
      g=$(basename $f .gz)
      sed -i '/^##fileDate/d' $g
      sed -i '/^##startTime/d' $g
      sed -i '/^##cmd/d' $g
      done
      mv somatic.indels.vcf $indels
      mv somatic.snvs.vcf $snvs
      ln -s $snvs $out
    '';
    passthru.multicore = true;
    # The result is tagged as a VCF against the shared reference.
    passthru.filetype = types.filetype.vcf {ref = ref;};
  };
in out
|