aboutsummaryrefslogtreecommitdiff
path: root/tools/strelka-callSomatic.nix
blob: 6a8f25225cc99c79ce77a8d76f56e659cd3d7ca9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Somatic variant calling with Strelka on a normal/tumour pair.
#
# Outer arguments (tool configuration):
#   bionix        - the bionix package set; brought into scope below via `with`.
#   indexAttrs    - attributes handed to bionix.samtools.faidx when indexing
#                   the reference.
#   bamIndexAttrs - attributes handed to bionix.samtools.index when indexing
#                   each input alignment.
#   flags         - optional, defaults to null; presumably a string of extra
#                   command-line flags for Strelka (TODO confirm which command
#                   it is meant for).
{ bionix
, indexAttrs ? { }
, bamIndexAttrs ? { }
, flags ? null
}:

# Inner arguments: the two alignments to compare. Each is passed through
# matchFiletype with a `bam` handler, so both are expected to be bam inputs.
{ normal, tumour }:

# Bring the attributes of bionix, and then of its lib and types, into scope
# so they can be used unqualified below.
with bionix;
with lib;
with types;

let
  # Last "/"-separated component of a path; used to name the per-input
  # symlinks created in the build directory.
  filename = path: last (splitString "/" path);
  # Extracts the `ref` field from a bam input. The string argument is
  # presumably the tool name matchFiletype reports on a type mismatch
  # (TODO confirm against matchFiletype).
  getref = matchFiletype "strelka-callSomatic" { bam = x: x.ref; };
  # Normal first, tumour second.
  inputs = [ normal tumour ];
  # Reference of each input, in the same order as `inputs`.
  refs = map getref inputs;
  # Reference used for the run; the assert below ensures every input shares it.
  ref = head refs;

in

# Refuse to evaluate unless both inputs have the same reference.
assert (length (unique refs) == 1);

let

  # Build stage that configures and runs Strelka's somatic workflow on the
  # normal/tumour pair, then post-processes the resulting VCFs.
  #
  # Outputs:
  #   indels - somatic.indels.vcf, decompressed, with the ##fileDate,
  #            ##startTime and ##cmd header lines removed.
  #   snvs   - somatic.snvs.vcf, processed the same way.
  #   out    - symlink to the snvs output.
  #
  # `flags`, when non-null, is appended verbatim to the
  # configureStrelkaSomaticWorkflow.py command line. With the default (null)
  # nothing is appended and the script text is unchanged.
  out = stage {
    name = "strelka-callSomatic";
    buildInputs = with pkgs; [ strelka ];
    outputs = [ "out" "indels" "snvs" ];
    # The reference, its faidx index, and each input with its index are
    # symlinked into the build directory under predictable names before
    # Strelka is configured with $TMPDIR as the run directory.
    buildCommand = ''
      ln -s ${ref} ref.fa
      ln -s ${bionix.samtools.faidx indexAttrs ref} ref.fa.fai
      ${concatMapStringsSep "\n" (p: "ln -s ${p} ${filename p}.bam") inputs}
      ${concatMapStringsSep "\n" (p: "ln -s ${bionix.samtools.index bamIndexAttrs p} ${filename p}.bai") inputs}

      configureStrelkaSomaticWorkflow.py \
        --normalBam ${filename normal}.bam \
        --tumourBam ${filename tumour}.bam \
        --ref ref.fa \
        --runDir $TMPDIR${optionalString (flags != null) " ${flags}"}

      ./runWorkflow.py \
        -m local \
        -j $NIX_BUILD_CORES

      # Strelka writes runtime stats and timestamps;
      # both have to be stripped to provide determinism
      cd results/variants
      rm *.tbi
      for f in *.vcf.gz; do
        gunzip $f
        g=$(basename $f .gz)
        sed -i '/^##fileDate/d' $g
        sed -i '/^##startTime/d' $g
        sed -i '/^##cmd/d' $g
      done
      mv somatic.indels.vcf $indels
      mv somatic.snvs.vcf $snvs

      ln -s $snvs $out
    '';
    # The workflow is run with -j $NIX_BUILD_CORES; this flag presumably
    # tells bionix the stage can use more than one core (TODO confirm).
    passthru.multicore = true;
    # Tag the result as a vcf against the reference shared by both inputs.
    passthru.filetype = types.filetype.vcf { inherit ref; };
  };

in
out