aboutsummaryrefslogtreecommitdiff
path: root/tools/strelka-call.nix
blob: dc07291d503f3dcba53f9533225be2b965425c03 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Germline variant calling with Strelka.
#
# Arguments (first attribute set):
#   bionix        - the bionix package set; it is opened with `with`, as
#                   are its `lib` and `types` attributes.
#   indexAttrs    - attributes passed to samtools.faidx when indexing the
#                   reference (default: {}).
#   bamIndexAttrs - attributes passed to samtools.index when indexing each
#                   input BAM (default: {}).
#   flags         - optional string of extra command-line flags appended to
#                   configureStrelkaGermlineWorkflow.py; null (the default)
#                   adds nothing.
#
# inputs: list of BAM inputs. Every input must carry the same reference,
# otherwise evaluation aborts on the assertion below.
#
# Result: the stage derivation, with its `variants` output additionally
# tagged with the VCF filetype for the shared reference.
{ bionix
, indexAttrs ? {}
, bamIndexAttrs ? {}
, flags ? null
}:

inputs:

with bionix;
with lib;
with types;

let
  # Last path component; used to name the symlinks in the build directory.
  filename = path: last (splitString "/" path);

  # Reference recorded on a BAM input's filetype. Only a `bam` handler is
  # supplied to matchFiletype.
  getref = f: matchFiletype "strelka-call" { bam = x: x.ref; } f;
  refs = map getref inputs;
  ref = head refs;

in

# All inputs must be aligned against one and the same reference.
assert (length (unique refs) == 1);

let
out = stage {
  name = "strelka-call";
  buildInputs = with pkgs; [ strelka ];
  outputs = [ "out" "variants" ];
  buildCommand = ''
    # Stage the reference, the BAMs and their indexes under local names.
    ln -s ${ref} ref.fa
    ln -s ${bionix.samtools.faidx indexAttrs ref} ref.fa.fai
    ${concatMapStringsSep "\n" (p: "ln -s ${p} ${filename p}.bam") inputs}
    ${concatMapStringsSep "\n" (p: "ln -s ${bionix.samtools.index bamIndexAttrs p} ${filename p}.bai") inputs}

    # Caller-supplied flags (if any) are appended to the configure step.
    configureStrelkaGermlineWorkflow.py \
      ${concatMapStringsSep " " (i: "--bam ${filename i}.bam") inputs} \
      --ref ref.fa \
      --runDir "$TMPDIR" ${optionalString (flags != null) flags}

    ./runWorkflow.py \
      -m local \
      -j $NIX_BUILD_CORES 2>&1

    # Strelka writes runtime stats and timestamps;
    # both have to be stripped to provide determinism
    cd results/variants
    rm *.tbi genome.vcf.gz
    for f in *.vcf.gz; do
      gunzip "$f"
      g=$(basename "$f" .gz)
      sed -i -e '/^##fileDate/d' -e '/^##startTime/d' "$g"
    done

    # variants.vcf becomes the `variants` output; a symlink keeps the name
    # available inside `out`, which receives the whole directory.
    mv variants.vcf "$variants"
    ln -s "$variants" variants.vcf
    mkdir "$out"
    cp -r * "$out"
  '';
  passthru.multicore = true;
};

# Filetype annotation attached to the `variants` output.
ft = { filetype = types.filetype.vcf { inherit ref; }; };
in out // { variants = out.variants // ft; }