aboutsummaryrefslogtreecommitdiff
path: root/tools/gridss-variants.nix
blob: fc26c5a9480ef05847fa7a013facf43460f9aafe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# GRIDSS variant calling: given configuration attributes and a list of
# inputs, produces an attribute set with two stages, `identify`
# (gridss.IdentifyVariants) and `annotate` (gridss.AnnotateVariants).
{ bionix
# Attributes passed to bionix.bwa.index when indexing the reference.
, bwaIndexAttrs ? {}
# Attributes passed to bionix.samtools.faidx when indexing the reference.
, faidxAttrs ? {}
# Attributes passed to bionix.samtools.index for each input and the assembly.
, indexAttrs ? {}
# Attributes passed to bionix.gridss.assemble.
, assemblyAttrs ? {}
# Attributes passed to collectMetrics for each input and the assembly.
, collectMetricsAttrs ? {}
# Attributes passed to softClipsToSplitReads when post-processing the assembly.
, softClipsToSplitReadsAttrs ? { flags = "REALIGN_ENTIRE_READ=true"; }
# NOTE(review): not used by the java command lines in this file; it is only
# forwarded to bionix.gridss.identifyVariants by `annotate`. Presumably meant
# as extra command-line arguments -- confirm.
, flags ? null
# When non-null, run through bionix.gridss.gridssConfig and supplied to the
# tools as OPTIONS_FILE.
, config ? null
# Value given to the JVM -Xmx option.
, heapSize ? "4g"
}:

# Unqualified names used below (stage, pkgs, matchFiletype, filetype, jar,
# collectMetrics, softClipsToSplitReads, ...) are resolved through these
# scopes; `lib`, `types` and `gridss` are themselves looked up via the
# preceding `with`s.
with bionix;
with lib;
with types;
with gridss;

# List of inputs. Each element is matched against the `bam` filetype, must
# satisfy the coordinate-sorting check, and all elements must share a single
# reference (enforced by the asserts further down).
inputs:

let
  # Reads the `ref` attribute of an input's bam filetype metadata.
  getref = matchFiletype "gridss-variants" { bam = bamMeta: bamMeta.ref; };
  # Reference of the first input; the homoRef assertion ensures it is the
  # reference of every input.
  ref = getref (head inputs);
  # Yields true for inputs whose sorting matches the `coord` case.
  sorted = matchFileSorting "gridss-variants" { coord = _coordMeta: true; };
  # True when the inputs carry exactly one distinct reference.
  homoRef = 1 == length (unique (map getref inputs));

  # Shell fragment that exposes the output files of `mkFiles mkAttrs bam`
  # inside the directory "<basename of bam>.gridss.working", creating that
  # directory when it does not exist yet. Every file in the output directory
  # is symlinked as "<basename>.<text after the last dot of the file path>".
  # The Nix parameters are deliberately not called `f`, which is the shell
  # loop variable inside the generated script.
  linkInput = mkFiles: mkAttrs: bam: ''
    BASENAME=$(basename ${bam})
    WRKDIR="''${BASENAME}.gridss.working"
    if [[ ! -e $WRKDIR ]] ; then
      mkdir $WRKDIR
    fi
    for f in ${mkFiles mkAttrs bam}/* ; do
      ln -s $f $WRKDIR/$BASENAME.''${f##*.}
    done
  '';

  # Shell fragment that symlinks `bam` and its samtools index into the
  # directory "<basename of bam>.gridss.working" under the names
  # "<basename>.sv.bam" and "<basename>.sv.bai", creating the directory when
  # it does not exist yet.
  linkSV = bam:
    let
      bamIndex = bionix.samtools.index indexAttrs bam;
    in ''
    BASENAME=$(basename ${bam})
    WRKDIR="''${BASENAME}.gridss.working"
    if [[ ! -e $WRKDIR ]] ; then
      mkdir $WRKDIR
    fi
    ln -s ${bam} $WRKDIR/$BASENAME.sv.bam
    ln -s ${bamIndex} $WRKDIR/$BASENAME.sv.bai
  '';

  # Assembly shared by both stages: assemble the inputs, convert soft clips
  # to split reads, then sort the result with samtools (default attributes).
  assembly =
    let
      assembled = bionix.gridss.assemble assemblyAttrs inputs;
      splitReads = softClipsToSplitReads softClipsToSplitReadsAttrs assembled;
    in
      bionix.samtools.sort {} splitReads;

  # Shell prelude shared by both stages. It links the reference as ref.fa,
  # its faidx index as ref.fa.fai and every file of the bwa index into the
  # current directory, then links the .sv.bam/.sv.bai pair and the collected
  # metrics for each input and for the assembly into their working dirs.
  mkLinks =
    let
      linkMetrics = linkInput collectMetrics collectMetricsAttrs;
    in ''
    ln -s ${ref} ref.fa
    ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
    for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
      ln -s $f
    done
    ${concatMapStringsSep "\n" linkSV inputs}
    ${linkSV assembly}
    ${concatMapStringsSep "\n" linkMetrics inputs}
    ${linkMetrics assembly}
  '';

in

# Every input must satisfy the `sorted` check (coordinate sorting).
assert (all sorted inputs);
# All inputs must share exactly one reference; `ref` is taken from the first.
assert (homoRef);

rec {
  # Stage that runs gridss.IdentifyVariants over the inputs and the shared
  # assembly after the mkLinks prelude, and moves the resulting out.vcf to
  # $out. OPTIONS_FILE is only passed when `config` is non-null. The output
  # is tagged as a VCF on `ref`, and the assembly is exposed via passthru.
  identify = stage {
    name = "gridss-identifyVariants";
    buildInputs = with pkgs; [ jre samtools ];
    buildCommand = mkLinks + ''
      java -Xmx${heapSize} -Dsamjdk.create_index=true \
        -cp ${jar} gridss.IdentifyVariants \
        REFERENCE_SEQUENCE=ref.fa \
        ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
        ASSEMBLY=${assembly} \
        OUTPUT_VCF=out.vcf \
        ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
        WORKING_DIR=$TMPDIR/ \
        TMP_DIR=$TMPDIR/

      mv out.vcf $out
      '';
    passthru = {
      filetype = filetype.vcf { inherit ref; };
      gridss = { inherit assembly; };
    };
  };

  # Stage that runs gridss.AnnotateVariants: after the mkLinks prelude it
  # links the IdentifyVariants output as input.vcf, annotates it against the
  # inputs and the shared assembly, and moves the resulting out.vcf to $out.
  # OPTIONS_FILE is only passed when `config` is non-null. The output is
  # tagged as a VCF on `ref`, and the assembly is exposed via passthru.
  annotate =
    let
      # VCF to annotate. Every tuning argument of this file is forwarded,
      # including heapSize: previously it was omitted, so a caller-supplied
      # heap size applied to AnnotateVariants only while the IdentifyVariants
      # dependency silently ran with the default.
      identified = bionix.gridss.identifyVariants {inherit bwaIndexAttrs faidxAttrs indexAttrs assemblyAttrs collectMetricsAttrs softClipsToSplitReadsAttrs flags config heapSize; } inputs;
    in stage rec {
    name = "gridss-annotateVariants";
    buildInputs = with pkgs; [ jre ];
    buildCommand = mkLinks + ''
      ln -s ${identified} input.vcf
      java -Xmx${heapSize} -Dsamjdk.create_index=true \
        -cp ${jar} gridss.AnnotateVariants \
        REFERENCE_SEQUENCE=ref.fa \
        ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
        ASSEMBLY=${assembly} \
        INPUT_VCF=input.vcf \
        OUTPUT_VCF=out.vcf \
        WORKING_DIR=$TMPDIR/ \
        ${optionalString (config != null) ("OPTIONS_FILE=" +  bionix.gridss.gridssConfig config)} \
        TMP_DIR=$TMPDIR/

      mv out.vcf $out
      '';
    passthru = {
      filetype = filetype.vcf { ref = ref; };
      gridss.assembly = assembly;
    };
  };
}