aboutsummaryrefslogtreecommitdiff
path: root/tools/gridss.nix
blob: cadc2d799da6a31ef638b9071673cab9e97f27a2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
{bionix}:

with bionix;
with lib;

rec {
  /* GRIDSS 2.11.1 "jar-with-dependencies" artifact, downloaded from the
  upstream GitHub release page with pkgs.fetchurl and pinned by its SRI
  sha256 hash. When bumping the version, update both the URL and the hash.
  */
  jar = pkgs.fetchurl {
    url = "https://github.com/PapenfussLab/gridss/releases/download/v2.11.1/gridss-2.11.1-gridss-jar-with-dependencies.jar";
    sha256 = "sha256-x3+vdaNzEOhCQXSqmU6U3S2xUXy0SSdMle0QhiM2Qb8=";
  };

  /* Generate a configuration file for GRIDSS: converts an attribute set into
  GRIDSS INI-style format. The implementation is loaded from
  ./gridss-configFile.nix via callBionix with no overrides ({}).
  Type: genConfig :: attrSet -> ini file
  */
  genConfig = callBionix ./gridss-configFile.nix {};

  /* Invoke the GRIDSS callVariants tool. The implementation is loaded from
  ./gridss-callVariants.nix via callBionixE. The first argument is an
  attribute set of options (defaults are listed in the type below); the
  second is the list of input BAMs.
  Type: callVariants :: {blacklist :: drv = null, config :: ini = null, heapSize :: String = "31g", ...} -> [bam] -> variants
  */
  callVariants = callBionixE ./gridss-callVariants.nix;

  /* Invoke the GRIDSS computeSamTags tool. The implementation is loaded from
  ./gridss-computeSamTags.nix via callBionixE. The first argument is an
  attribute set of options (defaults are listed in the type below); the
  second is the input BAM.
  Type: computeSamTags :: {config :: ini = null, heapSize :: String = "1G", ...} -> bam -> bam
  */
  computeSamTags = callBionixE ./gridss-computeSamTags.nix;

  /* Invoke the GRIDSS softClipsToSplitReads tool. The implementation is
  loaded from ./gridss-softClipsToSplitReads.nix via callBionixE. The first
  argument is an attribute set of options (defaults are listed in the type
  below); the second is the input BAM.
  Type: softClipsToSplitReads :: {alignerStreaming :: Bool = false, config :: ini = null, heapSize :: String = "2G", ...} -> bam -> bam
  */
  softClipsToSplitReads = callBionixE ./gridss-softClipsToSplitReads.nix;

  /* Invoke the GRIDSS collectMetrics tool. The implementation is loaded from
  ./gridss-collectMetrics.nix via callBionixE. The first argument is an
  attribute set of options (defaults are listed in the type below); the
  second is the input BAM.
  Type: collectMetrics :: {thresholdCoverage :: Int = 10000, config :: ini = null, heapSize :: String = "1G", ...} -> bam -> metrics
  */
  collectMetrics = callBionixE ./gridss-collectMetrics.nix;

  /* Invoke the GRIDSS extractSVReads tool. The implementation is loaded from
  ./gridss-extractSVReads.nix via callBionixE. The first argument is an
  attribute set of options (defaults are listed in the type below); the
  second is the input BAM.
  Type: extractSVReads :: {unmappedReads :: Bool = false, minClipLength :: Int = 5, config :: ini = null, ...} -> bam -> bam
  */
  extractSVReads = callBionixE ./gridss-extractSVReads.nix;

  /* Invoke the GRIDSS assembly tool. The implementation is loaded from
  ./gridss-assemble.nix via callBionixE. The first argument is an attribute
  set of options (defaults are listed in the type below); the second is the
  list of input BAMs. shardedAssemble additionally passes jobNodes, jobIndex
  and workdirs through this option set.
  Type: assemble :: {config :: ini = null, heapSize :: String = "31g", ...} -> [bam] -> bam
  */
  assemble = callBionixE ./gridss-assemble.nix;
  /* Run the assembly step split across n shards.
  For n > 1, one assemble invocation is created per shard index 0 .. n-1
  (each given jobNodes = n and jobIndex = i); a final assemble invocation then
  receives the `work` attribute of every shard via the `workdirs` option.
  For n <= 1 this is exactly `assemble a input` and no shards are built.
  Type: shardedAssemble :: Int -> attrSet -> [bam] -> bam
  */
  shardedAssemble = n: a: input:
    let
      # Assemble `input` with the given option set.
      assembleWith = opts: bionix.gridss.assemble opts input;
      # Assembly job for a single shard index.
      shardFor = idx: assembleWith (a // { jobNodes = n; jobIndex = idx; });
      # Lazily evaluated: only forced on the n > 1 branch below.
      shardWorkdirs = map (shard: shard.work) (genList shardFor n);
    in
      if n > 1
      then assembleWith (a // { workdirs = shardWorkdirs; })
      else assembleWith a;

  /* Invoke the identifyVariants tool. Instantiates ./gridss-variants.nix with
  the given options and inputs, then selects its `identify` output.
  Type: identifyVariants :: {config :: ini = null, heapSize :: String = "4g", ...} -> [bam] -> VCF
  */
  identifyVariants = exec (attrs: input:
    let variants = callBionix ./gridss-variants.nix attrs input;
    in variants.identify);

  /* Invoke the annotateVariants tool. Instantiates ./gridss-variants.nix with
  the given options and inputs, then selects its `annotate` output.
  Type: annotateVariants :: {config :: ini = null, heapSize :: String = "4g", ...} -> [bam] -> VCF
  */
  annotateVariants = exec (attrs: input:
    let variants = callBionix ./gridss-variants.nix attrs input;
    in variants.annotate);

  /* Same as annotateVariants, but the assembly is included in the output:
  selects the `annotateAndAssemble` output of ./gridss-variants.nix.
  */
  annotateAndAssemble = exec (attrs: input:
    let variants = callBionix ./gridss-variants.nix attrs input;
    in variants.annotateAndAssemble);

  /* Preprocess a BAM file: extract SV reads and convert soft clips to split
  reads. The input is threaded through these stages in order, all with
  default options unless shown:
    1. gridss.extractSVReads
    2. samtools sort with nameSort = true
    3. gridss.computeSamTags
    4. gridss.softClipsToSplitReads
    5. samtools sort
  Type: preprocessBam :: bam -> bam
  */
  preprocessBam = with samtools;
    bam:
      # `sort` resolves to samtools' sort here because `with samtools;` is the
      # innermost `with` in scope.
      pipe bam [
        (gridss.extractSVReads {})
        (sort {nameSort = true;})
        (gridss.computeSamTags {})
        (gridss.softClipsToSplitReads {})
        (sort {})
      ];

  /* Call SVs: the entire pipeline, including preprocessing. Every input BAM is
  first run through preprocessBam, and the resulting list is passed to
  annotateVariants with default options. It is recommended to use this
  function rather than the individual tools above.
  Type: call :: [bam] -> GRIDSS result
  */
  call = inputs:
    let preprocessed = map gridss.preprocessBam inputs;
    in gridss.annotateVariants {} preprocessed;

  /* Same as call, but assemblies are included in the output: every input BAM
  is run through preprocessBam and the resulting list is passed to
  annotateAndAssemble with default options.
  */
  callAndAssemble = inputs:
    let preprocessed = map gridss.preprocessBam inputs;
    in gridss.annotateAndAssemble {} preprocessed;
}