aboutsummaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorJustin Bedo <cu@cua0.org>2018-10-29 15:33:53 +1100
committerJustin Bedo <cu@cua0.org>2018-10-29 15:36:33 +1100
commite7cd661d1c5fb4135e3d436e151294e26aef9127 (patch)
tree71ef7647d15d57bc2db2cf8ec532da794fddb2fa /tools
parent8fb986fd88705fc01be7145b04fa229092c1e69e (diff)
Split gridss into constituents
Wrap each individual GRIDSS command so that bionix executes the pipeline rather than GRIDSS. This patch introduces a "call" function that executes the whole pipeline in bionix on an arbitrary BAM file. Resolves #10.
Diffstat (limited to 'tools')
-rw-r--r--tools/gridss-annotateVariants.nix75
-rw-r--r--tools/gridss-assemble.nix58
-rw-r--r--tools/gridss-callVariants.nix6
-rw-r--r--tools/gridss-collectGridssMetrics.nix32
-rw-r--r--tools/gridss-collectMetrics.nix30
-rw-r--r--tools/gridss-computeSamTags.nix42
-rw-r--r--tools/gridss-extractSVReads.nix40
-rw-r--r--tools/gridss-identifyVariants.nix72
-rw-r--r--tools/gridss-softClipsToSplitReads.nix43
-rw-r--r--tools/gridss.nix16
10 files changed, 408 insertions, 6 deletions
diff --git a/tools/gridss-annotateVariants.nix b/tools/gridss-annotateVariants.nix
new file mode 100644
index 0000000..4f66c6c
--- /dev/null
+++ b/tools/gridss-annotateVariants.nix
@@ -0,0 +1,75 @@
+{ bionix # bionix package set (types, samtools, bwa and gridss are used below)
+, nixpkgs # nixpkgs package set, opened with `with` below
+, bwaIndexAttrs ? {} # attrs forwarded to bionix.bwa.index
+, faidxAttrs ? {} # attrs forwarded to bionix.samtools.faidx
+, assemblyAttrs ? {} # attrs forwarded to bionix.gridss.assemble
+, extractSVReadsAttrs ? {} # attrs forwarded to extractSVReads
+, collectMetricsAttrs ? {} # attrs forwarded to collectMetrics
+, softClipsToSplitReadsAttrs ? {} # attrs forwarded to softClipsToSplitReads
+, identifyVariantsAttrs ? {} # attrs forwarded to identifyVariants
+, flags ? null # accepted but not referenced in this file
+}:
+
+with nixpkgs;
+with lib;
+with bionix.types;
+with bionix.gridss;
+
+inputs: # list of inputs; both asserts below must hold for it
+
+let
+ getref = matchFiletype "gridss-annotateVariants" { bam = x: x.ref; }; # bam case: yields the input's ref
+ ref = getref (head inputs); # reference of the first input
+ sorted = matchFileSorting "gridss-annotateVariants" { coord = _: true; }; # coord case: true; other cases are up to matchFileSorting (not shown)
+ homoRef = length (unique (map getref inputs)) == 1; # true when every input yields the same ref
+
+ linkInput = f: attrs: input: ''
+ BASENAME=$(basename ${input})
+ WRKDIR="''${BASENAME}.gridss.working"
+ if [[ ! -e $WRKDIR ]] ; then
+ mkdir $WRKDIR
+ fi
+ for f in ${f attrs input}/* ; do
+ ln -s $f $WRKDIR/$BASENAME.''${f#*.}
+ done
+ ''; # linkInput: shell text that symlinks each file of `f attrs input` into <basename>.gridss.working/
+
+ assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.samtools.sort { nameSort = true;} (bionix.gridss.assemble assemblyAttrs inputs))); # assemble -> name sort -> softClipsToSplitReads -> sort
+in
+
+assert (all sorted inputs);
+assert (homoRef);
+
+stdenv.mkDerivation rec {
+ name = "gridss-annotateVariants"; # named after the tool run below (gridss.AnnotateVariants)
+ buildInputs = [ jre ];
+ buildCommand = ''
+ ln -s ${ref} ref.fa
+ ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
+ for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
+ ln -s $f
+ done
+ ${concatMapStringsSep "\n" (linkInput extractSVReads extractSVReadsAttrs) inputs}
+ ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs}
+ ${linkInput collectMetrics collectMetricsAttrs assembly}
+ ASSBASE=$(basename ${assembly})
+ ln -s ${assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bam
+ ln -s ${bionix.samtools.index {} assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bai
+ ln -s ${identifyVariants identifyVariantsAttrs inputs} input.vcf
+ java -Xmx4g -Dsamjdk.create_index=true \
+ -cp ${jar} gridss.AnnotateVariants \
+ REFERENCE_SEQUENCE=ref.fa \
+ ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
+ ASSEMBLY=${assembly} \
+ INPUT_VCF=input.vcf \
+ OUTPUT_VCF=out.vcf \
+ WORKING_DIR=$TMPDIR/ \
+ TMP_DIR=$TMPDIR/
+
+ mv out.vcf $out
+ ''; # links reference, indexes and per-input working dirs, runs gridss.AnnotateVariants, moves out.vcf to $out
+ passthru = {
+ filetype = filetype.vcf { ref = ref; }; # output is tagged as a VCF against the shared reference
+ gridss.assembly = assembly; # exposes the intermediate assembly derivation
+ };
+}
diff --git a/tools/gridss-assemble.nix b/tools/gridss-assemble.nix
new file mode 100644
index 0000000..cdda748
--- /dev/null
+++ b/tools/gridss-assemble.nix
@@ -0,0 +1,58 @@
+{ bionix # bionix package set (types, samtools, bwa and gridss are used below)
+, nixpkgs # nixpkgs package set, opened with `with` below
+, bwaIndexAttrs ? {} # attrs forwarded to bionix.bwa.index
+, faidxAttrs ? {} # attrs forwarded to bionix.samtools.faidx
+, collectMetricsAttrs ? {} # attrs forwarded to bionix.gridss.collectMetrics
+, extractSVReadsAttrs ? {} # attrs forwarded to bionix.gridss.extractSVReads
+, flags ? null # accepted but not referenced in this file
+}:
+
+with nixpkgs;
+with lib;
+with bionix.types;
+
+inputs: # list of inputs; both asserts below must hold for it
+
+let
+ getref = matchFiletype "gridss-assemble" { bam = x: x.ref; }; # bam case: yields the input's ref
+ ref = getref (head inputs); # reference of the first input
+ sorted = matchFileSorting "gridss-assemble" { coord = _: true; }; # coord case: true; other cases are up to matchFileSorting (not shown)
+ homoRef = length (unique (map getref inputs)) == 1; # true when every input yields the same ref
+
+ linkInput = input: ''
+ BASENAME=$(basename ${input})
+ WRKDIR="''${BASENAME}.gridss.working"
+ mkdir $WRKDIR
+ for f in ${bionix.gridss.extractSVReads extractSVReadsAttrs input}/* ; do
+ ln -s $f $WRKDIR/$BASENAME.''${f#*.}
+ done
+ for f in ${bionix.gridss.collectMetrics collectMetricsAttrs input}/* ; do
+ ln -s $f $WRKDIR/$BASENAME.''${f#*.}
+ done
+ ''; # linkInput: shell text that creates <basename>.gridss.working/ and symlinks the extractSVReads and collectMetrics outputs into it
+in
+
+assert (all sorted inputs);
+assert (homoRef);
+
+stdenv.mkDerivation rec {
+ name = "gridss-assemble";
+ buildInputs = [ jre bwa ];
+ buildCommand = ''
+ ln -s ${ref} ref.fa
+ ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
+ for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
+ ln -s $f
+ done
+ ${concatMapStringsSep "\n" linkInput inputs}
+ java -Xmx31g -Dsamjdk.create_index=true \
+ -cp ${bionix.gridss.jar} gridss.AssembleBreakends \
+ REFERENCE_SEQUENCE=ref.fa \
+ ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
+ WORKER_THREADS=$NIX_BUILD_CORES \
+ OUTPUT=$out \
+ WORKING_DIR=$TMPDIR/ \
+ TMP_DIR=$TMPDIR/
+ ''; # links reference and indexes, builds one working dir per input, runs gridss.AssembleBreakends writing to $out
+ passthru.filetype = filetype.bam { ref = ref; sorting = sort.coord {}; }; # output is tagged as a coordinate-sorted BAM
+}
diff --git a/tools/gridss-callVariants.nix b/tools/gridss-callVariants.nix
index 799c930..416eb7c 100644
--- a/tools/gridss-callVariants.nix
+++ b/tools/gridss-callVariants.nix
@@ -23,10 +23,6 @@ assert (length (unique refs) == 1);
stdenv.mkDerivation rec {
name = "gridss-callVariants";
buildInputs = [ jre R bwa ];
- jar = fetchurl {
- url = "https://github.com/PapenfussLab/gridss/releases/download/v2.0.0/gridss-2.0.0-gridss-jar-with-dependencies.jar";
- sha256 = "01srl3qvv060whqg1y1fpxjc5cwga5wscs1bmf1v3z87dignra7k";
- };
buildCommand = ''
ln -s ${ref} ref.fa
ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
@@ -41,7 +37,7 @@ stdenv.mkDerivation rec {
-Dsamjdk.use_async_io_write_samtools=true \
-Dsamjdk.use_async_io_write_tribble=true \
-Dgridss.gridss.output_to_temp_file=true \
- -cp ${jar} gridss.CallVariants \
+ -cp ${bionix.gridss.jar} gridss.CallVariants \
WORKER_THREADS=$NIX_BUILD_CORES \
TMP_DIR=. \
WORKING_DIR=. \
diff --git a/tools/gridss-collectGridssMetrics.nix b/tools/gridss-collectGridssMetrics.nix
new file mode 100644
index 0000000..bb8d2e9
--- /dev/null
+++ b/tools/gridss-collectGridssMetrics.nix
@@ -0,0 +1,32 @@
+{ bionix # bionix package set (types and gridss.jar are used below)
+, nixpkgs # nixpkgs package set, opened with `with` below
+, thresholdCoverage ? 10000 # value passed as THRESHOLD_COVERAGE
+, flags ? null # accepted but not referenced in this file
+}:
+
+with nixpkgs;
+with lib;
+with bionix.types;
+
+input: # single input, linked as input.bam below
+
+let
+ ref = matchFiletype "gridss-collectGridssMetrics" { bam = x: x.ref; } input; # bam case: the input's ref; not referenced below
+ sorted = matchFileSorting "gridss-collectGridssMetrics" { name = _: true; } input; # name case: true; other cases are up to matchFileSorting (not shown) - NOTE(review): confirm the intended result for coordinate-sorted input
+in
+
+
+stdenv.mkDerivation rec {
+ name = "gridss-collectGridssMetrics";
+ buildInputs = [ jre ];
+ buildCommand = ''
+ mkdir $out
+ ln -s ${input} input.bam
+ java -Xmx1G -cp ${bionix.gridss.jar} \
+ gridss.analysis.CollectGridssMetrics \
+ ${optionalString sorted "ASSUME_SORTED=true"} \
+ I=input.bam \
+ O=$out \
+ THRESHOLD_COVERAGE=${toString thresholdCoverage}
+ ''; # creates $out and runs gridss.analysis.CollectGridssMetrics on input.bam; ASSUME_SORTED=true is passed only when `sorted` is true
+}
diff --git a/tools/gridss-collectMetrics.nix b/tools/gridss-collectMetrics.nix
new file mode 100644
index 0000000..4688808
--- /dev/null
+++ b/tools/gridss-collectMetrics.nix
@@ -0,0 +1,30 @@
+{ bionix # bionix package set (types and gridss.jar are used below)
+, nixpkgs # nixpkgs package set, opened with `with` below
+, thresholdCoverage ? 10000 # value passed as THRESHOLD_COVERAGE
+, flags ? null # accepted but not referenced in this file
+}:
+
+with nixpkgs;
+with lib;
+with bionix.types;
+
+input: # single input, passed to the tool as I=
+
+let
+ ref = matchFiletype "gridss-collectMetrics" { bam = x: x.ref; } input; # bam case: the input's ref; not referenced below
+in
+
+
+stdenv.mkDerivation rec {
+ name = "gridss-collectMetrics";
+ buildInputs = [ jre R ];
+ buildCommand = ''
+ mkdir $out
+ java -Xmx1G -cp ${bionix.gridss.jar} \
+ gridss.analysis.CollectGridssMetrics \
+ I=${input} \
+ O=$out/input \
+ AS=true \
+ THRESHOLD_COVERAGE=${toString thresholdCoverage}
+ ''; # creates $out and runs gridss.analysis.CollectGridssMetrics with output prefix $out/input; I= is separated from the continuation so it cannot fuse with O=
+}
diff --git a/tools/gridss-computeSamTags.nix b/tools/gridss-computeSamTags.nix
new file mode 100644
index 0000000..f75cea0
--- /dev/null
+++ b/tools/gridss-computeSamTags.nix
@@ -0,0 +1,42 @@
+{ bionix # bionix package set (types, samtools, bwa and gridss.jar are used below)
+, nixpkgs # nixpkgs package set, opened with `with` below
+, blacklist ? null # accepted but not referenced in this file
+, bwaIndexAttrs ? {} # attrs forwarded to bionix.bwa.index
+, faidxAttrs ? {} # attrs forwarded to bionix.samtools.faidx
+, flags ? null # accepted but not referenced in this file
+}:
+
+with nixpkgs;
+with lib;
+with bionix.types;
+
+input: # single input; the assert below must hold for it
+
+let
+ ref = matchFiletype "gridss-computeSamTags" { bam = x: x.ref; } input; # bam case: the input's ref
+ sorted = matchFileSorting "gridss-computeSamTags" { name = _: true; } input; # name case: true; other cases are up to matchFileSorting (not shown)
+in
+
+assert(sorted);
+
+stdenv.mkDerivation rec {
+ name = "gridss-computeSamTags";
+ buildInputs = [ jre ];
+ buildCommand = ''
+ ln -s ${ref} ref.fa
+ ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
+ for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
+ ln -s $f
+ done
+ java -Xmx1G \
+ -Dsamjdk.create_index=false \
+ -cp ${bionix.gridss.jar} gridss.ComputeSamTags \
+ REFERENCE_SEQUENCE=ref.fa \
+ WORKING_DIR=$TMPDIR/ \
+ TMP_DIR=$TMPDIR/ \
+ I=${input} \
+ O=$out \
+ AS=true
+ ''; # links reference and indexes, runs gridss.ComputeSamTags writing to $out; working/temp dirs use the builder's $TMPDIR like the sibling gridss files
+ passthru.filetype = input.filetype; # output keeps the input's filetype
+}
diff --git a/tools/gridss-extractSVReads.nix b/tools/gridss-extractSVReads.nix
new file mode 100644
index 0000000..a10abf5
--- /dev/null
+++ b/tools/gridss-extractSVReads.nix
@@ -0,0 +1,40 @@
+{ bionix # bionix package set (types, samtools and gridss.jar are used below)
+, nixpkgs # nixpkgs package set, opened with `with` below
+, dictIndexAttrs ? {} # attrs forwarded to bionix.samtools.dict
+, faidxAttrs ? {} # attrs forwarded to bionix.samtools.faidx
+, flags ? null # accepted but not referenced in this file
+, unmappedReads ? false # rendered as UNMAPPED_READS=true/false
+, minClipLength ? 5 # value passed as MIN_CLIP_LENGTH
+}:
+
+with nixpkgs;
+with lib;
+with bionix.types;
+
+input: # single input, linked as input.bam below
+
+let
+ ref = matchFiletype "gridss-extractSVReads" { bam = x: x.ref; } input; # bam case: the input's ref
+in
+
+
+stdenv.mkDerivation rec {
+ name = "gridss-extractSVReads";
+ buildInputs = [ jre R ];
+ buildCommand = ''
+ ln -s ${ref} ref.fa
+ ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
+ ln -s ${bionix.samtools.dict dictIndexAttrs ref} ref.fa.dict
+ ln -s ${input} input.bam
+ mkdir $out
+ java -Dsamjdk.create_index=true \
+ -cp ${bionix.gridss.jar} gridss.ExtractSVReads \
+ REFERENCE_SEQUENCE=ref.fa \
+ I=input.bam \
+ O=$out/input.sv.bam \
+ METRICS_OUTPUT=$out/input.sv_metrics \
+ INSERT_SIZE_METRICS=$out/input.insert_size_metrics \
+ UNMAPPED_READS=${if unmappedReads then "true" else "false"} \
+ MIN_CLIP_LENGTH=${toString minClipLength}
+ ''; # links reference, index, dict and input, then runs gridss.ExtractSVReads writing input.sv.bam and metrics files under $out
+}
diff --git a/tools/gridss-identifyVariants.nix b/tools/gridss-identifyVariants.nix
new file mode 100644
index 0000000..f44771b
--- /dev/null
+++ b/tools/gridss-identifyVariants.nix
@@ -0,0 +1,72 @@
+{ bionix # bionix package set (types, samtools, bwa and gridss are used below)
+, nixpkgs # nixpkgs package set, opened with `with` below
+, bwaIndexAttrs ? {} # attrs forwarded to bionix.bwa.index
+, faidxAttrs ? {} # attrs forwarded to bionix.samtools.faidx
+, assemblyAttrs ? {} # attrs forwarded to bionix.gridss.assemble
+, extractSVReadsAttrs ? {} # attrs forwarded to extractSVReads
+, collectMetricsAttrs ? {} # attrs forwarded to collectMetrics
+, softClipsToSplitReadsAttrs ? {} # attrs forwarded to softClipsToSplitReads
+, flags ? null # accepted but not referenced in this file
+}:
+
+with nixpkgs;
+with lib;
+with bionix.types;
+with bionix.gridss;
+
+inputs: # list of inputs; both asserts below must hold for it
+
+let
+ getref = matchFiletype "gridss-identifyVariants" { bam = x: x.ref; }; # bam case: yields the input's ref
+ ref = getref (head inputs); # reference of the first input
+ sorted = matchFileSorting "gridss-identifyVariants" { coord = _: true; }; # coord case: true; other cases are up to matchFileSorting (not shown)
+ homoRef = length (unique (map getref inputs)) == 1; # true when every input yields the same ref
+
+ linkInput = f: attrs: input: ''
+ BASENAME=$(basename ${input})
+ WRKDIR="''${BASENAME}.gridss.working"
+ if [[ ! -e $WRKDIR ]] ; then
+ mkdir $WRKDIR
+ fi
+ for f in ${f attrs input}/* ; do
+ ln -s $f $WRKDIR/$BASENAME.''${f#*.}
+ done
+ ''; # linkInput: shell text that symlinks each file of `f attrs input` into <basename>.gridss.working/
+
+ assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.samtools.sort { nameSort = true;} (bionix.gridss.assemble assemblyAttrs inputs))); # assemble -> name sort -> softClipsToSplitReads -> sort
+in
+
+assert (all sorted inputs);
+assert (homoRef);
+
+stdenv.mkDerivation rec {
+ name = "gridss-identifyVariants";
+ buildInputs = [ jre ];
+ buildCommand = ''
+ ln -s ${ref} ref.fa
+ ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
+ for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
+ ln -s $f
+ done
+ ${concatMapStringsSep "\n" (linkInput extractSVReads extractSVReadsAttrs) inputs}
+ ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs}
+ ${linkInput collectMetrics collectMetricsAttrs assembly}
+ ASSBASE=$(basename ${assembly})
+ ln -s ${assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bam
+ ln -s ${bionix.samtools.index {} assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bai
+ java -Xmx4g -Dsamjdk.create_index=true \
+ -cp ${jar} gridss.IdentifyVariants \
+ REFERENCE_SEQUENCE=ref.fa \
+ ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
+ ASSEMBLY=${assembly} \
+ OUTPUT_VCF=out.vcf \
+ WORKING_DIR=$TMPDIR/ \
+ TMP_DIR=$TMPDIR/
+
+ mv out.vcf $out
+ ''; # links reference, indexes and per-input working dirs, runs gridss.IdentifyVariants, moves out.vcf to $out
+ passthru = {
+ filetype = filetype.vcf { ref = ref; }; # output is tagged as a VCF against the shared reference
+ gridss.assembly = assembly; # exposes the intermediate assembly derivation
+ };
+}
diff --git a/tools/gridss-softClipsToSplitReads.nix b/tools/gridss-softClipsToSplitReads.nix
new file mode 100644
index 0000000..8a7dca3
--- /dev/null
+++ b/tools/gridss-softClipsToSplitReads.nix
@@ -0,0 +1,43 @@
+{ bionix # bionix package set (types, samtools, bwa and gridss.jar are used below)
+, nixpkgs # nixpkgs package set, opened with `with` below
+, bwaIndexAttrs ? {} # attrs forwarded to bionix.bwa.index
+, faidxAttrs ? {} # attrs forwarded to bionix.samtools.faidx
+, alignerStreaming ? false # when true, adds ALIGNER_STREAMING=true and changes the declared output sorting
+, flags ? null # accepted but not referenced in this file
+}:
+
+with nixpkgs;
+with lib;
+with bionix.types;
+
+input: # single input; the sorting assert below must hold for it
+
+let
+ ref = matchFiletype "gridss-softClipsToSplitReads" { bam = x: x.ref; } input; # bam case: the input's ref
+in
+
+assert (matchFileSorting "gridss-softClipsToSplitReads" { name = _: true; } input);
+
+stdenv.mkDerivation rec {
+ name = "gridss-softClipsToSplitReads";
+ buildInputs = [ jre ];
+ buildCommand = ''
+ ln -s ${ref} ref.fa
+ ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
+ for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
+ ln -s $f
+ done
+ java -Xmx2G -Dsamjdk.create_index=false \
+ -cp ${bionix.gridss.jar} gridss.SoftClipsToSplitReads \
+ REFERENCE_SEQUENCE=ref.fa \
+ I=${input} \
+ O=$out \
+ ${optionalString alignerStreaming "ALIGNER_STREAMING=true"} \
+ WORKER_THREADS=$NIX_BUILD_CORES
+ ''; # links reference and indexes, then runs gridss.SoftClipsToSplitReads writing to $out
+ passthru.filetype =
+ if alignerStreaming then
+ filetype.bam { ref = ref; sorting = sort.none {}; } # streaming output is declared as an unsorted BAM
+ else
+ input.filetype; # otherwise the output keeps the input's filetype
+}
diff --git a/tools/gridss.nix b/tools/gridss.nix
index 7a2f217..d46a8ce 100644
--- a/tools/gridss.nix
+++ b/tools/gridss.nix
@@ -1,7 +1,21 @@
{bionix, nixpkgs}:
+with nixpkgs;
with bionix;
-{
+rec {
+ jar = fetchurl { # GRIDSS v2.0.0 jar-with-dependencies, fetched once for the whole set
+ url = "https://github.com/PapenfussLab/gridss/releases/download/v2.0.0/gridss-2.0.0-gridss-jar-with-dependencies.jar";
+ sha256 = "01srl3qvv060whqg1y1fpxjc5cwga5wscs1bmf1v3z87dignra7k";
+ };
callVariants = callBionix ./gridss-callVariants.nix;
+ computeSamTags = callBionix ./gridss-computeSamTags.nix; # one attribute per wrapped GRIDSS step
+ softClipsToSplitReads = callBionix ./gridss-softClipsToSplitReads.nix;
+ collectMetrics = callBionix ./gridss-collectMetrics.nix;
+ extractSVReads = callBionix ./gridss-extractSVReads.nix;
+ assemble = callBionix ./gridss-assemble.nix;
+ identifyVariants = callBionix ./gridss-identifyVariants.nix;
+ annotateVariants = callBionix ./gridss-annotateVariants.nix;
+ preprocessBam = input: with samtools; markdup {} (sort {} (fixmate {mateScore = true;} (softClipsToSplitReads {} (computeSamTags {} (sort {nameSort = true;} input))))); # name sort -> computeSamTags -> softClipsToSplitReads -> fixmate -> sort -> markdup
+ call = inputs: annotateVariants {} (map preprocessBam inputs); # preprocess every input, then annotateVariants with default attrs
}