aboutsummaryrefslogtreecommitdiff
path: root/tools/gridss-annotateVariants.nix
diff options
context:
space:
mode:
author: Justin Bedo <cu@cua0.org> 2018-10-29 15:33:53 +1100
committer: Justin Bedo <cu@cua0.org> 2018-10-29 15:36:33 +1100
commit: e7cd661d1c5fb4135e3d436e151294e26aef9127 (patch)
tree: 71ef7647d15d57bc2db2cf8ec532da794fddb2fa /tools/gridss-annotateVariants.nix
parent: 8fb986fd88705fc01be7145b04fa229092c1e69e (diff)
Split gridss into constituents
Wrap each individual command for GRIDSS so that bionix executes the pipeline rather than GRIDSS. This patch introduces a "call" function that executes the whole pipeline in bionix on an arbitrary BAM file. Resolves #10.
Diffstat (limited to 'tools/gridss-annotateVariants.nix')
-rw-r--r-- tools/gridss-annotateVariants.nix 75
1 files changed, 75 insertions, 0 deletions
diff --git a/tools/gridss-annotateVariants.nix b/tools/gridss-annotateVariants.nix
new file mode 100644
index 0000000..4f66c6c
--- /dev/null
+++ b/tools/gridss-annotateVariants.nix
@@ -0,0 +1,75 @@
+{ bionix # package set; this file uses bionix.types, .gridss, .samtools and .bwa
+, nixpkgs # opened with `with` below; presumably supplies lib, stdenv and jre
+, bwaIndexAttrs ? {} # forwarded to bionix.bwa.index
+, faidxAttrs ? {} # forwarded to bionix.samtools.faidx
+, assemblyAttrs ? {} # forwarded to bionix.gridss.assemble
+, extractSVReadsAttrs ? {} # forwarded to extractSVReads
+, collectMetricsAttrs ? {} # forwarded to collectMetrics
+, softClipsToSplitReadsAttrs ? {} # forwarded to softClipsToSplitReads
+, identifyVariantsAttrs ? {} # forwarded to identifyVariants
+, flags ? null # NOTE(review): accepted but never referenced in this file
+}:
+# Runs gridss.AnnotateVariants over `inputs` and produces the annotated VCF as $out.
+with nixpkgs;
+with lib;
+with bionix.types;
+with bionix.gridss;
+
+inputs: # list of inputs; the asserts below require `coord` sorting and one shared ref
+
+let
+ getref = matchFiletype "gridss-annotateVariants" { bam = x: x.ref; }; # `ref` of a bam input
+ ref = getref (head inputs); # reference taken from the first input
+ sorted = matchFileSorting "gridss-annotateVariants" { coord = _: true; }; # true for the `coord` case
+ homoRef = length (unique (map getref inputs)) == 1; # all inputs share exactly one reference
+ # linkInput: shell that symlinks each file of `f attrs input` into <basename>.gridss.working/
+ linkInput = f: attrs: input: ''
+ BASENAME=$(basename ${input})
+ WRKDIR="''${BASENAME}.gridss.working"
+ if [[ ! -e $WRKDIR ]] ; then
+ mkdir $WRKDIR
+ fi
+ for f in ${f attrs input}/* ; do
+ ln -s $f $WRKDIR/$BASENAME.''${f#*.}
+ done
+ '';
+ # assembly: assemble -> sort with nameSort -> softClipsToSplitReads -> sort with default attrs
+ assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.samtools.sort { nameSort = true;} (bionix.gridss.assemble assemblyAttrs inputs)));
+in
+
+assert (all sorted inputs); # evaluation aborts unless every input passes `sorted`
+assert (homoRef); # evaluation aborts if the inputs reference different genomes
+
+stdenv.mkDerivation rec {
+ name = "gridss-annotateVariants"; # matches this file and the gridss.AnnotateVariants entry point
+ buildInputs = [ jre ]; # provides `java` for the invocation in buildCommand
+ buildCommand = ''
+ ln -s ${ref} ref.fa
+ ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
+ for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
+ ln -s $f
+ done
+ ${concatMapStringsSep "\n" (linkInput extractSVReads extractSVReadsAttrs) inputs}
+ ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs}
+ ${linkInput collectMetrics collectMetricsAttrs assembly}
+ ASSBASE=$(basename ${assembly})
+ ln -s ${assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bam
+ ln -s ${bionix.samtools.index {} assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bai
+ ln -s ${identifyVariants identifyVariantsAttrs inputs} input.vcf
+ java -Xmx4g -Dsamjdk.create_index=true \
+ -cp ${jar} gridss.AnnotateVariants \
+ REFERENCE_SEQUENCE=ref.fa \
+ ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
+ ASSEMBLY=${assembly} \
+ INPUT_VCF=input.vcf \
+ OUTPUT_VCF=out.vcf \
+ WORKING_DIR=$TMPDIR/ \
+ TMP_DIR=$TMPDIR/
+
+ mv out.vcf $out
+ '';
+ passthru = {
+ filetype = filetype.vcf { ref = ref; }; # output is tagged as a VCF carrying the inputs' reference
+ gridss.assembly = assembly; # exposes the assembly derivation built in the let block
+ };
+}