aboutsummaryrefslogtreecommitdiff
path: root/tools/gridss-variants.nix
diff options
context:
space:
mode:
author Justin Bedo <cu@cua0.org> 2018-12-12 10:23:11 +1100
committer Justin Bedo <cu@cua0.org> 2018-12-12 10:23:11 +1100
commit 979bf4c65ad668b668750b58eefa9996f51b1242 (patch)
tree 64d4064b48e0604185cd1a0ff0c5141c31b1d6a2 /tools/gridss-variants.nix
parent 10bdecf6c1338d7f531ddf7b41da14dfe4a4ac33 (diff)
gridss: refactor identify & annotate variants
Diffstat (limited to 'tools/gridss-variants.nix')
-rw-r--r-- tools/gridss-variants.nix 112
1 files changed, 112 insertions, 0 deletions
diff --git a/tools/gridss-variants.nix b/tools/gridss-variants.nix
new file mode 100644
index 0000000..a838da6
--- /dev/null
+++ b/tools/gridss-variants.nix
@@ -0,0 +1,112 @@
+# GRIDSS variant identification and annotation.
+#
+# Curried function: first an attribute set of options, then `inputs`.
+#
+# Options:
+#   bionix, nixpkgs            - package sets brought into scope via `with` below.
+#   bwaIndexAttrs              - forwarded to bionix.bwa.index.
+#   faidxAttrs                 - forwarded to bionix.samtools.faidx.
+#   indexAttrs                 - forwarded to bionix.samtools.index.
+#   assemblyAttrs              - forwarded to bionix.gridss.assemble.
+#   collectMetricsAttrs        - forwarded to collectMetrics.
+#   softClipsToSplitReadsAttrs - forwarded to softClipsToSplitReads; the default
+#                                sets flags = "REALIGN_ENTIRE_READ=true".
+#   flags                      - optional, null by default.
+#   config                     - optional; when non-null it is rendered with
+#                                bionix.gridss.gridssConfig and passed as OPTIONS_FILE.
+{ bionix
+, nixpkgs
+, bwaIndexAttrs ? {}
+, faidxAttrs ? {}
+, indexAttrs ? {}
+, assemblyAttrs ? {}
+, collectMetricsAttrs ? {}
+, softClipsToSplitReadsAttrs ? { flags = "REALIGN_ENTIRE_READ=true"; }
+, flags ? null
+, config ? null
+}:
+
+with nixpkgs;
+with lib;
+with bionix.types;
+with bionix.gridss;
+
+# A list of inputs; each must carry a `bam` filetype (see getref below).
+inputs:
+
+let
+ # Pulls the reference off an input's filetype; only the `bam` case is handled.
+ getref = matchFiletype "gridss-variants" { bam = x: x.ref; };
+ # Reference of the first input, used as the reference for the whole run.
+ ref = getref (head inputs);
+ # Predicate on an input's sort order; only the `coord` case is listed (yielding true).
+ # NOTE(review): what happens for other sort orders is decided by matchFileSorting, not visible here.
+ sorted = matchFileSorting "gridss-variants" { coord = _: true; };
+ # True when the references of all inputs collapse to exactly one distinct value.
+ homoRef = length (unique (map getref inputs)) == 1;
+
+ # Shell snippet generator. For one input, symlinks every file found in the
+ # output of `mkOutputs stageAttrs bam` into "<basename>.gridss.working"
+ # (created when absent), naming each link "<basename>.<text after the
+ # file's last dot>". The shell loop variable `f` is unrelated to the Nix
+ # arguments.
+ linkInput = mkOutputs: stageAttrs: bam: ''
+ BASENAME=$(basename ${bam})
+ WRKDIR="''${BASENAME}.gridss.working"
+ if [[ ! -e $WRKDIR ]] ; then
+ mkdir $WRKDIR
+ fi
+ for f in ${mkOutputs stageAttrs bam}/* ; do
+ ln -s $f $WRKDIR/$BASENAME.''${f##*.}
+ done
+ '';
+
+ # Shell snippet generator. Inside "<basename>.gridss.working" (created when
+ # absent) it links the input itself as "<basename>.sv.bam" and the result of
+ # bionix.samtools.index (called with indexAttrs) as "<basename>.sv.bai".
+ linkSV = bam: ''
+ BASENAME=$(basename ${bam})
+ WRKDIR="''${BASENAME}.gridss.working"
+ if [[ ! -e $WRKDIR ]] ; then
+ mkdir $WRKDIR
+ fi
+ ln -s ${bam} $WRKDIR/$BASENAME.sv.bam
+ ln -s ${bionix.samtools.index indexAttrs bam} $WRKDIR/$BASENAME.sv.bai
+ '';
+
+ # Assembly pipeline, innermost first: bionix.gridss.assemble over all inputs,
+ # then softClipsToSplitReads, then bionix.samtools.sort with empty attrs.
+ assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.gridss.assemble assemblyAttrs inputs));
+
+ # Shell prelude shared by both stages. It links the reference as ref.fa, its
+ # faidx output as ref.fa.fai, and every file of the bwa index into the
+ # current directory, then emits the linkSV and linkInput (collectMetrics)
+ # snippets for each input and for the assembly.
+ mkLinks = ''
+ ln -s ${ref} ref.fa
+ ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
+ for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
+ ln -s $f
+ done
+ ${concatMapStringsSep "\n" (linkSV) inputs}
+ ${linkSV assembly}
+ ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs}
+ ${linkInput collectMetrics collectMetricsAttrs assembly}
+ '';
+
+in
+
+# Evaluation-time preconditions: every input satisfies `sorted`, and all
+# inputs resolve to a single reference (`homoRef`).
+assert (all sorted inputs);
+assert (homoRef);
+
+# Result: the two GRIDSS variant stages. `rec` is required at this level only,
+# because `annotate` consumes the output of `identify`.
+#
+# Both stages prepend the shared `mkLinks` prelude, run the GRIDSS class with a
+# 4g Java heap, and move the produced VCF to $out. The optional `flags`
+# argument, when non-null, is appended verbatim to both command lines; the
+# optional `config` is rendered to an OPTIONS_FILE.
+rec {
+ # Runs gridss.IdentifyVariants over all inputs plus the assembly.
+ identify = stdenv.mkDerivation {
+ name = "gridss-identifyVariants";
+ buildInputs = [ jre samtools ];
+ buildCommand = mkLinks + ''
+ java -Xmx4g -Dsamjdk.create_index=true \
+ -cp ${jar} gridss.IdentifyVariants \
+ REFERENCE_SEQUENCE=ref.fa \
+ ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
+ ASSEMBLY=${assembly} \
+ OUTPUT_VCF=out.vcf \
+ ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
+ ${optionalString (flags != null) flags} \
+ WORKING_DIR=$TMPDIR/ \
+ TMP_DIR=$TMPDIR/
+
+ mv out.vcf $out
+ '';
+ # Metadata exposed to downstream stages: output filetype (VCF against
+ # `ref`) and the assembly that was used.
+ passthru = {
+ filetype = filetype.vcf { inherit ref; };
+ gridss.assembly = assembly;
+ };
+ };
+
+ # Runs gridss.AnnotateVariants, feeding it the VCF produced by `identify`.
+ annotate = stdenv.mkDerivation {
+ name = "gridss-annotateVariants";
+ buildInputs = [ jre ];
+ buildCommand = mkLinks + ''
+ ln -s ${identify} input.vcf
+ java -Xmx4g -Dsamjdk.create_index=true \
+ -cp ${jar} gridss.AnnotateVariants \
+ REFERENCE_SEQUENCE=ref.fa \
+ ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
+ ASSEMBLY=${assembly} \
+ INPUT_VCF=input.vcf \
+ OUTPUT_VCF=out.vcf \
+ WORKING_DIR=$TMPDIR/ \
+ ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
+ ${optionalString (flags != null) flags} \
+ TMP_DIR=$TMPDIR/
+
+ mv out.vcf $out
+ '';
+ # Same metadata as `identify`: VCF filetype against `ref`, plus the assembly.
+ passthru = {
+ filetype = filetype.vcf { inherit ref; };
+ gridss.assembly = assembly;
+ };
+ };
+}