From e7cd661d1c5fb4135e3d436e151294e26aef9127 Mon Sep 17 00:00:00 2001
From: Justin Bedo
Date: Mon, 29 Oct 2018 15:33:53 +1100
Subject: Split gridss into constituents

Wrap each individual command for GRIDSS so that bionix executed the
pipeline rather than GRIDSS. This patch introduces a "call" function
that executed the whole pipeline in bionix on an arbitrary BAM file.

Resolves #10.
---
 tools/gridss-annotateVariants.nix | 94 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 tools/gridss-annotateVariants.nix

(limited to 'tools/gridss-annotateVariants.nix')

diff --git a/tools/gridss-annotateVariants.nix b/tools/gridss-annotateVariants.nix
new file mode 100644
index 0000000..4f66c6c
--- /dev/null
+++ b/tools/gridss-annotateVariants.nix
@@ -0,0 +1,94 @@
+# Annotate GRIDSS variant calls: runs gridss.AnnotateVariants on the VCF from
+# identifyVariants for a list of BAM inputs and moves the resulting VCF to $out.
+#
+# Each *Attrs argument is passed unchanged to the stage named by its prefix
+# (bwa index, samtools faidx, assemble, extractSVReads, collectMetrics,
+# softClipsToSplitReads, identifyVariants).
+#
+# NOTE(review): flags is accepted but never referenced in this file; confirm
+# whether it was meant to be appended to the java command line.
+{ bionix
+, nixpkgs
+, bwaIndexAttrs ? {}
+, faidxAttrs ? {}
+, assemblyAttrs ? {}
+, extractSVReadsAttrs ? {}
+, collectMetricsAttrs ? {}
+, softClipsToSplitReadsAttrs ? {}
+, identifyVariantsAttrs ? {}
+, flags ? null
+}:
+
+with nixpkgs;
+with lib;
+with bionix.types;
+with bionix.gridss;
+
+inputs:
+
+let
+  # Reference attached to an input; only a bam case is supplied to matchFiletype.
+  getref = matchFiletype "gridss-annotateVariants" { bam = x: x.ref; };
+  ref = getref (head inputs);
+  # Sorting predicate; only a coord case is supplied to matchFileSorting.
+  sorted = matchFileSorting "gridss-annotateVariants" { coord = _: true; };
+  # True when every input reports the same reference.
+  homoRef = length (unique (map getref inputs)) == 1;
+
+  # Shell fragment: symlink every file under `f attrs input` into
+  # <basename of input>.gridss.working/, named <basename>.<text after the first '.' in the file's path>.
+  linkInput = f: attrs: input: ''
+    BASENAME=$(basename ${input})
+    WRKDIR="''${BASENAME}.gridss.working"
+    if [[ ! -e $WRKDIR ]] ; then
+      mkdir $WRKDIR
+    fi
+    for f in ${f attrs input}/* ; do
+      ln -s $f $WRKDIR/$BASENAME.''${f#*.}
+    done
+  '';
+
+  # Assembly BAM, built inside-out: gridss.assemble -> samtools.sort with
+  # nameSort = true -> softClipsToSplitReads -> samtools.sort {}.
+  assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.samtools.sort { nameSort = true;} (bionix.gridss.assemble assemblyAttrs inputs)));
+in
+
+# Every input must satisfy `sorted`, and all inputs must share one reference.
+assert (all sorted inputs);
+assert (homoRef);
+
+stdenv.mkDerivation rec {
+  # Named after this tool; it was previously "gridss-identifyVariants", which
+  # labelled the output as coming from a different stage.
+  name = "gridss-annotateVariants";
+  buildInputs = [ jre ];
+  buildCommand = ''
+    ln -s ${ref} ref.fa
+    ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
+    for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
+      ln -s $f
+    done
+    ${concatMapStringsSep "\n" (linkInput extractSVReads extractSVReadsAttrs) inputs}
+    ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs}
+    ${linkInput collectMetrics collectMetricsAttrs assembly}
+    ASSBASE=$(basename ${assembly})
+    ln -s ${assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bam
+    ln -s ${bionix.samtools.index {} assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bai
+    ln -s ${identifyVariants identifyVariantsAttrs inputs} input.vcf
+    java -Xmx4g -Dsamjdk.create_index=true \
+      -cp ${jar} gridss.AnnotateVariants \
+      REFERENCE_SEQUENCE=ref.fa \
+      ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
+      ASSEMBLY=${assembly} \
+      INPUT_VCF=input.vcf \
+      OUTPUT_VCF=out.vcf \
+      WORKING_DIR=$TMPDIR/ \
+      TMP_DIR=$TMPDIR/
+
+    mv out.vcf $out
+  '';
+  passthru = {
+    filetype = filetype.vcf { ref = ref; };
+    gridss.assembly = assembly;
+  };
+}
--
cgit v1.2.3