From 979bf4c65ad668b668750b58eefa9996f51b1242 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Wed, 12 Dec 2018 10:23:11 +1100 Subject: gridss: refactor identify & annotate variants --- tools/gridss-annotateVariants.nix | 85 ----------------------------- tools/gridss-identifyVariants.nix | 82 ---------------------------- tools/gridss-variants.nix | 112 ++++++++++++++++++++++++++++++++++++++ tools/gridss.nix | 4 +- 4 files changed, 114 insertions(+), 169 deletions(-) delete mode 100644 tools/gridss-annotateVariants.nix delete mode 100644 tools/gridss-identifyVariants.nix create mode 100644 tools/gridss-variants.nix (limited to 'tools') diff --git a/tools/gridss-annotateVariants.nix b/tools/gridss-annotateVariants.nix deleted file mode 100644 index 3560f77..0000000 --- a/tools/gridss-annotateVariants.nix +++ /dev/null @@ -1,85 +0,0 @@ -{ bionix -, nixpkgs -, bwaIndexAttrs ? {} -, faidxAttrs ? {} -, indexAttrs ? {} -, assemblyAttrs ? {} -, collectMetricsAttrs ? {} -, softClipsToSplitReadsAttrs ? { flags = "REALIGN_ENTIRE_READ=true"; } -, identifyVariantsAttrs ? {} -, flags ? null -, config ? null -}: - -with nixpkgs; -with lib; -with bionix.types; -with bionix.gridss; - -inputs: - -let - getref = matchFiletype "gridss-annotateVariants" { bam = x: x.ref; }; - ref = getref (head inputs); - sorted = matchFileSorting "gridss-annotateVariants" { coord = _: true; }; - homoRef = length (unique (map getref inputs)) == 1; - - linkInput = f: attrs: input: '' - BASENAME=$(basename ${input}) - WRKDIR="''${BASENAME}.gridss.working" - if [[ ! -e $WRKDIR ]] ; then - mkdir $WRKDIR - fi - for f in ${f attrs input}/* ; do - ln -s $f $WRKDIR/$BASENAME.''${f##*.} - done - ''; - - linkSV = input: '' - BASENAME=$(basename ${input}) - WRKDIR="''${BASENAME}.gridss.working" - if [[ ! -e $WRKDIR ]] ; then - mkdir $WRKDIR - fi - ln -s ${input} $WRKDIR/$BASENAME.sv.bam - ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai - ''; - - assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.gridss.assemble assemblyAttrs inputs)); -in - -assert (all sorted inputs); -assert (homoRef); - -stdenv.mkDerivation rec { - name = "gridss-identifyVariants"; - buildInputs = [ jre ]; - buildCommand = '' - ln -s ${ref} ref.fa - ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai - for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do - ln -s $f - done - ${concatMapStringsSep "\n" (linkSV) inputs} - ${linkSV assembly} - ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs} - ${linkInput collectMetrics collectMetricsAttrs assembly} - ln -s ${identifyVariants identifyVariantsAttrs inputs} input.vcf - java -Xmx4g -Dsamjdk.create_index=true \ - -cp ${jar} gridss.AnnotateVariants \ - REFERENCE_SEQUENCE=ref.fa \ - ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \ - ASSEMBLY=${assembly} \ - INPUT_VCF=input.vcf \ - OUTPUT_VCF=out.vcf \ - WORKING_DIR=$TMPDIR/ \ - ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \ - TMP_DIR=$TMPDIR/ - - mv out.vcf $out - ''; - passthru = { - filetype = filetype.vcf { ref = ref; }; - gridss.assembly = assembly; - }; -} diff --git a/tools/gridss-identifyVariants.nix b/tools/gridss-identifyVariants.nix deleted file mode 100644 index 23a9d85..0000000 --- a/tools/gridss-identifyVariants.nix +++ /dev/null @@ -1,82 +0,0 @@ -{ bionix -, nixpkgs -, bwaIndexAttrs ? {} -, faidxAttrs ? {} -, indexAttrs ? {} -, assemblyAttrs ? {} -, collectMetricsAttrs ? {} -, softClipsToSplitReadsAttrs ? {} -, flags ? null -, config ? null -}: - -with nixpkgs; -with lib; -with bionix.types; -with bionix.gridss; - -inputs: - -let - getref = matchFiletype "gridss-identifyVariants" { bam = x: x.ref; }; - ref = getref (head inputs); - sorted = matchFileSorting "gridss-identifyVariants" { coord = _: true; }; - homoRef = length (unique (map getref inputs)) == 1; - - linkInput = f: attrs: input: '' - BASENAME=$(basename ${input}) - WRKDIR="''${BASENAME}.gridss.working" - if [[ ! -e $WRKDIR ]] ; then - mkdir $WRKDIR - fi - for f in ${f attrs input}/* ; do - ln -s $f $WRKDIR/$BASENAME.''${f##*.} - done - ''; - - linkSV = input: '' - BASENAME=$(basename ${input}) - WRKDIR="''${BASENAME}.gridss.working" - if [[ ! -e $WRKDIR ]] ; then - mkdir $WRKDIR - fi - ln -s ${input} $WRKDIR/$BASENAME.sv.bam - ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai - ''; - - assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.samtools.sort { nameSort = true;} (bionix.gridss.assemble assemblyAttrs inputs))); -in - -assert (all sorted inputs); -assert (homoRef); - -stdenv.mkDerivation rec { - name = "gridss-identifyVariants"; - buildInputs = [ jre samtools ]; - buildCommand = '' - ln -s ${ref} ref.fa - ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai - for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do - ln -s $f - done - ${concatMapStringsSep "\n" (linkSV) inputs} - ${linkSV assembly} - ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs} - ${linkInput collectMetrics collectMetricsAttrs assembly} - java -Xmx4g -Dsamjdk.create_index=true \ - -cp ${jar} gridss.IdentifyVariants \ - REFERENCE_SEQUENCE=ref.fa \ - ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \ - ASSEMBLY=${assembly} \ - OUTPUT_VCF=out.vcf \ - ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \ - WORKING_DIR=$TMPDIR/ \ - TMP_DIR=$TMPDIR/ - - mv out.vcf $out - ''; - passthru = { - filetype = filetype.vcf { ref = ref; }; - gridss.assembly = assembly; - }; -} diff --git a/tools/gridss-variants.nix b/tools/gridss-variants.nix new file mode 100644 index 0000000..a838da6 --- /dev/null +++ b/tools/gridss-variants.nix @@ -0,0 +1,112 @@ +{ bionix +, nixpkgs +, bwaIndexAttrs ? {} +, faidxAttrs ? {} +, indexAttrs ? {} +, assemblyAttrs ? {} +, collectMetricsAttrs ? {} +, softClipsToSplitReadsAttrs ? { flags = "REALIGN_ENTIRE_READ=true"; } +, flags ? null +, config ? null +}: + +with nixpkgs; +with lib; +with bionix.types; +with bionix.gridss; + +inputs: + +let + getref = matchFiletype "gridss-variants" { bam = x: x.ref; }; + ref = getref (head inputs); + sorted = matchFileSorting "gridss-variants" { coord = _: true; }; + homoRef = length (unique (map getref inputs)) == 1; + + linkInput = f: attrs: input: '' + BASENAME=$(basename ${input}) + WRKDIR="''${BASENAME}.gridss.working" + if [[ ! -e $WRKDIR ]] ; then + mkdir $WRKDIR + fi + for f in ${f attrs input}/* ; do + ln -s $f $WRKDIR/$BASENAME.''${f##*.} + done + ''; + + linkSV = input: '' + BASENAME=$(basename ${input}) + WRKDIR="''${BASENAME}.gridss.working" + if [[ ! -e $WRKDIR ]] ; then + mkdir $WRKDIR + fi + ln -s ${input} $WRKDIR/$BASENAME.sv.bam + ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai + ''; + + assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.gridss.assemble assemblyAttrs inputs)); + + mkLinks = '' + ln -s ${ref} ref.fa + ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai + for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do + ln -s $f + done + ${concatMapStringsSep "\n" (linkSV) inputs} + ${linkSV assembly} + ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs} + ${linkInput collectMetrics collectMetricsAttrs assembly} + ''; + +in + +assert (all sorted inputs); +assert (homoRef); + +rec { + identify = stdenv.mkDerivation rec { + name = "gridss-identifyVariants"; + buildInputs = [ jre samtools ]; + buildCommand = mkLinks + '' + java -Xmx4g -Dsamjdk.create_index=true \ + -cp ${jar} gridss.IdentifyVariants \ + REFERENCE_SEQUENCE=ref.fa \ + ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \ + ASSEMBLY=${assembly} \ + OUTPUT_VCF=out.vcf \ + ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \ + WORKING_DIR=$TMPDIR/ \ + TMP_DIR=$TMPDIR/ + + mv out.vcf $out + ''; + passthru = { + filetype = filetype.vcf { ref = ref; }; + gridss.assembly = assembly; + }; + }; + + annotate = stdenv.mkDerivation rec { + name = "gridss-annotateVariants"; + buildInputs = [ jre ]; + buildCommand = mkLinks + '' + ln -s ${identify} input.vcf + java -Xmx4g -Dsamjdk.create_index=true \ + -cp ${jar} gridss.AnnotateVariants \ + REFERENCE_SEQUENCE=ref.fa \ + ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \ + ASSEMBLY=${assembly} \ + INPUT_VCF=input.vcf \ + OUTPUT_VCF=out.vcf \ + WORKING_DIR=$TMPDIR/ \ + ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \ + TMP_DIR=$TMPDIR/ + + mv out.vcf $out + ''; + passthru = { + filetype = filetype.vcf { ref = ref; }; + gridss.assembly = assembly; + }; + }; +} diff --git a/tools/gridss.nix b/tools/gridss.nix index b85e84e..d47a669 100644 --- a/tools/gridss.nix +++ b/tools/gridss.nix @@ -15,8 +15,8 @@ rec { collectMetrics = callBionix ./gridss-collectMetrics.nix; extractSVReads = callBionix ./gridss-extractSVReads.nix; assemble = callBionix ./gridss-assemble.nix; - identifyVariants = callBionix ./gridss-identifyVariants.nix; - annotateVariants = callBionix ./gridss-annotateVariants.nix; + identifyVariants = attrs: input: ((callBionix ./gridss-variants.nix attrs) input).identify; + annotateVariants = attrs: input: ((callBionix ./gridss-variants.nix attrs) input).annotate; preprocessBam = input: with samtools; sort {} (gridss.softClipsToSplitReads {} (gridss.computeSamTags {} (sort {nameSort = true;} (gridss.extractSVReads {} (markdup {} (sort {} (fixmate {mateScore = true;} (sort {nameSort = true;} input)))))))); call = inputs: bionix.gridss.annotateVariants {} (map gridss.preprocessBam inputs); } -- cgit v1.2.3