aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Justin Bedo <cu@cua0.org> 2018-12-12 10:23:11 +1100
committer Justin Bedo <cu@cua0.org> 2018-12-12 10:23:11 +1100
commit 979bf4c65ad668b668750b58eefa9996f51b1242 (patch)
tree 64d4064b48e0604185cd1a0ff0c5141c31b1d6a2
parent 10bdecf6c1338d7f531ddf7b41da14dfe4a4ac33 (diff)
gridss: refactor identify & annotate variants
-rw-r--r-- tools/gridss-annotateVariants.nix 85
-rw-r--r-- tools/gridss-identifyVariants.nix 82
-rw-r--r-- tools/gridss-variants.nix 112
-rw-r--r-- tools/gridss.nix 4
4 files changed, 114 insertions, 169 deletions
diff --git a/tools/gridss-annotateVariants.nix b/tools/gridss-annotateVariants.nix
deleted file mode 100644
index 3560f77..0000000
--- a/tools/gridss-annotateVariants.nix
+++ /dev/null
@@ -1,85 +0,0 @@
-{ bionix
-, nixpkgs
-, bwaIndexAttrs ? {}
-, faidxAttrs ? {}
-, indexAttrs ? {}
-, assemblyAttrs ? {}
-, collectMetricsAttrs ? {}
-, softClipsToSplitReadsAttrs ? { flags = "REALIGN_ENTIRE_READ=true"; }
-, identifyVariantsAttrs ? {}
-, flags ? null
-, config ? null
-}:
-
-with nixpkgs;
-with lib;
-with bionix.types;
-with bionix.gridss;
-
-inputs:
-
-let
- getref = matchFiletype "gridss-annotateVariants" { bam = x: x.ref; };
- ref = getref (head inputs);
- sorted = matchFileSorting "gridss-annotateVariants" { coord = _: true; };
- homoRef = length (unique (map getref inputs)) == 1;
-
- linkInput = f: attrs: input: ''
- BASENAME=$(basename ${input})
- WRKDIR="''${BASENAME}.gridss.working"
- if [[ ! -e $WRKDIR ]] ; then
- mkdir $WRKDIR
- fi
- for f in ${f attrs input}/* ; do
- ln -s $f $WRKDIR/$BASENAME.''${f##*.}
- done
- '';
-
- linkSV = input: ''
- BASENAME=$(basename ${input})
- WRKDIR="''${BASENAME}.gridss.working"
- if [[ ! -e $WRKDIR ]] ; then
- mkdir $WRKDIR
- fi
- ln -s ${input} $WRKDIR/$BASENAME.sv.bam
- ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai
- '';
-
- assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.gridss.assemble assemblyAttrs inputs));
-in
-
-assert (all sorted inputs);
-assert (homoRef);
-
-stdenv.mkDerivation rec {
- name = "gridss-identifyVariants";
- buildInputs = [ jre ];
- buildCommand = ''
- ln -s ${ref} ref.fa
- ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
- for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
- ln -s $f
- done
- ${concatMapStringsSep "\n" (linkSV) inputs}
- ${linkSV assembly}
- ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs}
- ${linkInput collectMetrics collectMetricsAttrs assembly}
- ln -s ${identifyVariants identifyVariantsAttrs inputs} input.vcf
- java -Xmx4g -Dsamjdk.create_index=true \
- -cp ${jar} gridss.AnnotateVariants \
- REFERENCE_SEQUENCE=ref.fa \
- ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
- ASSEMBLY=${assembly} \
- INPUT_VCF=input.vcf \
- OUTPUT_VCF=out.vcf \
- WORKING_DIR=$TMPDIR/ \
- ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
- TMP_DIR=$TMPDIR/
-
- mv out.vcf $out
- '';
- passthru = {
- filetype = filetype.vcf { ref = ref; };
- gridss.assembly = assembly;
- };
-}
diff --git a/tools/gridss-identifyVariants.nix b/tools/gridss-identifyVariants.nix
deleted file mode 100644
index 23a9d85..0000000
--- a/tools/gridss-identifyVariants.nix
+++ /dev/null
@@ -1,82 +0,0 @@
-{ bionix
-, nixpkgs
-, bwaIndexAttrs ? {}
-, faidxAttrs ? {}
-, indexAttrs ? {}
-, assemblyAttrs ? {}
-, collectMetricsAttrs ? {}
-, softClipsToSplitReadsAttrs ? {}
-, flags ? null
-, config ? null
-}:
-
-with nixpkgs;
-with lib;
-with bionix.types;
-with bionix.gridss;
-
-inputs:
-
-let
- getref = matchFiletype "gridss-identifyVariants" { bam = x: x.ref; };
- ref = getref (head inputs);
- sorted = matchFileSorting "gridss-identifyVariants" { coord = _: true; };
- homoRef = length (unique (map getref inputs)) == 1;
-
- linkInput = f: attrs: input: ''
- BASENAME=$(basename ${input})
- WRKDIR="''${BASENAME}.gridss.working"
- if [[ ! -e $WRKDIR ]] ; then
- mkdir $WRKDIR
- fi
- for f in ${f attrs input}/* ; do
- ln -s $f $WRKDIR/$BASENAME.''${f##*.}
- done
- '';
-
- linkSV = input: ''
- BASENAME=$(basename ${input})
- WRKDIR="''${BASENAME}.gridss.working"
- if [[ ! -e $WRKDIR ]] ; then
- mkdir $WRKDIR
- fi
- ln -s ${input} $WRKDIR/$BASENAME.sv.bam
- ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai
- '';
-
- assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.samtools.sort { nameSort = true;} (bionix.gridss.assemble assemblyAttrs inputs)));
-in
-
-assert (all sorted inputs);
-assert (homoRef);
-
-stdenv.mkDerivation rec {
- name = "gridss-identifyVariants";
- buildInputs = [ jre samtools ];
- buildCommand = ''
- ln -s ${ref} ref.fa
- ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
- for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
- ln -s $f
- done
- ${concatMapStringsSep "\n" (linkSV) inputs}
- ${linkSV assembly}
- ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs}
- ${linkInput collectMetrics collectMetricsAttrs assembly}
- java -Xmx4g -Dsamjdk.create_index=true \
- -cp ${jar} gridss.IdentifyVariants \
- REFERENCE_SEQUENCE=ref.fa \
- ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
- ASSEMBLY=${assembly} \
- OUTPUT_VCF=out.vcf \
- ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
- WORKING_DIR=$TMPDIR/ \
- TMP_DIR=$TMPDIR/
-
- mv out.vcf $out
- '';
- passthru = {
- filetype = filetype.vcf { ref = ref; };
- gridss.assembly = assembly;
- };
-}
diff --git a/tools/gridss-variants.nix b/tools/gridss-variants.nix
new file mode 100644
index 0000000..a838da6
--- /dev/null
+++ b/tools/gridss-variants.nix
@@ -0,0 +1,112 @@
+{ bionix
+, nixpkgs
+, bwaIndexAttrs ? {} # attrs passed to bionix.bwa.index
+, faidxAttrs ? {} # attrs passed to bionix.samtools.faidx
+, indexAttrs ? {} # attrs passed to bionix.samtools.index
+, assemblyAttrs ? {} # attrs passed to bionix.gridss.assemble
+, collectMetricsAttrs ? {} # attrs passed to collectMetrics
+, softClipsToSplitReadsAttrs ? { flags = "REALIGN_ENTIRE_READ=true"; }
+, flags ? null # optional string of extra arguments appended to both java command lines
+, config ? null # optional; rendered by bionix.gridss.gridssConfig and passed as OPTIONS_FILE
+}:
+# Maps a list of inputs to { identify, annotate }: two VCF derivations built from one shared assembly.
+with nixpkgs;
+with lib;
+with bionix.types;
+with bionix.gridss;
+
+inputs:
+
+let
+ getref = matchFiletype "gridss-variants" { bam = x: x.ref; }; # reference recorded on a bam-typed input
+ ref = getref (head inputs); # reference of the first input; homoRef checks the others agree
+ sorted = matchFileSorting "gridss-variants" { coord = _: true; }; # true for coord-sorted inputs
+ homoRef = length (unique (map getref inputs)) == 1; # all inputs share a single reference
+ # Shell snippet: link every file under `f attrs input` into <basename>.gridss.working/ as <basename>.<ext>.
+ linkInput = f: attrs: input: ''
+ BASENAME=$(basename ${input})
+ WRKDIR="''${BASENAME}.gridss.working"
+ if [[ ! -e $WRKDIR ]] ; then
+ mkdir $WRKDIR
+ fi
+ for f in ${f attrs input}/* ; do
+ ln -s $f $WRKDIR/$BASENAME.''${f##*.}
+ done
+ '';
+ # Shell snippet: link the input and its samtools index into the working dir as <basename>.sv.bam / .sv.bai.
+ linkSV = input: ''
+ BASENAME=$(basename ${input})
+ WRKDIR="''${BASENAME}.gridss.working"
+ if [[ ! -e $WRKDIR ]] ; then
+ mkdir $WRKDIR
+ fi
+ ln -s ${input} $WRKDIR/$BASENAME.sv.bam
+ ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai
+ '';
+ # Assembly used by both derivations: assemble -> softClipsToSplitReads -> samtools sort.
+ assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.gridss.assemble assemblyAttrs inputs));
+ # Build prologue shared by both derivations: reference, faidx and bwa index links, then per-input working dirs.
+ mkLinks = ''
+ ln -s ${ref} ref.fa
+ ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
+ for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
+ ln -s $f
+ done
+ ${concatMapStringsSep "\n" (linkSV) inputs}
+ ${linkSV assembly}
+ ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs}
+ ${linkInput collectMetrics collectMetricsAttrs assembly}
+ '';
+in
+assert (all sorted inputs); # every input must be coordinate-sorted
+assert (homoRef); # mixed references are rejected
+
+rec { # rec: annotate refers to identify below
+ identify = stdenv.mkDerivation rec { # runs gridss.IdentifyVariants; $out is the resulting VCF
+ name = "gridss-identifyVariants";
+ buildInputs = [ jre samtools ];
+ buildCommand = mkLinks + ''
+ java -Xmx4g -Dsamjdk.create_index=true \
+ -cp ${jar} gridss.IdentifyVariants \
+ REFERENCE_SEQUENCE=ref.fa \
+ ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
+ ASSEMBLY=${assembly} \
+ OUTPUT_VCF=out.vcf \
+ ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
+ ${optionalString (flags != null) flags} \
+ WORKING_DIR=$TMPDIR/ \
+ TMP_DIR=$TMPDIR/
+
+ mv out.vcf $out
+ '';
+ passthru = {
+ filetype = filetype.vcf { ref = ref; };
+ gridss.assembly = assembly;
+ };
+ };
+ # Runs gridss.AnnotateVariants with the VCF produced by identify as INPUT_VCF; $out is the annotated VCF.
+ annotate = stdenv.mkDerivation rec {
+ name = "gridss-annotateVariants";
+ buildInputs = [ jre ];
+ buildCommand = mkLinks + ''
+ ln -s ${identify} input.vcf
+ java -Xmx4g -Dsamjdk.create_index=true \
+ -cp ${jar} gridss.AnnotateVariants \
+ REFERENCE_SEQUENCE=ref.fa \
+ ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
+ ASSEMBLY=${assembly} \
+ INPUT_VCF=input.vcf \
+ OUTPUT_VCF=out.vcf \
+ WORKING_DIR=$TMPDIR/ \
+ ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
+ ${optionalString (flags != null) flags} \
+ TMP_DIR=$TMPDIR/
+
+ mv out.vcf $out
+ '';
+ passthru = {
+ filetype = filetype.vcf { ref = ref; };
+ gridss.assembly = assembly;
+ };
+ };
+}
diff --git a/tools/gridss.nix b/tools/gridss.nix
index b85e84e..d47a669 100644
--- a/tools/gridss.nix
+++ b/tools/gridss.nix
@@ -15,8 +15,8 @@ rec {
collectMetrics = callBionix ./gridss-collectMetrics.nix;
extractSVReads = callBionix ./gridss-extractSVReads.nix;
assemble = callBionix ./gridss-assemble.nix;
- identifyVariants = callBionix ./gridss-identifyVariants.nix;
- annotateVariants = callBionix ./gridss-annotateVariants.nix;
+ identifyVariants = attrs: input: (callBionix ./gridss-variants.nix attrs input).identify; # select the identify output
+ annotateVariants = attrs: input: (callBionix ./gridss-variants.nix attrs input).annotate; # select the annotate output
preprocessBam = input: with samtools; sort {} (gridss.softClipsToSplitReads {} (gridss.computeSamTags {} (sort {nameSort = true;} (gridss.extractSVReads {} (markdup {} (sort {} (fixmate {mateScore = true;} (sort {nameSort = true;} input))))))));
call = inputs: bionix.gridss.annotateVariants {} (map gridss.preprocessBam inputs);
}