about summary refs log tree commit diff
diff options
context:
space:
mode:
author l-d-s <distefano.l@wehi.edu.au> 2018-12-12 10:30:27 +1100
committer l-d-s <distefano.l@wehi.edu.au> 2018-12-12 10:30:27 +1100
commit 875d5f28926a2dd3e066a4ce08a2ce18baee4661 (patch)
tree f76116cd6b1a915ece6b1e2435745a420d9eee5a
parent f0357e09b8c473b55d22c4632692fcc79021570d (diff)
parent 979bf4c65ad668b668750b58eefa9996f51b1242 (diff)
Merge branch 'master' of https://github.com/PapenfussLab/bionix
-rw-r--r-- tools/gridss-annotateVariants.nix 88
-rw-r--r-- tools/gridss-identifyVariants.nix 84
-rw-r--r-- tools/gridss-variants.nix 113
-rw-r--r-- tools/gridss.nix 4
4 files changed, 115 insertions, 174 deletions
diff --git a/tools/gridss-annotateVariants.nix b/tools/gridss-annotateVariants.nix
deleted file mode 100644
index e0a6f0f..0000000
--- a/tools/gridss-annotateVariants.nix
+++ /dev/null
@@ -1,88 +0,0 @@
-{ bionix
-, nixpkgs
-, bwaIndexAttrs ? {}
-, faidxAttrs ? {}
-, indexAttrs ? {}
-, assemblyAttrs ? {}
-, collectMetricsAttrs ? {}
-, softClipsToSplitReadsAttrs ? { flags = "REALIGN_ENTIRE_READ=true"; }
-, identifyVariantsAttrs ? {}
-, flags ? null
-, config ? null
-, heapSize ? "4g"
-
-}:
-
-with nixpkgs;
-with lib;
-with bionix.types;
-with bionix.gridss;
-
-inputs:
-
-let
- getref = matchFiletype "gridss-annotateVariants" { bam = x: x.ref; };
- ref = getref (head inputs);
- sorted = matchFileSorting "gridss-annotateVariants" { coord = _: true; };
- homoRef = length (unique (map getref inputs)) == 1;
-
- linkInput = f: attrs: input: ''
- BASENAME=$(basename ${input})
- WRKDIR="''${BASENAME}.gridss.working"
- if [[ ! -e $WRKDIR ]] ; then
- mkdir $WRKDIR
- fi
- for f in ${f attrs input}/* ; do
- ln -s $f $WRKDIR/$BASENAME.''${f##*.}
- done
- '';
-
- linkSV = input: ''
- BASENAME=$(basename ${input})
- WRKDIR="''${BASENAME}.gridss.working"
- if [[ ! -e $WRKDIR ]] ; then
- mkdir $WRKDIR
- fi
- ln -s ${input} $WRKDIR/$BASENAME.sv.bam
- ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai
- '';
-
- assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.gridss.assemble assemblyAttrs inputs));
-in
-
-assert (all sorted inputs);
-assert (homoRef);
-
-stdenv.mkDerivation rec {
- name = "gridss-identifyVariants";
- buildInputs = [ jre ];
- buildCommand = ''
- TMPDIR=$(pwd)
- ln -s ${ref} ref.fa
- ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
- for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
- ln -s $f
- done
- ${concatMapStringsSep "\n" (linkSV) inputs}
- ${linkSV assembly}
- ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs}
- ${linkInput collectMetrics collectMetricsAttrs assembly}
- ln -s ${identifyVariants identifyVariantsAttrs inputs} input.vcf
- java -Xmx${heapSize} -Dsamjdk.create_index=true \
- -cp ${jar} gridss.AnnotateVariants \
- REFERENCE_SEQUENCE=ref.fa \
- ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
- ASSEMBLY=${assembly} \
- INPUT_VCF=input.vcf \
- OUTPUT_VCF=out.vcf \
- WORKING_DIR=$TMPDIR/ \
- ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
- TMP_DIR=$TMPDIR/
-
- mv out.vcf $out
- '';
- passthru = {
- filetype = filetype.vcf { ref = ref; };
- gridss.assembly = assembly;
- };
-}
diff --git a/tools/gridss-identifyVariants.nix b/tools/gridss-identifyVariants.nix
deleted file mode 100644
index f484ed0..0000000
--- a/tools/gridss-identifyVariants.nix
+++ /dev/null
@@ -1,84 +0,0 @@
-{ bionix
-, nixpkgs
-, bwaIndexAttrs ? {}
-, faidxAttrs ? {}
-, indexAttrs ? {}
-, assemblyAttrs ? {}
-, collectMetricsAttrs ? {}
-, softClipsToSplitReadsAttrs ? {}
-, flags ? null
-, config ? null
-, heapSize ? "4g"
-
-}:
-
-with nixpkgs;
-with lib;
-with bionix.types;
-with bionix.gridss;
-
-inputs:
-
-let
- getref = matchFiletype "gridss-identifyVariants" { bam = x: x.ref; };
- ref = getref (head inputs);
- sorted = matchFileSorting "gridss-identifyVariants" { coord = _: true; };
- homoRef = length (unique (map getref inputs)) == 1;
-
- linkInput = f: attrs: input: ''
- BASENAME=$(basename ${input})
- WRKDIR="''${BASENAME}.gridss.working"
- if [[ ! -e $WRKDIR ]] ; then
- mkdir $WRKDIR
- fi
- for f in ${f attrs input}/* ; do
- ln -s $f $WRKDIR/$BASENAME.''${f##*.}
- done
- '';
-
- linkSV = input: ''
- BASENAME=$(basename ${input})
- WRKDIR="''${BASENAME}.gridss.working"
- if [[ ! -e $WRKDIR ]] ; then
- mkdir $WRKDIR
- fi
- ln -s ${input} $WRKDIR/$BASENAME.sv.bam
- ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai
- '';
-
- assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.samtools.sort { nameSort = true;} (bionix.gridss.assemble assemblyAttrs inputs)));
-in
-
-assert (all sorted inputs);
-assert (homoRef);
-
-stdenv.mkDerivation rec {
- name = "gridss-identifyVariants";
- buildInputs = [ jre samtools ];
- buildCommand = ''
- ln -s ${ref} ref.fa
- ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
- for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
- ln -s $f
- done
- ${concatMapStringsSep "\n" (linkSV) inputs}
- ${linkSV assembly}
- ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs}
- ${linkInput collectMetrics collectMetricsAttrs assembly}
- java -Xmx${heapSize} -Dsamjdk.create_index=true \
- -cp ${jar} gridss.IdentifyVariants \
- REFERENCE_SEQUENCE=ref.fa \
- ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
- ASSEMBLY=${assembly} \
- OUTPUT_VCF=out.vcf \
- ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
- WORKING_DIR=$TMPDIR/ \
- TMP_DIR=$TMPDIR/
-
- mv out.vcf $out
- '';
- passthru = {
- filetype = filetype.vcf { ref = ref; };
- gridss.assembly = assembly;
- };
-}
diff --git a/tools/gridss-variants.nix b/tools/gridss-variants.nix
new file mode 100644
index 0000000..eb5957f
--- /dev/null
+++ b/tools/gridss-variants.nix
@@ -0,0 +1,113 @@
+{ bionix
+, nixpkgs
+, bwaIndexAttrs ? {}
+, faidxAttrs ? {}
+, indexAttrs ? {}
+, assemblyAttrs ? {}
+, collectMetricsAttrs ? {}
+, softClipsToSplitReadsAttrs ? { flags = "REALIGN_ENTIRE_READ=true"; }
+, flags ? null
+, config ? null
+, heapSize ? "4g"
+}:
+
+with nixpkgs;
+with lib;
+with bionix.types;
+with bionix.gridss;
+
+inputs:
+
+let
+ getref = matchFiletype "gridss-variants" { bam = x: x.ref; };
+ ref = getref (head inputs);
+ sorted = matchFileSorting "gridss-variants" { coord = _: true; };
+ homoRef = length (unique (map getref inputs)) == 1;
+
+ linkInput = f: attrs: input: ''
+ BASENAME=$(basename ${input})
+ WRKDIR="''${BASENAME}.gridss.working"
+ if [[ ! -e $WRKDIR ]] ; then
+ mkdir $WRKDIR
+ fi
+ for f in ${f attrs input}/* ; do
+ ln -s $f $WRKDIR/$BASENAME.''${f##*.}
+ done
+ '';
+
+ linkSV = input: ''
+ BASENAME=$(basename ${input})
+ WRKDIR="''${BASENAME}.gridss.working"
+ if [[ ! -e $WRKDIR ]] ; then
+ mkdir $WRKDIR
+ fi
+ ln -s ${input} $WRKDIR/$BASENAME.sv.bam
+ ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai
+ '';
+
+ assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.gridss.assemble assemblyAttrs inputs));
+
+ mkLinks = ''
+ ln -s ${ref} ref.fa
+ ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
+ for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
+ ln -s $f
+ done
+ ${concatMapStringsSep "\n" (linkSV) inputs}
+ ${linkSV assembly}
+ ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs}
+ ${linkInput collectMetrics collectMetricsAttrs assembly}
+ '';
+
+in
+
+assert (all sorted inputs);
+assert (homoRef);
+
+rec {
+ identify = stdenv.mkDerivation rec {
+ name = "gridss-identifyVariants";
+ buildInputs = [ jre samtools ];
+ buildCommand = mkLinks + ''
+ java -Xmx${heapSize} -Dsamjdk.create_index=true \
+ -cp ${jar} gridss.IdentifyVariants \
+ REFERENCE_SEQUENCE=ref.fa \
+ ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
+ ASSEMBLY=${assembly} \
+ OUTPUT_VCF=out.vcf \
+ ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
+ WORKING_DIR=$TMPDIR/ \
+ TMP_DIR=$TMPDIR/
+
+ mv out.vcf $out
+ '';
+ passthru = {
+ filetype = filetype.vcf { ref = ref; };
+ gridss.assembly = assembly;
+ };
+ };
+
+ annotate = stdenv.mkDerivation rec {
+ name = "gridss-annotateVariants";
+ buildInputs = [ jre ];
+ buildCommand = mkLinks + ''
+ ln -s ${identify} input.vcf
+ java -Xmx${heapSize} -Dsamjdk.create_index=true \
+ -cp ${jar} gridss.AnnotateVariants \
+ REFERENCE_SEQUENCE=ref.fa \
+ ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
+ ASSEMBLY=${assembly} \
+ INPUT_VCF=input.vcf \
+ OUTPUT_VCF=out.vcf \
+ WORKING_DIR=$TMPDIR/ \
+ ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
+ TMP_DIR=$TMPDIR/
+
+ mv out.vcf $out
+ '';
+ passthru = {
+ filetype = filetype.vcf { ref = ref; };
+ gridss.assembly = assembly;
+ };
+ };
+}
diff --git a/tools/gridss.nix b/tools/gridss.nix
index b85e84e..d47a669 100644
--- a/tools/gridss.nix
+++ b/tools/gridss.nix
@@ -15,8 +15,8 @@ rec {
collectMetrics = callBionix ./gridss-collectMetrics.nix;
extractSVReads = callBionix ./gridss-extractSVReads.nix;
assemble = callBionix ./gridss-assemble.nix;
- identifyVariants = callBionix ./gridss-identifyVariants.nix;
- annotateVariants = callBionix ./gridss-annotateVariants.nix;
+ identifyVariants = attrs: input: ((callBionix ./gridss-variants.nix attrs) input).identify;
+ annotateVariants = attrs: input: ((callBionix ./gridss-variants.nix attrs) input).annotate;
preprocessBam = input: with samtools; sort {} (gridss.softClipsToSplitReads {} (gridss.computeSamTags {} (sort {nameSort = true;} (gridss.extractSVReads {} (markdup {} (sort {} (fixmate {mateScore = true;} (sort {nameSort = true;} input))))))));
call = inputs: bionix.gridss.annotateVariants {} (map gridss.preprocessBam inputs);
}