aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorl-d-s <distefano.l@wehi.edu.au>2018-11-22 12:04:23 +1100
committerl-d-s <distefano.l@wehi.edu.au>2018-11-22 12:04:23 +1100
commit7c36ee4ec628767c834ad803aac76d93e31f0486 (patch)
tree2edf423614fc077c4027b7996f690b004ec4f25b
parent3c568bacd397a3ddcb203811f7fa5a1f8af760a3 (diff)
parent0ade062d2da8a7111b14aa3d72ef7c741e98352d (diff)
Merge branch 'master' of https://github.com/PapenfussLab/bionix
-rw-r--r--default.nix9
-rw-r--r--tools/gridss-annotateVariants.nix20
-rw-r--r--tools/gridss-assemble.nix7
-rw-r--r--tools/gridss-extractSVReads.nix10
-rw-r--r--tools/gridss-identifyVariants.nix20
-rw-r--r--tools/gridss-softClipsToSplitReads.nix4
-rw-r--r--tools/gridss.nix4
-rw-r--r--tools/samtools-sort.nix2
8 files changed, 50 insertions, 26 deletions
diff --git a/default.nix b/default.nix
index f835fa0..50b85a3 100644
--- a/default.nix
+++ b/default.nix
@@ -1,6 +1,8 @@
{nixpkgs ? import <nixpkgs> {}}:
let
+ inherit (nixpkgs) fetchurl;
+
bionix = nixpkgs.lib.makeExtensible (self:
let callBionix = file: attrs: import file ({ bionix = self; nixpkgs = nixpkgs; } // attrs);
in with self; {
@@ -29,5 +31,12 @@ let
def = f: defs: attrs: f (defs // attrs);
defQsub = qsubAttrs: f: defs: qsubAttr qsubAttrs (def f defs);
+ # Fetching files of specific type
+ fetchFastQ = attrs: with types; tagFiletype (filetype.fq {}) (fetchurl attrs);
+ fetchFastA = attrs: with types; tagFiletype (filetype.fa {}) (fetchurl attrs);
+ fetchFastQGZ = attrs: with types; tagFiletype (filetype.gz (filetype.fq {})) (fetchurl attrs);
+ fetchFastAGZ = attrs: with types; tagFiletype (filetype.gz (filetype.fa {})) (fetchurl attrs);
+
+
});
in bionix
diff --git a/tools/gridss-annotateVariants.nix b/tools/gridss-annotateVariants.nix
index 6c8fbc6..122c451 100644
--- a/tools/gridss-annotateVariants.nix
+++ b/tools/gridss-annotateVariants.nix
@@ -2,8 +2,8 @@
, nixpkgs
, bwaIndexAttrs ? {}
, faidxAttrs ? {}
+, indexAttrs ? {}
, assemblyAttrs ? {}
-, extractSVReadsAttrs ? {}
, collectMetricsAttrs ? {}
, softClipsToSplitReadsAttrs ? {}
, identifyVariantsAttrs ? {}
@@ -35,6 +35,16 @@ let
done
'';
+ linkSV = input: ''
+ BASENAME=$(basename ${input})
+ WRKDIR="''${BASENAME}.gridss.working"
+ if [[ ! -e $WRKDIR ]] ; then
+ mkdir $WRKDIR
+ fi
+ ln -s ${input} $WRKDIR/$BASENAME.sv.bam
+ ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai
+ '';
+
assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.samtools.sort { nameSort = true;} (bionix.gridss.assemble assemblyAttrs inputs)));
in
@@ -50,12 +60,10 @@ stdenv.mkDerivation rec {
for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
ln -s $f
done
- ${concatMapStringsSep "\n" (linkInput extractSVReads extractSVReadsAttrs) inputs}
+ ${concatMapStringsSep "\n" (linkSV) inputs}
+ ${linkSV assembly}
${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs}
${linkInput collectMetrics collectMetricsAttrs assembly}
- ASSBASE=$(basename ${assembly})
- ln -s ${assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bam
- ln -s ${bionix.samtools.index {} assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bai
ln -s ${identifyVariants identifyVariantsAttrs inputs} input.vcf
java -Xmx4g -Dsamjdk.create_index=true \
-cp ${jar} gridss.AnnotateVariants \
@@ -65,7 +73,7 @@ stdenv.mkDerivation rec {
INPUT_VCF=input.vcf \
OUTPUT_VCF=out.vcf \
WORKING_DIR=$TMPDIR/ \
- ${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.ggridssConfig config)} \
+ ${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.gridssConfig config)} \
TMP_DIR=$TMPDIR/
mv out.vcf $out
diff --git a/tools/gridss-assemble.nix b/tools/gridss-assemble.nix
index 1e25731..d1a3e4b 100644
--- a/tools/gridss-assemble.nix
+++ b/tools/gridss-assemble.nix
@@ -2,8 +2,8 @@
, nixpkgs
, bwaIndexAttrs ? {}
, faidxAttrs ? {}
+, indexAttrs ? {}
, collectMetricsAttrs ? {}
-, extractSVReadsAttrs ? {}
, flags ? null
}:
@@ -23,9 +23,8 @@ let
BASENAME=$(basename ${input})
WRKDIR="''${BASENAME}.gridss.working"
mkdir $WRKDIR
- for f in ${bionix.gridss.extractSVReads extractSVReadsAttrs input}/* ; do
- ln -s $f $WRKDIR/$BASENAME.''${f#*.}
- done
+ ln -s ${input} $WRKDIR/$BASENAME.sv.bam
+ ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai
for f in ${bionix.gridss.collectMetrics collectMetricsAttrs input}/* ; do
ln -s $f $WRKDIR/$BASENAME.''${f#*.}
done
diff --git a/tools/gridss-extractSVReads.nix b/tools/gridss-extractSVReads.nix
index 54edc85..b50a01c 100644
--- a/tools/gridss-extractSVReads.nix
+++ b/tools/gridss-extractSVReads.nix
@@ -5,6 +5,7 @@
, flags ? null
, unmappedReads ? false
, minClipLength ? 5
+, collectMetricsAttrs ? {}
, config ? null
}:
@@ -27,16 +28,17 @@ stdenv.mkDerivation rec {
ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
ln -s ${bionix.samtools.dict dictIndexAttrs ref} ref.fa.dict
ln -s ${input} input.bam
- mkdir $out
+ for f in ${bionix.gridss.collectMetrics collectMetricsAttrs input}/* ; do
+ ln -s $f
+ done
java -Dsamjdk.create_index=true \
-cp ${bionix.gridss.jar} gridss.ExtractSVReads \
REFERENCE_SEQUENCE=ref.fa \
I=input.bam \
- O=$out/input.sv.bam \
- METRICS_OUTPUT=$out/input.sv_metrics \
- INSERT_SIZE_METRICS=$out/input.insert_size_metrics \
+ O=$out \
UNMAPPED_READS=${if unmappedReads then "true" else "false"} \
${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.gridssConfig config)} \
MIN_CLIP_LENGTH=${toString minClipLength}
'';
+ passthru.filetype = input.filetype;
}
diff --git a/tools/gridss-identifyVariants.nix b/tools/gridss-identifyVariants.nix
index a53fcb7..e5c74a4 100644
--- a/tools/gridss-identifyVariants.nix
+++ b/tools/gridss-identifyVariants.nix
@@ -2,8 +2,8 @@
, nixpkgs
, bwaIndexAttrs ? {}
, faidxAttrs ? {}
+, indexAttrs ? {}
, assemblyAttrs ? {}
-, extractSVReadsAttrs ? {}
, collectMetricsAttrs ? {}
, softClipsToSplitReadsAttrs ? {}
, flags ? null
@@ -34,6 +34,16 @@ let
done
'';
+ linkSV = input: ''
+ BASENAME=$(basename ${input})
+ WRKDIR="''${BASENAME}.gridss.working"
+ if [[ ! -e $WRKDIR ]] ; then
+ mkdir $WRKDIR
+ fi
+ ln -s ${input} $WRKDIR/$BASENAME.sv.bam
+ ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai
+ '';
+
assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.samtools.sort { nameSort = true;} (bionix.gridss.assemble assemblyAttrs inputs)));
in
@@ -42,19 +52,17 @@ assert (homoRef);
stdenv.mkDerivation rec {
name = "gridss-identifyVariants";
- buildInputs = [ jre ];
+ buildInputs = [ jre samtools ];
buildCommand = ''
ln -s ${ref} ref.fa
ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
ln -s $f
done
- ${concatMapStringsSep "\n" (linkInput extractSVReads extractSVReadsAttrs) inputs}
+ ${concatMapStringsSep "\n" (linkSV) inputs}
+ ${linkSV assembly}
${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs}
${linkInput collectMetrics collectMetricsAttrs assembly}
- ASSBASE=$(basename ${assembly})
- ln -s ${assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bam
- ln -s ${bionix.samtools.index {} assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bai
java -Xmx4g -Dsamjdk.create_index=true \
-cp ${jar} gridss.IdentifyVariants \
REFERENCE_SEQUENCE=ref.fa \
diff --git a/tools/gridss-softClipsToSplitReads.nix b/tools/gridss-softClipsToSplitReads.nix
index 47a3abd..2fff15c 100644
--- a/tools/gridss-softClipsToSplitReads.nix
+++ b/tools/gridss-softClipsToSplitReads.nix
@@ -17,8 +17,6 @@ let
ref = matchFiletype "gridss-softClipsToSplitReads" { bam = x: x.ref; } input;
in
-assert (matchFileSorting "gridss-softClipsToSplitReads" { name = _: true; } input);
-
stdenv.mkDerivation rec {
name = "gridss-softClipsToSplitReads";
buildInputs = [ jre bwa ];
@@ -37,5 +35,5 @@ stdenv.mkDerivation rec {
${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.gridssConfig config)} \
WORKER_THREADS=$NIX_BUILD_CORES
'';
- passthru.filetype = filetype.bam { ref = ref; sorting = sort.none {}; };
+ passthru.filetype = filetype.bam { ref = ref; sorting = matchFileSorting "gridss-softClipsToSplitReads" { coord = _: input.sorting; name = _: sort.none {}; none = _: input.sorting;} input;}; # name-sorted input is reported as unsorted; coord/none reuse input.sorting
}
diff --git a/tools/gridss.nix b/tools/gridss.nix
index 1e4e2d1..b85e84e 100644
--- a/tools/gridss.nix
+++ b/tools/gridss.nix
@@ -17,6 +17,6 @@ rec {
assemble = callBionix ./gridss-assemble.nix;
identifyVariants = callBionix ./gridss-identifyVariants.nix;
annotateVariants = callBionix ./gridss-annotateVariants.nix;
- preprocessBam = input: with samtools; markdup {} (sort {} (fixmate {mateScore = true;} (bionix.gridss.softClipsToSplitReads {} (bionix.gridss.computeSamTags {} (sort {nameSort = true;} input)))));
- call = inputs: bionix.gridss.annotateVariants {} (map bionix.gridss.preprocessBam inputs);
+ preprocessBam = input: with samtools; sort {} (gridss.softClipsToSplitReads {} (gridss.computeSamTags {} (sort {nameSort = true;} (gridss.extractSVReads {} (markdup {} (sort {} (fixmate {mateScore = true;} (sort {nameSort = true;} input))))))));
+ call = inputs: bionix.gridss.annotateVariants {} (map gridss.preprocessBam inputs);
}
diff --git a/tools/samtools-sort.nix b/tools/samtools-sort.nix
index 5a02dab..e77f3db 100644
--- a/tools/samtools-sort.nix
+++ b/tools/samtools-sort.nix
@@ -19,7 +19,7 @@ assert (matchFiletype "samtools-sort" { bam = _: true; sam = _: true; cram = _:
let
outfmtR = if outfmt != null then outfmt input else input.filetype;
outFmtFlags = matchFiletype "samtools-sort-outfmt" { bam = _: "-O BAM"; sam = _: "-O SAM"; cram = ref: "-O CRAM -T ${ref}"; } {filetype = outfmtR;};
- alreadySorted = matchFileSorting "samtools-sort" { name = _: nameSort; coord = _: !nameSort; } input;
+ alreadySorted = matchFileSorting "samtools-sort" { name = _: nameSort; coord = _: !nameSort; none = _: false;} input;
in stdenv.mkDerivation {
name = "samtools-sort";
buildInputs = [ samtools ];