diff options
-rw-r--r-- | default.nix | 9 | ||||
-rw-r--r-- | tools/gridss-annotateVariants.nix | 20 | ||||
-rw-r--r-- | tools/gridss-assemble.nix | 7 | ||||
-rw-r--r-- | tools/gridss-extractSVReads.nix | 10 | ||||
-rw-r--r-- | tools/gridss-identifyVariants.nix | 20 | ||||
-rw-r--r-- | tools/gridss-softClipsToSplitReads.nix | 4 | ||||
-rw-r--r-- | tools/gridss.nix | 4 | ||||
-rw-r--r-- | tools/samtools-sort.nix | 2 |
8 files changed, 50 insertions, 26 deletions
diff --git a/default.nix b/default.nix index f835fa0..50b85a3 100644 --- a/default.nix +++ b/default.nix @@ -1,6 +1,8 @@ {nixpkgs ? import <nixpkgs> {}}: let + inherit (nixpkgs) fetchurl; + bionix = nixpkgs.lib.makeExtensible (self: let callBionix = file: attrs: import file ({ bionix = self; nixpkgs = nixpkgs; } // attrs); in with self; { @@ -29,5 +31,12 @@ let def = f: defs: attrs: f (defs // attrs); defQsub = qsubAttrs: f: defs: qsubAttr qsubAttrs (def f defs); + # Fetching files of specific type + fetchFastQ = attrs: with types; tagFiletype (filetype.fq {}) (fetchurl attrs); + fetchFastA = attrs: with types; tagFiletype (filetype.fa {}) (fetchurl attrs); + fetchFastQGZ = attrs: with types; tagFiletype (filetype.gz (filetype.fq {})) (fetchurl attrs); + fetchFastAGZ = attrs: with types; tagFiletype (filetype.gz (filetype.fa {})) (fetchurl attrs); + + }); in bionix diff --git a/tools/gridss-annotateVariants.nix b/tools/gridss-annotateVariants.nix index 6c8fbc6..122c451 100644 --- a/tools/gridss-annotateVariants.nix +++ b/tools/gridss-annotateVariants.nix @@ -2,8 +2,8 @@ , nixpkgs , bwaIndexAttrs ? {} , faidxAttrs ? {} +, indexAttrs ? {} , assemblyAttrs ? {} -, extractSVReadsAttrs ? {} , collectMetricsAttrs ? {} , softClipsToSplitReadsAttrs ? {} , identifyVariantsAttrs ? {} @@ -35,6 +35,16 @@ let done ''; + linkSV = input: '' + BASENAME=$(basename ${input}) + WRKDIR="''${BASENAME}.gridss.working" + if [[ ! -e $WRKDIR ]] ; then + mkdir $WRKDIR + fi + ln -s ${input} $WRKDIR/$BASENAME.sv.bam + ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai + ''; + assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.samtools.sort { nameSort = true;} (bionix.gridss.assemble assemblyAttrs inputs))); in @@ -50,12 +60,10 @@ stdenv.mkDerivation rec { for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do ln -s $f done - ${concatMapStringsSep "\n" (linkInput extractSVReads extractSVReadsAttrs) inputs} + ${concatMapStringsSep "\n" (linkSV) inputs} + ${linkSV assembly} ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs} ${linkInput collectMetrics collectMetricsAttrs assembly} - ASSBASE=$(basename ${assembly}) - ln -s ${assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bam - ln -s ${bionix.samtools.index {} assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bai ln -s ${identifyVariants identifyVariantsAttrs inputs} input.vcf java -Xmx4g -Dsamjdk.create_index=true \ -cp ${jar} gridss.AnnotateVariants \ @@ -65,7 +73,7 @@ stdenv.mkDerivation rec { INPUT_VCF=input.vcf \ OUTPUT_VCF=out.vcf \ WORKING_DIR=$TMPDIR/ \ - ${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.ggridssConfig config)} \ + ${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.gridssConfig config)} \ TMP_DIR=$TMPDIR/ mv out.vcf $out diff --git a/tools/gridss-assemble.nix b/tools/gridss-assemble.nix index 1e25731..d1a3e4b 100644 --- a/tools/gridss-assemble.nix +++ b/tools/gridss-assemble.nix @@ -2,8 +2,8 @@ , nixpkgs , bwaIndexAttrs ? {} , faidxAttrs ? {} +, indexAttrs ? {} , collectMetricsAttrs ? {} -, extractSVReadsAttrs ? {} , flags ? null }: @@ -23,9 +23,8 @@ let BASENAME=$(basename ${input}) WRKDIR="''${BASENAME}.gridss.working" mkdir $WRKDIR - for f in ${bionix.gridss.extractSVReads extractSVReadsAttrs input}/* ; do - ln -s $f $WRKDIR/$BASENAME.''${f#*.} - done + ln -s ${input} $WRKDIR/$BASENAME.sv.bam + ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai for f in ${bionix.gridss.collectMetrics collectMetricsAttrs input}/* ; do ln -s $f $WRKDIR/$BASENAME.''${f#*.} done diff --git a/tools/gridss-extractSVReads.nix b/tools/gridss-extractSVReads.nix index 54edc85..b50a01c 100644 --- a/tools/gridss-extractSVReads.nix +++ b/tools/gridss-extractSVReads.nix @@ -5,6 +5,7 @@ , flags ? null , unmappedReads ? false , minClipLength ? 5 +, collectMetricsAttrs ? {} , config ? null }: @@ -27,16 +28,17 @@ stdenv.mkDerivation rec { ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai ln -s ${bionix.samtools.dict dictIndexAttrs ref} ref.fa.dict ln -s ${input} input.bam - mkdir $out + for f in ${bionix.gridss.collectMetrics collectMetricsAttrs input}/* ; do + ln -s $f + done java -Dsamjdk.create_index=true \ -cp ${bionix.gridss.jar} gridss.ExtractSVReads \ REFERENCE_SEQUENCE=ref.fa \ I=input.bam \ - O=$out/input.sv.bam \ - METRICS_OUTPUT=$out/input.sv_metrics \ - INSERT_SIZE_METRICS=$out/input.insert_size_metrics \ + O=$out \ UNMAPPED_READS=${if unmappedReads then "true" else "false"} \ ${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.gridssConfig config)} \ MIN_CLIP_LENGTH=${toString minClipLength} ''; + passthru.filetype = input.filetype; } diff --git a/tools/gridss-identifyVariants.nix b/tools/gridss-identifyVariants.nix index a53fcb7..e5c74a4 100644 --- a/tools/gridss-identifyVariants.nix +++ b/tools/gridss-identifyVariants.nix @@ -2,8 +2,8 @@ , nixpkgs , bwaIndexAttrs ? {} , faidxAttrs ? {} +, indexAttrs ? {} , assemblyAttrs ? {} -, extractSVReadsAttrs ? {} , collectMetricsAttrs ? {} , softClipsToSplitReadsAttrs ? {} , flags ? null @@ -34,6 +34,16 @@ let done ''; + linkSV = input: '' + BASENAME=$(basename ${input}) + WRKDIR="''${BASENAME}.gridss.working" + if [[ ! -e $WRKDIR ]] ; then + mkdir $WRKDIR + fi + ln -s ${input} $WRKDIR/$BASENAME.sv.bam + ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai + ''; + assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.samtools.sort { nameSort = true;} (bionix.gridss.assemble assemblyAttrs inputs))); in @@ -42,19 +52,17 @@ assert (homoRef); stdenv.mkDerivation rec { name = "gridss-identifyVariants"; - buildInputs = [ jre ]; + buildInputs = [ jre samtools ]; buildCommand = '' ln -s ${ref} ref.fa ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do ln -s $f done - ${concatMapStringsSep "\n" (linkInput extractSVReads extractSVReadsAttrs) inputs} + ${concatMapStringsSep "\n" (linkSV) inputs} + ${linkSV assembly} ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs} ${linkInput collectMetrics collectMetricsAttrs assembly} - ASSBASE=$(basename ${assembly}) - ln -s ${assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bam - ln -s ${bionix.samtools.index {} assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bai java -Xmx4g -Dsamjdk.create_index=true \ -cp ${jar} gridss.IdentifyVariants \ REFERENCE_SEQUENCE=ref.fa \ diff --git a/tools/gridss-softClipsToSplitReads.nix b/tools/gridss-softClipsToSplitReads.nix index 47a3abd..2fff15c 100644 --- a/tools/gridss-softClipsToSplitReads.nix +++ b/tools/gridss-softClipsToSplitReads.nix @@ -17,8 +17,6 @@ let ref = matchFiletype "gridss-softClipsToSplitReads" { bam = x: x.ref; } input; in -assert (matchFileSorting "gridss-softClipsToSplitReads" { name = _: true; } input); - stdenv.mkDerivation rec { name = "gridss-softClipsToSplitReads"; buildInputs = [ jre bwa ]; @@ -37,5 +35,5 @@ stdenv.mkDerivation rec { ${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.gridssConfig config)} \ WORKER_THREADS=$NIX_BUILD_CORES ''; - passthru.filetype = filetype.bam { ref = ref; sorting = sort.none {}; }; + passthru.filetype = filetype.bam { ref = ref; sorting = matchFileSorting "grids-softClipsToSplitReads" { coord = _: input.sorting; name = _: sort.none {}; none = _: input.sorting;} input;}; } diff --git a/tools/gridss.nix b/tools/gridss.nix index 1e4e2d1..b85e84e 100644 --- a/tools/gridss.nix +++ b/tools/gridss.nix @@ -17,6 +17,6 @@ rec { assemble = callBionix ./gridss-assemble.nix; identifyVariants = callBionix ./gridss-identifyVariants.nix; annotateVariants = callBionix ./gridss-annotateVariants.nix; - preprocessBam = input: with samtools; markdup {} (sort {} (fixmate {mateScore = true;} (bionix.gridss.softClipsToSplitReads {} (bionix.gridss.computeSamTags {} (sort {nameSort = true;} input))))); - call = inputs: bionix.gridss.annotateVariants {} (map bionix.gridss.preprocessBam inputs); + preprocessBam = input: with samtools; sort {} (gridss.softClipsToSplitReads {} (gridss.computeSamTags {} (sort {nameSort = true;} (gridss.extractSVReads {} (markdup {} (sort {} (fixmate {mateScore = true;} (sort {nameSort = true;} input)))))))); + call = inputs: bionix.gridss.annotateVariants {} (map gridss.preprocessBam inputs); } diff --git a/tools/samtools-sort.nix b/tools/samtools-sort.nix index 5a02dab..e77f3db 100644 --- a/tools/samtools-sort.nix +++ b/tools/samtools-sort.nix @@ -19,7 +19,7 @@ assert (matchFiletype "samtools-sort" { bam = _: true; sam = _: true; cram = _: let outfmtR = if outfmt != null then outfmt input else input.filetype; outFmtFlags = matchFiletype "samtools-sort-outfmt" { bam = _: "-O BAM"; sam = _: "-O SAM"; cram = ref: "-O CRAM -T ${ref}"; } {filetype = outfmtR;}; - alreadySorted = matchFileSorting "samtools-sort" { name = _: nameSort; coord = _: !nameSort; } input; + alreadySorted = matchFileSorting "samtools-sort" { name = _: nameSort; coord = _: !nameSort; none = _: false;} input; in stdenv.mkDerivation { name = "samtools-sort"; buildInputs = [ samtools ]; |