From ec77e9c3a9c2dc7425ee07474f525dd0a3d01fab Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Thu, 1 Nov 2018 15:40:47 +1100 Subject: samtools-sort: don't sort if already sorted --- tools/samtools-sort.nix | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/samtools-sort.nix b/tools/samtools-sort.nix index ab9d603..5a02dab 100644 --- a/tools/samtools-sort.nix +++ b/tools/samtools-sort.nix @@ -11,7 +11,7 @@ with nixpkgs; with lib; let - inherit (bionix.types) matchFiletype coordSort; + inherit (bionix.types) matchFiletype coordSort matchFileSorting; in assert (matchFiletype "samtools-sort" { bam = _: true; sam = _: true; cram = _: true; } input); @@ -19,11 +19,20 @@ assert (matchFiletype "samtools-sort" { bam = _: true; sam = _: true; cram = _: let outfmtR = if outfmt != null then outfmt input else input.filetype; outFmtFlags = matchFiletype "samtools-sort-outfmt" { bam = _: "-O BAM"; sam = _: "-O SAM"; cram = ref: "-O CRAM -T ${ref}"; } {filetype = outfmtR;}; + alreadySorted = matchFileSorting "samtools-sort" { name = _: nameSort; coord = _: !nameSort; } input; in stdenv.mkDerivation { name = "samtools-sort"; buildInputs = [ samtools ]; - buildCommand = '' - samtools sort -@ $NIX_BUILD_CORES ${optionalString nameSort "-n"} ${outFmtFlags} ${optionalString (flags != null) flags} ${input} > $out - ''; + buildCommand = + if alreadySorted then /* input already has the requested ordering: make $out a symlink to it (ln -s TARGET LINK_NAME) instead of re-sorting */ + "ln -s ${input} $out" + else + '' + samtools sort -@ $NIX_BUILD_CORES \ + ${optionalString nameSort "-n"} \ + ${outFmtFlags} \ + ${optionalString (flags != null) flags} \ + ${input} > $out + ''; passthru.filetype = if nameSort then bionix.types.nameSort outfmtR else coordSort outfmtR; } -- cgit v1.2.3 From f416c145204765a1566782f0ff384d3f65d6ed35 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Fri, 2 Nov 2018 15:35:22 +1100 Subject: kallisto: init --- default.nix | 1 + lib/references.nix | 22 ++++++++++++++++++++++ lib/types.nix | 3 ++- tools/kallisto-index.nix | 22 ++++++++++++++++++++++ 
tools/kallisto-quant.nix | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ tools/kallisto.nix | 8 ++++++++ 6 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 tools/kallisto-index.nix create mode 100644 tools/kallisto-quant.nix create mode 100644 tools/kallisto.nix diff --git a/default.nix b/default.nix index 5218a99..f835fa0 100644 --- a/default.nix +++ b/default.nix @@ -15,6 +15,7 @@ let fastqc = callBionix ./tools/fastqc.nix {}; gridss = callBionix ./tools/gridss.nix {}; infercnv = callBionix ./tools/infercnv.nix {}; + kallisto = callBionix ./tools/kallisto.nix {}; mosdepth = callBionix ./tools/mosdepth.nix {}; mutect = callBionix ./tools/mutect.nix {}; platypus = callBionix ./tools/platypus.nix {}; diff --git a/lib/references.nix b/lib/references.nix index 498cfd9..bdeefa6 100755 --- a/lib/references.nix +++ b/lib/references.nix @@ -46,6 +46,28 @@ rec { ''; passthru.filetype = filetype.vcf { ref = seq; }; }; + ensembl = { /* Ensembl release-94 Homo sapiens GRCh38 cDNA and ncRNA FASTA downloads; each is fetched gzipped, gunzipped into $out and tagged with filetype.fa */ + cdna = stdenvNoCC.mkDerivation rec { + name = "ensembl-grch38-cdna-${version}"; + version = "94"; + src = fetchurl { + url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz"; + sha256 = "1fc5d6p2wlwsm49wnmxmm3byjx5jvr6z9fpzrq7v7fpb086adl0h"; + }; + buildCommand = "gunzip < $src > $out"; + passthru.filetype = filetype.fa {}; + }; + ncrna = stdenvNoCC.mkDerivation rec { + name = "ensembl-grch38-ncrna-${version}"; + version = "94"; + src = fetchurl { + url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/ncrna/Homo_sapiens.GRCh38.ncrna.fa.gz"; + sha256 = "1cpasykwriila52nqgvw6d3mjyh6d9qi613hvhn4h1dxkqzgnjff"; + }; + buildCommand = "gunzip < $src > $out"; + passthru.filetype = filetype.fa {}; + }; + }; }; grcm38 = grcm38-p6; diff --git a/lib/types.nix b/lib/types.nix index 4dbc6ff..ef9b1b7 100644 --- a/lib/types.nix +++ b/lib/types.nix @@ -21,7 +21,8 @@ let in rec { - matchFiletype = sym: y: x: if x ? 
filetype then match x.filetype (defError (idft sym) y filetype) else abort "unknown filetype for ${sym}"; + matchFiletype = sym: y: x: if x ? filetype then matchFiletype' sym y x.filetype else abort "unknown filetype for ${sym}"; + matchFiletype' = sym: y: x: match x (defError (idft sym) y filetype); /* matches a bare filetype value; matchFiletype unwraps x.filetype and delegates here */ filetype = make-type "filetype" { fa = {}; fq = {}; diff --git a/tools/kallisto-index.nix b/tools/kallisto-index.nix new file mode 100644 index 0000000..33dfb80 --- /dev/null +++ b/tools/kallisto-index.nix @@ -0,0 +1,22 @@ +{bionix +, nixpkgs +, kmerSize ? 31 +, unique ? false}: + +with nixpkgs; +with lib; +with bionix.types; + +assert (kmerSize > 1); + +input: + +assert (matchFiletype "kallisto-index" { fa = _: true; } input); /* first argument is the tool name used in filetype error messages; only fa inputs are accepted */ + +stdenv.mkDerivation { + name = "kallisto-index"; + buildInputs = [ kallisto ]; + buildCommand = '' + kallisto index -k ${toString kmerSize} ${optionalString unique "--make-unique"} -i $out ${input} + ''; +} diff --git a/tools/kallisto-quant.nix b/tools/kallisto-quant.nix new file mode 100644 index 0000000..c410721 --- /dev/null +++ b/tools/kallisto-quant.nix @@ -0,0 +1,48 @@ +{bionix +, nixpkgs +, indexFlags ? {} +, bias ? false +, bootstrapSamples ? 0 +, seed ? 42 +, plaintext ? false +, fusion ? false +, single ? false +, frStranded ? false +, rfStranded ? false +, fragmentLength ? null +, fragmentSD ? 
null +, ref}: + +with nixpkgs; +with lib; + +assert (!single || (fragmentLength != null && fragmentSD != null)); /* --single is emitted together with -l and -s below, so both fragment parameters must be set */ + +inputs: + +let + inherit (bionix.types) matchFiletype'; + isFastQ = matchFiletype' "kallisto-quant" {fq = _: true; gz = isFastQ; }; /* fq matches directly; the gz handler is isFastQ itself, so whatever value the gz case receives is checked recursively */ +in + +assert (all (x: isFastQ (x.filetype)) inputs); + +stdenv.mkDerivation { + name = "kallisto-quant"; + buildInputs = [ kallisto ]; + buildCommand = '' + mkdir $out + kallisto quant \ + -i ${bionix.kallisto.index indexFlags ref} \ + -o $out \ + ${optionalString bias "--bias"} \ + ${optionalString (bootstrapSamples > 0) "-b ${toString bootstrapSamples} --seed=${toString seed}"} \ + ${optionalString plaintext "--plaintext"} \ + ${optionalString fusion "--fusion"} \ + ${optionalString single "--single -l ${toString fragmentLength} -s ${toString fragmentSD}"} \ + ${optionalString frStranded "--fr-stranded"} \ + ${optionalString rfStranded "--rf-stranded"} \ + -t $NIX_BUILD_CORES \ + ${concatStringsSep " " inputs} + ''; +} diff --git a/tools/kallisto.nix b/tools/kallisto.nix new file mode 100644 index 0000000..f13f493 --- /dev/null +++ b/tools/kallisto.nix @@ -0,0 +1,8 @@ +{bionix, nixpkgs}: + +with bionix; + +{ + index = callBionix ./kallisto-index.nix; + quant = callBionix ./kallisto-quant.nix; +} -- cgit v1.2.3 From ceb73223717abfb0de1a0905833b0cbac0b3fa23 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Fri, 2 Nov 2018 16:11:29 +1100 Subject: Allow bwa-mem to take gzipped fastq inputs --- tools/bwa-mem.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bwa-mem.nix b/tools/bwa-mem.nix index 3420294..2b3bc64 100644 --- a/tools/bwa-mem.nix +++ b/tools/bwa-mem.nix @@ -17,7 +17,7 @@ with bionix.compression; let fa = f: matchFiletype "bwa-ref" { fa = _: f; } f; - fq = f: matchFiletype "bwa-input" { fq = _: f; } f; + fq = f: matchFiletype "bwa-input" { fq = _: f; gz = matchFiletype' "bwa-input" { fq = _: f; }; } f; in stdenv.mkDerivation { name = "bwa-mem"; -- cgit v1.2.3 From 
b46138826260f1ed963ffe18fef7ec4ce70b6de8 Mon Sep 17 00:00:00 2001 From: l-d-s Date: Mon, 12 Nov 2018 16:54:03 +1100 Subject: Function to create ini-style gridss config file. --- tools/gridss-configFile.nix | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 tools/gridss-configFile.nix diff --git a/tools/gridss-configFile.nix b/tools/gridss-configFile.nix new file mode 100644 index 0000000..838c292 --- /dev/null +++ b/tools/gridss-configFile.nix @@ -0,0 +1,36 @@ +{bionix, nixpkgs}: + +with nixpkgs; + +let + attrsToGridssConfigString = attrsToGridssConfigStringPrepend ""; + + attrsToGridssConfigStringPrepend = prepend: attrs: /* renders every attribute of attrs as "key = value" line(s), with prepend folded into the key so nested sets and lists stay fully qualified on every line */ + lib.concatStringsSep "\n" ( + lib.attrValues ( + lib.mapAttrs + (name: attr: iniLine (prepend + name) attr) + attrs)); + + iniLine = name: attr: + let attrType = builtins.typeOf attr; + in + if (iniLineByAttrType ? ${attrType}) + then (iniLineByAttrType.${attrType} name attr) + else builtins.throw ( + "`gridssConfig` cannot convert attribute of type \"" + attrType + "\"."); + + iniLineByAttrType = { /* keyed by the strings builtins.typeOf returns ("set" for attribute sets) */ + string = name: attr: name + " = " + attr; + int = name: attr: name + " = " + builtins.toString attr; + float = name: attr: name + " = " + ( /* trims trailing zeros from the fixed-point toString output but keeps at least one fractional digit */ + builtins.head ( + builtins.match "(-?[0-9]+[.][0-9]*[1-9]|-?[0-9]+[.]0)0*" (builtins.toString attr))); + bool = name: attr: name + " = " + (if attr == true then "true" else "false"); + set = name: attr: attrsToGridssConfigStringPrepend (name + ".") attr; + # Allows for repeated fields (e.g. 
for adapters): + list = name: attr: lib.concatStringsSep "\n" (map (x: iniLine name x) attr); + }; +in configAttrs: (writeText + "gridss.properties.override" + (attrsToGridssConfigString configAttrs)) -- cgit v1.2.3 From 4574deb8e1649eaf63df424585d11645c7cfa7dd Mon Sep 17 00:00:00 2001 From: l-d-s Date: Mon, 12 Nov 2018 16:54:30 +1100 Subject: Attempt to integrate config file into GRIDSS stage --- tools/gridss-annotateVariants.nix | 2 ++ tools/gridss-callVariants.nix | 2 ++ tools/gridss-collectGridssMetrics.nix | 2 ++ tools/gridss-collectMetrics.nix | 2 ++ tools/gridss-computeSamTags.nix | 2 ++ tools/gridss-extractSVReads.nix | 2 ++ tools/gridss-identifyVariants.nix | 2 ++ tools/gridss-softClipsToSplitReads.nix | 2 ++ tools/gridss.nix | 1 + 9 files changed, 17 insertions(+) diff --git a/tools/gridss-annotateVariants.nix b/tools/gridss-annotateVariants.nix index 4f66c6c..33992c4 100644 --- a/tools/gridss-annotateVariants.nix +++ b/tools/gridss-annotateVariants.nix @@ -8,6 +8,7 @@ , softClipsToSplitReadsAttrs ? {} , identifyVariantsAttrs ? {} , flags ? null +, config ? null /* attrset rendered to a file by bionix.gridss.gridssConfig; null omits CONFIGURATION_FILE */ }: with nixpkgs; @@ -64,6 +65,7 @@ stdenv.mkDerivation rec { INPUT_VCF=input.vcf \ OUTPUT_VCF=out.vcf \ WORKING_DIR=$TMPDIR/ \ + ${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.gridssConfig config)} \ TMP_DIR=$TMPDIR/ mv out.vcf $out diff --git a/tools/gridss-callVariants.nix b/tools/gridss-callVariants.nix index 416eb7c..dc978fa 100644 --- a/tools/gridss-callVariants.nix +++ b/tools/gridss-callVariants.nix @@ -4,6 +4,7 @@ , bwaIndexAttrs ? {} , faidxAttrs ? {} , flags ? null +, config ? null }: with nixpkgs; @@ -41,6 +42,7 @@ stdenv.mkDerivation rec { WORKER_THREADS=$NIX_BUILD_CORES \ TMP_DIR=. \ WORKING_DIR=. 
\ + ${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.gridssConfig config)} \ REFERENCE_SEQUENCE="ref.fa" \ ${concatMapStringsSep " " (i: "INPUT=\"${i}\"") inputs} \ OUTPUT="$out/gridss.vcf" \ diff --git a/tools/gridss-collectGridssMetrics.nix b/tools/gridss-collectGridssMetrics.nix index bb8d2e9..86732b8 100644 --- a/tools/gridss-collectGridssMetrics.nix +++ b/tools/gridss-collectGridssMetrics.nix @@ -2,6 +2,7 @@ , nixpkgs , thresholdCoverage ? 10000 , flags ? null +, config ? null /* attrset rendered to a file by bionix.gridss.gridssConfig; null omits CONFIGURATION_FILE */ }: with nixpkgs; @@ -25,6 +26,7 @@ stdenv.mkDerivation rec { java -Xmx1G -cp ${bionix.gridss.jar} \ gridss.analysis.CollectGridssMetrics \ ${optionalString sorted "ASSUME_SORTED=true"} \ + ${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.gridssConfig config)} \ I=input.bam \ O=$out \ THRESHOLD_COVERAGE=${toString thresholdCoverage} diff --git a/tools/gridss-collectMetrics.nix b/tools/gridss-collectMetrics.nix index 4688808..c1909f1 100644 --- a/tools/gridss-collectMetrics.nix +++ b/tools/gridss-collectMetrics.nix @@ -2,6 +2,7 @@ , nixpkgs , thresholdCoverage ? 10000 , flags ? null +, config ? null /* attrset rendered to a file by bionix.gridss.gridssConfig; null omits CONFIGURATION_FILE */ }: with nixpkgs; @@ -22,6 +23,7 @@ stdenv.mkDerivation rec { mkdir $out java -Xmx1G -cp ${bionix.gridss.jar} \ gridss.analysis.CollectGridssMetrics \ + ${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.gridssConfig config)} \ I=${input}\ O=$out/input \ AS=true \ diff --git a/tools/gridss-computeSamTags.nix b/tools/gridss-computeSamTags.nix index f75cea0..5b19a3a 100644 --- a/tools/gridss-computeSamTags.nix +++ b/tools/gridss-computeSamTags.nix @@ -4,6 +4,7 @@ , bwaIndexAttrs ? {} , faidxAttrs ? {} , flags ? null +, config ? 
null /* attrset rendered to a file by bionix.gridss.gridssConfig; null omits CONFIGURATION_FILE */ }: with nixpkgs; @@ -34,6 +35,7 @@ stdenv.mkDerivation rec { REFERENCE_SEQUENCE=ref.fa \ WORKING_DIR=$TMP_DIR \ TMP_DIR=$TMP_DIR \ + ${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.gridssConfig config)} \ I=${input} \ O=$out \ AS=true diff --git a/tools/gridss-extractSVReads.nix b/tools/gridss-extractSVReads.nix index a10abf5..e3ed1cc 100644 --- a/tools/gridss-extractSVReads.nix +++ b/tools/gridss-extractSVReads.nix @@ -5,6 +5,7 @@ , flags ? null , unmappedReads ? false , minClipLength ? 5 +, config ? null /* attrset rendered to a file by bionix.gridss.gridssConfig; null omits CONFIGURATION_FILE */ }: with nixpkgs; @@ -35,6 +36,7 @@ stdenv.mkDerivation rec { METRICS_OUTPUT=$out/input.sv_metrics \ INSERT_SIZE_METRICS=$out/input.insert_size_metrics \ UNMAPPED_READS=${if unmappedReads then "true" else "false"} \ + ${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.gridssConfig config)} \ MIN_CLIP_LENGTH=${toString minClipLength} ''; } diff --git a/tools/gridss-identifyVariants.nix b/tools/gridss-identifyVariants.nix index f44771b..16982e2 100644 --- a/tools/gridss-identifyVariants.nix +++ b/tools/gridss-identifyVariants.nix @@ -7,6 +7,7 @@ , collectMetricsAttrs ? {} , softClipsToSplitReadsAttrs ? {} , flags ? null +, config ? null /* attrset rendered to a file by bionix.gridss.gridssConfig; null omits CONFIGURATION_FILE */ }: with nixpkgs; @@ -60,6 +61,7 @@ stdenv.mkDerivation rec { ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \ ASSEMBLY=${assembly} \ OUTPUT_VCF=out.vcf \ + ${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.gridssConfig config)} \ WORKING_DIR=$TMPDIR/ \ TMP_DIR=$TMPDIR/ diff --git a/tools/gridss-softClipsToSplitReads.nix b/tools/gridss-softClipsToSplitReads.nix index 8a7dca3..35cd7b4 100644 --- a/tools/gridss-softClipsToSplitReads.nix +++ b/tools/gridss-softClipsToSplitReads.nix @@ -4,6 +4,7 @@ , faidxAttrs ? {} , alignerStreaming ? false , flags ? null +, config ? 
null /* attrset rendered to a file by bionix.gridss.gridssConfig; null omits CONFIGURATION_FILE */ }: with nixpkgs; @@ -33,6 +34,7 @@ stdenv.mkDerivation rec { I=${input} \ O=$out \ ${optionalString alignerStreaming "ALIGNER_STREAMING=true"} \ + ${optionalString (config != null) ("CONFIGURATION_FILE=" + bionix.gridss.gridssConfig config)} \ WORKER_THREADS=$NIX_BUILD_CORES ''; passthru.filetype = diff --git a/tools/gridss.nix b/tools/gridss.nix index d46a8ce..bc2f0b8 100644 --- a/tools/gridss.nix +++ b/tools/gridss.nix @@ -8,6 +8,7 @@ rec { url = "https://github.com/PapenfussLab/gridss/releases/download/v2.0.0/gridss-2.0.0-gridss-jar-with-dependencies.jar"; sha256 = "01srl3qvv060whqg1y1fpxjc5cwga5wscs1bmf1v3z87dignra7k"; }; + gridssConfig = callBionix ./gridss-configFile.nix {}; /* built from ./gridss-configFile.nix; stage files reach it as bionix.gridss.gridssConfig */ callVariants = callBionix ./gridss-callVariants.nix; computeSamTags = callBionix ./gridss-computeSamTags.nix; softClipsToSplitReads = callBionix ./gridss-softClipsToSplitReads.nix; -- cgit v1.2.3