From cdb7056a14915354d879c519dd396204e4af3959 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Fri, 10 Apr 2020 09:47:44 +1000 Subject: update tnpair example to run on colo828 --- examples/ex-tnpair/README.md | 22 ++++++++++++++++ examples/ex-tnpair/cluster.nix | 25 ++++++++++++++++++ examples/ex-tnpair/default.nix | 50 ++++++++++++++++++++++++++++++++++++ examples/ex-tnpair/tnpair | 58 ------------------------------------------ examples/ex-tnpair/tnpair.nix | 56 +++++++++++++++++++++++++++------------- 5 files changed, 136 insertions(+), 75 deletions(-) create mode 100644 examples/ex-tnpair/README.md create mode 100644 examples/ex-tnpair/cluster.nix create mode 100644 examples/ex-tnpair/default.nix delete mode 100755 examples/ex-tnpair/tnpair diff --git a/examples/ex-tnpair/README.md b/examples/ex-tnpair/README.md new file mode 100644 index 0000000..0eef691 --- /dev/null +++ b/examples/ex-tnpair/README.md @@ -0,0 +1,22 @@ +This example is a tumour-normal processing workflow applied to a +publicly available whole-genome sequencing (WGS) dataset. As this +example uses WGS data, a large amount of data will be downloaded. +Furthermore, a large amount of space will be required to build the final +products. + +# Building on local machine + +Run `nix build -I bionix=../..` in this directory. + +# Building via HPC (slurm or torque) + +Run `nix build -f cluster.nix` to build on slurm. Note that Nix must be +configured such that the temporary build directories are created on +shared storage. + +For Torque, run `nix build -f cluster.nix -I bionix=../.. --argstr +tmpDir /scratch/`. Unlike the slurm handler, a shared tmpdir location +must be specified. + +In both cases, you may need to adjust the resource limits specified in +cluster.nix to suit your particular cluster hardware. 
diff --git a/examples/ex-tnpair/cluster.nix b/examples/ex-tnpair/cluster.nix new file mode 100644 index 0000000..d7537b2 --- /dev/null +++ b/examples/ex-tnpair/cluster.nix @@ -0,0 +1,25 @@ +{ bionix ? import <bionix> { }, tmpDir ? null }: + +let + bionix' = (bionix."${if tmpDir == null then "slurm" else "qsub"}" ({ + ppn = 24; + mem = 7; + walltime = "3:00:00"; + } // bionix.lib.optionalAttrs (tmpDir != null) { inherit tmpDir; })).extend + (self: super: + with self; { + minimap2.align = def super.minimap2.align { + mem = 15; + walltime = "16:00:00"; + }; + samtools = super.samtools // (with super.samtools; { + markdup = def markdup { walltime = "12:00:00"; }; + fixmate = def fixmate { walltime = "10:00:00"; }; + sort = def sort { + mem = 27; + flags = "-m 1G"; + }; + }); + }); + +in import ./. { bionix = bionix'; } diff --git a/examples/ex-tnpair/default.nix b/examples/ex-tnpair/default.nix new file mode 100644 index 0000000..f9581f8 --- /dev/null +++ b/examples/ex-tnpair/default.nix @@ -0,0 +1,50 @@ +{ bionix ? 
import <bionix> { } }: + +with bionix; +with pkgs; +with lib; + +let + pair = { + normal = { + type = "reference"; + inputs = { + input1 = { + url = + "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR275/009/ERR2752449/ERR2752449_1.fastq.gz"; + sha256 = + "52f8b1b1a58b60c66ce566371dfe7a1301a787e8521a4ee41019bbf4f4d18dfe"; + }; + input2 = { + url = + "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR275/009/ERR2752449/ERR2752449_2.fastq.gz"; + sha256 = + "9d1e2ea772bbdf5ff3ee6a44d2d4244155b7d195a37745a2028628e2543cd8f0"; + }; + }; + }; + + tumour = { + type = "melanoma"; + inputs = { + input1 = { + url = + "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR275/000/ERR2752450/ERR2752450_1.fastq.gz"; + sha256 = + "2b3c98c36c2b2b6bc4682401a592a900f8eb2a143f93494ee448d6b075c12ec7"; + }; + input2 = { + url = + "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR275/000/ERR2752450/ERR2752450_2.fastq.gz"; + sha256 = + "0569beded708ef520dadca45ab8a70bd890caf441a0ad3749397f315dc1d2e8c"; + }; + }; + }; + }; + + fetch = s: mapAttrs (_: fetchFastQGZ) s.inputs; + +in import ./tnpair.nix { + inherit pair fetch bionix; +} diff --git a/examples/ex-tnpair/tnpair b/examples/ex-tnpair/tnpair deleted file mode 100755 index d90374b..0000000 --- a/examples/ex-tnpair/tnpair +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/sh - -set -e - -if [[ $# -ne 5 ]] ; then - echo "Usage: $0 ref normal1 normal2 tumour1 tumour2" - exit 1 -fi - -function cleanup { - if [[ -e tnpair-$$ ]]; then - rm tnpair-$$ - fi -} -trap cleanup INT TERM EXIT - -ref=`readlink -f $1` -norm1=`readlink -f $2` -norm2=`readlink -f $3` -tumour1=`readlink -f $4` -tumour2=`readlink -f $5` - -refhash=`nix-hash --base32 --type sha256 --flat $ref` -norm1hash=`nix-hash --base32 --type sha256 --flat $norm1` -norm2hash=`nix-hash --base32 --type sha256 --flat $norm2` -tumour1hash=`nix-hash --base32 --type sha256 --flat $tumour1` -tumour2hash=`nix-hash --base32 --type sha256 --flat $tumour2` - -cat > tnpair-$$ < {}, pair, fetch}: with bionix; with lib; +with types; + +with minimap2; +with 
samtools; +with snpeff; let - input = mapAttrs (_: fetchFastQGZ); - - preprocess = flip pipe [ - input - (bwa.align { ref = fetchFastA ref; }) - (samtools.fixmate {}) - (samtools.sort {}) - (samtools.markdup {}) + preprocess = s: pipe s [ + fetch + (align { preset = "sr"; ref = ref.grch38.seq; flags = "-R'@RG\\tID:${s.type}\\tSM:${s.type}'"; }) + (fixmate {}) + (sort { }) + (markdup { }) ]; + dropErrors = input: stage { + name = "drop-errors"; + buildCommand = '' + grep -v "ERROR_" ${input} > $out + ''; + passthru.filetype = input.filetype; + }; + + bams = mapAttrs (_: preprocess) pair; + + variants = let + somatic = strelka.callSomatic { } bams; in mapAttrs (_: flip pipe [ + (compression.uncompress { }) + (snpeff.annotate { db = ref.grch38.snpeff.db; }) + dropErrors + (snpeff.dbnsfp { dbnsfp = ref.grch38.snpeff.dbnsfp; }) + ]) { + "snvs.vcf" = somatic.snvs; + "indels.vcf" = somatic.indels; + "germline.vcf" = strelka.call { } [bams.normal]; + }; + + cnvs = cnvkit.callCNV { } { normals = [ bams.normal ]; tumours = [ bams.tumour ]; }; + in linkOutputs { - strelka = strelka.callSomatic {} {normal = preprocess normal; tumour = preprocess tumour;}; - "normal.bam" = preprocess normal; - "tumour.bam" = preprocess tumour; + inherit variants; + alignments = linkOutputs (mapAttrs' (n: nameValuePair (n + ".bam")) bams); + cnvkit = cnvs; } -- cgit v1.2.3