From a1d18efc18772a233aa759b622c3a9960824f109 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Tue, 30 Apr 2019 09:47:49 +1000 Subject: cleanup examples and stray files --- abcbs_2018.md | 44 ----------------------- conda.nix | 28 --------------- examples/ex-nextflow/nextflow-example1.nix | 30 ++++++++++++++++ examples/ex-tnpair/tnpair | 58 ++++++++++++++++++++++++++++++ examples/ex-tnpair/tnpair.nix | 26 ++++++++++++++ examples/ex-wdl/wdl-scatter-gather.nix | 33 +++++++++++++++++ examples/tnpair | 58 ------------------------------ examples/tnpair.nix | 26 -------------- test-tnpair.nix | 4 +-- 9 files changed, 149 insertions(+), 158 deletions(-) delete mode 100644 abcbs_2018.md delete mode 100644 conda.nix create mode 100644 examples/ex-nextflow/nextflow-example1.nix create mode 100755 examples/ex-tnpair/tnpair create mode 100644 examples/ex-tnpair/tnpair.nix create mode 100644 examples/ex-wdl/wdl-scatter-gather.nix delete mode 100755 examples/tnpair delete mode 100644 examples/tnpair.nix diff --git a/abcbs_2018.md b/abcbs_2018.md deleted file mode 100644 index a8adc45..0000000 --- a/abcbs_2018.md +++ /dev/null @@ -1,44 +0,0 @@ -**Nix for reproducible research** - -Justin Bedő, Leon Di Stefano, and Tony Papenfuss - -> A challenge for bioinformaticians is to make our computations reproducible — that is, easy to rerun, combine, share, and guaranteed to generate the same results. -> We show how Nix, a next generation cross-platform software deployment system, cleanly overcomes problems usually tackled with a combination of package managers (e.g., conda), containers (e.g., Docker, Singularity), and workflow engines (e.g., Toil, Ruffus). -> -> On its own Nix can be used as a package manager; it can also easily create isolated development environments and export portable containers to share with others. 
-> We have created a number of transparent and lightweight extensions that enable Nix to succinctly specify bioinformatics analysis environments and pipelines locally, in HPC environments, or in the cloud. -> -> Nix uses hash-based naming to ensure that what it builds is uniquely specified, isolation and completeness to ensure that its build processes are deterministic, and a simple programming language to ensure that the whole system is easy to manage. -> It has an extensive package collection, which includes all of CRAN and Bioconductor, and the conda package manager allowing access to Bioconda recipes. -> Nix is well-supported, general-purpose software that has been in development for over 10 years. -> -> We will demonstrate how Nix with our extensions can be used to succinctly specify a typical bioinformatics pipeline and contrast this against other dedicated bioinformatics pipeline languages. -> We then show how it can be executed in whole or in part on an HPC queuing system. -> Finally, we show that the pipeline can also be executed using cloud resources. - -### Stuff to match in competitors - -- **A few standard pipelines** -- Dealing with big files -- Slightly complicated analyses -- Local, HPC, and cloud execution -- Resumable, parallel -- Bioconda import - -### Points of difference - -- **Full-stack reproducibility with one tool** -- **A language rather than a configuration format (cf. 
CWL/Javascript)** -- Not bioinformatics-specific -- Mature (~10y) -- Containers obsolete (but easy to generate) -- Higher level of reproducibility overall (hashing of inputs, outputs, derivations) -- Safety - - Declarative language - - Type/tag system (to do) - -### Weaknesses - -- Small bioinformatics collection -- No build execution stats -- Subtleties around filesystems and the Nix store diff --git a/conda.nix b/conda.nix deleted file mode 100644 index bbcb7cf..0000000 --- a/conda.nix +++ /dev/null @@ -1,28 +0,0 @@ -{ stdenv, lib, writeScript, conda }: - -{ - buildCondaEnv = { depends ? [], run }: stdenv.mkDerivation { - name = "conda-env"; - propagatedBuildInputs = [ conda ] ++ depends; - buildCommand = '' - mkdir $out - HOME=$out - conda-shell-4.3.31 << EOF - conda-install - ${run} - EOF - ''; - }; - - inCondaEnv = env: run: stdenv.mkDerivation { - name = "with-conda-env"; - buildCommand = '' - #!${stdenv.shell} - export HOME=${env} - ${conda}/bin/conda-shell-4.3.31 << EOF - ${run} - EOF - ''; - }; -} - diff --git a/examples/ex-nextflow/nextflow-example1.nix b/examples/ex-nextflow/nextflow-example1.nix new file mode 100644 index 0000000..b31984a --- /dev/null +++ b/examples/ex-nextflow/nextflow-example1.nix @@ -0,0 +1,30 @@ +# This is a translation of the Nextflow example found at +# https://www.nextflow.io/example1.html +{ bionix ? import {} +, input ? 
./sample.fa}: + +with bionix; +with lib; + +let + splitSequences = fa: stage { + name = "splitSequences"; + buildInputs = [ pkgs.gawk ]; + buildCommand = '' + awk '/^>/{f="seq_"++d} {print > f}' ${fa} + mkdir $out + cp seq* $out + ''; + }; + + reverse = fa: stage { + name = "reverse"; + buildCommand = '' + ${pkgs.utillinux}/bin/rev ${fa} > $out + ''; + }; + +in pipe [ + splitSequences + (each reverse) +] input diff --git a/examples/ex-tnpair/tnpair b/examples/ex-tnpair/tnpair new file mode 100755 index 0000000..5b84d5c --- /dev/null +++ b/examples/ex-tnpair/tnpair @@ -0,0 +1,58 @@ +#!/bin/sh + +set -e + +if [[ $# -ne 5 ]] ; then + echo "Usage: $0 ref normal1 normal2 tumour1 tumour2" + exit 1 +fi + +function cleanup { + if [[ -e tnpair-$$ ]]; then + rm tnpair-$$ + fi +} +trap cleanup INT TERM EXIT + +ref=`readlink -f $1` +norm1=`readlink -f $2` +norm2=`readlink -f $3` +tumour1=`readlink -f $4` +tumour2=`readlink -f $5` + +refhash=`nix-hash --base32 --type sha256 --flat $ref` +norm1hash=`nix-hash --base32 --type sha256 --flat $norm1` +norm2hash=`nix-hash --base32 --type sha256 --flat $norm2` +tumour1hash=`nix-hash --base32 --type sha256 --flat $tumour1` +tumour2hash=`nix-hash --base32 --type sha256 --flat $tumour2` + +cat > tnpair-$$ < {} +, normal +, tumour +, ref +}: + +with bionix; +with lib; + +let + input = mapAttrs (_: fetchFastQGZ); + + preprocess = pipe [ + input + (bwa.align { ref = fetchFastA ref; }) + (samtools.fixmate {}) + (samtools.sort {}) + (samtools.markdup {}) + ]; + +in linkDrv [ + (ln (strelka.call {} {normal = preprocess normal; tumour = preprocess tumour;}) "strelka") + (ln (preprocess normal) "normal.bam") + (ln (preprocess tumour) "tumour.bam") +] diff --git a/examples/ex-wdl/wdl-scatter-gather.nix b/examples/ex-wdl/wdl-scatter-gather.nix new file mode 100644 index 0000000..387d382 --- /dev/null +++ b/examples/ex-wdl/wdl-scatter-gather.nix @@ -0,0 +1,33 @@ +# The scatter-gather example from https://github.com/openwdl/wdl +# translated to 
bionix +{ bionix ? import {} }: + +with bionix; +with lib; + +let + + prepare = splitString "\n" (removeSuffix "\n" (readFile (stage { + name = "prepare"; + buildInputs = [ pkgs.python3 ]; + buildCommand = '' + python -c "print('one\ntwo\nthree\nfour', end=''')" > $out + ''; + }))); + + analysis = str: removeSuffix "\n" (readFile (stage { + name = "analysis"; + buildInputs = [ pkgs.python ]; + buildCommand = '' + python -c "print('_${str}_')" > $out + ''; + })); + + gather = strs: stage { + name = "gather"; + buildCommand = '' + echo ${concatStringsSep " " strs} > $out + ''; + }; + +in gather (map analysis prepare) diff --git a/examples/tnpair b/examples/tnpair deleted file mode 100755 index 5b84d5c..0000000 --- a/examples/tnpair +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/sh - -set -e - -if [[ $# -ne 5 ]] ; then - echo "Usage: $0 ref normal1 normal2 tumour1 tumour2" - exit 1 -fi - -function cleanup { - if [[ -e tnpair-$$ ]]; then - rm tnpair-$$ - fi -} -trap cleanup INT TERM EXIT - -ref=`readlink -f $1` -norm1=`readlink -f $2` -norm2=`readlink -f $3` -tumour1=`readlink -f $4` -tumour2=`readlink -f $5` - -refhash=`nix-hash --base32 --type sha256 --flat $ref` -norm1hash=`nix-hash --base32 --type sha256 --flat $norm1` -norm2hash=`nix-hash --base32 --type sha256 --flat $norm2` -tumour1hash=`nix-hash --base32 --type sha256 --flat $tumour1` -tumour2hash=`nix-hash --base32 --type sha256 --flat $tumour2` - -cat > tnpair-$$ < {} -, normal -, tumour -, ref -}: - -with bionix; -with lib; - -let - input = mapAttrs (_: fetchFastQGZ); - - preprocess = pipe [ - input - (bwa.align { ref = fetchFastA ref; }) - (samtools.fixmate {}) - (samtools.sort {}) - (samtools.markdup {}) - ]; - -in linkDrv [ - (ln (strelka.call {} {normal = preprocess normal; tumour = preprocess tumour;}) "strelka") - (ln (preprocess normal) "normal.bam") - (ln (preprocess tumour) "tumour.bam") -] diff --git a/test-tnpair.nix b/test-tnpair.nix index cd8620a..1d281fa 100644 --- a/test-tnpair.nix +++ 
b/test-tnpair.nix @@ -49,8 +49,8 @@ let (ln (strelka.snvs tnpairResult.variants) "strelka.snvs.vcf") (ln (strelka.variants tnpairResult.glvariants) "strelka.gl.vcf") (ln (bowtie.align {inherit ref;} tnpair.normal.files) "alignments/bowtie-normal.bam") - (ln (minimap2.align {inherit ref; preset = "sr"; } tnpair.normal.files) "alignments/minimap2-normal.bam") - (ln (snap.align {inherit ref; } tnpair.normal.files) "alignments/snap-normal.bam") + #(ln (minimap2.align {inherit ref; preset = "sr"; } tnpair.normal.files) "alignments/minimap2-normal.bam") + #(ln (snap.align {inherit ref; } tnpair.normal.files) "alignments/snap-normal.bam") (ln (gridss.callVariants {} (with tnpairResult.alignments; [normal tumour])) "gridss") (ln (gridss.call (with tnpairResult.alignments; [normal tumour])) "gridss2") (ln (gridss.callAndAssemble (with tnpairResult.alignments; [normal tumour])) "gridss3") -- cgit v1.2.3