diff options
-rw-r--r-- | abcbs_2018.md | 44 | ||||
-rw-r--r-- | conda.nix | 28 | ||||
-rw-r--r-- | examples/ex-nextflow/nextflow-example1.nix | 30 | ||||
-rwxr-xr-x | examples/ex-tnpair/tnpair (renamed from examples/tnpair) | 0 | ||||
-rw-r--r-- | examples/ex-tnpair/tnpair.nix (renamed from examples/tnpair.nix) | 0 | ||||
-rw-r--r-- | examples/ex-wdl/wdl-scatter-gather.nix | 33 | ||||
-rw-r--r-- | test-tnpair.nix | 4 |
7 files changed, 65 insertions, 74 deletions
diff --git a/abcbs_2018.md b/abcbs_2018.md deleted file mode 100644 index a8adc45..0000000 --- a/abcbs_2018.md +++ /dev/null @@ -1,44 +0,0 @@ -**Nix for reproducible research** - -Justin Bedő, Leon Di Stefano, and Tony Papenfuss - -> A challenge for bioinformaticians is to make our computations reproducible — that is, easy to rerun, combine, share, and guaranteed to generate the same results. -> We show how Nix, a next generation cross-platform software deployment system, cleanly overcomes problems usually tackled with a combination of package managers (e.g., conda), containers (e.g., Docker, Singularity), and workflow engines (e.g., Toil, Ruffus). -> -> On its own Nix can be used as a package manager; it can also easily create isolated development environments and export portable containers to share with others. -> We have created a number of transparent and lightweight extensions that enable Nix to succinctly specify bioinformatics analysis environments and pipelines locally, in HPC environments, or in the cloud. -> -> Nix uses hash-based naming to ensure that what it builds is uniquely specified, isolation and completeness to ensure that its build processes are deterministic, and a simple programming language to ensure that the whole system is easy to manage. -> It has an extensive package collection, which includes all of CRAN and Bioconductor, and the conda package manager allowing access to Bioconda recipes. -> Nix is well supported and general-purpose software that has been in development for over 10 years. -> -> We will demonstrate how Nix with our extensions can be used to succinctly specify a typical bioinformatics pipeline and contrast this against other dedicated bioinformatics pipeline languages. -> We then show how it can be executed in whole or in part on an HPC queuing system -> Finally, we show that the pipeline can also be executed using cloud resources. - -### Stuff to match in competitors - -- **A few standard pipelines** -- Dealing with big files -- Slightly complicated analyses -- local, HPC, and cloud execution -- Resumable, parallel -- Bioconda import - -### Points of difference - -- **Full-stack reproduciblity with one tool** -- **A language rather than a configuration format (cf. CWL/Javascript)** -- Not bioinformatics-specific -- Mature (~10y) -- Containers obsolete (but easy to generate) -- Higher level of reproducibility overall (hashing of inputs, outputs, derivations) -- Safety - - Declarative language - - Type/tag system (to do) - -### Weaknesses - -- Small bioinformatics collection -- No build execution stats -- Subtleties around filesystems and the Nix store diff --git a/conda.nix b/conda.nix deleted file mode 100644 index bbcb7cf..0000000 --- a/conda.nix +++ /dev/null @@ -1,28 +0,0 @@ -{ stdenv, lib, writeScript, conda }: - -{ - buildCondaEnv = { depends ? [], run }: stdenv.mkDerivation { - name = "conda-env"; - propagatedBuildInputs = [ conda ] ++ depends; - buildCommand = '' - mkdir $out - HOME=$out - conda-shell-4.3.31 << EOF - conda-install - ${run} - EOF - ''; - }; - - inCondaEnv = env: run: stdenv.mkDerivation { - name = "with-conda-env"; - buildCommand = '' - #!${stdenv.shell} - export HOME=${env} - ${conda}/bin/conda-shell-4.3.31 << EOF - ${run} - EOF - ''; - }; -} - diff --git a/examples/ex-nextflow/nextflow-example1.nix b/examples/ex-nextflow/nextflow-example1.nix new file mode 100644 index 0000000..b31984a --- /dev/null +++ b/examples/ex-nextflow/nextflow-example1.nix @@ -0,0 +1,30 @@ +# This is a translation of the Nextflow example found at +# https://www.nextflow.io/example1.html +{ bionix ? import <bionix> {} +, input ? ./sample.fa}: + +with bionix; +with lib; + +let + splitSequences = fa: stage { + name = "splitSequences"; + buildInputs = [ pkgs.gawk ]; + buildCommand = '' + awk '/^>/{f="seq_"++d} {print > f}' ${fa} + mkdir $out + cp seq* $out + ''; + }; + + reverse = fa: stage { + name = "reverse"; + buildCommand = '' + ${pkgs.utillinux}/bin/rev ${fa} > $out + ''; + }; + +in pipe [ + splitSequences + (each reverse) +] input diff --git a/examples/tnpair b/examples/ex-tnpair/tnpair index 5b84d5c..5b84d5c 100755 --- a/examples/tnpair +++ b/examples/ex-tnpair/tnpair diff --git a/examples/tnpair.nix b/examples/ex-tnpair/tnpair.nix index 2939db4..2939db4 100644 --- a/examples/tnpair.nix +++ b/examples/ex-tnpair/tnpair.nix diff --git a/examples/ex-wdl/wdl-scatter-gather.nix b/examples/ex-wdl/wdl-scatter-gather.nix new file mode 100644 index 0000000..387d382 --- /dev/null +++ b/examples/ex-wdl/wdl-scatter-gather.nix @@ -0,0 +1,33 @@ +# The scatter-gather example from https://github.com/openwdl/wdl +# translated to bionix +{ bionix ? import <bionix> {} }: + +with bionix; +with lib; + +let + + prepare = splitString "\n" (removeSuffix "\n" (readFile (stage { + name = "prepare"; + buildInputs = [ pkgs.python3 ]; + buildCommand = '' + python -c "print('one\ntwo\nthree\nfour', end=''')" > $out + ''; + }))); + + analysis = str: removeSuffix "\n" (readFile (stage { + name = "analysis"; + buildInputs = [ pkgs.python ]; + buildCommand = '' + python -c "print('_${str}_')" > $out + ''; + })); + + gather = strs: stage { + name = "gather"; + buildCommand = '' + echo ${concatStringsSep " " strs} > $out + ''; + }; + +in gather (map analysis prepare) diff --git a/test-tnpair.nix b/test-tnpair.nix index cd8620a..1d281fa 100644 --- a/test-tnpair.nix +++ b/test-tnpair.nix @@ -49,8 +49,8 @@ let (ln (strelka.snvs tnpairResult.variants) "strelka.snvs.vcf") (ln (strelka.variants tnpairResult.glvariants) "strelka.gl.vcf") (ln (bowtie.align {inherit ref;} tnpair.normal.files) "alignments/bowtie-normal.bam") - (ln (minimap2.align {inherit ref; preset = "sr"; } tnpair.normal.files) "alignments/minimap2-normal.bam") - (ln (snap.align {inherit ref; } tnpair.normal.files) "alignments/snap-normal.bam") + #(ln (minimap2.align {inherit ref; preset = "sr"; } tnpair.normal.files) "alignments/minimap2-normal.bam") + #(ln (snap.align {inherit ref; } tnpair.normal.files) "alignments/snap-normal.bam") (ln (gridss.callVariants {} (with tnpairResult.alignments; [normal tumour])) "gridss") (ln (gridss.call (with tnpairResult.alignments; [normal tumour])) "gridss2") (ln (gridss.callAndAssemble (with tnpairResult.alignments; [normal tumour])) "gridss3") |