From dda29ca7685d360d5428dd827d11a9e4139a0872 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Thu, 6 Oct 2022 10:21:34 +1100 Subject: init --- .gitignore | 1 + common/flake.lock | 58 ++++++++++++++++++++++++ day1/README.md | 93 +++++++++++++++++++++++++++++++++++++++ day2/ex1-hello-world/default.nix | 22 +++++++++ day2/ex1-hello-world/flake.lock | 1 + day2/ex1-hello-world/flake.nix | 20 +++++++++ day2/ex1-hello-world/solution.nix | 24 ++++++++++ day2/ex2-dlrow-olleh/default.nix | 30 +++++++++++++ day2/ex2-dlrow-olleh/flake.lock | 1 + day2/ex2-dlrow-olleh/flake.nix | 20 +++++++++ day2/ex2-dlrow-olleh/solution.nix | 14 ++++++ 11 files changed, 284 insertions(+) create mode 100644 .gitignore create mode 100755 common/flake.lock create mode 100644 day1/README.md create mode 100644 day2/ex1-hello-world/default.nix create mode 120000 day2/ex1-hello-world/flake.lock create mode 100755 day2/ex1-hello-world/flake.nix create mode 100644 day2/ex1-hello-world/solution.nix create mode 100644 day2/ex2-dlrow-olleh/default.nix create mode 120000 day2/ex2-dlrow-olleh/flake.lock create mode 100755 day2/ex2-dlrow-olleh/flake.nix create mode 100644 day2/ex2-dlrow-olleh/solution.nix diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e2f5dd2 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +result \ No newline at end of file diff --git a/common/flake.lock b/common/flake.lock new file mode 100755 index 0000000..77cb71a --- /dev/null +++ b/common/flake.lock @@ -0,0 +1,58 @@ +{ + "nodes": { + "bionix": { + "locked": { + "lastModified": 1664157112, + "narHash": "sha256-llJ4nRFxMhpy+tEywH1qzwcQjIQu5DCs6z2sIgel31w=", + "owner": "papenfusslab", + "repo": "bionix", + "rev": "81cfa50e6b345942951b68eac0e184ea025f2ae4", + "type": "github" + }, + "original": { + "owner": "papenfusslab", + "repo": "bionix", + "type": "github" + } + }, + "flake-utils": { + "locked": { + "lastModified": 1659877975, + "narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=", + "owner": 
"numtide", + "repo": "flake-utils", + "rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1665010605, + "narHash": "sha256-e2ysiL/iQndCRSPfilXrMA1bKG28DzXxhO4W0ZgQtos=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "8e2d88af1ab3563cee80ecf2add3f933fdfb4fa3", + "type": "github" + }, + "original": { + "owner": "nixos", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "bionix": "bionix", + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/day1/README.md b/day1/README.md new file mode 100644 index 0000000..f5be657 --- /dev/null +++ b/day1/README.md @@ -0,0 +1,93 @@ +# Day 1 - BioNix Workshop + +Let's start by defining *computational reproducibility* as always +obtaining the same output from a computation given the same inputs. In +other words, computational reproducibility is about making computations +*deterministic*. In the research context, this is important as +reproducibility allows others (and ourselves) to verify and build upon +what we have done in future. + +# A functional view of things and why Nix is needed + +What makes reproducibility difficult is the management of *state*, or +the context within which a computation takes place. State manipulation is +widespread: how many app updates or system updates do you recall +automatically being installed over the past year? Do you think your +analysis today will be the same in one year's time if your software stack +has changed? + +One way to deal with this problem is to make computations *pure* by forbidding +the use of anything that is not explicitly stated as an input. This is +the same idea as pure functional programming, only at the higher level of +executing software.
+ +Nix effectively enforces purity for software execution by ensuring the software +cannot access anything outside of the specified inputs. In this way, it can +guarantee a very high degree of reproducibility. Nix is a general build engine +most commonly used for building software today, but as we will see a bit later +it can also execute computational biology workflows in a pure manner with a small +library called BioNix. + +# Pipelines in BioNix + +``` +# This is an example pipeline specification to do multi-sample variant calling +# with the Platypus variant caller. Each input is preprocessed by aligning +# against a reference genome (defaults to GRCH38), fixing mate information, and +# marking duplicates. Finally platypus is called over all samples. +{ bionix ? import <bionix> { } +, inputs +, ref ? bionix.ref.grch38.seq +}: + +with bionix; +with lib; + +let + preprocess = flip pipe [ + (bwa.align { inherit ref; }) + (samtools.sort { nameSort = true; }) + (samtools.fixmate { }) + (samtools.sort { }) + (samtools.markdup { }) + ]; + +in +platypus.call { } (map preprocess inputs) +``` + +# Nix the language + +We will start with learning Nix the language, which is used for +specifying workflows. If you are familiar with JSON, it is very similar +in terms of available data types but has one very important addition: +functions.
Let's cover the basic data types and their syntax: + +- Booleans: `true` and `false` +- Strings: `"this is a string"` +- Numbers: `0`, `1.234` +- Lists: `[ 0 1.234 "string" ]` +- Attribute sets: `{ a = 5; b = "something else"; }` +- Comments: `# this is a comment` +- Functions: `x: x + 1` +- Variable binding: `let x = 5; in x #=> 5` +- Function application: `let f = x: x + 1; in f 5 #=> 6` +- File paths: `/path/to/file` + +Some common operators: +- Boolean disjunctions and conjunctions: `true || false #=> true`, `true && false #=> false` +- Ordering: `3 < 3 #=> false`, `3 <= 3 #=> true` +- Conditionals: `if 3 < 4 then "a" else "b" #=> "a"` +- Addition and subtraction: `3 + 4 #=> 7`, `3 - 4 #=> -1` +- Multiplication and division: `3 * 4 #=> 12`, `3.0 / 4 #=> 0.75` +- String concatenation: `"hello " + "world" #=> "hello world"` +- String interpolation: `"hello ${"world"}" #=> "hello world"`, `"1 + 2 = ${toString (1 + 2)}" #=> "1 + 2 = 3"` +- Attribute set unions: `{ a = 5; } // { b = 6; } #=> { a = 5; b = 6; }` + +# About this interface + +This workshop uses [A tour of +nix](https://github.com/nixcloud/tour_of_nix) with some altered content +for the purposes of learning enough of Nix the language to write +workflows in BioNix during the second part. Click next to continue to +the exercises. diff --git a/day2/ex1-hello-world/default.nix b/day2/ex1-hello-world/default.nix new file mode 100644 index 0000000..8eaffa7 --- /dev/null +++ b/day2/ex1-hello-world/default.nix @@ -0,0 +1,22 @@ +/* + This first exercise demonstrates how to define a processing stage, +which is just instructions on how to compute an output (the +`buildCommand`) from some inputs (in this case, there are no inputs). +Each stage must minimally define a `name`, shell code to build the +output in `buildCommand`, and any software that's required in +`buildInputs`. Note that `buildInputs = []` if not defined, meaning no +extra requirements over the standard environment.
+ +Try replacing the echo below with output from the GNU hello program. +Hint: the GNU hello program is available at `pkgs.hello` and the +executable is called `hello`. +*/ +{bionix}: +with bionix; + stage { + name = "hello-world"; + + buildCommand = '' + echo hello world > $out + ''; + } diff --git a/day2/ex1-hello-world/flake.lock b/day2/ex1-hello-world/flake.lock new file mode 120000 index 0000000..981422e --- /dev/null +++ b/day2/ex1-hello-world/flake.lock @@ -0,0 +1 @@ +../../common/flake.lock \ No newline at end of file diff --git a/day2/ex1-hello-world/flake.nix b/day2/ex1-hello-world/flake.nix new file mode 100755 index 0000000..bfaca96 --- /dev/null +++ b/day2/ex1-hello-world/flake.nix @@ -0,0 +1,20 @@ +{ + inputs = { + nixpkgs.url = "github:nixos/nixpkgs"; + bionix.url = "github:papenfusslab/bionix"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { + self, + nixpkgs, + bionix, + flake-utils, + }: + flake-utils.lib.eachDefaultSystem (system: let + pkgs = import nixpkgs {inherit system;}; + bionix' = import bionix {nixpkgs = pkgs;}; + in { + defaultPackage = bionix'.callBionix ./. {}; + }); +} diff --git a/day2/ex1-hello-world/solution.nix b/day2/ex1-hello-world/solution.nix new file mode 100644 index 0000000..d0531a2 --- /dev/null +++ b/day2/ex1-hello-world/solution.nix @@ -0,0 +1,24 @@ +/* + This first exercise demonstrates how to define a processing stage, +which is just instructions on how to compute an output (the +`buildCommand`) from some inputs (in this case, there are no inputs). +Each stage must minimally define a `name`, shell code to build the +output in `buildCommand`, and any software that's required in +`buildInputs`. Note that `buildInputs = []` if not defined, meaning no +extra requirements over the standard environment. + +Try replacing the echo below with output from the GNU hello program. +Hint: the GNU hello program is available at `pkgs.hello` and the +executable is called `hello`. 
+*/ +{bionix}: +with bionix; + stage { + name = "hello-world"; + + buildInputs = [pkgs.hello]; + + buildCommand = '' + hello > $out + ''; + } diff --git a/day2/ex2-dlrow-olleh/default.nix b/day2/ex2-dlrow-olleh/default.nix new file mode 100644 index 0000000..91bbe1f --- /dev/null +++ b/day2/ex2-dlrow-olleh/default.nix @@ -0,0 +1,30 @@ +/* +This exercise demonstrates the first workflow consisting of multiple +steps. We will build upon the previous example by using the string +produced in exercise 1 as input to this workflow. You can see how other +files can be imported and used in the construction of workflows using +`callBionix`, which imports a file and passes it `bionix` along with +potentially some additional arguments. + +Goal: fill out the stage here to reverse the string using the `rev` +program, which reads lines on stdin and writes them to stdout with the +characters reversed. You will need to choose exactly *which* rev you +want to use in `buildInputs`: there are three providers available +(busybox, toybox, utillinux) and you can try them all. 
+ +Bonus: change the last line to call rev twice, thereby reversing the +strings back to the original orientation +*/ +{bionix}: +with bionix; let + hello-world = callBionix ../ex1-hello-world {}; + + rev = input: + stage { + name = "rev"; + buildInputs = []; + buildCommand = '' + ''; + }; +in + rev hello-world diff --git a/day2/ex2-dlrow-olleh/flake.lock b/day2/ex2-dlrow-olleh/flake.lock new file mode 120000 index 0000000..981422e --- /dev/null +++ b/day2/ex2-dlrow-olleh/flake.lock @@ -0,0 +1 @@ +../../common/flake.lock \ No newline at end of file diff --git a/day2/ex2-dlrow-olleh/flake.nix b/day2/ex2-dlrow-olleh/flake.nix new file mode 100755 index 0000000..bfaca96 --- /dev/null +++ b/day2/ex2-dlrow-olleh/flake.nix @@ -0,0 +1,20 @@ +{ + inputs = { + nixpkgs.url = "github:nixos/nixpkgs"; + bionix.url = "github:papenfusslab/bionix"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { + self, + nixpkgs, + bionix, + flake-utils, + }: + flake-utils.lib.eachDefaultSystem (system: let + pkgs = import nixpkgs {inherit system;}; + bionix' = import bionix {nixpkgs = pkgs;}; + in { + defaultPackage = bionix'.callBionix ./. {}; + }); +} diff --git a/day2/ex2-dlrow-olleh/solution.nix b/day2/ex2-dlrow-olleh/solution.nix new file mode 100644 index 0000000..ab235fe --- /dev/null +++ b/day2/ex2-dlrow-olleh/solution.nix @@ -0,0 +1,14 @@ +{bionix}: +with bionix; let + hello-world = callBionix ../ex1-hello-world {}; + + rev = input: + stage { + name = "rev"; + buildInputs = [pkgs.toybox]; + buildCommand = '' + rev < ${input} > $out + ''; + }; +in + rev (rev hello-world) -- cgit v1.2.3