aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Bedo <cu@cua0.org>2022-10-06 10:21:34 +1100
committerJustin Bedo <cu@cua0.org>2022-10-07 14:58:01 +1100
commitdda29ca7685d360d5428dd827d11a9e4139a0872 (patch)
tree0dcc853fd90abf245b6ba0a7506b2e1093bf7800
init
-rw-r--r--.gitignore1
-rwxr-xr-xcommon/flake.lock58
-rw-r--r--day1/README.md93
-rw-r--r--day2/ex1-hello-world/default.nix22
l---------day2/ex1-hello-world/flake.lock1
-rwxr-xr-xday2/ex1-hello-world/flake.nix20
-rw-r--r--day2/ex1-hello-world/solution.nix24
-rw-r--r--day2/ex2-dlrow-olleh/default.nix30
l---------day2/ex2-dlrow-olleh/flake.lock1
-rwxr-xr-xday2/ex2-dlrow-olleh/flake.nix20
-rw-r--r--day2/ex2-dlrow-olleh/solution.nix14
11 files changed, 284 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e2f5dd2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+result \ No newline at end of file
diff --git a/common/flake.lock b/common/flake.lock
new file mode 100755
index 0000000..77cb71a
--- /dev/null
+++ b/common/flake.lock
@@ -0,0 +1,58 @@
+{
+ "nodes": {
+ "bionix": {
+ "locked": {
+ "lastModified": 1664157112,
+ "narHash": "sha256-llJ4nRFxMhpy+tEywH1qzwcQjIQu5DCs6z2sIgel31w=",
+ "owner": "papenfusslab",
+ "repo": "bionix",
+ "rev": "81cfa50e6b345942951b68eac0e184ea025f2ae4",
+ "type": "github"
+ },
+ "original": {
+ "owner": "papenfusslab",
+ "repo": "bionix",
+ "type": "github"
+ }
+ },
+ "flake-utils": {
+ "locked": {
+ "lastModified": 1659877975,
+ "narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=",
+ "owner": "numtide",
+ "repo": "flake-utils",
+ "rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0",
+ "type": "github"
+ },
+ "original": {
+ "owner": "numtide",
+ "repo": "flake-utils",
+ "type": "github"
+ }
+ },
+ "nixpkgs": {
+ "locked": {
+ "lastModified": 1665010605,
+ "narHash": "sha256-e2ysiL/iQndCRSPfilXrMA1bKG28DzXxhO4W0ZgQtos=",
+ "owner": "nixos",
+ "repo": "nixpkgs",
+ "rev": "8e2d88af1ab3563cee80ecf2add3f933fdfb4fa3",
+ "type": "github"
+ },
+ "original": {
+ "owner": "nixos",
+ "repo": "nixpkgs",
+ "type": "github"
+ }
+ },
+ "root": {
+ "inputs": {
+ "bionix": "bionix",
+ "flake-utils": "flake-utils",
+ "nixpkgs": "nixpkgs"
+ }
+ }
+ },
+ "root": "root",
+ "version": 7
+}
diff --git a/day1/README.md b/day1/README.md
new file mode 100644
index 0000000..f5be657
--- /dev/null
+++ b/day1/README.md
@@ -0,0 +1,93 @@
+# Day 1 - BioNix Workshop
+
+Let's start by defining *computational reproducibility* as always
+obtaining the same output from a computation given the same inputs. In
+other words, computational reproducibility is about making computations
+*deterministic*. In the research context, this is important as
+reproducibility allows others (and ourselves) to verify and build upon
+what we have done in future.
+
+# A functional view of things and why Nix is needed
+
+What makes reproducibility difficult is the management of *state*, or
+the context within which a computation takes place. State manipulation is
+widespread: how many app updates or system updates do you recall
+automatically being installed over the past year? Do you think your
+analysis today will be the same in one year's time if your software stack
+has changed?
+
+One way to deal with this problem is to make computations *pure* by forbidding
+the use of anything that is not explicitly stated as an input. This is
+the same idea as pure functional programming, only at the higher level of
+executing software.
+
+Nix effectively enforces purity for software execution by ensuring the software
+cannot access anything outside of the specified inputs. In this way, it can
+guarantee a very high degree of reproducibility. Nix is a general build engine
+most commonly used for building software today, but as we will see a bit later
+it can also execute computational biology workflows in a pure manner with a small
+library called BioNix.
+
+# Pipelines in BioNix
+
+```
+# This is an example pipeline specification to do multi-sample variant calling
+# with the Platypus variant caller. Each input is preprocessed by aligning
+# against a reference genome (defaults to GRCH38), fixing mate information, and
+# marking duplicates. Finally platypus is called over all samples.
+{ bionix ? import <bionix> { }
+, inputs
+, ref ? bionix.ref.grch38.seq
+}:
+
+with bionix;
+with lib;
+
+let
+ preprocess = flip pipe [
+ (bwa.align { inherit ref; })
+ (samtools.sort { nameSort = true; })
+ (samtools.fixmate { })
+ (samtools.sort { })
+ (samtools.markdup { })
+ ];
+
+in
+platypus.call { } (map preprocess inputs)
+```
+
+# Nix the language
+
+We will start with learning Nix the language, which is used for
+specifying workflows. If you are familiar with JSON, it is very similar
+in terms of available data types but has one very important addition:
+functions. Let's cover the basic data types and their syntax:
+
+- Booleans: `true` and `false`
+- Strings: `"this is a string"`
+- Numbers: `0`, `1.234`
+- Lists: `[ 0 1.234 "string" ]`
+- Attribute sets: `{ a = 5; b = "something else"; }`
+- Comments: `# this is a comment`
+- Functions: `x: x + 1`
+- Variable binding: `let x = 5; in x #=> 5`
+- Function application: `let f = x: x + 1; in f 5 #=> 6`
+- File paths: `/path/to/file`
+
+Some common operators:
+- Boolean disjunctions and conjunctions: `true || false #=> true`, `true && false #=> false`
+- Ordering: `3 < 3 #=> false`, `3 <= 3 #=> true`
+- Conditionals: `if 3 < 4 then "a" else "b" #=> "a"`
+- Addition and subtraction: `3 + 4 #=> 7`, `3 - 4 #=> -1`
+- Multiplication and division: `3 * 4 #=> 12`, `3.0 / 4 #=> 0.75`
+- String concatenation: `"hello " + "world" #=> "hello world"`
+- String interpolation: `"hello ${"world"}" #=> "hello world"`, `"1 + 2 = ${toString (1 + 2)}" #=> "1 + 2 = 3"`
+- Attribute set unions: `{ a = 5; } // { b = 6; } #=> { a = 5; b = 6; }`
+
+# About this interface
+
+This workshop uses [A tour of
+nix](https://github.com/nixcloud/tour_of_nix) with some altered content
+for the purposes of learning enough of Nix the language to write
+workflows in BioNix during the second part. Click next to continue to
+the exercises.
diff --git a/day2/ex1-hello-world/default.nix b/day2/ex1-hello-world/default.nix
new file mode 100644
index 0000000..8eaffa7
--- /dev/null
+++ b/day2/ex1-hello-world/default.nix
@@ -0,0 +1,22 @@
+/*
+ This first exercise demonstrates how to define a processing stage,
+which is just instructions on how to compute an output (the
+`buildCommand`) from some inputs (in this case, there are no inputs).
+Each stage must minimally define a `name`, shell code to build the
+output in `buildCommand`, and any software that's required in
+`buildInputs`. Note that `buildInputs = []` if not defined, meaning no
+extra requirements over the standard environment.
+
+Try replacing the echo below with output from the GNU hello program.
+Hint: the GNU hello program is available at `pkgs.hello` and the
+executable is called `hello`.
+*/
+{bionix}: # this file is a function of an attribute set containing `bionix`
+with bionix; # brings the attributes of bionix (e.g. `stage`) into scope
+ stage {
+ name = "hello-world";
+
+ buildCommand = ''
+ echo hello world > $out
+ ''; # the shell code above writes the stage's output to $out
+ }
diff --git a/day2/ex1-hello-world/flake.lock b/day2/ex1-hello-world/flake.lock
new file mode 120000
index 0000000..981422e
--- /dev/null
+++ b/day2/ex1-hello-world/flake.lock
@@ -0,0 +1 @@
+../../common/flake.lock \ No newline at end of file
diff --git a/day2/ex1-hello-world/flake.nix b/day2/ex1-hello-world/flake.nix
new file mode 100755
index 0000000..bfaca96
--- /dev/null
+++ b/day2/ex1-hello-world/flake.nix
@@ -0,0 +1,20 @@
+{
+ inputs = { # dependencies of this flake, all referenced from GitHub
+ nixpkgs.url = "github:nixos/nixpkgs";
+ bionix.url = "github:papenfusslab/bionix";
+ flake-utils.url = "github:numtide/flake-utils";
+ };
+
+ outputs = { # function from the resolved inputs to this flake's outputs
+ self,
+ nixpkgs,
+ bionix,
+ flake-utils,
+ }:
+ flake-utils.lib.eachDefaultSystem (system: let # `system` is supplied by eachDefaultSystem
+ pkgs = import nixpkgs {inherit system;}; # nixpkgs imported for that system
+ bionix' = import bionix {nixpkgs = pkgs;}; # bionix imported with that package set as its nixpkgs
+ in {
+ defaultPackage = bionix'.callBionix ./. {}; # imports this directory via callBionix, with no extra arguments
+ });
+}
diff --git a/day2/ex1-hello-world/solution.nix b/day2/ex1-hello-world/solution.nix
new file mode 100644
index 0000000..d0531a2
--- /dev/null
+++ b/day2/ex1-hello-world/solution.nix
@@ -0,0 +1,24 @@
+/*
+ This first exercise demonstrates how to define a processing stage,
+which is just instructions on how to compute an output (the
+`buildCommand`) from some inputs (in this case, there are no inputs).
+Each stage must minimally define a `name`, shell code to build the
+output in `buildCommand`, and any software that's required in
+`buildInputs`. Note that `buildInputs = []` if not defined, meaning no
+extra requirements over the standard environment.
+
+Try replacing the echo below with output from the GNU hello program.
+Hint: the GNU hello program is available at `pkgs.hello` and the
+executable is called `hello`.
+*/
+{bionix}: # this file is a function of an attribute set containing `bionix`
+with bionix; # brings the attributes of bionix (e.g. `stage`, `pkgs`) into scope
+ stage {
+ name = "hello-world";
+
+ buildInputs = [pkgs.hello]; # GNU hello, which provides the `hello` executable used below
+
+ buildCommand = ''
+ hello > $out
+ ''; # the shell code above writes the output of `hello` to $out
+ }
diff --git a/day2/ex2-dlrow-olleh/default.nix b/day2/ex2-dlrow-olleh/default.nix
new file mode 100644
index 0000000..91bbe1f
--- /dev/null
+++ b/day2/ex2-dlrow-olleh/default.nix
@@ -0,0 +1,30 @@
+/*
+This exercise demonstrates the first workflow consisting of multiple
+steps. We will build upon the previous example by using the string
+produced in exercise 1 as input to this workflow. You can see how other
+files can be imported and used in the construction of workflows using
+`callBionix`, which imports a file and passes it `bionix` along with
+potentially some additional arguments.
+
+Goal: fill out the stage here to reverse the string using the `rev`
+program, which reads lines on stdin and writes them to stdout with the
+characters reversed. You will need to choose exactly *which* rev you
+want to use in `buildInputs`: there are three providers available
+(busybox, toybox, utillinux) and you can try them all.
+
+Bonus: change the last line to call rev twice, thereby reversing the
+strings back to the original orientation.
+*/
+{bionix}:
+with bionix; let
+ hello-world = callBionix ../ex1-hello-world {}; # the stage defined in exercise 1
+
+ rev = input: # function from an input to a new stage
+ stage {
+ name = "rev";
+ buildInputs = []; # exercise: add a package that provides `rev`
+ buildCommand = ''
+ ''; # exercise: fill in the shell code that reverses `input` into $out
+ };
+in
+ rev hello-world # apply the stage to the output of exercise 1
diff --git a/day2/ex2-dlrow-olleh/flake.lock b/day2/ex2-dlrow-olleh/flake.lock
new file mode 120000
index 0000000..981422e
--- /dev/null
+++ b/day2/ex2-dlrow-olleh/flake.lock
@@ -0,0 +1 @@
+../../common/flake.lock \ No newline at end of file
diff --git a/day2/ex2-dlrow-olleh/flake.nix b/day2/ex2-dlrow-olleh/flake.nix
new file mode 100755
index 0000000..bfaca96
--- /dev/null
+++ b/day2/ex2-dlrow-olleh/flake.nix
@@ -0,0 +1,20 @@
+{
+ inputs = { # dependencies of this flake, all referenced from GitHub
+ nixpkgs.url = "github:nixos/nixpkgs";
+ bionix.url = "github:papenfusslab/bionix";
+ flake-utils.url = "github:numtide/flake-utils";
+ };
+
+ outputs = { # function from the resolved inputs to this flake's outputs
+ self,
+ nixpkgs,
+ bionix,
+ flake-utils,
+ }:
+ flake-utils.lib.eachDefaultSystem (system: let # `system` is supplied by eachDefaultSystem
+ pkgs = import nixpkgs {inherit system;}; # nixpkgs imported for that system
+ bionix' = import bionix {nixpkgs = pkgs;}; # bionix imported with that package set as its nixpkgs
+ in {
+ defaultPackage = bionix'.callBionix ./. {}; # imports this directory via callBionix, with no extra arguments
+ });
+}
diff --git a/day2/ex2-dlrow-olleh/solution.nix b/day2/ex2-dlrow-olleh/solution.nix
new file mode 100644
index 0000000..ab235fe
--- /dev/null
+++ b/day2/ex2-dlrow-olleh/solution.nix
@@ -0,0 +1,14 @@
+{bionix}: # this file is a function of an attribute set containing `bionix`
+with bionix; let
+ hello-world = callBionix ../ex1-hello-world {}; # the stage defined in exercise 1
+
+ rev = input: # function from an input to a stage that runs `rev` over it
+ stage {
+ name = "rev";
+ buildInputs = [pkgs.toybox]; # toybox is used here as the provider of `rev`
+ buildCommand = ''
+ rev < ${input} > $out
+ ''; # the shell code above feeds `input` to rev on stdin and writes the result to $out
+ };
+in
+ rev (rev hello-world) # rev applied twice, so the string ends up in its original orientation