aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Justin Bedo <cu@cua0.org> 2022-10-07 14:58:01 +1100
committer Justin Bedo <cu@cua0.org> 2022-10-07 15:51:34 +1100
commit 4e71791f46031ed248030efed90b6d5ed53ae50c (patch)
tree 4f79e33ec805b40535d4fe651674c7da851b5d9b
parent dda29ca7685d360d5428dd827d11a9e4139a0872 (diff)
add ex3
-rw-r--r-- day2/ex3-R/default.nix 42
l--------- day2/ex3-R/flake.lock 1
-rwxr-xr-x day2/ex3-R/flake.nix 20
-rw-r--r-- day2/ex3-R/script.R 16
-rw-r--r-- day2/ex3-R/solution.nix 19
5 files changed, 98 insertions, 0 deletions
diff --git a/day2/ex3-R/default.nix b/day2/ex3-R/default.nix
new file mode 100644
index 0000000..614c24f
--- /dev/null
+++ b/day2/ex3-R/default.nix
@@ -0,0 +1,42 @@
+/*
+This example tries to demonstrate that even very simple workflows (this
+is only really one computational stage) can benefit from some formalisation
+to provide reproducibility. We borrow part of the edgeR example available from
+https://ucdavis-bioinformatics-training.github.io/2018-September-Bioinformatics-Prerequisites/friday/limma_biomart_vignettes.html
+and specify it in BioNix so that it's easily reproducible.
+
+One key learning goal in this exercise is to understand that Nix only
+allows _inputs_ to be referenced during the execution of a build to
+prevent side effects from creeping in. In particular, internet access is
+forbidden when the build sandbox is enabled (default, but not on Milton
+for technical reasons), meaning data cannot be fetched as part of a
+build as the original example does.
+
+We therefore fetch the count input as a separate stage and Nix will take
+care of downloading it for us. The caveat is that the content of things
+fetched from the internet must be verified to give reproducibility. Nix
+does this through hashing.
+
+Goal: fill out the below to specify required R packages, execute the
+build and observe the hash mismatch. Update the hash and see if the
+build now completes successfully.
+*/
+{bionix}:
+with bionix; let
+ R = pkgs.rWrapper.override {packages = with pkgs.rPackages; [];};
+
+ counts = pkgs.fetchurl {
+ url = "https://raw.githubusercontent.com/ucdavis-bioinformatics-training/2018-September-Bioinformatics-Prerequisites/master/friday/counts.tsv";
+ sha256 = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=";
+ };
+in
+ stage {
+ inherit counts;
+ name = "r-ex";
+
+ buildInputs = [R];
+
+ buildCommand = ''
+ Rscript ${./script.R}
+ '';
+ }
diff --git a/day2/ex3-R/flake.lock b/day2/ex3-R/flake.lock
new file mode 120000
index 0000000..981422e
--- /dev/null
+++ b/day2/ex3-R/flake.lock
@@ -0,0 +1 @@
+../../common/flake.lock
\ No newline at end of file
diff --git a/day2/ex3-R/flake.nix b/day2/ex3-R/flake.nix
new file mode 100755
index 0000000..bfaca96
--- /dev/null
+++ b/day2/ex3-R/flake.nix
@@ -0,0 +1,20 @@
+{
+ inputs = {
+ nixpkgs.url = "github:nixos/nixpkgs";
+ bionix.url = "github:papenfusslab/bionix";
+ flake-utils.url = "github:numtide/flake-utils";
+ };
+
+ outputs = {
+ self,
+ nixpkgs,
+ bionix,
+ flake-utils,
+ }:
+ flake-utils.lib.eachDefaultSystem (system: let
+ pkgs = import nixpkgs {inherit system;};
+ bionix' = import bionix {nixpkgs = pkgs;};
+ in {
+ defaultPackage = bionix'.callBionix ./. {};
+ });
+}
diff --git a/day2/ex3-R/script.R b/day2/ex3-R/script.R
new file mode 100644
index 0000000..fcbf6e1
--- /dev/null
+++ b/day2/ex3-R/script.R
@@ -0,0 +1,16 @@
+library(edgeR)
+
+# It's easier to read environment variables than parse command line arguments
+counts <- read.delim(Sys.getenv("counts"), row.names=1)
+out <- Sys.getenv("out")
+
+group <- rep(c("A", "B"), each = 2)
+design <- model.matrix(~group)
+dge <- DGEList(counts=counts)
+keep <- filterByExpr(dge, design)
+dge <- dge[keep,,keep.lib.sizes=FALSE]
+dge <- calcNormFactors(dge)
+logCPM <- cpm(dge, log=TRUE, prior.count=3)
+fit <- lmFit(logCPM, design)
+fit <- eBayes(fit, trend=TRUE)
+write.table(topTable(fit, coef=ncol(design)), file = out)
diff --git a/day2/ex3-R/solution.nix b/day2/ex3-R/solution.nix
new file mode 100644
index 0000000..7d40f88
--- /dev/null
+++ b/day2/ex3-R/solution.nix
@@ -0,0 +1,19 @@
+{bionix}:
+with bionix; let
+ R = pkgs.rWrapper.override {packages = with pkgs.rPackages; [edgeR];};
+
+ counts = pkgs.fetchurl {
+ url = "https://raw.githubusercontent.com/ucdavis-bioinformatics-training/2018-September-Bioinformatics-Prerequisites/master/friday/counts.tsv";
+ sha256 = "sha256-ZmZ+vC4mKnmZKVJqbnEujDngwnSTZAxvQaZaNClUUWE=";
+ };
+in
+ stage {
+ inherit counts;
+ name = "r-ex";
+
+ buildInputs = [R];
+
+ buildCommand = ''
+ Rscript ${./script.R}
+ '';
+ }