From 4e71791f46031ed248030efed90b6d5ed53ae50c Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Fri, 7 Oct 2022 14:58:01 +1100 Subject: add ex3 --- day2/ex3-R/default.nix | 42 ++++++++++++++++++++++++++++++++++++++++++ day2/ex3-R/flake.lock | 1 + day2/ex3-R/flake.nix | 20 ++++++++++++++++++++ day2/ex3-R/script.R | 16 ++++++++++++++++ day2/ex3-R/solution.nix | 19 +++++++++++++++++++ 5 files changed, 98 insertions(+) create mode 100644 day2/ex3-R/default.nix create mode 120000 day2/ex3-R/flake.lock create mode 100755 day2/ex3-R/flake.nix create mode 100644 day2/ex3-R/script.R create mode 100644 day2/ex3-R/solution.nix diff --git a/day2/ex3-R/default.nix b/day2/ex3-R/default.nix new file mode 100644 index 0000000..614c24f --- /dev/null +++ b/day2/ex3-R/default.nix @@ -0,0 +1,42 @@ +/* +This example tries to demonstrate that even very simple workflows (this +is only really one computational stage) can benefit from some formalisation +to provide reproducibility. We borrow part of the edgeR example available from +https://ucdavis-bioinformatics-training.github.io/2018-September-Bioinformatics-Prerequisites/friday/limma_biomart_vignettes.html +and specify it in BioNix so that it's easily reproducible. + +One key learning goal in this exercise is to understand that Nix only +allows _inputs_ to be referenced during the execution of a build to +prevent side effects from creeping in. In particular, internet access is +forbidden when the build sandbox is enabled (default, but not on Milton +for technical reasons), meaning data cannot be fetched as part of a +build as the original example does. + +We therefore fetch the count input as a separate stage and Nix will take +care of downloading it for us. The caveat is that the content of things +fetched from the internet must be verified to give reproducibility. Nix +does this through hashing. + +Goal: fill out the below to specify required R packages, execute the +build and observe the hash mismatch. 
Update the hash and see if the +build now completes successfully. +*/ +{bionix}: +with bionix; let + R = pkgs.rWrapper.override {packages = with pkgs.rPackages; [];}; + + counts = pkgs.fetchurl { + url = "https://raw.githubusercontent.com/ucdavis-bioinformatics-training/2018-September-Bioinformatics-Prerequisites/master/friday/counts.tsv"; + sha256 = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="; + }; +in + stage { + inherit counts; + name = "r-ex"; + + buildInputs = [R]; + + buildCommand = '' + Rscript ${./script.R} + ''; + } diff --git a/day2/ex3-R/flake.lock b/day2/ex3-R/flake.lock new file mode 120000 index 0000000..981422e --- /dev/null +++ b/day2/ex3-R/flake.lock @@ -0,0 +1 @@ +../../common/flake.lock \ No newline at end of file diff --git a/day2/ex3-R/flake.nix b/day2/ex3-R/flake.nix new file mode 100755 index 0000000..bfaca96 --- /dev/null +++ b/day2/ex3-R/flake.nix @@ -0,0 +1,20 @@ +{ + inputs = { + nixpkgs.url = "github:nixos/nixpkgs"; + bionix.url = "github:papenfusslab/bionix"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { + self, + nixpkgs, + bionix, + flake-utils, + }: + flake-utils.lib.eachDefaultSystem (system: let + pkgs = import nixpkgs {inherit system;}; + bionix' = import bionix {nixpkgs = pkgs;}; + in { + defaultPackage = bionix'.callBionix ./. 
{}; + }); +} diff --git a/day2/ex3-R/script.R b/day2/ex3-R/script.R new file mode 100644 index 0000000..fcbf6e1 --- /dev/null +++ b/day2/ex3-R/script.R @@ -0,0 +1,16 @@ +library(edgeR) + +# It's easier to read environment variables than parse command line arguments +counts <- read.delim(Sys.getenv("counts"), row.names=1) +out <- Sys.getenv("out") + +group <- rep(c("A", "B"), each = 2) +design <- model.matrix(~group) +dge <- DGEList(counts=counts) +keep <- filterByExpr(dge, design) +dge <- dge[keep,,keep.lib.sizes=FALSE] +dge <- calcNormFactors(dge) +logCPM <- cpm(dge, log=TRUE, prior.count=3) +fit <- lmFit(logCPM, design) +fit <- eBayes(fit, trend=TRUE) +write.table(topTable(fit, coef=ncol(design)), file = out) diff --git a/day2/ex3-R/solution.nix b/day2/ex3-R/solution.nix new file mode 100644 index 0000000..7d40f88 --- /dev/null +++ b/day2/ex3-R/solution.nix @@ -0,0 +1,19 @@ +{bionix}: +with bionix; let + R = pkgs.rWrapper.override {packages = with pkgs.rPackages; [edgeR];}; + + counts = pkgs.fetchurl { + url = "https://raw.githubusercontent.com/ucdavis-bioinformatics-training/2018-September-Bioinformatics-Prerequisites/master/friday/counts.tsv"; + sha256 = "sha256-ZmZ+vC4mKnmZKVJqbnEujDngwnSTZAxvQaZaNClUUWE="; + }; +in + stage { + inherit counts; + name = "r-ex"; + + buildInputs = [R]; + + buildCommand = '' + Rscript ${./script.R} + ''; + } -- cgit v1.2.3