From ac3a4e0363c1fde0f1dbff75b95aa27d9acd5e5e Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Mon, 10 Oct 2022 13:22:03 +1100 Subject: insert exercise 4 --- day2/ex4-bwa/default.nix | 52 ---------------------- day2/ex4-bwa/flake.lock | 1 - day2/ex4-bwa/flake.nix | 20 --------- day2/ex4-bwa/solution.nix | 19 -------- day2/ex4-scanpy/default.nix | 72 ++++++++++++++++++++++++++++++ day2/ex4-scanpy/flake.lock | 103 +++++++++++++++++++++++++++++++++++++++++++ day2/ex4-scanpy/flake.nix | 38 ++++++++++++++++ day2/ex4-scanpy/solution.nix | 42 ++++++++++++++++++ day2/ex5-bwa/default.nix | 52 ++++++++++++++++++++++ day2/ex5-bwa/flake.lock | 1 + day2/ex5-bwa/flake.nix | 20 +++++++++ day2/ex5-bwa/solution.nix | 19 ++++++++ 12 files changed, 347 insertions(+), 92 deletions(-) delete mode 100644 day2/ex4-bwa/default.nix delete mode 120000 day2/ex4-bwa/flake.lock delete mode 100755 day2/ex4-bwa/flake.nix delete mode 100644 day2/ex4-bwa/solution.nix create mode 100644 day2/ex4-scanpy/default.nix create mode 100755 day2/ex4-scanpy/flake.lock create mode 100755 day2/ex4-scanpy/flake.nix create mode 100644 day2/ex4-scanpy/solution.nix create mode 100644 day2/ex5-bwa/default.nix create mode 120000 day2/ex5-bwa/flake.lock create mode 100755 day2/ex5-bwa/flake.nix create mode 100644 day2/ex5-bwa/solution.nix diff --git a/day2/ex4-bwa/default.nix b/day2/ex4-bwa/default.nix deleted file mode 100644 index 9c8356a..0000000 --- a/day2/ex4-bwa/default.nix +++ /dev/null @@ -1,52 +0,0 @@ -/* -BioNix is a thin wrapper over Nix: there is not much functionality -required for pipelining that is not already present in the base build -engine. As such, the focus is on providing an interface that is -convenient for specifying common bioinformatics workflows. To this end, -BioNix provides a library of tools to help simplify the specification of -common bioinformatics pipelines, with a notable focus on genomics tools. 
-You can see the available tools at -https://github.com/PapenfussLab/bionix/tree/master/tools. - -This exercise aims to demonstrate how to both use a tool available in -BioNix on some input data and how to chain them together. We will do a -simple alignment with BWA on some simulated reads from a bacterial -genome. The BWA tool is provided by -https://github.com/PapenfussLab/bionix/blob/master/tools/bwa.nix and as -you can see there are alignment functions for alignment with BWA/BWA2, -as well as corresponding index functions for indexing a reference -genome. Don't worry about indexing, this will be handled automatically, -you only have to declare you want an alignment and what genome and the -index will be generated if needed. - -Exercise: - -1. Sample data along with a reference will be fetched from github. As -before, hashes of the content must be known. Fill in the hashes to fully -specify the inputs. - -2. With the hashes in place, the expression should evaluate and BWA -should run. Try swapping BWA out with some of the other available -aligners in BioNix (e.g., bowtie, hisat2, minimap2, whisper). - -3. Aligners produce *unsorted* output, but co-ordinate sorted alignments -are usually desired as they are indexable by position. Pass the aligned -output to the samtools sort function to sort the alignments into co- -ordinate order. 
-*/ -{bionix}: -with bionix; let - input = { - input1 = fetchFastQ { - url = "https://raw.githubusercontent.com/PapenfussLab/bionix/bac9248a5e08e8afdf5485a6e27cfe72e1ca5090/examples/sample1-1.fq"; - }; - input2 = fetchFastQ { - url = "https://raw.githubusercontent.com/PapenfussLab/bionix/bac9248a5e08e8afdf5485a6e27cfe72e1ca5090/examples/sample1-2.fq"; - }; - }; - - ref = fetchFastA { - url = "https://raw.githubusercontent.com/PapenfussLab/bionix/bac9248a5e08e8afdf5485a6e27cfe72e1ca5090/examples/ref.fa"; - }; -in - bwa.align {} input diff --git a/day2/ex4-bwa/flake.lock b/day2/ex4-bwa/flake.lock deleted file mode 120000 index 981422e..0000000 --- a/day2/ex4-bwa/flake.lock +++ /dev/null @@ -1 +0,0 @@ -../../common/flake.lock \ No newline at end of file diff --git a/day2/ex4-bwa/flake.nix b/day2/ex4-bwa/flake.nix deleted file mode 100755 index bfaca96..0000000 --- a/day2/ex4-bwa/flake.nix +++ /dev/null @@ -1,20 +0,0 @@ -{ - inputs = { - nixpkgs.url = "github:nixos/nixpkgs"; - bionix.url = "github:papenfusslab/bionix"; - flake-utils.url = "github:numtide/flake-utils"; - }; - - outputs = { - self, - nixpkgs, - bionix, - flake-utils, - }: - flake-utils.lib.eachDefaultSystem (system: let - pkgs = import nixpkgs {inherit system;}; - bionix' = import bionix {nixpkgs = pkgs;}; - in { - defaultPackage = bionix'.callBionix ./. 
{}; - }); -} diff --git a/day2/ex4-bwa/solution.nix b/day2/ex4-bwa/solution.nix deleted file mode 100644 index 79fc7dc..0000000 --- a/day2/ex4-bwa/solution.nix +++ /dev/null @@ -1,19 +0,0 @@ -{bionix}: -with bionix; let - input = { - input1 = fetchFastQ { - url = "https://raw.githubusercontent.com/PapenfussLab/bionix/master/examples/sample1-1.fq"; - sha256 = "sha256-qE6s8hKowiz3mvCq8/7xAzUz77xG9rAcsI2E50xMAk4="; - }; - input2 = fetchFastQ { - url = "https://raw.githubusercontent.com/PapenfussLab/bionix/master/examples/sample1-2.fq"; - sha256 = "sha256-s02R49HX/qeJp4t/eZwsKwV9D07uLGId8CEpU2dB8zM="; - }; - }; - - ref = fetchFastA { - url = "https://raw.githubusercontent.com/PapenfussLab/bionix/master/examples/ref.fa"; - sha256 = "sha256-V3zqOJFuGtukDRQttK/pGfKofgOlKrridHaWYhGGyWs="; - }; -in - samtools.sort {} (bwa.align {inherit ref;} input) diff --git a/day2/ex4-scanpy/default.nix b/day2/ex4-scanpy/default.nix new file mode 100644 index 0000000..4d950cf --- /dev/null +++ b/day2/ex4-scanpy/default.nix @@ -0,0 +1,72 @@ +/* +This exercise is similar to the previous one, only in python instead of +R for those who are more familiar with python. For this example, we will +make the scanpy tutorial notebook[1] reproducible. + +Unlike the R ecosystem, Python dependency management is significantly +more difficult. There is some tooling available for building +reproducible Python environments, most notably mach-nix[2]. We will use +this to provide an environment containing jupyter (for running the +notebook) and scanpy; unfortunately scanpy has some unresolved bugs[2] +and we will avoid them by pinning the anndata dependency to a known +working version. + +This expression sets up the build directory to match the layout assumed +by the notebook. Note that it also explicitly sets a Numba cache +directory to the build directory: during a build the only writable +locations are the output paths allocated in the nix store and the +(temporary) build directory. 
Jupyter is executed to convert the +notebook into a html file, and the output is copied to the store along +with some ancillary h5 blobs produced by the notebook. + +Goal: As in exercise 3, the data cannot be fetched during execution as +internet access is not permitted. Furthermore, the notebook itself also +has to be retrieved. Fill out the required hashes for fetching these two +pieces of data. Run the build, and review the output in a browser (e.g., +with `chromium ./result`). + +1: https://scanpy-tutorials.readthedocs.io/en/latest/pbmc3k.html +2: https://github.com/DavHau/mach-nix +*/ +{ + bionix, + mach-nix, +}: +with bionix; let + python = mach-nix.mkPython { + requirements = '' + jupyterlab + scanpy + anndata=0.7.8 + leidenalg + ''; + }; +in + stage { + name = "scanpy-tutorial.html"; + buildInputs = [python]; + outputs = ["out" "results"]; + + src = pkgs.fetchurl { + url = "http://cf.10xgenomics.com/samples/cell-exp/1.1.0/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz"; + sha256 = "sha256-hH1uvZoeyado8r5+QMpCy/516+ttdqTCQWcEFpncKLU="; + }; + + notebook = pkgs.fetchurl { + url = "https://github.com/scverse/scanpy-tutorials/raw/532f755ac31d9baf00116a44243b73174765a6a6/pbmc3k.ipynb"; + sha256 = "sha256-mwIPHKAsDd1F4F9fNnAfrapBehBN3jjEsrLGsI37Igg="; + }; + + buildCommand = '' + export NUMBA_CACHE_DIR=$TMPDIR + cp $notebook notebook.ipynb + mkdir data + tar -zxf $src -C data + mkdir write + jupyter nbconvert --execute --to html ./notebook.ipynb + cp notebook.html $out + cp -r write $results + ''; + + stripStorePaths = false; + } diff --git a/day2/ex4-scanpy/flake.lock b/day2/ex4-scanpy/flake.lock new file mode 100755 index 0000000..eb5a579 --- /dev/null +++ b/day2/ex4-scanpy/flake.lock @@ -0,0 +1,103 @@ +{ + "nodes": { + "bionix": { + "locked": { + "lastModified": 1664157112, + "narHash": "sha256-llJ4nRFxMhpy+tEywH1qzwcQjIQu5DCs6z2sIgel31w=", + "owner": "papenfusslab", + "repo": "bionix", + "rev": "81cfa50e6b345942951b68eac0e184ea025f2ae4", + "type": "github" + 
}, + "original": { + "owner": "papenfusslab", + "repo": "bionix", + "type": "github" + } + }, + "flake-utils": { + "locked": { + "lastModified": 1659877975, + "narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "mach-nix": { + "inputs": { + "flake-utils": [ + "flake-utils" + ], + "nixpkgs": [ + "nixpkgs" + ], + "pypi-deps-db": [ + "pypi-deps-db" + ] + }, + "locked": { + "lastModified": 1654084003, + "narHash": "sha256-j/XrVVistvM+Ua+0tNFvO5z83isL+LBgmBi9XppxuKA=", + "owner": "DavHau", + "repo": "mach-nix", + "rev": "7e14360bde07dcae32e5e24f366c83272f52923f", + "type": "github" + }, + "original": { + "id": "mach-nix", + "ref": "3.5.0", + "type": "indirect" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1659446231, + "narHash": "sha256-hekabNdTdgR/iLsgce5TGWmfIDZ86qjPhxDg/8TlzhE=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "eabc38219184cc3e04a974fe31857d8e0eac098d", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "release-21.11", + "repo": "nixpkgs", + "type": "github" + } + }, + "pypi-deps-db": { + "flake": false, + "locked": { + "lastModified": 1665346953, + "narHash": "sha256-zU5SVQhz+t9W4pMm8LsN7tBTFMF9cOcO7kfzmoIPmRo=", + "owner": "DavHau", + "repo": "pypi-deps-db", + "rev": "6cb0a565a31ae61a61aff6f71dbdf2ee5bba8076", + "type": "github" + }, + "original": { + "owner": "DavHau", + "repo": "pypi-deps-db", + "type": "github" + } + }, + "root": { + "inputs": { + "bionix": "bionix", + "flake-utils": "flake-utils", + "mach-nix": "mach-nix", + "nixpkgs": "nixpkgs", + "pypi-deps-db": "pypi-deps-db" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/day2/ex4-scanpy/flake.nix b/day2/ex4-scanpy/flake.nix new file mode 100755 index 0000000..9587213 --- /dev/null +++ b/day2/ex4-scanpy/flake.nix @@ 
-0,0 +1,38 @@ +{ + inputs = { + nixpkgs.url = "github:nixos/nixpkgs/release-21.11"; + bionix.url = "github:papenfusslab/bionix"; + flake-utils.url = "github:numtide/flake-utils"; + + mach-nix = { + url = "mach-nix/3.5.0"; + inputs = { + nixpkgs.follows = "nixpkgs"; + flake-utils.follows = "flake-utils"; + pypi-deps-db.follows = "pypi-deps-db"; + }; + }; + + pypi-deps-db = { + url = "github:DavHau/pypi-deps-db"; + flake = false; + }; + }; + + outputs = { + self, + nixpkgs, + bionix, + flake-utils, + mach-nix, + ... + }: + flake-utils.lib.eachDefaultSystem (system: let + pkgs = import nixpkgs { + inherit system; + }; + bionix' = import bionix {nixpkgs = pkgs;}; + in { + defaultPackage = bionix'.callBionix ./. {mach-nix = mach-nix.lib."${system}";}; + }); +} diff --git a/day2/ex4-scanpy/solution.nix b/day2/ex4-scanpy/solution.nix new file mode 100644 index 0000000..dcc2037 --- /dev/null +++ b/day2/ex4-scanpy/solution.nix @@ -0,0 +1,42 @@ +{ + bionix, + mach-nix, +}: +with bionix; let + python = mach-nix.mkPython { + requirements = '' + jupyterlab + scanpy + anndata=0.7.8 + leidenalg + ''; + }; +in + stage { + name = "scanpy-tutorial.html"; + buildInputs = [python]; + outputs = ["out" "results"]; + + src = pkgs.fetchurl { + url = "http://cf.10xgenomics.com/samples/cell-exp/1.1.0/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz"; + sha256 = "sha256-hH1uvZoeyado8r5+QMpCy/516+ttdqTCQWcEFpncKLU="; + }; + + notebook = pkgs.fetchurl { + url = "https://github.com/scverse/scanpy-tutorials/raw/532f755ac31d9baf00116a44243b73174765a6a6/pbmc3k.ipynb"; + sha256 = "sha256-mwIPHKAsDd1F4F9fNnAfrapBehBN3jjEsrLGsI37Igg="; + }; + + buildCommand = '' + export NUMBA_CACHE_DIR=$TMPDIR + cp $notebook notebook.ipynb + mkdir data + tar -zxf $src -C data + mkdir write + jupyter nbconvert --execute --to html ./notebook.ipynb + cp notebook.html $out + cp -r write $results + ''; + + stripStorePaths = false; + } diff --git a/day2/ex5-bwa/default.nix b/day2/ex5-bwa/default.nix new file mode 100644 
index 0000000..9c8356a --- /dev/null +++ b/day2/ex5-bwa/default.nix @@ -0,0 +1,52 @@ +/* +BioNix is a thin wrapper over Nix: there is not much functionality +required for pipelining that is not already present in the base build +engine. As such, the focus is on providing an interface that is +convenient for specifying common bioinformatics workflows. To this end, +BioNix provides a library of tools to help simplify the specification of +common bioinformatics pipelines, with a notable focus on genomics tools. +You can see the available tools at +https://github.com/PapenfussLab/bionix/tree/master/tools. + +This exercise aims to demonstrate how to both use a tool available in +BioNix on some input data and how to chain them together. We will do a +simple alignment with BWA on some simulated reads from a bacterial +genome. The BWA tool is provided by +https://github.com/PapenfussLab/bionix/blob/master/tools/bwa.nix and as +you can see there are alignment functions for alignment with BWA/BWA2, +as well as corresponding index functions for indexing a reference +genome. Don't worry about indexing, this will be handled automatically, +you only have to declare you want an alignment and what genome and the +index will be generated if needed. + +Exercise: + +1. Sample data along with a reference will be fetched from github. As +before, hashes of the content must be known. Fill in the hashes to fully +specify the inputs. + +2. With the hashes in place, the expression should evaluate and BWA +should run. Try swapping BWA out with some of the other available +aligners in BioNix (e.g., bowtie, hisat2, minimap2, whisper). + +3. Aligners produce *unsorted* output, but co-ordinate sorted alignments +are usually desired as they are indexable by position. Pass the aligned +output to the samtools sort function to sort the alignments into co- +ordinate order. 
+*/ +{bionix}: +with bionix; let + input = { + input1 = fetchFastQ { + url = "https://raw.githubusercontent.com/PapenfussLab/bionix/bac9248a5e08e8afdf5485a6e27cfe72e1ca5090/examples/sample1-1.fq"; + }; + input2 = fetchFastQ { + url = "https://raw.githubusercontent.com/PapenfussLab/bionix/bac9248a5e08e8afdf5485a6e27cfe72e1ca5090/examples/sample1-2.fq"; + }; + }; + + ref = fetchFastA { + url = "https://raw.githubusercontent.com/PapenfussLab/bionix/bac9248a5e08e8afdf5485a6e27cfe72e1ca5090/examples/ref.fa"; + }; +in + bwa.align {} input diff --git a/day2/ex5-bwa/flake.lock b/day2/ex5-bwa/flake.lock new file mode 120000 index 0000000..981422e --- /dev/null +++ b/day2/ex5-bwa/flake.lock @@ -0,0 +1 @@ +../../common/flake.lock \ No newline at end of file diff --git a/day2/ex5-bwa/flake.nix b/day2/ex5-bwa/flake.nix new file mode 100755 index 0000000..bfaca96 --- /dev/null +++ b/day2/ex5-bwa/flake.nix @@ -0,0 +1,20 @@ +{ + inputs = { + nixpkgs.url = "github:nixos/nixpkgs"; + bionix.url = "github:papenfusslab/bionix"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { + self, + nixpkgs, + bionix, + flake-utils, + }: + flake-utils.lib.eachDefaultSystem (system: let + pkgs = import nixpkgs {inherit system;}; + bionix' = import bionix {nixpkgs = pkgs;}; + in { + defaultPackage = bionix'.callBionix ./. 
{}; + }); +} diff --git a/day2/ex5-bwa/solution.nix b/day2/ex5-bwa/solution.nix new file mode 100644 index 0000000..79fc7dc --- /dev/null +++ b/day2/ex5-bwa/solution.nix @@ -0,0 +1,19 @@ +{bionix}: +with bionix; let + input = { + input1 = fetchFastQ { + url = "https://raw.githubusercontent.com/PapenfussLab/bionix/master/examples/sample1-1.fq"; + sha256 = "sha256-qE6s8hKowiz3mvCq8/7xAzUz77xG9rAcsI2E50xMAk4="; + }; + input2 = fetchFastQ { + url = "https://raw.githubusercontent.com/PapenfussLab/bionix/master/examples/sample1-2.fq"; + sha256 = "sha256-s02R49HX/qeJp4t/eZwsKwV9D07uLGId8CEpU2dB8zM="; + }; + }; + + ref = fetchFastA { + url = "https://raw.githubusercontent.com/PapenfussLab/bionix/master/examples/ref.fa"; + sha256 = "sha256-V3zqOJFuGtukDRQttK/pGfKofgOlKrridHaWYhGGyWs="; + }; +in + samtools.sort {} (bwa.align {inherit ref;} input) -- cgit v1.2.3