From 4f427f345fb703c5db7ac01eb440a69cce09872b Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Mon, 21 Nov 2022 09:39:44 +1100 Subject: init --- .envrc | 1 + .gitignore | 15 +++++++++++ flake.lock | 42 +++++++++++++++++++++++++++++++ flake.nix | 20 +++++++++++++++ slides.bib | 13 ++++++++++ slides.tex | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 176 insertions(+) create mode 100644 .envrc create mode 100644 .gitignore create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 slides.bib create mode 100644 slides.tex diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e06371d --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ +result* +slides.pdf +slides.aux +slides.bbl +slides.bcf +slides.blg +slides.fdb_latexmk +slides.fls +slides.log +slides.nav +slides.out +slides.run.xml +slides.snm +slides.toc +.direnv diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..e43d86b --- /dev/null +++ b/flake.lock @@ -0,0 +1,42 @@ +{ + "nodes": { + "flake-utils": { + "locked": { + "lastModified": 1667395993, + "narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "5aed5285a952e0b949eb3ba02c12fa4fcfef535f", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1668984324, + "narHash": "sha256-HvRTBPCVb6kudgw6qe1/D/mD4ttUBl5EQAQuv++7kSU=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "983ef7741cedf9a9f916f2c5739d1eaf0997a251", + "type": "github" + }, + "original": { + "owner": "nixos", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix 
b/flake.nix new file mode 100644 index 0000000..42dfd89 --- /dev/null +++ b/flake.nix @@ -0,0 +1,20 @@ +{ + description = "Slides for Bioinformatics division talk 2022"; + inputs = { + nixpkgs.url = "github:nixos/nixpkgs"; # no ref given here; the exact revision is pinned in flake.lock + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { + self, + nixpkgs, + flake-utils, + }: + flake-utils.lib.eachDefaultSystem (system: let # the attrset below is produced once per system + pkgs = import nixpkgs {inherit system;}; + in { + devShell = pkgs.mkShell { # shell providing the LaTeX toolchain; NOTE(review): Nix >= 2.7 prefers the name devShells.default - confirm minimum Nix version before renaming + buildInputs = with pkgs; [biber texlive.combined.scheme-full]; # `with pkgs` already puts texlive in scope + }; + }); +} diff --git a/slides.bib b/slides.bib new file mode 100644 index 0000000..366242b --- /dev/null +++ b/slides.bib @@ -0,0 +1,13 @@ +@article{Aitchison1982, + doi = {10.1111/j.2517-6161.1982.tb01195.x}, + url = {https://doi.org/10.1111/j.2517-6161.1982.tb01195.x}, + year = {1982}, + month = jan, + publisher = {Wiley}, + volume = {44}, + number = {2}, + pages = {139--160}, + author = {J. Aitchison}, + title = {The Statistical Analysis of Compositional Data}, + journal = {Journal of the Royal Statistical Society: Series B (Methodological)} +} diff --git a/slides.tex b/slides.tex new file mode 100644 index 0000000..0eaed66 --- /dev/null +++ b/slides.tex @@ -0,0 +1,85 @@ +\documentclass[aspectratio=169,UKenglish]{beamer} + +\usetheme{metropolis} +\usepackage[sfdefault]{FiraSans} +\usefonttheme{professionalfonts} +\setbeamerfont{footnote}{size= + \tiny} + +\usepackage{microtype} + +\usepackage{tikz} +\usetikzlibrary{shapes} +\usetikzlibrary{bayesnet} +\usepackage{stmaryrd} + +\newcommand{\R}{\mathbb{R}} +\newcommand{\bx}{\mathbf{x}} +\DeclareMathOperator{\alr}{alr} +\DeclareMathOperator{\clr}{clr} + +\usepackage[natbib=true,url=false,style=verbose-ibid]{biblatex} +\addbibresource{slides.bib} +\AtBeginBibliography{\small} + +\author{Justin Bed\H{o}} +\title{Exploration of deep mutational scanning data with unsupervised methods} +\date{December 13, 2022} + +\begin{document} + + \maketitle + + \section{Deep Mutational Scanning (DMS) data} + + 
\begin{frame}{Deep Mutational Scanning (DMS) data} Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. + Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. + Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. + Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + \end{frame} + + \section{Compositional data} + + \begin{frame}{Basics} + \begin{definition}[Compositional data] Data \(X \in \R^{n \times d}\) is compositional if rows \(\bx_i\) are in the simplex + \[S^d=\{\,\bx \in \R^d : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\} \] + for constant \(\kappa > 0\). + \end{definition} Information is therefore given only by the ratios of components and any composition can be normalised to the standard simplex where \(\kappa = 1\) (cf.\ dividing by library size). 
+ \end{frame} + + \begin{frame}{Isomorphisms to Euclidean vector spaces} The simplex forms a \((d-1)\)-dimensional Euclidean vector space + \footfullcite{Aitchison1982}: + \begin{definition}[Additive logratio transform] + \[\alr(\bx)_i = \log \frac{x_i}{x_d} \] + \end{definition} + \begin{definition}[Centred logratio transform] + \[\clr(\bx)_i = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}} \] + \end{definition} + \end{frame} + + \begin{frame}{PCA on DMS data} + \begin{block}{Transformation approach} + \begin{enumerate} + \item Map DMS data to Euclidean space via ALR/CLR + \item Apply standard PCA + \end{enumerate} + \end{block} + \begin{block}{Problems} + \begin{itemize} + \item Zeros: + \begin{enumerate} + \item geometric mean is \(0\) \(\Rightarrow\) CLR is undefined + \item ALR is undefined for unobserved components + \end{enumerate} + \item Interpretation: + \begin{enumerate} + \item ALR is not an isometry + \item CLR is degenerate + \end{enumerate} + \end{itemize} + \end{block} + \end{frame} + + \section{Bregman divergences} + +\end{document} -- cgit v1.2.3