summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Bedo <cu@cua0.org>2022-11-21 09:39:44 +1100
committerJustin Bedo <cu@cua0.org>2022-11-21 16:59:38 +1100
commit4f427f345fb703c5db7ac01eb440a69cce09872b (patch)
tree72d3d7847468db64067982e13d1c78ff018fef59
init
-rw-r--r--.envrc1
-rw-r--r--.gitignore15
-rw-r--r--flake.lock42
-rw-r--r--flake.nix20
-rw-r--r--slides.bib13
-rw-r--r--slides.tex85
6 files changed, 176 insertions, 0 deletions
diff --git a/.envrc b/.envrc
new file mode 100644
index 0000000..3550a30
--- /dev/null
+++ b/.envrc
@@ -0,0 +1 @@
+use flake
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e06371d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,15 @@
+result*
+slides.pdf
+slides.aux
+slides.bbl
+slides.bcf
+slides.blg
+slides.fdb_latexmk
+slides.fls
+slides.log
+slides.nav
+slides.out
+slides.run.xml
+slides.snm
+slides.toc
+.direnv
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..e43d86b
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,42 @@
+{
+ "nodes": {
+ "flake-utils": {
+ "locked": {
+ "lastModified": 1667395993,
+ "narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=",
+ "owner": "numtide",
+ "repo": "flake-utils",
+ "rev": "5aed5285a952e0b949eb3ba02c12fa4fcfef535f",
+ "type": "github"
+ },
+ "original": {
+ "owner": "numtide",
+ "repo": "flake-utils",
+ "type": "github"
+ }
+ },
+ "nixpkgs": {
+ "locked": {
+ "lastModified": 1668984324,
+ "narHash": "sha256-HvRTBPCVb6kudgw6qe1/D/mD4ttUBl5EQAQuv++7kSU=",
+ "owner": "nixos",
+ "repo": "nixpkgs",
+ "rev": "983ef7741cedf9a9f916f2c5739d1eaf0997a251",
+ "type": "github"
+ },
+ "original": {
+ "owner": "nixos",
+ "repo": "nixpkgs",
+ "type": "github"
+ }
+ },
+ "root": {
+ "inputs": {
+ "flake-utils": "flake-utils",
+ "nixpkgs": "nixpkgs"
+ }
+ }
+ },
+ "root": "root",
+ "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..42dfd89
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,20 @@
+{
+ description = "Slides for Bioinformatics division talk 2022";
+ inputs = {
+ nixpkgs.url = "github:nixos/nixpkgs";
+ flake-utils.url = "github:numtide/flake-utils";
+ };
+
+ outputs = {
+ self,
+ nixpkgs,
+ flake-utils,
+ }:
+ flake-utils.lib.eachDefaultSystem (system: let
+ pkgs = import nixpkgs {inherit system;};
+ in {
+ devShell = pkgs.mkShell {
+ buildInputs = with pkgs; [biber pkgs.texlive.combined.scheme-full];
+ };
+ });
+}
diff --git a/slides.bib b/slides.bib
new file mode 100644
index 0000000..366242b
--- /dev/null
+++ b/slides.bib
@@ -0,0 +1,13 @@
+@article{Aitchison1982,
+ doi = {10.1111/j.2517-6161.1982.tb01195.x},
+ url = {https://doi.org/10.1111/j.2517-6161.1982.tb01195.x},
+ year = {1982},
+ month = jan,
+ publisher = {Wiley},
+ volume = {44},
+ number = {2},
+ pages = {139--160},
+ author = {J. Aitchison},
+ title = {The Statistical Analysis of Compositional Data},
+ journal = {Journal of the Royal Statistical Society: Series B (Methodological)}
+}
diff --git a/slides.tex b/slides.tex
new file mode 100644
index 0000000..0eaed66
--- /dev/null
+++ b/slides.tex
@@ -0,0 +1,85 @@
+\documentclass[aspectratio=169,UKenglish]{beamer}
+
+\usetheme{metropolis}
+\usepackage[sfdefault]{FiraSans}
+\usefonttheme{professionalfonts}
+\setbeamerfont{footnote}{size=
+ \tiny}
+
+\usepackage{microtype}
+
+\usepackage{tikz}
+\usetikzlibrary{shapes}
+\usetikzlibrary{bayesnet}
+\usepackage{stmaryrd}
+
+\newcommand{\R}{\mathbb{R}}
+\newcommand{\bx}{\mathbf{x}}
+\DeclareMathOperator{\alr}{alr}
+\DeclareMathOperator{\clr}{clr}
+
+\usepackage[natbib=true,url=false,style=verbose-ibid]{biblatex}
+\addbibresource{slides.bib}
+\AtBeginBibliography{\small}
+
+\author{Justin Bed\H{o}}
+\title{Exploration of deep mutational scanning data with unsupervised methods}
+\date{December 13, 2022}
+
+\begin{document}
+
+ \maketitle
+
+ \section{Deep Mutational Scanning (DMS) data}
+
+ \begin{frame}{Deep Mutational Scanning (DMS) data} Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
+ Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+ Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
+ Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+ \end{frame}
+
+ \section{Compositional data}
+
+ \begin{frame}{Basics}
+ \begin{definition}[Compositional data] Data \(X \in \R^{n \times d}\) is compositional if rows \(\bx_i\) are in the simplex
+ \[S^d=\{\,\bx \in \R^d : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\} \]
+ for constant \(\kappa > 0\).
+ \end{definition} Information is therefore given only by the ratios of components and any composition can be normalised to the standard simplex where \(\kappa = 1\) (cf.\ dividing by library size).
+ \end{frame}
+
+ \begin{frame}{Isomorphisms to Euclidean vector spaces} The simplex forms a \((d-1)\)-dimensional Euclidean vector space
+ \footfullcite{Aitchison1982}:
+ \begin{definition}[Additive logratio transform]
+ \[\alr(\bx)_i = \log \frac{x_i}{x_d} \]
+ \end{definition}
+ \begin{definition}[Centred logratio transform]
+ \[\clr(\bx)_i = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}} \]
+ \end{definition}
+ \end{frame}
+
+ \begin{frame}{PCA on DMS data}
+ \begin{block}{Transformation approach}
+ \begin{enumerate}
+ \item Map DMS data to Euclidean space via ALR/CLR
+ \item Apply standard PCA
+ \end{enumerate}
+ \end{block}
+ \begin{block}{Problems}
+ \begin{itemize}
+ \item Zeros:
+ \begin{enumerate}
+ \item geometric mean is \(0\) \(\Rightarrow\) CLR is undefined
+ \item ALR is undefined for unobserved components
+ \end{enumerate}
+ \item Interpretation:
+ \begin{enumerate}
+ \item ALR is not an isometry
+ \item CLR is degenerate
+ \end{enumerate}
+ \end{itemize}
+ \end{block}
+ \end{frame}
+
+ \section{Bregman divergences}
+
+\end{document}