From a6df2a5886383bbf1d782802bfd65fdcf4dc319f Mon Sep 17 00:00:00 2001
From: Justin Bedo <cu@cua0.org>
Date: Wed, 7 Dec 2022 15:36:01 +1100
Subject: intro slides

---
 slides.tex | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 78 insertions(+), 18 deletions(-)

(limited to 'slides.tex')

diff --git a/slides.tex b/slides.tex
index 8af2bff..f8121d9 100644
--- a/slides.tex
+++ b/slides.tex
@@ -29,7 +29,7 @@
 \addbibresource{slides.bib}
 \AtBeginBibliography{\small}
 
-%% Tikz relative positioning https://tex.stackexchange.com/questions/89588/positioning-relative-to-page-in-tikz
+% Tikz relative positioning https://tex.stackexchange.com/questions/89588/positioning-relative-to-page-in-tikz
 \makeatletter
 \def
 \parsecomma#1,#2
@@ -79,27 +79,87 @@
 \newcommand{\dms}{\ac{dms}
   \xspace}
 
+\definecolor{cb1}{HTML}{1b9e77}
+\definecolor{cb2}{HTML}{d95f02}
+\definecolor{cb3}{HTML}{7570b3}
+
 \author{Justin Bed\H{o}}
-\title{Exploration of deep mutational scanning data with unsupervised methods}
+\title{Representation learning of compositional counts: exploration of deep mutational scanning data}
 \date{December 13, 2022}
 
 \begin{document}
 
   \maketitle
 
-  \section{Deep mutational scanning data}
+  \begin{frame}{Variants of Uncertain Significance
+      \footfullcite{Liu2020}}
+    \begin{center}
+      \input{clinvar.tikz}
+    \end{center}
+  \end{frame}
 
-  \begin{frame}{\dms data} Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
-    Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-    Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
-    Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+  \begin{frame}{\dms}
+    \begin{quote} Deep mutational scanning is a method for systematically introducing mutations into a gene and then analyzing the resulting protein products to see how the changes affect the protein's function.
+    \end{quote}
+    \begin{enumerate}
+      \item Growing resource of functional data
+      \item MaveDB
+      \footfullcite{Esposito2019}
+      \footnote{\url{https://www.mavedb.org}} catalogs a number of datasets and provides easy access
+    \end{enumerate}
   \end{frame}
 
-  \section{Compositional data}
+  \begin{frame}{Deep Mutational Scanning: Overview
+      \footfullcite{Fowler2014}}
+    \begin{tikzpicture}
+      \node at (page cs:0,0.75){\(t_0\)};
+      \node at (page cs:0.53,0.75){\(t_1\)};
+      \node(a) at (page cs:-0.75,0.5){\includegraphics[width=0.3
+          \textwidth]{Protein-BRCA1.png}};
+      \node(b) at (page cs:0,0.5){\begin{tikzpicture}
+          \node[circle,draw,fill=cb1] at (page cs:-0.06,0){};
+          \node[circle,draw,fill=cb1] at (page cs:0,0){};
+          \node[circle,draw,fill=cb1] at (page cs:0.06,0){};
+          \node[circle,draw,fill=cb2] at (page cs:-0.06,0.1){};
+          \node[circle,draw,fill=cb2] at (page cs:0.06,0.1){};
+          \node[circle,draw,fill=cb2] at (page cs:0,0.1){};
+          \node[circle,draw,fill=cb3] at (page cs:-0.06,-0.1){};
+          \node[circle,draw,fill=cb3] at (page cs:0,-0.1){};
+          \node[circle,draw,fill=cb3] at (page cs:0.06,-0.1){};
+        \end{tikzpicture}};
+
+      \node(c) at (page cs:0.5,0.5){\begin{tikzpicture}
+          \node[circle,draw,fill=cb1] at (page cs:0.5,0){};
+          \node[circle,draw,fill=cb1] at (page cs:0.56,0){};
+          \node[circle,draw,fill=cb1] at (page cs:0.62,0){};
+          \node[circle,draw,fill=cb1] at (page cs:0.68,0){};
+          \node[circle,draw,fill=cb1] at (page cs:0.74,0){};
+          \node[circle,draw,fill=cb2] at (page cs:0.5,0.1){};
+          \node[circle,draw,fill=cb3] at (page cs:0.5,-0.1){};
+          \node[circle,draw,fill=cb3] at (page cs:0.56,-0.1){};
+        \end{tikzpicture}};
+
+      \node(d) at (page cs:0.2,-0.25){\includegraphics[width=0.3
+          \textwidth]{nextseq500.jpg}};
+
+      \draw[->] (a) -- (b) node[midway,above]{mutagenesis};
+      \draw[->] (b) -- (c) node[midway,above]{selection};
+      \draw[->] (b) -- (d);
+      \draw[->] (c) -- (d);
+    \end{tikzpicture}
+  \end{frame}
+
+  \begin{frame}{Deep Mutational Scanning: Integration issues}
+    \begin{enumerate}
+      \item Scores are calculated in a variety of ways, e.g., Rubin et al.
+      \footfullcite{Rubin2017}:
+      \[L_{v,t}=\log\left(\frac{(c_{v,t}+\frac12)(c_{\mathrm{wt},0}+\frac12)}{(c_{v,0}+\frac12)(c_{\mathrm{wt},t}+\frac12)}\right)  \]
+    \end{enumerate}
+  \end{frame}
 
   \begin{frame}{Basics}
     \begin{definition}[Compositional data] Data \(X \in \R^{n \times d}\) is compositional if rows \(\bx_i\) are in the simplex
-      \[S^d=\{\,\bx \in \R^d : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\}  \]
+      \[S^d=\{\,\bx \in \R^d : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\}    \]
       for constant \(\kappa > 0\).
     \end{definition} Information is therefore given only by the ratios of components and any composition can be normalised to the standard simplex where \(\kappa = 1\) (c.f., dividing by library size).
   \end{frame}
@@ -107,10 +167,10 @@
   \begin{frame}{Isomorphisms to Euclidean vector spaces} The simplex forms a \(d-1\) dimensional Euclidean vector space
     \footfullcite{Aitchison1982}:
     \begin{definition}[\ac{alr}]
-      \[\alr(\bx)_i = \log \frac{x_i}{x_0}  \]
+      \[\alr(\bx)_i = \log \frac{x_i}{x_0}    \]
     \end{definition}
     \begin{definition}[\ac{clr}]
-      \[\clr(\bx)_i = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}}  \]
+      \[\clr(\bx)_i = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}}    \]
     \end{definition}
   \end{frame}
 
@@ -148,7 +208,7 @@
 
   \begin{frame}{Traditional
       \ac{pca}} Given \(\X\in \R^{n\times d}\) minimise loss
-    \[\ell_{\textsc{pca}} \triangleq {\lVert \X - \V\A \rVert}^2_{\textrm{F}}       \]
+    \[\ell_{\textsc{pca}} \triangleq {\lVert \X - \V\A \rVert}^2_{\textrm{F}}         \]
     s.t.
     \(\V \in \R^{n \times k}\), \(\A \in \R^{k \times d}\), and \(\V^\intercal \V = \I\).
 
@@ -161,24 +221,24 @@
       \ac{pca}}
     \begin{definition}{Bregman Divergence} Let \(\varphi \colon \R^d \to \R\) be a smooth ($C^1$) convex function on convex set \(\Omega\).
       The Bregman divergence \(D_\varphi\) with generator \(\varphi\) is
-      \[ D_\varphi\left(\bu\,\Vert\,\bv\right) \triangleq \varphi(\bu)-\varphi(\bv)-\langle \nabla\varphi(\bv),\bu-\bv\rangle.       \]
+      \[ D_\varphi\left(\bu\,\Vert\,\bv\right) \triangleq \varphi(\bu)-\varphi(\bv)-\langle \nabla\varphi(\bv),\bu-\bv\rangle.         \]
     \end{definition}
 
     Denote the convex conjugate of \(\varphi\) as \(\varphi^*(\bu) \triangleq \sup_\bv\left\{\langle \bu,\bv\rangle-\varphi(\bv)\right\}\).
     The exponential family
     \ac{pca} is then given by minimising loss
-    \[\ell_{\varphi} \triangleq D_\varphi\left(\X\,\Vert\,\nabla\varphi^*\left(\V\A\right)\right)       \]
+    \[\ell_{\varphi} \triangleq D_\varphi\left(\X\,\Vert\,\nabla\varphi^*\left(\V\A\right)\right)         \]
     under the same constraints as previously, approximating \(\X \sim \nabla\varphi^*\left(\V\A\right)\).
   \end{frame}
 
   \begin{frame}{Aitchison's simplex and exponential
       \ac{pca}} Aitchison's log-transformation is a dual affine coordinate space made explicit with
-    \[\varphi(z) = z\log(z) - z \Leftrightarrow \varphi^*(z) = e^z,    \]
+    \[\varphi(z) = z\log(z) - z \Leftrightarrow \varphi^*(z) = e^z,      \]
     but what about normalisation?
 
     Consider
     \ac{alr}:
-    \[\alr(\bx) \triangleq x_0 \sum_{i=1}^d\varphi\left(\frac{x_i}{x_0}\right) \Leftrightarrow \alr^*(\bx) = x_0\sum_{i=1}^d e^{\frac{x_i}{x_0}}  \]
+    \[\alr(\bx) \triangleq x_0 \sum_{i=1}^d\varphi\left(\frac{x_i}{x_0}\right) \Leftrightarrow \alr^*(\bx) = x_0\sum_{i=1}^d e^{\frac{x_i}{x_0}}    \]
 
   \end{frame}
 
@@ -186,9 +246,9 @@
     \begin{theorem}{Scaled Bregman
         \footfullcite{nock2016scaled}} Let \(\varphi \colon \mathcal{X} \to \R\) be convex differentiable and \(g \colon \mathcal{X} \to \R\) be differentiable.
       Then
-      \[g(\bx)\cdot D_\varphi\left(\frac{\bx}{g(\bx)}\,\middle\Vert\,\frac{\by}{g(\by)}\right) =  D_{\breve{\varphi}}\left(\bx\,\middle\Vert\,\by\right)  \]
+      \[g(\bx)\cdot D_\varphi\left(\frac{\bx}{g(\bx)}\,\middle\Vert\,\frac{\by}{g(\by)}\right) =  D_{\breve{\varphi}}\left(\bx\,\middle\Vert\,\by\right)    \]
       where
-      \[\breve{\varphi} \triangleq g(\bx) \cdot \varphi\left(\frac{x}{g(\bx)}\right)   \]
+      \[\breve{\varphi}(\bx) \triangleq g(\bx) \cdot \varphi\left(\frac{\bx}{g(\bx)}\right)     \]
     \end{theorem}
 
     Avalos et al.
-- 
cgit v1.2.3