diff options
Diffstat (limited to 'slides.tex')
-rw-r--r-- | slides.tex | 119 |
1 files changed, 75 insertions, 44 deletions
@@ -5,7 +5,7 @@ \usefonttheme{professionalfonts} \setbeamerfont{footnote}{size= \tiny} - \usepackage{unicode-math} +\usepackage{unicode-math} \usepackage{microtype} @@ -29,7 +29,6 @@ \DeclareMathOperator{\alr}{alr} \DeclareMathOperator{\clr}{clr} - \usepackage[natbib=true,url=false,style=verbose-ibid]{biblatex} \addbibresource{slides.bib} \AtBeginBibliography{\small} @@ -167,20 +166,30 @@ \begin{frame}{Deep Mutational Scanning: Integration issues} \begin{enumerate} - \item Scores calculated a variety of ways, e.g., Rubin et al. - \footfullcite{Rubin2017}: - \[L_{v,t}=\log\left(\frac{(c_{v,t}+\frac12)(c_{wt,0}+\frac12)}{(c_{v,0}+\frac12)(c_{wt,t}+\frac12)}\right) \] \item Assays can measure different properties \item Numerous different experimental designs + \item Scores calculated a variety of ways, e.g., Rubin et al. + \footfullcite{Rubin2017}: + \[L_{v,t}=\log\left(\frac{(c_{v,t}+\frac12)(c_{wt,0}+\frac12)}{(c_{v,0}+\frac12)(c_{wt,t}+\frac12)}\right) \] \end{enumerate} \end{frame} + \begin{frame}{Representational learning on + \ac{dms} data} For a given protein: + \begin{itemize} + \item Learn a representation of the available + \ac{dms} data + \item unsupervised to deal with varying designs + \item work on counts not scores + \end{itemize} + \end{frame} + \begin{frame}{Compositional simplex} \begin{columns}[T] \begin{column}{.63 \textwidth} - \begin{definition}[Compositional data] Data \(X \in \R^{n \times d}\) is compositional if rows \(\bx_i\) are in the simplex - \[S^d=\{\,\bx \in \R^d : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\} \] + \begin{definition}[Compositional data] Data \(X \in \R_{\geq 0}^{n \times d}\) is compositional if rows \(\bx_i\) are in the simplex + \[S^d=\{\,\bx \in \R^d_{\geq 0} : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\} \] for constant \(\kappa > 0\). 
\end{definition} \end{column} @@ -201,10 +210,10 @@ \begin{frame}{Isomorphisms to Euclidean vector spaces} The simplex forms a \(d-1\) dimensional Euclidean vector space \footfullcite{Aitchison1982}: \begin{definition}[\ac{alr}] - \[\alr_i(\bx) = \log \frac{x_i}{x_0} \] + \[\alr_i(\bx) = \log \frac{x_i}{x_0} \] \end{definition} \begin{definition}[\ac{clr}] - \[\clr_i(\bx) = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}} \] + \[\clr_i(\bx) = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}} \] \end{definition} \end{frame} @@ -220,22 +229,14 @@ \ac{pca} \end{enumerate} \end{block} + \pause \begin{block}{Problems} \begin{itemize} \item Zeros: \begin{enumerate} - \item $\log(0)$ undefined + \item \(\log(0)\) undefined \(\Rightarrow\) can't handle unobserved components \item geometric mean is \(0\) \(\Rightarrow\) \ac{clr} is undefined - \item - \ac{alr} is undefined for unobserved components in the ref. - \end{enumerate} - \item Interpretation: - \begin{enumerate} - \item - \ac{alr} is not an isometry - \item - \ac{clr} is degenerate \end{enumerate} \end{itemize} \end{block} @@ -243,36 +244,36 @@ \begin{frame}{Traditional \ac{pca}} Given \(\X\in \R^{n\times d}\) minimise loss - \[\ell_{\textsc{pca}} \triangleq {\lVert \X - \V\A \rVert}^2_{\textrm{F}}\] + \[\ell_{\textsc{pca}} \triangleq {\lVert \X - \V\A \rVert}^2_{\textrm{F}} \] s.t. \(\V \in \R^{n \times k}\), \(\A \in \R^{k \times d}\), and \(\V^\intercal \V = \I\). - Has been generalised to exponential families + \pause Has been generalised to exponential families \footfullcite{collins2001generalization} via Bregman divergences \footfullcite{Amari2016-ua}. \end{frame} \begin{frame}{Exponential family \ac{pca}} - \begin{definition}[Bregman Divergence] Let \(\varphi \colon \R^d \to \R\) be a smooth ($C^1$) convex function on convex set \(\Omega\). + \begin{definition}[Bregman Divergence] Let \(\varphi \colon \R^d \to \R\) be a differentiable convex function. 
The Bregman divergence \(D_\varphi\) with generator \(\varphi\) is - \[ D_\varphi\left(\bu\,\Vert\,\bv\right) \triangleq \varphi(\bu)-\varphi(\bv)-\langle \nabla\varphi(\bv),\bu-\bv\rangle. \] + \[ D_\varphi\left(\bu\,\Vert\,\bv\right) \triangleq \varphi(\bu)-\varphi(\bv)-\langle \nabla\varphi(\bv),\bu-\bv\rangle. \] \end{definition} - Denote the convex conjugate of \(\varphi\) as \(\varphi^*(\bu) \triangleq \sup_\bv\left\{\langle \bu,\bv\rangle-\varphi(\bv)\right\}\). + \pause Denote the convex conjugate of \(\varphi\) as \(\varphi^*(\bu) \triangleq \sup_\bv\left\{\langle \bu,\bv\rangle-\varphi(\bv)\right\}\). The exponential family \ac{pca} is then given by minimising loss - \[\ell_{\varphi} \triangleq D_\varphi\left(\X\,\Vert\,\nabla\varphi^*\left(\V\A\right)\right) \] + \[\ell_{\varphi} \triangleq D_\varphi\left(\X\,\Vert\,\nabla\varphi^*\left(\V\A\right)\right) \] under the same constraints as previously, approximating \(\X \sim \nabla\varphi^*\left(\V\A\right)\). \end{frame} \begin{frame}{Aitchison's simplex and exponential \ac{pca}} Aitchison's log-transformation is a dual affine coordinate space made explicit with - \[\varphi(z) = z\log(z) - z \Leftrightarrow \varphi^*(z) = e^z,\] + \[\varphi(z) = z\log(z) - z \Leftrightarrow \varphi^*(z) = e^z, \] but what about normalisation? - Consider + \pause Consider \ac{alr}: - \[\alr(\bx) \triangleq x_0 \sum_{i=1}^d\varphi\left(\frac{x_i}{x_0}\right) \Leftrightarrow \alr^*(\bx) = x_0\sum_{i=1}^d e^{\frac{x_i}{x_0}} \] + \[\alr(\bx) \triangleq x_0 \sum_{i=1}^d\varphi\left(\frac{x_i}{x_0}\right) \Leftrightarrow \alr^*(\bx) = x_0\sum_{i=1}^d e^{\frac{x_i}{x_0}} \] \end{frame} @@ -280,9 +281,9 @@ \begin{theorem}[Scaled Bregman \footfullcite{nock2016scaled}] Let \(\varphi \colon \mathcal{X} \to \R\) be convex differentiable and \(g \colon \mathcal{X} \to \R\) be differentiable. 
Then - \[g(\bx)\cdot D_\varphi\left(\frac{\bx}{g(\bx)}\,\middle\Vert\,\frac{\by}{g(\by)}\right) = D_{\breve{\varphi}}\left(\bx\,\middle\Vert\,\by\right) \] + \[D_{\breve{\varphi}}\left(\bx\,\middle\Vert\,\by\right) = g(\bx)\cdot D_\varphi\left(\frac{\bx}{g(\bx)}\,\middle\Vert\,\frac{\by}{g(\by)}\right) \] where - \[\breve{\varphi} \triangleq g(\bx) \cdot \varphi\left(\frac{x}{g(\bx)}\right)\] + \[\breve{\varphi} \triangleq g(\bx) \cdot \varphi\left(\frac{\bx}{g(\bx)}\right) \] \end{theorem} Avalos et al. @@ -291,11 +292,16 @@ \ac{clr} recently. \end{frame} - \begin{frame}{Medians instead of means} - Zeros still a problem, as geometric mean is $0$. Instead, use median as gague - function. + \begin{frame}{\textsc{Clr} undefined if any component is unobserved} + \begin{itemize} + \item Zeros still a problem for + \ac{clr} as geometric mean is \(0\). + \item[\(\Rightarrow\)] use median as gauge function. + \end{itemize} \end{frame} + \section{Experiments} + \begin{frame}{Activation-Induced Deaminase \footfullcite{Gajula2014}} \begin{tikzpicture}[remember picture,overlay] @@ -356,17 +362,42 @@ \end{frame} \begin{frame}{\textsc{Brca1}: Positional effects} - \begin{columns}[T] - \begin{column}{.4\textwidth} - \[\V\A+\U^\intercal\Q\PP\] - where $\U \in \R^n$, $\Q \in \R^l$, $\PP \in \mathbb{2}^{l\times d}$ - \end{column}\hfill - \begin{column}{.58\textwidth} - \begin{tikzpicture} - \node[scale=.45]{\input{position.tikz}}; - \end{tikzpicture} - \end{column} - \end{columns} + \begin{columns}[T] + \begin{column}{.4 + \textwidth} + \vspace{1cm} + \[\V\A+\U^\intercal\Q\PP \] + where \(\U \in \R^n\), \(\Q \in \R^l\), \(\PP \in \mathbb{2}^{l\times d}\) + \end{column} + \hfill + \begin{column}{.58 + \textwidth} + \begin{tikzpicture} + \node[scale=.45]{\input{position.tikz}}; + \end{tikzpicture} + \end{column} + \end{columns} + \end{frame} + + \begin{frame}{Acknowledgements} + \begin{columns}[T] + \begin{column}{.4 + \textwidth} + \textbf{Papenfuss lab} + \begin{itemize} + \item Tony 
Papenfuss + \item + \textit{Alan Rubin} + \item + \textit{Matthew Wakefield} + \end{itemize} + \end{column} + \hfill + \begin{column}{.4 + \textwidth} + \textbf{Stafford Fox Medical Research Foundation} + \end{column} + \end{columns} \end{frame} \end{document} |