From e72ee50421716fde6646afd3a444b993413d3440 Mon Sep 17 00:00:00 2001
From: Justin Bedo <cu@cua0.org>
Date: Fri, 9 Dec 2022 10:46:11 +1100
Subject: add illustrative figures and abstract

---
 slides.tex | 99 +++++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 76 insertions(+), 23 deletions(-)

(limited to 'slides.tex')

diff --git a/slides.tex b/slides.tex
index f8121d9..1cf5229 100644
--- a/slides.tex
+++ b/slides.tex
@@ -5,12 +5,13 @@
 \usefonttheme{professionalfonts}
 \setbeamerfont{footnote}{size=
   \tiny}
+  \usepackage{unicode-math}
 
 \usepackage{microtype}
 
 \usepackage{tikz}
-\usetikzlibrary{shapes}
-\usetikzlibrary{bayesnet}
+\usepackage{pgfplots}
+\usepgfplotslibrary{ternary}
 \usepackage{stmaryrd}
 
 \newcommand{\R}{\mathbb{R}}
@@ -22,9 +23,13 @@
 \newcommand{\V}{\mathbf{V}}
 \newcommand{\A}{\mathbf{A}}
 \newcommand{\I}{\mathbf{I}}
+\newcommand{\U}{\mathbf{u}}
+\newcommand{\Q}{\mathbf{q}}
+\newcommand{\PP}{\mathbf{P}}
 \DeclareMathOperator{\alr}{alr}
 \DeclareMathOperator{\clr}{clr}
 
+
 \usepackage[natbib=true,url=false,style=verbose-ibid]{biblatex}
 \addbibresource{slides.bib}
 \AtBeginBibliography{\small}
@@ -84,9 +89,19 @@
 \definecolor{cb3}{HTML}{7570b3}
 
 \author{Justin Bed\H{o}}
-\title{Representation learning of compositional counts: exploration of deep mutational scanning data}
+\title{Representation learning of compositional counts: an exploration of deep mutational scanning data}
 \date{December 13, 2022}
 
+% Abstract:
+
+% Deep mutational scanning data provides important functional information on the effects of protein variants. Many different aspects of proteins can be assayed, many different experimental designs are possible, and many different scores are computed, leading to very heterogeneous data that is difficult to integrate.
+
+% In this talk I will explore a representation learning approach on raw count data. This technique uses recent methods combining compositional data analysis with a generalised form of principal component analysis to infer protein representations without specific knowledge of the experimental design or assay type.
+
+% Bio
+
+% Dr Justin Bedő is the Stafford Fox Centenary Fellow in Bioinformatics and Computational Biology at the Walter and Eliza Hall Institute. He studied computer science followed by a PhD in machine learning at the Australian National University and was awarded his doctorate in 2009. He subsequently worked as a researcher across both academia and industry at NICTA, IBISC (Informatique, BioInformatique, Systèmes Complexes) CNRS, and IBM Research on machine learning methods development and applications to biology before joining the WEHI in 2016.
+
 \begin{document}
 
   \maketitle
@@ -105,6 +120,7 @@
       \item Growing resource of functional data
       \item MaveDB
       \footfullcite{Esposito2019}
+      \unskip
       \footnote{\url{https://www.mavedb.org}} catalogs a number of datasets and provides easy access
     \end{enumerate}
   \end{frame}
@@ -153,24 +169,42 @@
     \begin{enumerate}
       \item Scores calculated a variety of ways, e.g., Rubin et al.
       \footfullcite{Rubin2017}:
-      \[L_{v,t}=\log\left(\frac{(c_{v,t}+\frac12)(c_{wt,0}+\frac12)}{(c_{v,0}+\frac12)(c_{wt,t}+\frac12)}\right)  \]
+      \[L_{v,t}=\log\left(\frac{(c_{v,t}+\frac12)(c_{wt,0}+\frac12)}{(c_{v,0}+\frac12)(c_{wt,t}+\frac12)}\right)   \]
+      \item Assays can measure different properties
+      \item Numerous different experimental designs
     \end{enumerate}
   \end{frame}
 
-  \begin{frame}{Basics}
-    \begin{definition}[Compositional data] Data \(X \in \R^{n \times d}\) is compositional if rows \(\bx_i\) are in the simplex
-      \[S^d=\{\,\bx \in \R^d : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\}    \]
-      for constant \(\kappa > 0\).
-    \end{definition} Information is therefore given only by the ratios of components and any composition can be normalised to the standard simplex where \(\kappa = 1\) (c.f., dividing by library size).
+  \begin{frame}{Compositional simplex}
+    \begin{columns}[T]
+      \begin{column}{.63
+          \textwidth}
+        \begin{definition}[Compositional data] Data \(X \in \R^{n \times d}\) is compositional if rows \(\bx_i\) are in the simplex
+          \[S^d=\{\,\bx \in \R^d : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\}     \]
+          for constant \(\kappa > 0\).
+        \end{definition}
+      \end{column}
+      \hfill
+      \begin{column}{.26
+          \textwidth}
+        \begin{tikzpicture}[scale=0.5]
+          \begin{ternaryaxis}
+            \addplot3 coordinates{(0.25,0.5,0.25)};
+            \path (0.25,0.5,0.25) coordinate (M) (1,0,0) coordinate (C) (0,1,0) coordinate (A) (0,0,1) coordinate (B);
+          \end{ternaryaxis}
+        \end{tikzpicture}
+      \end{column}
+    \end{columns}
+    \vspace{10pt} \(\Rightarrow\) Information is given only by the ratios of components and any composition can be normalised to the standard simplex where \(\kappa = 1\) (cf.\ dividing by library size).
   \end{frame}
 
   \begin{frame}{Isomorphisms to Euclidean vector spaces} The simplex forms a \(d-1\) dimensional Euclidean vector space
     \footfullcite{Aitchison1982}:
     \begin{definition}[\ac{alr}]
-      \[\alr(\bx)_i = \log \frac{x_i}{x_0}    \]
+      \[\alr_i(\bx) = \log \frac{x_i}{x_0}     \]
     \end{definition}
     \begin{definition}[\ac{clr}]
-      \[\clr(\bx)_i = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}}    \]
+      \[\clr_i(\bx) = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}}     \]
     \end{definition}
   \end{frame}
 
@@ -190,6 +224,7 @@
       \begin{itemize}
         \item Zeros:
         \begin{enumerate}
+          \item $\log(0)$ undefined
           \item geometric mean is \(0\) \(\Rightarrow\)
           \ac{clr} is undefined
           \item
@@ -198,7 +233,7 @@
         \item Interpretation:
         \begin{enumerate}
           \item
-          \ac{alr} is not isometry
+          \ac{alr} is not an isometry
           \item
           \ac{clr} is degenerate
         \end{enumerate}
@@ -208,7 +243,7 @@
 
   \begin{frame}{Traditional
       \ac{pca}} Given \(\X\in \R^{n\times d}\) minimise loss
-    \[\ell_{\textsc{pca}} \triangleq {\lVert \X - \V\A \rVert}^2_{\textrm{F}}         \]
+    \[\ell_{\textsc{pca}} \triangleq {\lVert \X - \V\A \rVert}^2_{\textrm{F}}\]
     s.t.
     \(\V \in \R^{n \times k}\), \(\A \in \R^{k \times d}\), and \(\V^\intercal \V = \I\).
 
@@ -219,36 +254,35 @@
 
   \begin{frame}{Exponential family
       \ac{pca}}
-    \begin{definition}{Bregman Divergence} Let \(\varphi \colon \R^d \to \R\) be a smooth ($C^1$) convex function on convex set \(\Omega\).
+    \begin{definition}[Bregman Divergence] Let \(\varphi \colon \R^d \to \R\) be a smooth ($C^1$) convex function on convex set \(\Omega\).
       The Bregman divergence \(D_\varphi\) with generator \(\varphi\) is
-      \[ D_\varphi\left(\bu\,\Vert\,\bv\right) \triangleq \varphi(\bu)-\varphi(\bv)-\langle \nabla\varphi(\bv),\bu-\bv\rangle.         \]
+      \[ D_\varphi\left(\bu\,\Vert\,\bv\right) \triangleq \varphi(\bu)-\varphi(\bv)-\langle \nabla\varphi(\bv),\bu-\bv\rangle.          \]
     \end{definition}
-
     Denote the convex conjugate of \(\varphi\) as \(\varphi^*(\bu) \triangleq \sup_\bv\left\{\langle \bu,\bv\rangle-\varphi(\bv)\right\}\).
     The exponential family
     \ac{pca} is then given by minimising loss
-    \[\ell_{\varphi} \triangleq D_\varphi\left(\X\,\Vert\,\nabla\varphi^*\left(\V\A\right)\right)         \]
+    \[\ell_{\varphi} \triangleq D_\varphi\left(\X\,\Vert\,\nabla\varphi^*\left(\V\A\right)\right)          \]
     under the same constraints as previously, approximating \(\X \sim \nabla\varphi^*\left(\V\A\right)\).
   \end{frame}
 
   \begin{frame}{Aitchison's simplex and exponential
       \ac{pca}} Aitchison's log-transformation is a dual affine coordinate space made explicit with
-    \[\varphi(z) = z\log(z) - z \Leftrightarrow \varphi^*(z) = e^z,      \]
+    \[\varphi(z) = z\log(z) - z \Leftrightarrow \varphi^*(z) = e^z,\]
     but what about normalisation?
 
     Consider
     \ac{alr}:
-    \[\alr(\bx) \triangleq x_0 \sum_{i=1}^d\varphi\left(\frac{x_i}{x_0}\right) \Leftrightarrow \alr^*(\bx) = x_0\sum_{i=1}^d e^{\frac{x_i}{x_0}}    \]
+    \[\alr(\bx) \triangleq x_0 \sum_{i=1}^d\varphi\left(\frac{x_i}{x_0}\right) \Leftrightarrow \alr^*(\bx) = x_0\sum_{i=1}^d e^{\frac{x_i}{x_0}}     \]
 
   \end{frame}
 
   \begin{frame}{Scaled Bregman}
-    \begin{theorem}{Scaled Bregman
-        \footfullcite{nock2016scaled}} Let \(\varphi \colon \mathcal{X} \to \R\) be convex differentiable and \(g \colon \mathcal{X} \to \R\) be differentiable.
+    \begin{theorem}[Scaled Bregman
+        \footfullcite{nock2016scaled}] Let \(\varphi \colon \mathcal{X} \to \R\) be convex differentiable and \(g \colon \mathcal{X} \to \R\) be differentiable.
       Then
-      \[g(\bx)\cdot D_\varphi\left(\frac{\bx}{g(\bx)}\,\middle\Vert\,\frac{\by}{g(\by)}\right) =  D_{\breve{\varphi}}\left(\bx\,\middle\Vert\,\by\right)    \]
+      \[g(\bx)\cdot D_\varphi\left(\frac{\bx}{g(\bx)}\,\middle\Vert\,\frac{\by}{g(\by)}\right) =  D_{\breve{\varphi}}\left(\bx\,\middle\Vert\,\by\right)     \]
       where
-      \[\breve{\varphi} \triangleq g(\bx) \cdot \varphi\left(\frac{x}{g(\bx)}\right)     \]
+      \[\breve{\varphi}(\bx) \triangleq g(\bx) \cdot \varphi\left(\frac{\bx}{g(\bx)}\right)\]
     \end{theorem}
 
     Avalos et al.
@@ -257,6 +291,11 @@
     \ac{clr} recently.
   \end{frame}
 
+  \begin{frame}{Medians instead of means}
+    Zeros are still a problem, as the geometric mean is $0$. Instead, use the median as the gauge
+    function.
+  \end{frame}
+
   \begin{frame}{Activation-Induced Deaminase
       \footfullcite{Gajula2014}}
     \begin{tikzpicture}[remember picture,overlay]
@@ -316,4 +355,18 @@
     \end{tikzpicture}
   \end{frame}
 
+  \begin{frame}{\textsc{Brca1}: Positional effects}
+  \begin{columns}[T]
+  \begin{column}{.4\textwidth}
+  \[\V\A+\U^\intercal\Q\PP\]
+    where $\U \in \R^n$, $\Q \in \R^l$, $\PP \in \mathbb{2}^{l\times d}$
+    \end{column}\hfill
+    \begin{column}{.58\textwidth}
+  \begin{tikzpicture}
+    \node[scale=.45]{\input{position.tikz}};
+  \end{tikzpicture}
+  \end{column}
+  \end{columns}
+  \end{frame}
+
 \end{document}
-- 
cgit v1.2.3