\documentclass[aspectratio=169,UKenglish]{beamer}

\usetheme{metropolis}
\usepackage[sfdefault]{FiraSans}
\usefonttheme{professionalfonts}
\setbeamerfont{footnote}{size=
  \tiny}
\usepackage{unicode-math}

\usepackage{microtype}

\usepackage{tikz}
\usepackage{pgfplots}
\usepgfplotslibrary{ternary}
\usepackage{stmaryrd}

% --- Math shorthands -------------------------------------------------------
% Number sets.
\newcommand*{\R}{\mathbb{R}}
% Bold lower-case symbols (vectors).
\newcommand*{\bx}{\mathbf{x}}
\newcommand*{\by}{\mathbf{y}}
\newcommand*{\bu}{\mathbf{u}}
\newcommand*{\bv}{\mathbf{v}}
% Bold upper-case symbols (matrices).
\newcommand*{\X}{\mathbf{X}}
\newcommand*{\V}{\mathbf{V}}
\newcommand*{\A}{\mathbf{A}}
\newcommand*{\I}{\mathbf{I}}
\newcommand*{\PP}{\mathbf{P}}
% Upper-case macro names that nevertheless expand to bold lower-case letters.
\newcommand*{\U}{\mathbf{u}}
\newcommand*{\Q}{\mathbf{q}}
% Upright operator names for the additive and centred log-ratio transforms.
\DeclareMathOperator{\alr}{alr}
\DeclareMathOperator{\clr}{clr}

\usepackage[natbib=true,url=false,style=verbose-ibid]{biblatex}
\addbibresource{slides.bib}
\AtBeginBibliography{\small}

% Tikz relative positioning https://tex.stackexchange.com/questions/89588/positioning-relative-to-page-in-tikz
\makeatletter
% \parsecomma <x>,<y>\endparsecomma
%   Splits a comma-separated pair and stores the two halves in \page@x and
%   \page@y.  The delimiter contains no space token, and every line ends in
%   `%' or a control word so that no stray spaces leak into the definition.
\def\parsecomma#1,#2\endparsecomma{%
  \def\page@x{#1}%
  \def\page@y{#2}%
}
% Coordinate system `page cs:<x>,<y>'.
%   <x> and <y> are factors relative to the `current page' node:
%   -1 maps to the west/south edge, 0 to the centre and +1 to the
%   east/north edge, i.e.
%     result = (max - min)/2 * factor + (max + min)/2
%   on each axis.  The factors are wrapped in parentheses so that an
%   expression such as `0.5-0.1' is evaluated as a whole before scaling.
%   NOTE(review): the TikZ manual describes the `current page' node for
%   pictures using `remember picture'; most pictures in this file do not
%   set that option -- confirm the resulting placement is what is intended.
\tikzdeclarecoordinatesystem{page}{%
  \parsecomma#1\endparsecomma
  % Upper-right corner of the page -> (\pgf@xc, \pgf@yc).
  \pgfpointanchor{current page}{north east}%
  \pgf@xc=\pgf@x
  \pgf@yc=\pgf@y
  % Lower-left corner of the page -> (\pgf@xb, \pgf@yb).
  \pgfpointanchor{current page}{south west}%
  \pgf@xb=\pgf@x
  \pgf@yb=\pgf@y
  % Interpolate on each axis.  The dimension scanner expands \pgfmathresult
  % by itself, so no \expandafter chain is needed; \relax terminates the
  % assignment so that scanning cannot run on into whatever follows.
  \pgfmathparse{(\pgf@xc-\pgf@xb)/2.*(\page@x)+(\pgf@xc+\pgf@xb)/2.}%
  \pgf@x=\pgfmathresult pt\relax
  \pgfmathparse{(\pgf@yc-\pgf@yb)/2.*(\page@y)+(\pgf@yc+\pgf@yb)/2.}%
  \pgf@y=\pgfmathresult pt\relax
}
\makeatother

\usepackage{acronym}
\usepackage{xspace}
% Typeset the short form of every acronym in small caps.
\renewcommand*{\acsfont}[1]{\textsc{#1}}
% Acronyms used on the slides (key, long form).
\newacro{dms}{Deep Mutational Scanning}
\newacro{clr}{Centred Log-Ratio}
\newacro{alr}{Additive Log-Ratio}
\newacro{pca}{Principal Component Analysis}
% \dms: shorthand for \ac{dms} that lets xspace decide whether a following
% space is needed.  The definition must stay on one line (or end the first
% line with `%'): a line break between \ac{dms} and \xspace puts a literal
% space into the macro, which then appears in addition to the one xspace adds.
\newcommand{\dms}{\ac{dms}\xspace}

% Three-colour palette used as fill colours for the variant circles in the
% overview diagram.
% NOTE(review): the hex values look like the first three entries of the
% ColorBrewer "Dark2" scheme -- confirm before documenting it as such.
\definecolor{cb1}{HTML}{1b9e77}
\definecolor{cb2}{HTML}{d95f02}
\definecolor{cb3}{HTML}{7570b3}

\author{Justin Bed\H{o}}
\title{Representation learning of compositional counts: an exploration of deep mutational scanning data}
\date{July 25, 2023}

\begin{document}

  \maketitle

  \begin{frame}{Variants of Uncertain Significance\footfullcite{Liu2020}}
    \begin{center}
      \input{clinvar.tikz}
    \end{center}
  \end{frame}

  \begin{frame}{\dms}
    \begin{quote} Deep mutational scanning is a method for systematically introducing mutations into a gene and then analyzing the resulting protein products to see how the changes affect the protein's function.
    \end{quote}
    \begin{enumerate}
      \item Growing resource of functional data
      \item MaveDB\footfullcite{Esposito2019}\unskip
      \footnote{\url{https://www.mavedb.org}} catalogues a number of datasets and provides easy access
    \end{enumerate}
  \end{frame}

  \begin{frame}{Deep Mutational Scanning: Overview\footfullcite{Fowler2014}}
    \begin{tikzpicture}
      \node at (page cs:0,0.75){\(t_0\)};
      \node at (page cs:0.53,0.75){\(t_1\)};
      \node(a) at (page cs:-0.75,0.5){\includegraphics[width=0.3
          \textwidth]{Protein-BRCA1.png}};
      \node(b) at (page cs:0,0.5){\begin{tikzpicture}
          \node[circle,draw,fill=cb1] at (page cs:-0.06,0){};
          \node[circle,draw,fill=cb1] at (page cs:0,0){};
          \node[circle,draw,fill=cb1] at (page cs:0.06,0){};
          \node[circle,draw,fill=cb2] at (page cs:-0.06,0.1){};
          \node[circle,draw,fill=cb2] at (page cs:0.06,0.1){};
          \node[circle,draw,fill=cb2] at (page cs:0,0.1){};
          \node[circle,draw,fill=cb3] at (page cs:-0.06,-0.1){};
          \node[circle,draw,fill=cb3] at (page cs:0,-0.1){};
          \node[circle,draw,fill=cb3] at (page cs:0.06,-0.1){};
        \end{tikzpicture}};

      \node(c) at (page cs:0.5,0.5){\begin{tikzpicture}
          \node[circle,draw,fill=cb1] at (page cs:0.5,0){};
          \node[circle,draw,fill=cb1] at (page cs:0.56,0){};
          \node[circle,draw,fill=cb1] at (page cs:0.62,0){};
          \node[circle,draw,fill=cb1] at (page cs:0.68,0){};
          \node[circle,draw,fill=cb1] at (page cs:0.74,0){};
          \node[circle,draw,fill=cb2] at (page cs:0.5,0.1){};
          \node[circle,draw,fill=cb3] at (page cs:0.5,-0.1){};
          \node[circle,draw,fill=cb3] at (page cs:0.56,-0.1){};
        \end{tikzpicture}};

      \node(d) at (page cs:0.2,-0.25){\includegraphics[width=0.3
          \textwidth]{nextseq500.jpg}};

      \draw[->] (a) -- (b) node[midway,above]{mutagenesis};
      \draw[->] (b) -- (c) node[midway,above]{selection};
      \draw[->] (b) -- (d);
      \draw[->] (c) -- (d);
    \end{tikzpicture}
  \end{frame}

  \begin{frame}{Deep Mutational Scanning: Integration issues}
    \begin{enumerate}
      \item Assays can measure different properties
      \item Numerous different experimental designs
      \item Scores are calculated in a variety of ways, e.g., Rubin et al.\footfullcite{Rubin2017}:
      \[L_{v,t}=\log\left(\frac{(c_{v,t}+\frac12)(c_{wt,0}+\frac12)}{(c_{v,0}+\frac12)(c_{wt,t}+\frac12)}\right)    \]
    \end{enumerate}
  \end{frame}

  \begin{frame}{Representation learning on \ac{dms} data}
    For a given protein:
    \begin{itemize}
      \item Learn a representation of the available
      \ac{dms} data
      \item Unsupervised, to deal with varying designs
      \item Work on counts, not scores
    \end{itemize}
  \end{frame}

  \begin{frame}{Compositional simplex}
    \begin{columns}[T]
      \begin{column}{.63
          \textwidth}
        \begin{definition}[Compositional data] Data \(X \in \R_{\geq 0}^{n \times d}\) is compositional if rows \(\bx_i\) are in the simplex
          \[S^d=\{\,\bx \in \R^d_{\geq 0} : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\}      \]
          for constant \(\kappa > 0\).
        \end{definition}
      \end{column}
      \hfill
      \begin{column}{.26
          \textwidth}
        \begin{tikzpicture}[scale=0.5]
          \begin{ternaryaxis}
            \addplot3 coordinates{(0.25,0.5,0.25)};
            \path (0.25,0.5,0.25) coordinate (M) (1,0,0) coordinate (C) (0,1,0) coordinate (A) (0,0,1) coordinate (B);
          \end{ternaryaxis}
        \end{tikzpicture}
      \end{column}
    \end{columns}
    \vspace{10pt} \(\Rightarrow\) Information is given only by the ratios of components and any composition can be normalised to the standard simplex where \(\kappa = 1\) (divide by library size).
  \end{frame}

  \begin{frame}{Isomorphisms to Euclidean vector spaces}
    The simplex forms a \((d-1)\)-dimensional Euclidean vector space\footfullcite{Aitchison1982}:
    \begin{definition}[\ac{alr}]
      \[\alr_i(\bx) = \log \frac{x_i}{x_0}      \]
    \end{definition}
    \begin{definition}[\ac{clr}]
      \[\clr_i(\bx) = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}}      \]
    \end{definition}
  \end{frame}

  \begin{frame}{\textsc{Pca} on
      \ac{dms} data}
    \begin{block}{Transformation approach}
      \begin{enumerate}
        \item Map
        \dms data to Euclidean space via
        \ac{alr} /
        \ac{clr}
        \item Apply standard
        \ac{pca}
      \end{enumerate}
    \end{block}
    \pause
    \begin{block}{Problems}
      \begin{itemize}
        \item Zeros:
        \begin{enumerate}
          \item \(\log(0)\) undefined \(\Rightarrow\) can't handle unobserved components
          \item geometric mean is \(0\) \(\Rightarrow\)
          \ac{clr} is undefined
        \end{enumerate}
      \end{itemize}
    \end{block}
  \end{frame}

  \begin{frame}{Traditional
      \ac{pca}} Given \(\X\in \R^{n\times d}\) minimise loss
    \[\ell_{\textsc{pca}} \triangleq {\lVert \X - \V\A \rVert}^2_{\textrm{F}} \]
    s.t.
    \(\V \in \R^{n \times k}\), \(\A \in \R^{k \times d}\), and \(\V^\intercal \V = \I\).

    \pause Has been generalised to exponential families\footfullcite{collins2001generalization} via Bregman divergences\footfullcite{Amari2016-ua}.
  \end{frame}

  \begin{frame}{Exponential family
      \ac{pca}}
    \begin{definition}[Bregman Divergence] Let \(\varphi \colon \R^d \to \R\) be a differentiable convex function.
      The Bregman divergence \(D_\varphi\) with generator \(\varphi\) is
      \[ D_\varphi\left(\bu\,\Vert\,\bv\right) \triangleq \varphi(\bu)-\varphi(\bv)-\langle \nabla\varphi(\bv),\bu-\bv\rangle.           \]
    \end{definition}
    \pause Denote the convex conjugate of \(\varphi\) as \(\varphi^*(\bu) \triangleq \sup_\bv\left\{\langle \bu,\bv\rangle-\varphi(\bv)\right\}\).
    The exponential family
    \ac{pca} is then given by minimising loss
    \[\ell_{\varphi} \triangleq D_\varphi\left(\X\,\Vert\,\nabla\varphi^*\left(\V\A\right)\right)           \]
    under the same constraints as previously, approximating \(\X \sim \nabla\varphi^*\left(\V\A\right)\).
  \end{frame}

  \begin{frame}{Aitchison's simplex and exponential
      \ac{pca}} Aitchison's log-transformation is a dual affine coordinate space made explicit with
    \[\varphi(z) = z\log(z) - z \Leftrightarrow \varphi^*(z) = e^z, \]
    but what about normalisation?

    \pause Consider
    \ac{alr}:
    \[\alr(\bx) \triangleq x_0 \sum_{i=1}^d\varphi\left(\frac{x_i}{x_0}\right) \Leftrightarrow \alr^*(\bx) = x_0\sum_{i=1}^d e^{\frac{x_i}{x_0}}      \]

  \end{frame}

  \begin{frame}{Scaled Bregman}
    \begin{theorem}[Scaled Bregman\footfullcite{nock2016scaled}] Let \(\varphi \colon \mathcal{X} \to \R\) be convex differentiable and \(g \colon \mathcal{X} \to \R\) be differentiable.
      Then
      \[D_{\breve{\varphi}}\left(\bx\,\middle\Vert\,\by\right) = g(\bx)\cdot D_\varphi\left(\frac{\bx}{g(\bx)}\,\middle\Vert\,\frac{\by}{g(\by)}\right) \]
      where
      \[\breve{\varphi}(\bx) \triangleq g(\bx) \cdot \varphi\left(\frac{\bx}{g(\bx)}\right). \]
    \end{theorem}

    Avalos et al.\footfullcite{avalos2018representation}\ recently considered a relaxed form for \ac{clr}.
  \end{frame}

  \begin{frame}{\textsc{Clr} undefined if any component is unobserved}
    \begin{itemize}
      \item Zeros still a problem for
      \ac{clr} as geometric mean is \(0\).
      \item[\(\Rightarrow\)] use a quantile as the gauge function.
    \end{itemize}
  \end{frame}

  \section{Experiments}

  \begin{frame}{Activation-Induced Deaminase\footfullcite{Gajula2014}}
    \begin{tikzpicture}
      \node at (page cs:-0.7,0.9){\textbf{Bregman}};
      \node at (page cs:0.3,0.9){\textbf{+1-log
          \ac{pca}}};
      \node[scale=0.8] at (page cs:-0.5,0.08){\input{106-samples.tikz}};
      \node[scale=0.8] at (page cs:0.5,0.08){\input{106-samples-log.tikz}};
    \end{tikzpicture}
  \end{frame}

  \begin{frame}{Activation-Induced Deaminase}
    \begin{tikzpicture}
      \node at (page cs:-0.5,0.08){\input{106-Leu113.tikz}};
      \node at (page cs:0.5,0.5){\includegraphics{gku689fig3-a.pdf}};
      \node at (page cs:0.5,-0.25){\includegraphics{gku689fig3-key.pdf}};
    \end{tikzpicture}
  \end{frame}

  \begin{frame}{Activation-Induced Deaminase}
    \begin{tikzpicture}
      \node at (page cs:-0.5,0.08){\input{106-Phe115.tikz}};
      \node at (page cs:0.5,0.5){\includegraphics{gku689fig3-b.pdf}};
      \node at (page cs:0.5,-0.25){\includegraphics{gku689fig3-key.pdf}};
    \end{tikzpicture}
  \end{frame}

  \begin{frame}{Activation-Induced Deaminase}
    \begin{tikzpicture}
      \node at (page cs:-0.5,0.08){\input{106-Glu117.tikz}};
      \node at (page cs:0.5,0.5){\includegraphics{gku689fig3-c.pdf}};
      \node at (page cs:0.5,-0.25){\includegraphics{gku689fig3-key.pdf}};
    \end{tikzpicture}
  \end{frame}

  \begin{frame}{\textsc{Erbb2}\footfullcite{Elazar2016}}
    \begin{tikzpicture}
      \node[scale=0.8] at (page cs: -0.5,0){\input{helix-erbb2.tikz}};
      \node at (page cs: 0.5,0.07){\includegraphics[width=0.4
          \textwidth]{helix-erbb2-pub.jpg}};
    \end{tikzpicture}
  \end{frame}

  \begin{frame}{\textsc{Brca1}\footfullcite{Findlay2018}}
    \begin{tikzpicture}[remember picture,overlay]
      \node[inner sep=0pt] at (5,0.5){\input{brca1-density.tikz}};

      \node[inner sep=0pt] at (11,1.25){\includegraphics{brca1-hist-pub.jpg}};
    \end{tikzpicture}
  \end{frame}

  \begin{frame}{\textsc{Brca1}: Positional effects}
     \centering
      \begin{tikzpicture}
        \node[scale=.45]{\input{position.tikz}};
      \end{tikzpicture}
  \end{frame}

  \begin{frame}{\textsc{Brca1}: Supervision}
    \centering
    \includegraphics[width=.7\linewidth]{supervised-pca.png}
  \end{frame}

  \begin{frame}{Acknowledgements}
    \begin{columns}[T]
      \begin{column}{.4
          \textwidth}
        \textbf{Papenfuss lab}
        \begin{itemize}
          \item Tony Papenfuss
          \item
          \textit{Alan Rubin}
          \item
          \textit{Matthew Wakefield}
        \end{itemize}
      \end{column}
      \hfill
      \begin{column}{.4
          \textwidth}
        \textbf{Stafford Fox Medical Research Foundation}
      \end{column}
    \end{columns}
  \end{frame}

  \begin{frame}[standout]
    Thank you!
  \end{frame}  

\end{document}