summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--slides.tex119
1 files changed, 75 insertions, 44 deletions
diff --git a/slides.tex b/slides.tex
index 1cf5229..c908b50 100644
--- a/slides.tex
+++ b/slides.tex
@@ -5,7 +5,7 @@
\usefonttheme{professionalfonts}
\setbeamerfont{footnote}{size=
\tiny}
- \usepackage{unicode-math}
+\usepackage{unicode-math}
\usepackage{microtype}
@@ -29,7 +29,6 @@
\DeclareMathOperator{\alr}{alr}
\DeclareMathOperator{\clr}{clr}
-
\usepackage[natbib=true,url=false,style=verbose-ibid]{biblatex}
\addbibresource{slides.bib}
\AtBeginBibliography{\small}
@@ -167,20 +166,30 @@
\begin{frame}{Deep Mutational Scanning: Integration issues}
\begin{enumerate}
- \item Scores calculated a variety of ways, e.g., Rubin et al.
- \footfullcite{Rubin2017}:
- \[L_{v,t}=\log\left(\frac{(c_{v,t}+\frac12)(c_{wt,0}+\frac12)}{(c_{v,0}+\frac12)(c_{wt,t}+\frac12)}\right) \]
\item Assays can measure different properties
\item Numerous different experimental designs
+ \item Scores calculated a variety of ways, e.g., Rubin et al.
+ \footfullcite{Rubin2017}:
+ \[L_{v,t}=\log\left(\frac{(c_{v,t}+\frac12)(c_{wt,0}+\frac12)}{(c_{v,0}+\frac12)(c_{wt,t}+\frac12)}\right) \]
\end{enumerate}
\end{frame}
+ \begin{frame}{Representation learning on
+ \ac{dms} data} For a given protein:
+ \begin{itemize}
+ \item Learn a representation of the available
+ \ac{dms} data
+ \item Unsupervised, to deal with varying designs
+ \item Work on counts, not scores
+ \end{itemize}
+ \end{frame}
+
\begin{frame}{Compositional simplex}
\begin{columns}[T]
\begin{column}{.63
\textwidth}
- \begin{definition}[Compositional data] Data \(X \in \R^{n \times d}\) is compositional if rows \(\bx_i\) are in the simplex
- \[S^d=\{\,\bx \in \R^d : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\} \]
+ \begin{definition}[Compositional data] Data \(X \in \R_{\geq 0}^{n \times d}\) is compositional if rows \(\bx_i\) are in the simplex
+ \[S^d=\{\,\bx \in \R^d_{\geq 0} : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\} \]
for constant \(\kappa > 0\).
\end{definition}
\end{column}
@@ -201,10 +210,10 @@
\begin{frame}{Isomorphisms to Euclidean vector spaces} The simplex forms a \(d-1\) dimensional Euclidean vector space
\footfullcite{Aitchison1982}:
\begin{definition}[\ac{alr}]
- \[\alr_i(\bx) = \log \frac{x_i}{x_0} \]
+ \[\alr_i(\bx) = \log \frac{x_i}{x_0} \]
\end{definition}
\begin{definition}[\ac{clr}]
- \[\clr_i(\bx) = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}} \]
+ \[\clr_i(\bx) = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}} \]
\end{definition}
\end{frame}
@@ -220,22 +229,14 @@
\ac{pca}
\end{enumerate}
\end{block}
+ \pause
\begin{block}{Problems}
\begin{itemize}
\item Zeros:
\begin{enumerate}
- \item $\log(0)$ undefined
+ \item \(\log(0)\) undefined \(\Rightarrow\) can't handle unobserved components
\item geometric mean is \(0\) \(\Rightarrow\)
\ac{clr} is undefined
- \item
- \ac{alr} is undefined for unobserved components in the ref.
- \end{enumerate}
- \item Interpretation:
- \begin{enumerate}
- \item
- \ac{alr} is not an isometry
- \item
- \ac{clr} is degenerate
\end{enumerate}
\end{itemize}
\end{block}
@@ -243,36 +244,36 @@
\begin{frame}{Traditional
\ac{pca}} Given \(\X\in \R^{n\times d}\) minimise loss
- \[\ell_{\textsc{pca}} \triangleq {\lVert \X - \V\A \rVert}^2_{\textrm{F}}\]
+ \[\ell_{\textsc{pca}} \triangleq {\lVert \X - \V\A \rVert}^2_{\textrm{F}} \]
s.t.
\(\V \in \R^{n \times k}\), \(\A \in \R^{k \times d}\), and \(\V^\intercal \V = \I\).
- Has been generalised to exponential families
+ \pause Has been generalised to exponential families
\footfullcite{collins2001generalization} via Bregman divergences
\footfullcite{Amari2016-ua}.
\end{frame}
\begin{frame}{Exponential family
\ac{pca}}
- \begin{definition}[Bregman Divergence] Let \(\varphi \colon \R^d \to \R\) be a smooth ($C^1$) convex function on convex set \(\Omega\).
+ \begin{definition}[Bregman Divergence] Let \(\varphi \colon \R^d \to \R\) be a differentiable convex function.
The Bregman divergence \(D_\varphi\) with generator \(\varphi\) is
- \[ D_\varphi\left(\bu\,\Vert\,\bv\right) \triangleq \varphi(\bu)-\varphi(\bv)-\langle \nabla\varphi(\bv),\bu-\bv\rangle. \]
+ \[ D_\varphi\left(\bu\,\Vert\,\bv\right) \triangleq \varphi(\bu)-\varphi(\bv)-\langle \nabla\varphi(\bv),\bu-\bv\rangle. \]
\end{definition}
- Denote the convex conjugate of \(\varphi\) as \(\varphi^*(\bu) \triangleq \sup_\bv\left\{\langle \bu,\bv\rangle-\varphi(\bv)\right\}\).
+ \pause Denote the convex conjugate of \(\varphi\) as \(\varphi^*(\bu) \triangleq \sup_\bv\left\{\langle \bu,\bv\rangle-\varphi(\bv)\right\}\).
The exponential family
\ac{pca} is then given by minimising loss
- \[\ell_{\varphi} \triangleq D_\varphi\left(\X\,\Vert\,\nabla\varphi^*\left(\V\A\right)\right) \]
+ \[\ell_{\varphi} \triangleq D_\varphi\left(\X\,\Vert\,\nabla\varphi^*\left(\V\A\right)\right) \]
under the same constraints as previously, approximating \(\X \sim \nabla\varphi^*\left(\V\A\right)\).
\end{frame}
\begin{frame}{Aitchison's simplex and exponential
\ac{pca}} Aitchison's log-transformation is a dual affine coordinate space made explicit with
- \[\varphi(z) = z\log(z) - z \Leftrightarrow \varphi^*(z) = e^z,\]
+ \[\varphi(z) = z\log(z) - z \Leftrightarrow \varphi^*(z) = e^z, \]
but what about normalisation?
- Consider
+ \pause Consider
\ac{alr}:
- \[\alr(\bx) \triangleq x_0 \sum_{i=1}^d\varphi\left(\frac{x_i}{x_0}\right) \Leftrightarrow \alr^*(\bx) = x_0\sum_{i=1}^d e^{\frac{x_i}{x_0}} \]
+ \[\alr(\bx) \triangleq x_0 \sum_{i=1}^d\varphi\left(\frac{x_i}{x_0}\right) \Leftrightarrow \alr^*(\bx) = x_0\sum_{i=1}^d e^{\frac{x_i}{x_0}} \]
\end{frame}
@@ -280,9 +281,9 @@
\begin{theorem}[Scaled Bregman
\footfullcite{nock2016scaled}] Let \(\varphi \colon \mathcal{X} \to \R\) be convex differentiable and \(g \colon \mathcal{X} \to \R\) be differentiable.
Then
- \[g(\bx)\cdot D_\varphi\left(\frac{\bx}{g(\bx)}\,\middle\Vert\,\frac{\by}{g(\by)}\right) = D_{\breve{\varphi}}\left(\bx\,\middle\Vert\,\by\right) \]
+ \[D_{\breve{\varphi}}\left(\bx\,\middle\Vert\,\by\right) = g(\bx)\cdot D_\varphi\left(\frac{\bx}{g(\bx)}\,\middle\Vert\,\frac{\by}{g(\by)}\right) \]
where
- \[\breve{\varphi} \triangleq g(\bx) \cdot \varphi\left(\frac{x}{g(\bx)}\right)\]
+ \[\breve{\varphi}(\bx) \triangleq g(\bx) \cdot \varphi\left(\frac{\bx}{g(\bx)}\right) \]
\end{theorem}
Avalos et al.
@@ -291,11 +292,16 @@
\ac{clr} recently.
\end{frame}
- \begin{frame}{Medians instead of means}
- Zeros still a problem, as geometric mean is $0$. Instead, use median as gague
- function.
+ \begin{frame}{\textsc{Clr} undefined if any component is unobserved}
+ \begin{itemize}
+ \item Zeros still a problem for
+ \ac{clr} as geometric mean is \(0\).
+ \item[\(\Rightarrow\)] use median as gauge function.
+ \end{itemize}
\end{frame}
+ \section{Experiments}
+
\begin{frame}{Activation-Induced Deaminase
\footfullcite{Gajula2014}}
\begin{tikzpicture}[remember picture,overlay]
@@ -356,17 +362,42 @@
\end{frame}
\begin{frame}{\textsc{Brca1}: Positional effects}
- \begin{columns}[T]
- \begin{column}{.4\textwidth}
- \[\V\A+\U^\intercal\Q\PP\]
- where $\U \in \R^n$, $\Q \in \R^l$, $\PP \in \mathbb{2}^{l\times d}$
- \end{column}\hfill
- \begin{column}{.58\textwidth}
- \begin{tikzpicture}
- \node[scale=.45]{\input{position.tikz}};
- \end{tikzpicture}
- \end{column}
- \end{columns}
+ \begin{columns}[T]
+ \begin{column}{.4
+ \textwidth}
+ \vspace{1cm}
+ \[\V\A+\U^\intercal\Q\PP \]
+ where \(\U \in \R^n\), \(\Q \in \R^l\), \(\PP \in \mathbb{2}^{l\times d}\)
+ \end{column}
+ \hfill
+ \begin{column}{.58
+ \textwidth}
+ \begin{tikzpicture}
+ \node[scale=.45]{\input{position.tikz}};
+ \end{tikzpicture}
+ \end{column}
+ \end{columns}
+ \end{frame}
+
+ \begin{frame}{Acknowledgements}
+ \begin{columns}[T]
+ \begin{column}{.4
+ \textwidth}
+ \textbf{Papenfuss lab}
+ \begin{itemize}
+ \item Tony Papenfuss
+ \item
+ \textit{Alan Rubin}
+ \item
+ \textit{Matthew Wakefield}
+ \end{itemize}
+ \end{column}
+ \hfill
+ \begin{column}{.4
+ \textwidth}
+ \textbf{Stafford Fox Medical Research Foundation}
+ \end{column}
+ \end{columns}
\end{frame}
\end{document}