diff options
| -rw-r--r-- | slides.bib | 15 | ||||
| -rw-r--r-- | slides.tex | 38 | 
2 files changed, 46 insertions, 7 deletions
@@ -29,4 +29,17 @@    address   = "Tokyo, Japan",    language  = "en"  } - +@article{nock2016scaled, +  title={A scaled Bregman theorem with applications}, +  author={Nock, Richard and Menon, Aditya and Ong, Cheng Soon}, +  journal={Advances in Neural Information Processing Systems}, +  volume={29}, +  year={2016} +} +@article{avalos2018representation, +  title={Representation learning of compositional data}, +  author={Avalos, Marta and Nock, Richard and Ong, Cheng Soon and Rouar, Julien and Sun, Ke}, +  journal={Advances in Neural Information Processing Systems}, +  volume={31}, +  year={2018} +} @@ -15,6 +15,7 @@  \newcommand{\R}{\mathbb{R}}  \newcommand{\bx}{\mathbf{x}} +\newcommand{\by}{\mathbf{y}}  \newcommand{\bu}{\mathbf{u}}  \newcommand{\bv}{\mathbf{v}}  \newcommand{\X}{\mathbf{X}} @@ -58,7 +59,7 @@    \begin{frame}{Basics}      \begin{definition}[Compositional data] Data \(X \in \R^{n \times d}\) is compositional if rows \(\bx_i\) are in the simplex -      \[S^d=\{\,\bx \in \R^d : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\}       \] +      \[S^d=\{\,\bx \in \R^d : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\} \]        for constant \(\kappa > 0\).      \end{definition} Information is therefore given only by the ratios of components and any composition can be normalised to the standard simplex where \(\kappa = 1\) (c.f., dividing by library size).    
\end{frame} @@ -66,10 +67,10 @@    \begin{frame}{Isomorphisms to Euclidean vector spaces} The simplex forms a \(d-1\) dimensional Euclidean vector space      \footfullcite{Aitchison1982}:      \begin{definition}[\ac{alr}] -      \[\alr(\bx)_i = \log \frac{x_i}{x_0}       \] +      \[\alr(\bx)_i = \log \frac{x_i}{x_0} \]      \end{definition}      \begin{definition}[\ac{clr}] -      \[\clr(\bx)_i = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}}       \] +      \[\clr(\bx)_i = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}} \]      \end{definition}    \end{frame} @@ -107,7 +108,7 @@    \begin{frame}{Traditional        \ac{pca}} Given \(\X\in \R^{n\times d}\) minimise loss -    \[\ell_{\textsc{pca}} \triangleq {\lVert \X - \V\A \rVert}^2_{\textrm{F}}   \] +    \[\ell_{\textsc{pca}} \triangleq {\lVert \X - \V\A \rVert}^2_{\textrm{F}}      \]      s.t.      \(\V \in \R^{n \times k}\), \(\A \in \R^{k \times d}\), and \(\V^\intercal \V = \I\). @@ -120,14 +121,39 @@        \ac{pca}}      \begin{definition}{Bregman Divergence} Let \(\varphi \colon \R^d \to \R\) be a smooth ($C^1$) convex function on convex set \(\Omega\).        The Bregman divergence \(D_\varphi\) with generator \(\varphi\) is -      \[ D_\varphi\left(\bu\,\Vert\,\bv\right) \triangleq \varphi(\bu)-\varphi(\bv)-\langle \nabla\varphi(\bv),\bu-\bv\rangle.   \] +      \[ D_\varphi\left(\bu\,\Vert\,\bv\right) \triangleq \varphi(\bu)-\varphi(\bv)-\langle \nabla\varphi(\bv),\bu-\bv\rangle.      \]      \end{definition}      Denote the convex conjugate of \(\varphi\) as \(\varphi^*(\bu) \triangleq \sup_\bv\left\{\langle \bu,\bv\rangle-\varphi(\bv)\right\}\).      
The exponential family      \ac{pca} is then given by minimising loss -    \[\ell_{\varphi} \triangleq {D_\varphi\left(\X\,\Vert\,\nabla\varphi^*\left(\V\A\right)\right)}^2   \] +    \[\ell_{\varphi} \triangleq D_\varphi\left(\X\,\Vert\,\nabla\varphi^*\left(\V\A\right)\right)      \]      under the same constraints as previously, approximating \(\X \sim \nabla\varphi^*\left(\V\A\right)\).    \end{frame} +  \begin{frame}{Aitchison's simplex and exponential +      \ac{pca}} Aitchison's log-transformation is a dual affine coordinate space made explicit with +    \[\varphi(z) = z\log(z) - z \Leftrightarrow \varphi^*(z) = e^z,   \] +    but what about normalisation? + +    Consider +    \ac{alr}: +    \[\alr(\bx) \triangleq x_0 \sum_{i=1}^d\varphi\left(\frac{x_i}{x_0}\right) \Leftrightarrow \alr^*(\bx) = x_0\sum_{i=1}^d e^{\frac{x_i}{x_0}} \] + +  \end{frame} + +  \begin{frame} +    \begin{theorem}{Scaled Bregman +        \footfullcite{nock2016scaled}} Let \(\varphi \colon \mathcal{X} \to \R\) be convex differentiable and \(g \colon \mathcal{X} \to \R\) be differentiable. +      Then +      \[g(\bx)\cdot D_\varphi\left(\frac{\bx}{g(\bx)}\,\middle\Vert\,\frac{\by}{g(\by)}\right) =  D_{\breve{\varphi}}\left(\bx\,\middle\Vert\,\by\right) \] +      where +      \[\breve{\varphi}(\bx) \triangleq g(\bx) \cdot \varphi\left(\frac{\bx}{g(\bx)}\right)  \] +    \end{theorem} + +    Avalos et al. +    \footfullcite{avalos2018representation} +    \ considered a relaxed form for \ac{clr} recently. +  \end{frame} +  \end{document}  | 
