From 5703e89a4780e9333e69cfad71fcf2447e02e023 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Wed, 23 Nov 2022 11:22:12 +1100 Subject: add bregman generator and scaled theorem for ALR --- slides.bib | 15 ++++++++++++++- slides.tex | 38 ++++++++++++++++++++++++++++++++------ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/slides.bib b/slides.bib index 959e293..85db9d3 100644 --- a/slides.bib +++ b/slides.bib @@ -29,4 +29,17 @@ address = "Tokyo, Japan", language = "en" } - +@article{nock2016scaled, + title={A scaled Bregman theorem with applications}, + author={Nock, Richard and Menon, Aditya and Ong, Cheng Soon}, + journal={Advances in Neural Information Processing Systems}, + volume={29}, + year={2016} +} +@article{avalos2018representation, + title={Representation learning of compositional data}, + author={Avalos, Marta and Nock, Richard and Ong, Cheng Soon and Rouar, Julien and Sun, Ke}, + journal={Advances in Neural Information Processing Systems}, + volume={31}, + year={2018} +} diff --git a/slides.tex b/slides.tex index 8bf6c26..52f6f7a 100644 --- a/slides.tex +++ b/slides.tex @@ -15,6 +15,7 @@ \newcommand{\R}{\mathbb{R}} \newcommand{\bx}{\mathbf{x}} +\newcommand{\by}{\mathbf{y}} \newcommand{\bu}{\mathbf{u}} \newcommand{\bv}{\mathbf{v}} \newcommand{\X}{\mathbf{X}} @@ -58,7 +59,7 @@ \begin{frame}{Basics} \begin{definition}[Compositional data] Data \(X \in \R^{n \times d}\) is compositional if rows \(\bx_i\) are in the simplex - \[S^d=\{\,\bx \in \R^d : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\} \] + \[S^d=\{\,\bx \in \R^d : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\} \] for constant \(\kappa > 0\). \end{definition} Information is therefore given only by the ratios of components and any composition can be normalised to the standard simplex where \(\kappa = 1\) (c.f., dividing by library size). 
\end{frame} @@ -66,10 +67,10 @@ \begin{frame}{Isomorphisms to Euclidean vector spaces} The simplex forms a \(d-1\) dimensional Euclidean vector space \footfullcite{Aitchison1982}: \begin{definition}[\ac{alr}] - \[\alr(\bx)_i = \log \frac{x_i}{x_0} \] + \[\alr(\bx)_i = \log \frac{x_i}{x_0} \] \end{definition} \begin{definition}[\ac{clr}] - \[\clr(\bx)_i = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}} \] + \[\clr(\bx)_i = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}} \] \end{definition} \end{frame} @@ -107,7 +108,7 @@ \begin{frame}{Traditional \ac{pca}} Given \(\X\in \R^{n\times d}\) minimise loss - \[\ell_{\textsc{pca}} \triangleq {\lVert \X - \V\A \rVert}^2_{\textrm{F}} \] + \[\ell_{\textsc{pca}} \triangleq {\lVert \X - \V\A \rVert}^2_{\textrm{F}} \] s.t. \(\V \in \R^{n \times k}\), \(\A \in \R^{k \times d}\), and \(\V^\intercal \V = \I\). @@ -120,14 +121,39 @@ \ac{pca}} \begin{definition}{Bregman Divergence} Let \(\varphi \colon \R^d \to \R\) be a smooth ($C^1$) convex function on convex set \(\Omega\). The Bregman divergence \(D_\varphi\) with generator \(\varphi\) is - \[ D_\varphi\left(\bu\,\Vert\,\bv\right) \triangleq \varphi(\bu)-\varphi(\bv)-\langle \nabla\varphi(\bv),\bu-\bv\rangle. \] + \[ D_\varphi\left(\bu\,\Vert\,\bv\right) \triangleq \varphi(\bu)-\varphi(\bv)-\langle \nabla\varphi(\bv),\bu-\bv\rangle. \] \end{definition} Denote the convex conjugate of \(\varphi\) as \(\varphi^*(\bu) \triangleq \sup_\bv\left\{\langle \bu,\bv\rangle-\varphi(\bv)\right\}\). The exponential family \ac{pca} is then given by minimising loss - \[\ell_{\varphi} \triangleq {D_\varphi\left(\X\,\Vert\,\nabla\varphi^*\left(\V\A\right)\right)}^2 \] + \[\ell_{\varphi} \triangleq D_\varphi\left(\X\,\Vert\,\nabla\varphi^*\left(\V\A\right)\right) \] under the same constraints as previously, approximating \(\X \sim \nabla\varphi^*\left(\V\A\right)\). 
\end{frame} + \begin{frame}{Aitchison's simplex and exponential + \ac{pca}} Aitchison's log-transformation is a dual affine coordinate space made explicit with + \[\varphi(z) = z\log(z) - z \Leftrightarrow \varphi^*(z) = e^z, \] + but what about normalisation? + + Consider + \ac{alr}: + \[\alr(\bx) \triangleq x_0 \sum_{i=1}^d\varphi\left(\frac{x_i}{x_0}\right) \Leftrightarrow \alr^*(\bx) = x_0\sum_{i=1}^d e^{\frac{x_i}{x_0}} \] + + \end{frame} + + \begin{frame} + \begin{theorem}{Scaled Bregman + \footfullcite{nock2016scaled}} Let \(\varphi \colon \mathcal{X} \to \R\) be convex differentiable and \(g \colon \mathcal{X} \to \R\) be differentiable. + Then + \[g(\bx)\cdot D_\varphi\left(\frac{\bx}{g(\bx)}\,\middle\Vert\,\frac{\by}{g(\by)}\right) = D_{\breve{\varphi}}\left(\bx\,\middle\Vert\,\by\right) \] + where + \[\breve{\varphi}(\bx) \triangleq g(\bx) \cdot \varphi\left(\frac{\bx}{g(\bx)}\right) \] + \end{theorem} + + Avalos et al. + \footfullcite{avalos2018representation} + \ considered a relaxed form for \ac{clr} recently. + \end{frame} + \end{document} -- cgit v1.2.3