add bregman generator and scaled theorem for ALR

author: Justin Bedo <cu@cua0.org> 2022-11-23 11:22:12 +1100
committer: Justin Bedo <cu@cua0.org> 2022-12-05 16:50:43 +1100
commit: 5703e89a4780e9333e69cfad71fcf2447e02e023 (patch)
tree: 7e9fa499638b3b972e37b1af4efa3632dd0ad360
parent: b13c582bbb9619bf5869451e24f6e6dade95c849 (diff)
2 files changed, 46 insertions, 7 deletions
diff --git a/slides.bib b/slides.bib
index 959e293..85db9d3 100644
--- a/slides.bib
+++ b/slides.bib
@@ -29,4 +29,17 @@
   address   = "Tokyo, Japan",
   language  = "en"
 }
-
+@article{nock2016scaled,
+  title={A scaled Bregman theorem with applications},
+  author={Nock, Richard and Menon, Aditya and Ong, Cheng Soon},
+  journal={Advances in Neural Information Processing Systems},
+  volume={29},
+  year={2016}
+}
+@article{avalos2018representation,
+  title={Representation learning of compositional data},
+  author={Avalos, Marta and Nock, Richard and Ong, Cheng Soon and Rouar, Julien and Sun, Ke},
+  journal={Advances in Neural Information Processing Systems},
+  volume={31},
+  year={2018}
+}
diff --git a/slides.tex b/slides.tex
index 8bf6c26..52f6f7a 100644
--- a/slides.tex
+++ b/slides.tex
@@ -15,6 +15,7 @@
 
 \newcommand{\R}{\mathbb{R}}
 \newcommand{\bx}{\mathbf{x}}
+\newcommand{\by}{\mathbf{y}}
 \newcommand{\bu}{\mathbf{u}}
 \newcommand{\bv}{\mathbf{v}}
 \newcommand{\X}{\mathbf{X}}
@@ -58,7 +59,7 @@
 
   \begin{frame}{Basics}
     \begin{definition}[Compositional data] Data \(X \in \R^{n \times d}\) is compositional if rows \(\bx_i\) are in the simplex
-      \[S^d=\{\,\bx \in \R^d : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\}       \]
+      \[S^d=\{\,\bx \in \R^d : \forall j,x_j > 0 ; \sum_{j=1}^d x_j = \kappa\,\} \]
       for constant \(\kappa > 0\).
     \end{definition} Information is therefore given only by the ratios of components and any composition can be normalised to the standard simplex where \(\kappa = 1\) (cf.\ dividing by library size).
   \end{frame}
@@ -66,10 +67,10 @@
   \begin{frame}{Isomorphisms to Euclidean vector spaces} The simplex forms a \(d-1\) dimensional Euclidean vector space
     \footfullcite{Aitchison1982}:
     \begin{definition}[\ac{alr}]
-      \[\alr(\bx)_i = \log \frac{x_i}{x_0}       \]
+      \[\alr(\bx)_i = \log \frac{x_i}{x_0} \]
     \end{definition}
     \begin{definition}[\ac{clr}]
-      \[\clr(\bx)_i = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}}       \]
+      \[\clr(\bx)_i = \log \frac{x_i}{\left(\prod_{j=1}^d x_j\right)^{\frac 1 d}} \]
     \end{definition}
   \end{frame}
 
@@ -107,7 +108,7 @@
 
   \begin{frame}{Traditional
       \ac{pca}} Given \(\X\in \R^{n\times d}\) minimise loss
-    \[\ell_{\textsc{pca}} \triangleq {\lVert \X - \V\A \rVert}^2_{\textrm{F}}   \]
+    \[\ell_{\textsc{pca}} \triangleq {\lVert \X - \V\A \rVert}^2_{\textrm{F}}      \]
     s.t.
     \(\V \in \R^{n \times k}\), \(\A \in \R^{k \times d}\), and \(\V^\intercal \V = \I\).
 
@@ -120,14 +121,39 @@
       \ac{pca}}
     \begin{definition}[Bregman Divergence] Let \(\varphi \colon \R^d \to \R\) be a smooth (\(C^1\)) convex function on convex set \(\Omega\).
       The Bregman divergence \(D_\varphi\) with generator \(\varphi\) is
-      \[ D_\varphi\left(\bu\,\Vert\,\bv\right) \triangleq \varphi(\bu)-\varphi(\bv)-\langle \nabla\varphi(\bv),\bu-\bv\rangle.   \]
+      \[ D_\varphi\left(\bu\,\Vert\,\bv\right) \triangleq \varphi(\bu)-\varphi(\bv)-\langle \nabla\varphi(\bv),\bu-\bv\rangle.      \]
     \end{definition}
 
     Denote the convex conjugate of \(\varphi\) as \(\varphi^*(\bu) \triangleq \sup_\bv\left\{\langle \bu,\bv\rangle-\varphi(\bv)\right\}\).
     The exponential family
     \ac{pca} is then given by minimising loss
-    \[\ell_{\varphi} \triangleq {D_\varphi\left(\X\,\Vert\,\nabla\varphi^*\left(\V\A\right)\right)}^2   \]
+    \[\ell_{\varphi} \triangleq D_\varphi\left(\X\,\Vert\,\nabla\varphi^*\left(\V\A\right)\right)      \]
     under the same constraints as previously, approximating \(\X \sim \nabla\varphi^*\left(\V\A\right)\).
   \end{frame}
 
+  \begin{frame}{Aitchison's simplex and exponential
+      \ac{pca}} Aitchison's log-transformation is a dual affine coordinate space made explicit with
+    \[\varphi(z) = z\log(z) - z \Leftrightarrow \varphi^*(z) = e^z,   \]
+    but what about normalisation?
+
+    Consider
+    \ac{alr}:
+    \[\alr(\bx) \triangleq x_0 \sum_{i=1}^d\varphi\left(\frac{x_i}{x_0}\right) \Leftrightarrow \alr^*(\bx) = x_0\sum_{i=1}^d e^{\frac{x_i}{x_0}} \]
+
+  \end{frame}
+
+  \begin{frame}
+    \begin{theorem}[Scaled Bregman%
+        \footfullcite{nock2016scaled}] Let \(\varphi \colon \mathcal{X} \to \R\) be convex differentiable and \(g \colon \mathcal{X} \to \R\) be differentiable.
+      Then
+      \[g(\bx)\cdot D_\varphi\left(\frac{\bx}{g(\bx)}\,\middle\Vert\,\frac{\by}{g(\by)}\right) =  D_{\breve{\varphi}}\left(\bx\,\middle\Vert\,\by\right) \]
+      where
+      \[\breve{\varphi}(\bx) \triangleq g(\bx) \cdot \varphi\left(\frac{\bx}{g(\bx)}\right).  \]
+    \end{theorem}
+
+    Avalos et al.%
+    \footfullcite{avalos2018representation}
+    recently considered a relaxed form for \ac{clr}.
+  \end{frame}
+
 \end{document}
author	Justin Bedo <cu@cua0.org>	2022-11-23 11:22:12 +1100
committer	Justin Bedo <cu@cua0.org>	2022-12-05 16:50:43 +1100
commit	5703e89a4780e9333e69cfad71fcf2447e02e023 (patch)
tree	7e9fa499638b3b972e37b1af4efa3632dd0ad360
parent	b13c582bbb9619bf5869451e24f6e6dade95c849 (diff)