summaryrefslogtreecommitdiff
path: root/Numeric/CODA/PCA.hs
diff options
context:
space:
mode:
Diffstat (limited to 'Numeric/CODA/PCA.hs')
-rw-r--r--Numeric/CODA/PCA.hs92
1 files changed, 92 insertions, 0 deletions
diff --git a/Numeric/CODA/PCA.hs b/Numeric/CODA/PCA.hs
new file mode 100644
index 0000000..01fa3f1
--- /dev/null
+++ b/Numeric/CODA/PCA.hs
@@ -0,0 +1,92 @@
+{-# LANGUAGE QuasiQuotes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TemplateHaskell #-}
+
+-- |
+-- Module : Numeric.CODA.PCA
+-- Description : Principal composition on untransformed compositional data (CoDA)
+-- Copyright : (c) Justin Bedo 2023
+-- License : MIT
+-- Maintainer : cu@cua0.org
+-- Stability : experimental
+--
+-- This module decomposes a (non-negative) matrix into @k@ components and computes
+-- an embedding in log space for futher analysis.
+module Numeric.CODA.PCA (PCA (..), pca) where
+
+import Data.Maybe
+import qualified Data.Vector.Storable as V
+import qualified Data.Vector.Storable.Mutable as VM
+import Foreign.C.Types
+import qualified Language.C.Inline as C
+import Numeric.LinearAlgebra.Data
+import System.IO.Unsafe
+
+-- | Datatype to hold the results of a decomposition
+data PCA = PCA
+ { -- | The decomposed row factors (n × k)
+ rowFactors :: Matrix Double,
+ -- | The decomposed column factors (m × k)
+ colFactors :: Matrix Double,
+ -- | Log-space embedding (n × m)
+ embedding :: Matrix Double,
+ -- | Final Bregman loss after optimisation
+ loss :: Double
+ }
+ deriving (Show)
+
+C.include "<../../../pca.h>"
+
+-- | Decomposes a non-negative matrix into the specified number components on the CoDA simplex
+-- via Bregman divergences
+pca ::
+ -- | An optional quantile to use as the reference; defaults to 0.5
+ Maybe Double ->
+ -- | number of components to decompose into
+ Int ->
+ -- | matrix to decompose
+ Matrix Double ->
+ PCA
+pca q k x = unsafePerformIO $ do
+ let (n, m) = size x
+ k' = fromIntegral k
+ n' = fromIntegral n
+ m' = fromIntegral m
+ q' = realToFrac $ fromMaybe 0.5 q
+ b <- VM.new (n * k)
+ c <- VM.new (m * k)
+ y <- VM.new (n * m)
+ l <- VM.new 1
+ V.unsafeWith (V.map realToFrac $ flatten x) $ \xptr ->
+ VM.unsafeWith l $ \lptr ->
+ VM.unsafeWith b $ \bptr ->
+ VM.unsafeWith c $ \cptr ->
+ VM.unsafeWith y $ \yptr ->
+ [C.block|void {
+ struct futhark_context_config *cfg = futhark_context_config_new();
+ struct futhark_context *ctx = futhark_context_new(cfg);
+ struct futhark_f64_2d *Xf = futhark_new_f64_2d(ctx, $(double *xptr), $(int n'), $(int m'));
+ struct futhark_f64_2d *Yf;
+ struct futhark_f64_2d *Bf;
+ struct futhark_f64_2d *Uf;
+
+ futhark_entry_pcaWithQuantile(ctx, &Bf, &Uf, &Yf, $(double *lptr), $(double q'), $(int k'), Xf);
+ futhark_context_sync(ctx);
+ futhark_values_f64_2d(ctx, Bf, $(double *bptr));
+ futhark_values_f64_2d(ctx, Uf, $(double *cptr));
+ futhark_values_f64_2d(ctx, Yf, $(double *yptr));
+
+ futhark_free_f64_2d(ctx, Xf);
+ futhark_free_f64_2d(ctx, Bf);
+ futhark_free_f64_2d(ctx, Uf);
+ futhark_free_f64_2d(ctx, Yf);
+ futhark_context_free(ctx);
+ futhark_context_config_free(cfg);
+ }|]
+ bF <- V.freeze b
+ cF <- V.freeze c
+ yF <- V.freeze y
+ lF <- VM.read l 0
+ pure $ PCA (toMatrix k bF) (toMatrix k cF) (toMatrix m yF) (realToFrac lF)
+ where
+ toMatrix d = reshape d . V.map realToFrac