summaryrefslogtreecommitdiff
path: root/Numeric/CODA/PCA.hs
blob: 01fa3f124fdf0de54a76daa0e3cb435543bd157e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}

-- |
-- Module      : Numeric.CODA.PCA
-- Description : Principal composition on untransformed compositional data (CoDA)
-- Copyright   : (c) Justin Bedo 2023
-- License     : MIT
-- Maintainer  : cu@cua0.org
-- Stability   : experimental
--
-- This module decomposes a (non-negative) matrix into @k@ components and computes
-- an embedding in log space for futher analysis.
module Numeric.CODA.PCA (PCA (..), pca) where

import Data.Maybe
import qualified Data.Vector.Storable as V
import qualified Data.Vector.Storable.Mutable as VM
import Foreign.C.Types
import qualified Language.C.Inline as C
import Numeric.LinearAlgebra.Data
import System.IO.Unsafe

-- | Datatype to hold the results of a decomposition
data PCA = PCA
  { -- | The decomposed row factors (n × k)
    rowFactors :: Matrix Double,
    -- | The decomposed column factors (m × k)
    colFactors :: Matrix Double,
    -- | Log-space embedding (n × m)
    embedding :: Matrix Double,
    -- | Final Bregman loss after optimisation
    loss :: Double
  }
  deriving (Show)

C.include "<../../../pca.h>"

-- | Decomposes a non-negative matrix into the specified number components on the CoDA simplex
-- via Bregman divergences
pca ::
  -- | An optional quantile to use as the reference; defaults to 0.5
  Maybe Double ->
  -- | number of components to decompose into
  Int ->
  -- | matrix to decompose
  Matrix Double ->
  PCA
pca q k x = unsafePerformIO $ do
  let (n, m) = size x
      k' = fromIntegral k
      n' = fromIntegral n
      m' = fromIntegral m
      q' = realToFrac $ fromMaybe 0.5 q
  b <- VM.new (n * k)
  c <- VM.new (m * k)
  y <- VM.new (n * m)
  l <- VM.new 1
  V.unsafeWith (V.map realToFrac $ flatten x) $ \xptr ->
    VM.unsafeWith l $ \lptr ->
      VM.unsafeWith b $ \bptr ->
        VM.unsafeWith c $ \cptr ->
          VM.unsafeWith y $ \yptr ->
            [C.block|void {
           struct futhark_context_config *cfg = futhark_context_config_new();
           struct futhark_context *ctx = futhark_context_new(cfg);
           struct futhark_f64_2d *Xf = futhark_new_f64_2d(ctx, $(double *xptr), $(int n'), $(int m'));
           struct futhark_f64_2d *Yf;
           struct futhark_f64_2d *Bf;
           struct futhark_f64_2d *Uf;

           futhark_entry_pcaWithQuantile(ctx, &Bf, &Uf, &Yf, $(double *lptr), $(double q'), $(int k'), Xf);
           futhark_context_sync(ctx);
           futhark_values_f64_2d(ctx, Bf, $(double *bptr));
           futhark_values_f64_2d(ctx, Uf, $(double *cptr));
           futhark_values_f64_2d(ctx, Yf, $(double *yptr));

           futhark_free_f64_2d(ctx, Xf);
           futhark_free_f64_2d(ctx, Bf);
           futhark_free_f64_2d(ctx, Uf);
           futhark_free_f64_2d(ctx, Yf);
           futhark_context_free(ctx);
           futhark_context_config_free(cfg);
        }|]
  bF <- V.freeze b
  cF <- V.freeze c
  yF <- V.freeze y
  lF <- VM.read l 0
  pure $ PCA (toMatrix k bF) (toMatrix k cF) (toMatrix m yF) (realToFrac lF)
  where
    toMatrix d = reshape d . V.map realToFrac