diff options
Diffstat (limited to 'Numeric')
| -rw-r--r-- | Numeric/CODA/PCA.hs | 92 | 
1 files changed, 92 insertions, 0 deletions
| diff --git a/Numeric/CODA/PCA.hs b/Numeric/CODA/PCA.hs new file mode 100644 index 0000000..01fa3f1 --- /dev/null +++ b/Numeric/CODA/PCA.hs @@ -0,0 +1,92 @@ +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TemplateHaskell #-} + +-- | +-- Module      : Numeric.CODA.PCA +-- Description : Principal composition on untransformed compositional data (CoDA) +-- Copyright   : (c) Justin Bedo 2023 +-- License     : MIT +-- Maintainer  : cu@cua0.org +-- Stability   : experimental +-- +-- This module decomposes a (non-negative) matrix into @k@ components and computes +-- an embedding in log space for futher analysis. +module Numeric.CODA.PCA (PCA (..), pca) where + +import Data.Maybe +import qualified Data.Vector.Storable as V +import qualified Data.Vector.Storable.Mutable as VM +import Foreign.C.Types +import qualified Language.C.Inline as C +import Numeric.LinearAlgebra.Data +import System.IO.Unsafe + +-- | Datatype to hold the results of a decomposition +data PCA = PCA +  { -- | The decomposed row factors (n × k) +    rowFactors :: Matrix Double, +    -- | The decomposed column factors (m × k) +    colFactors :: Matrix Double, +    -- | Log-space embedding (n × m) +    embedding :: Matrix Double, +    -- | Final Bregman loss after optimisation +    loss :: Double +  } +  deriving (Show) + +C.include "<../../../pca.h>" + +-- | Decomposes a non-negative matrix into the specified number components on the CoDA simplex +-- via Bregman divergences +pca :: +  -- | An optional quantile to use as the reference; defaults to 0.5 +  Maybe Double -> +  -- | number of components to decompose into +  Int -> +  -- | matrix to decompose +  Matrix Double -> +  PCA +pca q k x = unsafePerformIO $ do +  let (n, m) = size x +      k' = fromIntegral k +      n' = fromIntegral n +      m' = fromIntegral m +      q' = realToFrac $ fromMaybe 0.5 q +  b <- VM.new (n * k) +  c <- VM.new (m * k) +  y <- VM.new (n * m) +  l <- VM.new 1 +  V.unsafeWith (V.map realToFrac $ flatten x) $ \xptr -> +    VM.unsafeWith l $ \lptr -> +      VM.unsafeWith b $ \bptr -> +        VM.unsafeWith c $ \cptr -> +          VM.unsafeWith y $ \yptr -> +            [C.block|void { +           struct futhark_context_config *cfg = futhark_context_config_new(); +           struct futhark_context *ctx = futhark_context_new(cfg); +           struct futhark_f64_2d *Xf = futhark_new_f64_2d(ctx, $(double *xptr), $(int n'), $(int m')); +           struct futhark_f64_2d *Yf; +           struct futhark_f64_2d *Bf; +           struct futhark_f64_2d *Uf; + +           futhark_entry_pcaWithQuantile(ctx, &Bf, &Uf, &Yf, $(double *lptr), $(double q'), $(int k'), Xf); +           futhark_context_sync(ctx); +           futhark_values_f64_2d(ctx, Bf, $(double *bptr)); +           futhark_values_f64_2d(ctx, Uf, $(double *cptr)); +           futhark_values_f64_2d(ctx, Yf, $(double *yptr)); + +           futhark_free_f64_2d(ctx, Xf); +           futhark_free_f64_2d(ctx, Bf); +           futhark_free_f64_2d(ctx, Uf); +           futhark_free_f64_2d(ctx, Yf); +           futhark_context_free(ctx); +           futhark_context_config_free(cfg); +        }|] +  bF <- V.freeze b +  cF <- V.freeze c +  yF <- V.freeze y +  lF <- VM.read l 0 +  pure $ PCA (toMatrix k bF) (toMatrix k cF) (toMatrix m yF) (realToFrac lF) +  where +    toMatrix d = reshape d . V.map realToFrac | 
