summaryrefslogtreecommitdiff
path: root/pca.fut
diff options
context:
space:
mode:
Diffstat (limited to 'pca.fut')
-rw-r--r--pca.fut75
1 files changed, 75 insertions, 0 deletions
diff --git a/pca.fut b/pca.fut
new file mode 100644
index 0000000..a52df80
--- /dev/null
+++ b/pca.fut
@@ -0,0 +1,75 @@
+-- basics
+def matmul [n][m][p] 'a
+ (add: a -> a -> a) (mul: a -> a -> a) (zero: a)
+ (A: [n][m]a) (B: [m][p]a) : [n][p]a =
+ map (\A_row ->
+ map (\B_col ->
+ reduce add zero (map2 mul A_row B_col))
+ (transpose B))
+ A
+def matmul_f64 = matmul (+) (*) 0f64
+def sum = reduce (+) 0f64
+def dot u v = sum (map2 (*) u v)
+
+-- sets the first row to 1
+def unitfst [n][m] (X: *[n][m]f64): *[n][m]f64 =
+ X with [0] = tabulate m (const 1)
+def unitfstT [n][m] (X: *[n][m]f64): *[n][m]f64 = transpose (unitfst (transpose X))
+
+-- Bregman generators
+def phi (z: f64) : f64 = z * f64.log z - z
+def psi [d] (x: [d]f64) : f64 = sum (map ((*x[d-1]) <-< phi <-< (/(f64.epsilon+x[d-1]))) x)
+
+def embed [n][d][k] (B: [n][k]f64) (U: [k][d]f64) : [n][d]f64 =
+ let C = tabulate_2d k k (\i j -> (if i == j then 1f64 else 0f64) - f64.from_fraction 1 k)
+ in (map (map f64.exp) (matmul_f64 (matmul_f64 (unitfstT (copy B)) C) U))
+
+entry loss [n][d][k] (X: [n][d]f64) (B: [n][k]f64) (U: [k][d]f64) : f64 =
+ let Y = embed B U
+ in sum (map2 (\x y -> - psi y - dot (map2 (-) x y) (vjp psi y 1)) X Y)
+
+entry dloss [n][d][k] (X: [n][d]f64) (B: [n][k]f64) (U: [k][d]f64) : ([n][k]f64, [k][d]f64) = vjp (uncurry (loss X)) (B, U) 1
+
+-- Approximate quantiles
+def quantile [n] (q: f64) (xs: [n]f64) : f64 =
+ let (p, _, _) = loop (_, xs, idx) = (0/0, xs, f64.to_i64 (q*f64.from_fraction n 1)) while length xs > 1 do
+ let pivot = head xs
+ let (left, right) = partition (<=pivot) (tail xs)
+ let m = 1+length left
+ in if m > idx
+ then (pivot, left, idx)
+ else (pivot, right, idx - m)
+ in p
+
+-- Optimiser
+def rmsprop [n] (iters: i32) (beta: f64) (par: [n]f64) (df: [n]f64 -> [n]f64) : [n]f64 =
+ let step (i, par, s) =
+ let d = df par
+ let s' = map2 (\s x -> beta * s + (1-beta)*x**2) s d
+ let par' = map3 (\x s d -> x - 1e-3 * d / (f64.epsilon + f64.sqrt s)) par s' d
+ in (i-1, par', s')
+ let (_, par, _) = iterate_until (\(i,_,_) -> i <= 0) step (iters, par, replicate n 0)
+ in par
+
+def pca [n][d] (k: i64) (X: [n][d]f64) : ([n][k]f64, [k][d]f64, f64) =
+ let mX = sum (flatten X)
+ let X' = map (map (/ mX)) X
+ let fp x = x - f64.floor x
+ let B = tabulate_2d n k (\i j -> if j == k-1 then 1 else 1e-3 * (fp (0.5 + 1.3247 * f64.from_fraction (i * k + j) 1) - 0.5))
+ let U = tabulate_2d k d (\i j -> 1e-3 * (fp (0.5 + 1.7548 * f64.from_fraction (i * k + j) 1) - 0.5))
+ let pack [n][d][k] (B: [n][k]f64) (U: [k][d]f64) : []f64 = flatten B ++ flatten U
+ let unpack xs =
+ let (a, b) = split (n*k) xs
+ in (unflatten n k a, unflatten k d b)
+ let df [m] (par: [m]f64) : [m]f64 = (uncurry pack <-< uncurry (dloss X') <-< unpack) par :> [m]f64
+ let init = flatten B ++ flatten U
+ let par = rmsprop 10000 0.999 init df
+ let (Bf, Uf ) = unpack par
+ in (Bf, Uf, loss X' Bf Uf)
+
+entry pcaWithQuantile [n][d] (q: f64) (k: i64) (X: [n][d]f64) : ([n][k]f64, [d][k]f64, [n][d]f64, f64) =
+ let qs = map (quantile q) X
+ let Y = tabulate_2d n (d+1) (\i j -> if j == d then qs[i] else X[i][j])
+ let (B, U, l) = pca (1+k) Y
+ let Y' = embed B U
+ in (map (\x -> x[0:k]) B, (transpose U[0:k])[0:d], map (\x -> x[0:d]) Y', l)