From a0b5f58d8d2040306006a8b9deded629692ed8f3 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Tue, 7 Feb 2023 15:27:42 +1100 Subject: init --- pca.fut | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 pca.fut (limited to 'pca.fut') diff --git a/pca.fut b/pca.fut new file mode 100644 index 0000000..a52df80 --- /dev/null +++ b/pca.fut @@ -0,0 +1,75 @@ +-- basics +def matmul [n][m][p] 'a + (add: a -> a -> a) (mul: a -> a -> a) (zero: a) + (A: [n][m]a) (B: [m][p]a) : [n][p]a = + map (\A_row -> + map (\B_col -> + reduce add zero (map2 mul A_row B_col)) + (transpose B)) + A +def matmul_f64 = matmul (+) (*) 0f64 +def sum = reduce (+) 0f64 +def dot u v = sum (map2 (*) u v) + +-- sets the first row to 1 +def unitfst [n][m] (X: *[n][m]f64): *[n][m]f64 = + X with [0] = tabulate m (const 1) +def unitfstT [n][m] (X: *[n][m]f64): *[n][m]f64 = transpose (unitfst (transpose X)) + +-- Bregman generators +def phi (z: f64) : f64 = z * f64.log z - z +def psi [d] (x: [d]f64) : f64 = sum (map ((*x[d-1]) <-< phi <-< (/(f64.epsilon+x[d-1]))) x) + +def embed [n][d][k] (B: [n][k]f64) (U: [k][d]f64) : [n][d]f64 = + let C = tabulate_2d k k (\i j -> (if i == j then 1f64 else 0f64) - f64.from_fraction 1 k) + in (map (map f64.exp) (matmul_f64 (matmul_f64 (unitfstT (copy B)) C) U)) + +entry loss [n][d][k] (X: [n][d]f64) (B: [n][k]f64) (U: [k][d]f64) : f64 = + let Y = embed B U + in sum (map2 (\x y -> - psi y - dot (map2 (-) x y) (vjp psi y 1)) X Y) + +entry dloss [n][d][k] (X: [n][d]f64) (B: [n][k]f64) (U: [k][d]f64) : ([n][k]f64, [k][d]f64) = vjp (uncurry (loss X)) (B, U) 1 + +-- Approximate quantiles +def quantile [n] (q: f64) (xs: [n]f64) : f64 = + let (p, _, _) = loop (_, xs, idx) = (0/0, xs, f64.to_i64 (q*f64.from_fraction n 1)) while length xs > 1 do + let pivot = head xs + let (left, right) = partition (<=pivot) (tail xs) + let m = 1+length left + in if m > idx + then (pivot, left, idx) + else (pivot, right, idx - m) + in p + +-- Optimiser +def rmsprop [n] (iters: i32) (beta: f64) (par: [n]f64) (df: [n]f64 -> [n]f64) : [n]f64 = + let step (i, par, s) = + let d = df par + let s' = map2 (\s x -> beta * s + (1-beta)*x**2) s d + let par' = map3 (\x s d -> x - 1e-3 * d / (f64.epsilon + f64.sqrt s)) par s' d + in (i-1, par', s') + let (_, par, _) = iterate_until (\(i,_,_) -> i <= 0) step (iters, par, replicate n 0) + in par + +def pca [n][d] (k: i64) (X: [n][d]f64) : ([n][k]f64, [k][d]f64, f64) = + let mX = sum (flatten X) + let X' = map (map (/ mX)) X + let fp x = x - f64.floor x + let B = tabulate_2d n k (\i j -> if j == k-1 then 1 else 1e-3 * (fp (0.5 + 1.3247 * f64.from_fraction (i * k + j) 1) - 0.5)) + let U = tabulate_2d k d (\i j -> 1e-3 * (fp (0.5 + 1.7548 * f64.from_fraction (i * k + j) 1) - 0.5)) + let pack [n][d][k] (B: [n][k]f64) (U: [k][d]f64) : []f64 = flatten B ++ flatten U + let unpack xs = + let (a, b) = split (n*k) xs + in (unflatten n k a, unflatten k d b) + let df [m] (par: [m]f64) : [m]f64 = (uncurry pack <-< uncurry (dloss X') <-< unpack) par :> [m]f64 + let init = flatten B ++ flatten U + let par = rmsprop 10000 0.999 init df + let (Bf, Uf ) = unpack par + in (Bf, Uf, loss X' Bf Uf) + +entry pcaWithQuantile [n][d] (q: f64) (k: i64) (X: [n][d]f64) : ([n][k]f64, [d][k]f64, [n][d]f64, f64) = + let qs = map (quantile q) X + let Y = tabulate_2d n (d+1) (\i j -> if j == d then qs[i] else X[i][j]) + let (B, U, l) = pca (1+k) Y + let Y' = embed B U + in (map (\x -> x[0:k]) B, (transpose U[0:k])[0:d], map (\x -> x[0:d]) Y', l) -- cgit v1.2.3