From f85676749888e99fec419236d4823fe6250eddd1 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Sat, 10 Aug 2013 08:36:48 +1000 Subject: Initial import --- frand.c | 17 + linalg.c | 1189 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ linalg.h | 108 ++++++ mkfile | 16 + mkfile.p9p | 10 + 5 files changed, 1340 insertions(+) create mode 100644 frand.c create mode 100644 linalg.c create mode 100644 linalg.h create mode 100644 mkfile create mode 100644 mkfile.p9p diff --git a/frand.c b/frand.c new file mode 100644 index 0000000..e13e1d8 --- /dev/null +++ b/frand.c @@ -0,0 +1,17 @@ +#include +#include + +#define MASK 0x7fffffffL +#define NORM (1.0/(1.0+MASK)) + +double +frand(void) +{ + double x; + + do { + x = lrand() * NORM; + x = (x + lrand()) * NORM; + } while(x >= 1); + return x; +} diff --git a/linalg.c b/linalg.c new file mode 100644 index 0000000..5dfbba4 --- /dev/null +++ b/linalg.c @@ -0,0 +1,1189 @@ +#include +#include +#include + +const float meps = 5.96e-8; +//const double meps = 1.11e-16; + + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define sign(x) (((x) >= 0) - ((x) < 0)) + +uint MAXITERS = 1000000; + +static void * +emalloc(ulong size){ + void *v; + + v = malloc(size); + if(v == nil) + sysfatal("emalloc: allocating %ld: %r", size); + + return v; +} + +static void* +erealloc(void *v, ulong sz) +{ + v = realloc(v, sz); + if(v == nil) + sysfatal("erealloc: allocating %ld: %r", sz); + return v; +} + +static void * +cmalloc(void *p, ulong sz) +{ + if(p != nil) + return p; + p = emalloc(sz); + memset(p, 0, sz); + return p; +} + + +ushort +undilate2(uint x) +{ + x = (x | (x >> 1)) & 0x33333333; + x = (x | (x >> 2)) & 0x0F0F0F0F; + x = (x | (x >> 4)) & 0x00FF00FF; + x = (x | (x >> 8)) & 0x0000FFFF; + return x; +} + +uint +dilate2(ushort x) +{ + uint r = x; + r = (r | (r << 8)) & 0x00FF00FF; + r = (r | (r << 4)) & 0x0F0F0F0F; + r = (r | (r << 2)) & 0x33333333; + r = (r | (r << 1)) & 0x55555555; + return r; +} + +/*ushort +undilate3(uint x) +{ + x = (x * 0x00015) & 0x0E070381; + x = (x * 0x01041) & 0x0FF80001; + x = (x * 0x40001) & 0x0FFC0000; + return (ushort)(x >> 18); +} + +uint +dilate3(ushort x) +{ + uint r = x; + r = (r * 0x10001) & 0xFF0000FF; + r = (r * 0x00101) & 0x0F00F00F; + r = (r * 0x00011) & 0xC30C30C3; + r = (r * 0x00005) & 0x49249249; + return r; +}*/ + +void +fblock(block *p) +{ + free(p->data); + free(p); +} + +block * +zblock(block *p) +{ + memset(p->data, 0, p->sz); + return p; +} + +static void +growblock(block *p, ulong sz) +{ + if(p->data == nil) + p->allocated = 1; + if(p->sz < sz && p->allocated){ + p->data = erealloc(p->data, sz); + p->sz = sz; + } +} + +block * +cblock(block *dst, block *src) +{ + growblock(dst, src->sz); + memcpy(dst->data, src->data, src->sz); + return dst; +} + +vector* +nvector(vector *p, real *data, uint l) +{ + ulong sz; + p = cmalloc(p, sizeof *p); + p->len = l; + + if(data == nil){ + sz = l * sizeof(real); + growblock(p, sz); + return p; + } + + p->data = data; + return p; +} + +matrix* +nmatrix(matrix *p, real *data, ushort r, ushort c) +{ + uint len; + p = cmalloc(p, sizeof *p); + p->nr = r; + p->nc = c; + p->mr = dilate2(r); + p->mc = dilate2(c) << 1; + assert(r == undilate2(p->mr) && c == undilate2(p->mc >> 1)); + + if(r > 0 && c > 0){ + len = (dilate2(r - 1) | (dilate2(c - 1) << 1)) + 1; + p->offset = 3 * pow(4, p->levels = ceil(log(len) / log(4))); + nvector(p, data, len); + } + return p; +} + +cube * +ncube(cube *p, real *data, ushort r, ushort c, ushort s) +{ + p = cmalloc(p, sizeof *p); + p->nr = r; + p->nc = c; + p->ns = s; + p->mr = dilate2(r); + p->mc = dilate2(c) << 1; + p->stride = (dilate2(r - 1) | (dilate2(c - 1) << 1)) + 1; + + assert(r == undilate2(p->mr) && c == undilate2(p->mc >> 1)); + + if(r > 0 && c > 0 && s > 0) + nvector(p, data, s * p->stride); + return p; +} + +/* Quadtree matrix multiplication */ +#define NW(i) ((i) * 4 + 0) +#define SW(i) ((i) * 4 + 1) +#define NE(i) ((i) * 4 + 2) +#define SE(i) ((i) * 4 + 3) +#define MAX(a, b) ((a > b) ? (a) : (b)) + +void +_qdot(matrix *a, matrix *b, matrix *c, uint i, uint j, uint k, int mode){ + if(i >= a->offset){ + i -= a->offset; + j -= a->offset; + k -= a->offset; + if(mbndok(a, i) && mbndok(b, j)) + V(c, k) += V(a, i) * V(b, j); + return; + } + switch(mode){ + case 0: + _qdot(a, b, c, NW(i), NW(j), NW(k), mode); + _qdot(a, b, c, NE(i), SW(j), NW(k), mode); + _qdot(a, b, c, NW(i), NE(j), NE(k), mode); + _qdot(a, b, c, NE(i), SE(j), NE(k), mode); + _qdot(a, b, c, SW(i), NW(j), SW(k), mode); + _qdot(a, b, c, SE(i), SW(j), SW(k), mode); + _qdot(a, b, c, SW(i), NE(j), SE(k), mode); + _qdot(a, b, c, SE(i), SE(j), SE(k), mode); + break; + case 1: + _qdot(a, b, c, NW(i), NW(j), NW(k), mode); + _qdot(a, b, c, SW(i), SW(j), NW(k), mode); + _qdot(a, b, c, NW(i), NE(j), NE(k), mode); + _qdot(a, b, c, SW(i), SE(j), NE(k), mode); + _qdot(a, b, c, NE(i), NW(j), SW(k), mode); + _qdot(a, b, c, SE(i), SW(j), SW(k), mode); + _qdot(a, b, c, NE(i), NE(j), SE(k), mode); + _qdot(a, b, c, SE(i), SE(j), SE(k), mode); + break; + case 2: + _qdot(a, b, c, NW(i), NW(j), NW(k), mode); + _qdot(a, b, c, NE(i), NE(j), NW(k), mode); + _qdot(a, b, c, NW(i), SW(j), NE(k), mode); + _qdot(a, b, c, NE(i), SE(j), NE(k), mode); + _qdot(a, b, c, SW(i), NW(j), SW(k), mode); + _qdot(a, b, c, SE(i), NE(j), SW(k), mode); + _qdot(a, b, c, SW(i), SW(j), SE(k), mode); + _qdot(a, b, c, SE(i), SE(j), SE(k), mode); + break; + case 3: + _qdot(a, b, c, NW(i), NW(j), NW(k), mode); + _qdot(a, b, c, SW(i), NE(j), NW(k), mode); + _qdot(a, b, c, NW(i), SW(j), NE(k), mode); + _qdot(a, b, c, SW(i), SE(j), NE(k), mode); + _qdot(a, b, c, NE(i), NW(j), SW(k), mode); + _qdot(a, b, c, SE(i), NE(j), SW(k), mode); + _qdot(a, b, c, NE(i), SW(j), SE(k), mode); + _qdot(a, b, c, SE(i), SE(j), SE(k), mode); + break; + default: + abort(); + } +} + +matrix * +qdot(matrix *a, matrix *b, matrix *c, int mode) +{ + switch(mode){ + case 0: + assert(a->nc == b->nr); + c = (matrix *)zblock(nmatrix(c, nil, a->nr, b->nc)); + break; + case 1: + assert(a->nr == b->nr); + c = (matrix *)zblock(nmatrix(c, nil, a->nc, b->nc)); + break; + case 2: + assert(a->nc == b->nc); + c = (matrix *)zblock(nmatrix(c, nil, a->nr, b->nr)); + break; + case 3: + assert(a->nr == b->nc); + c = (matrix *)zblock(nmatrix(c, nil, a->nc, b->nr)); + break; + default: + abort(); + } + _qdot(a, b, c, 3, 3, 3, mode); + + return c; + +} + +/* trans specifies transpositions: + 0: no transposition + 1: a transposed + 2: b transposed */ +matrix * +mdot(matrix *a, matrix *b, matrix *c, int trans) +{ + uint i, k; + + switch(trans){ + case 0: + assert(a->nc == b->nr); + c = nmatrix(c, nil, a->nr, b->nc); + zblock(c); + +#pragma omp parallel for schedule(static, CHUNK) private(k) + for(i = 0; i < c->len; i++){ + if(mbndok(c, i)) + for(k = 0; k < a->mc; k = einc2(k)) + c->data[i] += a->data[(i & oddbits) | k] * b->data[(k >> 1) | (i & evenbits)]; + } + break; + case 1: + assert(a->nr == b->nr); + c = nmatrix(c, nil, a->nc, b->nc); + zblock(c); + +#pragma omp parallel for schedule(static, CHUNK) private(k) + for(i = 0; i < c->len; i++){ + if(mbndok(c, i)) + for(k = 0; k < a->mr; k = oinc2(k)) + c->data[i] += a->data[k | ((i & oddbits) << 1)] * b->data[k | (i & evenbits)]; + } + break; + case 2: + assert(a->nc == b->nc); + c = nmatrix(c, nil, a->nr, b->nr); + zblock(c); + +#pragma omp parallel for schedule(static, CHUNK) private(k) + for(i = 0; i < c->len; i++){ + if(mbndok(c, i)) + for(k = 0; k < a->mc; k = einc2(k)) + c->data[i] += a->data[(i & oddbits) | k] * b->data[((i & evenbits) >> 1) | k]; + } + break; + case 3: + assert(a->nr == b->nc); + c = nmatrix(c, nil, a->nc, b->nr); + zblock(c); + +#pragma omp parallel for schedule(static, CHUNK) private(k) + for(i = 0; i < c->len; i++){ + if(mbndok(c, i)) + for(k = 0; k < a->mr; k = oinc2(k)) + c->data[i] += a->data[k | ((i & oddbits) << 1)] * b->data[((i & evenbits) >> 1) | (k << 1)]; + } + break; + default: + sysfatal("mdot: unspecified transposition"); + } + + return c; +} + +vector * +vdot(matrix *a, vector *b, vector *c, int mode) +{ + ushort i, j; + uint id; + + switch(mode){ + case 0: + assert(a->nc == b->len); + c = (vector *)zblock(nvector(c, nil, a->nr)); + miter(a, id){ + i = undilate2(id & oddbits); + j = undilate2((id & evenbits) >> 1); + V(c, i) += a->data[id] * V(b, j); + } + break; + case 1: + assert(a->nr == b->len); + c = (vector *)zblock(nvector(c, nil, a->nc)); + miter(a, id){ + i = undilate2(id & oddbits); + j = undilate2((id & evenbits) >> 1); + V(c, j) += a->data[id] * V(b, i); + } + break; + default: + abort(); + } + + return c; +} + +real +dot(vector *a, vector *b) +{ + int i; + real r; + + assert(a->len == b->len); + + for(r = i = 0; i < a->len; i++) + r += V(a, i) * V(b, i); + + return r; +} + +matrix * +smprod(real a, matrix *b, matrix *c) +{ + uint i; + c = nmatrix(c, nil, b->nr, b->nc); + for(i = 0; i < b->len; i++) + if((i & oddbits) < b->mr && (i & evenbits) < b->mc) + c->data[i] = a * b->data[i]; + return c; +} + +vector * +sprod(real a, vector *in, vector *out) +{ + uint i; + + out = nvector(out, nil, in->len); + for(i = 0; i < in->len; i++) + V(out, i) = V(in, i) * a; + + return out; +} + +matrix * +madd(matrix *a, matrix *b, matrix *c) +{ + uint i; + assert(a->nr == b->nr && a->nc == b->nc); + c = nmatrix(c, nil, a->nr, a->nc); + for(i = 0; i < a->len; i++) + if((i & oddbits) < a->mr && (i & evenbits) < a->mc) + c->data[i] = a->data[i] + b->data[i]; + return c; +} + +vector * +vadd(vector *a, vector *b, vector *c) +{ + uint i; + + assert(a->len == b->len); + c = nvector(c, nil, a->len); + for(i = 0; i < a->len; i++) + V(c, i) = V(a, i) + V(b, i); + return c; +} + +vector * +vprod(vector *a, vector *b, vector *c) +{ + uint i; + + assert(a->len == b->len); + c = nvector(c, nil, a->len); + for(i = 0; i < a->len; i++) + V(c, i) = V(a, i) * V(b, i); + return c; +} + +vector * +vsub(vector *a, vector *b, vector *c) +{ + uint i; + + if(a->len != b->len) + fprint(2, "%ud %ud\n", a->len, b->len); + assert(a->len == b->len); + c = nvector(c, nil, a->len); + for(i = 0; i < a->len; i++) + V(c, i) = V(a, i) - V(b, i); + return c; +} + +/* Sorting */ +#define PARENT(i) (((i) - 1) / 2) +#define CHILD(i) ((i) * 2 + 1) + +static void +swap(real *a, uint *idx, uint i, uint j) +{ + real tmp, utmp; + tmp = a[i]; + a[i] = a[j]; + a[j] = tmp; + utmp = idx[i]; + idx[i] = idx[j]; + idx[j] = utmp; +} + +static void +sdown(real *a, uint *idx, uint top, uint len) +{ + uint u, v; + + v = CHILD(u = top); + if(v >= len) + return; + if(a[v] > a[u]) + u = v; + if(v + 1 < len && a[v + 1] > a[u]) + u = v + 1; + if(u != top){ + swap(a, idx, top, u); + sdown(a, idx, u, len); + } +} + +static uint * +hsort(real *a, uint len) +{ + int i; + uint *idx = emalloc(sizeof(uint) * len); + + for(i = 0; i < len; i++) + idx[i] = i; + + for(i = PARENT(len); i >= 0; i--) + sdown(a, idx, i, len); + for(len--; len > 0; len--){ + swap(a, idx, 0, len); + sdown(a, idx, 0, len); + } + + return idx; +} + +void +vsort(vector *a) +{ + free(hsort(a->data, a->len)); +} + +uint * +ivsort(vector *a) +{ + return hsort(a->data, a->len); +} + +/* Eigen vector decompositions */ +#define SQ(x) ((x) * (x)) + +typedef struct tuple tuple; +struct tuple{ + ushort i, j; + uint id, jd; + real v; +}; + +static void +drotate(matrix *x, uint i, uint j, uint k, uint l, + real p, real s) +{ + real g, h; + g = x->data[i | j]; + h = x->data[k | l]; + x->data[i | j] = g - s * (h + g * p); + x->data[k | l] = h + s * (g - h * p); +} + +/* Jacobi's method */ +matrix * +jacobi(matrix *x, matrix *vec, vector *val) +{ + uint n, i, j, it; + uint id, jd, kd, nd; + real beta, c, t, s, p, u, mval; + uint *m; + tuple max; + + assert(x->nr == x->nc); + n = x->nr; + nd = x->mr; + + vec = nmatrix(vec, nil, n, n); + zblock(vec); + val = nvector(val, nil, n); + for(i = 0, id = 0; i < n; i++, id = oinc2(id)){ + vec->data[id | (id << 1)] = 1; + V(val, i) = x->data[id | (id << 1)]; + } + + + /* Populate max array */ + m = malloc(sizeof(*m) * n); + memset(m, 0, sizeof(*m) * n); + for(i = id = 0; i < n - 1; i++, id = oinc2(id)) + for(j = jd = mval = 0; j < n; j++, jd = einc2(jd)) + if(mval < fabs(V(x, id | jd))){ + mval = fabs(V(x, id | jd)); + m[i] = jd; + } + + for(it = 0; it < MAXITERS; it++){ + max = (tuple){0, 0, 0, 0, 0}; + for(i = id = 0; i < n - 1; i++, id = oinc2(id)) + if(max.v < (u = fabs(x->data[id | m[i]]))) + max = (tuple){i, undilate2(m[i] >> 1), id, m[i], u}; +#ifdef DEBUG + if(it % 1000 == 0) + fprint(2, "jacobi: %d rotations %g max\n", it, max.v); +#endif + + beta = (V(val, max.j) - V(val, max.i)) / + (2 * x->data[max.id | max.jd]); + t = sign(beta) / (fabs(beta) + sqrt(SQ(beta) + 1)); + c = 1 / sqrt(SQ(t) + 1); + s = c * t; + p = s / (1 + c); + + /* Convergence test based on changing eigenvalues */ + if(fabs(t * x->data[max.id | max.jd]) < meps) + break; + + V(val, max.i) -= t * x->data[max.id | max.jd]; + V(val, max.j) += t * x->data[max.id | max.jd]; + x->data[max.id | max.jd] = x->data[(max.id << 1) | (max.jd >> 1)] = 0; + for(kd = 0; kd < max.id; kd = oinc2(kd)) + drotate(x, kd, max.id << 1, kd, max.jd, p, s); + for(kd = oinc2(max.id); kd < (max.jd >> 1); kd = oinc2(kd)) + drotate(x, max.id, kd << 1, kd, max.jd, p, s); + for(kd = einc2(max.jd); kd < (nd << 1); kd = einc2(kd)) + drotate(x, max.id, kd, max.jd >> 1, kd, p, s); + for(kd = 0; kd < nd; kd = oinc2(kd)) + drotate(vec, kd, max.id << 1, kd, max.jd, p, s); + + + /* Update max array */ + for(j = max.i + 1, jd = oinc2(max.id) << 1; j < n; j++, jd = einc2(jd)) + if(fabs(x->data[max.id | m[max.i]]) < fabs(x->data[max.id | jd])) + m[max.i] = jd; + for(j = max.j + 1, jd = einc2(max.jd); j < n; j++, jd = einc2(jd)) + if(fabs(x->data[(max.jd >> 1) | m[max.j]]) < fabs(x->data[(max.jd >> 1) | jd])) + m[max.j] = jd; + } +#ifdef DEBUG + if(it == MAXITERS) + fprint(2, "jacobi: warning: non-convergent after %d iterations\n", it); + else + fprint(2, "jacobi: terminated: %d iterations\n", it); + + fprint(2, "jacobi: eigenvectors calculated\n"); +#endif + + free(m); + return vec; +} + +/* QR method */ +matrix * +eigen(matrix *in, matrix *vec, vector *val) +{ + uint i, j, cnt; + matrix *x, *q, *r; + + assert(in->nr == in->nc); + + vec = (matrix *)zblock(nmatrix(vec, nil, in->nr, in->nr)); + for(i = 0; i < vec->nr; i++) + M(vec, i, i) = 1; + + q = nmatrix(nil, nil, 0, 0); + r = nmatrix(nil, nil, 0, 0); + x = nmatrix(nil, nil, in->nr, in->nc); + val = (vector *) zblock(nvector(val, nil, x->nr)); + cblock(x, in); + for(i = 0; i < MAXITERS; i++){ + qr(x, q, r); + mdot(r, q, x, 0); + mdot(vec, q, r, 0); + cblock(vec, r); + + for(j = cnt = 0; j < x->nr; j++){ + if(fabs(V(val, j) - M(x, j, j)) < meps) + cnt++; + V(val, j) = M(x, j, j); + } + + /* Convergence check */ + if(cnt >= x->nr) + break; + } + + + fblock(q); + fblock(x); + fblock(r); + return vec; +} + +real +l2(vector *a) +{ + real sum; + uint i; + for(sum = i = 0; i < a->len; i++) + sum = hypot(sum, V(a, i)); + return sum; +} + +void +vnorm(uint nargs, ...) +{ + real sum; + uint i, j; + va_list args; + vector *a; + + sum = 0; + va_start(args, nargs); + for(i = 0; i < nargs; i++){ + a = va_arg(args, vector *); + for(j = 0; j < a->len; j++) + sum = hypot(sum, V(a, j)); + } + va_end(args); + if(fabs(sum) <= meps) + return; + va_start(args, nargs); + for(i = 0; i < nargs; i++){ + a = va_arg(args, vector *); + for(j = 0; j < a->len; j++) + V(a, j) /= sum; + } + va_end(args); +} + +/* Lanczos's algorithm */ +matrix * +tridiag(matrix *A, matrix *v, matrix *T) +{ + vector *w, *tmp, *tmp2, *tmp3, *v1, *v0; + real alpha, beta; + ushort i, j, m; + uint id, jd; + + assert(A->nr == A->nc); + m = A->nr; + T = (matrix *)zblock(nmatrix(T, nil, m, m)); + v = nmatrix(v, nil, m, m); + beta = 0; + w = nvector(nil, nil, m); + tmp = nvector(nil, nil, 0); + tmp2 = nvector(nil, nil, 0); + tmp3 = nvector(nil, nil, 0); + v1 = nvector(nil, nil, m); + v0 = nvector(nil, nil, m); + + for(i = 0; i < m; i++) + V(v1, i) = frand() - 0.5; + vnorm(1, v1); + + for(i = id = 0; i < m; i++, id = oinc2(id)){ + for(j = jd = 0; j < m; j++, jd = oinc2(jd)) + v->data[jd | (id << 1)] = V(v1, j); + if(i > 0){ + T->data[odec2(id) | (id << 1)] = beta; + vsub(vdot(A, v1, tmp, 0), sprod(beta, v0, tmp2), w); + }else + vdot(A, v1, w, 0); + T->data[id | (id << 1)] = alpha = dot(w, v1); + vsub(w, sprod(alpha, v1, tmp3), w); + beta = l2(w); + cblock(v0, v1); + if(i < m - 1){ + T->data[oinc2(id) | (id << 1)] = beta; + sprod(1 / beta, w, v1); + } + } + free(v0); + free(v1); + fblock(tmp); + fblock(tmp2); + fblock(tmp3); + fblock(w); + + return T; +} + +/* Applies simple Nystrom extension to estimate eigenvectors. + Returned vectors are unordered and not othorganalised */ +matrix * +nystrom(matrix *A, matrix *vec, vector *val) +{ + matrix *B, *tmp; + uint i, j, k, io, id; + + B = nmatrix(nil, A->data, A->nr / 2, A->nr / 2); + tmp = nmatrix(nil, nil, 0, 0); + + /* Find eigenvectors of the submatrix */ + eigen(B, vec, val); + + /* Nystrom extension */ + vec = nmatrix(vec, nil, A->nr, vec->nc); + + /* Zero extension */ + for(i = dilate2(vec->nr / 2); i < vec->mr; i = oinc2(i)) + for(j = 0; j < vec->mc; j = einc2(j)) + V(vec, i + j) = 0; + + /* Initial matrix product */ + for(io = ((i = dilate2(vec->nr / 2)) << 1); i < vec->mr; i = oinc2(i), io = einc2(io)) + for(j = 0; j < vec->mc; j = einc2(j)) + for(k = 0; k < (vec->mc >> 1); k = oinc2(k)) + V(vec, i + j) += V(A, k + io) * V(vec, k + j); + + /* Divide by eigenvalues */ + for(i = dilate2(vec->nr / 2); i < vec->mr; i = oinc2(i)) + for(j = k = 0; j < vec->mc; j = einc2(j), k++) + V(vec, i + j) /= V(val, k); + + /* Calculate approximate eigenvalues */ + val = nvector(val, nil, vec->nc); + mdot(A, vec, tmp, 0); + zblock(val); + for(i = id = 0; i < val->len; i++, id = einc2(id)) + for(j = 0; j < tmp->mr; j = oinc2(j)) + V(val, i) += V(tmp, j + id) / tmp->nr; + + fblock(tmp); + free(B); + + return vec; +} + +/* QR decomposition */ +matrix * +qr(matrix *x, matrix *q, matrix *r) +{ + real norm; + int i, j, k; + uint id, jd, kd; + vector *rdiag; + matrix *qr; + + //assert(x->nc == x->nr); + + r = (matrix *)zblock(nmatrix(r, nil, x->nr, x->nc)); + q = (matrix *)zblock(nmatrix(q, nil, x->nr, x->nr)); + qr = nmatrix(nil, nil, x->nr, x->nc); + rdiag = (vector *)zblock(nvector(nil, nil, q->nr)); + cblock(qr, x); + + for(j = jd = 0; j < qr->nc; j++, jd = einc2(jd)){ +#ifdef DEBUG + if(j % 100 == 0) + fprint(2, "qr: householder: %d remaining\n", qr->nc - j); +#endif + for(norm = 0, i = j, id = jd >> 1; i < qr->nr; i++, id = oinc2(id)) + norm = hypot(norm, V(qr, id | jd)); + if(norm == 0) + continue; + if(V(qr, (jd >> 1) | jd) < 0) + norm = -norm; + V(rdiag, j) = -norm; + + for(i = j, id = jd >> 1; i < qr->nr; i++, id = oinc2(id)) + V(qr, id | jd) /= norm; + V(qr, (jd >> 1) | jd) += 1; + +#pragma omp parallel for schedule(static, CHUNK) private(i, id, kd, norm) + for(k = j + 1; k < qr->nc; k++){ + kd = dilate2(k) << 1; + for(norm = 0, i = j, id = jd >> 1; i < qr->nr; i++, id = oinc2(id)) + norm += V(qr, id | jd) * V(qr, id | kd); + norm /= -V(qr, (jd >> 1) | jd); + for(i = j, id = jd >> 1; i < qr->nr; i++, id = oinc2(id)) + V(qr, id | kd) += norm * V(qr, id | jd); + } + } + + for(i = id = 0; i < qr->nr; i++, id = oinc2(id)){ + for(j = i + 1, jd = einc2(id << 1); j < qr->nc; j++, jd = einc2(jd)) + V(r, id | jd) = V(qr, id | jd); + V(r, id | (id << 1)) = V(rdiag, i); + } + + for(j = q->nc - 1, jd = edec2(q->mc); j >= 0; j--, jd = edec2(jd)){ +#ifdef DEBUG + if(j % 100 == 0) + fprint(2, "qr: calculating q: %d remaining\n", j + 1); +#endif + V(q, (jd >> 1) | jd) = 1; +#pragma omp parallel for schedule(static, CHUNK) private(i, id, kd, norm) + for(k = j; k < q->nc; k++){ + kd = dilate2(k) << 1; + if(V(qr, (kd >> 1) | kd) == 0) + continue; + for(norm = 0, i = j, id = jd >> 1; i < q->nr; i++, id = oinc2(id)) + norm += V(qr, id | jd) * V(q, id | kd); + if(fabs(V(qr, (jd >> 1) | jd)) < meps) + norm = 0; + else + norm /= -V(qr, (jd >> 1) | jd); + for(i = j, id = jd >> 1; i < q->nr; i++, id = oinc2(id)) + V(q, id | kd) += norm * V(qr, id | jd); + } + } + +#ifdef DEBUG + fprint(2, "qr: terminated\n"); +#endif + fblock(qr); + fblock(rdiag); + return r; + +} + +static vector * +gsproj(matrix *x, vector *u, uint i, uint j) +{ + uint k, kd; + real mag, proj; + + mag = proj = 0; + for(k = 0; k < x->mr; k = oinc2(k)){ + proj += V(x, i | k) * V(x, j | k); + mag += SQ(V(x, j | k)); + } + u = nvector(u, nil, x->nr); + for(k = kd = 0; k < x->nr; k++, kd = oinc2(kd)) + V(u, k) = V(x, kd | j) * proj / mag; + return u; +} + +matrix * +gs(matrix *x) +{ + uint i, j, k, kd; + vector *u = nil; + real sum; + + for(i = 0; i < x->mc; i = einc2(i)){ + for(j = 0; j < i; j = einc2(j)){ + u = gsproj(x, u, i, j); + for(k = kd = 0; k < x->nr; k++, kd = oinc2(kd)) + V(x, i | kd) -= V(u, k); + } + for(j = sum = 0; j < x->mr; j = oinc2(j)) + sum = hypot(sum, V(x, i | j)); + for(j = 0; j < x->mr; j = oinc2(j)) + V(x, i | j) /= sum; + } + fblock(u); + return x; +} + +real +entropy(matrix *dist) +{ + uint i, j, n; + real gamma; + + assert(dist->nr == dist->nc); + n = dist->nr; + + /* Entropy tuning */ + for(gamma = i = 0; i < dist->mr; i = oinc2(i)) + for(j = einc2(i << 1); j < dist->mc; j = einc2(j)) + gamma += V(dist, i | j); + if(gamma == 0) return 1; + gamma = (real)(SQ(n) - n) / (2 * gamma); + fprint(2, "entropy: γ = %g\n", gamma); + return gamma; +} + +matrix * +mahalanobis(matrix *k, matrix *dist) +{ + uint i, j; + dist = nmatrix(dist, nil, k->nr, k->nr); + for(i = 0; i < dist->mr; i = oinc2(i)) + for(j = einc2(i); j < dist->mc; j = einc2(j)) + V(dist, i | j) = V(dist, tr(i, j)) = V(k, i | (i << 1)) - 2 * V(k, i | j) + V(k, (j >> 1) | j); + return dist; +} + +matrix * +msqrt(matrix *x, matrix *mval) +{ + uint i, cnt; + matrix *vec, *tmp; + vector *val; + + tmp = nmatrix(nil, nil, 0, 0); + vec = nmatrix(nil, nil, 0, 0); + val = nvector(nil, nil, 0); + eigen(x, vec, val); + + mval = (matrix *)zblock(nmatrix(mval, nil, val->len, val->len)); + for(i = cnt = 0; i < val->len; i++) + if(V(val, i) > meps) + M(mval, i, i) = sqrt(V(val, i)); + else{ + if(M(mval, i, i) < -meps) + cnt++; + M(mval, i, i) = 0; + } + if(cnt > 0) + fprint(2, "msqrt: %d negative eigenvectors\n", cnt); + + mdot(vec, mval, tmp, 0); + mdot(tmp, vec, mval, 2); + + fblock(tmp); + fblock(vec); + fblock(val); + return(mval); +} + +matrix * +minv(matrix *x, matrix *mval, real th) +{ + uint i; + matrix *vec, *tmp; + vector *val; + + tmp = nmatrix(nil, nil, 0, 0); + vec = nmatrix(nil, nil, 0, 0); + val = nvector(nil, nil, 0); + eigen(x, vec, val); + + mval = (matrix *)zblock(nmatrix(mval, nil, val->len, val->len)); + for(i = 0; i < val->len; i++){ + if(V(val, i) > th) + M(mval, i, i) = 1 / V(val, i); + else + M(mval, i, i) = 0; + } + + mdot(vec, mval, tmp, 0); + mdot(tmp, vec, mval, 2); + + fblock(tmp); + fblock(vec); + fblock(val); + return(mval); +} + +static vector * +backsub(matrix *a, vector *b, vector *x) +{ + int i, j; + uint id, jd; + real sum; + x = nvector(x, nil, b->len); + for(i = b->len - 1, id = dilate2(b->len - 1); i >= 0; i--, id = odec2(id)){ + sum = 0; + for(j = i + 1, jd = dilate2(i + 1) << 1; j < a->nc; j++, jd = einc2(jd)) + sum += V(a, id | jd) * V(x, j); + if(fabs(V(a, id | (id << 1))) < meps) + V(x, i) = 0; + else + V(x, i) = (V(b, i) - sum) / V(a, id | (id << 1)); + } + return x; +} + +vector * +solve(matrix *A, vector *b, vector *x) +{ + matrix *q, *r; + vector *c; + uint n; + + n = A->nr; + + assert(n == A->nc && + n == b->len); + + + q = nmatrix(nil, nil, 0, 0); + r = nmatrix(nil, nil, 0, 0); + qr(A, q, r); + + c = vdot(q, b, nil, 1); + x = backsub(r, c, x); + + fblock(q); + fblock(r); + fblock(c); + + return x; +} + +/* SVD calculated inefficiently */ +#define SQRT(x) (((x) > 0) ? sqrt(x) : 0) +#define INV(x) (((x) > 0) ? (1/(x)) : 0) +void +svd(matrix *x, matrix *u, vector *s, matrix *v) +{ + uint j, id, jd; + matrix *y = nmatrix(nil, nil, 0, 0); + mdot(x, x, y, 1); + + jacobi(y, v, s); + mdot(x, v, u, 0); + assert(u->nc == s->len); + for(j = 0; j < s->len; j++) + V(s, j) = SQRT(V(s, j)); + for(id = 0; id < u->mr; id = oinc2(id)) + for(j = jd = 0; j < u->nc; j++, jd = einc2(jd)) + V(u, id | jd) *= INV(V(s, j)); + fblock(y); +} + +matrix * +col(matrix *x, matrix *dest, uint c) +{ + uint i; + + dest = nmatrix(dest, nil, x->nr, 1); + c = dilate2(c) << 1; + + for(i = 0; i < x->mr; i = oinc2(i)) + V(dest, i) = V(x, i | c); + return dest; +} + +matrix * +row(matrix *x, matrix *dest, uint r) +{ + uint i; + + dest = nmatrix(dest, nil, 1, x->nc); + r = dilate2(r); + + for(i = 0; i < x->mr; i = einc2(i)) + V(dest, i) = V(x, i | r); + return dest; +} + +vector * +drop(matrix *x, vector *dest) +{ + uint i, id; + assert(x->nc == 1); + dest = nvector(dest, nil, x->nr); + for(i = id = 0; i < x->nr; i++, id = oinc2(id)) + V(dest, i) = V(x, id); + return dest; +} + +matrix * +raise(vector *x, matrix *dest) +{ + uint i, id; + dest = nmatrix(dest, nil, x->len, 1); + for(i = id = 0; i < x->len; i++, id = oinc2(id)) + V(dest, id) = V(x, i); + return dest; +} + +matrix * +trans(matrix *x, matrix *dest) +{ + uint i; + dest = nmatrix(dest, nil, x->nc, x->nr); + for(i = 0; i < x->len; i++) + V(dest, ((i & oddbits) << 1) | ((i & evenbits) >> 1)) = V(x, i); + return dest; +} + +matrix * +slice(cube *x, matrix *dest, uint k) +{ + uint i; + dest = nmatrix(dest, nil, x->nr, x->nc); + for(i = 0; i < x->stride; i++) + V(dest, i) = CV(x, i, k); + return dest; +} + +matrix * +diag(vector *d, matrix *x) +{ + uint i, id; + x = nmatrix(x, nil, d->len, d->len); + for(i = id = 0; i < d->len; i++, id = oinc2(id)) + V(x, id | (id << 1)) = V(d, i); + return x; +} + +vector * +msum(matrix *x, vector *s, uint dim) +{ + uint i; + switch(dim){ + case 0: + s = (vector *)zblock(nvector(s, nil, x->nr)); + miter(x, i) + V(s, undilate2(i & oddbits)) += V(x, i); + break; + case 1: + s = (vector *)zblock(nvector(s, nil, x->nc)); + miter(x, i) + V(s, undilate2((i & evenbits) >> 1)) += V(x, i); + break; + default: + abort(); + } + + return s; +} + +vector * +getdiag(matrix *x, vector *d) +{ + uint i, id; + d = nvector(d, nil, MIN(x->nr, x->nc)); + for(i = id = 0; i < d->len; i++, id = oinc2(id)) + V(d, i) = V(x, id | (id << 1)); + return d; +} diff --git a/linalg.h b/linalg.h new file mode 100644 index 0000000..5613100 --- /dev/null +++ b/linalg.h @@ -0,0 +1,108 @@ +#pragma src "/usr/doc/src/linalg" +#pragma lib "liblinalg.a" + +typedef float real; +extern const float meps; + +/* Cubes, matrics, and Vector data types */ + +/* Morton ordering */ +#define evenbits 0xAAAAAAAA +#define oddbits 0x55555555 +//#define threebits 0x49249249 +#define oinc2(i) (((i) - oddbits) & oddbits) +#define einc2(i) (((i) - evenbits) & evenbits) +#define odec2(i) (((i) - 1) & oddbits) +#define edec2(i) (((i) - 1) & evenbits) +#define tr(i, j) (((i) << 1) | ((j) >> 1)) +ushort undilate2(uint); +uint dilate2(ushort); + +/* Iterators */ +#define miter(m, i) for((i) = 0; (i) < (m)->len; (i)++) if(mbndok((m), (i))) +#define viter(v, i) for((i) = 0; (i) < (v)->len; (i)++) + +typedef struct block block; +typedef struct cube cube; +typedef struct matrix matrix; +typedef struct vector vector; + +#define C(x, i, j, k) ((x)->data[(k) * (x)->stride + (dilate2(i) | (dilate2(j) << 1))]) +#define CV(x, i, k) ((x)->data[(k) * (x)->stride + (i)]) +#define M(x, i, j) ((x)->data[dilate2(i) | (dilate2(j) << 1)]) +#define V(x, i) ((x)->data[(i)]) + +#define mbndok(a, i) (((i) & oddbits) < (a)->mr && ((i) & evenbits) < (a)->mc) + +struct block{ + real *data; + ulong sz; + uchar allocated:1; +}; + +struct vector { + block; + uint len; +}; + +struct matrix{ + vector; + ushort nr, nc; + uint mr, mc; + uint levels, offset; +}; + +struct cube{ + matrix; + ushort ns; + uint stride; +}; + +cube *ncube(cube *, real *, ushort, ushort, ushort); +matrix *nmatrix(matrix *, real *, ushort, ushort); +vector *nvector(vector *, real *, uint); + +void fblock(block *); +block *zblock(block *); +block *cblock(block *, block *); + +matrix *qdot(matrix *, matrix *, matrix *, int); +matrix *mdot(matrix *, matrix *, matrix *, int); +matrix *madd(matrix *, matrix *, matrix *); +matrix *smprod(real, matrix *, matrix *); +real dot(vector *, vector *); +vector *sprod(real, vector *, vector *); +vector *vadd(vector *, vector *, vector *); +vector *vprod(vector *, vector *, vector *); +vector *vsub(vector *, vector *, vector *); +vector *vdot(matrix *, vector *, vector *, int); +void vnorm(uint, ...); + +void vsort(vector *); +uint *ivsort(vector *); + +extern uint MAXITERS; +matrix *eigen(matrix *, matrix *, vector *); +matrix *jacobi(matrix *, matrix *, vector *); +matrix *tridiag(matrix *, matrix *, matrix *); +matrix *nystrom(matrix *, matrix *, vector *); +matrix *qr(matrix *, matrix *, matrix *); +matrix *gs(matrix *); + +real entropy(matrix *); +matrix *mahalanobis(matrix *, matrix *); +matrix *msqrt(matrix *, matrix *); +matrix *minv(matrix *, matrix *, real); +vector *solve(matrix *, vector *, vector *); + +void svd(matrix *, matrix *, vector *, matrix *); + +matrix *col(matrix *, matrix *, uint); +matrix *row(matrix *, matrix *, uint); +matrix *slice(cube *, matrix *, uint); +vector *drop(matrix *, vector *); +matrix *raise(vector *, matrix *); +matrix *trans(matrix *, matrix *); +matrix *diag(vector *, matrix *); +vector *msum(matrix *, vector *, uint); +vector *getdiag(matrix *, vector *); diff --git a/mkfile b/mkfile new file mode 100644 index 0000000..37d3c3e --- /dev/null +++ b/mkfile @@ -0,0 +1,16 @@ +