summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Bedo <cu@cua0.org>2013-08-10 08:36:48 +1000
committerJustin Bedo <cu@cua0.org>2013-08-10 08:36:48 +1000
commitf85676749888e99fec419236d4823fe6250eddd1 (patch)
tree3bae597f74c8fb73860dc6448429cbaf719fbbb1
Initial importHEADmaster
-rw-r--r--frand.c17
-rw-r--r--linalg.c1189
-rw-r--r--linalg.h108
-rw-r--r--mkfile16
-rw-r--r--mkfile.p9p10
5 files changed, 1340 insertions, 0 deletions
diff --git a/frand.c b/frand.c
new file mode 100644
index 0000000..e13e1d8
--- /dev/null
+++ b/frand.c
@@ -0,0 +1,17 @@
+#include <u.h>
+#include <libc.h>
+
+#define MASK 0x7fffffffL
+#define NORM (1.0/(1.0+MASK))
+
+double
+frand(void)
+{
+ double x;
+
+ do {
+ x = lrand() * NORM;
+ x = (x + lrand()) * NORM;
+ } while(x >= 1);
+ return x;
+}
diff --git a/linalg.c b/linalg.c
new file mode 100644
index 0000000..5dfbba4
--- /dev/null
+++ b/linalg.c
@@ -0,0 +1,1189 @@
+#include<u.h>
+#include<libc.h>
+#include<linalg.h>
+
+const float meps = 5.96e-8;
+//const double meps = 1.11e-16;
+
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define sign(x) (((x) >= 0) - ((x) < 0))
+
+uint MAXITERS = 1000000;
+
+static void *
+emalloc(ulong size){
+ void *v;
+
+ v = malloc(size);
+ if(v == nil)
+ sysfatal("emalloc: allocating %ld: %r", size);
+
+ return v;
+}
+
+static void*
+erealloc(void *v, ulong sz)
+{
+ v = realloc(v, sz);
+ if(v == nil)
+ sysfatal("erealloc: allocating %ld: %r", sz);
+ return v;
+}
+
+static void *
+cmalloc(void *p, ulong sz)
+{
+ if(p != nil)
+ return p;
+ p = emalloc(sz);
+ memset(p, 0, sz);
+ return p;
+}
+
+
+ushort
+undilate2(uint x)
+{
+ x = (x | (x >> 1)) & 0x33333333;
+ x = (x | (x >> 2)) & 0x0F0F0F0F;
+ x = (x | (x >> 4)) & 0x00FF00FF;
+ x = (x | (x >> 8)) & 0x0000FFFF;
+ return x;
+}
+
+uint
+dilate2(ushort x)
+{
+ uint r = x;
+ r = (r | (r << 8)) & 0x00FF00FF;
+ r = (r | (r << 4)) & 0x0F0F0F0F;
+ r = (r | (r << 2)) & 0x33333333;
+ r = (r | (r << 1)) & 0x55555555;
+ return r;
+}
+
+/*ushort
+undilate3(uint x)
+{
+ x = (x * 0x00015) & 0x0E070381;
+ x = (x * 0x01041) & 0x0FF80001;
+ x = (x * 0x40001) & 0x0FFC0000;
+ return (ushort)(x >> 18);
+}
+
+uint
+dilate3(ushort x)
+{
+ uint r = x;
+ r = (r * 0x10001) & 0xFF0000FF;
+ r = (r * 0x00101) & 0x0F00F00F;
+ r = (r * 0x00011) & 0xC30C30C3;
+ r = (r * 0x00005) & 0x49249249;
+ return r;
+}*/
+
+void
+fblock(block *p)
+{
+ free(p->data);
+ free(p);
+}
+
+block *
+zblock(block *p)
+{
+ memset(p->data, 0, p->sz);
+ return p;
+}
+
+static void
+growblock(block *p, ulong sz)
+{
+ if(p->data == nil)
+ p->allocated = 1;
+ if(p->sz < sz && p->allocated){
+ p->data = erealloc(p->data, sz);
+ p->sz = sz;
+ }
+}
+
+block *
+cblock(block *dst, block *src)
+{
+ growblock(dst, src->sz);
+ memcpy(dst->data, src->data, src->sz);
+ return dst;
+}
+
+vector*
+nvector(vector *p, real *data, uint l)
+{
+ ulong sz;
+ p = cmalloc(p, sizeof *p);
+ p->len = l;
+
+ if(data == nil){
+ sz = l * sizeof(real);
+ growblock(p, sz);
+ return p;
+ }
+
+ p->data = data;
+ return p;
+}
+
+matrix*
+nmatrix(matrix *p, real *data, ushort r, ushort c)
+{
+ uint len;
+ p = cmalloc(p, sizeof *p);
+ p->nr = r;
+ p->nc = c;
+ p->mr = dilate2(r);
+ p->mc = dilate2(c) << 1;
+ assert(r == undilate2(p->mr) && c == undilate2(p->mc >> 1));
+
+ if(r > 0 && c > 0){
+ len = (dilate2(r - 1) | (dilate2(c - 1) << 1)) + 1;
+ p->offset = 3 * pow(4, p->levels = ceil(log(len) / log(4)));
+ nvector(p, data, len);
+ }
+ return p;
+}
+
+cube *
+ncube(cube *p, real *data, ushort r, ushort c, ushort s)
+{
+ p = cmalloc(p, sizeof *p);
+ p->nr = r;
+ p->nc = c;
+ p->ns = s;
+ p->mr = dilate2(r);
+ p->mc = dilate2(c) << 1;
+ p->stride = (dilate2(r - 1) | (dilate2(c - 1) << 1)) + 1;
+
+ assert(r == undilate2(p->mr) && c == undilate2(p->mc >> 1));
+
+ if(r > 0 && c > 0 && s > 0)
+ nvector(p, data, s * p->stride);
+ return p;
+}
+
+/* Quadtree matrix multiplication */
+#define NW(i) ((i) * 4 + 0)
+#define SW(i) ((i) * 4 + 1)
+#define NE(i) ((i) * 4 + 2)
+#define SE(i) ((i) * 4 + 3)
+#define MAX(a, b) ((a > b) ? (a) : (b))
+
+void
+_qdot(matrix *a, matrix *b, matrix *c, uint i, uint j, uint k, int mode){
+ if(i >= a->offset){
+ i -= a->offset;
+ j -= a->offset;
+ k -= a->offset;
+ if(mbndok(a, i) && mbndok(b, j))
+ V(c, k) += V(a, i) * V(b, j);
+ return;
+ }
+ switch(mode){
+ case 0:
+ _qdot(a, b, c, NW(i), NW(j), NW(k), mode);
+ _qdot(a, b, c, NE(i), SW(j), NW(k), mode);
+ _qdot(a, b, c, NW(i), NE(j), NE(k), mode);
+ _qdot(a, b, c, NE(i), SE(j), NE(k), mode);
+ _qdot(a, b, c, SW(i), NW(j), SW(k), mode);
+ _qdot(a, b, c, SE(i), SW(j), SW(k), mode);
+ _qdot(a, b, c, SW(i), NE(j), SE(k), mode);
+ _qdot(a, b, c, SE(i), SE(j), SE(k), mode);
+ break;
+ case 1:
+ _qdot(a, b, c, NW(i), NW(j), NW(k), mode);
+ _qdot(a, b, c, SW(i), SW(j), NW(k), mode);
+ _qdot(a, b, c, NW(i), NE(j), NE(k), mode);
+ _qdot(a, b, c, SW(i), SE(j), NE(k), mode);
+ _qdot(a, b, c, NE(i), NW(j), SW(k), mode);
+ _qdot(a, b, c, SE(i), SW(j), SW(k), mode);
+ _qdot(a, b, c, NE(i), NE(j), SE(k), mode);
+ _qdot(a, b, c, SE(i), SE(j), SE(k), mode);
+ break;
+ case 2:
+ _qdot(a, b, c, NW(i), NW(j), NW(k), mode);
+ _qdot(a, b, c, NE(i), NE(j), NW(k), mode);
+ _qdot(a, b, c, NW(i), SW(j), NE(k), mode);
+ _qdot(a, b, c, NE(i), SE(j), NE(k), mode);
+ _qdot(a, b, c, SW(i), NW(j), SW(k), mode);
+ _qdot(a, b, c, SE(i), NE(j), SW(k), mode);
+ _qdot(a, b, c, SW(i), SW(j), SE(k), mode);
+ _qdot(a, b, c, SE(i), SE(j), SE(k), mode);
+ break;
+ case 3:
+ _qdot(a, b, c, NW(i), NW(j), NW(k), mode);
+ _qdot(a, b, c, SW(i), NE(j), NW(k), mode);
+ _qdot(a, b, c, NW(i), SW(j), NE(k), mode);
+ _qdot(a, b, c, SW(i), SE(j), NE(k), mode);
+ _qdot(a, b, c, NE(i), NW(j), SW(k), mode);
+ _qdot(a, b, c, SE(i), NE(j), SW(k), mode);
+ _qdot(a, b, c, NE(i), SW(j), SE(k), mode);
+ _qdot(a, b, c, SE(i), SE(j), SE(k), mode);
+ break;
+ default:
+ abort();
+ }
+}
+
+matrix *
+qdot(matrix *a, matrix *b, matrix *c, int mode)
+{
+ switch(mode){
+ case 0:
+ assert(a->nc == b->nr);
+ c = (matrix *)zblock(nmatrix(c, nil, a->nr, b->nc));
+ break;
+ case 1:
+ assert(a->nr == b->nr);
+ c = (matrix *)zblock(nmatrix(c, nil, a->nc, b->nc));
+ break;
+ case 2:
+ assert(a->nc == b->nc);
+ c = (matrix *)zblock(nmatrix(c, nil, a->nr, b->nr));
+ break;
+ case 3:
+ assert(a->nr == b->nc);
+ c = (matrix *)zblock(nmatrix(c, nil, a->nc, b->nr));
+ break;
+ default:
+ abort();
+ }
+ _qdot(a, b, c, 3, 3, 3, mode);
+
+ return c;
+
+}
+
+/* trans specifies transpositions:
+ 0: no transposition
+ 1: a transposed
+ 2: b transposed */
+matrix *
+mdot(matrix *a, matrix *b, matrix *c, int trans)
+{
+ uint i, k;
+
+ switch(trans){
+ case 0:
+ assert(a->nc == b->nr);
+ c = nmatrix(c, nil, a->nr, b->nc);
+ zblock(c);
+
+#pragma omp parallel for schedule(static, CHUNK) private(k)
+ for(i = 0; i < c->len; i++){
+ if(mbndok(c, i))
+ for(k = 0; k < a->mc; k = einc2(k))
+ c->data[i] += a->data[(i & oddbits) | k] * b->data[(k >> 1) | (i & evenbits)];
+ }
+ break;
+ case 1:
+ assert(a->nr == b->nr);
+ c = nmatrix(c, nil, a->nc, b->nc);
+ zblock(c);
+
+#pragma omp parallel for schedule(static, CHUNK) private(k)
+ for(i = 0; i < c->len; i++){
+ if(mbndok(c, i))
+ for(k = 0; k < a->mr; k = oinc2(k))
+ c->data[i] += a->data[k | ((i & oddbits) << 1)] * b->data[k | (i & evenbits)];
+ }
+ break;
+ case 2:
+ assert(a->nc == b->nc);
+ c = nmatrix(c, nil, a->nr, b->nr);
+ zblock(c);
+
+#pragma omp parallel for schedule(static, CHUNK) private(k)
+ for(i = 0; i < c->len; i++){
+ if(mbndok(c, i))
+ for(k = 0; k < a->mc; k = einc2(k))
+ c->data[i] += a->data[(i & oddbits) | k] * b->data[((i & evenbits) >> 1) | k];
+ }
+ break;
+ case 3:
+ assert(a->nr == b->nc);
+ c = nmatrix(c, nil, a->nc, b->nr);
+ zblock(c);
+
+#pragma omp parallel for schedule(static, CHUNK) private(k)
+ for(i = 0; i < c->len; i++){
+ if(mbndok(c, i))
+ for(k = 0; k < a->mr; k = oinc2(k))
+ c->data[i] += a->data[k | ((i & oddbits) << 1)] * b->data[((i & evenbits) >> 1) | (k << 1)];
+ }
+ break;
+ default:
+ sysfatal("mdot: unspecified transposition");
+ }
+
+ return c;
+}
+
+vector *
+vdot(matrix *a, vector *b, vector *c, int mode)
+{
+ ushort i, j;
+ uint id;
+
+ switch(mode){
+ case 0:
+ assert(a->nc == b->len);
+ c = (vector *)zblock(nvector(c, nil, a->nr));
+ miter(a, id){
+ i = undilate2(id & oddbits);
+ j = undilate2((id & evenbits) >> 1);
+ V(c, i) += a->data[id] * V(b, j);
+ }
+ break;
+ case 1:
+ assert(a->nr == b->len);
+ c = (vector *)zblock(nvector(c, nil, a->nc));
+ miter(a, id){
+ i = undilate2(id & oddbits);
+ j = undilate2((id & evenbits) >> 1);
+ V(c, j) += a->data[id] * V(b, i);
+ }
+ break;
+ default:
+ abort();
+ }
+
+ return c;
+}
+
+real
+dot(vector *a, vector *b)
+{
+ int i;
+ real r;
+
+ assert(a->len == b->len);
+
+ for(r = i = 0; i < a->len; i++)
+ r += V(a, i) * V(b, i);
+
+ return r;
+}
+
+matrix *
+smprod(real a, matrix *b, matrix *c)
+{
+ uint i;
+ c = nmatrix(c, nil, b->nr, b->nc);
+ for(i = 0; i < b->len; i++)
+ if((i & oddbits) < b->mr && (i & evenbits) < b->mc)
+ c->data[i] = a * b->data[i];
+ return c;
+}
+
+vector *
+sprod(real a, vector *in, vector *out)
+{
+ uint i;
+
+ out = nvector(out, nil, in->len);
+ for(i = 0; i < in->len; i++)
+ V(out, i) = V(in, i) * a;
+
+ return out;
+}
+
+matrix *
+madd(matrix *a, matrix *b, matrix *c)
+{
+ uint i;
+ assert(a->nr == b->nr && a->nc == b->nc);
+ c = nmatrix(c, nil, a->nr, a->nc);
+ for(i = 0; i < a->len; i++)
+ if((i & oddbits) < a->mr && (i & evenbits) < a->mc)
+ c->data[i] = a->data[i] + b->data[i];
+ return c;
+}
+
+vector *
+vadd(vector *a, vector *b, vector *c)
+{
+ uint i;
+
+ assert(a->len == b->len);
+ c = nvector(c, nil, a->len);
+ for(i = 0; i < a->len; i++)
+ V(c, i) = V(a, i) + V(b, i);
+ return c;
+}
+
+vector *
+vprod(vector *a, vector *b, vector *c)
+{
+ uint i;
+
+ assert(a->len == b->len);
+ c = nvector(c, nil, a->len);
+ for(i = 0; i < a->len; i++)
+ V(c, i) = V(a, i) * V(b, i);
+ return c;
+}
+
+vector *
+vsub(vector *a, vector *b, vector *c)
+{
+ uint i;
+
+ if(a->len != b->len)
+ fprint(2, "%ud %ud\n", a->len, b->len);
+ assert(a->len == b->len);
+ c = nvector(c, nil, a->len);
+ for(i = 0; i < a->len; i++)
+ V(c, i) = V(a, i) - V(b, i);
+ return c;
+}
+
+/* Sorting */
+#define PARENT(i) (((i) - 1) / 2)
+#define CHILD(i) ((i) * 2 + 1)
+
+static void
+swap(real *a, uint *idx, uint i, uint j)
+{
+ real tmp, utmp;
+ tmp = a[i];
+ a[i] = a[j];
+ a[j] = tmp;
+ utmp = idx[i];
+ idx[i] = idx[j];
+ idx[j] = utmp;
+}
+
+static void
+sdown(real *a, uint *idx, uint top, uint len)
+{
+ uint u, v;
+
+ v = CHILD(u = top);
+ if(v >= len)
+ return;
+ if(a[v] > a[u])
+ u = v;
+ if(v + 1 < len && a[v + 1] > a[u])
+ u = v + 1;
+ if(u != top){
+ swap(a, idx, top, u);
+ sdown(a, idx, u, len);
+ }
+}
+
+static uint *
+hsort(real *a, uint len)
+{
+ int i;
+ uint *idx = emalloc(sizeof(uint) * len);
+
+ for(i = 0; i < len; i++)
+ idx[i] = i;
+
+ for(i = PARENT(len); i >= 0; i--)
+ sdown(a, idx, i, len);
+ for(len--; len > 0; len--){
+ swap(a, idx, 0, len);
+ sdown(a, idx, 0, len);
+ }
+
+ return idx;
+}
+
+void
+vsort(vector *a)
+{
+ free(hsort(a->data, a->len));
+}
+
+uint *
+ivsort(vector *a)
+{
+ return hsort(a->data, a->len);
+}
+
+/* Eigen vector decompositions */
+#define SQ(x) ((x) * (x))
+
+typedef struct tuple tuple;
+struct tuple{
+ ushort i, j;
+ uint id, jd;
+ real v;
+};
+
+static void
+drotate(matrix *x, uint i, uint j, uint k, uint l,
+ real p, real s)
+{
+ real g, h;
+ g = x->data[i | j];
+ h = x->data[k | l];
+ x->data[i | j] = g - s * (h + g * p);
+ x->data[k | l] = h + s * (g - h * p);
+}
+
+/* Jacobi's method */
+matrix *
+jacobi(matrix *x, matrix *vec, vector *val)
+{
+ uint n, i, j, it;
+ uint id, jd, kd, nd;
+ real beta, c, t, s, p, u, mval;
+ uint *m;
+ tuple max;
+
+ assert(x->nr == x->nc);
+ n = x->nr;
+ nd = x->mr;
+
+ vec = nmatrix(vec, nil, n, n);
+ zblock(vec);
+ val = nvector(val, nil, n);
+ for(i = 0, id = 0; i < n; i++, id = oinc2(id)){
+ vec->data[id | (id << 1)] = 1;
+ V(val, i) = x->data[id | (id << 1)];
+ }
+
+
+ /* Populate max array */
+ m = malloc(sizeof(*m) * n);
+ memset(m, 0, sizeof(*m) * n);
+ for(i = id = 0; i < n - 1; i++, id = oinc2(id))
+ for(j = jd = mval = 0; j < n; j++, jd = einc2(jd))
+ if(mval < fabs(V(x, id | jd))){
+ mval = fabs(V(x, id | jd));
+ m[i] = jd;
+ }
+
+ for(it = 0; it < MAXITERS; it++){
+ max = (tuple){0, 0, 0, 0, 0};
+ for(i = id = 0; i < n - 1; i++, id = oinc2(id))
+ if(max.v < (u = fabs(x->data[id | m[i]])))
+ max = (tuple){i, undilate2(m[i] >> 1), id, m[i], u};
+#ifdef DEBUG
+ if(it % 1000 == 0)
+ fprint(2, "jacobi: %d rotations %g max\n", it, max.v);
+#endif
+
+ beta = (V(val, max.j) - V(val, max.i)) /
+ (2 * x->data[max.id | max.jd]);
+ t = sign(beta) / (fabs(beta) + sqrt(SQ(beta) + 1));
+ c = 1 / sqrt(SQ(t) + 1);
+ s = c * t;
+ p = s / (1 + c);
+
+ /* Convergence test based on changing eigenvalues */
+ if(fabs(t * x->data[max.id | max.jd]) < meps)
+ break;
+
+ V(val, max.i) -= t * x->data[max.id | max.jd];
+ V(val, max.j) += t * x->data[max.id | max.jd];
+ x->data[max.id | max.jd] = x->data[(max.id << 1) | (max.jd >> 1)] = 0;
+ for(kd = 0; kd < max.id; kd = oinc2(kd))
+ drotate(x, kd, max.id << 1, kd, max.jd, p, s);
+ for(kd = oinc2(max.id); kd < (max.jd >> 1); kd = oinc2(kd))
+ drotate(x, max.id, kd << 1, kd, max.jd, p, s);
+ for(kd = einc2(max.jd); kd < (nd << 1); kd = einc2(kd))
+ drotate(x, max.id, kd, max.jd >> 1, kd, p, s);
+ for(kd = 0; kd < nd; kd = oinc2(kd))
+ drotate(vec, kd, max.id << 1, kd, max.jd, p, s);
+
+
+ /* Update max array */
+ for(j = max.i + 1, jd = oinc2(max.id) << 1; j < n; j++, jd = einc2(jd))
+ if(fabs(x->data[max.id | m[max.i]]) < fabs(x->data[max.id | jd]))
+ m[max.i] = jd;
+ for(j = max.j + 1, jd = einc2(max.jd); j < n; j++, jd = einc2(jd))
+ if(fabs(x->data[(max.jd >> 1) | m[max.j]]) < fabs(x->data[(max.jd >> 1) | jd]))
+ m[max.j] = jd;
+ }
+#ifdef DEBUG
+ if(it == MAXITERS)
+ fprint(2, "jacobi: warning: non-convergent after %d iterations\n", it);
+ else
+ fprint(2, "jacobi: terminated: %d iterations\n", it);
+
+ fprint(2, "jacobi: eigenvectors calculated\n");
+#endif
+
+ free(m);
+ return vec;
+}
+
+/* QR method */
+matrix *
+eigen(matrix *in, matrix *vec, vector *val)
+{
+ uint i, j, cnt;
+ matrix *x, *q, *r;
+
+ assert(in->nr == in->nc);
+
+ vec = (matrix *)zblock(nmatrix(vec, nil, in->nr, in->nr));
+ for(i = 0; i < vec->nr; i++)
+ M(vec, i, i) = 1;
+
+ q = nmatrix(nil, nil, 0, 0);
+ r = nmatrix(nil, nil, 0, 0);
+ x = nmatrix(nil, nil, in->nr, in->nc);
+ val = (vector *) zblock(nvector(val, nil, x->nr));
+ cblock(x, in);
+ for(i = 0; i < MAXITERS; i++){
+ qr(x, q, r);
+ mdot(r, q, x, 0);
+ mdot(vec, q, r, 0);
+ cblock(vec, r);
+
+ for(j = cnt = 0; j < x->nr; j++){
+ if(fabs(V(val, j) - M(x, j, j)) < meps)
+ cnt++;
+ V(val, j) = M(x, j, j);
+ }
+
+ /* Convergence check */
+ if(cnt >= x->nr)
+ break;
+ }
+
+
+ fblock(q);
+ fblock(x);
+ fblock(r);
+ return vec;
+}
+
+real
+l2(vector *a)
+{
+ real sum;
+ uint i;
+ for(sum = i = 0; i < a->len; i++)
+ sum = hypot(sum, V(a, i));
+ return sum;
+}
+
+void
+vnorm(uint nargs, ...)
+{
+ real sum;
+ uint i, j;
+ va_list args;
+ vector *a;
+
+ sum = 0;
+ va_start(args, nargs);
+ for(i = 0; i < nargs; i++){
+ a = va_arg(args, vector *);
+ for(j = 0; j < a->len; j++)
+ sum = hypot(sum, V(a, j));
+ }
+ va_end(args);
+ if(fabs(sum) <= meps)
+ return;
+ va_start(args, nargs);
+ for(i = 0; i < nargs; i++){
+ a = va_arg(args, vector *);
+ for(j = 0; j < a->len; j++)
+ V(a, j) /= sum;
+ }
+ va_end(args);
+}
+
+/* Lanczos's algorithm */
+matrix *
+tridiag(matrix *A, matrix *v, matrix *T)
+{
+ vector *w, *tmp, *tmp2, *tmp3, *v1, *v0;
+ real alpha, beta;
+ ushort i, j, m;
+ uint id, jd;
+
+ assert(A->nr == A->nc);
+ m = A->nr;
+ T = (matrix *)zblock(nmatrix(T, nil, m, m));
+ v = nmatrix(v, nil, m, m);
+ beta = 0;
+ w = nvector(nil, nil, m);
+ tmp = nvector(nil, nil, 0);
+ tmp2 = nvector(nil, nil, 0);
+ tmp3 = nvector(nil, nil, 0);
+ v1 = nvector(nil, nil, m);
+ v0 = nvector(nil, nil, m);
+
+ for(i = 0; i < m; i++)
+ V(v1, i) = frand() - 0.5;
+ vnorm(1, v1);
+
+ for(i = id = 0; i < m; i++, id = oinc2(id)){
+ for(j = jd = 0; j < m; j++, jd = oinc2(jd))
+ v->data[jd | (id << 1)] = V(v1, j);
+ if(i > 0){
+ T->data[odec2(id) | (id << 1)] = beta;
+ vsub(vdot(A, v1, tmp, 0), sprod(beta, v0, tmp2), w);
+ }else
+ vdot(A, v1, w, 0);
+ T->data[id | (id << 1)] = alpha = dot(w, v1);
+ vsub(w, sprod(alpha, v1, tmp3), w);
+ beta = l2(w);
+ cblock(v0, v1);
+ if(i < m - 1){
+ T->data[oinc2(id) | (id << 1)] = beta;
+ sprod(1 / beta, w, v1);
+ }
+ }
+ free(v0);
+ free(v1);
+ fblock(tmp);
+ fblock(tmp2);
+ fblock(tmp3);
+ fblock(w);
+
+ return T;
+}
+
+/* Applies simple Nystrom extension to estimate eigenvectors.
+ Returned vectors are unordered and not othorganalised */
+matrix *
+nystrom(matrix *A, matrix *vec, vector *val)
+{
+ matrix *B, *tmp;
+ uint i, j, k, io, id;
+
+ B = nmatrix(nil, A->data, A->nr / 2, A->nr / 2);
+ tmp = nmatrix(nil, nil, 0, 0);
+
+ /* Find eigenvectors of the submatrix */
+ eigen(B, vec, val);
+
+ /* Nystrom extension */
+ vec = nmatrix(vec, nil, A->nr, vec->nc);
+
+ /* Zero extension */
+ for(i = dilate2(vec->nr / 2); i < vec->mr; i = oinc2(i))
+ for(j = 0; j < vec->mc; j = einc2(j))
+ V(vec, i + j) = 0;
+
+ /* Initial matrix product */
+ for(io = ((i = dilate2(vec->nr / 2)) << 1); i < vec->mr; i = oinc2(i), io = einc2(io))
+ for(j = 0; j < vec->mc; j = einc2(j))
+ for(k = 0; k < (vec->mc >> 1); k = oinc2(k))
+ V(vec, i + j) += V(A, k + io) * V(vec, k + j);
+
+ /* Divide by eigenvalues */
+ for(i = dilate2(vec->nr / 2); i < vec->mr; i = oinc2(i))
+ for(j = k = 0; j < vec->mc; j = einc2(j), k++)
+ V(vec, i + j) /= V(val, k);
+
+ /* Calculate approximate eigenvalues */
+ val = nvector(val, nil, vec->nc);
+ mdot(A, vec, tmp, 0);
+ zblock(val);
+ for(i = id = 0; i < val->len; i++, id = einc2(id))
+ for(j = 0; j < tmp->mr; j = oinc2(j))
+ V(val, i) += V(tmp, j + id) / tmp->nr;
+
+ fblock(tmp);
+ free(B);
+
+ return vec;
+}
+
+/* QR decomposition */
+matrix *
+qr(matrix *x, matrix *q, matrix *r)
+{
+ real norm;
+ int i, j, k;
+ uint id, jd, kd;
+ vector *rdiag;
+ matrix *qr;
+
+ //assert(x->nc == x->nr);
+
+ r = (matrix *)zblock(nmatrix(r, nil, x->nr, x->nc));
+ q = (matrix *)zblock(nmatrix(q, nil, x->nr, x->nr));
+ qr = nmatrix(nil, nil, x->nr, x->nc);
+ rdiag = (vector *)zblock(nvector(nil, nil, q->nr));
+ cblock(qr, x);
+
+ for(j = jd = 0; j < qr->nc; j++, jd = einc2(jd)){
+#ifdef DEBUG
+ if(j % 100 == 0)
+ fprint(2, "qr: householder: %d remaining\n", qr->nc - j);
+#endif
+ for(norm = 0, i = j, id = jd >> 1; i < qr->nr; i++, id = oinc2(id))
+ norm = hypot(norm, V(qr, id | jd));
+ if(norm == 0)
+ continue;
+ if(V(qr, (jd >> 1) | jd) < 0)
+ norm = -norm;
+ V(rdiag, j) = -norm;
+
+ for(i = j, id = jd >> 1; i < qr->nr; i++, id = oinc2(id))
+ V(qr, id | jd) /= norm;
+ V(qr, (jd >> 1) | jd) += 1;
+
+#pragma omp parallel for schedule(static, CHUNK) private(i, id, kd, norm)
+ for(k = j + 1; k < qr->nc; k++){
+ kd = dilate2(k) << 1;
+ for(norm = 0, i = j, id = jd >> 1; i < qr->nr; i++, id = oinc2(id))
+ norm += V(qr, id | jd) * V(qr, id | kd);
+ norm /= -V(qr, (jd >> 1) | jd);
+ for(i = j, id = jd >> 1; i < qr->nr; i++, id = oinc2(id))
+ V(qr, id | kd) += norm * V(qr, id | jd);
+ }
+ }
+
+ for(i = id = 0; i < qr->nr; i++, id = oinc2(id)){
+ for(j = i + 1, jd = einc2(id << 1); j < qr->nc; j++, jd = einc2(jd))
+ V(r, id | jd) = V(qr, id | jd);
+ V(r, id | (id << 1)) = V(rdiag, i);
+ }
+
+ for(j = q->nc - 1, jd = edec2(q->mc); j >= 0; j--, jd = edec2(jd)){
+#ifdef DEBUG
+ if(j % 100 == 0)
+ fprint(2, "qr: calculating q: %d remaining\n", j + 1);
+#endif
+ V(q, (jd >> 1) | jd) = 1;
+#pragma omp parallel for schedule(static, CHUNK) private(i, id, kd, norm)
+ for(k = j; k < q->nc; k++){
+ kd = dilate2(k) << 1;
+ if(V(qr, (kd >> 1) | kd) == 0)
+ continue;
+ for(norm = 0, i = j, id = jd >> 1; i < q->nr; i++, id = oinc2(id))
+ norm += V(qr, id | jd) * V(q, id | kd);
+ if(fabs(V(qr, (jd >> 1) | jd)) < meps)
+ norm = 0;
+ else
+ norm /= -V(qr, (jd >> 1) | jd);
+ for(i = j, id = jd >> 1; i < q->nr; i++, id = oinc2(id))
+ V(q, id | kd) += norm * V(qr, id | jd);
+ }
+ }
+
+#ifdef DEBUG
+ fprint(2, "qr: terminated\n");
+#endif
+ fblock(qr);
+ fblock(rdiag);
+ return r;
+
+}
+
+static vector *
+gsproj(matrix *x, vector *u, uint i, uint j)
+{
+ uint k, kd;
+ real mag, proj;
+
+ mag = proj = 0;
+ for(k = 0; k < x->mr; k = oinc2(k)){
+ proj += V(x, i | k) * V(x, j | k);
+ mag += SQ(V(x, j | k));
+ }
+ u = nvector(u, nil, x->nr);
+ for(k = kd = 0; k < x->nr; k++, kd = oinc2(kd))
+ V(u, k) = V(x, kd | j) * proj / mag;
+ return u;
+}
+
+matrix *
+gs(matrix *x)
+{
+ uint i, j, k, kd;
+ vector *u = nil;
+ real sum;
+
+ for(i = 0; i < x->mc; i = einc2(i)){
+ for(j = 0; j < i; j = einc2(j)){
+ u = gsproj(x, u, i, j);
+ for(k = kd = 0; k < x->nr; k++, kd = oinc2(kd))
+ V(x, i | kd) -= V(u, k);
+ }
+ for(j = sum = 0; j < x->mr; j = oinc2(j))
+ sum = hypot(sum, V(x, i | j));
+ for(j = 0; j < x->mr; j = oinc2(j))
+ V(x, i | j) /= sum;
+ }
+ fblock(u);
+ return x;
+}
+
+real
+entropy(matrix *dist)
+{
+ uint i, j, n;
+ real gamma;
+
+ assert(dist->nr == dist->nc);
+ n = dist->nr;
+
+ /* Entropy tuning */
+ for(gamma = i = 0; i < dist->mr; i = oinc2(i))
+ for(j = einc2(i << 1); j < dist->mc; j = einc2(j))
+ gamma += V(dist, i | j);
+ if(gamma == 0) return 1;
+ gamma = (real)(SQ(n) - n) / (2 * gamma);
+ fprint(2, "entropy: γ = %g\n", gamma);
+ return gamma;
+}
+
+matrix *
+mahalanobis(matrix *k, matrix *dist)
+{
+ uint i, j;
+ dist = nmatrix(dist, nil, k->nr, k->nr);
+ for(i = 0; i < dist->mr; i = oinc2(i))
+ for(j = einc2(i); j < dist->mc; j = einc2(j))
+ V(dist, i | j) = V(dist, tr(i, j)) = V(k, i | (i << 1)) - 2 * V(k, i | j) + V(k, (j >> 1) | j);
+ return dist;
+}
+
+matrix *
+msqrt(matrix *x, matrix *mval)
+{
+ uint i, cnt;
+ matrix *vec, *tmp;
+ vector *val;
+
+ tmp = nmatrix(nil, nil, 0, 0);
+ vec = nmatrix(nil, nil, 0, 0);
+ val = nvector(nil, nil, 0);
+ eigen(x, vec, val);
+
+ mval = (matrix *)zblock(nmatrix(mval, nil, val->len, val->len));
+ for(i = cnt = 0; i < val->len; i++)
+ if(V(val, i) > meps)
+ M(mval, i, i) = sqrt(V(val, i));
+ else{
+ if(M(mval, i, i) < -meps)
+ cnt++;
+ M(mval, i, i) = 0;
+ }
+ if(cnt > 0)
+ fprint(2, "msqrt: %d negative eigenvectors\n", cnt);
+
+ mdot(vec, mval, tmp, 0);
+ mdot(tmp, vec, mval, 2);
+
+ fblock(tmp);
+ fblock(vec);
+ fblock(val);
+ return(mval);
+}
+
+matrix *
+minv(matrix *x, matrix *mval, real th)
+{
+ uint i;
+ matrix *vec, *tmp;
+ vector *val;
+
+ tmp = nmatrix(nil, nil, 0, 0);
+ vec = nmatrix(nil, nil, 0, 0);
+ val = nvector(nil, nil, 0);
+ eigen(x, vec, val);
+
+ mval = (matrix *)zblock(nmatrix(mval, nil, val->len, val->len));
+ for(i = 0; i < val->len; i++){
+ if(V(val, i) > th)
+ M(mval, i, i) = 1 / V(val, i);
+ else
+ M(mval, i, i) = 0;
+ }
+
+ mdot(vec, mval, tmp, 0);
+ mdot(tmp, vec, mval, 2);
+
+ fblock(tmp);
+ fblock(vec);
+ fblock(val);
+ return(mval);
+}
+
+static vector *
+backsub(matrix *a, vector *b, vector *x)
+{
+ int i, j;
+ uint id, jd;
+ real sum;
+ x = nvector(x, nil, b->len);
+ for(i = b->len - 1, id = dilate2(b->len - 1); i >= 0; i--, id = odec2(id)){
+ sum = 0;
+ for(j = i + 1, jd = dilate2(i + 1) << 1; j < a->nc; j++, jd = einc2(jd))
+ sum += V(a, id | jd) * V(x, j);
+ if(fabs(V(a, id | (id << 1))) < meps)
+ V(x, i) = 0;
+ else
+ V(x, i) = (V(b, i) - sum) / V(a, id | (id << 1));
+ }
+ return x;
+}
+
+vector *
+solve(matrix *A, vector *b, vector *x)
+{
+ matrix *q, *r;
+ vector *c;
+ uint n;
+
+ n = A->nr;
+
+ assert(n == A->nc &&
+ n == b->len);
+
+
+ q = nmatrix(nil, nil, 0, 0);
+ r = nmatrix(nil, nil, 0, 0);
+ qr(A, q, r);
+
+ c = vdot(q, b, nil, 1);
+ x = backsub(r, c, x);
+
+ fblock(q);
+ fblock(r);
+ fblock(c);
+
+ return x;
+}
+
+/* SVD calculated inefficiently */
+#define SQRT(x) (((x) > 0) ? sqrt(x) : 0)
+#define INV(x) (((x) > 0) ? (1/(x)) : 0)
+void
+svd(matrix *x, matrix *u, vector *s, matrix *v)
+{
+ uint j, id, jd;
+ matrix *y = nmatrix(nil, nil, 0, 0);
+ mdot(x, x, y, 1);
+
+ jacobi(y, v, s);
+ mdot(x, v, u, 0);
+ assert(u->nc == s->len);
+ for(j = 0; j < s->len; j++)
+ V(s, j) = SQRT(V(s, j));
+ for(id = 0; id < u->mr; id = oinc2(id))
+ for(j = jd = 0; j < u->nc; j++, jd = einc2(jd))
+ V(u, id | jd) *= INV(V(s, j));
+ fblock(y);
+}
+
+matrix *
+col(matrix *x, matrix *dest, uint c)
+{
+ uint i;
+
+ dest = nmatrix(dest, nil, x->nr, 1);
+ c = dilate2(c) << 1;
+
+ for(i = 0; i < x->mr; i = oinc2(i))
+ V(dest, i) = V(x, i | c);
+ return dest;
+}
+
+matrix *
+row(matrix *x, matrix *dest, uint r)
+{
+ uint i;
+
+ dest = nmatrix(dest, nil, 1, x->nc);
+ r = dilate2(r);
+
+ for(i = 0; i < x->mr; i = einc2(i))
+ V(dest, i) = V(x, i | r);
+ return dest;
+}
+
+vector *
+drop(matrix *x, vector *dest)
+{
+ uint i, id;
+ assert(x->nc == 1);
+ dest = nvector(dest, nil, x->nr);
+ for(i = id = 0; i < x->nr; i++, id = oinc2(id))
+ V(dest, i) = V(x, id);
+ return dest;
+}
+
+matrix *
+raise(vector *x, matrix *dest)
+{
+ uint i, id;
+ dest = nmatrix(dest, nil, x->len, 1);
+ for(i = id = 0; i < x->len; i++, id = oinc2(id))
+ V(dest, id) = V(x, i);
+ return dest;
+}
+
+matrix *
+trans(matrix *x, matrix *dest)
+{
+ uint i;
+ dest = nmatrix(dest, nil, x->nc, x->nr);
+ for(i = 0; i < x->len; i++)
+ V(dest, ((i & oddbits) << 1) | ((i & evenbits) >> 1)) = V(x, i);
+ return dest;
+}
+
+matrix *
+slice(cube *x, matrix *dest, uint k)
+{
+ uint i;
+ dest = nmatrix(dest, nil, x->nr, x->nc);
+ for(i = 0; i < x->stride; i++)
+ V(dest, i) = CV(x, i, k);
+ return dest;
+}
+
+matrix *
+diag(vector *d, matrix *x)
+{
+ uint i, id;
+ x = nmatrix(x, nil, d->len, d->len);
+ for(i = id = 0; i < d->len; i++, id = oinc2(id))
+ V(x, id | (id << 1)) = V(d, i);
+ return x;
+}
+
+vector *
+msum(matrix *x, vector *s, uint dim)
+{
+ uint i;
+ switch(dim){
+ case 0:
+ s = (vector *)zblock(nvector(s, nil, x->nr));
+ miter(x, i)
+ V(s, undilate2(i & oddbits)) += V(x, i);
+ break;
+ case 1:
+ s = (vector *)zblock(nvector(s, nil, x->nc));
+ miter(x, i)
+ V(s, undilate2((i & evenbits) >> 1)) += V(x, i);
+ break;
+ default:
+ abort();
+ }
+
+ return s;
+}
+
+vector *
+getdiag(matrix *x, vector *d)
+{
+ uint i, id;
+ d = nvector(d, nil, MIN(x->nr, x->nc));
+ for(i = id = 0; i < d->len; i++, id = oinc2(id))
+ V(d, i) = V(x, id | (id << 1));
+ return d;
+}
diff --git a/linalg.h b/linalg.h
new file mode 100644
index 0000000..5613100
--- /dev/null
+++ b/linalg.h
@@ -0,0 +1,108 @@
+#pragma src "/usr/doc/src/linalg"
+#pragma lib "liblinalg.a"
+
+typedef float real;
+extern const float meps;
+
+/* Cubes, matrics, and Vector data types */
+
+/* Morton ordering */
+#define evenbits 0xAAAAAAAA
+#define oddbits 0x55555555
+//#define threebits 0x49249249
+#define oinc2(i) (((i) - oddbits) & oddbits)
+#define einc2(i) (((i) - evenbits) & evenbits)
+#define odec2(i) (((i) - 1) & oddbits)
+#define edec2(i) (((i) - 1) & evenbits)
+#define tr(i, j) (((i) << 1) | ((j) >> 1))
+ushort undilate2(uint);
+uint dilate2(ushort);
+
+/* Iterators */
+#define miter(m, i) for((i) = 0; (i) < (m)->len; (i)++) if(mbndok((m), (i)))
+#define viter(v, i) for((i) = 0; (i) < (v)->len; (i)++)
+
+typedef struct block block;
+typedef struct cube cube;
+typedef struct matrix matrix;
+typedef struct vector vector;
+
+#define C(x, i, j, k) ((x)->data[(k) * (x)->stride + (dilate2(i) | (dilate2(j) << 1))])
+#define CV(x, i, k) ((x)->data[(k) * (x)->stride + (i)])
+#define M(x, i, j) ((x)->data[dilate2(i) | (dilate2(j) << 1)])
+#define V(x, i) ((x)->data[(i)])
+
+#define mbndok(a, i) (((i) & oddbits) < (a)->mr && ((i) & evenbits) < (a)->mc)
+
+struct block{
+ real *data;
+ ulong sz;
+ uchar allocated:1;
+};
+
+struct vector {
+ block;
+ uint len;
+};
+
+struct matrix{
+ vector;
+ ushort nr, nc;
+ uint mr, mc;
+ uint levels, offset;
+};
+
+struct cube{
+ matrix;
+ ushort ns;
+ uint stride;
+};
+
+cube *ncube(cube *, real *, ushort, ushort, ushort);
+matrix *nmatrix(matrix *, real *, ushort, ushort);
+vector *nvector(vector *, real *, uint);
+
+void fblock(block *);
+block *zblock(block *);
+block *cblock(block *, block *);
+
+matrix *qdot(matrix *, matrix *, matrix *, int);
+matrix *mdot(matrix *, matrix *, matrix *, int);
+matrix *madd(matrix *, matrix *, matrix *);
+matrix *smprod(real, matrix *, matrix *);
+real dot(vector *, vector *);
+vector *sprod(real, vector *, vector *);
+vector *vadd(vector *, vector *, vector *);
+vector *vprod(vector *, vector *, vector *);
+vector *vsub(vector *, vector *, vector *);
+vector *vdot(matrix *, vector *, vector *, int);
+void vnorm(uint, ...);
+
+void vsort(vector *);