diff options
| -rw-r--r-- | frand.c | 17 | ||||
| -rw-r--r-- | linalg.c | 1189 | ||||
| -rw-r--r-- | linalg.h | 108 | ||||
| -rw-r--r-- | mkfile | 16 | ||||
| -rw-r--r-- | mkfile.p9p | 10 | 
5 files changed, 1340 insertions, 0 deletions
@@ -0,0 +1,17 @@ +#include	<u.h> +#include	<libc.h> + +#define	MASK	0x7fffffffL +#define	NORM	(1.0/(1.0+MASK)) + +double +frand(void) +{ +	double x; + +	do { +		x = lrand() * NORM; +		x = (x + lrand()) * NORM; +	} while(x >= 1); +	return x; +} diff --git a/linalg.c b/linalg.c new file mode 100644 index 0000000..5dfbba4 --- /dev/null +++ b/linalg.c @@ -0,0 +1,1189 @@ +#include<u.h> +#include<libc.h> +#include<linalg.h> + +const float meps = 5.96e-8; +//const double meps = 1.11e-16; + + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define sign(x) (((x) >= 0) - ((x) < 0)) + +uint MAXITERS = 1000000; + +static void * +emalloc(ulong size){ +	void *v; +	 +	v = malloc(size); +	if(v == nil) +		sysfatal("emalloc: allocating %ld: %r", size); +	 +	return v; +} + +static void* +erealloc(void *v, ulong sz) +{ +	v = realloc(v, sz); +	if(v == nil) +		sysfatal("erealloc: allocating %ld: %r", sz); +	return v; +} + +static void * +cmalloc(void *p, ulong sz) +{ +	if(p != nil) +		return p; +	p = emalloc(sz); +	memset(p, 0, sz); +	return p; +} + + +ushort +undilate2(uint x) +{ +	x = (x | (x >> 1)) & 0x33333333; +	x = (x | (x >> 2)) & 0x0F0F0F0F; +	x = (x | (x >> 4)) & 0x00FF00FF; +	x = (x | (x >> 8)) & 0x0000FFFF; +	return x; +} + +uint +dilate2(ushort x) +{ +	uint r = x; +	r = (r | (r << 8)) & 0x00FF00FF; +	r = (r | (r << 4)) & 0x0F0F0F0F; +	r = (r | (r << 2)) & 0x33333333; +	r = (r | (r << 1)) & 0x55555555; +	return r; +} + +/*ushort +undilate3(uint x) +{ +	x = (x * 0x00015) & 0x0E070381; +	x = (x * 0x01041) & 0x0FF80001; +	x = (x * 0x40001) & 0x0FFC0000; +	return (ushort)(x >> 18); +} + +uint +dilate3(ushort x) +{ +	uint r = x; +	r = (r * 0x10001) & 0xFF0000FF; +	r = (r * 0x00101) & 0x0F00F00F; +	r = (r * 0x00011) & 0xC30C30C3; +	r = (r * 0x00005) & 0x49249249; +	return r; +}*/ + +void +fblock(block *p) +{ +	free(p->data); +	free(p); +} + +block * +zblock(block *p) +{ +	memset(p->data, 0, p->sz); +	return p; +} + +static void +growblock(block *p, ulong sz) +{ +	if(p->data == nil) +		p->allocated = 1; +	if(p->sz < sz && p->allocated){ +		p->data = erealloc(p->data, sz); +		p->sz = sz; +	} +} + +block * +cblock(block *dst, block *src) +{ +	growblock(dst, src->sz); +	memcpy(dst->data, src->data, src->sz); +	return dst; +} + +vector* +nvector(vector *p, real *data, uint l) +{ +	ulong sz; +	p = cmalloc(p, sizeof *p); +	p->len = l; +	 +	if(data == nil){ +		sz = l * sizeof(real); +		growblock(p, sz); +		return p; +	} +	 +	p->data = data; +	return p; +} + +matrix* +nmatrix(matrix *p, real *data, ushort r, ushort c) +{ +	uint len; +	p = cmalloc(p, sizeof *p); +	p->nr = r; +	p->nc = c; +	p->mr = dilate2(r); +	p->mc = dilate2(c) << 1; +	assert(r == undilate2(p->mr) && c == undilate2(p->mc >> 1)); +	 +	if(r > 0 && c > 0){ +		len = (dilate2(r - 1) | (dilate2(c - 1) << 1)) + 1; +		p->offset = 3 * pow(4, p->levels = ceil(log(len) / log(4))); +		nvector(p, data, len); +	} +	return p; +} + +cube * +ncube(cube *p, real *data, ushort r, ushort c, ushort s) +{ +	p = cmalloc(p, sizeof *p); +	p->nr = r; +	p->nc = c; +	p->ns = s; +	p->mr = dilate2(r); +	p->mc = dilate2(c) << 1; +	p->stride = (dilate2(r - 1) | (dilate2(c - 1) << 1)) + 1; +	 +	assert(r == undilate2(p->mr) && c == undilate2(p->mc >> 1)); +	 +	if(r > 0 && c > 0 && s > 0) +		nvector(p, data, s * p->stride); +	return p; +} + +/* Quadtree matrix multiplication */ +#define NW(i) ((i) * 4 + 0) +#define SW(i) ((i) * 4 + 1) +#define NE(i) ((i) * 4 + 2) +#define SE(i) ((i) * 4 + 3) +#define MAX(a, b) ((a > b) ? (a) : (b)) + +void +_qdot(matrix *a, matrix *b, matrix *c, uint i, uint j, uint k, int mode){ +	if(i >= a->offset){ +		i -= a->offset; +		j -= a->offset; +		k -= a->offset; +		if(mbndok(a, i) && mbndok(b, j)) +			V(c, k) += V(a, i) * V(b, j); +		return; +	} +	switch(mode){ +	case 0: +		_qdot(a, b, c, NW(i), NW(j), NW(k), mode); +		_qdot(a, b, c, NE(i), SW(j), NW(k), mode); +		_qdot(a, b, c, NW(i), NE(j), NE(k), mode); +		_qdot(a, b, c, NE(i), SE(j), NE(k), mode); +		_qdot(a, b, c, SW(i), NW(j), SW(k), mode); +		_qdot(a, b, c, SE(i), SW(j), SW(k), mode); +		_qdot(a, b, c, SW(i), NE(j), SE(k), mode); +		_qdot(a, b, c, SE(i), SE(j), SE(k), mode); +		break; +	case 1: +		_qdot(a, b, c, NW(i), NW(j), NW(k), mode); +		_qdot(a, b, c, SW(i), SW(j), NW(k), mode); +		_qdot(a, b, c, NW(i), NE(j), NE(k), mode); +		_qdot(a, b, c, SW(i), SE(j), NE(k), mode); +		_qdot(a, b, c, NE(i), NW(j), SW(k), mode); +		_qdot(a, b, c, SE(i), SW(j), SW(k), mode); +		_qdot(a, b, c, NE(i), NE(j), SE(k), mode); +		_qdot(a, b, c, SE(i), SE(j), SE(k), mode); +		break; +	case 2: +		_qdot(a, b, c, NW(i), NW(j), NW(k), mode); +		_qdot(a, b, c, NE(i), NE(j), NW(k), mode); +		_qdot(a, b, c, NW(i), SW(j), NE(k), mode); +		_qdot(a, b, c, NE(i), SE(j), NE(k), mode); +		_qdot(a, b, c, SW(i), NW(j), SW(k), mode); +		_qdot(a, b, c, SE(i), NE(j), SW(k), mode); +		_qdot(a, b, c, SW(i), SW(j), SE(k), mode); +		_qdot(a, b, c, SE(i), SE(j), SE(k), mode); +		break; +	case 3: +		_qdot(a, b, c, NW(i), NW(j), NW(k), mode); +		_qdot(a, b, c, SW(i), NE(j), NW(k), mode); +		_qdot(a, b, c, NW(i), SW(j), NE(k), mode); +		_qdot(a, b, c, SW(i), SE(j), NE(k), mode); +		_qdot(a, b, c, NE(i), NW(j), SW(k), mode); +		_qdot(a, b, c, SE(i), NE(j), SW(k), mode); +		_qdot(a, b, c, NE(i), SW(j), SE(k), mode); +		_qdot(a, b, c, SE(i), SE(j), SE(k), mode); +		break; +	default: +		abort(); +	} +} + +matrix * +qdot(matrix *a, matrix *b, matrix *c, int mode) +{ +	switch(mode){ +	case 0: +		assert(a->nc == b->nr); +		c = (matrix *)zblock(nmatrix(c, nil, a->nr, b->nc)); +		break; +	case 1: +		assert(a->nr == b->nr); +		c = (matrix *)zblock(nmatrix(c, nil, a->nc, b->nc)); +		break; +	case 2: +		assert(a->nc == b->nc); +		c = (matrix *)zblock(nmatrix(c, nil, a->nr, b->nr)); +		break; +	case 3: +		assert(a->nr == b->nc); +		c = (matrix *)zblock(nmatrix(c, nil, a->nc, b->nr)); +		break; +	default: +		abort(); +	} +	_qdot(a, b, c, 3, 3, 3, mode); +	 +	return c; +	 +} + +/* trans specifies transpositions: + 	0: no transposition + 	1: a transposed + 	2: b transposed */ +matrix * +mdot(matrix *a, matrix *b, matrix *c, int trans) +{ +	uint i, k; +	 +	switch(trans){ +		case 0: +			assert(a->nc == b->nr); +			c = nmatrix(c, nil, a->nr, b->nc); +			zblock(c); +			 +#pragma omp parallel for schedule(static, CHUNK) private(k) +			for(i = 0; i < c->len; i++){ +				if(mbndok(c, i)) +					for(k = 0; k < a->mc; k = einc2(k)) +						c->data[i] += a->data[(i & oddbits) | k] * b->data[(k >> 1) | (i & evenbits)]; +			} +			break; +		case 1: +			assert(a->nr == b->nr); +			c = nmatrix(c, nil, a->nc, b->nc); +			zblock(c); +	 +#pragma omp parallel for schedule(static, CHUNK) private(k) +			for(i = 0; i < c->len; i++){ +				if(mbndok(c, i)) +					for(k = 0; k < a->mr; k = oinc2(k)) +						c->data[i] += a->data[k | ((i & oddbits) << 1)] * b->data[k | (i & evenbits)]; +			} +			break; +		case 2: +			assert(a->nc == b->nc); +			c = nmatrix(c, nil, a->nr, b->nr); +			zblock(c); +	 +#pragma omp parallel for schedule(static, CHUNK) private(k) +			for(i = 0; i < c->len; i++){ +				if(mbndok(c, i)) +					for(k = 0; k < a->mc; k = einc2(k)) +						c->data[i] += a->data[(i & oddbits) | k] * b->data[((i & evenbits) >> 1) | k]; +			} +			break; +		case 3: +			assert(a->nr == b->nc); +			c = nmatrix(c, nil, a->nc, b->nr); +			zblock(c); +			 +#pragma omp parallel for schedule(static, CHUNK) private(k) +			for(i = 0; i < c->len; i++){ +				if(mbndok(c, i)) +					for(k = 0; k < a->mr; k = oinc2(k)) +						c->data[i] += a->data[k | ((i & oddbits) << 1)] * b->data[((i & evenbits) >> 1) | (k << 1)]; +			} +			break; +		default: +			sysfatal("mdot: unspecified transposition"); +	} + +	return c; +} + +vector * +vdot(matrix *a, vector *b, vector *c, int mode) +{ +	ushort i, j; +	uint id; +	 +	switch(mode){ +	case 0: +		assert(a->nc == b->len); +		c = (vector *)zblock(nvector(c, nil, a->nr)); +		miter(a, id){ +			i = undilate2(id & oddbits); +			j = undilate2((id & evenbits) >> 1); +			V(c, i) += a->data[id] * V(b, j); +		} +		break; +	case 1: +		assert(a->nr == b->len); +		c = (vector *)zblock(nvector(c, nil, a->nc)); +		miter(a, id){ +			i = undilate2(id & oddbits); +			j = undilate2((id & evenbits) >> 1); +			V(c, j) += a->data[id] * V(b, i); +		} +		break; +	default: +		abort(); +	} +			 +	return c; +} + +real +dot(vector *a, vector *b) +{ +	int i; +	real r; +	 +	assert(a->len == b->len); +	 +	for(r = i = 0; i < a->len; i++) +		r += V(a, i) * V(b, i); +	 +	return r; +} + +matrix * +smprod(real a, matrix *b, matrix *c) +{ +	uint i; +	c = nmatrix(c, nil, b->nr, b->nc); +	for(i = 0; i < b->len; i++) +		if((i & oddbits) < b->mr && (i & evenbits) < b->mc) +			c->data[i] = a * b->data[i]; +	return c; +} + +vector * +sprod(real a, vector *in, vector *out) +{ +	uint i; +	 +	out = nvector(out, nil, in->len); +	for(i = 0; i < in->len; i++) +		V(out, i) = V(in, i) * a; +	 +	return out; +} + +matrix * +madd(matrix *a, matrix *b, matrix *c) +{ +	uint i; +	assert(a->nr == b->nr && a->nc == b->nc); +	c = nmatrix(c, nil, a->nr, a->nc); +	for(i = 0; i < a->len; i++) +		if((i & oddbits) < a->mr && (i & evenbits) < a->mc) +			c->data[i] = a->data[i] + b->data[i]; +	return c; +} + +vector * +vadd(vector *a, vector *b, vector *c) +{ +	uint i; +	 +	assert(a->len == b->len); +	c = nvector(c, nil, a->len); +	for(i = 0; i < a->len; i++) +		V(c, i) = V(a, i) + V(b, i); +	return c; +} + +vector * +vprod(vector *a, vector *b, vector *c) +{ +	uint i; +	 +	assert(a->len == b->len); +	c = nvector(c, nil, a->len); +	for(i = 0; i < a->len; i++) +		V(c, i) = V(a, i) * V(b, i); +	return c; +} + +vector * +vsub(vector *a, vector *b, vector *c) +{ +	uint i; +	 +	if(a->len != b->len) +		fprint(2, "%ud %ud\n", a->len, b->len); +	assert(a->len == b->len); +	c = nvector(c, nil, a->len); +	for(i = 0; i < a->len; i++) +		V(c, i) = V(a, i) - V(b, i); +	return c; +} + +/* Sorting */ +#define PARENT(i) (((i) - 1) / 2) +#define CHILD(i) ((i) * 2 + 1) + +static void +swap(real *a, uint *idx, uint i, uint j) +{ +	real tmp, utmp; +	tmp = a[i]; +	a[i] = a[j]; +	a[j] = tmp; +	utmp = idx[i]; +	idx[i] = idx[j]; +	idx[j] = utmp; +} + +static void +sdown(real *a, uint *idx, uint top, uint len) +{ +	uint u, v; +	 +	v = CHILD(u = top); +	if(v >= len) +		return; +	if(a[v] > a[u]) +		u = v; +	if(v + 1 < len && a[v + 1] > a[u]) +		u = v + 1; +	if(u != top){ +		swap(a, idx, top, u); +		sdown(a, idx, u, len); +	} +} + +static uint * +hsort(real *a, uint len) +{ +	int i; +	uint *idx = emalloc(sizeof(uint) * len); +	 +	for(i = 0; i < len; i++) +		idx[i] = i; +	 +	for(i = PARENT(len); i >= 0; i--) +		sdown(a, idx, i, len); +	for(len--; len > 0; len--){ +		swap(a, idx, 0, len); +		sdown(a, idx, 0, len); +	} +	 +	return idx; +} + +void +vsort(vector *a) +{ +	free(hsort(a->data, a->len)); +} + +uint * +ivsort(vector *a) +{ +	return hsort(a->data, a->len); +} + +/* Eigen vector decompositions */ +#define SQ(x) ((x) * (x)) + +typedef struct tuple tuple; +struct tuple{ +	ushort i, j; +	uint id, jd; +	real v; +}; + +static void +drotate(matrix *x, uint i, uint j, uint k, uint l, +	real p, real s) +{ +	real g, h; + 	g = x->data[i | j]; + 	h = x->data[k | l]; + 	x->data[i | j] = g - s * (h + g * p); +	x->data[k | l] = h + s * (g - h * p); +} + +/* Jacobi's method */ +matrix * +jacobi(matrix *x, matrix *vec, vector *val) +{ +	uint n, i, j, it; +	uint id, jd, kd, nd; +	real beta, c, t, s, p, u, mval; +	uint *m; +	tuple max; +	 +	assert(x->nr == x->nc); +	n = x->nr; +	nd = x->mr; + +	vec = nmatrix(vec, nil, n, n); +	zblock(vec); +	val = nvector(val, nil, n); +	for(i = 0, id = 0; i < n; i++, id = oinc2(id)){ +		vec->data[id | (id << 1)] = 1; +		V(val, i) = x->data[id | (id << 1)]; +	} +	 +	 +	/* Populate max array */ +	m = malloc(sizeof(*m) * n); +	memset(m, 0, sizeof(*m) * n); +	for(i = id = 0; i < n - 1; i++, id = oinc2(id)) +		for(j = jd = mval = 0; j < n; j++, jd = einc2(jd)) +			if(mval < fabs(V(x, id | jd))){ +				mval = fabs(V(x, id | jd)); +				m[i] = jd; +			} +	 +	for(it = 0; it < MAXITERS; it++){ +		max = (tuple){0, 0, 0, 0, 0}; +		for(i = id = 0; i < n - 1; i++, id = oinc2(id)) +			if(max.v < (u = fabs(x->data[id | m[i]]))) +				max = (tuple){i, undilate2(m[i] >> 1), id, m[i], u}; +#ifdef DEBUG +		if(it % 1000 == 0) +			fprint(2, "jacobi: %d rotations %g max\n", it, max.v); +#endif +		 +		beta = (V(val, max.j) - V(val, max.i)) / +			(2 * x->data[max.id | max.jd]); +		t = sign(beta) / (fabs(beta) + sqrt(SQ(beta) + 1)); +		c = 1 / sqrt(SQ(t) + 1); +		s = c * t; +		p = s / (1 + c); +		 +		/* Convergence test based on changing eigenvalues */ +		if(fabs(t * x->data[max.id | max.jd]) < meps) +			break; +		 +		V(val, max.i) -= t * x->data[max.id | max.jd]; +		V(val, max.j) += t * x->data[max.id | max.jd]; +		x->data[max.id | max.jd] = x->data[(max.id << 1) | (max.jd >> 1)] = 0; +		for(kd = 0; kd < max.id; kd = oinc2(kd)) +			drotate(x, kd, max.id << 1, kd, max.jd, p, s); +		for(kd = oinc2(max.id); kd < (max.jd >> 1); kd = oinc2(kd)) +			drotate(x, max.id, kd << 1, kd, max.jd, p, s); +		for(kd = einc2(max.jd); kd < (nd << 1); kd = einc2(kd)) +			drotate(x, max.id, kd, max.jd >> 1, kd, p, s); +		for(kd = 0; kd < nd; kd = oinc2(kd)) +			drotate(vec, kd, max.id << 1, kd, max.jd, p, s); + +		 +		/* Update max array */ +		for(j = max.i + 1, jd = oinc2(max.id) << 1; j < n; j++, jd = einc2(jd)) +			if(fabs(x->data[max.id | m[max.i]]) < fabs(x->data[max.id | jd])) +				m[max.i] = jd; +		for(j = max.j + 1, jd = einc2(max.jd); j < n; j++, jd = einc2(jd)) +			if(fabs(x->data[(max.jd >> 1) | m[max.j]]) < fabs(x->data[(max.jd >> 1) | jd])) +				m[max.j] = jd; +	} +#ifdef DEBUG +	if(it == MAXITERS) +		fprint(2, "jacobi: warning: non-convergent after %d iterations\n", it); +	else +		fprint(2, "jacobi: terminated: %d iterations\n", it); +		 +	fprint(2, "jacobi: eigenvectors calculated\n"); +#endif +	 +	free(m); +	return vec; +} + +/* QR method */ +matrix * +eigen(matrix *in, matrix *vec, vector *val) +{ +	uint i, j, cnt; +	matrix *x, *q, *r; +	 +	assert(in->nr == in->nc); +	 +	vec = (matrix *)zblock(nmatrix(vec, nil, in->nr, in->nr)); +	for(i = 0; i < vec->nr; i++) +		M(vec, i, i) = 1; +		 +	q = nmatrix(nil, nil, 0, 0); +	r = nmatrix(nil, nil, 0, 0); +	x = nmatrix(nil, nil, in->nr, in->nc); +	val = (vector *) zblock(nvector(val, nil, x->nr)); +	cblock(x, in); +	for(i = 0; i < MAXITERS; i++){ +		qr(x, q, r); +		mdot(r, q, x, 0); +		mdot(vec, q, r, 0); +		cblock(vec, r); +		 +		for(j = cnt = 0; j < x->nr; j++){ +			if(fabs(V(val, j) - M(x, j, j)) < meps) +				cnt++; +			V(val, j) = M(x, j, j); +		} +			 +		/* Convergence check */ +		if(cnt >= x->nr) +			break; +	} +	 +	 +	fblock(q); +	fblock(x); +	fblock(r); +	return vec; +} + +real +l2(vector *a) +{ +	real sum; +	uint i; +	for(sum = i = 0; i < a->len; i++) +		sum = hypot(sum, V(a, i)); +	return sum; +} + +void +vnorm(uint nargs, ...) +{ +	real sum; +	uint i, j; +	va_list args; +	vector *a; +	 +	sum = 0; +	va_start(args, nargs); +	for(i = 0; i < nargs; i++){ +		a = va_arg(args, vector *); +		for(j = 0; j < a->len; j++) +			sum = hypot(sum, V(a, j)); +	} +	va_end(args); +	if(fabs(sum) <= meps) +		return; +	va_start(args, nargs); +	for(i = 0; i < nargs; i++){ +		a = va_arg(args, vector *); +		for(j = 0; j < a->len; j++) +			V(a, j) /= sum; +	} +	va_end(args); +} + +/* Lanczos's algorithm */ +matrix * +tridiag(matrix *A, matrix *v, matrix *T) +{ +	vector *w, *tmp, *tmp2, *tmp3, *v1, *v0; +	real alpha, beta; +	ushort i, j, m; +	uint id, jd; +	 +	assert(A->nr == A->nc); +	m = A->nr; +	T = (matrix *)zblock(nmatrix(T, nil, m, m)); +	v = nmatrix(v, nil, m, m); +	beta = 0; +	w = nvector(nil, nil, m); +	tmp = nvector(nil, nil, 0); +	tmp2 = nvector(nil, nil, 0); +	tmp3 = nvector(nil, nil, 0); +	v1 = nvector(nil, nil, m); +	v0 = nvector(nil, nil, m); +	 +	for(i = 0; i < m; i++) +		V(v1, i) = frand() - 0.5; +	vnorm(1, v1); +	 +	for(i = id = 0; i < m; i++, id = oinc2(id)){ +		for(j = jd = 0; j < m; j++, jd = oinc2(jd)) +			v->data[jd | (id << 1)] = V(v1, j); +		if(i > 0){ +			T->data[odec2(id) | (id << 1)] = beta; +			vsub(vdot(A, v1, tmp, 0), sprod(beta, v0, tmp2), w); +		}else +			vdot(A, v1, w, 0); +		T->data[id | (id << 1)] = alpha = dot(w, v1); +		vsub(w, sprod(alpha, v1, tmp3), w); +		beta = l2(w); +		cblock(v0, v1); +		if(i < m - 1){ +			T->data[oinc2(id) | (id << 1)] = beta; +			sprod(1 / beta, w, v1); +		} +	} +	free(v0); +	free(v1); +	fblock(tmp); +	fblock(tmp2); +	fblock(tmp3); +	fblock(w); +	 +	return T; +} + +/* 	Applies simple Nystrom extension to estimate eigenvectors. +	Returned vectors are unordered and not othorganalised */ +matrix * +nystrom(matrix *A, matrix *vec, vector *val) +{ +	matrix *B, *tmp; +	uint i, j, k, io, id; +	 +	B = nmatrix(nil, A->data, A->nr / 2, A->nr / 2); +	tmp = nmatrix(nil, nil, 0, 0); +	 +	/* Find eigenvectors of the submatrix */ +	eigen(B, vec, val); +	 +	/* Nystrom extension */ +	vec = nmatrix(vec, nil, A->nr, vec->nc); +	 +	/* Zero extension */ +	for(i = dilate2(vec->nr / 2); i < vec->mr; i = oinc2(i)) +		for(j = 0; j < vec->mc; j = einc2(j)) +			V(vec, i + j) = 0; +			 +	/* Initial matrix product */ +	for(io = ((i = dilate2(vec->nr / 2)) << 1); i < vec->mr; i = oinc2(i), io = einc2(io)) +		for(j = 0; j < vec->mc; j = einc2(j)) +			for(k = 0; k < (vec->mc >> 1); k = oinc2(k)) +			 V(vec, i + j) += V(A, k + io) * V(vec, k + j); +		 +	/* Divide by eigenvalues */ +	for(i = dilate2(vec->nr / 2); i < vec->mr; i = oinc2(i)) +		for(j = k = 0; j < vec->mc; j = einc2(j), k++) +			V(vec, i + j) /= V(val, k); +			 +	/* Calculate approximate eigenvalues */ +	val = nvector(val, nil, vec->nc); +	mdot(A, vec, tmp, 0); +	zblock(val); +	for(i = id = 0; i < val->len; i++, id = einc2(id)) +		for(j = 0; j < tmp->mr; j = oinc2(j)) +			V(val, i) += V(tmp, j + id) / tmp->nr; +	 +	fblock(tmp); +	free(B); +	 +	return vec; +} + +/* QR decomposition */ +matrix * +qr(matrix *x, matrix *q, matrix *r) +{ +	real norm; +	int i, j, k; +	uint id, jd, kd; +	vector *rdiag; +	matrix *qr; +	 +	//assert(x->nc == x->nr); +	 +	r = (matrix *)zblock(nmatrix(r, nil, x->nr, x->nc)); +	q = (matrix *)zblock(nmatrix(q, nil, x->nr, x->nr)); +	qr = nmatrix(nil, nil, x->nr, x->nc); +	rdiag = (vector *)zblock(nvector(nil, nil, q->nr)); +	cblock(qr, x); +	 +	for(j = jd = 0; j < qr->nc; j++, jd = einc2(jd)){ +#ifdef DEBUG +		if(j % 100 == 0) +			fprint(2, "qr: householder: %d remaining\n", qr->nc - j); +#endif +		for(norm = 0, i = j, id = jd >> 1; i < qr->nr; i++, id = oinc2(id)) +			norm = hypot(norm, V(qr, id | jd)); +		if(norm == 0) +			continue; +		if(V(qr, (jd >> 1) | jd) < 0) +			norm = -norm; +		V(rdiag, j) = -norm; +			 +		for(i = j, id = jd >> 1; i < qr->nr; i++, id = oinc2(id)) +			V(qr, id | jd) /= norm; +		V(qr, (jd >> 1) | jd) += 1; +		 +#pragma omp parallel for schedule(static, CHUNK) private(i, id, kd, norm) +		for(k = j + 1; k < qr->nc; k++){ +			kd = dilate2(k) << 1; +			for(norm = 0, i = j, id = jd >> 1; i < qr->nr; i++, id = oinc2(id)) +				norm += V(qr, id | jd) * V(qr, id | kd); +			norm /= -V(qr, (jd >> 1) | jd); +			for(i = j, id = jd >> 1; i < qr->nr; i++, id = oinc2(id)) +				V(qr, id | kd) += norm * V(qr, id | jd); +		} +	} +	 +	for(i = id = 0; i < qr->nr; i++, id = oinc2(id)){ +		for(j = i + 1, jd = einc2(id << 1); j < qr->nc; j++, jd = einc2(jd)) +			V(r, id | jd) = V(qr, id | jd); +		V(r, id | (id << 1)) = V(rdiag, i); +	} +	 +	for(j = q->nc - 1, jd = edec2(q->mc); j >= 0; j--, jd = edec2(jd)){ +#ifdef DEBUG +		if(j % 100 == 0) +			fprint(2, "qr: calculating q: %d remaining\n", j + 1); +#endif +		V(q, (jd >> 1) | jd) = 1; +#pragma omp parallel for schedule(static, CHUNK) private(i, id, kd, norm) +		for(k = j; k < q->nc; k++){ +			kd = dilate2(k) << 1; +			if(V(qr, (kd >> 1) | kd) == 0) +				continue; +			for(norm = 0, i = j, id = jd >> 1; i < q->nr; i++, id = oinc2(id)) +				norm += V(qr, id | jd) * V(q, id | kd); +			if(fabs(V(qr, (jd >> 1) | jd)) < meps) +				norm = 0; +			else +				norm /= -V(qr, (jd >> 1) | jd); +			for(i = j, id = jd >> 1; i < q->nr; i++, id = oinc2(id)) +				V(q, id | kd) += norm * V(qr, id | jd); +		} +	} +	 +#ifdef DEBUG +	fprint(2, "qr: terminated\n"); +#endif +	fblock(qr); +	fblock(rdiag); +	return r; +	 +} + +static vector * +gsproj(matrix *x, vector *u, uint i, uint j) +{ +	uint k, kd; +	real mag, proj; +	 +	mag = proj = 0; +	for(k = 0; k < x->mr; k = oinc2(k)){ +		proj += V(x, i | k) * V(x, j | k); +		mag += SQ(V(x, j | k)); +	} +	u = nvector(u, nil, x->nr); +	for(k = kd = 0; k < x->nr; k++, kd = oinc2(kd)) +		V(u, k) = V(x, kd | j) * proj / mag; +	return u; +} + +matrix * +gs(matrix *x) +{ +	uint i, j, k, kd; +	vector *u = nil; +	real sum; +	 +	for(i = 0; i < x->mc; i = einc2(i)){ +		for(j = 0; j < i; j = einc2(j)){ +			u = gsproj(x, u, i, j); +			for(k = kd = 0; k < x->nr; k++, kd = oinc2(kd)) +				V(x, i | kd) -= V(u, k); +		} +		for(j = sum = 0; j < x->mr; j = oinc2(j)) +			sum = hypot(sum, V(x, i | j)); +		for(j = 0; j < x->mr; j = oinc2(j)) +			V(x, i | j) /= sum; +	} +	fblock(u); +	return x; +} + +real +entropy(matrix *dist) +{ +	uint i, j, n; +	real gamma; +	 +	assert(dist->nr == dist->nc); +	n = dist->nr; +	 +	/* Entropy tuning */ +	for(gamma = i = 0; i < dist->mr; i = oinc2(i)) +		for(j = einc2(i << 1); j < dist->mc; j = einc2(j)) +			gamma += V(dist, i | j); +	if(gamma == 0) return 1; +	gamma = (real)(SQ(n) - n) / (2 * gamma); +	fprint(2, "entropy: γ = %g\n", gamma); +	return gamma; +} +	 +matrix * +mahalanobis(matrix *k, matrix *dist) +{ +	uint i, j; +	dist = nmatrix(dist, nil, k->nr, k->nr); +	for(i = 0; i < dist->mr; i = oinc2(i)) +		for(j = einc2(i); j < dist->mc; j = einc2(j)) +			V(dist, i | j) = V(dist, tr(i, j)) = V(k, i | (i << 1)) - 2 * V(k, i | j) + V(k, (j >> 1) | j); +	return dist; +} + +matrix * +msqrt(matrix *x, matrix *mval) +{ +	uint i, cnt; +	matrix *vec, *tmp; +	vector *val; +	 +	tmp = nmatrix(nil, nil, 0, 0); +	vec = nmatrix(nil, nil, 0, 0); +	val = nvector(nil, nil, 0); +	eigen(x, vec, val); +	 +	mval = (matrix *)zblock(nmatrix(mval, nil, val->len, val->len)); +	for(i = cnt = 0; i < val->len; i++) +		if(V(val, i) > meps) +			M(mval, i, i) = sqrt(V(val, i)); +		else{ +			if(M(mval, i, i) < -meps) +				cnt++; +			M(mval, i, i) = 0; +		} +	if(cnt > 0) +		fprint(2, "msqrt: %d negative eigenvectors\n", cnt); +	 +	mdot(vec, mval, tmp, 0); +	mdot(tmp, vec, mval, 2);		 +	 +	fblock(tmp); +	fblock(vec); +	fblock(val); +	return(mval); +} + +matrix * +minv(matrix *x, matrix *mval, real th) +{ +	uint i; +	matrix *vec, *tmp; +	vector *val; +	 +	tmp = nmatrix(nil, nil, 0, 0); +	vec = nmatrix(nil, nil, 0, 0); +	val = nvector(nil, nil, 0); +	eigen(x, vec, val); +	 +	mval = (matrix *)zblock(nmatrix(mval, nil, val->len, val->len)); +	for(i = 0; i < val->len; i++){ +		if(V(val, i) > th) +			M(mval, i, i) = 1 / V(val, i); +		else +			M(mval, i, i) = 0; +	} +	 +	mdot(vec, mval, tmp, 0); +	mdot(tmp, vec, mval, 2);		 +	 +	fblock(tmp); +	fblock(vec); +	fblock(val); +	return(mval); +} + +static vector * +backsub(matrix *a, vector *b, vector *x) +{ +	int i, j; +	uint id, jd; +	real sum; +	x = nvector(x, nil, b->len); +	for(i = b->len - 1, id = dilate2(b->len - 1); i >= 0; i--, id = odec2(id)){ +		sum = 0; +		for(j = i + 1, jd = dilate2(i + 1) << 1; j < a->nc; j++, jd = einc2(jd)) +			sum += V(a, id | jd) * V(x, j); +		if(fabs(V(a, id | (id << 1))) < meps) +			V(x, i) = 0; +		else +			V(x, i) = (V(b, i) - sum) / V(a, id | (id << 1)); +	} +	return x; +} + +vector * +solve(matrix *A, vector *b, vector *x) +{ +	matrix *q, *r; +	vector *c; +	uint n; +	 +	n = A->nr; +	 +	assert(n == A->nc && +		n == b->len); +		 +	 +	q = nmatrix(nil, nil, 0, 0); +	r = nmatrix(nil, nil, 0, 0); +	qr(A, q, r); +	 +	c = vdot(q, b, nil, 1); +	x = backsub(r, c, x); +	 +	fblock(q); +	fblock(r); +	fblock(c); +	 +	return x; +} + +/* SVD calculated inefficiently */ +#define SQRT(x) (((x) > 0) ? sqrt(x) : 0) +#define INV(x) (((x) > 0) ? (1/(x)) : 0) +void +svd(matrix *x, matrix *u, vector *s, matrix *v) +{ +	uint j, id, jd; +	matrix *y = nmatrix(nil, nil, 0, 0); +	mdot(x, x, y, 1); +	 +	jacobi(y, v, s); +	mdot(x, v, u, 0); +	assert(u->nc == s->len); +	for(j = 0; j < s->len; j++) +		V(s, j) = SQRT(V(s, j)); +	for(id = 0; id < u->mr; id = oinc2(id)) +		for(j = jd = 0; j < u->nc; j++, jd = einc2(jd)) +				V(u, id | jd) *= INV(V(s, j)); +	fblock(y); +} + +matrix * +col(matrix *x, matrix *dest, uint c) +{ +	uint i; +	 +	dest = nmatrix(dest, nil, x->nr, 1); +	c = dilate2(c) << 1; +	 +	for(i = 0; i < x->mr; i = oinc2(i)) +		V(dest, i) = V(x, i | c); +	return dest; +} + +matrix * +row(matrix *x, matrix *dest, uint r) +{ +	uint i; +	 +	dest = nmatrix(dest, nil, 1, x->nc); +	r = dilate2(r); +	 +	for(i = 0; i < x->mr; i = einc2(i)) +		V(dest, i) = V(x, i | r); +	return dest; +} + +vector * +drop(matrix *x, vector *dest) +{ +	uint i, id; +	assert(x->nc == 1); +	dest = nvector(dest, nil, x->nr); +	for(i = id = 0; i < x->nr; i++, id = oinc2(id)) +		V(dest, i) = V(x, id); +	return dest; +} + +matrix * +raise(vector *x, matrix *dest) +{ +	uint i, id; +	dest = nmatrix(dest, nil, x->len, 1); +	for(i = id = 0; i < x->len; i++, id = oinc2(id)) +		V(dest, id) = V(x, i); +	return dest; +} + +matrix * +trans(matrix *x, matrix *dest) +{ +	uint i; +	dest = nmatrix(dest, nil, x->nc, x->nr); +	for(i = 0; i < x->len; i++) +		V(dest, ((i & oddbits) << 1) | ((i & evenbits) >> 1)) = V(x, i); +	return dest; +} + +matrix * +slice(cube *x, matrix *dest, uint k) +{ +	uint i; +	dest = nmatrix(dest, nil, x->nr, x->nc); +	for(i = 0; i < x->stride; i++) +		V(dest, i) = CV(x, i, k); +	return dest; +} + +matrix * +diag(vector *d, matrix *x) +{ +	uint i, id; +	x = nmatrix(x, nil, d->len, d->len); +	for(i = id = 0; i < d->len; i++, id = oinc2(id)) +		V(x, id | (id << 1)) = V(d, i); +	return x; +} + +vector * +msum(matrix *x, vector *s, uint dim) +{ +	uint i; +	switch(dim){ +	case 0: +		s = (vector *)zblock(nvector(s, nil, x->nr)); +		miter(x, i) +			V(s, undilate2(i & oddbits)) += V(x, i); +		break; +	case 1: +		s = (vector *)zblock(nvector(s, nil, x->nc)); +		miter(x, i) +			V(s, undilate2((i & evenbits) >> 1)) += V(x, i); +		break; +	default: +		abort(); +	} +	 +	return s; +} + +vector * +getdiag(matrix *x, vector *d) +{ +	uint i, id; +	d = nvector(d, nil, MIN(x->nr, x->nc)); +	for(i = id = 0; i < d->len; i++, id = oinc2(id)) +		V(d, i) = V(x, id | (id << 1)); +	return d; +} diff --git a/linalg.h b/linalg.h new file mode 100644 index 0000000..5613100 --- /dev/null +++ b/linalg.h @@ -0,0 +1,108 @@ +#pragma	src	"/usr/doc/src/linalg" +#pragma	lib	"liblinalg.a" + +typedef float real; +extern const float meps; + +/* Cubes, matrics, and Vector data types */ + +/* Morton ordering */ +#define evenbits 0xAAAAAAAA +#define oddbits 0x55555555 +//#define threebits 0x49249249 +#define oinc2(i) (((i) - oddbits) & oddbits) +#define einc2(i) (((i) - evenbits) & evenbits) +#define odec2(i) (((i) - 1) & oddbits) +#define edec2(i) (((i) - 1) & evenbits) +#define tr(i, j) (((i) << 1) | ((j) >> 1)) +ushort undilate2(uint); +uint dilate2(ushort); + +/* Iterators */ +#define miter(m, i) for((i) = 0; (i) < (m)->len; (i)++) if(mbndok((m), (i))) +#define viter(v, i) for((i) = 0; (i) < (v)->len; (i)++) + +typedef struct block block; +typedef struct cube cube; +typedef struct matrix matrix; +typedef struct vector vector; + +#define C(x, i, j, k) ((x)->data[(k) * (x)->stride + (dilate2(i) | (dilate2(j) << 1))]) +#define CV(x, i, k) ((x)->data[(k) * (x)->stride + (i)]) +#define M(x, i, j) ((x)->data[dilate2(i) | (dilate2(j) << 1)]) +#define V(x, i) ((x)->data[(i)]) + +#define mbndok(a, i) (((i) & oddbits) < (a)->mr && ((i) & evenbits) < (a)->mc) + +struct block{ +	real *data; +	ulong sz; +	uchar allocated:1; +}; + +struct vector { +	block; +	uint len; +}; + +struct matrix{ +	vector; +	ushort nr, nc; +	uint mr, mc; +	uint levels, offset; +}; + +struct cube{ +	matrix; +	ushort ns; +	uint stride; +}; + +cube *ncube(cube *, real *, ushort, ushort, ushort); +matrix *nmatrix(matrix *, real *, ushort, ushort); +vector *nvector(vector *, real *, uint); + +void fblock(block *); +block *zblock(block *); +block *cblock(block *, block *); + +matrix *qdot(matrix *, matrix *, matrix *, int); +matrix *mdot(matrix *, matrix *, matrix *, int); +matrix *madd(matrix *, matrix *, matrix *); +matrix *smprod(real, matrix *, matrix *); +real dot(vector *, vector *); +vector *sprod(real, vector *, vector *); +vector *vadd(vector *, vector *, vector *); +vector *vprod(vector *, vector *, vector *); +vector *vsub(vector *, vector *, vector *); +vector *vdot(matrix *, vector *, vector *, int); +void vnorm(uint, ...); + +void vsort(vector *); +uint *ivsort(vector *); + +extern uint MAXITERS; +matrix *eigen(matrix *, matrix *, vector *); +matrix *jacobi(matrix *, matrix *, ve  | 
