// datavore!

/**
 * The top-level Datavore namespace. Every public method and field is meant to
 * be registered on this object. The core Datavore source is intended to be
 * wrapped in an anonymous function, so that other declared globals stay
 * private to the core and several Datavore versions can coexist, each seeing
 * its own <tt>dv</tt> namespace.
 *
 * @namespace The top-level Datavore namespace, <tt>dv</tt>.
 */
var dv = {};

/**
 * Datavore major and minor version numbers.
 *
 * @namespace Datavore major and minor version numbers.
 */
dv.version = {};

/**
 * The major version number.
 *
 * @type number
 * @constant
 */
dv.version.major = 0;

/**
 * The minor version number.
 *
 * @type number
 * @constant
 */
dv.version.minor = 1;

/**
 * @private Reports the specified error. When a global <tt>console</tt>
 * exists the error is passed to <tt>console.error</tt>; otherwise it is shown
 * with the <tt>alert</tt> dialog box instead.
 *
 * @param e the exception that triggered the error.
 */
dv.error = function(e) {
  if (typeof console === "undefined") {
    alert(e);
  } else {
    console.error(e);
  }
};

/**
 * @private Registers the specified listener for events of the specified type on
 * the specified target. The listener is first wrapped with
 * {@link dv.listener}. If the target has <tt>addEventListener</tt> it is used
 * (bubbling phase); otherwise the Internet Explorer style
 * <tt>attachEvent</tt> is used with the "on"-prefixed event name.
 *
 * @param target a DOM element.
 * @param {string} type the type of event, such as "click".
 * @param {function} listener the event handler callback.
 * @returns whatever the underlying registration call returns.
 */
dv.listen = function(target, type, listener) {
  var wrapped = dv.listener(listener);
  if (target.addEventListener) {
    return target.addEventListener(type, wrapped, false);
  }
  return target.attachEvent("on" + type, wrapped);
};

/**
 * @private Returns a wrapper for the specified listener function. While the
 * wrapped handler runs, the current event is exposed as <tt>dv.event</tt>; it
 * is removed again afterwards, even if the handler throws. The wrapper is
 * cached on the handler as <tt>$listener</tt>, so wrapping the same handler
 * twice yields the same function.
 *
 * @param {function} f an event handler.
 * @returns {function} the wrapped event handler.
 */
dv.listener = function(f) {
  var wrapper = f.$listener;
  if (!wrapper) {
    wrapper = f.$listener = function(e) {
      try {
        dv.event = e;
        return f.call(this, e);
      } finally {
        // always clear the transient event reference
        delete dv.event;
      }
    };
  }
  return wrapper;
};

// -- QUERY OPERATORS ---------------------------------------------------------

/**
 * Creates a new array filled with zeros.
 *
 * @param {number} n the number of entries to create.
 * @returns {number[]} a fresh array holding <tt>n</tt> zeros.
 */
dv.array = function(n) {
	var zeros = [];
	var i = 0;
	while (i < n) {
		zeros[i] = 0;
		i += 1;
	}
	return zeros;
};

/**
 * A function that does nothing and returns <tt>undefined</tt>.
 */
dv.noop = function() {
	// intentionally empty
};

/**
 * Creates a count aggregate operator for use in a table query.
 *
 * The operator exposes <tt>init()</tt>, which requests the "cnt" aggregate
 * under the wildcard key "*", and <tt>done(ctx)</tt>, which returns the
 * <tt>cnt</tt> entry of the query context.
 *
 * @param expr stored on the operator as <tt>value</tt>; not otherwise used
 *     by the operator itself.
 * @returns {object} the aggregate operator.
 */
dv.count = function(expr) {
	var aggregate = {};

	aggregate.init = function() {
		var request = {};
		request["*"] = ["cnt"];
		return request;
	};

	aggregate.done = function(ctx) {
		return ctx.cnt;
	};

	aggregate.value = expr;
	return aggregate;
};

/**
 * Creates a sum aggregate operator for use in a table query.
 *
 * <tt>init()</tt> requests the "sum" aggregate for the given expression and
 * <tt>done(ctx)</tt> returns the context entry stored under
 * <tt>"sum_" + expr</tt>.
 *
 * @param expr the column reference to sum; also stored as <tt>value</tt>.
 * @returns {object} the aggregate operator.
 */
dv.sum = function(expr) {
	var aggregate = {};

	aggregate.init = function() {
		var request = {};
		request[expr] = ["sum"];
		return request;
	};

	aggregate.done = function(ctx) {
		var key = "sum_" + expr;
		return ctx[key];
	};

	aggregate.value = expr;
	return aggregate;
};

/**
 * Creates an average aggregate operator for use in a table query.
 *
 * <tt>init()</tt> requests the wildcard "cnt" aggregate plus the "sum"
 * aggregate of the expression. <tt>done(ctx)</tt> divides each sum by the
 * matching count and caches the result in the context under
 * <tt>"avg_" + expr</tt>, so a later call returns the cached array.
 *
 * @param expr the column reference to average; also stored as <tt>value</tt>.
 * @returns {object} the aggregate operator.
 */
dv.avg = function(expr) {
	var aggregate = {};

	aggregate.init = function() {
		var request = {};
		request["*"] = ["cnt"];
		request[expr] = ["sum"];
		return request;
	};

	aggregate.done = function(ctx) {
		var cacheKey = "avg_" + expr;
		var means = ctx[cacheKey];
		if (means) {
			return means;
		}
		var sums = ctx["sum_" + expr];
		var counts = ctx["cnt"];
		means = sums.map(function(total, cell) {
			return total / counts[cell];
		});
		ctx[cacheKey] = means;
		return means;
	};

	aggregate.value = expr;
	return aggregate;
};

/**
 * Creates a variance aggregate operator for use in a table query.
 *
 * <tt>init()</tt> requests the wildcard "cnt" aggregate plus the "sum" and
 * "ssq" (sum of squares) aggregates of the expression. <tt>done(ctx)</tt>
 * computes, per cell, <tt>(ssq - sum*sum/cnt) / (cnt - adj)</tt>, where
 * <tt>adj</tt> is 1 for the sample variance and 0 for the population variance.
 * The per-cell averages are cached in the context under
 * <tt>"avg_" + expr</tt> (the same key the average operator uses).
 *
 * Cells with no rows (or a single row when <tt>sample</tt> is set) yield NaN.
 *
 * @param expr the column reference; also stored as <tt>value</tt>.
 * @param {boolean} [sample] truthy to divide by <tt>n - 1</tt> instead of
 *     <tt>n</tt>.
 * @returns {object} the aggregate operator.
 */
dv.variance = function(expr, sample) {
	var op = {}, adj = sample ? 1 : 0;
	op.init = function() {
		var o = {"*":["cnt"]}; o[expr] = ["sum","ssq"]; return o;
	};
	op.done = function(ctx) {
		var cnt = ctx["cnt"], sum = ctx["sum_"+expr], ssq = ctx["ssq_"+expr];
		var akey = "avg_"+expr, avg = ctx[akey];
		if (!avg) {
			ctx[akey] = (avg = sum.map(function(v,i) { return v/cnt[i]; }));
		}
		return ssq.map(function(v,i) {
			// sum of squared deviations: ssq - sum^2/n == ssq - avg*sum
			var dev = v - avg[i]*sum[i];
			// rounding can push a zero deviation slightly negative
			if (dev < 0) dev = 0;
			return dev / (cnt[i]-adj);
		});
	};
	op.value = expr;
	return op;
};

/**
 * Creates a standard deviation aggregate operator. It is the variance
 * operator with its <tt>done</tt> step replaced by one that takes the square
 * root of every variance value, in place, and returns that array.
 *
 * @param expr the column reference, passed on to the variance operator.
 * @param {boolean} [sample] passed on to the variance operator.
 * @returns {object} the aggregate operator.
 */
dv.stdev = function(expr, sample) {
	var op = dv.variance(expr, sample);
	var varianceDone = op.done;

	op.done = function(ctx) {
		var values = varianceDone(ctx);
		var i = 0;
		while (i < values.length) {
			values[i] = Math.sqrt(values[i]);
			i += 1;
		}
		return values;
	};

	return op;
};

// -- DATA TABLE --------------------------------------------------------------

/**
 * Builds a column-oriented data table.
 *
 * Each input entry becomes one column array, reachable both by position
 * (<tt>table[0]</tt>) and by name (<tt>table[name]</tt>), and tagged with
 * <tt>name</tt> and <tt>index</tt> properties. The first two columns are
 * dictionary-coded: their values are replaced by integer codes and the sorted
 * list of distinct values is attached as <tt>lut</tt>. Other columns reuse the
 * caller's <tt>values</tt> array directly, so that array gains the
 * <tt>name</tt>/<tt>index</tt> properties.
 *
 * @param {object[]} input column descriptors, each with <tt>name</tt> and
 *     <tt>values</tt>.
 * @returns {Array} the table, with an added <tt>query</tt> method.
 */
dv.table = function(input)
{
	var table = []; // the data table

	/**
	 * Runs a group-by aggregation over the table.
	 *
	 * @param {object} q the query: <tt>q.dims</tt> lists the dictionary-coded
	 *     columns (by name or index) to group by, and <tt>q.vals</tt> lists
	 *     the aggregate operators, each providing <tt>init()</tt> and
	 *     <tt>done(ctx)</tt>.
	 * @returns {Array[]} one array of codes per dimension, followed by one
	 *     result per operator; every array has one entry per cube cell.
	 */
	table.query = function(q) {
		var vals = q.vals,                                       // aggregates
		    dims = q.dims.map(function(n) { return table[n]; }), // dimensions
		    sz = dims.map(function(d) { return d.lut.length; }), // dimension cardinalities
		    C = sz.reduce(function(a,b) { return a * b; }, 1),   // cube cardinality
		    N = table.length ? table[0].length : 0,              // row count
		    p, col, v, expr, req,                                // temp variables
		    cnt, sum, ssq, min, max, _cnt, _sum, _ssq, _min, _max, // aggregate columns
		    ctx = {}, emap = {}, exp = [], // aggregate state variables
		    i, j, k, idx; // indices

		// Allocates one accumulator per cube cell. Sums and counts start at
		// zero; min/max start at +/-Infinity so the first value always wins.
		function alloc(func) {
			var a = dv.array(C), seed = 0, n;
			if (func == "min") seed = Infinity;
			else if (func == "max") seed = -Infinity;
			if (seed !== 0) {
				for (n=0; n<C; ++n) a[n] = seed;
			}
			return a;
		}

		// Registers the requested aggregate functions for one expression.
		function register(expr, funcs) {
			var column, n, a;
			if (expr == "*") {
				for (n=0; n<funcs.length; ++n) {
					ctx[funcs[n]] = alloc(funcs[n]);
				}
				return;
			}
			column = table[expr];
			for (n=0; n<funcs.length; ++n) {
				// the same accumulator is reachable by column name and index
				a = alloc(funcs[n]);
				ctx[funcs[n]+"_"+column.index] = a;
				ctx[funcs[n]+"_"+column.name] = a;
			}
			if (!emap[column.index]) {
				emap[column.index] = true;
				exp.push(column.index);
			}
		}

		// Identify Requested Aggregates
		for (i=0; i<vals.length; ++i) {
			req = vals[i].init();
			for (expr in req) {
				register(expr, req[expr]);
			}
		}

		// Compute Cube Index Coefficients
		for (i=0, p=[1]; i<sz.length; ++i) {
			p.push(p[i]*sz[i]);
		}

		// Compute Aggregates
		cnt = ctx["cnt"];
		if (cnt && exp.length == 0) {
			// count-only query: no value column drives the main loop below,
			// so count the rows in a dedicated pass
			for (i=0; i<N; ++i) {
				for (idx=0, k=0; k<sz.length; ++k) {
					idx += p[k] * dims[k][i]; // compute cube index
				}
				cnt[idx] += 1;
			}
		}
		for (j=0; j<exp.length; ++j) {
			expr = exp[j];
			_cnt = (cnt && j==0); // count rows only once
			sum = ctx["sum_"+expr]; _sum = (sum !== undefined);
			ssq = ctx["ssq_"+expr]; _ssq = (ssq !== undefined);
			min = ctx["min_"+expr]; _min = (min !== undefined);
			max = ctx["max_"+expr]; _max = (max !== undefined);
			col = table[expr];

			for (i=0; i<N; ++i) {
				for (idx=0, k=0; k<sz.length; ++k) {
					idx += p[k] * dims[k][i]; // compute cube index
				}
				v = col[i];
				if (_cnt) cnt[idx] += 1;
				if (_sum) sum[idx] += v;
				if (_ssq) ssq[idx] += v*v;
				if (_min && v < min[idx]) min[idx] = v;
				if (_max && v > max[idx]) max[idx] = v;
			}
		}

		// Generate Results
		var result = [], stride = 1, s, val;
		for (i=0; i<sz.length; ++i) {
			col = [];
			s = sz[i];
			val = 0;
			// dimension i cycles through its codes, each repeated `stride` times
			for (j=0, k=0; j<C; ++j, ++k) {
				if (k == stride) { k = 0; val = (val + 1) % s; }
				col[j] = val;
			}
			stride *= s;
			result.push(col);
		}
		vals.forEach(function(op) { result.push(op.done(ctx)); });
		return result;
	};

	/** @private Returns the sorted list of distinct values in an array. */
	function code(a) {
		// null-prototype object: values such as "toString" must not collide
		// with inherited properties
		var c = [], d = Object.create(null), v;
		for (var i=0; i<a.length; ++i) {
			if (d[v=a[i]] === undefined) { d[v] = 1; c.push(v); }
		}
		return c.sort();
	}

	/** @private Returns a map from each lookup-table value to its position. */
	function dict(lut) {
		return lut.reduce(function(a,b,i) { a[b] = i; return a; }, Object.create(null));
	}

	// populate data table
	input.forEach(function(d, idx) {
		var compress = (idx < 2); // TODO
		var vals = d.values;

		if (compress) {
			vals = [];
			vals.lut = code(d.values);
			for (var i=0, map=dict(vals.lut); i<d.values.length; ++i) {
				vals.push(map[d.values[i]]);
			}
		}
		vals.name = d.name;
		vals.index = table.length;
		table.push(vals);
		table[d.name] = vals;
	});

	return table;
};

/**
 * Builds a graph from parallel source/target edge arrays.
 *
 * The returned object is an array holding the two edge arrays
 * (<tt>[src, trg]</tt>), annotated with <tt>nodes</tt>, <tt>edges</tt>,
 * <tt>source</tt> and <tt>target</tt>. <tt>init()</tt> (re)builds the
 * adjacency lists, adding each edge in both directions, and
 * <tt>neighbors(n)</tt> returns the adjacency list of node <tt>n</tt>.
 *
 * @param {number} N the number of nodes.
 * @param {number[]} src the source node index of each edge.
 * @param {number[]} trg the target node index of each edge.
 * @returns {Array} the graph object.
 */
dv.graph = function(N, src, trg) {
	var G = [];
	var adjacency;

	G.nodes = N;
	G.edges = src.length;
	G.source = src;
	G.target = trg;
	G.push(src, trg);

	G.init = function() {
		var lists = [];
		var node, e, a, b;

		for (node = 0; node < N; node += 1) {
			lists[node] = [];
		}
		for (e = 0; e < src.length; e += 1) {
			a = src[e];
			b = trg[e];
			// every edge is recorded at both of its endpoints
			lists[a].push(b);
			lists[b].push(a);
		}
		adjacency = lists;
	};

	G.neighbors = function(n) {
		return adjacency[n];
	};

	G.init();
	return G;
};

// -- Node Statistics ---------------------------------------------------------

/**
 * Counts, for every node, the edges that list it as their target.
 *
 * @param g a graph exposing <tt>nodes</tt>, <tt>edges</tt> and
 *     <tt>target</tt>.
 * @returns {number[]} the in-degree of each node.
 */
dv.graph.indegree = function(g)
{
	var nodeCount = g.nodes;
	var edgeCount = g.edges;
	var targets = g.target;
	var counts = dv.array(nodeCount);
	var e = 0;

	while (e < edgeCount) {
		counts[targets[e]] += 1;
		e += 1;
	}
	return counts;
};

/**
 * Counts, for every node, the edges that list it as their source.
 *
 * @param g a graph exposing <tt>nodes</tt>, <tt>edges</tt> and
 *     <tt>source</tt>.
 * @returns {number[]} the out-degree of each node.
 */
dv.graph.outdegree = function(g)
{
	var nodeCount = g.nodes;
	var edgeCount = g.edges;
	var sources = g.source;
	var counts = dv.array(nodeCount);
	var e = 0;

	while (e < edgeCount) {
		counts[sources[e]] += 1;
		e += 1;
	}
	return counts;
};

/**
 * Counts, for every node, the edge endpoints that touch it: each edge adds
 * one to its source node and one to its target node.
 *
 * @param g a graph exposing <tt>nodes</tt>, <tt>edges</tt>, <tt>source</tt>
 *     and <tt>target</tt>.
 * @returns {number[]} the degree of each node.
 */
dv.graph.degree = function(g)
{
	var nodeCount = g.nodes;
	var edgeCount = g.edges;
	var sources = g.source;
	var targets = g.target;
	var counts = dv.array(nodeCount);
	var e = 0;

	while (e < edgeCount) {
		counts[sources[e]] += 1;
		counts[targets[e]] += 1;
		e += 1;
	}
	return counts;
};

/**
 * Calculates betweenness centrality measures for nodes in an unweighted graph.
 * The running time is O(|V|*|E|).
 * The algorithm used is due to Ulrik Brandes, as published in the
 * <a href="http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf">
 * Journal of Mathematical Sociology, 25(2):163-177, 2001</a>.
 *
 * Dependencies are accumulated over every source node and no halving is
 * applied to the totals.
 *
 * @param g a graph exposing <tt>nodes</tt> (the node count) and
 *     <tt>neighbors(n)</tt> (the adjacency list of node <tt>n</tt>).
 * @returns {number[]} the centrality score of each node.
 */
dv.graph.bc = function(g)
{
	var N = g.nodes, links, order, head,
	    i, n, v, w, s, sn, sv, sw;

	// Score objects track centrality statistics
	function score() {
		var s = {};
		s.centrality = 0;
		s.reset = function() {
			s.predecessors = [];
			s.dependency = 0;
			s.distance = -1; // -1 marks "not yet visited"
			s.paths = 0;
			return s;
		};
		return s.reset();
	}

	// init 1 score per node
	for (n=0, s=[]; n<N; ++n) {
		s.push(score());
	}

	// compute centrality
	for (n=0; n<N; ++n) {
		for (i=0; i<N; ++i) { s[i].reset(); }
		sn = s[n];
		sn.paths = 1;
		sn.distance = 0;

		// Breadth-first search from n. `order` doubles as the queue (read
		// through `head`, so dequeuing is O(1)) and as the record of visit
		// order that is replayed backwards below.
		order = [n];
		for (head=0; head<order.length; ++head) {
			v = order[head];
			sv = s[v];

			links = g.neighbors(v);
			for (i=0; i<links.length; ++i) {
				w = links[i];
				sw = s[w];
				if (sw.distance < 0) {
					// first visit: w is one step further than v
					order.push(w);
					sw.distance = sv.distance + 1;
				}
				if (sw.distance == sv.distance + 1) {
					// v lies on a shortest path to w
					sw.paths += sv.paths;
					sw.predecessors.push(v);
				}
			}
		}

		// accumulate dependencies, farthest nodes first
		for (head=order.length-1; head>=0; --head) {
			w = order[head];
			sw = s[w];
			for (i=0; i<sw.predecessors.length; ++i) {
				v = sw.predecessors[i];
				sv = s[v];
				sv.dependency += (sv.paths/sw.paths) * (1+sw.dependency);
			}
			if (w !== n) sw.centrality += sw.dependency;
		}
	}
	return s.map(function(sc) { return sc.centrality; });
};

// -- Clustering --------------------------------------------------------------

/**
 * Namespace for clustering routines.
 */
dv.cluster = {};

/**
 * Creates a doubly-linked list node holding a pair of indices and splices it
 * between the given neighbours: a truthy <tt>p</tt> gets the new node as its
 * <tt>next</tt>, a truthy <tt>n</tt> gets it as its <tt>prev</tt>.
 *
 * @param a the first index; coerced to a number and stored as <tt>i</tt>.
 * @param b the second index; coerced to a number and stored as <tt>j</tt>.
 * @param [p] the preceding node, if any.
 * @param [n] the following node, if any.
 * @returns {object} the new node.
 */
dv.cluster.merge = function(a, b, p, n) {
	var node = {};
	node.i = +a;
	node.j = +b;
	node.prev = p;
	node.next = n;

	if (p) {
		p.next = node;
	}
	if (n) {
		n.prev = node;
	}
	return node;
};

/**
 * Agglomerative community clustering over an adjacency matrix. On every step
 * the pair of connected clusters with the largest score change
 * <tt>dQ = z[x][y] + z[y][x] - 2*a[x]*a[y]</tt> is merged, and the running
 * total Q is recorded. (This appears to follow Newman-style greedy modularity
 * optimization -- NOTE(review): confirm against the intended reference.)
 *
 * @param matrix a square matrix exposing <tt>rows</tt>, <tt>clone()</tt>,
 *     <tt>visitNonZero(f)</tt> with <tt>f(i, j, v)</tt>, and
 *     <tt>values()</tt>. The values are indexed as <tt>i*rows + j</tt>;
 *     presumably <tt>visitNonZero</tt> stores the callback's return value
 *     back into the cell -- verify against the matrix implementation.
 * @returns {object} <tt>merges</tt>: the head sentinel of a linked list whose
 *     following nodes hold the merged pairs (<tt>i</tt> absorbed <tt>j</tt>),
 *     in order; <tt>scores</tt>: the value of Q after each merge.
 */
dv.cluster.community = function(matrix)
{
	var edge = dv.cluster.merge;

	// All state is local: the original ended the declaration list early, which
	// turned scores, merges, merge, E, e and maxEdge into implicit globals.
	var dQ, maxDQ=0, Q=0, zsum=0, N=matrix.rows,
	    a = dv.array(N), Z, z, x, y, v, na, tmp, i, k,
	    xy, yx, xk, kx, yk, ky,
	    scores=[], merges=edge(-1,-1), merge=merges,
	    E = edge(-1,-1), e = E, maxEdge = edge(0,0);

	function pass1(i,j,v) {
		if (i==j) {
			return 0;     // clear diagonal cells
		} else {
			zsum += v;    // sum other cells
			return v;
		}
	}
	function pass2(i,j,v) {
		v *= zsum;        // scale by matrix sum
		a[i] += v;        // sum columns
		e = edge(i,j,e);  // collect edges
		return v;
	}

	// initialize weighted matrix, column sums and edges
	Z = matrix.clone();
	Z.visitNonZero(pass1); zsum = 1/zsum;
	Z.visitNonZero(pass2);
	z = Z.values();

	// compute clustering
	for (i=0; i<N-1 && E.next; ++i) {
		maxDQ = -Infinity;
		maxEdge.i = 0;
		maxEdge.j = 0;

		for (e=E.next; e; e=e.next) {
			x = e.i; y = e.j;
			if (x == y) continue;
			// compute delta Q
			xy = x*N+y; yx = y*N+x;
			dQ = z[xy] + z[yx] - 2*a[x]*a[y];
			// check against max so far
			if (dQ > maxDQ) {
				maxDQ = dQ;
				maxEdge.i = x;
				maxEdge.j = y;
			}
		}

		// No mergeable edge was found (only self-loops or non-comparable
		// scores remain): stop rather than merging node 0 with itself.
		if (maxDQ === -Infinity) break;

		// update the graph
		x = maxEdge.i; y = maxEdge.j;
		if (y < x) { tmp = y; y = x; x = tmp; } // lower idx first

		// fold row y into row x
		xy = x*N; yx = y*N;
		for (k=0, na=0; k<N; ++k) {
			xk = xy+k; yk = yx+k;
			v = z[xk] + z[yk];
			if (v != 0) {
				na += v;
				z[xk] = v;
				z[yk] = 0; // sparse?
			}
		}

		// fold column y into column x
		for (k=0; k<N; ++k) {
			kx = k*N+x; ky = k*N+y;
			v = z[kx] + z[ky];
			if (v != 0) {
				z[kx] = v;
				z[ky] = 0; // sparse?
			}
		}

		a[x] = na;
		a[y] = 0;

		// update edge list: drop x-y edges, relabel y as x elsewhere
		for (e=E.next; e; e=e.next) {
			if ( (e.i==x && e.j==y) || (e.i==y && e.j==x) ) {
				e.prev.next = e.next;
				if (e.next) e.next.prev = e.prev;
			} else if (e.i == y) {
				e.i = x;
			} else if (e.j == y) {
				e.j = x;
			}
		}

		Q += maxDQ;
		scores.push(Q);
		merge = edge(x, y, merge);
	}
	return {"merges":merges, "scores":scores};
};

/**
 * Turns a merge list into a per-node group assignment by replaying the first
 * <tt>idx + 1</tt> merges.
 *
 * @param {object} mergelist an object with <tt>merges</tt> (head sentinel of
 *     a linked list of <tt>{i, j}</tt> merges, where <tt>i</tt> absorbs
 *     <tt>j</tt>) and <tt>scores</tt> (one score per merge).
 * @param {number} [idx] index of the last merge to apply. When omitted or
 *     negative, the merge with the highest score is used.
 * @returns {number[]} group ids indexed by node. Merged nodes get ids
 *     starting at 1; nodes that take part in no applied merge keep 0. The
 *     array is sized as <tt>scores.length + 1</tt>, i.e. the node count when
 *     the merge list is complete.
 */
dv.cluster.groups = function(mergelist, idx) {
	var merges = mergelist.merges,
	    scores = mergelist.scores,
	    map = {}, groups, gid=1,
	    max, i, j, e, k1, k2, l1, l2;

	// default to the merge step with the best score
	if (idx === undefined || idx < 0) {
		for (i=0,idx=-1,max=-Infinity; i<scores.length; ++i) {
			if (scores[i] > max) { max = scores[idx=i]; }
		}
	}

	// replay merges; stop early if idx exceeds the number of merges
	for (i=0, e=merges.next; i <= idx && e; ++i, e=e.next) {
		k1 = e.i; k2 = e.j;
		if ((l1 = map[k1]) === undefined) {
			l1 = [k1];
			map[k1] = l1;
		}
		if ((l2 = map[k2]) === undefined) {
			l1.push(k2);
		} else {
			// k2 already leads a group: fold all of it into k1's group
			for (j=0; j<l2.length; ++j) l1.push(l2[j]);
			delete map[k2];
		}
	}

	// `merges` is a linked list and has no length; size from the score count
	groups = dv.array(scores.length+1);
	for (k1 in map) {
		l1 = map[k1];
		for (i=0; i<l1.length; ++i) {
			groups[l1[i]] = gid;
		}
		++gid;
	}

	return groups;
};