#include <xmtc.h>
#include "kspawn.h"
#include "arith.h"

// Second argument handed to every begin_kspawn() in this file.
// NOTE(review): presumably the chunking/grouping factor of the kspawn macros — confirm in kspawn.h.
#define K 8

// Global of type psBaseReg used as the base operand of ps() in list_rank_cut():
// it is zeroed there, bumped once per surviving node during compaction, and then
// read back as the number of nodes in the compacted list.
psBaseReg n_not_quit;

/*
 * Double-buffering helpers.
 *
 * DBUF_DECLARE(T, name, size, init) declares, in the current scope:
 *   name_buf1[size], name_buf2[size]  two scratch arrays of element type T
 *   name_read                         pointer to the current input, initially `init`
 *   name_write                        pointer to the current output, initially name_buf1
 *   name_temp                         pointer to the spare buffer, initially name_buf2
 * It expands to a single declaration without a trailing semicolon.
 *
 * DBUF_FLIP(name) advances one round: the buffer that was just written becomes
 * the input, the spare buffer becomes the output, and the spare pointer is set
 * to the new input buffer. Starting from DBUF_DECLARE's state, successive flips
 * alternate name_buf1 and name_buf2; the `init` buffer is only ever read.
 */
#define DBUF_DECLARE(T, name, size, init) \
	T name##_buf1[size], \
	  name##_buf2[size], \
	  *name##_read = init, \
	  *name##_write = name##_buf1, \
	  *name##_temp = name##_buf2

#define DBUF_FLIP(name) \
	do { \
		name##_read = name##_write;  /* last output is the next input */ \
		name##_write = name##_temp;  /* spare buffer is the next output */ \
		name##_temp = name##_read;   /* new input doubles as the next spare */ \
	} while (0)

/*
 * Rank a small list by pointer jumping.
 *
 * Each jumping step replaces R[i] with R[i] + R[D[i]] and D[i] with D[D[i]].
 * NOTE(review): presumably D[i] is the index of node i's successor (the tail
 * pointing at itself) and R[i] the weight of that link — confirm with callers.
 *
 * D  successor indices; rewritten only when n == 3, left untouched otherwise
 * R  per-node values; holds the final result on return
 * n  number of nodes; nothing is done for n <= 2
 */
static void list_rank_jump(int *D, int *R, int n) {
	// Lists of at most two nodes are returned exactly as they came in.
	if (n <= 2)
		return;

	// Three nodes: a single sequential, in-place jumping pass.
	if (n == 3) {
		int node = 0;
		while (node < 3) {
			int succ = D[node];
			R[node] += R[succ];
			D[node] = D[succ];
			node++;
		}
		return;
	}

	// From here on n >= 4. Work on scratch copies so that every parallel round
	// reads the previous round's values while writing the next round's.
	DBUF_DECLARE(int, R, n, R);
	DBUF_DECLARE(int, D, n, D);

	// All rounds except the last go through the double buffers.
	int rounds_left = ceil_log2(n - 1);
	while (rounds_left > 1) {
		begin_kspawn(n, K) {
			int succ = D_read[$$];
			D_write[$$] = D_read[succ];
			R_write[$$] = R_read[$$] + R_read[succ];
		} end_kspawn

		DBUF_FLIP(D);
		DBUF_FLIP(R);
		rounds_left--;
	}

	// Last round: store the ranks straight into the caller's R. The jumped
	// pointers are not needed any more, so D is not written back.
	// NOTE(review): this reads R_read while writing R, which is only safe if the
	// loop above ran at least once (i.e. ceil_log2(n - 1) >= 2 for n >= 4) — confirm.
	begin_kspawn(n, K) {
		int succ = D_read[$$];
		R[$$] = R_read[$$] + R_read[succ];
	} end_kspawn
}

// Ad-hoc timing helpers (not used by the code shown in this file).
// STARTTIME() declares a local int `_time` and fills it via xmt_readtimer32().
// SHOWTIME(s) reads the timer again, prints "<s>: <difference to _time>" and then
// stores the new reading in `_time`, so consecutive calls report the time since
// the previous call. It needs a STARTTIME() earlier in the same scope.
#define STARTTIME() int _time; xmt_readtimer32(_time)
#define SHOWTIME(s) do { int now; xmt_readtimer32(now); printf("%s: %d\n", s, now - _time); _time = now; } while (0)

/*
 * Recursive list ranking: shrink the list by collapsing sublists into chosen
 * head nodes, rank the shrunken list recursively, then expand the result.
 *
 * D       successor indices; the end of the list is a node with D[x] == x.
 *         Overwritten: absorbed nodes end up pointing at their head, heads at
 *         the node where their walk stopped.
 * R       per-node values; overwritten with the final result.
 *         NOTE(review): presumably R[i] is the weight of the link i -> D[i] on
 *         entry and the rank (distance to the end of the list) on return — confirm.
 * n       number of nodes.
 * offset  shifts which indices act as heads (index % S == offset % S); it is
 *         incremented on every recursion level.
 */
static void list_rank_cut(int *D, int *R, int n, int offset) {
	//printf("list_rank(%d)\n", n);
	// NOTE(review): ntcu is used below as the number of TCUs, although the macro
	// name suggests an ID — confirm what getTCUID yields in serial code.
	int ntcu;
	getTCUID(ntcu);
	
	// Use pointer jumping if the list is small enough
	if (n < 4 * ntcu) {
		list_rank_jump(D, R, n);
		return;
	}

	// Assume all elements will be needed in the compacted list
	int selected[n];
	begin_kspawn(n, K) {
		selected[$$] = 1;
	} end_kspawn
	
	// Choose approx. NTCU elements as heads of sublists
	// Rank the elements in the sublists until one of the following occurs:
	//   * Another head is encountered
	//   * The end of the list is encountered
	//   * The sublist is found to be longer than some threshold (cut)
	// S is the index stride between heads; heads are the indices i < n with
	// i % S == ofs. Since n >= 4 * ntcu here, S >= 4 whenever ntcu >= 1.
	// NOTE(review): ntcu == 0 would reach this point and divide by zero — confirm it cannot happen.
	int S = n / ntcu;
	int ofs = offset % S;
	begin_kspawn(CDIV(n, S), K) {
		int i = S * $$ + ofs;
		if (i < n) {
			int next = D[i];
			int rank = R[i];
			// Walk budget: at most S * 5 / 2 nodes are absorbed per head.
			int cut = S * 5 / 2;
			while (next % S != ofs && D[next] != next && cut-- > 0) {
				// Save the node's original link before overwriting it.
				int dn = D[next];
				int rn = R[next];
				// Absorb the node: drop it from the compacted list, point it
				// back at its head and store the R-sum accumulated from the
				// head up to (not including) this node.
				selected[next] = 0;
				D[next] = i;
				R[next] = rank;
				next = dn;
				rank += rn;
			}
			// The head now links directly to the node where the walk stopped,
			// carrying the accumulated R-sum of everything it skipped.
			D[i] = next;
			R[i] = rank;
		}
	} end_kspawn

	// Compact all elements that are either heads or were never reached due to the cut
	// compact[] maps an original index to its compacted index and is only
	// filled in (and later only read) for selected nodes.
	int dc[n];
	int rc[n];
	int compact[n];
	n_not_quit = 0;
	begin_kspawn(n, K) {
		if (selected[$$]) {
			// NOTE(review): relies on ps() adding i to n_not_quit and leaving the
			// previous value of n_not_quit in i, giving each selected node a
			// unique slot — confirm against the XMTC ps definition.
			int i = 1;
			ps(i, n_not_quit);
			compact[$$] = i;
			dc[i] = D[$$];
			rc[i] = R[$$];
		}
	} end_kspawn
	// Number of nodes that made it into the compacted list.
	int nc = n_not_quit;

	// Remap next pointer to use compacted index
	begin_kspawn(nc, K) {
		dc[$$] = compact[dc[$$]];
	} end_kspawn

	// Rank the compacted list
	// The offset is bumped so the next level picks a different set of heads.
	// NOTE(review): nothing here checks that nc < n; progress presumably relies
	// on the shifted head positions — confirm.
	list_rank_cut(dc, rc, nc, offset + 1);

	// Compute ranks for the original list from the compacted list
	// Selected nodes copy their value from the compacted result; absorbed nodes
	// take their head's value (D[$$] is the head) minus the R-sum stored for
	// them during the walk.
	begin_kspawn(n, K) {
		if (selected[$$])
			R[$$] = rc[compact[$$]];
		else
			R[$$] = rc[compact[D[$$]]] - R[$$];
	} end_kspawn
}

/*
 * Public entry point for list ranking: forwards to list_rank_cut() with the
 * head-selection offset starting at 0.
 *
 * D  successor indices of the list nodes (may be overwritten by the callee)
 * R  per-node values, overwritten with the result
 * n  number of nodes
 */
void list_rank(int *D, int *R, int n) {
	const int initial_offset = 0;

	list_rank_cut(D, R, n, initial_offset);
}
