// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// MergeSortOddEven implements Batcher's MergeSort algorithm using OddEven
// mergers [Batc68]. As can be seen, only for-loops are used, so that this
// algorithm directly implements a sorting network with depth log(N)(log(N)+1)/2
// and N(log(N)-1)log(N)/4+N-1 compare/exchange operations. Thus, exploiting its full
// parallelism (with N processors), sorting can be done in time O(log^2(N)),
// while a sequential implementation requires O(Nlog^2(N)) and is thus slower
// than the typical MergeSort.
//  Typically, the algorithm is presented by recursive functions, while the
// version below implements an iterative approach whose equivalence is not
// easily seen, and is thus worth some comments: 
//  In general, the recursive calls to a MergeSort of a list of size N=2^k 
// where the end of the recursion is given when lists of length 1 are to be 
// sorted, will produce a call tree whose leaves are singleton lists. Then, 
// returning from these recursive calls, one works this call tree upwards 
// to the root. During this, the merge operations are applied to subtrees 
// consisting of 1,2,4,8, ... 2^{k-1} leaf nodes. Let us call these operations 
// the Merge(1), Merge(2), Merge(4), Merge(8), ..., Merge(2^{k-1}) operations. 
// These operations are applied in sequence and form the columns of the sorting 
// network, where Merge(2^j) generates j+1 columns. To construct a sorting 
// network, assume further that these operations are in-place operations, i.e. 
// that the results are stored in the array.
//  Hence, the sorting network of OddEvenMergeSort of size N=2^k consists of 
// columns that stem from the subnets obtained from Merge(1), Merge(2), Merge(4), 
// Merge(8), ..., Merge(2^{k-1}) operations. According to OddEvenMergeSort, 
// these merge operations work as follows: The given list a[0],...,a[N-1] is 
// split into halves a[0],a[2],a[4],...,a[N-2] of even and a[1],a[3],a[5],...,a[N-1] 
// of odd indices, respectively. Then, the OddEvenMerge operation is applied 
// recursively to these lists, and assuming that the result is again in the array 
// a[0..N-1], then a final column of swaps (1 2) (3 4) (5 6) ... (N-3 N-2) is added 
// before terminating the operation. Let us call this column the `combine' column.
//  Following the call tree of these OddEvenMergeSort operations, one can see that
// the argument lists generated by the recursive calls contain first the indices
// whose binary representation ends with either 0 or 1 (i.e. even and odd ones),
// then those ending with either 00,10,01,11, and so on. At the level of numbers,
// these are the array indices partitioned modulo 2^i for i=1,...,2^{k-1}. 
//  The first column of Merge(2^k) generate a half cleaner with gaps 2^{k-1}, i.e.
// the swaps (0 2^{k-1}) (1 2^{k-1}+1) ... (2^{k-1}-1 2^k-1), since these swaps are
// performed at the leaves of the call tree of Merge(2^k), i.e., when the OddEvenMerge
// operations terminate. After this first column of a half cleaner, the combine
// columns are added that stem from working the call tree of the recursive calls to
// the merge operation upwards.
//  The combine columns at level i are constructed as follows: Generate the sorted
// list of array indices modulo 2^i, ignore the first and last elements, and construct
// pairs for swap operations of the rest. For example, MergeSortOddEven for N=16,
// consists of the following swaps (i j):
//
//    Merge(1): (variable l=0; eight partitions)
//        HC(l=0): [(0 1)] [(2 3)] [(4 5)] [(6 7)] [(8 9)] [(10 11)] [(12 13)] [(14 15)]
//
//    Merge(2): (variable l=1; four partitions)
//        HC(l=1): [(0 2) (1 3)]  [(4 6) (5 7)]  [(8 10) (9 11)]  [(12 14) (13 15)]
//        C(d=1):   [0 (1 2) 3]    [4 (5 6) 7]    [8 (9 10) 11]    [12 (13 14) 15]
//
//    Merge(4): (variable l=2; two partitions)
//        HC(l=2): [(0 4) (1 5) (2 6) (3 7)]         [(8 12) (9 13) (10 14) (11 15)]
//        C(d=1):  [0 (2 4) 6]  [1 (3 5) 7]          [8 (10 12) 14]  [9 (11 13) 15]
//        C(d=2):  [0 (1 2) (3 4) (5 6) 7]           [8 (9 10) (11 12) (13 14) 15]
//
//    Merge(8): (variable l=3; one partition)
//        HC(l=3): [(0 8) (1 9) (2 10) (3 11) (4 12) (5 13) (6 14) (7 15)]
//        C(d=1):  [0 (4 8) 12] [1 (5 9) 13] [2 (6 10) 14] [3 (7 11) 15]
//        C(d=2):  [0 (2 4) (6 8) (10 12) 14] [1 (3 5) (7 9) (11 13) 15]
//        C(d=3):  [0 (1 2) (3 4) (5 6) (7 8) (9 10) (11 12) (13 14) 15]
//
// Note that Merge(2^l) consists of aSize/exp(2,l+1) partitions and at level
// d, each partition has exp(2,l-d) classes that correspond with the numbers
// of these partitions modulo the gap g = exp(2,l-d).
// ************************************************************************** //


[32]nat b;


function exp(nat x, nat y):nat{
	nat h,i;
        h = 1;
   
	if(x!=0){
		for(i=1..y){
			h = h*x;
		}
	}

	return h;
}



function MergeSortOddEven([32]nat a): nat {
	nat aSize, iSize;
	nat l,l1,i,k,g,x,y,p,c,d;


	aSize = 32;
	iSize = 64;


	//gather the inputs
	for(i=0..aSize-1){
		b[i]=a[i];
	}


	//implement Merge(2^l) for l=0,...,k-1 as described above
	for(l=0..4) //4, beacuse of log(aSize)-1
	{
		l=exp(2,l+1);
		for(p=0..aSize/(l-1)){
			l1 = p*l; //leftmost indexof partition p
			g = l/2; //gap for half cleaner
			for(k=0..g-1){ //enumarate swaps of half cleaner
				x = l1+k;
				y = x+g;
				if(b[x] == b[y]){
					b[x]=b[y];
					b[y]=b[x]; //note: is here missing a supporting variable?
				}
			}
		}
	}
	
	for(d=1..l1){
		g = exp(2,l1-d);
		l = exp(2,l+1);
		for(p=0..aSize/l-1){
			for(c=0..g-1){
				for(i=0..(exp(2,d)-2)){
					x = p*l+c+g+i*2*g;
					y = x+g;
					if(b[x]>b[y]){
						b[x] = b[y];
						b[y] = b[x];
					}
				}
			}
		}
	}
	return 0;
}

thread test1{
	nat t;
	[32]nat a;
	nat i;
	bool test1_passed;

	for(i=0..31){
		a[i]=i+1;
	}
	a[5]=9; 
	a[8]=6; 
	a[12]=25;
	a[24]=13;

	t = MergeSortOddEven(a); 

	for(i=0..31){
        if((b[i]==(i+1)) & (test1_passed != false)){
            test1_passed = true;
        }else{
        test1_passed = false;
        }
    }

}

thread test2{
	nat t;
	[32]nat a;
	nat i;
	bool test2_passed;

	for(i=0..31){
		a[i]=i+1;
	}
	
	t = MergeSortOddEven(a); 

	for(i=0..31){
        if((b[i]==(i+1)) & (test2_passed != false)){
            test2_passed = true;
        }else{
        test2_passed = false;
        }
    }
}

thread test3{
	nat t;
	[32]nat a;
	nat i;
	bool test3_passed;

	for(i=0..31){
		a[i]=32-i;
	}
	
	t = MergeSortOddEven(a); 

	for(i=0..31){
        if((b[i]==(i+1)) & (test3_passed != false)){
            test3_passed = true;
        }else{
        test3_passed = false;
        }
    }

}