// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// MergeSortBitonic implements Batcher's MergeSort algorithm using bitonic    //
// mergers [Batc68]. As can be seen, only for-loops are used, so that this    //
// algorithm directly implements a sorting network with depth                 //
// log(N)*(log(N)+1)/2 and N*log(N)*(log(N)+1)/4 compare/exchange operations. //
// Using its full parallelism (with N processors), sorting can be done in time//
// O(log^2(N)), while a sequential implementation requires O(Nlog^2(N)) and is//
// thus slower than MergeSort.                                                //
// ************************************************************************** //

[16] nat b; 


function log(nat n) : nat {
    nat h,l;
    h = 1;
    l = 0;
    while(h<n) {
        h = h*2;
        l = l+1;
    }
    return l;
}

function exp(nat x, nat y):nat{
    nat h,i;
    h = 1;
   
    if(x!=0){
        for(i=1..y){
            h = h*x;
        }
    }

    return h;
}

function MergeSortBitonic (nat aSize): nat {
    nat l,p, temp,d,k, l1, left, right;
    for(l=0..log(aSize)-1){
        l1 = exp(2,l);
        for (p=0..(aSize/l1-2)/2){
            left = 2*p*l1;
            right = (2*p+2)*l1-1;
            if(b[left+k]>b[right-k]){
                temp = b[left+k];
                b[left+k] = b[right-k];
                b[right-k] = b[left+k];
            }
        }
    }

    for(d=1..l){
        l1 = exp(2,(l-d));
        for (p=0..(aSize-l1/2)/2){
            left = 2*p*l1;
            right = left + l1;
            for (k=0..l1-1){
                if(b[left+k]>b[right+k]){
                    temp = b[left+k];
                    b[left+k] = b[right+k];
                    b[right+k] = temp;
                }
            }
        }
    }
    
    return 0;
}

thread test{
    nat k;
    b[0] = 9;
    b[1] = 8;
    b[2] = 16;
    b[3] = 15;
    b[4] = 1;
    b[5] = 10;
    b[6] = 2;
    b[7] = 11;
    b[8] = 3;
    b[9] = 4;
    b[10] = 6;
    b[11] = 5;
    b[12] = 14;
    b[13] = 12;
    b[14] = 7;
    b[15] = 13;
  
    k = MergeSortBitonic(16);
}