// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// MergeSortBitonic implements Batcher's MergeSort algorithm using bitonic    //
// mergers [Batc68]. As can be seen, only for-loops are used, so that this    //
// algorithm directly implements a sorting network with depth                 //
// log(N)*(log(N)+1)/2 and N*log(N)*(log(N)+1)/4 compare/exchange operations. //
// Using its full parallelism (with N processors), sorting can be done in time//
// O(log^2(N)), while a sequential implementation requires O(Nlog^2(N)) and is//
// thus slower than MergeSort.                                                //
// ************************************************************************** //

[16] nat b;


function log(nat n):nat {
    nat h,l;
    h = 1;
    l = 0;
    while(h<n) {
        h = h*2;
        l = l+1;
    }
    return l;
}


function exp(nat x, nat y):nat {
    nat h,i;
    h = 1;
    if(x!=0)
        for(i=1..y)
            h = h*x;
    return h;
}


function MergeSortBitonic (nat aSize): nat {
    nat l,p, temp,d,k, l1, left, right;
    for(l=0..log(aSize)-1){
        l1 = exp(2,l);
        for (p=0..(aSize/l1-2)/2){
            left = 2*p*l1;
            right = (2*p+2)*l1-1;
            if(b[left+k]>b[right-k]){
                temp = b[left+k];
                b[left+k] = b[right-k];
                b[right-k] = b[left+k];
            }
        }
    }

    for(d=1..l) {
        l1 = exp(2,(l-d));
        for (p=0..(aSize-l1/2)/2) {
            left = 2*p*l1;
            right = left + l1;
            for (k=0..l1-1) {
                if(b[left+k]>b[right+k]) {
                    temp = b[left+k];
                    b[left+k] = b[right+k];
                    b[right+k] = temp;
                }
            }
        }
    }

    return 0;
}


thread test{
    nat k,i;
    bool test1_passed;
    b[0] = 9;
    b[1] = 8;
    b[2] = 16;
    b[3] = 15;
    b[4] = 1;
    b[5] = 10;
    b[6] = 2;
    b[7] = 11;
    b[8] = 3;
    b[9] = 4;
    b[10] = 6;
    b[11] = 5;
    b[12] = 14;
    b[13] = 12;
    b[14] = 7;
    b[15] = 13;

    test1_passed = true;

    k = MergeSortBitonic(16);

    for(i=0..15) {
        if((b[i]==(i+1)) & (test1_passed != false)) {
            test1_passed = true;
        } else {
        test1_passed = false;
        }
    }

}

thread test2 {
    nat k,i,aSize;
    bool test2_passed;
    aSize = 32;

    for(i=0..aSize-1)
        b[i] = i;


    test2_passed = true;
    k = MergeSortBitonic(aSize);

    for (i=0..aSize-1){
     if((b[i]==i) & (test2_passed != false)) {
            test2_passed = true;
        }else{
            test2_passed = false;
        }
    }
}

thread test3{
    nat k,i,aSize;
    bool test3_passed;
    aSize = 32;

    for(i=0..aSize-1){
        b[i] = aSize-1-i;
    }

    test3_passed = true;
    k = MergeSortBitonic(aSize);

    for (i=0..aSize-1){
     if((b[i]==i) & (test3_passed != false)){
            test3_passed = true;
        }else{
            test3_passed = false;
        }
    }
}