```// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// MergeSortBitonic implements Batcher's MergeSort algorithm using bitonic    //
// mergers [Batc68]. As can be seen, only for-loops are used, so that this    //
// algorithm directly implements a sorting network with depth                 //
// log(N)*(log(N)+1)/2 and N*log(N)*(log(N)+1)/4 compare/exchange operations. //
// Using its full parallelism (with N processors), sorting can be done in time//
// O(log^2(N)), while a sequential implementation requires O(Nlog^2(N)) and is//
// thus slower than MergeSort.                                                //
// ************************************************************************** //

macro aSize = 16;
macro iSize = 64;

module MergeSortBitonic([aSize]int{iSize} ?a,b, event !ready) {

// gather the inputs
for(i=0..aSize-1)
b[i] = a[i];

// perform log(aSize) in-place merging phases of partitions
// b[2*p*L..(2*p+1)*L-1] and b[(2*p+1)*L..(2*p+2)*L-1] of length
// L=exp(2,l)

for(l=0..log(aSize)-1) {
let(L=exp(2,l))
// the first column of half-cleaners rotates the second sequence
for(p=0..(aSize/L-2)/2) {
let(left=2*p*L)
let(right=(2*p+2)*L-1)
for(k=0..L-1)
if(b[left+k]>b[right-k]) {
next(b[left+k]) = b[right-k];
next(b[right-k]) = b[left+k];
}
}
w1: pause;

// next columns of half-cleaners consider partitions of
// lengths exp(2,l-1),...,exp(2,0)
for(d=1..l) {
let(L = exp(2,l-d))
for(p=0..(aSize/L-2)/2) {
// half cleaner for b[2*p*L..(2*p+1)*L-1] and b[(2*p+1)*L..(2*p+2)*L-1]
let(left=2*p*L)
let(right=left+L)
for(k=0..L-1) {
if(b[left+k]>b[right+k]) {
next(b[left+k]) = b[right+k];
next(b[right+k]) = b[left+k];
}
}
}
w2: pause;
}
}
}
drivenby Test01 {
for(i=0..aSize-1) a[i] = i;
for(i=0..aSize-1) assert(b[i] == i);
}
drivenby Test02 {
for(i=0..aSize-1) a[i] = aSize-1-i;