```// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// PairwiseSort implements Parberry's pairwise sorting network [Parb92].      //
// This sorting network has depth log(N)(log(N)+1)/2 and requires             //
// N(log(N)-1)log(N)/4+N-1 compare/exchange operations. Thus, exploiting its  //
// full parallelism (with N processors), sorting can be done in time          //
// O(log^2(N)), while a sequential implementation requires O(Nlog^2(N)) and   //
// is thus slower than the typical MergeSort.                                 //
//  Typically, the algorithm is presented by recursive functions, while the   //
// version below implements an iterative approach whose equivalence is not    //
// easily seen; see the comments in MergeSortOddEven.                         //
// ************************************************************************** //

macro aSize = 16;
macro iSize = 64;

module PairwiseSort([aSize]int{iSize} ?a,b, event !ready) {

// gather the inputs
for(i=0..aSize-1)
b[i] = a[i];

// implement half cleaner columns in all partitions
for(l=0..log(aSize)-1) {
// consider partitions (= subtrees of call tree) of size L defined below
let(L=exp(2,l+1))
for(p=0..aSize/L-1) {
let(l = p*L)    // leftmost index of partition p
let(g = L/2)    // gap for half cleaner
for(k=0..g-1)   // enumerate swaps of half cleaner
let(x = l+k)
let(y = x+g)
if(b[x]>b[y]) {
next(b[x]) = b[y];
next(b[y]) = b[x];
}
}
w1: pause;
}
// after the half cleaner columns, the shuffle columns are added
for(l=1..log(aSize)-1) {
for(d=1..l) {
let(g = exp(2,l+1-d)-1)     // gaps for the shuffle
let(L = exp(2,l+1))         // size of a partition
let(n = aSize/L)          // number of partitions
for(p=0..n-1) {             // for each partition p
for(i=0..(L-g-1)/2-1) {
let(x = p+(2*i+1)*n)
let(y = x+n*g)
if(b[x]>b[y]) {
next(b[x]) = b[y];
next(b[y]) = b[x];
}
}
}
w2: pause;
}
}
}
drivenby Test01 {
for(i=0..aSize-1) a[i] = i;
for(i=0..aSize-1) assert(b[i] == i);
}
drivenby Test02 {
for(i=0..aSize-1) a[i] = aSize-1-i;