// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// ShellSort [Shel59,Cyph89,PlPS92,Sedg96,JiLV00,Ciur01] is a variant of      //
// InsertionSort. It performs InsertionSort on h-sequences b[0],b[h],b[2h],...//
// of the array b and repeats this for smaller h until h=1 (so that in this   //
// last step an InsertionSort is made). It is to be noted that a h-sorted     //
// sequence remains h-sorted when sorting by another step h' is done later.   //
// The performance of ShellSort depends on the chosen set of numbers h (see   //
// [Ciur01]), where theoretically any sequence can be used that ends with h=1.//
// Donald Shell proposed the sequence h=1,2,4,8,... which is however not      //
// that good. A better sequence is h=1,3,7,15,31,... (i.e. h_i = 2^i-1) for   //
// which Knuth proved the worst case complexity O(n^1.5) which cannot be      //
// improved due to Prat. Another often used sequence is h=1,4,13, (i.e.       //
// h_{i+1} = 3*h_i+1). Pratt proved 1971 that h=1,2,3,4,6,9,8,12,18,27,..     //
// requires only O(n*(log(n))^2) steps where the sequence is obtained         //
// by adding 2*h_i and 3*h_i to the sequence if these numbers are not already //
// at the end of the sequence (i.e., h_i = 2^j*3^k for all j,k). Using this   //
// sequence and BubbleSort instead of InsertionSort, a sorting network can be //
// built with the same complexity of Batcher's bitonic sorter.                //
// ************************************************************************** //

macro aSize = 16;
macro iSize = 64;
macro hSize = 3;
macro maxh  = 5;

module ShellSort([aSize]nat{iSize} ?a,b, event !ready) {
    nat{aSize+1} i,j,h;
    nat{iSize} val;
    [hSize]nat{iSize} steps;

    steps[0] = 5;
    steps[1] = 3;
    steps[2] = 1;

    for(i=0..aSize-1)
        b[i] = a[i];       
    
    for(s=0..hSize-1) {
        h = steps[s];
        // perform insertion sort using gap h
        i = h;
        while(i<aSize) {
            // here, b[0..i-1] is h-sorted and b[i] must be
            // inserted in the corresponding h-subsequence
            val = b[i];
            j = i-h;
            while(b[j]>val & j>=h) {
                next(b[j+h]) = b[j];
                next(j) = j-h;
                w0: pause;
            }
            if(b[j]>val) {
                assert(j<h);
                next(b[j+h]) = b[j];
                next(b[j]) = val;
            } else {
                assert(b[j]<=val);
                next(b[j+h]) = val;
            }
            next(i) = i+1;
            w1: pause;
        }
        w2: pause;
    }
    emit(ready);
}
drivenby Test01 {
    for(i=0..aSize-1) a[i] = i;
    dw: await(ready);
    for(i=0..aSize-1) assert(b[i] == i);
}
drivenby Test02 {
    for(i=0..aSize-1) a[i] = aSize-1-i;
    dw: await(ready);
    for(i=0..aSize-1) assert(b[i] == i);
}
drivenby Test03 {
    for(i=0..aSize-1)
        a[i] = ((i%2==0)?i:((aSize%2==0)?aSize-i:aSize-1-i));
    dw: await(ready);
    for(i=0..aSize-1) assert(b[i] == i);
}