// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// MergeSort is one of the most efficient sorting algorithms, not only because//
// of its optimal best, average and worst case asymptotic complexity of only  //
// O(n*log(n)), but also due to practical runtimes. The version given below   //
// in Quartz performs a simple in-place merge, which is possible due to the   //
// concurrency available in Quartz.                                           //
// MergeSort is a divide-and-conquer algorithm that splits the list into      //
// halves, sorts them, and merges the sorted lists into a sorted list. The    //
// main problem is thereby to implement the merging efficiently. For a list   //
// of length n, log(n) merging phases are required in sequence, and in each   //
// phase the entire list, partitioned into sorted sequences, is merged into   //
// larger sorted partitions.                                                  //
// ************************************************************************** //

macro aSize = 16;
macro iSize = 64;

module MergeSort([aSize+1]int{iSize} ?a,b, event !ready) {
    [aSize]int{iSize} c;
    nat{aSize+1} i0,i1,j0,j1,j,l;
    l = 2;
    
    // initial swaps to achieve sorted partitions of length 2
    for(i=0..aSize/2-1)
        if(a[2*i] > a[2*i+1]) {
            next(b[2*i]) = a[2*i+1];
            next(b[2*i+1]) = a[2*i];
        } else {
            next(b[2*i]) = a[2*i];
            next(b[2*i+1]) = a[2*i+1];
        }
    w0: pause;

    // perform log(aSize) merging phases; l denotes the length of the sorted partitions
    // in each of these phases
    l = 2;
    while(l<=aSize/2) {
        // merging partitions of length l into array c
        i0 = 0; // current position in left  partition
        i1 = 0; // current position in right partition
        j  = 0; // position of c to write next
        j0 = 0; // start of current left  partition
        j1 = l; // start of current right partition
        while(j0+l<aSize) {
            // merge sorted sequences b[j0..j0+l-1] with b[j1..j1+l-1] into c[j0..j1+l-1]
            while(i0<l & i1<l) {
                if(b[i0+j0] > b[i1+j1]) {
                    next(c[j]) = b[i1+j1];
                    next(i1) = i1+1;
                    next(j) = j+1;
                } else {
                    next(c[j]) = b[i0+j0];
                    next(i0) = i0+1;
                    next(j) = j+1;
                }
                w1: pause;
                }
            // copy the rest of b[j0..j0+l-1] to c
            while(i0<l) {
                next(c[j]) = b[i0+j0];
                next(i0) = i0+1;
                next(j) = j+1;
                w2: pause;
                }
            // copy the rest of b[j1..j1+l-1] to c
            while(i1<l) {
                next(c[j]) = b[i1+j1];
                next(i1) = i1+1;
                next(j) = j+1;
                w3: pause;
                }
            // prepare indices for merging the next partition
            next(i0) = 0;
            next(i1) = 0;
            next(j0) = sat{aSize}(j0+l+l);
            next(j1) = sat{aSize}(j1+l+l);
            w4: pause;
            }
        // all partitions of size l are merged, thus copy c back to b
        for(i=0..aSize-1)
            b[i] = c[i];
        next(l) = l+l;
        w5: pause;
        }

    emit(ready);
}
drivenby Test01 {
    for(i=0..aSize-1) a[i] = i;
    dw: await(ready);
    for(i=0..aSize-1) assert(b[i] == i);
}
drivenby Test02 {
    for(i=0..aSize-1) a[i] = aSize-1-i;
    dw: await(ready);
    for(i=0..aSize-1) assert(b[i] == i);
}
drivenby Test03 {
    for(i=0..aSize-1)
        a[i] = ((i%2==0)?i:((aSize%2==0)?aSize-i:aSize-1-i));
    dw: await(ready);
    for(i=0..aSize-1) assert(b[i] == i);
}