// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// The following algorithm computes for an array a of length n the longest    //
// increasing subsequence in time O(log(n)) and work O(n). The subsequence is //
// determined by the start index idx and the length len of the subsequence.   //
// To compute it, we use a parallel prefix computation that computes for each //
// subsequence a[j*2^i..(j+1)*2^i-1] a tuple (lft,mid,rgt,pos) such that pos  //
// is the offset of the starting position of the longest subsequence, mid is  //
// its length, lft is the length of the longest prefix consisting of 1s, and  //
// rgt is the length of the longest suffix of 1s (that were initially computed//
// by the comparisons a[i]<=a[i+1]).                                          //
// ************************************************************************** //

macro M = 4;
macro N = exp(2,M);
macro max2(x,y) = (x<y ? y : x);
macro max3(x,y,z) = max2(x,max2(y,z));

module LongestIncreasingSubsequence([N+1]int ?a, nat !len,!idx, event !rdy) {
    [N]nat lft,mid,rgt,pos;
    nat n;
    // initialize the arrays
    n = 1;
    for(i=0..N-1) {
        lft[i] = (a[i]<=a[i+1] ? 1 : 0);
        mid[i] = (a[i]<=a[i+1] ? 1 : 0);
        rgt[i] = (a[i]<=a[i+1] ? 1 : 0);
        pos[i] = 0;
    }
    // combine halves
    for(h=0..M-1) {
        for(j=0..exp(2,M-1-h)-1)
            let(mx = max3(mid[2*j],mid[2*j+1],rgt[2*j]+lft[2*j+1])) {
            next(lft[j]) = (lft[2*j]<n ? lft[2*j] : n+lft[2*j+1]);
            next(mid[j]) = mx;
            next(rgt[j]) = (rgt[2*j+1]<n ? rgt[2*j+1] : rgt[2*j]+n);
            next(pos[j]) = (mx==mid[2*j+1] ? pos[2*j+1]+n : pos[2*j]);
        }
        next(n) = n+n;
        pause;
    }
    // termination signal
    len = mid[0]+1;
    idx = pos[0];
    emit(rdy);
}
drivenby {
    a = [1,2,-3,4,8,7,6,7,14,35,37,88,3,4,23,12,13];
    await(rdy);
}