// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// The following module implements Hirschberg's algorithm to compute the      //
// transitive hull of a boolean matrix in depth O(log(N)^2) using O(N^3)      //
// processors. Clearly, this algorithm is not as fast as the log(N) time      //
// version, it has the advantage that it does not require concurrent writes.  //
// Since there is in general a slowdown by O(log(N)), and since the CRCW      //
// algorithm requires time O(log(N)) by O(N) processors, this CREW algorithm  //
// is probably optimal.                                                       //
// ************************************************************************** //

macro N = 8;


module TransHull_OlogNlogN([N][N]bool ?a,[N][N]bool b,event !rdy) {
    [N][N]bool x;
    [N][N][N]bool p;
    
    // copy matrix a to local matrix x
    for(i=0..N-1)
        for(j=0..N-1)
            x[i][j] = a[i][j];

    // multiply matrix x log(N) times with itself to obtain the transitive hull
    for(step=1..log(N)) {
        // compute next(x[i][j]) = x[i][j] | exists(k=0..N-1) x[i][k] & x[k][j]
        // using a balanced tree for the disjunction with depth log(N)
        for(i=0..N-1)
            for(j=0..N-1)
                for(k=0..N-1)
                    p[k][i][j] = x[i][j] | x[i][k] & x[k][j];
        // for each pair (i,j), sum up p[0..N-1][i][j] in a parallel prefix tree
        for(h=0..log(N)-1) {
            for(i=0..N-1)
                for(j=0..N-1)
                    for(k=exp(2,h)..N-1)
                        next(p[k][i][j]) = p[k][i][j] | p[k-exp(2,h)][i][j];
            pause;
        }
        for(i=0..N-1)
            for(j=0..N-1)
                next(x[i][j]) = p[N-1][i][j]; 
        pause;
    }

    // collect the result
    for(i=0..N-1)
        for(j=0..N-1)
            b[i][j] = x[i][j];
    emit(rdy);
}
drivenby g1 {
    // the assignments below construct the following graph:
    //
    //                 3
    //                / \
    //               v   v
    //    0 -> 1 <-> 2-> 4 <-> 5 -> 6
    //               |         
    //               v         
    //               7<|       
    //               |_|
    //
    a[0][1] = true;
    a[1][2] = true;
    a[2][1] = true;
    a[2][4] = true;
    a[2][7] = true;
    a[3][2] = true;
    a[3][4] = true;
    a[4][5] = true;
    a[5][4] = true;
    a[5][6] = true;
    a[7][7] = true;
    await(rdy);
    for(i=0..N-1) 
        for(j=0..N-1)
            assert((b[i][j] <->
                    (i==0 & (j==1 | j==2 | j==4 | j==5 | j==6 | j==7 ) |
                     i==1 & (j==1 | j==2 | j==4 | j==5 | j==6 | j==7 ) |
                     i==2 & (j==1 | j==2 | j==4 | j==5 | j==6 | j==7 ) |
                     i==3 & (j==1 | j==2 | j==4 | j==5 | j==6 | j==7 ) |
                     i==4 & (              j==4 | j==5 | j==6        ) |
                     i==5 & (              j==4 | j==5 | j==6        ) |
                     i==7 &  j==7)
                )
          );
}
drivenby g2 {
    // worst case example: a chain of length N
    a[0][1] = true;
    a[1][2] = true;
    a[2][3] = true;
    a[3][4] = true;
    a[4][5] = true;
    a[5][6] = true;
    a[6][7] = true;
    await(rdy);
    assert(forall(i=0..N-1)
            forall(j=0..N-1)
                (b[i][j] <-> i<j)
          );
}