```// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// The following module implements Hirschberg's algorithm to compute the      //
// transitive hull of a boolean matrix in depth O(log(N)^2) using O(N^3)      //
// processors. Clearly, this algorithm is not as fast as the log(N) time      //
// version, it has the advantage that it does not require concurrent writes.  //
// Since there is in general a slowdown by O(log(N)), and since the CRCW      //
// algorithm requires time O(log(N)) by O(N) processors, this CREW algorithm  //
// is probably optimal.                                                       //
// ************************************************************************** //

macro N = 8;

module TransHull_OlogNlogN([N][N]bool ?a,[N][N]bool b,event !rdy) {
[N][N]bool x;
[N][N][N]bool p;

// copy matrix a to local matrix x
for(i=0..N-1)
for(j=0..N-1)
x[i][j] = a[i][j];

// multiply matrix x log(N) times with itself to obtain the transitive hull
for(step=1..log(N)) {
// compute next(x[i][j]) = x[i][j] | exists(k=0..N-1) x[i][k] & x[k][j]
// using a balanced tree for the disjunction with depth log(N)
for(i=0..N-1)
for(j=0..N-1)
for(k=0..N-1)
p[k][i][j] = x[i][j] | x[i][k] & x[k][j];
// for each pair (i,j), sum up p[0..N-1][i][j] in a parallel prefix tree
for(h=0..log(N)-1) {
for(i=0..N-1)
for(j=0..N-1)
for(k=exp(2,h)..N-1)
next(p[k][i][j]) = p[k][i][j] | p[k-exp(2,h)][i][j];
pause;
}
for(i=0..N-1)
for(j=0..N-1)
next(x[i][j]) = p[N-1][i][j];
pause;
}

// collect the result
for(i=0..N-1)
for(j=0..N-1)
b[i][j] = x[i][j];
emit(rdy);
}
drivenby g1 {
// the assignments below construct the following graph:
//
//                 3
//                / \
//               v   v
//    0 -> 1 <-> 2-> 4 <-> 5 -> 6
//               |
//               v
//               7<|
//               |_|
//
a[0][1] = true;
a[1][2] = true;
a[2][1] = true;
a[2][4] = true;
a[2][7] = true;
a[3][2] = true;
a[3][4] = true;
a[4][5] = true;
a[5][4] = true;
a[5][6] = true;
a[7][7] = true;
await(rdy);
for(i=0..N-1)
for(j=0..N-1)
assert((b[i][j] <->
(i==0 & (j==1 | j==2 | j==4 | j==5 | j==6 | j==7 ) |
i==1 & (j==1 | j==2 | j==4 | j==5 | j==6 | j==7 ) |
i==2 & (j==1 | j==2 | j==4 | j==5 | j==6 | j==7 ) |
i==3 & (j==1 | j==2 | j==4 | j==5 | j==6 | j==7 ) |
i==4 & (              j==4 | j==5 | j==6        ) |
i==5 & (              j==4 | j==5 | j==6        ) |
i==7 &  j==7)
)
);
}
drivenby g2 {
// worst case example: a chain of length N
a[0][1] = true;
a[1][2] = true;
a[2][3] = true;
a[3][4] = true;
a[4][5] = true;
a[5][6] = true;
a[6][7] = true;
await(rdy);
assert(forall(i=0..N-1)
forall(j=0..N-1)
(b[i][j] <-> i<j)
);
}

```