// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// The following module implements Hirschberg's algorithm to compute the      //
// transitive hull of a boolean matrix in depth O(log(N)^2) using O(N^3)      //
// processors. Clearly, this algorithm is not as fast as the log(N) time      //
// version, it has the advantage that it does not require concurrent writes.  //
// Since there is in general a slowdown by O(log(N)), and since the CRCW      //
// algorithm requires time O(log(N)) by O(N) processors, this CREW algorithm  //
// is probably optimal.                                                       //
// ************************************************************************** //

[8][8] bool x;
[8][8][8] bool p;
[8][8] bool b;

function exp(nat x0, nat y0):nat {
    nat h,i;
    h = 1;

    if(x0!=0) {
        for(i=1..y0) {
            h = h*x0;
        }
    }
    return h;
}


function log(nat n) : nat {
    nat h,l;
    h = 1;
    l = 0;
    while(h<n) {
            h = h*2;
            l = l+1;
     }

    return l;
}

function TransHull_OlogNlogN(nat n, [][]bool a):nat {
    nat i,j,k, step, h;
    // copy matrix a to local matrix x
        for(i=0..n-1){
            for(j=0..n-1){
                x[i][j] = a[i][j];
        }
    }
    
    // multiply matrix x log(N) times with itself to obtain the transitive hull
    for(step=1..log(n)){
    // compute (x[i][j]) = x[i][j] | exists(k=0..N-1) x[i][k] & x[k][j]
    // using a balanced tree for the disjunction with depth log(N)
        for(i=0..n-1){
            for(j=0..n-1){
                for(k=0..n-1){
                    p[k][i][j] = x[i][j] | x[i][k] & x[k][j];
                }
            }              
        }    
    }

     // for each pair (i,j), sum up p[0..N-1][i][j] in a parallel prefix tree

     for(h=0..log(n)-1){
        for(i=0..n-1){
            for(j=0..n-1){
                for(k=exp(2,h)..n-1){
                    p[k][i][j] = p[k][i][j] | p[k-exp(2,h)][i][j];
                }
            }
        }
    }

     for(i=0..n-1){
        for(j=0..n-1){
            (x[i][j]) = p[n-1][i][j]; 
        }
     }

    // collect the result
    for(i=0..n-1)
        for(j=0..n-1)
            b[i][j] = x[i][j];

            return 1;
}


thread test{
    nat t,i,j,n;
    bool test_passed;
    [8][8] bool a;
    a[0][1] = true;
    a[1][2] = true;
    a[2][1] = true;
    a[2][4] = true;
    a[2][7] = true;
    a[3][2] = true;
    a[3][4] = true;
    a[4][5] = true;
    a[5][4] = true;
    a[5][6] = true;
    a[7][7] = true;
    n = 8;
    t = TransHull_OlogNlogN(n,a);

    for(i=0..n-1){
        for(j=0..n-1){
            if(!((b[i][j] <->
                    (i==0 & (j==1 | j==2 | j==4 | j==5 | j==6 | j==7 ) |
                     i==1 & (j==1 | j==2 | j==4 | j==5 | j==6 | j==7 ) |
                     i==2 & (j==1 | j==2 | j==4 | j==5 | j==6 | j==7 ) |
                     i==3 & (j==1 | j==2 | j==4 | j==5 | j==6 | j==7 ) |
                     i==4 & (              j==4 | j==5 | j==6        ) |
                     i==5 & (              j==4 | j==5 | j==6        ) |
                     i==7 &  j==7)
                )))
                {
                    test_passed = false;
                }
        }
    }

}