// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //

package ComputerArchitecture.SystolicArrays.TransitiveClosure.KuLL87a;

macro N = 3;

// ----------------------------------------------------------------
// RotLocDepWarshall yields a spiral signal flow graph, which is
// disadvantageous due to the long wires through the entire array.
// The spiral arcs can be removed due to the following observations:
//    (1) Due to the propositional absorption law a|a&b = a, we
//        can simplify the assignments to t[i][j][k] in the cases
//        where either (i==k) or (j==k) holds.
//    (2) For each k, the columns of matrix c[][][k] contain the
//        same value, namely, c[i][.][k] = t[i][0][k-1]. The spiral
//        dependency that stems from the assignment with (i!=0)
//               t[(i-1)%N][N-1][k] = t[i][0][k-1]
//        can therefore be replaced with the following local one
//               t[(i-1)%N][N-1][k] = c[i][N-1][k]
//    (3) Analogously, the rows of matrix r[][][k] contain the
//        same value, namely, r[.][j][k] = t[0][j][k-1]. The spiral
//        dependency that stems from the assignment with (j!=0)
//               t[N-1][(j-1)%N][k] = t[0][j][k-1]
//        can therefore be replaced with the following local one
//               t[N-1][(j-1)%N][k] = r[N-1][j][k]
// There is just one exception, namely the assignment
//               t[N-1][N-1][k] = t[0][0][k-1]
// However, if the reflexive transitive closure is to be computed,
// then, we can simply replace this assignment as follows:
//               t[N-1][N-1][k] = true
// The variants below are obtained as follows:
//    (a) Opt1RotLocDepWarshall is obtained from RotLocDepWarshall
//        by using observation (1) above.
//    (b) Opt2RotLocDepWarshall is obtained from Opt1RotLocDepWarshall
//        by using observation (2) and (3) above.
// ----------------------------------------------------------------
 
module Opt2RotLocDepWarshall([N][N]bool ?a, [N][N][N]bool t, event !OnlyReflexive) {
    event [N][N][N]bool c, r;
    /*for(i=0..(N-1))
        for(j=0..(N-1)) {
            c[i][j][0] = ((j==0) ? a[i][0] : c[i][j-1][0]);
            r[i][j][0] = ((i==0) ? a[0][j] : r[i-1][j][0]);
            if((i==0)|(j==0))
                t[(i-1)%N][(j-1)%N][0] = a[i][j];
            else
                t[(i-1)%N][(j-1)%N][0] = a[i][j] | c[i][j][0] & r[i][j][0];
        }
        
    */
    //--- START WORKAROUND ---
    c[0][0][0] = a[0][0];
    r[0][0][0] = a[0][0];
   
    t[abs ((-1)%N)][abs ((-1)%N)][0] = a[0][0];
   
    for(j=1..(N-1)) {
        c[0][j][0] = c[0][j-1][0];
        r[0][j][0] = a[0][j];
        t[abs ((-1)%N)][abs ((-1+j)%N)][0] = a[0][j];
    }
   
    for(i=1..(N-1)){
        c[i][0][0] = a[i][0];
        r[i][0][0] = r[i-1][0][0];
        t[abs ((-1+i)%N)][abs ((-1)%N)][0] = a[i][0];
    }

    for(i=1..(N-1))
        for(j=1..(N-1)) {
            c[i][j][0] = c[i][j-1][0];
            r[i][j][0] = r[i-1][j][0];
            t[abs ((-1+i)%N)][abs ((-1+j)%N)][0] = a[i][j] | c[i][j][0] & r[i][j][0];
        }
    //--- END WORKAROUND ---
    pause;
    for(k=1..(N-1)) {
        /*for(i=0..(N-1))
            for(j=0..(N-1)) {
                c[i][j][k] = ((j==0) ? t[i][j][k-1] : c[i][j-1][k]);
                r[i][j][k] = ((i==0) ? t[i][j][k-1] : r[i-1][j][k]);
                if((i==0)&(j==0))
                    t[N-1][N-1][k] = t[0][0][k-1];
                else if(i==0)
                    t[N-1][j-1][k] = r[N-1][j][k];
                else if(j==0)
                    t[i-1][N-1][k] = c[i][N-1][k];
                else
                    t[i-1][j-1][k] = t[i][j][k-1] | c[i][j][k] & r[i][j][k];
            }
        */
        //--- START WORKAROUND ---
        c[0][0][k] = t[0][0][k-1];
        r[0][0][k] = t[0][0][k-1];

        t[N-1][N-1][k] = t[0][0][k-1];

        for(j=1..(N-1)) {
            c[0][j][k] = c[0][j-1][k];
            r[0][j][k] = t[0][j][k-1];
            t[N-1][j-1][k] = r[N-1][j][k];
        }

        for(i=1..(N-1)) {
            c[i][0][k] = t[i][0][k-1];
            r[i][0][k] = r[i-1][0][k];
            t[i-1][N-1][k] = c[i][N-1][k];
        }
        for(i=1..(N-1))
            for(j=1..(N-1)) {
                c[i][j][k] = c[i][j-1][k];
                r[i][j][k] = r[i-1][j][k];
                t[i-1][j-1][k] = t[i][j][k-1] | c[i][j][k] & r[i][j][k];
            }
        //--- END WORKAROUND ---

        pause;
    }
}