// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //

package ComputerArchitecture.SystolicArrays.TransitiveClosure.KuLL87a;

macro N = 3;



// ----------------------------------------------------------------
// The first variant removes global dependencies, i.e., all assignments
// do only use variables with array indices that differ by at most
// one from the left hand sides indices. To this end, propagating
// variables for columns and rows, c and r, respectively, are used.
// Note that these propagating variables are events and that the
// N^2 assignments of one k-dimension are done in a single cycle.
// ----------------------------------------------------------------

module LocDepWarshall([N][N]bool ?a, [N][N][N]bool t, event !OnlyReflexive) {
    event [N][N][N]bool c, r;
    /*for(i=0..(N-1))
        for(j=0..(N-1)) {
            c[i][j][0] = ((j==0) ? a[i][0] : c[i][j-1][0]);
            r[i][j][0] = ((i==0) ? a[0][j] : r[i-1][j][0]);
            if((i==0)|(j==0))
                t[i][j][0] = a[i][j];
            else
                t[i][j][0] = a[i][j] | c[i][j][0] & r[i][j][0];
        }
    */
    //--- START WORKAROUND ---
    c[0][0][0] = a[0][0];
    r[0][0][0] = a[0][0];
   
    t[0][0][0] = a[0][0];
   
    for(j=1..(N-1)) {
        c[0][j][0] = c[0][j-1][0];
        r[0][j][0] = a[0][j];
        t[0][j][0] = a[0][j];
    }
   
    for(i=1..(N-1)){
        c[i][0][0] = a[i][0];
        r[i][0][0] = r[i-1][0][0];
        t[i][0][0] = a[i][0];
    }
      
    for(i=1..(N-1))
        for(j=1..(N-1)) {
            c[i][j][0] = c[i][j-1][0];
            r[i][j][0] = r[i-1][j][0];
            t[i][j][0] = a[i][j] | c[i][j][0] & r[i][j][0];
        }
    //--- END WORKAROUND ---
    pause;
    for(k=1..(N-1)) {
        /*for(i=0..(N-1))
        for(j=0..(N-1)) {
            c[i][j][k] = ((j==k) ? t[i][j][k-1] : ((j<k) ? c[i][j+1][k] : c[i][j-1][k]));
            r[i][j][k] = ((i==k) ? t[i][j][k-1] : ((i<k) ? r[i+1][j][k] : r[i-1][j][k]));
            if((i==k)|(j==k))
                t[i][j][k] = t[i][j][k-1];
            else
                t[i][j][k] = t[i][j][k-1] | c[i][j][k] & r[i][j][k];
         }
      */
        //--- START WORKAROUND ---
        c[0][0][k] = c[0][1][k];
        r[0][0][k] = r[1][0][k];
       
        t[0][0][k] = t[0][0][k-1] | c[0][0][k] & r[0][0][k];
       
        for(j=1..(N-2)) {
            c[0][j][k] = ((j==k) ? t[0][j][k-1] : ((j<k) ? c[0][j+1][k] : c[0][j-1][k]));
            r[0][j][k] = r[1][j][k];
            if(j==k)
                t[0][j][k] = t[0][j][k-1];
            else
                t[0][j][k] = t[0][j][k-1] | c[0][j][k] & r[0][j][k];
        }
       
        c[0][(N-1)][k] = (((N-1)==k) ? t[0][(N-1)][k-1] : (c[0][(N-1)-1][k]));
        r[0][(N-1)][k] = r[1][(N-1)][k];
        if((N-1)==k)
            t[0][(N-1)][k] = t[0][(N-1)][k-1];
        else
            t[0][(N-1)][k] = t[0][(N-1)][k-1] | c[0][(N-1)][k] & r[0][(N-1)][k];
       
        for(i=1..(N-2)){
            c[i][0][k] = c[i][1][k];
            r[i][0][k] = ((i==k) ? t[i][0][k-1] : ((i<k) ? r[i+1][0][k] : r[i-1][0][k]));
            if(i==k)
                t[i][0][k] = t[i][0][k-1];
            else
                t[i][0][k] = t[i][0][k-1] | c[i][0][k] & r[i][0][k];

        }
       
        c[(N-1)][0][k] = c[(N-1)][1][k];
        r[(N-1)][0][k] = (((N-1)==k) ? t[(N-1)][0][k-1] : (r[(N-1)-1][0][k]));
        if((N-1)==k)
            t[(N-1)][0][k] = t[(N-1)][0][k-1];
        else
            t[(N-1)][0][k] = t[(N-1)][0][k-1] | c[(N-1)][0][k] & r[(N-1)][0][k];
          
        for(i=1..(N-1))
            for(j=1..(N-1)) {
                c[i][j][k] = c[i][j-1][k];
                r[i][j][k] = r[i-1][j][k];
                if((i==k)|(j==k))
                    t[i][j][k] = t[i][j][k-1];
                else
                    t[i][j][k] = t[i][j][k-1] | c[i][j][k] & r[i][j][k];
            }
        //--- END WORKAROUND ---

        pause;
    }
}


// ----------------------------------------------------------------
// The disadvantage of the algorithm in LocDepWarshall.qrz is that the flows of
// the propagating variables are done for most k's in two directions.
// The following reindexed version removes this by diagonally shifting
// the matrix t[][][k] in each step. Of course, the rotations can
// also be performed on the original algorithm, which yields the
// variant RotWarshall below (just for fun). RotLocDepWarshall
// is more interesting, since now the flows of c and r are
// unidirectional.
// ----------------------------------------------------------------