// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// The algorithm below computes a LU decomposition of a given NxN matrix as   //
// described in Cormen,Leiserson,Rivest and Stein's textbook on algorithms.   //
// In contrast to module DecompLU, the version in this file stores the        //
// matrices l and u in a single matrix c, where the lower left and upper right//
// triangles correspond with the matrices l and u, respectively (the diagonal //
// of 1s of matrix l is not contained in c).                                  //
// ************************************************************************** //

macro N = 3;
macro l(i,j) = (i<j?0:(i==j?1:c[i][j])); 
macro u(i,j) = (i<=j?c[i][j]:0);

module DecompLU2([N][N]real ?a,b,c,event !rdy) {
    
    // store input matrix a in matrix b
    for(i=0..N-1)
        for(j=0..N-1)
            b[i][j] = a[i][j];
    
    // compute LU decomposition of b
    for(k=0..N-1) {
        let(piv=b[k][k]) {
            c[k][k] = piv;
            for(i=k+1..N-1) {
                c[i][k] = b[i][k]/piv;
                c[k][i] = b[k][i];
            }
        }
        for(i=k+1..N-1) {
            for(j=k+1..N-1)
                next(b[i][j]) = b[i][j] - c[i][k] * c[k][j];
        }
        cp: pause;
    }
    emit(rdy);
}
drivenby {
    a[0][0] = 1.0;
    a[0][1] = 2.0;
    a[0][2] = 0.0;
    a[1][0] = 3.0;
    a[1][1] = 4.0;
    a[1][2] = 4.0;
    a[2][0] = 5.0;
    a[2][1] = 6.0;
    a[2][2] = 3.0;
    await(rdy);
    // check the result, i.e. that a = L * U holds, where L and U are stored in b   
    for(i=0..N-1)
        for(j=0..N-1)
            assert(a[i][j] == sum(k=0..N-1) (l(i,k) * u(k,j)));
}