// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// The algorithm below computes a LUP decomposition of a given NxN matrix as  //
// described in Cormen,Leiserson,Rivest and Stein's textbook on algorithms.   //
// Hence, the module computes three matrices L,U, and P such that for the     //
// input matrix A, we have P * A = L * U, with a lower left triangular matrix //
// L, an upper right triangular matrix U, and a permutation matrix P. Instead //
// of making use of three matrices L,U, and P, the algorithm below stores L   //
// and U in the matrix b, and the permutation matrix P in a single permutation//
// array p (so that P[i][j]=1 if p[i]=j holds, and P[i][j]=0 otherwise).      //
// Similar to module DecompLU2, the diagonal of L is not stored which consists//
// of only 1s.                                                                //
// ************************************************************************** //

macro N = 3;
macro rabs(x) = (x>0.0?x:-x);
macro l(i,j) = (i<j?0:(i==j?1:b[i][j])); 
macro u(i,j) = (i<=j?b[i][j]:0);


module DecompLUP([N][N]real ?a,b,[N]nat p,event !rdy) {
    real piv;
    nat{N} k1;
    
    for(i=0..N-1) {
        p[i] = i;
        for(j=0..N-1)
            b[i][j] = a[i][j];
    }
    
    for(k=0..N-1) {
        // determine pivot piv as largest value of b[k][k] .. b[N][k]
        // this requires the use of a permutation matrix, but leads to higher precision
        piv = 0.0;
        for(i=k..N-1) {
            if(rabs(b[i][k])>piv) {
                next(piv) = rabs(b[i][k]);
                next(k1) = i;
                pv: pause;
            }
        }
        // update permutation p
        next(p[k]) = p[k1];
        next(p[k1]) = p[k];
        // exchange `rows' b[k][1..N-1] and b[k1][1..N-1]
        for(i=0..N-1) {
            next(b[k][i]) = b[k1][i];
            next(b[k1][i]) = b[k][i];
        }
        ex: pause;
        for(i=k+1..N-1) {
            let(lambda = b[i][k]/b[k][k]) {
                next(b[i][k]) = lambda;
                for(j=k+1..N-1)
                    next(b[i][j]) = b[i][j] - lambda * b[k][j];
            }
        }
        cp: pause;
    }
    emit(rdy);
}
drivenby {
    a[0][0] = 1.0;
    a[0][1] = 2.0;
    a[0][2] = 0.0;
    a[1][0] = 3.0;
    a[1][1] = 4.0;
    a[1][2] = 4.0;
    a[2][0] = 5.0;
    a[2][1] = 6.0;
    a[2][2] = 3.0;
    await(rdy);
    // check the result, i.e. that P * a = L * U holds, where L and U are stored in b   
    for(i=0..N-1)
        for(j=0..N-1)
            assert(a[p[i]][j] == sum(k=0..N-1) (l(i,k) * u(k,j)));
}