```// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// The algorithm below computes a LUP decomposition of a given NxN matrix as  //
// described in Cormen,Leiserson,Rivest and Stein's textbook on algorithms.   //
// Hence, the module computes three matrices L,U, and P such that for the     //
// input matrix A, we have P * A = L * U, with a lower left triangular matrix //
// L, an upper right triangular matrix U, and a permutation matrix P. Instead //
// of making use of three matrices L,U, and P, the algorithm below stores L   //
// and U in the matrix b, and the permutation matrix P in a single permutation//
// array p (so that P[i][j]=1 if p[i]=j holds, and P[i][j]=0 otherwise).      //
// Similar to module DecompLU2, the diagonal of L is not stored which consists//
// of only 1s.                                                                //
// ************************************************************************** //

macro N = 3;
macro rabs(x) = (x>0.0?x:-x);
macro l(i,j) = (i<j?0:(i==j?1:b[i][j]));
macro u(i,j) = (i<=j?b[i][j]:0);

module DecompLUP([N][N]real ?a,b,[N]nat p,event !rdy) {
real piv;
nat{N} k1;

for(i=0..N-1) {
p[i] = i;
for(j=0..N-1)
b[i][j] = a[i][j];
}

for(k=0..N-1) {
// determine pivot piv as largest value of b[k][k] .. b[N][k]
// this requires the use of a permutation matrix, but leads to higher precision
piv = 0.0;
for(i=k..N-1) {
if(rabs(b[i][k])>piv) {
next(piv) = rabs(b[i][k]);
next(k1) = i;
pv: pause;
}
}
// update permutation p
next(p[k]) = p[k1];
next(p[k1]) = p[k];
// exchange `rows' b[k][1..N-1] and b[k1][1..N-1]
for(i=0..N-1) {
next(b[k][i]) = b[k1][i];
next(b[k1][i]) = b[k][i];
}
ex: pause;
for(i=k+1..N-1) {
let(lambda = b[i][k]/b[k][k]) {
next(b[i][k]) = lambda;
for(j=k+1..N-1)
next(b[i][j]) = b[i][j] - lambda * b[k][j];
}
}
cp: pause;
}
emit(rdy);
}
drivenby {
a[0][0] = 1.0;
a[0][1] = 2.0;
a[0][2] = 0.0;
a[1][0] = 3.0;
a[1][1] = 4.0;
a[1][2] = 4.0;
a[2][0] = 5.0;
a[2][1] = 6.0;
a[2][2] = 3.0;
await(rdy);
// check the result, i.e. that P * a = L * U holds, where L and U are stored in b
for(i=0..N-1)
for(j=0..N-1)
assert(a[p[i]][j] == sum(k=0..N-1) (l(i,k) * u(k,j)));
}
```