```// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// Fibonacci numbers are recursively defined as: F(n) := F(n-1) + F(n-2) with //
// F(0) := 0 and F(1) := 1. This definition can also be rephrased as a matrix //
// multiplication: (F(n-1),F(n)) = ([0,1],[1,1]) * (F(n-2),F(n-1)), so that   //
// (F(n-1),F(n)) = ([0,1],[1,1])^n * (0,1) holds.                             //
// For this reason, one can compute all powers of the above matrix to compute //
// all Fibonacci numbers in time O(log(n)) with O(n) work.                    //
// ************************************************************************** //

macro M = 4;
macro N = exp(2,M);

macro matMul(a,b) =
(a.0*b.0+a.1*b.2, a.0*b.1+a.1*b.3,
a.2*b.0+a.3*b.2, a.2*b.1+a.3*b.3);

module Fibonacci([N]nat fib, event rdy) {
[N](nat*nat*nat*nat) f;

// initialize array f with the Fibonacci matrix
for(i=0..N-1) f[i] = (0,1,1,1);

// bottom-up traversal requires time log(N)
// with N-1 work and N/2 processors
for(l=1..M) {
for(j=1..N/exp(2,l)) {
let(s = exp(2,l-1))
next(f[2*j*s-1]) = matMul(f[2*j*s-1],f[(2*j-1)*s-1]);
}
pause;
}

// top-down traversal requires time log(N)-1
// with N-log(N)-1 work and N/2 processors
for(i=1..M-1) {
let(l = M-i)
for(j=1..exp(2,i)-1) {
let(s = exp(2,l-1))
next(f[(2*j+1)*s-1]) = matMul(f[(2*j+1)*s-1],f[2*j*s-1]);
}
pause;
}

// get the results and signal termination
for(i=0..N-1) fib[i] = f[i].1;
emit(rdy);

}
drivenby {
await(rdy);
}
```