// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// Telephone numbers are recursively defined as T(n) := T(n-1) + (n-1)*T(n-2) //
// with T(0) := 1 and T(1) := 1. This definition can also be rephrased as a   //
// matrix multiplication: (T(n-1),T(n)) = ([0,1],[n-1,1]) * (T(n-1),T(n-2)),  //
// so that (T(n-1),T(n)) = ([0,1],[n-1,1]) *...* ([0,1],[0,1]) * (T(0),T(1)). //
// For this reason, one can compute all prefixes of the matrix products to    //
// compute all telephone numbers in time O(log(n)) with O(n) work.            //
// remark: tele[0...] = 1, 1, 2, 4, 10, 26, 76, 232, 764, 2620, 9496,...      //
// ************************************************************************** //

macro M = 4;
macro N = exp(2,M);

macro matMul(a,b) =
    (a.0*b.0+a.1*b.2, a.0*b.1+a.1*b.3,
     a.2*b.0+a.3*b.2, a.2*b.1+a.3*b.3);

module Telephone([N]nat tele, event rdy) {
    [N](nat*nat*nat*nat) t;

    // initialize array f with the Fibonacci matrix
    for(i=0..N-1) t[i] = (0,1,i,1);

    // bottom-up traversal requires time log(N)
    // with N-1 work and N/2 processors
    for(l=1..M) {
        for(j=1..N/exp(2,l)) {
            let(s = exp(2,l-1))
            next(t[2*j*s-1]) = matMul(t[2*j*s-1],t[(2*j-1)*s-1]);
        }
        pause;
    }

    // top-down traversal requires time log(N)-1
    // with N-log(N)-1 work and N/2 processors
    for(i=1..M-1) {
        let(l = M-i)
        for(j=1..exp(2,i)-1) {
            let(s = exp(2,l-1))
            next(t[(2*j+1)*s-1]) = matMul(t[(2*j+1)*s-1],t[2*j*s-1]);
        }
        pause;
    }

    // get the results and signal termination
    tele[0] = 1;
    for(i=0..N-2) tele[i+1] = t[i].2+t[i].3;
    emit(rdy);

}
drivenby {
    await(rdy);
}