// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// Binomial numbers B(n,k) are defined as B(n,k) := F(n)/(F(k)*F(n-k)) with   //
// the factorial numbers F(n) = 1*2*3*...*n. The following PRAM algorithm     //
// computes B(n,0),...,B(n,n) for a given number n in time O(log(n)) with O(n)//
// work. To this end, we first compute all factorial numbers up to n using the//
// parallel prefix algorithm, and then compute all binomial numbers in a final//
// step.                                                                      //
// ************************************************************************** //

macro M = 4;
macro N = exp(2,M);

module Binomial([N]nat b, event rdy) {

    [N]nat fac;
    // initialize array fac for computing all factorials
    for(i=0..N-1) fac[i] = i+1;

    // bottom-up traversal requires time log(N)
    // with N-1 work and N/2 processors
    for(l=1..M) {
        for(j=1..N/exp(2,l)) {
            let(s = exp(2,l-1))
            next(fac[2*j*s-1]) = fac[2*j*s-1] * fac[(2*j-1)*s-1];
        }
        pause;
    }

    // top-down traversal requires time log(N)-1
    // with N-log(N)-1 work and N/2 processors
    for(i=1..M-1) {
        let(l = M-i)
        for(j=1..exp(2,i)-1) {
            let(s = exp(2,l-1))
            next(fac[(2*j+1)*s-1]) = fac[(2*j+1)*s-1] * fac[2*j*s-1];
        }
        pause;
    }

    //compute the binomial numbers (note that fac[i] is the factorial of i+1)
    b[0] = 1;
    for(k=1..N-1)
        b[k] = fac[N-1] / (fac[k-1] * fac[N-k-1]);
    emit(rdy);

}
drivenby {
    await(rdy);
}