// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// The following module implements a multiplication of radix-B numbers. The   //
// computation is done by summing up the usual partial products by the use of //

// carry-ripple adders both for the intermediate steps and the final step:    //
//                                                                            //
//                               pp[0][1] pp[0][0] p[0]                       //
//                      pp[1][1] pp[1][0]  p[1]                               //
//             pp[2][1] pp[2][0]  p[2]                                        //
//     p[5]     p[4]     p[3]                                                 //
//    ------------------------------------------------------                  //
//     p[5]     p[4]     p[3]     p[2]     p[1]     p[0]                      //
//                                                                            //
// where B*pp[i][N-1..0]+p[i] = y[N-1..0] * x[i] + pp[i-1][N-1..0] for i>0    //
// and   B*pp[i][N-1..0]+p[i] = y[N-1..0] * x[i] for i==0.                    //

//                                                                            //

// The depth of the circuit is seen by observing that the evaluation can      //

// proceed diagonally through the array. Thus, the depth of the circuit       //

// is O(M+N) and therefore not optimal.                                       //

// The Quartz simulator requires T(M,N) = 4M+2N-4 micro steps per macro step. //
// ************************************************************************** //

macro B = 4;     // base of the radix numbers
macro M = 4;     // number of digits used for x
macro N = 3;     // number of digits used for y
macro natval(x,m) = sum(i=0..m-1) (x[i] * exp(B,i));

module NatMulCRACRA([M]nat{B} ?x,[N]nat{B} ?y,[M+N]nat{B} p) {
    event [M][N+1]nat{B} pp;  // digits of partial products
    event [M][N]nat{B} cp;  // carries for summation
    for(i=0..M-1) {
        // compute pp[i][N..0] = y[N-1..0] * x[i] + pp[i-1][N..1]
        for(j=0..N-1) 
            let(pin = (i==0 ? 0 : pp[i-1][j+1]))
            let(cin = (j==0 ? 0 : cp[i][j-1]))
            let(sm = x[i] * y[j] + pin + cin) {// has type nat{B*B}
            cp[i][j] = sm / B;
            pp[i][j] = sm % B;
        }
        pp[i][N] = cp[i][N-1];
    }
    for(k=0..M-1) p[k] = pp[k][0];
    for(k=0..N-1) p[k+M] = pp[M-1][k+1];
    // check the specification
    assert(natval(p,M+N) == natval(x,M) * natval(y,N));
}
drivenby Test01 {
    for(i=0..M-1)
        x[i] = i % B;
    for(i=0..N-1)
        y[i] = (N+i) % B;
}
drivenby Test02 {
    for(i=0..M-1)
        x[i] = (2*i+1) % B;
    for(i=0..N-1)
        y[i] = (N+2*i) % B;
}