// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// The following module implements a multiplication of radix-B numbers. The   //
// computation is done by summing up the usual partial products by the use of //
// carry-save adders for the intermediate steps and a carry-ripple adder for  //
// the final step.                                                            //
// The depth of the circuit is seen by observing that the evaluation can      //
// proceed row-wise for i=0..M-2, and columnwise for the final row. Thus, the //
// depth of the circuit is O(M+N) and therefore not optimal.                  //
// The Quartz simulator requires T(M,N) = 2M+2N micro steps per macro step.   //
// ************************************************************************** //

macro B = 4;     // base of the radix numbers
macro M = 4;     // number of digits used for x
macro N = 3;     // number of digits used for y
macro natval(x,m) = sum(i=0..m-1) (x[i] * exp(B,i));


module NatMulCSACRA([M]nat{B} ?x,[N]nat{B} ?y,[M+N]nat{B} p) {
    event [M][N]nat{B} pp;      // digits of partial products
    event [M][N]int{B} cp;      // carries for summation in CSA array
    event [N]int{2} cr;         // carries for summation in final CRA
    assert(B==2*(B/2));         // check that B is even
    // -------------------------------------------------------------------------
    // construct carry-save adder array
    // this part has depth O(M) since each row can be evaluated in O(1)
    // -------------------------------------------------------------------------
    for(i=0..M-1) {
        for(j=0..N-1)
            // pp[i][N..0] = pp[i-1][N-1..1] + cp[i-1][N-1..0] + x[i]*y[N-1..0]
            let(pin = (i==0 ? 0 : (j==N-1 ? 0 : pp[i-1][j+1])))
            let(cin = (i==0 ? 0 : cp[i-1][j])) {
            event nat{B*B} sm;
            sm = x[i] * y[j] + pin + cin;
            cp[i][j] = sm / B;
            pp[i][j] = sm % B;
        }
    }
    // -------------------------------------------------------------------------
    // final addition p[M+N-1..M] = pp[M-1][N-1..1] + cp[M-1][N-1..0] using CRA
    // -------------------------------------------------------------------------
    for(k=0..M-1) p[k] = pp[k][0];
    for(j=0..N-1)
        let(pin = (j==N-1 ? 0 : pp[M-1][j+1]))
        let(cin = (j==0 ? 0 : cr[j-1])) {
        event nat{2*B} sm;
        sm = pin + cp[M-1][j] + cin;
        cr[j] = sm / B;
        p[M+j] = sm % B;
    }
    // -------------------------------------------------------------------------
    // check the result
    // -------------------------------------------------------------------------
    assert(natval(p,M+N) == natval(x,M) * natval(y,N));
}
drivenby MinInt_MinInt {
    x[M-1] = B/2;
    for(i=0..M-2)
        x[i] = 0;
    y[N-1] = B/2;
    for(i=0..N-2)
        y[i] = 0;
}
drivenby MinInt_MaxInt {
    x[M-1] = B/2;
    for(i=0..M-2)
        x[i] = 0;
    y[N-1] = (B/2)-1;
    for(i=0..N-2)
        y[i] = B-1;
}
drivenby MaxInt_MinInt {
    x[M-1] = (B/2)-1;
    for(i=0..M-2)
        x[i] = B-1;
    y[N-1] = B/2;
    for(i=0..N-2)
        y[i] = 0;
}
drivenby MaxInt_MaxInt {
    x[M-1] = (B/2)-1;
    for(i=0..M-2)
        x[i] = B-1;
    y[N-1] = (B/2)-1;
    for(i=0..N-2)
        y[i] = B-1;
}
drivenby Test01 {
    for(i=0..M-1)
        x[i] = (-B/2-(i+1)) % B;
    for(i=0..N-1)
        y[i] = (-B/2-(i+1)) % B;
}
drivenby Test02 {
    for(i=0..M-1)
        x[i] = (-B/2-(i+1)) % B;
    for(i=0..N-1)
        y[i] = (-B-1-i) % B;
}
drivenby Test03 {
    for(i=0..M-1)
        x[i] = (-B-1-i) % B;
    for(i=0..N-1)
        y[i] = (-B/2-(i+1)) % B;
}
drivenby Test04 {
    for(i=0..M-1)
        x[i] = (-B-1-i) % B;
    for(i=0..N-1)
        y[i] = (-B-1-i) % B;
}