// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// The division of two n-bit radix-2 numbers [N]bool x,y can be reduced to    //
// the problem to compute the reciprocal z of y with n-bits, i.e., precision  //
// 2^{-n} which is a number z with 0 <= 1/y - z <= 2^{-n}. Given z, we can    //
// compute the quotient either as q=floor(x*z) or q=floor(x*z)+1, depending on//
// whether the remainder x-q*y satisfies x-q*y<y.                             //
//  To compute an approximation of the reciprocal, we apply Newton's iteration//
// to the function f(x) := 1/x - b so that x_{i+1} := x_i*(2-y*x_i) is        //
// obtained. For the computation, we first scale the operand y by shifting it //
// to the right so that the first bit on the right of the binary point is 1.  //
// This way, the new operand ys is in the interval 0.5 <= ys < 1.0 so that its//
// reciprocal is 2.0 >= 1/ys > 1.0. It is then sufficient to work with fixed  //
// point numbers with two bits on the left and N bits to the right of the     //
// binary point. The result z contains finally the N computed bits of the     //
// approximation to the reciprocal of 1/y, where only the N bits to the right //
// of the binary point are given.                                             //
//  In contrast to ReciprocalNewton.qrz, the version below scales bitvectors  //
// in a single step.                                                          //
// ************************************************************************** //

macro N = 32;
macro bval(x) = (x ? 1 : 0);
macro fval(x) = exp(2.0,-N) * sum(i=0..N+1) (bval(x{i}) * exp(2,i));
macro fmul(x,y) = (nat2bv(bv2nat(x) * bv2nat(y),2*(N+2)){2*N+1:N});
macro fsub(x,y) =  nat2bv(bv2nat(x) - bv2nat(y),N+2);
macro rsh(x) = false@(x{-1:1});
macro lsh(x) = x{-2:0}@false;

macro one = false @ true  @ {false::N};
macro two = true  @ false @ {false::N};


module ReciprocalNewton2(bv{N} ?y,bv{N} z,event rdy) {
    bv{N+2} x_old,x_new,ys;
    nat{N+1} j;
    real r_new,r_ys;

    // scale the operand y to ys = y*2^-j into interval [0.5,1.0]
    // determine j such that y{j-1} is the leftmost bit that is true
    for(i=1..N) 
        if(y{N-i} & forall(k=1..i-1) !y{N-k}) {
            j = N-i+1;
            ys = false@false@y{N-i:0}@{false::i-1};
            r_ys = fval(ys);
        }

    // iteration to compute 1/ys starting from 1.5 with max. initial error 0.5
    // log(N) iterations are required to compute the N bits
    x_new = false@true@true@{false::N-1};
    
    do {
        r_new = fval(x_new);
        next(x_old) = x_new; 
        next(x_new) = fmul(x_new,fsub(two,fmul(ys,x_new)));
        wi: pause;
    } while(x_old != x_new);
    r_new = fval(x_new);

    // having 1/ys, scale it to obtain 1/y
    for(i=1..N)
        if(i==j)
            next(x_new) = {false::i}@x_new{-1:i};
    ws: pause;
    r_new = fval(x_new);
    z = x_new{N-1:0};
    emit(rdy);

}
drivenby {
    y = nat2bv(10000,N);
    await(rdy);
}