```// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// The division of two n-bit radix-2 numbers [N]bool x,y can be reduced to    //
// the problem to compute the reciprocal z of y with n-bits, i.e., precision  //
// 2^{-n} which is a number z with 0 <= 1/y - z <= 2^{-n}. Given z, we can    //
// compute the quotient either as q=floor(x*z) or q=floor(x*z)+1, depending on//
// whether the remainder x-q*y satisfies x-q*y<y.                             //
//   According to BeCH84, the reciprocal z can be computed as a geometric sum //
// 2^j * sum(i=0..n-1) u^i, where u = 1-y*2^{-j} and j>=2 is that uniquely    //
// defined integer 2^{j-1}<=y<2^j (i.e. ceil(log(y))). The number u must be   //
// represented by n-bits, and its range for y>1 is 0 < u <= 0.5. Thus, the    //
// powers are in the range 0< u^i <= 0.5^i. To avoid loss of precision, BeCH84//
// suggested to represent u^i with the entire i*n bits that come out of the   //
// integer multiplication. The arithmetic operations below are based on a     //
// fixed point representation having 0 bits to the left and N^2 bits to the   //
// right of the decimal point.                                                //
//  Assuming that multiplication of M-bit numbers is done in O(log(M)) time,  //
// the algorithm below computes the reciprocal in time O(log(N)^2), but it is //
// only of theoretical interest due to the large bit numbers: it requires the //
// multiplication of N^2-bit numbers.                                         //
//  In the algorithm below, we use fixed point arithmetic where one bit is on //
// the left and N*N-1 bits are on the right of the decimal point. The output  //
// z contains the N bits of the reciprocal approximation right to the decimal //
// point with an error up to 2^{-N}. Note that it may still be the case that  //
// none of the bits is correct, since 0.1 may be approximated as 0.01111111.  //
// ************************************************************************** //

macro N = 8;
macro bval(x) = (x ? 1 : 0);
macro nval(x) = sum(i=0..N-1) (bval(x{i}) * exp(2,i));
macro fval(x) = nat2real(nval(x)) * exp(2.0,-N);
macro fmul(x,y) = (nat2bv(bv2nat(x) * bv2nat(y),2*N*N){-2:-(N*N+1)});
macro fadd(x,y) = (nat2bv(bv2nat(x) + bv2nat(y),N*N){-1:-(N*N)});
macro cmpl(x) = nat2bv(bv2nat(not x)+1,N);
macro rsh(x)  = false@(x{-1:1});

module ReciprocalBeCH(bv{N} ?y,bv{N} z,event rdy) {
bv{N} u;
nat{N+1} j;
[N]bv{N*N} p;

// determine u as maximal position where y{-1:j} are all zero
u = y;
j = N;
while(!u{-1}) {
next(u) = u{-2:0}@false;
next(j) = j-1;
wu: pause;
}
next(u) = cmpl(u);
nc: pause;

// powering, i.e. compute the first N powers of u
p[0] = true@{false::N*N-1};
for(i=1..N-1)
p[i] = false@u@{false::N*(N-1)-1};
for(i=0..log(N)-1) {
for(j=exp(2,i)..N-1) {
next(p[j]) = fmul(p[j],p[j-exp(2,i)]);
}
wp: pause;
}

// summing up the powers of u
for(i=0..log(N)-1) {
for(j=exp(2,i)..N-1) {
}
ws: pause;
}

// Extract the desired n-bits from p[N-1]; here we have the problem
// that e.g. 0.5 is 0b010..., but we obtain 0b0011.. which is the same.
// To avoid difficult rounding, we allow the error to become 2^{-N}
// which happens for powers of 2.
z = p[N-1]{-1:-N};
while(j>1) {
next(z) = rsh(z);
next(j) = j-1;
wr : pause;
}

// signal termination of the computation
emit(rdy);

// check the correctness
{real realz,recy,err;
realz = fval(z);
recy = 1.0/nat2real(nval(y));
err = recy - realz;
assert(realz <= recy);
assert(err <= exp(2.0,-N));
}

}
drivenby {
y = nat2bv(10,N);
await(rdy);
}
```