// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// The algorithm below improves module EigenvaluesShiftQR in that two shift   //
// operations are merged (to obtain the so-called double shift QR method).    //
// This is very important to achieve convergence for matrices with complex    //
// eigenvalues, since the shift p = a[N-1][N-1] used in EigenvaluesShiftQR    //
// will keep all entries of the matrices Q_k,R_k and A_k in the real numbers. //
// Thus, it cannot converge for matrices with complex eigenvalues. One way to //
// deal with this problem is to work with complex numbers using Wilkinson's   //
// shift as shown in module EigenvaluesQRC, another way is to perform a double//
// shift with the eigenvalues of the 2x2 matrix [[A[N-2][N-2],A[N-2][N-1]],   //
// [A[N-2][N-1],A[N-1][N-1]]]. It is interesting that these computation can   //
// be done with real numbers: Starting with matrix A_k, we consider 2x2 matrix//
// in the lower right corner and compute its eigenvalues e1 and e2 (that may  //
// be complex numbers). If these were used as shifts in QR-steps, we would    //
// compute B_k := R_k*Q_k + e1*E where Q_k*R_k := A_k - e1*E; and in the next //
// step, A_k+1 := R_k'*Q_k' + e2*E where Q_k'*R_k' := B_k - e2*E. Thus, we    //
// have Q_k'*R_k' = R_k*Q_k + (e1-e2)*E, and by multiplying with Q_k left and //
// R_k right, we have  Q_k*Q_k'*R_k'*R_k = Q_k*R_k*Q_k*R_k + (e1-e2)*Q_k*R_k, //
// i.e., Q_k*Q_k'*R_k'*R_k = Q_k*R_k*(Q_k*R_k + (e1-e2)*E). By definition of  //
// Q_k and R_k, we have Q_k*Q_k'*R_k'*R_k = (A_k - e1*E) * (A_k - e2*E), so   //
// that (Q_k*Q_k')*(R_k'*R_k) can also be obtained as the QR-decomposition of //
// (A_k-e1*E)*(A_k-e2*E) = A_k*A_k - 2*Re(e1)*A_k + |e_1|^2*E, which is a     //
// real valued matrix. Considering the 2x2 matrix [[A[N-2][N-2],A[N-2][N-1]], //
// [A[N-2][N-1],A[N-1][N-1]]] =:G, we can see that the matrix can be computed //
// as follows: (A_k-e1*E)*(A_k-e2*E) = A_k*A_k - trace(G)*A_k + det(G)*E.     //
// ************************************************************************** //

macro N = 5;

// numeric precision
macro rabs(x) = (x<0 ? -x : +x);
macro sqrt(x) = exp(x,0.5);
macro eps = 1.0e-10;


module EigenvaluesShift2QR([N][N]real ?m,a,[N]real e,event !rdy) {
    [N][N]real b,q,r;   // matrices q,r of QR-decomposition
    real trcG,detG;     // to determine double shift
    nat{N+1} n;         // current size of the matrix (due to deflation)
    event QRrdy;        // whether QR-decomposition terminated
    
    // initialize
    n = N;
    for(i=0..N-1)
        for(j=0..N-1)
            a[i][j] = m[i][j];

    do {
        do {
            // compute trace and determinant of corner matrix for double shift
            trcG = a[n-2][n-2] + a[n-1][n-1];
            detG = a[n-2][n-2] * a[n-1][n-1] - a[n-2][n-1] * a[n-1][n-2];
      
            // compute A_k*A_k - trace(G)*A_k + det(G)*E in matrix b 
            for(i=0..N-1)
                for(j=0..N-1)
                    next(b[i][j]) = detG - trcG*a[i][j] + sum(k=0..N-1) (a[i][k]*a[k][j]);
            pause;
      
            // decompose double shifted matrix b into factors Q_k and R_k
            DecompQR(b,q,r,QRrdy);
      
            // compute A_{k+1} := Q_k^T * A_k * Q_k and check for convergence
            for(i=0..N-1) {
                for(j=0..N-1) {
                    b[i][j] = sum(k=0..N-1) (a[i][k]*q[k][j]);
                    next(a[i][j]) = sum(k=0..N-1) (q[k][i]*b[k][j]);
                }
            }
            pause;
        // the following termination condition must be adapted for complex roots
        } while(rabs(a[n-1][n-2])>eps);
        next(n) = n-1;
        // eigenvalue e[n-1] has been found, we can deflate the matrix
        e[n-1] = a[n-1][n-1];
        for(i=0..N-1) {
            next(a[i][n-1]) = 0.0;
            next(a[n-1][i]) = 0.0;
        }
        pause;
    } while(n>1);

    e[n-1] = a[n-1][n-1];
    emit(rdy);
}
drivenby m0 {
    m[0] = [1.0,2.0,3.0,4.0,5.0];
    m[1] = [2.0,3.0,4.0,5.0,1.0];
    m[2] = [3.0,4.0,5.0,1.0,2.0];
    m[3] = [4.0,5.0,1.0,2.0,3.0];
    m[4] = [5.0,1.0,2.0,3.0,4.0];
    await(rdy);
}
drivenby m1 {
    // note: x^5 - 15x^4 + 85x^3 - 225x^2 + 274x - 120 
    //       = (x-1)(x-2)(x-3)(x-4)(x-5)
    m[0] = [ 15.0, -85.0, 225.0, -274.0, 120.0];
    m[1] = [  1.0,   0.0,   0.0,    0.0,   0.0];
    m[2] = [  0.0,   1.0,   0.0,    0.0,   0.0];
    m[3] = [  0.0,   0.0,   1.0,    0.0,   0.0];
    m[4] = [  0.0,   0.0,   0.0,    1.0,   0.0];
    await(rdy);
}
drivenby m2 {
    // note: x^5 - 2x^4 - 3x^3 + 6x^2 + 2x - 4 = (x-2)(x-1)(x+1)(x-sqrt(2))(x+sqrt(2))
    m[0] = [  2.0,   3.0,  -6.0,   -2.0,   4.0];
    m[1] = [  1.0,   0.0,   0.0,    0.0,   0.0];
    m[2] = [  0.0,   1.0,   0.0,    0.0,   0.0];
    m[3] = [  0.0,   0.0,   1.0,    0.0,   0.0];
    m[4] = [  0.0,   0.0,   0.0,    1.0,   0.0];
    await(rdy);
}
//drivenby m3 {
//     note: x^5+6x^4+14x^3+24x^2+33x+18 = (x+3)(x+2)(x+1)(x^2+3)
//    m[0] = [ -6.0, -14.0, -24.0,  -33.0, -18.0];
//    m[1] = [  1.0,   0.0,   0.0,    0.0,   0.0];
//    m[2] = [  0.0,   1.0,   0.0,    0.0,   0.0];
//    m[3] = [  0.0,   0.0,   1.0,    0.0,   0.0];
//    m[4] = [  0.0,   0.0,   0.0,    1.0,   0.0];
//    await(rdy);
//}
//drivenby m4 {
//     note: x^5 - 2x^4 + 2x^3 - 4x^2 - 3x + 6 = (x-2)(x-1)(x+1)(x^2+3)
//    m[0] = [  2.0,  -2.0,   4.0,    3.0,  -6.0];
//    m[1] = [  1.0,   0.0,   0.0,    0.0,   0.0];
//    m[2] = [  0.0,   1.0,   0.0,    0.0,   0.0];
//    m[3] = [  0.0,   0.0,   1.0,    0.0,   0.0];
//    m[4] = [  0.0,   0.0,   0.0,    1.0,   0.0];
//    await(rdy);
//}