// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //


// Digits of the first factor. x0 contributes to the lowest product digit p0
// (p0 = (x0*y0) % 256), so x0 is the least significant digit and x3 the most
// significant one; the radix 256 follows from the /256 and %256 splits used
// by RadixBMulDadda.
// NOTE(review): each digit is presumably expected to lie in 0..255 -- nothing
// in this file enforces that; confirm against the code that fills x0..x3.
nat x0,x1,x2,x3;
// Digits of the second factor, same layout as x0..x3 (y0 least significant).
// NOTE(review): same presumed 0..255 range as the x digits -- confirm.
nat y0,y1,y2,y3;
// Digits of the product, written by RadixBMulDadda; p0 is the least
// significant digit, p7 the most significant one.
nat p0,p1,p2,p3,p4,p5,p6,p7;
// RadixBMulDadda: multiplies the four-digit number x3..x0 by the four-digit
// number y3..y0 and writes the eight product digits to p7..p0. Digits are
// radix 256 and index 0 is the least significant digit.
//
// Reads:  x0..x3, y0..y3 (global)
// Writes: p0..p7 (global)
// NOTE(review): the carry handling below assumes every input digit is in
// 0..255 (so that the sum of two digits plus a carry stays below 512) --
// confirm that the producer of x/y guarantees this.
//
// Structure:
//   1. all 16 digit products x_i*y_j, each split into a low and a high digit;
//   2. column-wise reduction (Dadda scheme) until every column holds at most
//      two digits;
//   3. addition of the two remaining rows with a parallel-prefix carry tree.
// "Column k" below means the digits of weight 256^k.
thread RadixBMulDadda {
nat t0,t1,t10,t100,t101,t102,t103,t104,t105,t106,t107,t108,t109,t11,t110,t111,t113,t114,t115,t116,t117,t118,t119,t12,t120,t13,t14,t15,t16,t17,t18,t19,t2,t20,t21,t22,t23,t24,t25,t26,t27,t28,t29,t3,t30,t31,t32,t33,t34,t35,t36,t37,t38,t39,t4,t40,t41,t42,t43,t44,t45,t46,t47,t48,t49,t5,t50,t51,t52,t53,t54,t55,t56,t57,t58,t59,t6,t60,t61,t62,t63,t64,t65,t66,t67,t68,t69,t7,t70,t71,t72,t73,t74,t75,t76,t77,t78,t79,t8,t80,t81,t82,t83,t84,t85,t86,t87,t88,t89,t9,t90,t91,t92,t93,t94,t95,t96,t97,t98,t99;
bool t121,t122,t123,t124,t125,t126,t127,t128,t129,t130,t131,t132,t133,t134;
    // compute partial products
    // For every product x_i * y_j the low digit (% 256) belongs to column
    // i+j and the high digit (/ 256) to column i+j+1.
    t0 = x0 * y0;
    t1 = t0 / 256;
    t2 = t0 % 256;
    t3 = x0 * y1;
    t4 = t3 / 256;
    t5 = t3 % 256;
    t6 = x1 * y0;
    t7 = t6 / 256;
    t8 = t6 % 256;
    t9 = x0 * y2;
    t10 = t9 / 256;
    t11 = t9 % 256;
    t12 = x1 * y1;
    t13 = t12 / 256;
    t14 = t12 % 256;
    t15 = x2 * y0;
    t16 = t15 / 256;
    t17 = t15 % 256;
    t18 = x0 * y3;
    t19 = t18 / 256;
    t20 = t18 % 256;
    t21 = x1 * y2;
    t22 = t21 / 256;
    t23 = t21 % 256;
    t24 = x2 * y1;
    t25 = t24 / 256;
    t26 = t24 % 256;
    t27 = x3 * y0;
    t28 = t27 / 256;
    t29 = t27 % 256;
    t30 = x1 * y3;
    t31 = t30 / 256;
    t32 = t30 % 256;
    t33 = x2 * y2;
    t34 = t33 / 256;
    t35 = t33 % 256;
    t36 = x3 * y1;
    t37 = t36 / 256;
    t38 = t36 % 256;
    t39 = x2 * y3;
    t40 = t39 / 256;
    t41 = t39 % 256;
    t42 = x3 * y2;
    t43 = t42 / 256;
    t44 = t42 % 256;
    t45 = x3 * y3;
    t46 = t45 / 256;
    t47 = t45 % 256;
    // In every reduction step below two or three digits of one column are
    // added; the "% 256" part stays in that column and the "/ 256" part is
    // the carry digit that moves into the next higher column.
    // reduce heights of each column to 6
    // column 3
    t48 = t29 + t26;
    t49 = t48 / 256;
    t50 = t48 % 256;
    // column 4
    t51 = t38 + t35 + t32;
    t52 = t51 / 256;
    t53 = t51 % 256;
    // reduce heights of each column to 4
    // column 2
    t54 = t17 + t14;
    t55 = t54 / 256;
    t56 = t54 % 256;
    // column 3 (two adders)
    t57 = t13 + t10;
    t58 = t57 / 256;
    t59 = t57 % 256;
    t60 = t23 + t20 + t16;
    t61 = t60 / 256;
    t62 = t60 % 256;
    // column 4 (two adders)
    t63 = t19 + t49 + t53;
    t64 = t63 / 256;
    t65 = t63 % 256;
    t66 = t28 + t25 + t22;
    t67 = t66 / 256;
    t68 = t66 % 256;
    // column 5 (two adders)
    t69 = t34 + t31 + t52;
    t70 = t69 / 256;
    t71 = t69 % 256;
    t72 = t44 + t41 + t37;
    t73 = t72 / 256;
    t74 = t72 % 256;
    // column 6
    t75 = t47 + t43;
    t76 = t75 / 256;
    t77 = t75 % 256;
    // reduce heights of each column to 3
    // columns 2 to 6, one adder each
    t78 = t11 + t7;
    t79 = t78 / 256;
    t80 = t78 % 256;
    t81 = t50 + t55 + t59;
    t82 = t81 / 256;
    t83 = t81 % 256;
    t84 = t58 + t61 + t65;
    t85 = t84 / 256;
    t86 = t84 % 256;
    t87 = t64 + t67 + t71;
    t88 = t87 / 256;
    t89 = t87 % 256;
    t90 = t40 + t70 + t73;
    t91 = t90 / 256;
    t92 = t90 % 256;
    // reduce heights of each column to 2
    // columns 1 to 7, one adder each
    t93 = t8 + t5;
    t94 = t93 / 256;
    t95 = t93 % 256;
    t96 = t4 + t56 + t80;
    t97 = t96 / 256;
    t98 = t96 % 256;
    t99 = t62 + t79 + t83;
    t100 = t99 / 256;
    t101 = t99 % 256;
    t102 = t68 + t82 + t86;
    t103 = t102 / 256;
    t104 = t102 % 256;
    t105 = t74 + t85 + t89;
    t106 = t105 / 256;
    t107 = t105 % 256;
    t108 = t77 + t88 + t92;
    t109 = t108 / 256;
    t110 = t108 % 256;
    // Column 7: only the digit part is kept. The carry part t111 / 256 would
    // have weight 256^8, for which there is no product digit, so it is not
    // computed.
    t111 = t46 + t76 + t91;
    t113 = t111 % 256;
    // preliminary addition of the two remaining numbers
    // t114..t120 are the carry-free sums of columns 1..7; column 0 holds the
    // single digit t2 and needs no addition.
    t114 = t1 + t95;
    t115 = t94 + t98;
    t116 = t97 + t101;
    t117 = t100 + t104;
    t118 = t103 + t107;
    t119 = t106 + t110;
    t120 = t109 + t113;
    // compute generate and propagate pairs
    // generate  (sum > 255):  the column produces a carry by itself;
    // propagate (sum == 255): the column passes an incoming carry on.
    // Pairs (generate, propagate) for columns 1..7:
    //   (t121,t122) (t123,t124) (t125,t126) (t127,t128)
    //   (t129,t130) (t131,t132) (t133,t134)
    t121 = t114 > 255;
    t122 = t114 == 255;
    t123 = t115 > 255;
    t124 = t115 == 255;
    t125 = t116 > 255;
    t126 = t116 == 255;
    t127 = t117 > 255;
    t128 = t117 == 255;
    t129 = t118 > 255;
    t130 = t118 == 255;
    t131 = t119 > 255;
    t132 = t119 == 255;
    t133 = t120 > 255;
    t134 = t120 == 255;
    // parallel prefix tree for computing carry bits
    // Each step combines a pair (g,p) with the pair (g',p') of the block of
    // columns directly below it: g = p & g' | g, p = p & p'. The generate
    // bit must be updated before the propagate bit because it reads the old
    // propagate value. When the tree is done, the generate bit of column k
    // is the carry out of columns 1..k.
    // up-level 1
    t123 = t124 & t121 | t123;
    t124 = t124 & t122;
    t127 = t128 & t125 | t127;
    t128 = t128 & t126;
    t131 = t132 & t129 | t131;
    t132 = t132 & t130;
    // up-level 2
    t127 = t128 & t123 | t127;
    t128 = t128 & t124;
    // down-level 4
    t131 = t132 & t127 | t131;
    t132 = t132 & t128;
    // down-level 5
    // t133 ends up as the carry out of column 7; no product digit reads it.
    // It is combined exactly once: t131/t132 are final at this point, so
    // repeating the step could not change the result.
    t133 = t134 & t131 | t133;
    t134 = t134 & t132;
    t125 = t126 & t123 | t125;
    t126 = t126 & t124;
    t129 = t130 & t127 | t129;
    t130 = t130 & t128;
    // compute final sum digits as the digits of the product
    // The carry into column k is the prefix generate bit of column k-1.
    // Column 1 has no incoming carry, so t114 is left unchanged.
    t120 = t120+(t131?1:0);
    t119 = t119+(t129?1:0);
    t118 = t118+(t127?1:0);
    t117 = t117+(t125?1:0);
    t116 = t116+(t123?1:0);
    t115 = t115+(t121?1:0);
    // get the product digits
    // "% 256" strips the part of each column sum that was already forwarded
    // as a carry.
    p0 = t2;
    p1 = t114 % 256;
    p2 = t115 % 256;
    p3 = t116 % 256;
    p4 = t117 % 256;
    p5 = t118 % 256;
    p6 = t119 % 256;
    p7 = t120 % 256;
}