// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //


// Operands (x, y) and result (p) of the multiplication, stored as arrays of
// base-256 digits. The thread below adds the low digit of x[i]*y[j] into
// column i+j and writes column k to p[k], so index 0 is the least significant
// digit.
// NOTE(review): the code assumes every x[i] and y[i] is < 256; nothing here
// checks or enforces that.
[4]nat x;
[4]nat y;
[8]nat p;
// RadixBMulDadda: multiplies the 4-digit numbers x and y and stores the
// 8-digit product in p, using straight-line code only:
//   1. all 16 digit products, each split into a high and a low digit,
//   2. column-wise reduction stages (heights 6, 4, 3, 2) that add two or
//      three digits of one column and pass the carry digit to the next column,
//   3. a carry-free addition of the two remaining rows,
//   4. a prefix computation on (generate, propagate) pairs that yields the
//      carry entering every column,
//   5. the final digits p[k] = (column sum + incoming carry) % 256.
// Throughout, "v / 256" is the digit handed to the next higher column and
// "v % 256" is the digit that stays in the current column.
thread RadixBMulDadda {
nat t0,t1,t10,t100,t101,t102,t103,t104,t105,t106,t107,t108,t109,t11,t110,t111,t112,t113,t114,t115,t116,t117,t118,t119,t12,t121,t122,t123,t124,t125,t126,t127,t128,t13,t14,t15,t16,t17,t18,t19,t2,t20,t21,t22,t23,t24,t25,t26,t27,t28,t29,t3,t30,t31,t32,t33,t34,t35,t36,t37,t38,t39,t4,t40,t41,t42,t43,t44,t45,t46,t47,t48,t49,t5,t50,t51,t52,t53,t54,t55,t56,t57,t58,t59,t6,t60,t61,t62,t63,t64,t65,t66,t67,t68,t69,t7,t70,t71,t72,t73,t74,t75,t76,t77,t78,t79,t8,t80,t81,t82,t83,t84,t85,t86,t87,t88,t89,t9,t90,t91,t92,t93,t94,t95,t96,t97,t98,t99;
bool t129,t130,t131,t132,t133,t134,t135,t136,t137,t138,t139,t140,t141,t142;
    // local copies of the operand digits: t0..t3 = x[0..3], t4..t7 = y[0..3]
    t0 = x[0];
    t1 = x[1];
    t2 = x[2];
    t3 = x[3];
    t4 = y[0];
    t5 = y[1];
    t6 = y[2];
    t7 = y[3];
    // compute partial products
    // For x[i]*y[j] the low digit (% 256) belongs to column i+j and the high
    // digit (/ 256) to column i+j+1. The products are listed by increasing i+j.
    // i+j = 0
    t8 = t0 * t4;
    t9 = t8 / 256;
    t10 = t8 % 256;
    // i+j = 1
    t11 = t0 * t5;
    t12 = t11 / 256;
    t13 = t11 % 256;
    t14 = t1 * t4;
    t15 = t14 / 256;
    t16 = t14 % 256;
    // i+j = 2
    t17 = t0 * t6;
    t18 = t17 / 256;
    t19 = t17 % 256;
    t20 = t1 * t5;
    t21 = t20 / 256;
    t22 = t20 % 256;
    t23 = t2 * t4;
    t24 = t23 / 256;
    t25 = t23 % 256;
    // i+j = 3
    t26 = t0 * t7;
    t27 = t26 / 256;
    t28 = t26 % 256;
    t29 = t1 * t6;
    t30 = t29 / 256;
    t31 = t29 % 256;
    t32 = t2 * t5;
    t33 = t32 / 256;
    t34 = t32 % 256;
    t35 = t3 * t4;
    t36 = t35 / 256;
    t37 = t35 % 256;
    // i+j = 4
    t38 = t1 * t7;
    t39 = t38 / 256;
    t40 = t38 % 256;
    t41 = t2 * t6;
    t42 = t41 / 256;
    t43 = t41 % 256;
    t44 = t3 * t5;
    t45 = t44 / 256;
    t46 = t44 % 256;
    // i+j = 5
    t47 = t2 * t7;
    t48 = t47 / 256;
    t49 = t47 % 256;
    t50 = t3 * t6;
    t51 = t50 / 256;
    t52 = t50 % 256;
    // i+j = 6
    t53 = t3 * t7;
    t54 = t53 / 256;
    t55 = t53 % 256;
    // Column heights at this point (columns 0..7): 1, 3, 5, 7, 7, 5, 3, 1.
    // reduce heights of each column to 6
    // column 3: two digits -> one digit + carry into column 4
    t56 = t37 + t34;
    t57 = t56 / 256;
    t58 = t56 % 256;
    // column 4: three digits -> one digit + carry into column 5
    t59 = t46 + t43 + t40;
    t60 = t59 / 256;
    t61 = t59 % 256;
    // reduce heights of each column to 4
    // column 2
    t62 = t25 + t22;
    t63 = t62 / 256;
    t64 = t62 % 256;
    // column 3 (two adders)
    t65 = t21 + t18;
    t66 = t65 / 256;
    t67 = t65 % 256;
    t68 = t31 + t28 + t24;
    t69 = t68 / 256;
    t70 = t68 % 256;
    // column 4 (two adders)
    t71 = t27 + t57 + t61;
    t72 = t71 / 256;
    t73 = t71 % 256;
    t74 = t36 + t33 + t30;
    t75 = t74 / 256;
    t76 = t74 % 256;
    // column 5 (two adders)
    t77 = t42 + t39 + t60;
    t78 = t77 / 256;
    t79 = t77 % 256;
    t80 = t52 + t49 + t45;
    t81 = t80 / 256;
    t82 = t80 % 256;
    // column 6
    t83 = t55 + t51;
    t84 = t83 / 256;
    t85 = t83 % 256;
    // reduce heights of each column to 3
    // one adder per column, columns 2..6
    t86 = t19 + t15;
    t87 = t86 / 256;
    t88 = t86 % 256;
    t89 = t58 + t63 + t67;
    t90 = t89 / 256;
    t91 = t89 % 256;
    t92 = t66 + t69 + t73;
    t93 = t92 / 256;
    t94 = t92 % 256;
    t95 = t72 + t75 + t79;
    t96 = t95 / 256;
    t97 = t95 % 256;
    t98 = t48 + t78 + t81;
    t99 = t98 / 256;
    t100 = t98 % 256;
    // reduce heights of each column to 2
    // one adder per column, columns 1..7
    t101 = t16 + t13;
    t102 = t101 / 256;
    t103 = t101 % 256;
    t104 = t12 + t64 + t88;
    t105 = t104 / 256;
    t106 = t104 % 256;
    t107 = t70 + t87 + t91;
    t108 = t107 / 256;
    t109 = t107 % 256;
    t110 = t76 + t90 + t94;
    t111 = t110 / 256;
    t112 = t110 % 256;
    t113 = t82 + t93 + t97;
    t114 = t113 / 256;
    t115 = t113 % 256;
    t116 = t85 + t96 + t100;
    t117 = t116 / 256;
    t118 = t116 % 256;
    // Column 7: only the digit is kept. The carry out of this column
    // (t119 / 256) is not computed because no statement reads it; for operand
    // digits < 256 the product is below 256^8, so that carry is 0.
    t119 = t54 + t84 + t99;
    t121 = t119 % 256;
    // preliminary addition of the two remaining numbers
    // t122..t128 are the digit-wise sums of columns 1..7 without carries, so
    // each may exceed 255. Column 0 holds only t10 and needs no addition.
    t122 = t9 + t103;
    t123 = t102 + t106;
    t124 = t105 + t109;
    t125 = t108 + t112;
    t126 = t111 + t115;
    t127 = t114 + t118;
    t128 = t117 + t121;
    // compute generate and propagate pairs
    // generate  (sum > 255):  the column produces a carry by itself
    // propagate (sum == 255): the column produces a carry iff one comes in
    // Pairs (generate, propagate) for columns 1..7:
    // (t129,t130) (t131,t132) (t133,t134) (t135,t136) (t137,t138) (t139,t140) (t141,t142)
    t129 = t122 > 255;
    t130 = t122 == 255;
    t131 = t123 > 255;
    t132 = t123 == 255;
    t133 = t124 > 255;
    t134 = t124 == 255;
    t135 = t125 > 255;
    t136 = t125 == 255;
    t137 = t126 > 255;
    t138 = t126 == 255;
    t139 = t127 > 255;
    t140 = t127 == 255;
    t141 = t128 > 255;
    t142 = t128 == 255;
    // parallel prefix tree for computing carry bits
    // Each step combines a pair in place with the pair of a lower column range:
    //   g = p & g_low | g;   p = p & p_low;
    // The statement order matters because the pairs are overwritten.
    // up-level 1
    // columns 1..2, 3..4, 5..6
    t131 = t132 & t129 | t131;
    t132 = t132 & t130;
    t135 = t136 & t133 | t135;
    t136 = t136 & t134;
    t139 = t140 & t137 | t139;
    t140 = t140 & t138;
    // up-level 2
    // columns 1..4
    t135 = t136 & t131 | t135;
    t136 = t136 & t132;
    // down-level 4
    // columns 1..6
    t139 = t140 & t135 | t139;
    t140 = t140 & t136;
    // down-level 5
    // columns 1..7; this pair is evaluated once (repeating the same two
    // statements with unchanged t139/t140 yields the same values). Note that
    // t141/t142 are not read by any later statement of this thread.
    t141 = t142 & t139 | t141;
    t142 = t142 & t140;
    // columns 1..3
    t133 = t134 & t131 | t133;
    t134 = t134 & t132;
    // columns 1..5
    t137 = t138 & t135 | t137;
    t138 = t138 & t136;
    // compute final sum digits as the digits of the product
    // The carry entering column k is the generate bit of columns 1..k-1:
    // t129 (1), t131 (1..2), t133 (1..3), t135 (1..4), t137 (1..5), t139 (1..6).
    // Column 1 has no incoming carry, so t122 stays as it is.
    t128 = t128+(t139?1:0);
    t127 = t127+(t137?1:0);
    t126 = t126+(t135?1:0);
    t125 = t125+(t133?1:0);
    t124 = t124+(t131?1:0);
    t123 = t123+(t129?1:0);
    // get the product digits
    // % 256 drops the part of each column sum that was already accounted for
    // as a carry into the next column.
    p[0] = t10;
    p[1] = t122 % 256;
    p[2] = t123 % 256;
    p[3] = t124 % 256;
    p[4] = t125 % 256;
    p[5] = t126 % 256;
    p[6] = t127 % 256;
    p[7] = t128 % 256;
}