// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //

// Operands and result as radix-256 digits, least significant digit first:
//   X = x0 + 256*x1 + 256^2*x2 + 256^3*x3
//   Y = y0 + 256*y1 + 256^2*y2 + 256^3*y3
//   P = p0 + 256*p1 + ... + 256^7*p7
nat x0,x1,x2,x3;
nat y0,y1,y2,y3;
nat p0,p1,p2,p3,p4,p5,p6,p7;

// RadixBMulDadda: multiplies the 4-digit numbers X and Y and writes the
// 8-digit product to p0..p7.
//
// Steps:
//   1. All 16 digit products xi*yj are split into a high digit (/ 256) and a
//      low digit (% 256). The low digit has weight 256^(i+j) ("column" i+j),
//      the high digit has weight 256^(i+j+1).
//   2. Column heights are reduced stage by stage (6, 4, 3, 2) by adding two or
//      three digits of one column; the "% 256" part stays in the column and
//      the "/ 256" part moves to the next higher column.
//   3. The two remaining rows are added digit-wise and the carries between
//      digits are computed with a parallel prefix tree over generate/propagate
//      pairs.
//
// The code does not check its inputs; the digit arithmetic below assumes that
// every x_i and y_i is in the range 0..255.
thread RadixBMulDadda {
    nat t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,
        t10,t11,t12,t13,t14,t15,t16,t17,t18,t19,
        t20,t21,t22,t23,t24,t25,t26,t27,t28,t29,
        t30,t31,t32,t33,t34,t35,t36,t37,t38,t39,
        t40,t41,t42,t43,t44,t45,t46,t47,t48,t49,
        t50,t51,t52,t53,t54,t55,t56,t57,t58,t59,
        t60,t61,t62,t63,t64,t65,t66,t67,t68,t69,
        t70,t71,t72,t73,t74,t75,t76,t77,t78,t79,
        t80,t81,t82,t83,t84,t85,t86,t87,t88,t89,
        t90,t91,t92,t93,t94,t95,t96,t97,t98,t99,
        t100,t101,t102,t103,t104,t105,t106,t107,t108,t109,
        t110,t111,t112,t113,t114,t115,t116,t117,t118,t119,
        t120;
    bool t121,t122,t123,t124,t125,t126,t127,
         t128,t129,t130,t131,t132,t133,t134;

    // compute partial products
    // (each line: product, high digit, low digit)
    t0  = x0 * y0;  t1  = t0  / 256;  t2  = t0  % 256;   // low digit in column 0
    t3  = x0 * y1;  t4  = t3  / 256;  t5  = t3  % 256;   // column 1
    t6  = x1 * y0;  t7  = t6  / 256;  t8  = t6  % 256;   // column 1
    t9  = x0 * y2;  t10 = t9  / 256;  t11 = t9  % 256;   // column 2
    t12 = x1 * y1;  t13 = t12 / 256;  t14 = t12 % 256;   // column 2
    t15 = x2 * y0;  t16 = t15 / 256;  t17 = t15 % 256;   // column 2
    t18 = x0 * y3;  t19 = t18 / 256;  t20 = t18 % 256;   // column 3
    t21 = x1 * y2;  t22 = t21 / 256;  t23 = t21 % 256;   // column 3
    t24 = x2 * y1;  t25 = t24 / 256;  t26 = t24 % 256;   // column 3
    t27 = x3 * y0;  t28 = t27 / 256;  t29 = t27 % 256;   // column 3
    t30 = x1 * y3;  t31 = t30 / 256;  t32 = t30 % 256;   // column 4
    t33 = x2 * y2;  t34 = t33 / 256;  t35 = t33 % 256;   // column 4
    t36 = x3 * y1;  t37 = t36 / 256;  t38 = t36 % 256;   // column 4
    t39 = x2 * y3;  t40 = t39 / 256;  t41 = t39 % 256;   // column 5
    t42 = x3 * y2;  t43 = t42 / 256;  t44 = t42 % 256;   // column 5
    t45 = x3 * y3;  t46 = t45 / 256;  t47 = t45 % 256;   // column 6

    // reduce heights of each column to 6
    // (each line: column sum, carry into next column, digit kept in column)
    t48 = t29 + t26;         t49 = t48 / 256;  t50 = t48 % 256;   // column 3
    t51 = t38 + t35 + t32;   t52 = t51 / 256;  t53 = t51 % 256;   // column 4

    // reduce heights of each column to 4
    t54 = t17 + t14;         t55 = t54 / 256;  t56 = t54 % 256;   // column 2
    t57 = t13 + t10;         t58 = t57 / 256;  t59 = t57 % 256;   // column 3
    t60 = t23 + t20 + t16;   t61 = t60 / 256;  t62 = t60 % 256;   // column 3
    t63 = t19 + t49 + t53;   t64 = t63 / 256;  t65 = t63 % 256;   // column 4
    t66 = t28 + t25 + t22;   t67 = t66 / 256;  t68 = t66 % 256;   // column 4
    t69 = t34 + t31 + t52;   t70 = t69 / 256;  t71 = t69 % 256;   // column 5
    t72 = t44 + t41 + t37;   t73 = t72 / 256;  t74 = t72 % 256;   // column 5
    t75 = t47 + t43;         t76 = t75 / 256;  t77 = t75 % 256;   // column 6

    // reduce heights of each column to 3
    t78 = t11 + t7;          t79 = t78 / 256;  t80 = t78 % 256;   // column 2
    t81 = t50 + t55 + t59;   t82 = t81 / 256;  t83 = t81 % 256;   // column 3
    t84 = t58 + t61 + t65;   t85 = t84 / 256;  t86 = t84 % 256;   // column 4
    t87 = t64 + t67 + t71;   t88 = t87 / 256;  t89 = t87 % 256;   // column 5
    t90 = t40 + t70 + t73;   t91 = t90 / 256;  t92 = t90 % 256;   // column 6

    // reduce heights of each column to 2
    t93  = t8 + t5;          t94  = t93  / 256;  t95  = t93  % 256;   // column 1
    t96  = t4 + t56 + t80;   t97  = t96  / 256;  t98  = t96  % 256;   // column 2
    t99  = t62 + t79 + t83;  t100 = t99  / 256;  t101 = t99  % 256;   // column 3
    t102 = t68 + t82 + t86;  t103 = t102 / 256;  t104 = t102 % 256;   // column 4
    t105 = t74 + t85 + t89;  t106 = t105 / 256;  t107 = t105 % 256;   // column 5
    t108 = t77 + t88 + t92;  t109 = t108 / 256;  t110 = t108 % 256;   // column 6
    // t112 would belong to a ninth digit; it is computed but never read.
    t111 = t46 + t76 + t91;  t112 = t111 / 256;  t113 = t111 % 256;   // column 7

    // preliminary addition of the two remaining numbers
    // (digit-wise, carries between digits are not yet propagated)
    t114 = t1   + t95;    // column 1
    t115 = t94  + t98;    // column 2
    t116 = t97  + t101;   // column 3
    t117 = t100 + t104;   // column 4
    t118 = t103 + t107;   // column 5
    t119 = t106 + t110;   // column 6
    t120 = t109 + t113;   // column 7

    // compute generate and propagate pairs
    // generate:  the digit sum alone overflows one digit (> 255)
    // propagate: the digit sum overflows iff a carry comes in (== 255)
    t121 = t114 > 255;  t122 = t114 == 255;
    t123 = t115 > 255;  t124 = t115 == 255;
    t125 = t116 > 255;  t126 = t116 == 255;
    t127 = t117 > 255;  t128 = t117 == 255;
    t129 = t118 > 255;  t130 = t118 == 255;
    t131 = t119 > 255;  t132 = t119 == 255;
    t133 = t120 > 255;  t134 = t120 == 255;

    // parallel prefix tree for computing carry bits
    // Each step merges a pair with the pair of the digits below it:
    //   generate  = (propagate & lower generate) | generate
    //   propagate = propagate & lower propagate
    // Statement order matters: every step reads pairs in the state left by the
    // steps before it.
    // up-level 1
    t123 = (t124 & t121) | t123;  t124 = t124 & t122;
    t127 = (t128 & t125) | t127;  t128 = t128 & t126;
    t131 = (t132 & t129) | t131;  t132 = t132 & t130;
    // up-level 2
    t127 = (t128 & t123) | t127;  t128 = t128 & t124;
    // down-level 4
    t131 = (t132 & t127) | t131;  t132 = t132 & t128;
    // down-level 5
    // t133/t134 describe the carry out of the top digit; no later statement
    // reads them.
    t133 = (t134 & t131) | t133;  t134 = t134 & t132;
    t125 = (t126 & t123) | t125;  t126 = t126 & t124;
    t129 = (t130 & t127) | t129;  t130 = t130 & t128;

    // compute final sum digits as the digits of the product
    // (each digit receives the carry generated by all digits below it)
    t120 = t120 + (nat) (t131&(bool)1);
    t119 = t119 + (nat) (t129&(bool)1);
    t118 = t118 + (nat) (t127&(bool)1);
    t117 = t117 + (nat) (t125&(bool)1);
    t116 = t116 + (nat) (t123&(bool)1);
    t115 = t115 + (nat) (t121&(bool)1);

    // get the product digits
    p0 = t2;
    p1 = t114 % 256;
    p2 = t115 % 256;
    p3 = t116 % 256;
    p4 = t117 % 256;
    p5 = t118 % 256;
    p6 = t119 % 256;
    p7 = t120 % 256;
}