// ************************************************************************** //
//                                                                            //
//    eses             eses                                                   //
//   eses               eses                                                  //
//  eses    eseses  esesese  eses   Embedded Systems Group                    //
//  ese    ese  ese ese      ese                                              //
//  ese    eseseses eseseses ese    Department of Computer Science            //
//  eses   eses          ese eses                                             //
//   eses   eseses  eseseses eses   University of Kaiserslautern              //
//    eses             eses                                                   //
//                                                                            //
// ************************************************************************** //

// Factors: four digits each, x[0] / y[0] being the least significant digit.
// NOTE(review): the code below treats every entry as a radix-256 digit
// (value below 256); nothing in this file enforces that -- confirm at the
// place where x and y are set.
[4]nat x;
[4]nat y;
// Product: eight radix-256 digits, p[0] being the least significant digit.
[8]nat p;

// Multiplies the 4-digit number x by the 4-digit number y in radix 256 and
// stores the 8 product digits in p.
//
// Steps:
//   1. form all 16 digit products and split each into a low and a high digit,
//   2. repeatedly add two or three digits of the same weight, which yields a
//      sum digit of that weight and a carry digit of the next weight, until
//      at most two digits per weight remain (stages 6, 4, 3, 2),
//   3. add the two remaining rows digit-wise and resolve the carries with a
//      prefix computation over (generate, propagate) flags.
//
// In the comments below, "column k" means weight 256^k, and the height of a
// column is the number of digits of that weight that still have to be added.
thread RadixBMulDadda {
    nat t0,t1,t10,t100,t101,t102,t103,t104,t105,t106,t107,t108,t109,t11,t110,
        t111,t112,t113,t114,t115,t116,t117,t118,t119,t12,t120,t121,t122,t123,
        t124,t125,t126,t127,t128,t13,t14,t15,t16,t17,t18,t19,t2,t20,t21,t22,
        t23,t24,t25,t26,t27,t28,t29,t3,t30,t31,t32,t33,t34,t35,t36,t37,t38,
        t39,t4,t40,t41,t42,t43,t44,t45,t46,t47,t48,t49,t5,t50,t51,t52,t53,t54,
        t55,t56,t57,t58,t59,t6,t60,t61,t62,t63,t64,t65,t66,t67,t68,t69,t7,t70,
        t71,t72,t73,t74,t75,t76,t77,t78,t79,t8,t80,t81,t82,t83,t84,t85,t86,
        t87,t88,t89,t9,t90,t91,t92,t93,t94,t95,t96,t97,t98,t99;
    // generate/propagate flags of the final addition (see below)
    bool t129,t130,t131,t132,t133,t134,t135,t136,t137,t138,t139,t140,t141,t142;

    // read the digits of both factors: t0..t3 = x[0..3], t4..t7 = y[0..3]
    t0 = x[0];
    t1 = x[1];
    t2 = x[2];
    t3 = x[3];
    t4 = y[0];
    t5 = y[1];
    t6 = y[2];
    t7 = y[3];

    // compute partial products
    // Each product x[i]*y[j] is split into a high digit (/ 256) belonging to
    // column i+j+1 and a low digit (% 256) belonging to column i+j.
    t8 = t0 * t4;    t9 = t8 / 256;    t10 = t8 % 256;    // x[0]*y[0]: high col 1, low col 0
    t11 = t0 * t5;   t12 = t11 / 256;  t13 = t11 % 256;   // x[0]*y[1]: high col 2, low col 1
    t14 = t1 * t4;   t15 = t14 / 256;  t16 = t14 % 256;   // x[1]*y[0]: high col 2, low col 1
    t17 = t0 * t6;   t18 = t17 / 256;  t19 = t17 % 256;   // x[0]*y[2]: high col 3, low col 2
    t20 = t1 * t5;   t21 = t20 / 256;  t22 = t20 % 256;   // x[1]*y[1]: high col 3, low col 2
    t23 = t2 * t4;   t24 = t23 / 256;  t25 = t23 % 256;   // x[2]*y[0]: high col 3, low col 2
    t26 = t0 * t7;   t27 = t26 / 256;  t28 = t26 % 256;   // x[0]*y[3]: high col 4, low col 3
    t29 = t1 * t6;   t30 = t29 / 256;  t31 = t29 % 256;   // x[1]*y[2]: high col 4, low col 3
    t32 = t2 * t5;   t33 = t32 / 256;  t34 = t32 % 256;   // x[2]*y[1]: high col 4, low col 3
    t35 = t3 * t4;   t36 = t35 / 256;  t37 = t35 % 256;   // x[3]*y[0]: high col 4, low col 3
    t38 = t1 * t7;   t39 = t38 / 256;  t40 = t38 % 256;   // x[1]*y[3]: high col 5, low col 4
    t41 = t2 * t6;   t42 = t41 / 256;  t43 = t41 % 256;   // x[2]*y[2]: high col 5, low col 4
    t44 = t3 * t5;   t45 = t44 / 256;  t46 = t44 % 256;   // x[3]*y[1]: high col 5, low col 4
    t47 = t2 * t7;   t48 = t47 / 256;  t49 = t47 % 256;   // x[2]*y[3]: high col 6, low col 5
    t50 = t3 * t6;   t51 = t50 / 256;  t52 = t50 % 256;   // x[3]*y[2]: high col 6, low col 5
    t53 = t3 * t7;   t54 = t53 / 256;  t55 = t53 % 256;   // x[3]*y[3]: high col 7, low col 6

    // In every reduction step below, the first assignment adds two or three
    // digits of one column, "/ 256" is the carry digit that moves to the next
    // higher column and "% 256" is the sum digit that stays in the column.

    // reduce heights of each column to 6
    t56 = t37 + t34;         t57 = t56 / 256;  t58 = t56 % 256;   // column 3
    t59 = t46 + t43 + t40;   t60 = t59 / 256;  t61 = t59 % 256;   // column 4

    // reduce heights of each column to 4
    t62 = t25 + t22;         t63 = t62 / 256;  t64 = t62 % 256;   // column 2
    t65 = t21 + t18;         t66 = t65 / 256;  t67 = t65 % 256;   // column 3
    t68 = t31 + t28 + t24;   t69 = t68 / 256;  t70 = t68 % 256;   // column 3
    t71 = t27 + t57 + t61;   t72 = t71 / 256;  t73 = t71 % 256;   // column 4
    t74 = t36 + t33 + t30;   t75 = t74 / 256;  t76 = t74 % 256;   // column 4
    t77 = t42 + t39 + t60;   t78 = t77 / 256;  t79 = t77 % 256;   // column 5
    t80 = t52 + t49 + t45;   t81 = t80 / 256;  t82 = t80 % 256;   // column 5
    t83 = t55 + t51;         t84 = t83 / 256;  t85 = t83 % 256;   // column 6

    // reduce heights of each column to 3
    t86 = t19 + t15;         t87 = t86 / 256;  t88 = t86 % 256;   // column 2
    t89 = t58 + t63 + t67;   t90 = t89 / 256;  t91 = t89 % 256;   // column 3
    t92 = t66 + t69 + t73;   t93 = t92 / 256;  t94 = t92 % 256;   // column 4
    t95 = t72 + t75 + t79;   t96 = t95 / 256;  t97 = t95 % 256;   // column 5
    t98 = t48 + t78 + t81;   t99 = t98 / 256;  t100 = t98 % 256;  // column 6

    // reduce heights of each column to 2
    t101 = t16 + t13;          t102 = t101 / 256;  t103 = t101 % 256;   // column 1
    t104 = t12 + t64 + t88;    t105 = t104 / 256;  t106 = t104 % 256;   // column 2
    t107 = t70 + t87 + t91;    t108 = t107 / 256;  t109 = t107 % 256;   // column 3
    t110 = t76 + t90 + t94;    t111 = t110 / 256;  t112 = t110 % 256;   // column 4
    t113 = t82 + t93 + t97;    t114 = t113 / 256;  t115 = t113 % 256;   // column 5
    t116 = t85 + t96 + t100;   t117 = t116 / 256;  t118 = t116 % 256;   // column 6
    // column 7; its carry t120 is not read anywhere below
    t119 = t54 + t84 + t99;    t120 = t119 / 256;  t121 = t119 % 256;

    // preliminary addition of the two remaining numbers
    // digit-wise sums without carry propagation; each may exceed 255
    t122 = t9 + t103;     // column 1
    t123 = t102 + t106;   // column 2
    t124 = t105 + t109;   // column 3
    t125 = t108 + t112;   // column 4
    t126 = t111 + t115;   // column 5
    t127 = t114 + t118;   // column 6
    t128 = t117 + t121;   // column 7

    // compute generate and propagate pairs
    // generate  (sum > 255):  the column produces a carry by itself
    // propagate (sum == 255): the column passes an incoming carry on
    t129 = t122 > 255;   t130 = t122 == 255;   // column 1
    t131 = t123 > 255;   t132 = t123 == 255;   // column 2
    t133 = t124 > 255;   t134 = t124 == 255;   // column 3
    t135 = t125 > 255;   t136 = t125 == 255;   // column 4
    t137 = t126 > 255;   t138 = t126 == 255;   // column 5
    t139 = t127 > 255;   t140 = t127 == 255;   // column 6
    t141 = t128 > 255;   t142 = t128 == 255;   // column 7

    // parallel prefix tree for computing carry bits
    // The flags are overwritten in place, so the order of the statements
    // matters: every step combines a pair with the current value of a lower
    // pair as  g = p & g_lower | g;  p = p & p_lower;
    // NOTE(review): this reads as (p & g_lower) | g, i.e. it relies on '&'
    // binding tighter than '|' -- confirm against the language definition.
    // up-level 1
    t131 = t132 & t129 | t131;   t132 = t132 & t130;   // columns 1..2
    t135 = t136 & t133 | t135;   t136 = t136 & t134;   // columns 3..4
    t139 = t140 & t137 | t139;   t140 = t140 & t138;   // columns 5..6
    // up-level 2
    t135 = t136 & t131 | t135;   t136 = t136 & t132;   // columns 1..4
    // down-level 4
    t139 = t140 & t135 | t139;   t140 = t140 & t136;   // columns 1..6
    // down-level 5
    t141 = t142 & t139 | t141;   t142 = t142 & t140;   // columns 1..7
    t133 = t134 & t131 | t133;   t134 = t134 & t132;   // columns 1..3
    // Same update of t141/t142 as above: t139 and t140 have not changed in
    // between, so the values stay the same; t141/t142 are not read afterwards.
    t141 = t142 & t139 | t141;   t142 = t142 & t140;
    t137 = t138 & t135 | t137;   t138 = t138 & t136;   // columns 1..5
    // third occurrence of the t141/t142 update, again without effect
    t141 = t142 & t139 | t141;   t142 = t142 & t140;

    // compute final sum digits as the digits of the product
    // each column receives the carry out of all columns below it;
    // column 1 (t122) is the lowest column of the addition and receives none
    t128 = t128+(t139?1:0);   // column 7 += carry out of columns 1..6
    t127 = t127+(t137?1:0);   // column 6 += carry out of columns 1..5
    t126 = t126+(t135?1:0);   // column 5 += carry out of columns 1..4
    t125 = t125+(t133?1:0);   // column 4 += carry out of columns 1..3
    t124 = t124+(t131?1:0);   // column 3 += carry out of columns 1..2
    t123 = t123+(t129?1:0);   // column 2 += carry out of column 1

    // get the product digits
    p[0] = t10;           // low digit of x[0]*y[0], the only digit in column 0
    p[1] = t122 % 256;    // "% 256" drops the carry already added one column up
    p[2] = t123 % 256;
    p[3] = t124 % 256;
    p[4] = t125 % 256;
    p[5] = t126 % 256;
    p[6] = t127 % 256;
    p[7] = t128 % 256;
}