// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //

// Operands x and y (5 digits each) and their product p (10 digits).
// Digit 0 is the least significant one; every "/ 256" and "% 256" below
// treats the numbers as radix-256.
// NOTE(review): the carry logic assumes every x[i] and y[i] is already a
// digit in 0..255 -- confirm against the producer of x and y.
[5]nat x;
[5]nat y;
[10]nat p;

// RadixBMulDadda: straight-line 5x5-digit multiplication p = x * y.
//
//   1. form all 25 digit products and split each into a high and a low digit
//   2. reduce the digit columns in stages to heights 6, 4, 3 and finally 2
//   3. add the two remaining rows digit-wise without carries
//   4. derive the carries with a generate/propagate prefix tree
//   5. inject the carries and write the digits to p
//
// "col k" in the comments below means the digit position of weight 256^k.
thread RadixBMulDadda {
    nat t0,   t1,   t2,   t3,   t4,   t5,   t6,   t7,   t8,   t9,
        t10,  t11,  t12,  t13,  t14,  t15,  t16,  t17,  t18,  t19,
        t20,  t21,  t22,  t23,  t24,  t25,  t26,  t27,  t28,  t29,
        t30,  t31,  t32,  t33,  t34,  t35,  t36,  t37,  t38,  t39,
        t40,  t41,  t42,  t43,  t44,  t45,  t46,  t47,  t48,  t49,
        t50,  t51,  t52,  t53,  t54,  t55,  t56,  t57,  t58,  t59,
        t60,  t61,  t62,  t63,  t64,  t65,  t66,  t67,  t68,  t69,
        t70,  t71,  t72,  t73,  t74,  t75,  t76,  t77,  t78,  t79,
        t80,  t81,  t82,  t83,  t84,  t85,  t86,  t87,  t88,  t89,
        t90,  t91,  t92,  t93,  t94,  t95,  t96,  t97,  t98,  t99,
        t100, t101, t102, t103, t104, t105, t106, t107, t108, t109,
        t110, t111, t112, t113, t114, t115, t116, t117, t118, t119,
        t120, t121, t122, t123, t124, t125, t126, t127, t128, t129,
        t130, t131, t132, t133, t134, t135, t136, t137, t138, t139,
        t140, t141, t142, t143, t144, t145, t146, t147, t148, t149,
        t150, t151, t152, t153, t154, t155, t156, t157, t158, t159,
        t160, t161, t162, t163, t164, t165, t166, t167, t168, t169,
        t170, t171, t172, t173, t174, t175, t176, t177, t178, t179,
        t180, t181, t182, t183, t184, t185, t186, t187, t188, t189,
        t190, t191, t192, t193, t194;
    bool t195, t196, t197, t198, t199, t200, t201, t202, t203,
         t204, t205, t206, t207, t208, t209, t210, t211, t212;

    // ---------------------------------------------------------------------
    // compute partial products
    // Each line: product x[i] * y[j], then its high digit (goes to column
    // i+j+1) and its low digit (goes to column i+j).
    // ---------------------------------------------------------------------
    // i + j = 0
    t0  = x[0] * y[0];   t1  = t0  / 256;   t2  = t0  % 256;
    // i + j = 1
    t3  = x[0] * y[1];   t4  = t3  / 256;   t5  = t3  % 256;
    t6  = x[1] * y[0];   t7  = t6  / 256;   t8  = t6  % 256;
    // i + j = 2
    t9  = x[0] * y[2];   t10 = t9  / 256;   t11 = t9  % 256;
    t12 = x[1] * y[1];   t13 = t12 / 256;   t14 = t12 % 256;
    t15 = x[2] * y[0];   t16 = t15 / 256;   t17 = t15 % 256;
    // i + j = 3
    t18 = x[0] * y[3];   t19 = t18 / 256;   t20 = t18 % 256;
    t21 = x[1] * y[2];   t22 = t21 / 256;   t23 = t21 % 256;
    t24 = x[2] * y[1];   t25 = t24 / 256;   t26 = t24 % 256;
    t27 = x[3] * y[0];   t28 = t27 / 256;   t29 = t27 % 256;
    // i + j = 4
    t30 = x[0] * y[4];   t31 = t30 / 256;   t32 = t30 % 256;
    t33 = x[1] * y[3];   t34 = t33 / 256;   t35 = t33 % 256;
    t36 = x[2] * y[2];   t37 = t36 / 256;   t38 = t36 % 256;
    t39 = x[3] * y[1];   t40 = t39 / 256;   t41 = t39 % 256;
    t42 = x[4] * y[0];   t43 = t42 / 256;   t44 = t42 % 256;
    // i + j = 5
    t45 = x[1] * y[4];   t46 = t45 / 256;   t47 = t45 % 256;
    t48 = x[2] * y[3];   t49 = t48 / 256;   t50 = t48 % 256;
    t51 = x[3] * y[2];   t52 = t51 / 256;   t53 = t51 % 256;
    t54 = x[4] * y[1];   t55 = t54 / 256;   t56 = t54 % 256;
    // i + j = 6
    t57 = x[2] * y[4];   t58 = t57 / 256;   t59 = t57 % 256;
    t60 = x[3] * y[3];   t61 = t60 / 256;   t62 = t60 % 256;
    t63 = x[4] * y[2];   t64 = t63 / 256;   t65 = t63 % 256;
    // i + j = 7
    t66 = x[3] * y[4];   t67 = t66 / 256;   t68 = t66 % 256;
    t69 = x[4] * y[3];   t70 = t69 / 256;   t71 = t69 % 256;
    // i + j = 8
    t72 = x[4] * y[4];   t73 = t72 / 256;   t74 = t72 % 256;

    // ---------------------------------------------------------------------
    // Column reduction. Each line adds two or three digits of one column,
    // then splits the sum into a carry digit for the next higher column
    // ("/ 256") and a digit that stays in the same column ("% 256").
    // ---------------------------------------------------------------------

    // reduce heights of each column to 6
    t75  = t29 + t26;          t76  = t75  / 256;   t77  = t75  % 256;   // col 3
    t78  = t35 + t32 + t28;    t79  = t78  / 256;   t80  = t78  % 256;   // col 4
    t81  = t44 + t41 + t38;    t82  = t81  / 256;   t83  = t81  % 256;   // col 4
    t84  = t37 + t34;          t85  = t84  / 256;   t86  = t84  % 256;   // col 5
    t87  = t47 + t43 + t40;    t88  = t87  / 256;   t89  = t87  % 256;   // col 5
    t90  = t56 + t53 + t50;    t91  = t90  / 256;   t92  = t90  % 256;   // col 5
    t93  = t55 + t52 + t49;    t94  = t93  / 256;   t95  = t93  % 256;   // col 6
    t96  = t65 + t62 + t59;    t97  = t96  / 256;   t98  = t96  % 256;   // col 6
    t99  = t71 + t68;          t100 = t99  / 256;   t101 = t99  % 256;   // col 7

    // reduce heights of each column to 4
    t102 = t17 + t14;          t103 = t102 / 256;   t104 = t102 % 256;   // col 2
    t105 = t13 + t10;          t106 = t105 / 256;   t107 = t105 % 256;   // col 3
    t108 = t23 + t20 + t16;    t109 = t108 / 256;   t110 = t108 % 256;   // col 3
    t111 = t76 + t80 + t83;    t112 = t111 / 256;   t113 = t111 % 256;   // col 4
    t114 = t25 + t22 + t19;    t115 = t114 / 256;   t116 = t114 % 256;   // col 4
    t117 = t86 + t89 + t92;    t118 = t117 / 256;   t119 = t117 % 256;   // col 5
    t120 = t31 + t79 + t82;    t121 = t120 / 256;   t122 = t120 % 256;   // col 5
    t123 = t91 + t95 + t98;    t124 = t123 / 256;   t125 = t123 % 256;   // col 6
    t126 = t46 + t85 + t88;    t127 = t126 / 256;   t128 = t126 % 256;   // col 6
    t129 = t94 + t97 + t101;   t130 = t129 / 256;   t131 = t129 % 256;   // col 7
    t132 = t64 + t61 + t58;    t133 = t132 / 256;   t134 = t132 % 256;   // col 7
    t135 = t74 + t70 + t67;    t136 = t135 / 256;   t137 = t135 % 256;   // col 8

    // reduce heights of each column to 3
    t138 = t11 + t7;             t139 = t138 / 256;   t140 = t138 % 256;   // col 2
    t141 = t77 + t103 + t107;    t142 = t141 / 256;   t143 = t141 % 256;   // col 3
    t144 = t106 + t109 + t113;   t145 = t144 / 256;   t146 = t144 % 256;   // col 4
    t147 = t112 + t115 + t119;   t148 = t147 / 256;   t149 = t147 % 256;   // col 5
    t150 = t118 + t121 + t125;   t151 = t150 / 256;   t152 = t150 % 256;   // col 6
    t153 = t124 + t127 + t131;   t154 = t153 / 256;   t155 = t153 % 256;   // col 7
    t156 = t100 + t130 + t133;   t157 = t156 / 256;   t158 = t156 % 256;   // col 8

    // reduce heights of each column to 2
    t159 = t8 + t5;              t160 = t159 / 256;   t161 = t159 % 256;   // col 1
    t162 = t4 + t104 + t140;     t163 = t162 / 256;   t164 = t162 % 256;   // col 2
    t165 = t110 + t139 + t143;   t166 = t165 / 256;   t167 = t165 % 256;   // col 3
    t168 = t116 + t142 + t146;   t169 = t168 / 256;   t170 = t168 % 256;   // col 4
    t171 = t122 + t145 + t149;   t172 = t171 / 256;   t173 = t171 % 256;   // col 5
    t174 = t128 + t148 + t152;   t175 = t174 / 256;   t176 = t174 % 256;   // col 6
    t177 = t134 + t151 + t155;   t178 = t177 / 256;   t179 = t177 % 256;   // col 7
    t180 = t137 + t154 + t158;   t181 = t180 / 256;   t182 = t180 % 256;   // col 8
    // NOTE(review): t184 (the carry out of col 9) is never read afterwards.
    t183 = t73 + t136 + t157;    t184 = t183 / 256;   t185 = t183 % 256;   // col 9

    // ---------------------------------------------------------------------
    // preliminary addition of the two remaining numbers
    // Digit-wise sums without carry propagation; t186 is col 1, t194 col 9.
    // ---------------------------------------------------------------------
    t186 = t1   + t161;
    t187 = t160 + t164;
    t188 = t163 + t167;
    t189 = t166 + t170;
    t190 = t169 + t173;
    t191 = t172 + t176;
    t192 = t175 + t179;
    t193 = t178 + t182;
    t194 = t181 + t185;

    // ---------------------------------------------------------------------
    // compute generate and propagate pairs
    // generate  (sum > 255):  the digit produces a carry by itself
    // propagate (sum == 255): the digit passes an incoming carry on
    // ---------------------------------------------------------------------
    t195 = t186 > 255;   t196 = t186 == 255;   // col 1
    t197 = t187 > 255;   t198 = t187 == 255;   // col 2
    t199 = t188 > 255;   t200 = t188 == 255;   // col 3
    t201 = t189 > 255;   t202 = t189 == 255;   // col 4
    t203 = t190 > 255;   t204 = t190 == 255;   // col 5
    t205 = t191 > 255;   t206 = t191 == 255;   // col 6
    t207 = t192 > 255;   t208 = t192 == 255;   // col 7
    t209 = t193 > 255;   t210 = t193 == 255;   // col 8
    t211 = t194 > 255;   t212 = t194 == 255;   // col 9

    // ---------------------------------------------------------------------
    // parallel prefix tree for computing carry bits
    // Every step merges a (generate, propagate) pair with a lower one, in
    // place:  g = (p & g_lower) | g;  p = p & p_lower.
    // The statement order matters because the pairs are overwritten.
    // ---------------------------------------------------------------------
    // up-level 1
    t197 = (t198 & t195) | t197;   t198 = t198 & t196;
    t201 = (t202 & t199) | t201;   t202 = t202 & t200;
    t205 = (t206 & t203) | t205;   t206 = t206 & t204;
    t209 = (t210 & t207) | t209;   t210 = t210 & t208;
    // up-level 2
    t201 = (t202 & t197) | t201;   t202 = t202 & t198;
    t209 = (t210 & t205) | t209;   t210 = t210 & t206;
    // up-level 3
    t209 = (t210 & t201) | t209;   t210 = t210 & t202;
    // down-level 5
    // down-level 6
    t205 = (t206 & t201) | t205;   t206 = t206 & t202;
    // down-level 7
    // NOTE(review): the t211/t212 update appears four times in this level
    // and neither variable is read when the product digits are formed below.
    // The repetitions are reproduced unchanged.
    t211 = (t212 & t209) | t211;   t212 = t212 & t210;
    t199 = (t200 & t197) | t199;   t200 = t200 & t198;
    t211 = (t212 & t209) | t211;   t212 = t212 & t210;
    t203 = (t204 & t201) | t203;   t204 = t204 & t202;
    t211 = (t212 & t209) | t211;   t212 = t212 & t210;
    t207 = (t208 & t205) | t207;   t208 = t208 & t206;
    t211 = (t212 & t209) | t211;   t212 = t212 & t210;

    // ---------------------------------------------------------------------
    // compute final sum digits as the digits of the product
    // Each digit sum receives the carry generated by all lower columns.
    // t186 (col 1) is the lowest column of the addition and gets no carry.
    // ---------------------------------------------------------------------
    t187 = t187 + (t195 ? 1 : 0);
    t188 = t188 + (t197 ? 1 : 0);
    t189 = t189 + (t199 ? 1 : 0);
    t190 = t190 + (t201 ? 1 : 0);
    t191 = t191 + (t203 ? 1 : 0);
    t192 = t192 + (t205 ? 1 : 0);
    t193 = t193 + (t207 ? 1 : 0);
    t194 = t194 + (t209 ? 1 : 0);

    // ---------------------------------------------------------------------
    // get the product digits
    // p[0] is the low digit of x[0] * y[0]; the others are the corrected
    // sums reduced to a single radix-256 digit.
    // ---------------------------------------------------------------------
    p[0] = t2;
    p[1] = t186 % 256;
    p[2] = t187 % 256;
    p[3] = t188 % 256;
    p[4] = t189 % 256;
    p[5] = t190 % 256;
    p[6] = t191 % 256;
    p[7] = t192 % 256;
    p[8] = t193 % 256;
    p[9] = t194 % 256;
}