// ************************************************************************** //
//                                                                            //
//    eses             eses                                                   //
//   eses               eses                                                  //
//  eses    eseses  esesese  eses   Embedded Systems Group                    //
//  ese    ese  ese ese       ese                                             //
//  ese    eseseses eseseses  ese   Department of Computer Science            //
//  eses   eses          ese eses                                             //
//   eses   eseses  eseseses eses   University of Kaiserslautern              //
//    eses             eses                                                   //
//                                                                            //
// ************************************************************************** //

// RadixBMulDadda multiplies the two 5-digit numbers x and y and writes the
// 10-digit result to p. All numbers are stored least significant digit first
// and every digit is handled with base 256 (see the "/ 256" and "% 256"
// splits below).
// NOTE(review): the code assumes every x[i] and y[i] is below 256; nothing
// here checks or enforces that - confirm against the producer of x and y.
//
// Terminology used in the comments: "column k" is the set of intermediate
// digits that carry the weight 256^k. The low digit of x[i]*y[j] belongs to
// column i+j, its high digit to column i+j+1. Whenever digits of a column are
// added, the "% 256" part stays in that column and the "/ 256" part moves to
// the next higher column.

[5]nat x;
[5]nat y;
[10]nat p;

thread RadixBMulDadda {
    nat t0, t1, t2, t3, t4, t5, t6, t7, t8, t9,
        t10, t11, t12, t13, t14, t15, t16, t17, t18, t19,
        t20, t21, t22, t23, t24, t25, t26, t27, t28, t29,
        t30, t31, t32, t33, t34, t35, t36, t37, t38, t39,
        t40, t41, t42, t43, t44, t45, t46, t47, t48, t49,
        t50, t51, t52, t53, t54, t55, t56, t57, t58, t59,
        t60, t61, t62, t63, t64, t65, t66, t67, t68, t69,
        t70, t71, t72, t73, t74, t75, t76, t77, t78, t79,
        t80, t81, t82, t83, t84, t85, t86, t87, t88, t89,
        t90, t91, t92, t93, t94, t95, t96, t97, t98, t99,
        t100, t101, t102, t103, t104, t105, t106, t107, t108, t109,
        t110, t111, t112, t113, t114, t115, t116, t117, t118, t119,
        t120, t121, t122, t123, t124, t125, t126, t127, t128, t129,
        t130, t131, t132, t133, t134, t135, t136, t137, t138, t139,
        t140, t141, t142, t143, t144, t145, t146, t147, t148, t149,
        t150, t151, t152, t153, t154, t155, t156, t157, t158, t159,
        t160, t161, t162, t163, t164, t165, t166, t167, t168, t169,
        t170, t171, t172, t173, t174, t175, t176, t177, t178, t179,
        t180, t181, t182, t183, t184, t185, t186, t187, t188, t189,
        t190, t191, t192, t193, t194, t195, t196, t197, t198, t199,
        t200, t201, t202, t203, t204;
    bool t205, t206, t207, t208, t209, t210, t211, t212, t213,
         t214, t215, t216, t217, t218, t219, t220, t221, t222;

    // load the digits of both operands: t0..t4 = x[0..4], t5..t9 = y[0..4]
    t0 = x[0];
    t1 = x[1];
    t2 = x[2];
    t3 = x[3];
    t4 = x[4];
    t5 = y[0];
    t6 = y[1];
    t7 = y[2];
    t8 = y[3];
    t9 = y[4];

    // compute partial products
    // each line: product, its high digit (/ 256), its low digit (% 256)
    // x[i]*y[j] with i+j = 0
    t10 = t0 * t5;  t11 = t10 / 256;  t12 = t10 % 256;
    // i+j = 1
    t13 = t0 * t6;  t14 = t13 / 256;  t15 = t13 % 256;
    t16 = t1 * t5;  t17 = t16 / 256;  t18 = t16 % 256;
    // i+j = 2
    t19 = t0 * t7;  t20 = t19 / 256;  t21 = t19 % 256;
    t22 = t1 * t6;  t23 = t22 / 256;  t24 = t22 % 256;
    t25 = t2 * t5;  t26 = t25 / 256;  t27 = t25 % 256;
    // i+j = 3
    t28 = t0 * t8;  t29 = t28 / 256;  t30 = t28 % 256;
    t31 = t1 * t7;  t32 = t31 / 256;  t33 = t31 % 256;
    t34 = t2 * t6;  t35 = t34 / 256;  t36 = t34 % 256;
    t37 = t3 * t5;  t38 = t37 / 256;  t39 = t37 % 256;
    // i+j = 4
    t40 = t0 * t9;  t41 = t40 / 256;  t42 = t40 % 256;
    t43 = t1 * t8;  t44 = t43 / 256;  t45 = t43 % 256;
    t46 = t2 * t7;  t47 = t46 / 256;  t48 = t46 % 256;
    t49 = t3 * t6;  t50 = t49 / 256;  t51 = t49 % 256;
    t52 = t4 * t5;  t53 = t52 / 256;  t54 = t52 % 256;
    // i+j = 5
    t55 = t1 * t9;  t56 = t55 / 256;  t57 = t55 % 256;
    t58 = t2 * t8;  t59 = t58 / 256;  t60 = t58 % 256;
    t61 = t3 * t7;  t62 = t61 / 256;  t63 = t61 % 256;
    t64 = t4 * t6;  t65 = t64 / 256;  t66 = t64 % 256;
    // i+j = 6
    t67 = t2 * t9;  t68 = t67 / 256;  t69 = t67 % 256;
    t70 = t3 * t8;  t71 = t70 / 256;  t72 = t70 % 256;
    t73 = t4 * t7;  t74 = t73 / 256;  t75 = t73 % 256;
    // i+j = 7
    t76 = t3 * t9;  t77 = t76 / 256;  t78 = t76 % 256;
    t79 = t4 * t8;  t80 = t79 / 256;  t81 = t79 % 256;
    // i+j = 8
    t82 = t4 * t9;  t83 = t82 / 256;  t84 = t82 % 256;

    // reduce heights of each column to 6
    // each line: sum of two or three digits of one column, carry into the
    // next column (/ 256), digit that stays in the column (% 256)
    // column 3
    t85 = t39 + t36;          t86 = t85 / 256;    t87 = t85 % 256;
    // column 4
    t88 = t45 + t42 + t38;    t89 = t88 / 256;    t90 = t88 % 256;
    t91 = t54 + t51 + t48;    t92 = t91 / 256;    t93 = t91 % 256;
    // column 5
    t94 = t47 + t44;          t95 = t94 / 256;    t96 = t94 % 256;
    t97 = t57 + t53 + t50;    t98 = t97 / 256;    t99 = t97 % 256;
    t100 = t66 + t63 + t60;   t101 = t100 / 256;  t102 = t100 % 256;
    // column 6
    t103 = t65 + t62 + t59;   t104 = t103 / 256;  t105 = t103 % 256;
    t106 = t75 + t72 + t69;   t107 = t106 / 256;  t108 = t106 % 256;
    // column 7
    t109 = t81 + t78;         t110 = t109 / 256;  t111 = t109 % 256;

    // reduce heights of each column to 4
    // column 2
    t112 = t27 + t24;           t113 = t112 / 256;  t114 = t112 % 256;
    // column 3
    t115 = t23 + t20;           t116 = t115 / 256;  t117 = t115 % 256;
    t118 = t33 + t30 + t26;     t119 = t118 / 256;  t120 = t118 % 256;
    // column 4
    t121 = t86 + t90 + t93;     t122 = t121 / 256;  t123 = t121 % 256;
    t124 = t35 + t32 + t29;     t125 = t124 / 256;  t126 = t124 % 256;
    // column 5
    t127 = t96 + t99 + t102;    t128 = t127 / 256;  t129 = t127 % 256;
    t130 = t41 + t89 + t92;     t131 = t130 / 256;  t132 = t130 % 256;
    // column 6
    t133 = t101 + t105 + t108;  t134 = t133 / 256;  t135 = t133 % 256;
    t136 = t56 + t95 + t98;     t137 = t136 / 256;  t138 = t136 % 256;
    // column 7
    t139 = t104 + t107 + t111;  t140 = t139 / 256;  t141 = t139 % 256;
    t142 = t74 + t71 + t68;     t143 = t142 / 256;  t144 = t142 % 256;
    // column 8
    t145 = t84 + t80 + t77;     t146 = t145 / 256;  t147 = t145 % 256;

    // reduce heights of each column to 3
    // one addition per column, columns 2 to 8 in ascending order
    t148 = t21 + t17;           t149 = t148 / 256;  t150 = t148 % 256;
    t151 = t87 + t113 + t117;   t152 = t151 / 256;  t153 = t151 % 256;
    t154 = t116 + t119 + t123;  t155 = t154 / 256;  t156 = t154 % 256;
    t157 = t122 + t125 + t129;  t158 = t157 / 256;  t159 = t157 % 256;
    t160 = t128 + t131 + t135;  t161 = t160 / 256;  t162 = t160 % 256;
    t163 = t134 + t137 + t141;  t164 = t163 / 256;  t165 = t163 % 256;
    t166 = t110 + t140 + t143;  t167 = t166 / 256;  t168 = t166 % 256;

    // reduce heights of each column to 2
    // one addition per column, columns 1 to 9 in ascending order
    t169 = t18 + t15;           t170 = t169 / 256;  t171 = t169 % 256;
    t172 = t14 + t114 + t150;   t173 = t172 / 256;  t174 = t172 % 256;
    t175 = t120 + t149 + t153;  t176 = t175 / 256;  t177 = t175 % 256;
    t178 = t126 + t152 + t156;  t179 = t178 / 256;  t180 = t178 % 256;
    t181 = t132 + t155 + t159;  t182 = t181 / 256;  t183 = t181 % 256;
    t184 = t138 + t158 + t162;  t185 = t184 / 256;  t186 = t184 % 256;
    t187 = t144 + t161 + t165;  t188 = t187 / 256;  t189 = t187 % 256;
    t190 = t147 + t164 + t168;  t191 = t190 / 256;  t192 = t190 % 256;
    // t194 would be the carry out of column 9; it is not read afterwards
    t193 = t83 + t146 + t167;   t194 = t193 / 256;  t195 = t193 % 256;

    // preliminary addition of the two remaining numbers
    // t196..t204 are the carry-less sums of columns 1..9; column 0 holds
    // only t12 and needs no addition
    t196 = t11 + t171;
    t197 = t170 + t174;
    t198 = t173 + t177;
    t199 = t176 + t180;
    t200 = t179 + t183;
    t201 = t182 + t186;
    t202 = t185 + t189;
    t203 = t188 + t192;
    t204 = t191 + t195;

    // compute generate and propagate pairs
    // generate:  the column sum alone exceeds one digit (> 255)
    // propagate: the column sum is 255, so an incoming carry is passed on
    t205 = t196 > 255;  t206 = t196 == 255;   // column 1
    t207 = t197 > 255;  t208 = t197 == 255;   // column 2
    t209 = t198 > 255;  t210 = t198 == 255;   // column 3
    t211 = t199 > 255;  t212 = t199 == 255;   // column 4
    t213 = t200 > 255;  t214 = t200 == 255;   // column 5
    t215 = t201 > 255;  t216 = t201 == 255;   // column 6
    t217 = t202 > 255;  t218 = t202 == 255;   // column 7
    t219 = t203 > 255;  t220 = t203 == 255;   // column 8
    t221 = t204 > 255;  t222 = t204 == 255;   // column 9

    // parallel prefix tree for computing carry bits
    // The pairs are combined in place, so the statement order matters: each
    // line reads the lower pair in the state the preceding lines left it in.
    // The trailing comment names the column span the pair covers afterwards.
    // up-level 1
    t207 = t208 & t205 | t207;  t208 = t208 & t206;   // columns 1..2
    t211 = t212 & t209 | t211;  t212 = t212 & t210;   // columns 3..4
    t215 = t216 & t213 | t215;  t216 = t216 & t214;   // columns 5..6
    t219 = t220 & t217 | t219;  t220 = t220 & t218;   // columns 7..8
    // up-level 2
    t211 = t212 & t207 | t211;  t212 = t212 & t208;   // columns 1..4
    t219 = t220 & t215 | t219;  t220 = t220 & t216;   // columns 5..8
    // up-level 3
    t219 = t220 & t211 | t219;  t220 = t220 & t212;   // columns 1..8
    // down-level 5
    // down-level 6
    t215 = t216 & t211 | t215;  t216 = t216 & t212;   // columns 1..6
    // down-level 7
    // t221/t222 describe the carry out of column 9; no product digit reads it
    t221 = t222 & t219 | t221;  t222 = t222 & t220;   // columns 1..9
    t209 = t210 & t207 | t209;  t210 = t210 & t208;   // columns 1..3
    t213 = t214 & t211 | t213;  t214 = t214 & t212;   // columns 1..5
    t217 = t218 & t215 | t217;  t218 = t218 & t216;   // columns 1..7

    // compute final sum digits as the digits of the product
    // column k receives the carry generated by columns 1..k-1; column 1 has
    // no incoming carry and is left unchanged
    t204 = t204+(t219?1:0);
    t203 = t203+(t217?1:0);
    t202 = t202+(t215?1:0);
    t201 = t201+(t213?1:0);
    t200 = t200+(t211?1:0);
    t199 = t199+(t209?1:0);
    t198 = t198+(t207?1:0);
    t197 = t197+(t205?1:0);

    // get the product digits
    // "% 256" drops the part of each column sum that was passed on as carry
    p[0] = t12;
    p[1] = t196 % 256;
    p[2] = t197 % 256;
    p[3] = t198 % 256;
    p[4] = t199 % 256;
    p[5] = t200 % 256;
    p[6] = t201 % 256;
    p[7] = t202 % 256;
    p[8] = t203 % 256;
    p[9] = t204 % 256;
}