// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //

// Operands and result as radix-256 digit vectors; index 0 is the least
// significant digit (p[0] is the low digit of x[0]*y[0]).
// NOTE(review): every x[i] and y[i] is assumed to be < 256 -- confirm callers.
[6]nat x;
[6]nat y;
[12]nat p;

// Computes p = x * y with a Dadda-style column reduction followed by a
// parallel-prefix carry computation for the final two-row addition.
//
// Naming used in the comments below: "column k" is the set of values that
// contribute to p[k]. For every sum s, "s / 256" moves to column k+1 and
// "s % 256" stays in column k.
thread RadixBMulDadda {
    nat t0, t1, t2, t3, t4, t5, t6, t7, t8, t9,
        t10, t11, t12, t13, t14, t15, t16, t17, t18, t19,
        t20, t21, t22, t23, t24, t25, t26, t27, t28, t29,
        t30, t31, t32, t33, t34, t35, t36, t37, t38, t39,
        t40, t41, t42, t43, t44, t45, t46, t47, t48, t49,
        t50, t51, t52, t53, t54, t55, t56, t57, t58, t59,
        t60, t61, t62, t63, t64, t65, t66, t67, t68, t69,
        t70, t71, t72, t73, t74, t75, t76, t77, t78, t79,
        t80, t81, t82, t83, t84, t85, t86, t87, t88, t89,
        t90, t91, t92, t93, t94, t95, t96, t97, t98, t99,
        t100, t101, t102, t103, t104, t105, t106, t107, t108, t109,
        t110, t111, t112, t113, t114, t115, t116, t117, t118, t119,
        t120, t121, t122, t123, t124, t125, t126, t127, t128, t129,
        t130, t131, t132, t133, t134, t135, t136, t137, t138, t139,
        t140, t141, t142, t143, t144, t145, t146, t147, t148, t149,
        t150, t151, t152, t153, t154, t155, t156, t157, t158, t159,
        t160, t161, t162, t163, t164, t165, t166, t167, t168, t169,
        t170, t171, t172, t173, t174, t175, t176, t177, t178, t179,
        t180, t181, t182, t183, t184, t185, t186, t187, t188, t189,
        t190, t191, t192, t193, t194, t195, t196, t197, t198, t199,
        t200, t201, t202, t203, t204, t205, t206, t207, t208, t209,
        t210, t211, t212, t213, t214, t215, t216, t217, t218, t219,
        t220, t221, t222, t223, t224, t225, t226, t227, t228, t229,
        t230, t231, t232, t233, t234, t235, t236, t237, t238, t239,
        t240, t241, t242, t243, t244, t245, t246, t247, t248, t249,
        t250, t251, t252, t253, t254, t255, t256, t257, t258, t259,
        t260, t261, t262, t263, t264, t265, t266, t267, t268, t269,
        t270, t271, t272, t273, t274, t275, t276, t277, t278, t279,
        t280, t281, t282, t283, t284, t285, t286, t287, t288, t289,
        t290, t291, t292, t293, t294, t295, t296, t297, t298;
    bool t299, t300, t301, t302, t303, t304, t305, t306, t307, t308,
         t309, t310, t311, t312, t313, t314, t315, t316, t317, t318;

    // load the operand digits: t0..t5 = x[0..5], t6..t11 = y[0..5]
    t0 = x[0];
    t1 = x[1];
    t2 = x[2];
    t3 = x[3];
    t4 = x[4];
    t5 = x[5];
    t6 = y[0];
    t7 = y[1];
    t8 = y[2];
    t9 = y[3];
    t10 = y[4];
    t11 = y[5];

    // compute partial products x[i]*y[j]; the low digit belongs to
    // column i+j, the high digit to column i+j+1
    t12 = t0 * t6;      t13 = t12 / 256;    t14 = t12 % 256;
    t15 = t0 * t7;      t16 = t15 / 256;    t17 = t15 % 256;
    t18 = t1 * t6;      t19 = t18 / 256;    t20 = t18 % 256;
    t21 = t0 * t8;      t22 = t21 / 256;    t23 = t21 % 256;
    t24 = t1 * t7;      t25 = t24 / 256;    t26 = t24 % 256;
    t27 = t2 * t6;      t28 = t27 / 256;    t29 = t27 % 256;
    t30 = t0 * t9;      t31 = t30 / 256;    t32 = t30 % 256;
    t33 = t1 * t8;      t34 = t33 / 256;    t35 = t33 % 256;
    t36 = t2 * t7;      t37 = t36 / 256;    t38 = t36 % 256;
    t39 = t3 * t6;      t40 = t39 / 256;    t41 = t39 % 256;
    t42 = t0 * t10;     t43 = t42 / 256;    t44 = t42 % 256;
    t45 = t1 * t9;      t46 = t45 / 256;    t47 = t45 % 256;
    t48 = t2 * t8;      t49 = t48 / 256;    t50 = t48 % 256;
    t51 = t3 * t7;      t52 = t51 / 256;    t53 = t51 % 256;
    t54 = t4 * t6;      t55 = t54 / 256;    t56 = t54 % 256;
    t57 = t0 * t11;     t58 = t57 / 256;    t59 = t57 % 256;
    t60 = t1 * t10;     t61 = t60 / 256;    t62 = t60 % 256;
    t63 = t2 * t9;      t64 = t63 / 256;    t65 = t63 % 256;
    t66 = t3 * t8;      t67 = t66 / 256;    t68 = t66 % 256;
    t69 = t4 * t7;      t70 = t69 / 256;    t71 = t69 % 256;
    t72 = t5 * t6;      t73 = t72 / 256;    t74 = t72 % 256;
    t75 = t1 * t11;     t76 = t75 / 256;    t77 = t75 % 256;
    t78 = t2 * t10;     t79 = t78 / 256;    t80 = t78 % 256;
    t81 = t3 * t9;      t82 = t81 / 256;    t83 = t81 % 256;
    t84 = t4 * t8;      t85 = t84 / 256;    t86 = t84 % 256;
    t87 = t5 * t7;      t88 = t87 / 256;    t89 = t87 % 256;
    t90 = t2 * t11;     t91 = t90 / 256;    t92 = t90 % 256;
    t93 = t3 * t10;     t94 = t93 / 256;    t95 = t93 % 256;
    t96 = t4 * t9;      t97 = t96 / 256;    t98 = t96 % 256;
    t99 = t5 * t8;      t100 = t99 / 256;   t101 = t99 % 256;
    t102 = t3 * t11;    t103 = t102 / 256;  t104 = t102 % 256;
    t105 = t4 * t10;    t106 = t105 / 256;  t107 = t105 % 256;
    t108 = t5 * t9;     t109 = t108 / 256;  t110 = t108 % 256;
    t111 = t4 * t11;    t112 = t111 / 256;  t113 = t111 % 256;
    t114 = t5 * t10;    t115 = t114 / 256;  t116 = t114 % 256;
    t117 = t5 * t11;    t118 = t117 / 256;  t119 = t117 % 256;

    // reduce heights of each column to 9
    t120 = t74 + t71 + t68;     t121 = t120 / 256;  t122 = t120 % 256;
    t123 = t80 + t77;           t124 = t123 / 256;  t125 = t123 % 256;
    t126 = t89 + t86 + t83;     t127 = t126 / 256;  t128 = t126 % 256;
    t129 = t101 + t98 + t95;    t130 = t129 / 256;  t131 = t129 % 256;

    // reduce heights of each column to 6
    t132 = t41 + t38;           t133 = t132 / 256;  t134 = t132 % 256;
    t135 = t47 + t44 + t40;     t136 = t135 / 256;  t137 = t135 % 256;
    t138 = t56 + t53 + t50;     t139 = t138 / 256;  t140 = t138 % 256;
    t141 = t46 + t43;           t142 = t141 / 256;  t143 = t141 % 256;
    t144 = t55 + t52 + t49;     t145 = t144 / 256;  t146 = t144 % 256;
    t147 = t65 + t62 + t59;     t148 = t147 / 256;  t149 = t147 % 256;
    t150 = t121 + t125 + t128;  t151 = t150 / 256;  t152 = t150 % 256;
    t153 = t64 + t61 + t58;     t154 = t153 / 256;  t155 = t153 % 256;
    t156 = t73 + t70 + t67;     t157 = t156 / 256;  t158 = t156 % 256;
    t159 = t124 + t127 + t131;  t160 = t159 / 256;  t161 = t159 % 256;
    t162 = t82 + t79 + t76;     t163 = t162 / 256;  t164 = t162 % 256;
    t165 = t92 + t88 + t85;     t166 = t165 / 256;  t167 = t165 % 256;
    t168 = t91 + t130;          t169 = t168 / 256;  t170 = t168 % 256;
    t171 = t100 + t97 + t94;    t172 = t171 / 256;  t173 = t171 % 256;
    t174 = t110 + t107 + t104;  t175 = t174 / 256;  t176 = t174 % 256;
    t177 = t116 + t113 + t109;  t178 = t177 / 256;  t179 = t177 % 256;

    // reduce heights of each column to 4
    t180 = t29 + t26;           t181 = t180 / 256;  t182 = t180 % 256;
    t183 = t25 + t22;           t184 = t183 / 256;  t185 = t183 % 256;
    t186 = t35 + t32 + t28;     t187 = t186 / 256;  t188 = t186 % 256;
    t189 = t133 + t137 + t140;  t190 = t189 / 256;  t191 = t189 % 256;
    t192 = t37 + t34 + t31;     t193 = t192 / 256;  t194 = t192 % 256;
    t195 = t143 + t146 + t149;  t196 = t195 / 256;  t197 = t195 % 256;
    t198 = t122 + t136 + t139;  t199 = t198 / 256;  t200 = t198 % 256;
    t201 = t152 + t155 + t158;  t202 = t201 / 256;  t203 = t201 % 256;
    t204 = t142 + t145 + t148;  t205 = t204 / 256;  t206 = t204 % 256;
    t207 = t161 + t164 + t167;  t208 = t207 / 256;  t209 = t207 % 256;
    t210 = t151 + t154 + t157;  t211 = t210 / 256;  t212 = t210 % 256;
    t213 = t170 + t173 + t176;  t214 = t213 / 256;  t215 = t213 % 256;
    t216 = t160 + t163 + t166;  t217 = t216 / 256;  t218 = t216 % 256;
    t219 = t172 + t175 + t179;  t220 = t219 / 256;  t221 = t219 % 256;
    t222 = t106 + t103 + t169;  t223 = t222 / 256;  t224 = t222 % 256;
    t225 = t119 + t115 + t112;  t226 = t225 / 256;  t227 = t225 % 256;

    // reduce heights of each column to 3
    t228 = t23 + t19;           t229 = t228 / 256;  t230 = t228 % 256;
    t231 = t134 + t181 + t185;  t232 = t231 / 256;  t233 = t231 % 256;
    t234 = t184 + t187 + t191;  t235 = t234 / 256;  t236 = t234 % 256;
    t237 = t190 + t193 + t197;  t238 = t237 / 256;  t239 = t237 % 256;
    t240 = t196 + t199 + t203;  t241 = t240 / 256;  t242 = t240 % 256;
    t243 = t202 + t205 + t209;  t244 = t243 / 256;  t245 = t243 % 256;
    t246 = t208 + t211 + t215;  t247 = t246 / 256;  t248 = t246 % 256;
    t249 = t214 + t217 + t221;  t250 = t249 / 256;  t251 = t249 % 256;
    t252 = t178 + t220 + t223;  t253 = t252 / 256;  t254 = t252 % 256;

    // reduce heights of each column to 2
    t255 = t20 + t17;           t256 = t255 / 256;  t257 = t255 % 256;
    t258 = t16 + t182 + t230;   t259 = t258 / 256;  t260 = t258 % 256;
    t261 = t188 + t229 + t233;  t262 = t261 / 256;  t263 = t261 % 256;
    t264 = t194 + t232 + t236;  t265 = t264 / 256;  t266 = t264 % 256;
    t267 = t200 + t235 + t239;  t268 = t267 / 256;  t269 = t267 % 256;
    t270 = t206 + t238 + t242;  t271 = t270 / 256;  t272 = t270 % 256;
    t273 = t212 + t241 + t245;  t274 = t273 / 256;  t275 = t273 % 256;
    t276 = t218 + t244 + t248;  t277 = t276 / 256;  t278 = t276 % 256;
    t279 = t224 + t247 + t251;  t280 = t279 / 256;  t281 = t279 % 256;
    t282 = t227 + t250 + t254;  t283 = t282 / 256;  t284 = t282 % 256;
    // t286 would belong to column 12; it is not read afterwards
    t285 = t118 + t226 + t253;  t286 = t285 / 256;  t287 = t285 % 256;

    // preliminary addition of the two remaining numbers;
    // t288 is column 1, t289 column 2, ..., t298 column 11
    t288 = t13 + t257;
    t289 = t256 + t260;
    t290 = t259 + t263;
    t291 = t262 + t266;
    t292 = t265 + t269;
    t293 = t268 + t272;
    t294 = t271 + t275;
    t295 = t274 + t278;
    t296 = t277 + t281;
    t297 = t280 + t284;
    t298 = t283 + t287;

    // compute generate and propagate pairs for columns 1..10:
    // generate  = the column overflows on its own (> 255),
    // propagate = the column overflows exactly when a carry comes in (== 255).
    // Column 11 needs no pair because its carry-out is never consumed.
    t299 = t288 > 255;      t300 = t288 == 255;     // column 1
    t301 = t289 > 255;      t302 = t289 == 255;     // column 2
    t303 = t290 > 255;      t304 = t290 == 255;     // column 3
    t305 = t291 > 255;      t306 = t291 == 255;     // column 4
    t307 = t292 > 255;      t308 = t292 == 255;     // column 5
    t309 = t293 > 255;      t310 = t293 == 255;     // column 6
    t311 = t294 > 255;      t312 = t294 == 255;     // column 7
    t313 = t295 > 255;      t314 = t295 == 255;     // column 8
    t315 = t296 > 255;      t316 = t296 == 255;     // column 9
    t317 = t297 > 255;      t318 = t297 == 255;     // column 10

    // parallel prefix tree for computing carry bits; after the tree each
    // generate bit of column k is the carry out of columns 1..k
    // up-level 1: pairs of adjacent columns
    t301 = (t302 & t299) | t301;    t302 = t302 & t300;     // columns 1..2
    t305 = (t306 & t303) | t305;    t306 = t306 & t304;     // columns 3..4
    t309 = (t310 & t307) | t309;    t310 = t310 & t308;     // columns 5..6
    t313 = (t314 & t311) | t313;    t314 = t314 & t312;     // columns 7..8
    t317 = (t318 & t315) | t317;    t318 = t318 & t316;     // columns 9..10
    // up-level 2: groups of four columns
    t305 = (t306 & t301) | t305;    t306 = t306 & t302;     // columns 1..4
    t313 = (t314 & t309) | t313;    t314 = t314 & t310;     // columns 5..8
    // up-level 3: group of eight columns
    t313 = (t314 & t305) | t313;    t314 = t314 & t306;     // columns 1..8
    // down-level 1: complete the remaining even columns at distance two
    t317 = (t318 & t313) | t317;    t318 = t318 & t314;     // columns 1..10
    t309 = (t310 & t305) | t309;    t310 = t310 & t306;     // columns 1..6
    // down-level 2: complete the odd columns from their left neighbour
    t303 = (t304 & t301) | t303;    t304 = t304 & t302;     // columns 1..3
    t307 = (t308 & t305) | t307;    t308 = t308 & t306;     // columns 1..5
    // Column 7 must take the carry out of columns 1..6 (t309/t310), not the
    // carry out of column 10; using t317 here yields a wrong p[8] whenever
    // t294 == 255 and those two carries differ.
    t311 = (t312 & t309) | t311;    t312 = t312 & t310;     // columns 1..7
    // Column 9 must take the carry out of columns 1..8 (t313/t314), not the
    // carry out of column 6; using t309 here yields a wrong p[10] whenever
    // t296 == 255 and those two carries differ.
    t315 = (t316 & t313) | t315;    t316 = t316 & t314;     // columns 1..9

    // compute final sum digits as the digits of the product:
    // each column receives the carry out of all lower columns
    t289 = t289 + (t299 ? 1 : 0);
    t290 = t290 + (t301 ? 1 : 0);
    t291 = t291 + (t303 ? 1 : 0);
    t292 = t292 + (t305 ? 1 : 0);
    t293 = t293 + (t307 ? 1 : 0);
    t294 = t294 + (t309 ? 1 : 0);
    t295 = t295 + (t311 ? 1 : 0);
    t296 = t296 + (t313 ? 1 : 0);
    t297 = t297 + (t315 ? 1 : 0);
    t298 = t298 + (t317 ? 1 : 0);

    // get the product digits
    p[0] = t14;
    p[1] = t288 % 256;
    p[2] = t289 % 256;
    p[3] = t290 % 256;
    p[4] = t291 % 256;
    p[5] = t292 % 256;
    p[6] = t293 % 256;
    p[7] = t294 % 256;
    p[8] = t295 % 256;
    p[9] = t296 % 256;
    p[10] = t297 % 256;
    p[11] = t298 % 256;
}