// ************************************************************************** //
//                                                                            //
//    Embedded Systems Group                                                  //
//    Department of Computer Science                                          //
//    University of Kaiserslautern                                            //
//                                                                            //
// ************************************************************************** //
//
// RadixBMulDadda: multiplies two 7-digit radix-256 numbers with a Dadda-style
// column reduction followed by a parallel-prefix carry computation.
//
//   x, y : operands, 7 digits each, digit 0 is the least significant one
//   p    : product, 14 digits, digit 0 is the least significant one
//
// Every intermediate value is split into a carry part (value / 256) and a
// digit part (value % 256).
// NOTE(review): the code never range-checks x[i] and y[i]; the carry logic
// presumably relies on every input digit being below 256 -- confirm against
// the generator / callers.
// NOTE(review): this file looks machine-generated (lexicographically sorted
// temporaries, fully unrolled code); regenerate rather than hand-edit if the
// generator is available.

[7]nat x;
[7]nat y;
[14]nat p;

thread RadixBMulDadda {
    nat t0,t1,
        t10,t100,t101,t102,t103,t104,t105,t106,t107,t108,t109,
        t11,t110,t111,t112,t113,t114,t115,t116,t117,t118,t119,
        t12,t120,t121,t122,t123,t124,t125,t126,t127,t128,t129,
        t13,t130,t131,t132,t133,t134,t135,t136,t137,t138,t139,
        t14,t140,t141,t142,t143,t144,t145,t146,t147,t148,t149,
        t15,t150,t151,t152,t153,t154,t155,t156,t157,t158,t159,
        t16,t160,t161,t162,t163,t164,t165,t166,t167,t168,t169,
        t17,t170,t171,t172,t173,t174,t175,t176,t177,t178,t179,
        t18,t180,t181,t182,t183,t184,t185,t186,t187,t188,t189,
        t19,t190,t191,t192,t193,t194,t195,t196,t197,t198,t199,
        t2,t20,t200,t201,t202,t203,t204,t205,t206,t207,t208,t209,
        t21,t210,t211,t212,t213,t214,t215,t216,t217,t218,t219,
        t22,t220,t221,t222,t223,t224,t225,t226,t227,t228,t229,
        t23,t230,t231,t232,t233,t234,t235,t236,t237,t238,t239,
        t24,t240,t241,t242,t243,t244,t245,t246,t247,t248,t249,
        t25,t250,t251,t252,t253,t254,t255,t256,t257,t258,t259,
        t26,t260,t261,t262,t263,t264,t265,t266,t267,t268,t269,
        t27,t270,t271,t272,t273,t274,t275,t276,t277,t278,t279,
        t28,t280,t281,t282,t283,t284,t285,t286,t287,t288,t289,
        t29,t290,t291,t292,t293,t294,t295,t296,t297,t298,t299,
        t3,t30,t300,t301,t302,t303,t304,t305,t306,t307,t308,t309,
        t31,t310,t311,t312,t313,t314,t315,t316,t317,t318,t319,
        t32,t320,t321,t322,t323,t324,t325,t326,t327,t328,t329,
        t33,t330,t331,t332,t333,t334,t335,t336,t337,t338,t339,
        t34,t340,t341,t342,t343,t344,t345,t346,t347,t348,t349,
        t35,t350,t351,t352,t353,t354,t355,t356,t357,t358,t359,
        t36,t360,t361,t362,t363,t364,t365,t366,t367,t368,t369,
        t37,t370,t371,t372,t373,t374,t375,t376,t377,t378,t379,
        t38,t380,t381,t382,t383,t384,t385,t386,t387,t388,t389,
        t39,t390,t391,t392,t393,t394,t395,t396,t397,t398,t399,
        t4,t40,t400,t401,t402,t403,t404,t405,t406,t407,t408,t409,
        t41,t410,
        t42,t43,t44,t45,t46,t47,t48,t49,
        t5,t50,t51,t52,t53,t54,t55,t56,t57,t58,t59,
        t6,t60,t61,t62,t63,t64,t65,t66,t67,t68,t69,
        t7,t70,t71,t72,t73,t74,t75,t76,t77,t78,t79,
        t8,t80,t81,t82,t83,t84,t85,t86,t87,t88,t89,
        t9,t90,t91,t92,t93,t94,t95,t96,t97,t98,t99;

    // t411, t413, ..., t435 hold the "generate" flags and
    // t412, t414, ..., t436 the "propagate" flags of the final adder.
    bool t411,t412,t413,t414,t415,t416,t417,t418,t419,t420,t421,t422,t423,
         t424,t425,t426,t427,t428,t429,t430,t431,t432,t433,t434,t435,t436;

    // load the operand digits: t0..t6 = x[0..6], t7..t13 = y[0..6]
    t0 = x[0];
    t1 = x[1];
    t2 = x[2];
    t3 = x[3];
    t4 = x[4];
    t5 = x[5];
    t6 = x[6];
    t7 = y[0];
    t8 = y[1];
    t9 = y[2];
    t10 = y[3];
    t11 = y[4];
    t12 = y[5];
    t13 = y[6];

    // compute partial products
    // each line: product x[i]*y[j], its high part (/ 256), its low part (% 256);
    // the low part belongs to column i+j, the high part to column i+j+1
    t14 = t0 * t7;    t15 = t14 / 256;    t16 = t14 % 256;
    t17 = t0 * t8;    t18 = t17 / 256;    t19 = t17 % 256;
    t20 = t1 * t7;    t21 = t20 / 256;    t22 = t20 % 256;
    t23 = t0 * t9;    t24 = t23 / 256;    t25 = t23 % 256;
    t26 = t1 * t8;    t27 = t26 / 256;    t28 = t26 % 256;
    t29 = t2 * t7;    t30 = t29 / 256;    t31 = t29 % 256;
    t32 = t0 * t10;   t33 = t32 / 256;    t34 = t32 % 256;
    t35 = t1 * t9;    t36 = t35 / 256;    t37 = t35 % 256;
    t38 = t2 * t8;    t39 = t38 / 256;    t40 = t38 % 256;
    t41 = t3 * t7;    t42 = t41 / 256;    t43 = t41 % 256;
    t44 = t0 * t11;   t45 = t44 / 256;    t46 = t44 % 256;
    t47 = t1 * t10;   t48 = t47 / 256;    t49 = t47 % 256;
    t50 = t2 * t9;    t51 = t50 / 256;    t52 = t50 % 256;
    t53 = t3 * t8;    t54 = t53 / 256;    t55 = t53 % 256;
    t56 = t4 * t7;    t57 = t56 / 256;    t58 = t56 % 256;
    t59 = t0 * t12;   t60 = t59 / 256;    t61 = t59 % 256;
    t62 = t1 * t11;   t63 = t62 / 256;    t64 = t62 % 256;
    t65 = t2 * t10;   t66 = t65 / 256;    t67 = t65 % 256;
    t68 = t3 * t9;    t69 = t68 / 256;    t70 = t68 % 256;
    t71 = t4 * t8;    t72 = t71 / 256;    t73 = t71 % 256;
    t74 = t5 * t7;    t75 = t74 / 256;    t76 = t74 % 256;
    t77 = t0 * t13;   t78 = t77 / 256;    t79 = t77 % 256;
    t80 = t1 * t12;   t81 = t80 / 256;    t82 = t80 % 256;
    t83 = t2 * t11;   t84 = t83 / 256;    t85 = t83 % 256;
    t86 = t3 * t10;   t87 = t86 / 256;    t88 = t86 % 256;
    t89 = t4 * t9;    t90 = t89 / 256;    t91 = t89 % 256;
    t92 = t5 * t8;    t93 = t92 / 256;    t94 = t92 % 256;
    t95 = t6 * t7;    t96 = t95 / 256;    t97 = t95 % 256;
    t98 = t1 * t13;   t99 = t98 / 256;    t100 = t98 % 256;
    t101 = t2 * t12;  t102 = t101 / 256;  t103 = t101 % 256;
    t104 = t3 * t11;  t105 = t104 / 256;  t106 = t104 % 256;
    t107 = t4 * t10;  t108 = t107 / 256;  t109 = t107 % 256;
    t110 = t5 * t9;   t111 = t110 / 256;  t112 = t110 % 256;
    t113 = t6 * t8;   t114 = t113 / 256;  t115 = t113 % 256;
    t116 = t2 * t13;  t117 = t116 / 256;  t118 = t116 % 256;
    t119 = t3 * t12;  t120 = t119 / 256;  t121 = t119 % 256;
    t122 = t4 * t11;  t123 = t122 / 256;  t124 = t122 % 256;
    t125 = t5 * t10;  t126 = t125 / 256;  t127 = t125 % 256;
    t128 = t6 * t9;   t129 = t128 / 256;  t130 = t128 % 256;
    t131 = t3 * t13;  t132 = t131 / 256;  t133 = t131 % 256;
    t134 = t4 * t12;  t135 = t134 / 256;  t136 = t134 % 256;
    t137 = t5 * t11;  t138 = t137 / 256;  t139 = t137 % 256;
    t140 = t6 * t10;  t141 = t140 / 256;  t142 = t140 % 256;
    t143 = t4 * t13;  t144 = t143 / 256;  t145 = t143 % 256;
    t146 = t5 * t12;  t147 = t146 / 256;  t148 = t146 % 256;
    t149 = t6 * t11;  t150 = t149 / 256;  t151 = t149 % 256;
    t152 = t5 * t13;  t153 = t152 / 256;  t154 = t152 % 256;
    t155 = t6 * t12;  t156 = t155 / 256;  t157 = t155 % 256;
    t158 = t6 * t13;  t159 = t158 / 256;  t160 = t158 % 256;

    // reduce heights of each column to 9
    // each line below (here and in the following stages): sum of two or three
    // entries of one column, carry into the next column (/ 256), digit that
    // stays in the column (% 256)
    t161 = t76 + t73 + t70;     t162 = t161 / 256;  t163 = t161 % 256;
    t164 = t79 + t75;           t165 = t164 / 256;  t166 = t164 % 256;
    t167 = t88 + t85 + t82;     t168 = t167 / 256;  t169 = t167 % 256;
    t170 = t97 + t94 + t91;     t171 = t170 / 256;  t172 = t170 % 256;
    t173 = t87 + t84;           t174 = t173 / 256;  t175 = t173 % 256;
    t176 = t96 + t93 + t90;     t177 = t176 / 256;  t178 = t176 % 256;
    t179 = t106 + t103 + t100;  t180 = t179 / 256;  t181 = t179 % 256;
    t182 = t115 + t112 + t109;  t183 = t182 / 256;  t184 = t182 % 256;
    t185 = t111 + t108 + t105;  t186 = t185 / 256;  t187 = t185 % 256;
    t188 = t121 + t118 + t114;  t189 = t188 / 256;  t190 = t188 % 256;
    t191 = t130 + t127 + t124;  t192 = t191 / 256;  t193 = t191 % 256;
    t194 = t133 + t129;         t195 = t194 / 256;  t196 = t194 % 256;
    t197 = t142 + t139 + t136;  t198 = t197 / 256;  t199 = t197 % 256;

    // reduce heights of each column to 6
    t200 = t43 + t40;           t201 = t200 / 256;  t202 = t200 % 256;
    t203 = t49 + t46 + t42;     t204 = t203 / 256;  t205 = t203 % 256;
    t206 = t58 + t55 + t52;     t207 = t206 / 256;  t208 = t206 % 256;
    t209 = t48 + t45;           t210 = t209 / 256;  t211 = t209 % 256;
    t212 = t57 + t54 + t51;     t213 = t212 / 256;  t214 = t212 % 256;
    t215 = t67 + t64 + t61;     t216 = t215 / 256;  t217 = t215 % 256;
    t218 = t166 + t169 + t172;  t219 = t218 / 256;  t220 = t218 % 256;
    t221 = t63 + t60 + t162;    t222 = t221 / 256;  t223 = t221 % 256;
    t224 = t72 + t69 + t66;     t225 = t224 / 256;  t226 = t224 % 256;
    t227 = t178 + t181 + t184;  t228 = t227 / 256;  t229 = t227 % 256;
    t230 = t168 + t171 + t175;  t231 = t230 / 256;  t232 = t230 % 256;
    t233 = t81 + t78 + t165;    t234 = t233 / 256;  t235 = t233 % 256;
    t236 = t187 + t190 + t193;  t237 = t236 / 256;  t238 = t236 % 256;
    t239 = t177 + t180 + t183;  t240 = t239 / 256;  t241 = t239 % 256;
    t242 = t102 + t99 + t174;   t243 = t242 / 256;  t244 = t242 % 256;
    t245 = t192 + t196 + t199;  t246 = t245 / 256;  t247 = t245 % 256;
    t248 = t117 + t186 + t189;  t249 = t248 / 256;  t250 = t248 % 256;
    t251 = t126 + t123 + t120;  t252 = t251 / 256;  t253 = t251 % 256;
    t254 = t132 + t195 + t198;  t255 = t254 / 256;  t256 = t254 % 256;
    t257 = t141 + t138 + t135;  t258 = t257 / 256;  t259 = t257 % 256;
    t260 = t151 + t148 + t145;  t261 = t260 / 256;  t262 = t260 % 256;
    t263 = t157 + t154 + t150;  t264 = t263 / 256;  t265 = t263 % 256;

    // reduce heights of each column to 4
    t266 = t31 + t28;           t267 = t266 / 256;  t268 = t266 % 256;
    t269 = t27 + t24;           t270 = t269 / 256;  t271 = t269 % 256;
    t272 = t37 + t34 + t30;     t273 = t272 / 256;  t274 = t272 % 256;
    t275 = t201 + t205 + t208;  t276 = t275 / 256;  t277 = t275 % 256;
    t278 = t39 + t36 + t33;     t279 = t278 / 256;  t280 = t278 % 256;
    t281 = t211 + t214 + t217;  t282 = t281 / 256;  t283 = t281 % 256;
    t284 = t163 + t204 + t207;  t285 = t284 / 256;  t286 = t284 % 256;
    t287 = t220 + t223 + t226;  t288 = t287 / 256;  t289 = t287 % 256;
    t290 = t210 + t213 + t216;  t291 = t290 / 256;  t292 = t290 % 256;
    t293 = t229 + t232 + t235;  t294 = t293 / 256;  t295 = t293 % 256;
    t296 = t219 + t222 + t225;  t297 = t296 / 256;  t298 = t296 % 256;
    t299 = t238 + t241 + t244;  t300 = t299 / 256;  t301 = t299 % 256;
    t302 = t228 + t231 + t234;  t303 = t302 / 256;  t304 = t302 % 256;
    t305 = t247 + t250 + t253;  t306 = t305 / 256;  t307 = t305 % 256;
    t308 = t237 + t240 + t243;  t309 = t308 / 256;  t310 = t308 % 256;
    t311 = t256 + t259 + t262;  t312 = t311 / 256;  t313 = t311 % 256;
    t314 = t246 + t249 + t252;  t315 = t314 / 256;  t316 = t314 % 256;
    t317 = t258 + t261 + t265;  t318 = t317 / 256;  t319 = t317 % 256;
    t320 = t147 + t144 + t255;  t321 = t320 / 256;  t322 = t320 % 256;
    t323 = t160 + t156 + t153;  t324 = t323 / 256;  t325 = t323 % 256;

    // reduce heights of each column to 3
    t326 = t25 + t21;           t327 = t326 / 256;  t328 = t326 % 256;
    t329 = t202 + t267 + t271;  t330 = t329 / 256;  t331 = t329 % 256;
    t332 = t270 + t273 + t277;  t333 = t332 / 256;  t334 = t332 % 256;
    t335 = t276 + t279 + t283;  t336 = t335 / 256;  t337 = t335 % 256;
    t338 = t282 + t285 + t289;  t339 = t338 / 256;  t340 = t338 % 256;
    t341 = t288 + t291 + t295;  t342 = t341 / 256;  t343 = t341 % 256;
    t344 = t294 + t297 + t301;  t345 = t344 / 256;  t346 = t344 % 256;
    t347 = t300 + t303 + t307;  t348 = t347 / 256;  t349 = t347 % 256;
    t350 = t306 + t309 + t313;  t351 = t350 / 256;  t352 = t350 % 256;
    t353 = t312 + t315 + t319;  t354 = t353 / 256;  t355 = t353 % 256;
    t356 = t264 + t318 + t321;  t357 = t356 / 256;  t358 = t356 % 256;

    // reduce heights of each column to 2
    t359 = t22 + t19;           t360 = t359 / 256;  t361 = t359 % 256;
    t362 = t18 + t268 + t328;   t363 = t362 / 256;  t364 = t362 % 256;
    t365 = t274 + t327 + t331;  t366 = t365 / 256;  t367 = t365 % 256;
    t368 = t280 + t330 + t334;  t369 = t368 / 256;  t370 = t368 % 256;
    t371 = t286 + t333 + t337;  t372 = t371 / 256;  t373 = t371 % 256;
    t374 = t292 + t336 + t340;  t375 = t374 / 256;  t376 = t374 % 256;
    t377 = t298 + t339 + t343;  t378 = t377 / 256;  t379 = t377 % 256;
    t380 = t304 + t342 + t346;  t381 = t380 / 256;  t382 = t380 % 256;
    t383 = t310 + t345 + t349;  t384 = t383 / 256;  t385 = t383 % 256;
    t386 = t316 + t348 + t352;  t387 = t386 / 256;  t388 = t386 % 256;
    t389 = t322 + t351 + t355;  t390 = t389 / 256;  t391 = t389 % 256;
    t392 = t325 + t354 + t358;  t393 = t392 / 256;  t394 = t392 % 256;
    // t396 (carry out of the top column) is computed but never read
    t395 = t159 + t324 + t357;  t396 = t395 / 256;  t397 = t395 % 256;

    // preliminary addition of the two remaining numbers
    // (column-wise, carries between columns are resolved below)
    t398 = t15 + t361;
    t399 = t360 + t364;
    t400 = t363 + t367;
    t401 = t366 + t370;
    t402 = t369 + t373;
    t403 = t372 + t376;
    t404 = t375 + t379;
    t405 = t378 + t382;
    t406 = t381 + t385;
    t407 = t384 + t388;
    t408 = t387 + t391;
    t409 = t390 + t394;
    t410 = t393 + t397;

    // compute generate and propagate pairs
    // generate : the column sum alone overflows a digit (> 255)
    // propagate: the column sum is exactly 255, so an incoming carry passes on
    t411 = t398 > 255;  t412 = t398 == 255;
    t413 = t399 > 255;  t414 = t399 == 255;
    t415 = t400 > 255;  t416 = t400 == 255;
    t417 = t401 > 255;  t418 = t401 == 255;
    t419 = t402 > 255;  t420 = t402 == 255;
    t421 = t403 > 255;  t422 = t403 == 255;
    t423 = t404 > 255;  t424 = t404 == 255;
    t425 = t405 > 255;  t426 = t405 == 255;
    t427 = t406 > 255;  t428 = t406 == 255;
    t429 = t407 > 255;  t430 = t407 == 255;
    t431 = t408 > 255;  t432 = t408 == 255;
    t433 = t409 > 255;  t434 = t409 == 255;
    t435 = t410 > 255;  t436 = t410 == 255;

    // parallel prefix tree for computing carry bits
    // each step merges a (generate, propagate) pair with that of a lower
    // group in place:  g = p & g_low | g;  p = p & p_low;
    // the statement order matters because the pairs are overwritten

    // up-level 1
    t413 = t414 & t411 | t413;  t414 = t414 & t412;
    t417 = t418 & t415 | t417;  t418 = t418 & t416;
    t421 = t422 & t419 | t421;  t422 = t422 & t420;
    t425 = t426 & t423 | t425;  t426 = t426 & t424;
    t429 = t430 & t427 | t429;  t430 = t430 & t428;
    t433 = t434 & t431 | t433;  t434 = t434 & t432;

    // up-level 2
    t417 = t418 & t413 | t417;  t418 = t418 & t414;
    t425 = t426 & t421 | t425;  t426 = t426 & t422;
    t433 = t434 & t429 | t433;  t434 = t434 & t430;

    // up-level 3
    t425 = t426 & t417 | t425;  t426 = t426 & t418;

    // down-level 5
    t433 = t434 & t425 | t433;  t434 = t434 & t426;

    // down-level 6
    t421 = t422 & t417 | t421;  t422 = t422 & t418;
    t429 = t430 & t425 | t429;  t430 = t430 & t426;

    // down-level 7
    // The t435/t436 update used to be repeated after every other statement of
    // this level; it is idempotent and none of the other statements writes
    // t433..t436, so a single copy yields the same values. t435/t436 (carry
    // out of the most significant column) are not read afterwards.
    t435 = t436 & t433 | t435;  t436 = t436 & t434;
    t415 = t416 & t413 | t415;  t416 = t416 & t414;
    t419 = t420 & t417 | t419;  t420 = t420 & t418;
    t423 = t424 & t421 | t423;  t424 = t424 & t422;
    t427 = t428 & t425 | t427;  t428 = t428 & t426;
    t431 = t432 & t429 | t431;  t432 = t432 & t430;

    // compute final sum digits as the digits of the product
    // each column receives the accumulated carry of all lower columns;
    // t398 is the lowest column of the adder and has no carry input
    t410 = t410+(t433?1:0);
    t409 = t409+(t431?1:0);
    t408 = t408+(t429?1:0);
    t407 = t407+(t427?1:0);
    t406 = t406+(t425?1:0);
    t405 = t405+(t423?1:0);
    t404 = t404+(t421?1:0);
    t403 = t403+(t419?1:0);
    t402 = t402+(t417?1:0);
    t401 = t401+(t415?1:0);
    t400 = t400+(t413?1:0);
    t399 = t399+(t411?1:0);

    // get the product digits
    // p[0] is the low part of x[0]*y[0]; the remaining digits are the adder
    // columns reduced modulo 256
    p[0] = t16;
    p[1] = t398 % 256;
    p[2] = t399 % 256;
    p[3] = t400 % 256;
    p[4] = t401 % 256;
    p[5] = t402 % 256;
    p[6] = t403 % 256;
    p[7] = t404 % 256;
    p[8] = t405 % 256;
    p[9] = t406 % 256;
    p[10] = t407 % 256;
    p[11] = t408 % 256;
    p[12] = t409 % 256;
    p[13] = t410 % 256;
}