// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //

// Operands and result of the multiplication below.
// x and y hold 7 values each, p receives 14 values. Index 0 is the least
// significant position: p[0] is the low part of x[0] * y[0].
// NOTE(review): the arithmetic below splits every intermediate value with
// "/ 256" and "% 256", so it presumably expects every x[i] and y[i] to be a
// radix-256 digit (0..255) -- confirm against the caller.
[7]nat x;
[7]nat y;
[14]nat p;

// Straight-line multiplier: p = x * y on radix-256 digit vectors
// (given the digit-range assumption noted above).
//
// Structure of the computation:
//   1. every x[i] * y[j] is split into a high part (/ 256) and a low part
//      (% 256); the low part belongs to column i + j, the high part to
//      column i + j + 1;
//   2. in several stages, two or three values of the same column are added
//      and the sum is again split into a carry (/ 256, next column) and a
//      digit (% 256, same column), until at most two values per column
//      remain (the thread name suggests this follows the Dadda scheme);
//   3. the two remaining values per column are added; a generate flag
//      (sum > 255) and a propagate flag (sum == 255) are derived per column
//      and combined by a parallel prefix tree into the carry for each column;
//   4. the carries are added in and each column is reduced modulo 256.
//
// All t<N> names are temporaries; there are no loops or branches.
thread RadixBMulDadda {
    nat t0,t1,
        t10,t100,t101,t102,t103,t104,t105,t106,t107,t108,t109,
        t11,t110,t111,t112,t113,t114,t115,t116,t117,t118,t119,
        t12,t120,t121,t122,t123,t124,t125,t126,t127,t128,t129,
        t13,t130,t131,t132,t133,t134,t135,t136,t137,t138,t139,
        t14,t140,t141,t142,t143,t144,t145,t146,t147,t148,t149,
        t15,t150,t151,t152,t153,t154,t155,t156,t157,t158,t159,
        t16,t160,t161,t162,t163,t164,t165,t166,t167,t168,t169,
        t17,t170,t171,t172,t173,t174,t175,t176,t177,t178,t179,
        t18,t180,t181,t182,t183,t184,t185,t186,t187,t188,t189,
        t19,t190,t191,t192,t193,t194,t195,t196,t197,t198,t199,
        t2,
        t20,t200,t201,t202,t203,t204,t205,t206,t207,t208,t209,
        t21,t210,t211,t212,t213,t214,t215,t216,t217,t218,t219,
        t22,t220,t221,t222,t223,t224,t225,t226,t227,t228,t229,
        t23,t230,t231,t232,t233,t234,t235,t236,t237,t238,t239,
        t24,t240,t241,t242,t243,t244,t245,t246,t247,t248,t249,
        t25,t250,t251,t252,t253,t254,t255,t256,t257,t258,t259,
        t26,t260,t261,t262,t263,t264,t265,t266,t267,t268,t269,
        t27,t270,t271,t272,t273,t274,t275,t276,t277,t278,t279,
        t28,t280,t281,t282,t283,t284,t285,t286,t287,t288,t289,
        t29,t290,t291,t292,t293,t294,t295,t296,t297,t298,t299,
        t3,
        t30,t300,t301,t302,t303,t304,t305,t306,t307,t308,t309,
        t31,t310,t311,t312,t313,t314,t315,t316,t317,t318,t319,
        t32,t320,t321,t322,t323,t324,t325,t326,t327,t328,t329,
        t33,t330,t331,t332,t333,t334,t335,t336,t337,t338,t339,
        t34,t340,t341,t342,t343,t344,t345,t346,t347,t348,t349,
        t35,t350,t351,t352,t353,t354,t355,t356,t357,t358,t359,
        t36,t360,t361,t362,t363,t364,t365,t366,t367,t368,t369,
        t37,t370,t371,t372,t373,t374,t375,t376,t377,t378,t379,
        t38,t380,t381,t382,t383,t384,t385,t386,t387,t388,t389,
        t39,t390,t391,t392,t393,t394,t395,t396,
        t4,t40,t41,t42,t43,t44,t45,t46,t47,t48,t49,
        t5,t50,t51,t52,t53,t54,t55,t56,t57,t58,t59,
        t6,t60,t61,t62,t63,t64,t65,t66,t67,t68,t69,
        t7,t70,t71,t72,t73,t74,t75,t76,t77,t78,t79,
        t8,t80,t81,t82,t83,t84,t85,t86,t87,t88,t89,
        t9,t90,t91,t92,t93,t94,t95,t96,t97,t98,t99;
    // generate/propagate flags of the final addition (two per column)
    bool t397,t398,t399,t400,t401,t402,t403,t404,t405,t406,t407,t408,t409,
         t410,t411,t412,t413,t414,t415,t416,t417,t418,t419,t420,t421,t422;

    // compute partial products
    // Each line: product, its high part (/ 256), its low part (% 256).
    // i + j = 0
    t0 = x[0] * y[0]; t1 = t0 / 256; t2 = t0 % 256;
    // i + j = 1
    t3 = x[0] * y[1]; t4 = t3 / 256; t5 = t3 % 256;
    t6 = x[1] * y[0]; t7 = t6 / 256; t8 = t6 % 256;
    // i + j = 2
    t9 = x[0] * y[2]; t10 = t9 / 256; t11 = t9 % 256;
    t12 = x[1] * y[1]; t13 = t12 / 256; t14 = t12 % 256;
    t15 = x[2] * y[0]; t16 = t15 / 256; t17 = t15 % 256;
    // i + j = 3
    t18 = x[0] * y[3]; t19 = t18 / 256; t20 = t18 % 256;
    t21 = x[1] * y[2]; t22 = t21 / 256; t23 = t21 % 256;
    t24 = x[2] * y[1]; t25 = t24 / 256; t26 = t24 % 256;
    t27 = x[3] * y[0]; t28 = t27 / 256; t29 = t27 % 256;
    // i + j = 4
    t30 = x[0] * y[4]; t31 = t30 / 256; t32 = t30 % 256;
    t33 = x[1] * y[3]; t34 = t33 / 256; t35 = t33 % 256;
    t36 = x[2] * y[2]; t37 = t36 / 256; t38 = t36 % 256;
    t39 = x[3] * y[1]; t40 = t39 / 256; t41 = t39 % 256;
    t42 = x[4] * y[0]; t43 = t42 / 256; t44 = t42 % 256;
    // i + j = 5
    t45 = x[0] * y[5]; t46 = t45 / 256; t47 = t45 % 256;
    t48 = x[1] * y[4]; t49 = t48 / 256; t50 = t48 % 256;
    t51 = x[2] * y[3]; t52 = t51 / 256; t53 = t51 % 256;
    t54 = x[3] * y[2]; t55 = t54 / 256; t56 = t54 % 256;
    t57 = x[4] * y[1]; t58 = t57 / 256; t59 = t57 % 256;
    t60 = x[5] * y[0]; t61 = t60 / 256; t62 = t60 % 256;
    // i + j = 6
    t63 = x[0] * y[6]; t64 = t63 / 256; t65 = t63 % 256;
    t66 = x[1] * y[5]; t67 = t66 / 256; t68 = t66 % 256;
    t69 = x[2] * y[4]; t70 = t69 / 256; t71 = t69 % 256;
    t72 = x[3] * y[3]; t73 = t72 / 256; t74 = t72 % 256;
    t75 = x[4] * y[2]; t76 = t75 / 256; t77 = t75 % 256;
    t78 = x[5] * y[1]; t79 = t78 / 256; t80 = t78 % 256;
    t81 = x[6] * y[0]; t82 = t81 / 256; t83 = t81 % 256;
    // i + j = 7
    t84 = x[1] * y[6]; t85 = t84 / 256; t86 = t84 % 256;
    t87 = x[2] * y[5]; t88 = t87 / 256; t89 = t87 % 256;
    t90 = x[3] * y[4]; t91 = t90 / 256; t92 = t90 % 256;
    t93 = x[4] * y[3]; t94 = t93 / 256; t95 = t93 % 256;
    t96 = x[5] * y[2]; t97 = t96 / 256; t98 = t96 % 256;
    t99 = x[6] * y[1]; t100 = t99 / 256; t101 = t99 % 256;
    // i + j = 8
    t102 = x[2] * y[6]; t103 = t102 / 256; t104 = t102 % 256;
    t105 = x[3] * y[5]; t106 = t105 / 256; t107 = t105 % 256;
    t108 = x[4] * y[4]; t109 = t108 / 256; t110 = t108 % 256;
    t111 = x[5] * y[3]; t112 = t111 / 256; t113 = t111 % 256;
    t114 = x[6] * y[2]; t115 = t114 / 256; t116 = t114 % 256;
    // i + j = 9
    t117 = x[3] * y[6]; t118 = t117 / 256; t119 = t117 % 256;
    t120 = x[4] * y[5]; t121 = t120 / 256; t122 = t120 % 256;
    t123 = x[5] * y[4]; t124 = t123 / 256; t125 = t123 % 256;
    t126 = x[6] * y[3]; t127 = t126 / 256; t128 = t126 % 256;
    // i + j = 10
    t129 = x[4] * y[6]; t130 = t129 / 256; t131 = t129 % 256;
    t132 = x[5] * y[5]; t133 = t132 / 256; t134 = t132 % 256;
    t135 = x[6] * y[4]; t136 = t135 / 256; t137 = t135 % 256;
    // i + j = 11
    t138 = x[5] * y[6]; t139 = t138 / 256; t140 = t138 % 256;
    t141 = x[6] * y[5]; t142 = t141 / 256; t143 = t141 % 256;
    // i + j = 12
    t144 = x[6] * y[6]; t145 = t144 / 256; t146 = t144 % 256;

    // reduce heights of each column to 9
    // Each line from here to the final addition: sum of two or three values,
    // then its carry (/ 256) and its digit (% 256).
    t147 = t62 + t59 + t56; t148 = t147 / 256; t149 = t147 % 256;
    t150 = t65 + t61; t151 = t150 / 256; t152 = t150 % 256;
    t153 = t74 + t71 + t68; t154 = t153 / 256; t155 = t153 % 256;
    t156 = t83 + t80 + t77; t157 = t156 / 256; t158 = t156 % 256;
    t159 = t73 + t70; t160 = t159 / 256; t161 = t159 % 256;
    t162 = t82 + t79 + t76; t163 = t162 / 256; t164 = t162 % 256;
    t165 = t92 + t89 + t86; t166 = t165 / 256; t167 = t165 % 256;
    t168 = t101 + t98 + t95; t169 = t168 / 256; t170 = t168 % 256;
    t171 = t97 + t94 + t91; t172 = t171 / 256; t173 = t171 % 256;
    t174 = t107 + t104 + t100; t175 = t174 / 256; t176 = t174 % 256;
    t177 = t116 + t113 + t110; t178 = t177 / 256; t179 = t177 % 256;
    t180 = t119 + t115; t181 = t180 / 256; t182 = t180 % 256;
    t183 = t128 + t125 + t122; t184 = t183 / 256; t185 = t183 % 256;

    // reduce heights of each column to 6
    t186 = t29 + t26; t187 = t186 / 256; t188 = t186 % 256;
    t189 = t35 + t32 + t28; t190 = t189 / 256; t191 = t189 % 256;
    t192 = t44 + t41 + t38; t193 = t192 / 256; t194 = t192 % 256;
    t195 = t34 + t31; t196 = t195 / 256; t197 = t195 % 256;
    t198 = t43 + t40 + t37; t199 = t198 / 256; t200 = t198 % 256;
    t201 = t53 + t50 + t47; t202 = t201 / 256; t203 = t201 % 256;
    t204 = t152 + t155 + t158; t205 = t204 / 256; t206 = t204 % 256;
    t207 = t49 + t46 + t148; t208 = t207 / 256; t209 = t207 % 256;
    t210 = t58 + t55 + t52; t211 = t210 / 256; t212 = t210 % 256;
    t213 = t164 + t167 + t170; t214 = t213 / 256; t215 = t213 % 256;
    t216 = t154 + t157 + t161; t217 = t216 / 256; t218 = t216 % 256;
    t219 = t67 + t64 + t151; t220 = t219 / 256; t221 = t219 % 256;
    t222 = t173 + t176 + t179; t223 = t222 / 256; t224 = t222 % 256;
    t225 = t163 + t166 + t169; t226 = t225 / 256; t227 = t225 % 256;
    t228 = t88 + t85 + t160; t229 = t228 / 256; t230 = t228 % 256;
    t231 = t178 + t182 + t185; t232 = t231 / 256; t233 = t231 % 256;
    t234 = t103 + t172 + t175; t235 = t234 / 256; t236 = t234 % 256;
    t237 = t112 + t109 + t106; t238 = t237 / 256; t239 = t237 % 256;
    t240 = t118 + t181 + t184; t241 = t240 / 256; t242 = t240 % 256;
    t243 = t127 + t124 + t121; t244 = t243 / 256; t245 = t243 % 256;
    t246 = t137 + t134 + t131; t247 = t246 / 256; t248 = t246 % 256;
    t249 = t143 + t140 + t136; t250 = t249 / 256; t251 = t249 % 256;

    // reduce heights of each column to 4
    t252 = t17 + t14; t253 = t252 / 256; t254 = t252 % 256;
    t255 = t13 + t10; t256 = t255 / 256; t257 = t255 % 256;
    t258 = t23 + t20 + t16; t259 = t258 / 256; t260 = t258 % 256;
    t261 = t187 + t191 + t194; t262 = t261 / 256; t263 = t261 % 256;
    t264 = t25 + t22 + t19; t265 = t264 / 256; t266 = t264 % 256;
    t267 = t197 + t200 + t203; t268 = t267 / 256; t269 = t267 % 256;
    t270 = t149 + t190 + t193; t271 = t270 / 256; t272 = t270 % 256;
    t273 = t206 + t209 + t212; t274 = t273 / 256; t275 = t273 % 256;
    t276 = t196 + t199 + t202; t277 = t276 / 256; t278 = t276 % 256;
    t279 = t215 + t218 + t221; t280 = t279 / 256; t281 = t279 % 256;
    t282 = t205 + t208 + t211; t283 = t282 / 256; t284 = t282 % 256;
    t285 = t224 + t227 + t230; t286 = t285 / 256; t287 = t285 % 256;
    t288 = t214 + t217 + t220; t289 = t288 / 256; t290 = t288 % 256;
    t291 = t233 + t236 + t239; t292 = t291 / 256; t293 = t291 % 256;
    t294 = t223 + t226 + t229; t295 = t294 / 256; t296 = t294 % 256;
    t297 = t242 + t245 + t248; t298 = t297 / 256; t299 = t297 % 256;
    t300 = t232 + t235 + t238; t301 = t300 / 256; t302 = t300 % 256;
    t303 = t244 + t247 + t251; t304 = t303 / 256; t305 = t303 % 256;
    t306 = t133 + t130 + t241; t307 = t306 / 256; t308 = t306 % 256;
    t309 = t146 + t142 + t139; t310 = t309 / 256; t311 = t309 % 256;

    // reduce heights of each column to 3
    t312 = t11 + t7; t313 = t312 / 256; t314 = t312 % 256;
    t315 = t188 + t253 + t257; t316 = t315 / 256; t317 = t315 % 256;
    t318 = t256 + t259 + t263; t319 = t318 / 256; t320 = t318 % 256;
    t321 = t262 + t265 + t269; t322 = t321 / 256; t323 = t321 % 256;
    t324 = t268 + t271 + t275; t325 = t324 / 256; t326 = t324 % 256;
    t327 = t274 + t277 + t281; t328 = t327 / 256; t329 = t327 % 256;
    t330 = t280 + t283 + t287; t331 = t330 / 256; t332 = t330 % 256;
    t333 = t286 + t289 + t293; t334 = t333 / 256; t335 = t333 % 256;
    t336 = t292 + t295 + t299; t337 = t336 / 256; t338 = t336 % 256;
    t339 = t298 + t301 + t305; t340 = t339 / 256; t341 = t339 % 256;
    t342 = t250 + t304 + t307; t343 = t342 / 256; t344 = t342 % 256;

    // reduce heights of each column to 2
    t345 = t8 + t5; t346 = t345 / 256; t347 = t345 % 256;
    t348 = t4 + t254 + t314; t349 = t348 / 256; t350 = t348 % 256;
    t351 = t260 + t313 + t317; t352 = t351 / 256; t353 = t351 % 256;
    t354 = t266 + t316 + t320; t355 = t354 / 256; t356 = t354 % 256;
    t357 = t272 + t319 + t323; t358 = t357 / 256; t359 = t357 % 256;
    t360 = t278 + t322 + t326; t361 = t360 / 256; t362 = t360 % 256;
    t363 = t284 + t325 + t329; t364 = t363 / 256; t365 = t363 % 256;
    t366 = t290 + t328 + t332; t367 = t366 / 256; t368 = t366 % 256;
    t369 = t296 + t331 + t335; t370 = t369 / 256; t371 = t369 % 256;
    t372 = t302 + t334 + t338; t373 = t372 / 256; t374 = t372 % 256;
    t375 = t308 + t337 + t341; t376 = t375 / 256; t377 = t375 % 256;
    t378 = t311 + t340 + t344; t379 = t378 / 256; t380 = t378 % 256;
    // NOTE(review): t382 is not read by any later statement in this thread.
    t381 = t145 + t310 + t343; t382 = t381 / 256; t383 = t381 % 256;

    // preliminary addition of the two remaining numbers
    // t384 .. t396 are the carry-less column sums that end up in p[1] .. p[13].
    t384 = t1 + t347;
    t385 = t346 + t350;
    t386 = t349 + t353;
    t387 = t352 + t356;
    t388 = t355 + t359;
    t389 = t358 + t362;
    t390 = t361 + t365;
    t391 = t364 + t368;
    t392 = t367 + t371;
    t393 = t370 + t374;
    t394 = t373 + t377;
    t395 = t376 + t380;
    t396 = t379 + t383;

    // compute generate and propagate pairs
    // Per column: first flag = sum > 255 (generate),
    //             second flag = sum == 255 (propagate).
    t397 = t384 > 255; t398 = t384 == 255;
    t399 = t385 > 255; t400 = t385 == 255;
    t401 = t386 > 255; t402 = t386 == 255;
    t403 = t387 > 255; t404 = t387 == 255;
    t405 = t388 > 255; t406 = t388 == 255;
    t407 = t389 > 255; t408 = t389 == 255;
    t409 = t390 > 255; t410 = t390 == 255;
    t411 = t391 > 255; t412 = t391 == 255;
    t413 = t392 > 255; t414 = t392 == 255;
    t415 = t393 > 255; t416 = t393 == 255;
    t417 = t394 > 255; t418 = t394 == 255;
    t419 = t395 > 255; t420 = t395 == 255;
    t421 = t396 > 255; t422 = t396 == 255;

    // parallel prefix tree for computing carry bits
    // Each pair of statements folds the flags of a lower range into a column:
    //   generate  = propagate & lower_generate | generate
    //   propagate = propagate & lower_propagate
    // (assumes & binds tighter than |, as in C -- TODO confirm).
    // The flags are overwritten in place, so the statement order matters:
    // within a pair the generate flag must be updated before the propagate
    // flag, and later levels read the results of earlier ones.
    // up-level 1
    t399 = t400 & t397 | t399; t400 = t400 & t398;
    t403 = t404 & t401 | t403; t404 = t404 & t402;
    t407 = t408 & t405 | t407; t408 = t408 & t406;
    t411 = t412 & t409 | t411; t412 = t412 & t410;
    t415 = t416 & t413 | t415; t416 = t416 & t414;
    t419 = t420 & t417 | t419; t420 = t420 & t418;
    // up-level 2
    t403 = t404 & t399 | t403; t404 = t404 & t400;
    t411 = t412 & t407 | t411; t412 = t412 & t408;
    t419 = t420 & t415 | t419; t420 = t420 & t416;
    // up-level 3
    t411 = t412 & t403 | t411; t412 = t412 & t404;
    // down-level 5
    t419 = t420 & t411 | t419; t420 = t420 & t412;
    // down-level 6
    t407 = t408 & t403 | t407; t408 = t408 & t404;
    t415 = t416 & t411 | t415; t416 = t416 & t412;
    // down-level 7
    // NOTE(review): the t421/t422 pair below appears six times in this level.
    // t419 and t420 are not modified in between, so the repetitions recompute
    // the same values; t421 and t422 are also not read by any later statement
    // in this thread.
    t421 = t422 & t419 | t421; t422 = t422 & t420;
    t401 = t402 & t399 | t401; t402 = t402 & t400;
    t421 = t422 & t419 | t421; t422 = t422 & t420;
    t405 = t406 & t403 | t405; t406 = t406 & t404;
    t421 = t422 & t419 | t421; t422 = t422 & t420;
    t409 = t410 & t407 | t409; t410 = t410 & t408;
    t421 = t422 & t419 | t421; t422 = t422 & t420;
    t413 = t414 & t411 | t413; t414 = t414 & t412;
    t421 = t422 & t419 | t421; t422 = t422 & t420;
    t417 = t418 & t415 | t417; t418 = t418 & t416;
    t421 = t422 & t419 | t421; t422 = t422 & t420;

    // compute final sum digits as the digits of the product
    // Each column sum receives the generate flag of the column below it as
    // its incoming carry; t384 (lowest column of the addition) gets none.
    // NOTE(review): "& (bool)1" looks like a no-op mask on a bool -- confirm.
    t396 = t396 + (nat) (t419&(bool)1);
    t395 = t395 + (nat) (t417&(bool)1);
    t394 = t394 + (nat) (t415&(bool)1);
    t393 = t393 + (nat) (t413&(bool)1);
    t392 = t392 + (nat) (t411&(bool)1);
    t391 = t391 + (nat) (t409&(bool)1);
    t390 = t390 + (nat) (t407&(bool)1);
    t389 = t389 + (nat) (t405&(bool)1);
    t388 = t388 + (nat) (t403&(bool)1);
    t387 = t387 + (nat) (t401&(bool)1);
    t386 = t386 + (nat) (t399&(bool)1);
    t385 = t385 + (nat) (t397&(bool)1);

    // get the product digits
    // "% 256" drops the part of each column sum that was already accounted
    // for as the carry into the next column.
    p[0] = t2;
    p[1] = t384 % 256;
    p[2] = t385 % 256;
    p[3] = t386 % 256;
    p[4] = t387 % 256;
    p[5] = t388 % 256;
    p[6] = t389 % 256;
    p[7] = t390 % 256;
    p[8] = t391 % 256;
    p[9] = t392 % 256;
    p[10] = t393 % 256;
    p[11] = t394 % 256;
    p[12] = t395 % 256;
    p[13] = t396 % 256;
}