co-586.asm 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258
  1. ; This file is generated from a similarly-named Perl script in the BoringSSL
  2. ; source tree. Do not edit by hand.
  3. %ifdef BORINGSSL_PREFIX
  ; Only when BORINGSSL_PREFIX is defined: include
  ; boringssl_prefix_symbols_nasm.inc (its contents are not visible here).
  4. %include "boringssl_prefix_symbols_nasm.inc"
  5. %endif
  ; Choose the code-section declaration that matches NASM's output format.
  6. %ifidn __OUTPUT_FORMAT__,obj
  ; obj output: explicitly 32-bit code segment, 64-byte aligned.
  7. section code use32 class=code align=64
  8. %elifidn __OUTPUT_FORMAT__,win32
  ; NOTE(review): presumably the NASM win32 safe-SEH marker symbol
  ; (@feat.00 = 1) - confirm against the NASM manual.
  9. $@feat.00 equ 1
  10. section .text code align=64
  11. %else
  ; Any other output format: plain .text code section.
  12. section .text code
  13. %endif
  14. global _bn_mul_comba8
  ; ---------------------------------------------------------------------
  ; _bn_mul_comba8: 8x8-word multiply, r[0..15] = a[0..7] * b[0..7],
  ; on 32-bit words, accumulated one result column (word) at a time.
  ;
  ; Stack arguments on entry ([esp] = return address):
  ;   [esp+4]   r  destination, 16 dwords written (offsets 0..60)
  ;   [esp+8]   a  first operand, 8 dwords read (offsets 0..28)
  ;   [esp+12]  b  second operand, 8 dwords read (offsets 0..28)
  ; Register roles: esi = a, edi = b; ebx/ecx/ebp are a three-word column
  ;   accumulator whose roles rotate after every result word; eax/edx hold
  ;   the mul operands and the 64-bit product (edx:eax).
  ; Preserves esi, edi, ebp, ebx (pushed/popped). Modifies eax, ecx, edx
  ;   and the flags. Straight-line code: no branches, no loops.
  ; The mov instructions interleaved between add/adc/adc do not change the
  ;   flags, so each carry chain stays intact while the next operands load.
  ; NOTE(review): presumably the C prototype is
  ;   void bn_mul_comba8(uint32_t r[16], const uint32_t a[8],
  ;                      const uint32_t b[8]) - confirm against the caller.
  ; ---------------------------------------------------------------------
  15. align 16
  16. _bn_mul_comba8:
  17. L$_bn_mul_comba8_begin:
  18. push esi
  19. mov esi,DWORD [12+esp] ; esi = a (2nd argument; one push so far)
  20. push edi
  21. mov edi,DWORD [20+esp] ; edi = b (3rd argument; two pushes so far)
  22. push ebp
  23. push ebx
  24. xor ebx,ebx ; clear accumulator (ebx, ecx here; ebp just below)
  25. mov eax,DWORD [esi] ; eax = a[0]
  26. xor ecx,ecx
  27. mov edx,DWORD [edi] ; edx = b[0]
  28. ; ################## Calculate word 0
  29. xor ebp,ebp
  30. ; mul a[0]*b[0]
  31. mul edx
  32. add ebx,eax
  33. mov eax,DWORD [20+esp] ; eax = r (1st argument; four pushes so far)
  34. adc ecx,edx
  35. mov edx,DWORD [edi]
  36. adc ebp,0
  37. mov DWORD [eax],ebx
  38. mov eax,DWORD [4+esi]
  39. ; saved r[0]
  40. ; ################## Calculate word 1
  41. xor ebx,ebx
  42. ; mul a[1]*b[0]
  43. mul edx
  44. add ecx,eax
  45. mov eax,DWORD [esi]
  46. adc ebp,edx
  47. mov edx,DWORD [4+edi]
  48. adc ebx,0
  49. ; mul a[0]*b[1]
  50. mul edx
  51. add ecx,eax
  52. mov eax,DWORD [20+esp]
  53. adc ebp,edx
  54. mov edx,DWORD [edi]
  55. adc ebx,0
  56. mov DWORD [4+eax],ecx
  57. mov eax,DWORD [8+esi]
  58. ; saved r[1]
  59. ; ################## Calculate word 2
  60. xor ecx,ecx
  61. ; mul a[2]*b[0]
  62. mul edx
  63. add ebp,eax
  64. mov eax,DWORD [4+esi]
  65. adc ebx,edx
  66. mov edx,DWORD [4+edi]
  67. adc ecx,0
  68. ; mul a[1]*b[1]
  69. mul edx
  70. add ebp,eax
  71. mov eax,DWORD [esi]
  72. adc ebx,edx
  73. mov edx,DWORD [8+edi]
  74. adc ecx,0
  75. ; mul a[0]*b[2]
  76. mul edx
  77. add ebp,eax
  78. mov eax,DWORD [20+esp]
  79. adc ebx,edx
  80. mov edx,DWORD [edi]
  81. adc ecx,0
  82. mov DWORD [8+eax],ebp
  83. mov eax,DWORD [12+esi]
  84. ; saved r[2]
  85. ; ################## Calculate word 3
  86. xor ebp,ebp
  87. ; mul a[3]*b[0]
  88. mul edx
  89. add ebx,eax
  90. mov eax,DWORD [8+esi]
  91. adc ecx,edx
  92. mov edx,DWORD [4+edi]
  93. adc ebp,0
  94. ; mul a[2]*b[1]
  95. mul edx
  96. add ebx,eax
  97. mov eax,DWORD [4+esi]
  98. adc ecx,edx
  99. mov edx,DWORD [8+edi]
  100. adc ebp,0
  101. ; mul a[1]*b[2]
  102. mul edx
  103. add ebx,eax
  104. mov eax,DWORD [esi]
  105. adc ecx,edx
  106. mov edx,DWORD [12+edi]
  107. adc ebp,0
  108. ; mul a[0]*b[3]
  109. mul edx
  110. add ebx,eax
  111. mov eax,DWORD [20+esp]
  112. adc ecx,edx
  113. mov edx,DWORD [edi]
  114. adc ebp,0
  115. mov DWORD [12+eax],ebx
  116. mov eax,DWORD [16+esi]
  117. ; saved r[3]
  118. ; ################## Calculate word 4
  119. xor ebx,ebx
  120. ; mul a[4]*b[0]
  121. mul edx
  122. add ecx,eax
  123. mov eax,DWORD [12+esi]
  124. adc ebp,edx
  125. mov edx,DWORD [4+edi]
  126. adc ebx,0
  127. ; mul a[3]*b[1]
  128. mul edx
  129. add ecx,eax
  130. mov eax,DWORD [8+esi]
  131. adc ebp,edx
  132. mov edx,DWORD [8+edi]
  133. adc ebx,0
  134. ; mul a[2]*b[2]
  135. mul edx
  136. add ecx,eax
  137. mov eax,DWORD [4+esi]
  138. adc ebp,edx
  139. mov edx,DWORD [12+edi]
  140. adc ebx,0
  141. ; mul a[1]*b[3]
  142. mul edx
  143. add ecx,eax
  144. mov eax,DWORD [esi]
  145. adc ebp,edx
  146. mov edx,DWORD [16+edi]
  147. adc ebx,0
  148. ; mul a[0]*b[4]
  149. mul edx
  150. add ecx,eax
  151. mov eax,DWORD [20+esp]
  152. adc ebp,edx
  153. mov edx,DWORD [edi]
  154. adc ebx,0
  155. mov DWORD [16+eax],ecx
  156. mov eax,DWORD [20+esi]
  157. ; saved r[4]
  158. ; ################## Calculate word 5
  159. xor ecx,ecx
  160. ; mul a[5]*b[0]
  161. mul edx
  162. add ebp,eax
  163. mov eax,DWORD [16+esi]
  164. adc ebx,edx
  165. mov edx,DWORD [4+edi]
  166. adc ecx,0
  167. ; mul a[4]*b[1]
  168. mul edx
  169. add ebp,eax
  170. mov eax,DWORD [12+esi]
  171. adc ebx,edx
  172. mov edx,DWORD [8+edi]
  173. adc ecx,0
  174. ; mul a[3]*b[2]
  175. mul edx
  176. add ebp,eax
  177. mov eax,DWORD [8+esi]
  178. adc ebx,edx
  179. mov edx,DWORD [12+edi]
  180. adc ecx,0
  181. ; mul a[2]*b[3]
  182. mul edx
  183. add ebp,eax
  184. mov eax,DWORD [4+esi]
  185. adc ebx,edx
  186. mov edx,DWORD [16+edi]
  187. adc ecx,0
  188. ; mul a[1]*b[4]
  189. mul edx
  190. add ebp,eax
  191. mov eax,DWORD [esi]
  192. adc ebx,edx
  193. mov edx,DWORD [20+edi]
  194. adc ecx,0
  195. ; mul a[0]*b[5]
  196. mul edx
  197. add ebp,eax
  198. mov eax,DWORD [20+esp]
  199. adc ebx,edx
  200. mov edx,DWORD [edi]
  201. adc ecx,0
  202. mov DWORD [20+eax],ebp
  203. mov eax,DWORD [24+esi]
  204. ; saved r[5]
  205. ; ################## Calculate word 6
  206. xor ebp,ebp
  207. ; mul a[6]*b[0]
  208. mul edx
  209. add ebx,eax
  210. mov eax,DWORD [20+esi]
  211. adc ecx,edx
  212. mov edx,DWORD [4+edi]
  213. adc ebp,0
  214. ; mul a[5]*b[1]
  215. mul edx
  216. add ebx,eax
  217. mov eax,DWORD [16+esi]
  218. adc ecx,edx
  219. mov edx,DWORD [8+edi]
  220. adc ebp,0
  221. ; mul a[4]*b[2]
  222. mul edx
  223. add ebx,eax
  224. mov eax,DWORD [12+esi]
  225. adc ecx,edx
  226. mov edx,DWORD [12+edi]
  227. adc ebp,0
  228. ; mul a[3]*b[3]
  229. mul edx
  230. add ebx,eax
  231. mov eax,DWORD [8+esi]
  232. adc ecx,edx
  233. mov edx,DWORD [16+edi]
  234. adc ebp,0
  235. ; mul a[2]*b[4]
  236. mul edx
  237. add ebx,eax
  238. mov eax,DWORD [4+esi]
  239. adc ecx,edx
  240. mov edx,DWORD [20+edi]
  241. adc ebp,0
  242. ; mul a[1]*b[5]
  243. mul edx
  244. add ebx,eax
  245. mov eax,DWORD [esi]
  246. adc ecx,edx
  247. mov edx,DWORD [24+edi]
  248. adc ebp,0
  249. ; mul a[0]*b[6]
  250. mul edx
  251. add ebx,eax
  252. mov eax,DWORD [20+esp]
  253. adc ecx,edx
  254. mov edx,DWORD [edi]
  255. adc ebp,0
  256. mov DWORD [24+eax],ebx
  257. mov eax,DWORD [28+esi]
  258. ; saved r[6]
  259. ; ################## Calculate word 7
  260. xor ebx,ebx
  261. ; mul a[7]*b[0]
  262. mul edx
  263. add ecx,eax
  264. mov eax,DWORD [24+esi]
  265. adc ebp,edx
  266. mov edx,DWORD [4+edi]
  267. adc ebx,0
  268. ; mul a[6]*b[1]
  269. mul edx
  270. add ecx,eax
  271. mov eax,DWORD [20+esi]
  272. adc ebp,edx
  273. mov edx,DWORD [8+edi]
  274. adc ebx,0
  275. ; mul a[5]*b[2]
  276. mul edx
  277. add ecx,eax
  278. mov eax,DWORD [16+esi]
  279. adc ebp,edx
  280. mov edx,DWORD [12+edi]
  281. adc ebx,0
  282. ; mul a[4]*b[3]
  283. mul edx
  284. add ecx,eax
  285. mov eax,DWORD [12+esi]
  286. adc ebp,edx
  287. mov edx,DWORD [16+edi]
  288. adc ebx,0
  289. ; mul a[3]*b[4]
  290. mul edx
  291. add ecx,eax
  292. mov eax,DWORD [8+esi]
  293. adc ebp,edx
  294. mov edx,DWORD [20+edi]
  295. adc ebx,0
  296. ; mul a[2]*b[5]
  297. mul edx
  298. add ecx,eax
  299. mov eax,DWORD [4+esi]
  300. adc ebp,edx
  301. mov edx,DWORD [24+edi]
  302. adc ebx,0
  303. ; mul a[1]*b[6]
  304. mul edx
  305. add ecx,eax
  306. mov eax,DWORD [esi]
  307. adc ebp,edx
  308. mov edx,DWORD [28+edi]
  309. adc ebx,0
  310. ; mul a[0]*b[7]
  311. mul edx
  312. add ecx,eax
  313. mov eax,DWORD [20+esp]
  314. adc ebp,edx
  315. mov edx,DWORD [4+edi]
  316. adc ebx,0
  317. mov DWORD [28+eax],ecx
  318. mov eax,DWORD [28+esi]
  319. ; saved r[7]
  320. ; ################## Calculate word 8
  321. xor ecx,ecx
  322. ; mul a[7]*b[1]
  323. mul edx
  324. add ebp,eax
  325. mov eax,DWORD [24+esi]
  326. adc ebx,edx
  327. mov edx,DWORD [8+edi]
  328. adc ecx,0
  329. ; mul a[6]*b[2]
  330. mul edx
  331. add ebp,eax
  332. mov eax,DWORD [20+esi]
  333. adc ebx,edx
  334. mov edx,DWORD [12+edi]
  335. adc ecx,0
  336. ; mul a[5]*b[3]
  337. mul edx
  338. add ebp,eax
  339. mov eax,DWORD [16+esi]
  340. adc ebx,edx
  341. mov edx,DWORD [16+edi]
  342. adc ecx,0
  343. ; mul a[4]*b[4]
  344. mul edx
  345. add ebp,eax
  346. mov eax,DWORD [12+esi]
  347. adc ebx,edx
  348. mov edx,DWORD [20+edi]
  349. adc ecx,0
  350. ; mul a[3]*b[5]
  351. mul edx
  352. add ebp,eax
  353. mov eax,DWORD [8+esi]
  354. adc ebx,edx
  355. mov edx,DWORD [24+edi]
  356. adc ecx,0
  357. ; mul a[2]*b[6]
  358. mul edx
  359. add ebp,eax
  360. mov eax,DWORD [4+esi]
  361. adc ebx,edx
  362. mov edx,DWORD [28+edi]
  363. adc ecx,0
  364. ; mul a[1]*b[7]
  365. mul edx
  366. add ebp,eax
  367. mov eax,DWORD [20+esp]
  368. adc ebx,edx
  369. mov edx,DWORD [8+edi]
  370. adc ecx,0
  371. mov DWORD [32+eax],ebp
  372. mov eax,DWORD [28+esi]
  373. ; saved r[8]
  374. ; ################## Calculate word 9
  375. xor ebp,ebp
  376. ; mul a[7]*b[2]
  377. mul edx
  378. add ebx,eax
  379. mov eax,DWORD [24+esi]
  380. adc ecx,edx
  381. mov edx,DWORD [12+edi]
  382. adc ebp,0
  383. ; mul a[6]*b[3]
  384. mul edx
  385. add ebx,eax
  386. mov eax,DWORD [20+esi]
  387. adc ecx,edx
  388. mov edx,DWORD [16+edi]
  389. adc ebp,0
  390. ; mul a[5]*b[4]
  391. mul edx
  392. add ebx,eax
  393. mov eax,DWORD [16+esi]
  394. adc ecx,edx
  395. mov edx,DWORD [20+edi]
  396. adc ebp,0
  397. ; mul a[4]*b[5]
  398. mul edx
  399. add ebx,eax
  400. mov eax,DWORD [12+esi]
  401. adc ecx,edx
  402. mov edx,DWORD [24+edi]
  403. adc ebp,0
  404. ; mul a[3]*b[6]
  405. mul edx
  406. add ebx,eax
  407. mov eax,DWORD [8+esi]
  408. adc ecx,edx
  409. mov edx,DWORD [28+edi]
  410. adc ebp,0
  411. ; mul a[2]*b[7]
  412. mul edx
  413. add ebx,eax
  414. mov eax,DWORD [20+esp]
  415. adc ecx,edx
  416. mov edx,DWORD [12+edi]
  417. adc ebp,0
  418. mov DWORD [36+eax],ebx
  419. mov eax,DWORD [28+esi]
  420. ; saved r[9]
  421. ; ################## Calculate word 10
  422. xor ebx,ebx
  423. ; mul a[7]*b[3]
  424. mul edx
  425. add ecx,eax
  426. mov eax,DWORD [24+esi]
  427. adc ebp,edx
  428. mov edx,DWORD [16+edi]
  429. adc ebx,0
  430. ; mul a[6]*b[4]
  431. mul edx
  432. add ecx,eax
  433. mov eax,DWORD [20+esi]
  434. adc ebp,edx
  435. mov edx,DWORD [20+edi]
  436. adc ebx,0
  437. ; mul a[5]*b[5]
  438. mul edx
  439. add ecx,eax
  440. mov eax,DWORD [16+esi]
  441. adc ebp,edx
  442. mov edx,DWORD [24+edi]
  443. adc ebx,0
  444. ; mul a[4]*b[6]
  445. mul edx
  446. add ecx,eax
  447. mov eax,DWORD [12+esi]
  448. adc ebp,edx
  449. mov edx,DWORD [28+edi]
  450. adc ebx,0
  451. ; mul a[3]*b[7]
  452. mul edx
  453. add ecx,eax
  454. mov eax,DWORD [20+esp]
  455. adc ebp,edx
  456. mov edx,DWORD [16+edi]
  457. adc ebx,0
  458. mov DWORD [40+eax],ecx
  459. mov eax,DWORD [28+esi]
  460. ; saved r[10]
  461. ; ################## Calculate word 11
  462. xor ecx,ecx
  463. ; mul a[7]*b[4]
  464. mul edx
  465. add ebp,eax
  466. mov eax,DWORD [24+esi]
  467. adc ebx,edx
  468. mov edx,DWORD [20+edi]
  469. adc ecx,0
  470. ; mul a[6]*b[5]
  471. mul edx
  472. add ebp,eax
  473. mov eax,DWORD [20+esi]
  474. adc ebx,edx
  475. mov edx,DWORD [24+edi]
  476. adc ecx,0
  477. ; mul a[5]*b[6]
  478. mul edx
  479. add ebp,eax
  480. mov eax,DWORD [16+esi]
  481. adc ebx,edx
  482. mov edx,DWORD [28+edi]
  483. adc ecx,0
  484. ; mul a[4]*b[7]
  485. mul edx
  486. add ebp,eax
  487. mov eax,DWORD [20+esp]
  488. adc ebx,edx
  489. mov edx,DWORD [20+edi]
  490. adc ecx,0
  491. mov DWORD [44+eax],ebp
  492. mov eax,DWORD [28+esi]
  493. ; saved r[11]
  494. ; ################## Calculate word 12
  495. xor ebp,ebp
  496. ; mul a[7]*b[5]
  497. mul edx
  498. add ebx,eax
  499. mov eax,DWORD [24+esi]
  500. adc ecx,edx
  501. mov edx,DWORD [24+edi]
  502. adc ebp,0
  503. ; mul a[6]*b[6]
  504. mul edx
  505. add ebx,eax
  506. mov eax,DWORD [20+esi]
  507. adc ecx,edx
  508. mov edx,DWORD [28+edi]
  509. adc ebp,0
  510. ; mul a[5]*b[7]
  511. mul edx
  512. add ebx,eax
  513. mov eax,DWORD [20+esp]
  514. adc ecx,edx
  515. mov edx,DWORD [24+edi]
  516. adc ebp,0
  517. mov DWORD [48+eax],ebx
  518. mov eax,DWORD [28+esi]
  519. ; saved r[12]
  520. ; ################## Calculate word 13
  521. xor ebx,ebx
  522. ; mul a[7]*b[6]
  523. mul edx
  524. add ecx,eax
  525. mov eax,DWORD [24+esi]
  526. adc ebp,edx
  527. mov edx,DWORD [28+edi]
  528. adc ebx,0
  529. ; mul a[6]*b[7]
  530. mul edx
  531. add ecx,eax
  532. mov eax,DWORD [20+esp]
  533. adc ebp,edx
  534. mov edx,DWORD [28+edi]
  535. adc ebx,0
  536. mov DWORD [52+eax],ecx
  537. mov eax,DWORD [28+esi]
  538. ; saved r[13]
  539. ; ################## Calculate word 14
  540. xor ecx,ecx
  541. ; mul a[7]*b[7]
  542. mul edx
  543. add ebp,eax
  544. mov eax,DWORD [20+esp]
  545. adc ebx,edx
  546. adc ecx,0 ; ecx is not read after this point
  547. mov DWORD [56+eax],ebp
  548. ; saved r[14]
  549. ; save r[15]
  550. mov DWORD [60+eax],ebx
  551. pop ebx ; restore the four registers pushed in the prologue
  552. pop ebp
  553. pop edi
  554. pop esi
  555. ret
  556. global _bn_mul_comba4
  ; ---------------------------------------------------------------------
  ; _bn_mul_comba4: 4x4-word multiply, r[0..7] = a[0..3] * b[0..3],
  ; on 32-bit words, accumulated one result column (word) at a time.
  ;
  ; Stack arguments on entry ([esp] = return address):
  ;   [esp+4]   r  destination, 8 dwords written (offsets 0..28)
  ;   [esp+8]   a  first operand, 4 dwords read (offsets 0..12)
  ;   [esp+12]  b  second operand, 4 dwords read (offsets 0..12)
  ; Register roles: esi = a, edi = b; ebx/ecx/ebp are a three-word column
  ;   accumulator whose roles rotate after every result word; eax/edx hold
  ;   the mul operands and the 64-bit product (edx:eax).
  ; Preserves esi, edi, ebp, ebx (pushed/popped). Modifies eax, ecx, edx
  ;   and the flags. Straight-line code: no branches, no loops.
  ; The mov instructions interleaved between add/adc/adc do not change the
  ;   flags, so each carry chain stays intact while the next operands load.
  ; NOTE(review): presumably the C prototype is
  ;   void bn_mul_comba4(uint32_t r[8], const uint32_t a[4],
  ;                      const uint32_t b[4]) - confirm against the caller.
  ; ---------------------------------------------------------------------
  557. align 16
  558. _bn_mul_comba4:
  559. L$_bn_mul_comba4_begin:
  560. push esi
  561. mov esi,DWORD [12+esp] ; esi = a (2nd argument; one push so far)
  562. push edi
  563. mov edi,DWORD [20+esp] ; edi = b (3rd argument; two pushes so far)
  564. push ebp
  565. push ebx
  566. xor ebx,ebx ; clear accumulator (ebx, ecx here; ebp just below)
  567. mov eax,DWORD [esi] ; eax = a[0]
  568. xor ecx,ecx
  569. mov edx,DWORD [edi] ; edx = b[0]
  570. ; ################## Calculate word 0
  571. xor ebp,ebp
  572. ; mul a[0]*b[0]
  573. mul edx
  574. add ebx,eax
  575. mov eax,DWORD [20+esp] ; eax = r (1st argument; four pushes so far)
  576. adc ecx,edx
  577. mov edx,DWORD [edi]
  578. adc ebp,0
  579. mov DWORD [eax],ebx
  580. mov eax,DWORD [4+esi]
  581. ; saved r[0]
  582. ; ################## Calculate word 1
  583. xor ebx,ebx
  584. ; mul a[1]*b[0]
  585. mul edx
  586. add ecx,eax
  587. mov eax,DWORD [esi]
  588. adc ebp,edx
  589. mov edx,DWORD [4+edi]
  590. adc ebx,0
  591. ; mul a[0]*b[1]
  592. mul edx
  593. add ecx,eax
  594. mov eax,DWORD [20+esp]
  595. adc ebp,edx
  596. mov edx,DWORD [edi]
  597. adc ebx,0
  598. mov DWORD [4+eax],ecx
  599. mov eax,DWORD [8+esi]
  600. ; saved r[1]
  601. ; ################## Calculate word 2
  602. xor ecx,ecx
  603. ; mul a[2]*b[0]
  604. mul edx
  605. add ebp,eax
  606. mov eax,DWORD [4+esi]
  607. adc ebx,edx
  608. mov edx,DWORD [4+edi]
  609. adc ecx,0
  610. ; mul a[1]*b[1]
  611. mul edx
  612. add ebp,eax
  613. mov eax,DWORD [esi]
  614. adc ebx,edx
  615. mov edx,DWORD [8+edi]
  616. adc ecx,0
  617. ; mul a[0]*b[2]
  618. mul edx
  619. add ebp,eax
  620. mov eax,DWORD [20+esp]
  621. adc ebx,edx
  622. mov edx,DWORD [edi]
  623. adc ecx,0
  624. mov DWORD [8+eax],ebp
  625. mov eax,DWORD [12+esi]
  626. ; saved r[2]
  627. ; ################## Calculate word 3
  628. xor ebp,ebp
  629. ; mul a[3]*b[0]
  630. mul edx
  631. add ebx,eax
  632. mov eax,DWORD [8+esi]
  633. adc ecx,edx
  634. mov edx,DWORD [4+edi]
  635. adc ebp,0
  636. ; mul a[2]*b[1]
  637. mul edx
  638. add ebx,eax
  639. mov eax,DWORD [4+esi]
  640. adc ecx,edx
  641. mov edx,DWORD [8+edi]
  642. adc ebp,0
  643. ; mul a[1]*b[2]
  644. mul edx
  645. add ebx,eax
  646. mov eax,DWORD [esi]
  647. adc ecx,edx
  648. mov edx,DWORD [12+edi]
  649. adc ebp,0
  650. ; mul a[0]*b[3]
  651. mul edx
  652. add ebx,eax
  653. mov eax,DWORD [20+esp]
  654. adc ecx,edx
  655. mov edx,DWORD [4+edi]
  656. adc ebp,0
  657. mov DWORD [12+eax],ebx
  658. mov eax,DWORD [12+esi]
  659. ; saved r[3]
  660. ; ################## Calculate word 4
  661. xor ebx,ebx
  662. ; mul a[3]*b[1]
  663. mul edx
  664. add ecx,eax
  665. mov eax,DWORD [8+esi]
  666. adc ebp,edx
  667. mov edx,DWORD [8+edi]
  668. adc ebx,0
  669. ; mul a[2]*b[2]
  670. mul edx
  671. add ecx,eax
  672. mov eax,DWORD [4+esi]
  673. adc ebp,edx
  674. mov edx,DWORD [12+edi]
  675. adc ebx,0
  676. ; mul a[1]*b[3]
  677. mul edx
  678. add ecx,eax
  679. mov eax,DWORD [20+esp]
  680. adc ebp,edx
  681. mov edx,DWORD [8+edi]
  682. adc ebx,0
  683. mov DWORD [16+eax],ecx
  684. mov eax,DWORD [12+esi]
  685. ; saved r[4]
  686. ; ################## Calculate word 5
  687. xor ecx,ecx
  688. ; mul a[3]*b[2]
  689. mul edx
  690. add ebp,eax
  691. mov eax,DWORD [8+esi]
  692. adc ebx,edx
  693. mov edx,DWORD [12+edi]
  694. adc ecx,0
  695. ; mul a[2]*b[3]
  696. mul edx
  697. add ebp,eax
  698. mov eax,DWORD [20+esp]
  699. adc ebx,edx
  700. mov edx,DWORD [12+edi]
  701. adc ecx,0
  702. mov DWORD [20+eax],ebp
  703. mov eax,DWORD [12+esi]
  704. ; saved r[5]
  705. ; ################## Calculate word 6
  706. xor ebp,ebp
  707. ; mul a[3]*b[3]
  708. mul edx
  709. add ebx,eax
  710. mov eax,DWORD [20+esp]
  711. adc ecx,edx
  712. adc ebp,0 ; ebp is not read after this point
  713. mov DWORD [24+eax],ebx
  714. ; saved r[6]
  715. ; save r[7]
  716. mov DWORD [28+eax],ecx
  717. pop ebx ; restore the four registers pushed in the prologue
  718. pop ebp
  719. pop edi
  720. pop esi
  721. ret
  722. global _bn_sqr_comba8
  ; ---------------------------------------------------------------------
  ; _bn_sqr_comba8: 8-word square, r[0..15] = a[0..7] * a[0..7],
  ; on 32-bit words, accumulated one result column (word) at a time.
  ;
  ; Stack arguments on entry ([esp] = return address):
  ;   [esp+4]  r  destination, 16 dwords written (offsets 0..60)
  ;   [esp+8]  a  operand, 8 dwords read (offsets 0..28)
  ; Register roles: edi = r, esi = a; ebx/ecx/ebp are a three-word column
  ;   accumulator whose roles rotate after every result word; eax/edx hold
  ;   the mul operands and the 64-bit product (edx:eax).
  ; Each cross product a[i]*a[j] (i != j) is computed once and doubled
  ;   (add eax,eax / adc edx,edx / adc top,0) before being accumulated;
  ;   the diagonal terms a[i]*a[i] (mul eax) are added undoubled.
  ; Preserves esi, edi, ebp, ebx (pushed/popped). Modifies eax, ecx, edx
  ;   and the flags. Straight-line code: no branches, no loops.
  ; NOTE(review): presumably the C prototype is
  ;   void bn_sqr_comba8(uint32_t r[16], const uint32_t a[8])
  ;   - confirm against the caller.
  ; ---------------------------------------------------------------------
  723. align 16
  724. _bn_sqr_comba8:
  725. L$_bn_sqr_comba8_begin:
  726. push esi
  727. push edi
  728. push ebp
  729. push ebx
  730. mov edi,DWORD [20+esp] ; edi = r (1st argument; four pushes so far)
  731. mov esi,DWORD [24+esp] ; esi = a (2nd argument)
  732. xor ebx,ebx ; clear accumulator (ebx, ecx here; ebp just below)
  733. xor ecx,ecx
  734. mov eax,DWORD [esi] ; eax = a[0]
  735. ; ############### Calculate word 0
  736. xor ebp,ebp
  737. ; sqr a[0]*a[0]
  738. mul eax
  739. add ebx,eax
  740. adc ecx,edx
  741. mov edx,DWORD [esi]
  742. adc ebp,0
  743. mov DWORD [edi],ebx
  744. mov eax,DWORD [4+esi]
  745. ; saved r[0]
  746. ; ############### Calculate word 1
  747. xor ebx,ebx
  748. ; sqr a[1]*a[0]
  749. mul edx
  750. add eax,eax ; double the cross product: low word
  751. adc edx,edx ; ... high word, shifting in the carry from the low word
  752. adc ebx,0 ; bit shifted out of edx goes to the top accumulator word
  753. add ecx,eax
  754. adc ebp,edx
  755. mov eax,DWORD [8+esi]
  756. adc ebx,0
  757. mov DWORD [4+edi],ecx
  758. mov edx,DWORD [esi]
  759. ; saved r[1]
  760. ; ############### Calculate word 2
  761. xor ecx,ecx
  762. ; sqr a[2]*a[0]
  763. mul edx
  764. add eax,eax
  765. adc edx,edx
  766. adc ecx,0
  767. add ebp,eax
  768. adc ebx,edx
  769. mov eax,DWORD [4+esi]
  770. adc ecx,0
  771. ; sqr a[1]*a[1]
  772. mul eax
  773. add ebp,eax
  774. adc ebx,edx
  775. mov edx,DWORD [esi]
  776. adc ecx,0
  777. mov DWORD [8+edi],ebp
  778. mov eax,DWORD [12+esi]
  779. ; saved r[2]
  780. ; ############### Calculate word 3
  781. xor ebp,ebp
  782. ; sqr a[3]*a[0]
  783. mul edx
  784. add eax,eax
  785. adc edx,edx
  786. adc ebp,0
  787. add ebx,eax
  788. adc ecx,edx
  789. mov eax,DWORD [8+esi]
  790. adc ebp,0
  791. mov edx,DWORD [4+esi]
  792. ; sqr a[2]*a[1]
  793. mul edx
  794. add eax,eax
  795. adc edx,edx
  796. adc ebp,0
  797. add ebx,eax
  798. adc ecx,edx
  799. mov eax,DWORD [16+esi]
  800. adc ebp,0
  801. mov DWORD [12+edi],ebx
  802. mov edx,DWORD [esi]
  803. ; saved r[3]
  804. ; ############### Calculate word 4
  805. xor ebx,ebx
  806. ; sqr a[4]*a[0]
  807. mul edx
  808. add eax,eax
  809. adc edx,edx
  810. adc ebx,0
  811. add ecx,eax
  812. adc ebp,edx
  813. mov eax,DWORD [12+esi]
  814. adc ebx,0
  815. mov edx,DWORD [4+esi]
  816. ; sqr a[3]*a[1]
  817. mul edx
  818. add eax,eax
  819. adc edx,edx
  820. adc ebx,0
  821. add ecx,eax
  822. adc ebp,edx
  823. mov eax,DWORD [8+esi]
  824. adc ebx,0
  825. ; sqr a[2]*a[2]
  826. mul eax
  827. add ecx,eax
  828. adc ebp,edx
  829. mov edx,DWORD [esi]
  830. adc ebx,0
  831. mov DWORD [16+edi],ecx
  832. mov eax,DWORD [20+esi]
  833. ; saved r[4]
  834. ; ############### Calculate word 5
  835. xor ecx,ecx
  836. ; sqr a[5]*a[0]
  837. mul edx
  838. add eax,eax
  839. adc edx,edx
  840. adc ecx,0
  841. add ebp,eax
  842. adc ebx,edx
  843. mov eax,DWORD [16+esi]
  844. adc ecx,0
  845. mov edx,DWORD [4+esi]
  846. ; sqr a[4]*a[1]
  847. mul edx
  848. add eax,eax
  849. adc edx,edx
  850. adc ecx,0
  851. add ebp,eax
  852. adc ebx,edx
  853. mov eax,DWORD [12+esi]
  854. adc ecx,0
  855. mov edx,DWORD [8+esi]
  856. ; sqr a[3]*a[2]
  857. mul edx
  858. add eax,eax
  859. adc edx,edx
  860. adc ecx,0
  861. add ebp,eax
  862. adc ebx,edx
  863. mov eax,DWORD [24+esi]
  864. adc ecx,0
  865. mov DWORD [20+edi],ebp
  866. mov edx,DWORD [esi]
  867. ; saved r[5]
  868. ; ############### Calculate word 6
  869. xor ebp,ebp
  870. ; sqr a[6]*a[0]
  871. mul edx
  872. add eax,eax
  873. adc edx,edx
  874. adc ebp,0
  875. add ebx,eax
  876. adc ecx,edx
  877. mov eax,DWORD [20+esi]
  878. adc ebp,0
  879. mov edx,DWORD [4+esi]
  880. ; sqr a[5]*a[1]
  881. mul edx
  882. add eax,eax
  883. adc edx,edx
  884. adc ebp,0
  885. add ebx,eax
  886. adc ecx,edx
  887. mov eax,DWORD [16+esi]
  888. adc ebp,0
  889. mov edx,DWORD [8+esi]
  890. ; sqr a[4]*a[2]
  891. mul edx
  892. add eax,eax
  893. adc edx,edx
  894. adc ebp,0
  895. add ebx,eax
  896. adc ecx,edx
  897. mov eax,DWORD [12+esi]
  898. adc ebp,0
  899. ; sqr a[3]*a[3]
  900. mul eax
  901. add ebx,eax
  902. adc ecx,edx
  903. mov edx,DWORD [esi]
  904. adc ebp,0
  905. mov DWORD [24+edi],ebx
  906. mov eax,DWORD [28+esi]
  907. ; saved r[6]
  908. ; ############### Calculate word 7
  909. xor ebx,ebx
  910. ; sqr a[7]*a[0]
  911. mul edx
  912. add eax,eax
  913. adc edx,edx
  914. adc ebx,0
  915. add ecx,eax
  916. adc ebp,edx
  917. mov eax,DWORD [24+esi]
  918. adc ebx,0
  919. mov edx,DWORD [4+esi]
  920. ; sqr a[6]*a[1]
  921. mul edx
  922. add eax,eax
  923. adc edx,edx
  924. adc ebx,0
  925. add ecx,eax
  926. adc ebp,edx
  927. mov eax,DWORD [20+esi]
  928. adc ebx,0
  929. mov edx,DWORD [8+esi]
  930. ; sqr a[5]*a[2]
  931. mul edx
  932. add eax,eax
  933. adc edx,edx
  934. adc ebx,0
  935. add ecx,eax
  936. adc ebp,edx
  937. mov eax,DWORD [16+esi]
  938. adc ebx,0
  939. mov edx,DWORD [12+esi]
  940. ; sqr a[4]*a[3]
  941. mul edx
  942. add eax,eax
  943. adc edx,edx
  944. adc ebx,0
  945. add ecx,eax
  946. adc ebp,edx
  947. mov eax,DWORD [28+esi]
  948. adc ebx,0
  949. mov DWORD [28+edi],ecx
  950. mov edx,DWORD [4+esi]
  951. ; saved r[7]
  952. ; ############### Calculate word 8
  953. xor ecx,ecx
  954. ; sqr a[7]*a[1]
  955. mul edx
  956. add eax,eax
  957. adc edx,edx
  958. adc ecx,0
  959. add ebp,eax
  960. adc ebx,edx
  961. mov eax,DWORD [24+esi]
  962. adc ecx,0
  963. mov edx,DWORD [8+esi]
  964. ; sqr a[6]*a[2]
  965. mul edx
  966. add eax,eax
  967. adc edx,edx
  968. adc ecx,0
  969. add ebp,eax
  970. adc ebx,edx
  971. mov eax,DWORD [20+esi]
  972. adc ecx,0
  973. mov edx,DWORD [12+esi]
  974. ; sqr a[5]*a[3]
  975. mul edx
  976. add eax,eax
  977. adc edx,edx
  978. adc ecx,0
  979. add ebp,eax
  980. adc ebx,edx
  981. mov eax,DWORD [16+esi]
  982. adc ecx,0
  983. ; sqr a[4]*a[4]
  984. mul eax
  985. add ebp,eax
  986. adc ebx,edx
  987. mov edx,DWORD [8+esi]
  988. adc ecx,0
  989. mov DWORD [32+edi],ebp
  990. mov eax,DWORD [28+esi]
  991. ; saved r[8]
  992. ; ############### Calculate word 9
  993. xor ebp,ebp
  994. ; sqr a[7]*a[2]
  995. mul edx
  996. add eax,eax
  997. adc edx,edx
  998. adc ebp,0
  999. add ebx,eax
  1000. adc ecx,edx
  1001. mov eax,DWORD [24+esi]
  1002. adc ebp,0
  1003. mov edx,DWORD [12+esi]
  1004. ; sqr a[6]*a[3]
  1005. mul edx
  1006. add eax,eax
  1007. adc edx,edx
  1008. adc ebp,0
  1009. add ebx,eax
  1010. adc ecx,edx
  1011. mov eax,DWORD [20+esi]
  1012. adc ebp,0
  1013. mov edx,DWORD [16+esi]
  1014. ; sqr a[5]*a[4]
  1015. mul edx
  1016. add eax,eax
  1017. adc edx,edx
  1018. adc ebp,0
  1019. add ebx,eax
  1020. adc ecx,edx
  1021. mov eax,DWORD [28+esi]
  1022. adc ebp,0
  1023. mov DWORD [36+edi],ebx
  1024. mov edx,DWORD [12+esi]
  1025. ; saved r[9]
  1026. ; ############### Calculate word 10
  1027. xor ebx,ebx
  1028. ; sqr a[7]*a[3]
  1029. mul edx
  1030. add eax,eax
  1031. adc edx,edx
  1032. adc ebx,0
  1033. add ecx,eax
  1034. adc ebp,edx
  1035. mov eax,DWORD [24+esi]
  1036. adc ebx,0
  1037. mov edx,DWORD [16+esi]
  1038. ; sqr a[6]*a[4]
  1039. mul edx
  1040. add eax,eax
  1041. adc edx,edx
  1042. adc ebx,0
  1043. add ecx,eax
  1044. adc ebp,edx
  1045. mov eax,DWORD [20+esi]
  1046. adc ebx,0
  1047. ; sqr a[5]*a[5]
  1048. mul eax
  1049. add ecx,eax
  1050. adc ebp,edx
  1051. mov edx,DWORD [16+esi]
  1052. adc ebx,0
  1053. mov DWORD [40+edi],ecx
  1054. mov eax,DWORD [28+esi]
  1055. ; saved r[10]
  1056. ; ############### Calculate word 11
  1057. xor ecx,ecx
  1058. ; sqr a[7]*a[4]
  1059. mul edx
  1060. add eax,eax
  1061. adc edx,edx
  1062. adc ecx,0
  1063. add ebp,eax
  1064. adc ebx,edx
  1065. mov eax,DWORD [24+esi]
  1066. adc ecx,0
  1067. mov edx,DWORD [20+esi]
  1068. ; sqr a[6]*a[5]
  1069. mul edx
  1070. add eax,eax
  1071. adc edx,edx
  1072. adc ecx,0
  1073. add ebp,eax
  1074. adc ebx,edx
  1075. mov eax,DWORD [28+esi]
  1076. adc ecx,0
  1077. mov DWORD [44+edi],ebp
  1078. mov edx,DWORD [20+esi]
  1079. ; saved r[11]
  1080. ; ############### Calculate word 12
  1081. xor ebp,ebp
  1082. ; sqr a[7]*a[5]
  1083. mul edx
  1084. add eax,eax
  1085. adc edx,edx
  1086. adc ebp,0
  1087. add ebx,eax
  1088. adc ecx,edx
  1089. mov eax,DWORD [24+esi]
  1090. adc ebp,0
  1091. ; sqr a[6]*a[6]
  1092. mul eax
  1093. add ebx,eax
  1094. adc ecx,edx
  1095. mov edx,DWORD [24+esi]
  1096. adc ebp,0
  1097. mov DWORD [48+edi],ebx
  1098. mov eax,DWORD [28+esi]
  1099. ; saved r[12]
  1100. ; ############### Calculate word 13
  1101. xor ebx,ebx
  1102. ; sqr a[7]*a[6]
  1103. mul edx
  1104. add eax,eax
  1105. adc edx,edx
  1106. adc ebx,0
  1107. add ecx,eax
  1108. adc ebp,edx
  1109. mov eax,DWORD [28+esi]
  1110. adc ebx,0
  1111. mov DWORD [52+edi],ecx
  1112. ; saved r[13]
  1113. ; ############### Calculate word 14
  1114. xor ecx,ecx
  1115. ; sqr a[7]*a[7]
  1116. mul eax
  1117. add ebp,eax
  1118. adc ebx,edx
  1119. adc ecx,0 ; ecx is not read after this point
  1120. mov DWORD [56+edi],ebp
  1121. ; saved r[14]
  1122. mov DWORD [60+edi],ebx ; store r[15]
  1123. pop ebx ; restore the four registers pushed in the prologue
  1124. pop ebp
  1125. pop edi
  1126. pop esi
  1127. ret
  1128. global _bn_sqr_comba4
  ; ---------------------------------------------------------------------
  ; _bn_sqr_comba4: 4-word square, r[0..7] = a[0..3] * a[0..3],
  ; on 32-bit words, accumulated one result column (word) at a time.
  ;
  ; Stack arguments on entry ([esp] = return address):
  ;   [esp+4]  r  destination, 8 dwords written (offsets 0..28)
  ;   [esp+8]  a  operand, 4 dwords read (offsets 0..12)
  ; Register roles: edi = r, esi = a; ebx/ecx/ebp are a three-word column
  ;   accumulator whose roles rotate after every result word; eax/edx hold
  ;   the mul operands and the 64-bit product (edx:eax).
  ; Each cross product a[i]*a[j] (i != j) is computed once and doubled
  ;   (add eax,eax / adc edx,edx / adc top,0) before being accumulated;
  ;   the diagonal terms a[i]*a[i] (mul eax) are added undoubled.
  ; Preserves esi, edi, ebp, ebx (pushed/popped). Modifies eax, ecx, edx
  ;   and the flags. Straight-line code: no branches, no loops.
  ; NOTE(review): presumably the C prototype is
  ;   void bn_sqr_comba4(uint32_t r[8], const uint32_t a[4])
  ;   - confirm against the caller.
  ; ---------------------------------------------------------------------
  1129. align 16
  1130. _bn_sqr_comba4:
  1131. L$_bn_sqr_comba4_begin:
  1132. push esi
  1133. push edi
  1134. push ebp
  1135. push ebx
  1136. mov edi,DWORD [20+esp] ; edi = r (1st argument; four pushes so far)
  1137. mov esi,DWORD [24+esp] ; esi = a (2nd argument)
  1138. xor ebx,ebx ; clear accumulator (ebx, ecx here; ebp just below)
  1139. xor ecx,ecx
  1140. mov eax,DWORD [esi] ; eax = a[0]
  1141. ; ############### Calculate word 0
  1142. xor ebp,ebp
  1143. ; sqr a[0]*a[0]
  1144. mul eax
  1145. add ebx,eax
  1146. adc ecx,edx
  1147. mov edx,DWORD [esi]
  1148. adc ebp,0
  1149. mov DWORD [edi],ebx
  1150. mov eax,DWORD [4+esi]
  1151. ; saved r[0]
  1152. ; ############### Calculate word 1
  1153. xor ebx,ebx
  1154. ; sqr a[1]*a[0]
  1155. mul edx
  1156. add eax,eax ; double the cross product: low word
  1157. adc edx,edx ; ... high word, shifting in the carry from the low word
  1158. adc ebx,0 ; bit shifted out of edx goes to the top accumulator word
  1159. add ecx,eax
  1160. adc ebp,edx
  1161. mov eax,DWORD [8+esi]
  1162. adc ebx,0
  1163. mov DWORD [4+edi],ecx
  1164. mov edx,DWORD [esi]
  1165. ; saved r[1]
  1166. ; ############### Calculate word 2
  1167. xor ecx,ecx
  1168. ; sqr a[2]*a[0]
  1169. mul edx
  1170. add eax,eax
  1171. adc edx,edx
  1172. adc ecx,0
  1173. add ebp,eax
  1174. adc ebx,edx
  1175. mov eax,DWORD [4+esi]
  1176. adc ecx,0
  1177. ; sqr a[1]*a[1]
  1178. mul eax
  1179. add ebp,eax
  1180. adc ebx,edx
  1181. mov edx,DWORD [esi]
  1182. adc ecx,0
  1183. mov DWORD [8+edi],ebp
  1184. mov eax,DWORD [12+esi]
  1185. ; saved r[2]
  1186. ; ############### Calculate word 3
  1187. xor ebp,ebp
  1188. ; sqr a[3]*a[0]
  1189. mul edx
  1190. add eax,eax
  1191. adc edx,edx
  1192. adc ebp,0
  1193. add ebx,eax
  1194. adc ecx,edx
  1195. mov eax,DWORD [8+esi]
  1196. adc ebp,0
  1197. mov edx,DWORD [4+esi]
  1198. ; sqr a[2]*a[1]
  1199. mul edx
  1200. add eax,eax
  1201. adc edx,edx
  1202. adc ebp,0
  1203. add ebx,eax
  1204. adc ecx,edx
  1205. mov eax,DWORD [12+esi]
  1206. adc ebp,0
  1207. mov DWORD [12+edi],ebx
  1208. mov edx,DWORD [4+esi]
  1209. ; saved r[3]
  1210. ; ############### Calculate word 4
  1211. xor ebx,ebx
  1212. ; sqr a[3]*a[1]
  1213. mul edx
  1214. add eax,eax
  1215. adc edx,edx
  1216. adc ebx,0
  1217. add ecx,eax
  1218. adc ebp,edx
  1219. mov eax,DWORD [8+esi]
  1220. adc ebx,0
  1221. ; sqr a[2]*a[2]
  1222. mul eax
  1223. add ecx,eax
  1224. adc ebp,edx
  1225. mov edx,DWORD [8+esi]
  1226. adc ebx,0
  1227. mov DWORD [16+edi],ecx
  1228. mov eax,DWORD [12+esi]
  1229. ; saved r[4]
  1230. ; ############### Calculate word 5
  1231. xor ecx,ecx
  1232. ; sqr a[3]*a[2]
  1233. mul edx
  1234. add eax,eax
  1235. adc edx,edx
  1236. adc ecx,0
  1237. add ebp,eax
  1238. adc ebx,edx
  1239. mov eax,DWORD [12+esi]
  1240. adc ecx,0
  1241. mov DWORD [20+edi],ebp
  1242. ; saved r[5]
  1243. ; ############### Calculate word 6
  1244. xor ebp,ebp
  1245. ; sqr a[3]*a[3]
  1246. mul eax
  1247. add ebx,eax
  1248. adc ecx,edx
  1249. adc ebp,0 ; ebp is not read after this point
  1250. mov DWORD [24+edi],ebx
  1251. ; saved r[6]
  1252. mov DWORD [28+edi],ecx ; store r[7]
  1253. pop ebx ; restore the four registers pushed in the prologue
  1254. pop ebp
  1255. pop edi
  1256. pop esi
  1257. ret