x86-mont.asm 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. ; This file is generated from a similarly-named Perl script in the BoringSSL
  2. ; source tree. Do not edit by hand.
  ; Optional symbol prefixing: the include below is pulled in only when
  ; BORINGSSL_PREFIX is defined at assembly time.
  3. %ifdef BORINGSSL_PREFIX
  4. %include "boringssl_prefix_symbols_nasm.inc"
  5. %endif
  ; Select the code section directive according to NASM's output format.
  6. %ifidn __OUTPUT_FORMAT__,obj
  7. section code use32 class=code align=64
  8. %elifidn __OUTPUT_FORMAT__,win32
  ; NOTE(review): defining @feat.00 as 1 is presumably the win32 COFF
  ; "safe exception handlers" marker described in the NASM manual -- confirm.
  9. $@feat.00 equ 1
  10. section .text code align=64
  11. %else
  12. section .text code
  13. %endif
  ; The extern below is intentionally commented out: this file declares
  ; _OPENSSL_ia32cap_P itself with a `common` directive at its end.
  14. ;extern _OPENSSL_ia32cap_P
  ; Export the routine's entry label.
  15. global _bn_mul_mont
  ; =====================================================================
  ; _bn_mul_mont -- Montgomery multiplication for 32-bit x86 (named so by
  ; the ASCII banner emitted after the routine).
  ;
  ; Arguments are read from the stack. After the four register pushes the
  ; first one is at [20+esp] and the sixth at [40+esp]:
  ;   arg1  pointer the final result words are stored through
  ;   arg2  pointer to the words of the first multiplicand
  ;   arg3  pointer to the words of the second multiplicand
  ;   arg4  pointer to the words that are multiplied by the reduction
  ;         factor and subtracted in the final step (the modulus)
  ;   arg5  pointer; only the first dword behind it is loaded
  ;   arg6  word count N, compared as a signed value
  ; NOTE(review): presumably (rp, ap, bp, np, n0, num) of the C prototype --
  ; confirm against the declaring header.
  ;
  ; Returns eax = 0 without computing anything when N < 4, else eax = 1.
  ; ebp, ebx, esi and edi are saved on entry and restored on exit.
  ;
  ; This part captures the arguments, carves a scratch frame below the
  ; caller's stack and dispatches on a CPU capability bit. Frame layout
  ; (offsets from the new esp):
  ;    [0]   copy of N-1, written only by the squaring path
  ;    [4]   arg1     [8] arg2     [12] arg3 (later reused as a cursor)
  ;   [16]   arg4    [20] dword loaded through arg5
  ;   [24]   caller's esp, reloaded just before the final pops
  ;   [28]   end pointer for arg3, written only by the generic integer path
  ;   [32].. temporary vector t[] of N+2 dwords
  ; =====================================================================
  16. align 16
  17. _bn_mul_mont:
  18. L$_bn_mul_mont_begin:
  19. push ebp
  20. push ebx
  21. push esi
  22. push edi
  23. xor eax,eax  ; return value 0 for the early-exit case
  24. mov edi,DWORD [40+esp]  ; edi = N (arg6)
  25. cmp edi,4
  26. jl NEAR L$000just_leave  ; signed N < 4: leave with eax = 0
  27. lea esi,[20+esp]  ; esi -> argument block (address of the arg1 slot)
  28. lea edx,[24+esp]  ; edx = address of the arg2 slot, used below as an alignment reference
  29. add edi,2
  30. neg edi  ; edi = -(N+2)
  31. lea ebp,[edi*4+esp-32]  ; tentative frame base: esp - 32 - 4*(N+2)
  32. neg edi  ; edi = N+2 again
  ; Move the frame base down so that it is congruent to the reference
  ; address modulo 2048, then make bit 11 differ from the reference.
  ; NOTE(review): presumably done to control cache aliasing between the
  ; frame and the reference address -- confirm with the generator script.
  33. mov eax,ebp
  34. sub eax,edx
  35. and eax,2047
  36. sub ebp,eax  ; (ebp - edx) is now a multiple of 2048
  37. xor edx,ebp
  38. and edx,2048
  39. xor edx,2048  ; edx = 2048 if bit 11 of ebp equals that of the reference, else 0
  40. sub ebp,edx  ; bit 11 of ebp now differs from the reference address
  41. and ebp,-64  ; round the frame base down to a 64-byte boundary
  ; Lower esp to the frame base in 4096-byte steps, reading one dword at
  ; each step (the labels call this a page walk).
  42. mov eax,esp
  43. sub eax,ebp
  44. and eax,-4096  ; distance to the frame base rounded down to whole 4096-byte steps
  45. mov edx,esp  ; edx = caller's esp, stored in the frame further down
  46. lea esp,[eax*1+ebp]  ; first probe address: frame base plus whole steps
  47. mov eax,DWORD [esp]  ; touch it
  48. cmp esp,ebp
  49. ja NEAR L$001page_walk  ; unsigned: still above the frame base
  50. jmp NEAR L$002page_walk_done
  51. align 16
  52. L$001page_walk:
  53. lea esp,[esp-4096]  ; step down by 4096 bytes
  54. mov eax,DWORD [esp]  ; touch it
  55. cmp esp,ebp
  56. ja NEAR L$001page_walk
  57. L$002page_walk_done:
  ; esp == frame base here. Copy the arguments into the frame.
  58. mov eax,DWORD [esi]  ; arg1
  59. mov ebx,DWORD [4+esi]  ; arg2
  60. mov ecx,DWORD [8+esi]  ; arg3
  61. mov ebp,DWORD [12+esi]  ; arg4
  62. mov esi,DWORD [16+esi]  ; arg5 (pointer)
  63. mov esi,DWORD [esi]  ; dereference arg5: only its first dword is used
  64. mov DWORD [4+esp],eax
  65. mov DWORD [8+esp],ebx
  66. mov DWORD [12+esp],ecx
  67. mov DWORD [16+esp],ebp
  68. mov DWORD [20+esp],esi
  69. lea ebx,[edi-3]  ; ebx = (N+2) - 3 = N-1, index of the last word
  70. mov DWORD [24+esp],edx  ; remember the caller's esp
  ; Dispatch: bit 26 of the first dword of _OPENSSL_ia32cap_P selects the
  ; MMX/SSE2 code that follows; when clear, jump to the integer-only code
  ; (the target label names the bit as SSE2).
  71. lea eax,[_OPENSSL_ia32cap_P]
  72. bt DWORD [eax],26  ; CF = capability bit 26
  73. jnc NEAR L$003non_sse2
  ; ---------------------------------------------------------------------
  ; MMX-register path (reached by fall-through when capability bit 26 is
  ; set). Multiplication and reduction are fused: each step adds a[j]*b[i]
  ; and n[j]*m to the running vector and stores the result one word lower.
  ;
  ; Notation: a = arg2 words, b = arg3 words, n = arg4 words, n0 = dword at
  ; [20+esp], t[] = dword vector at [32+esp], N = word count.
  ; On entry ebx = N-1 (set by the preceding setup code).
  ; Register roles: mm7 = low-dword mask, mm4 = b[i], mm5 = m,
  ; mm2 = a*b accumulator/carry, mm3 = n*m accumulator/carry,
  ; edx = i (outer index), ecx = j (inner index).
  ; ---------------------------------------------------------------------
  74. mov eax,-1
  75. movd mm7,eax  ; mm7 = 0x00000000FFFFFFFF
  76. mov esi,DWORD [8+esp]  ; esi = a
  77. mov edi,DWORD [12+esp]  ; edi = b
  78. mov ebp,DWORD [16+esp]  ; ebp = n
  79. xor edx,edx  ; i = 0
  80. xor ecx,ecx  ; j = 0
  81. movd mm4,DWORD [edi]  ; mm4 = b[0]
  82. movd mm5,DWORD [esi]  ; mm5 = a[0]
  83. movd mm3,DWORD [ebp]  ; mm3 = n[0]
  84. pmuludq mm5,mm4  ; mm5 = a[0]*b[0]
  85. movq mm2,mm5
  86. movq mm0,mm5
  87. pand mm0,mm7  ; mm0 = low dword of a[0]*b[0]
  88. pmuludq mm5,[20+esp]  ; low dword of product times n0; low dword of the result is m
  89. pmuludq mm3,mm5  ; mm3 = n[0]*m
  90. paddq mm3,mm0  ; add the low dword of a[0]*b[0]
  91. movd mm1,DWORD [4+ebp]  ; preload n[1]
  92. movd mm0,DWORD [4+esi]  ; preload a[1]
  93. psrlq mm2,32  ; keep only the carry of the a*b chain
  94. psrlq mm3,32  ; keep only the carry of the n*m chain
  95. inc ecx  ; j = 1
  96. align 16
  ; First pass (i = 0): j runs from 1 to N-2 here, the last word follows.
  97. L$0041st:
  98. pmuludq mm0,mm4  ; a[j]*b[0]
  99. pmuludq mm1,mm5  ; n[j]*m
  100. paddq mm2,mm0
  101. paddq mm3,mm1
  102. movq mm0,mm2
  103. pand mm0,mm7  ; low dword of the a*b chain
  104. movd mm1,DWORD [4+ecx*4+ebp]  ; preload n[j+1]
  105. paddq mm3,mm0
  106. movd mm0,DWORD [4+ecx*4+esi]  ; preload a[j+1]
  107. psrlq mm2,32
  108. movd DWORD [28+ecx*4+esp],mm3  ; t[j-1] = low dword of the sum
  109. psrlq mm3,32
  110. lea ecx,[1+ecx]  ; j++
  111. cmp ecx,ebx
  112. jl NEAR L$0041st  ; while j < N-1
  ; Last word of the first pass (j = N-1): nothing left to preload.
  113. pmuludq mm0,mm4
  114. pmuludq mm1,mm5
  115. paddq mm2,mm0
  116. paddq mm3,mm1
  117. movq mm0,mm2
  118. pand mm0,mm7
  119. paddq mm3,mm0
  120. movd DWORD [28+ecx*4+esp],mm3  ; t[N-2]
  121. psrlq mm2,32
  122. psrlq mm3,32
  123. paddq mm3,mm2  ; combine both carries
  124. movq [32+ebx*4+esp],mm3  ; one 64-bit store fills t[N-1] and t[N]
  125. inc edx  ; i = 1
  ; Remaining passes: same scheme, additionally folding in the previous t[].
  126. L$005outer:
  127. xor ecx,ecx  ; j = 0
  128. movd mm4,DWORD [edx*4+edi]  ; mm4 = b[i]
  129. movd mm5,DWORD [esi]  ; a[0]
  130. movd mm6,DWORD [32+esp]  ; t[0]
  131. movd mm3,DWORD [ebp]  ; n[0]
  132. pmuludq mm5,mm4
  133. paddq mm5,mm6  ; a[0]*b[i] + t[0]
  134. movq mm0,mm5
  135. movq mm2,mm5
  136. pand mm0,mm7  ; low dword of the sum
  137. pmuludq mm5,[20+esp]  ; times n0; low dword of the result is m for this pass
  138. pmuludq mm3,mm5  ; n[0]*m
  139. paddq mm3,mm0
  140. movd mm6,DWORD [36+esp]  ; t[1]
  141. movd mm1,DWORD [4+ebp]  ; preload n[1]
  142. movd mm0,DWORD [4+esi]  ; preload a[1]
  143. psrlq mm2,32
  144. psrlq mm3,32
  145. paddq mm2,mm6  ; a*b chain carry + t[1]
  146. inc ecx  ; j = 1
  147. dec ebx  ; ebx = N-2, used as the inner-loop down counter
  148. L$006inner:
  149. pmuludq mm0,mm4  ; a[j]*b[i]
  150. pmuludq mm1,mm5  ; n[j]*m
  151. paddq mm2,mm0
  152. paddq mm3,mm1
  153. movq mm0,mm2
  154. movd mm6,DWORD [36+ecx*4+esp]  ; t[j+1] for the next step
  155. pand mm0,mm7
  156. movd mm1,DWORD [4+ecx*4+ebp]  ; preload n[j+1]
  157. paddq mm3,mm0
  158. movd mm0,DWORD [4+ecx*4+esi]  ; preload a[j+1]
  159. psrlq mm2,32
  160. movd DWORD [28+ecx*4+esp],mm3  ; t[j-1]
  161. psrlq mm3,32
  162. paddq mm2,mm6
  163. dec ebx  ; sets ZF for the jnz below
  164. lea ecx,[1+ecx]  ; j++ (lea leaves the flags from dec intact)
  165. jnz NEAR L$006inner
  166. mov ebx,ecx  ; ecx == N-1 here, so ebx is N-1 again
  ; Last word of this pass (j = N-1).
  167. pmuludq mm0,mm4
  168. pmuludq mm1,mm5
  169. paddq mm2,mm0
  170. paddq mm3,mm1
  171. movq mm0,mm2
  172. pand mm0,mm7
  173. paddq mm3,mm0
  174. movd DWORD [28+ecx*4+esp],mm3  ; t[N-2]
  175. psrlq mm2,32
  176. psrlq mm3,32
  177. movd mm6,DWORD [36+ebx*4+esp]  ; previous t[N]
  178. paddq mm3,mm2
  179. paddq mm3,mm6  ; both carries + previous top word
  180. movq [32+ebx*4+esp],mm3  ; t[N-1] and t[N]
  181. lea edx,[1+edx]  ; i++
  182. cmp edx,ebx
  183. jle NEAR L$005outer  ; while i <= N-1
  184. emms  ; leave MMX state before returning to integer/x87 code
  185. jmp NEAR L$007common_tail
  ; ---------------------------------------------------------------------
  ; Integer-only path, taken when capability bit 26 is clear.
  ;
  ; Notation: a = arg2 words, b = arg3 words, n = arg4 words, n0 = dword at
  ; [20+esp], t[] = dword vector at [32+esp], N = word count.
  ; On entry ebx = N-1 (set by the preceding setup code).
  ; If N is even and a == b, control moves to the dedicated squaring code.
  ; Otherwise each outer pass does a multiply-accumulate sweep
  ; (t += a * b[i]) followed by a reduction sweep (t = (t + n*m) shifted
  ; down one word). Frame slot [12+esp] serves as the cursor into b and
  ; [28+esp] holds the address one past b's last word.
  ; ---------------------------------------------------------------------
  186. align 16
  187. L$003non_sse2:
  188. mov esi,DWORD [8+esp]  ; esi = a
  189. lea ebp,[1+ebx]  ; ebp = N
  190. mov edi,DWORD [12+esp]  ; edi = b
  191. xor ecx,ecx  ; j = 0
  192. mov edx,esi
  193. and ebp,1  ; N & 1
  194. sub edx,edi  ; a - b as addresses
  195. lea eax,[4+ebx*4+edi]  ; eax = b + 4*N, one past the last word of b
  196. or ebp,edx  ; zero only when N is even and a == b
  197. mov edi,DWORD [edi]  ; edi = b[0]; mov keeps ZF from the OR
  198. jz NEAR L$008bn_sqr_mont
  199. mov DWORD [28+esp],eax  ; save the end pointer for b
  200. mov eax,DWORD [esi]  ; eax = a[0]
  201. xor edx,edx  ; carry = 0
  202. align 16
  ; First pass multiply: t[j] = low(a[j]*b[0] + carry) for j = 0 .. N-2.
  203. L$009mull:
  204. mov ebp,edx  ; ebp = incoming carry
  205. mul edi  ; edx:eax = a[j]*b[0]
  206. add ebp,eax
  207. lea ecx,[1+ecx]  ; j++ without touching CF
  208. adc edx,0
  209. mov eax,DWORD [ecx*4+esi]  ; next a[j]
  210. cmp ecx,ebx
  211. mov DWORD [28+ecx*4+esp],ebp  ; store into t[j] of the word just processed
  212. jl NEAR L$009mull
  ; Last word, then set up the first reduction sweep.
  213. mov ebp,edx
  214. mul edi  ; a[N-1]*b[0]
  215. mov edi,DWORD [20+esp]  ; edi = n0
  216. add eax,ebp
  217. mov esi,DWORD [16+esp]  ; esi = n
  218. adc edx,0
  219. imul edi,DWORD [32+esp]  ; m = low 32 bits of n0*t[0]
  220. mov DWORD [32+ebx*4+esp],eax  ; t[N-1]
  221. xor ecx,ecx
  222. mov DWORD [36+ebx*4+esp],edx  ; t[N]
  223. mov DWORD [40+ebx*4+esp],ecx  ; t[N+1] = 0
  224. mov eax,DWORD [esi]  ; n[0]
  225. mul edi  ; n[0]*m
  226. add eax,DWORD [32+esp]  ; only the carry of low(n[0]*m) + t[0] is kept
  227. mov eax,DWORD [4+esi]  ; n[1]; mov preserves CF for the adc
  228. adc edx,0
  229. inc ecx  ; j = 1
  230. jmp NEAR L$0102ndmadd
  231. align 16
  ; Multiply-accumulate sweep for later passes: t[j] += a[j]*b[i].
  ; Entered with ecx = 0, edx = 0, eax = a[0], edi = b[i].
  232. L$0111stmadd:
  233. mov ebp,edx  ; incoming carry
  234. mul edi  ; a[j]*b[i]
  235. add ebp,DWORD [32+ecx*4+esp]  ; + t[j]
  236. lea ecx,[1+ecx]  ; j++ without touching CF
  237. adc edx,0
  238. add ebp,eax
  239. mov eax,DWORD [ecx*4+esi]  ; next a[j]
  240. adc edx,0
  241. cmp ecx,ebx
  242. mov DWORD [28+ecx*4+esp],ebp  ; write back t[j] of the word just processed
  243. jl NEAR L$0111stmadd
  ; Last word of the sweep, top-word update and reduction set-up.
  244. mov ebp,edx
  245. mul edi  ; a[N-1]*b[i]
  246. add eax,DWORD [32+ebx*4+esp]  ; + t[N-1]
  247. mov edi,DWORD [20+esp]  ; edi = n0
  248. adc edx,0
  249. mov esi,DWORD [16+esp]  ; esi = n
  250. add ebp,eax
  251. adc edx,0
  252. imul edi,DWORD [32+esp]  ; m = low 32 bits of n0*t[0]
  253. xor ecx,ecx
  254. add edx,DWORD [36+ebx*4+esp]  ; carry + t[N]
  255. mov DWORD [32+ebx*4+esp],ebp  ; t[N-1]
  256. adc ecx,0  ; ecx = carry out of the top word
  257. mov eax,DWORD [esi]  ; n[0]
  258. mov DWORD [36+ebx*4+esp],edx  ; t[N]
  259. mov DWORD [40+ebx*4+esp],ecx  ; t[N+1]
  260. mul edi  ; n[0]*m
  261. add eax,DWORD [32+esp]  ; only the carry of low(n[0]*m) + t[0] is kept
  262. mov eax,DWORD [4+esi]  ; n[1]
  263. adc edx,0
  264. mov ecx,1  ; j = 1
  265. align 16
  ; Reduction sweep: t[j-1] = low(t[j] + n[j]*m + carry).
  266. L$0102ndmadd:
  267. mov ebp,edx
  268. mul edi  ; n[j]*m
  269. add ebp,DWORD [32+ecx*4+esp]  ; + t[j]
  270. lea ecx,[1+ecx]  ; j++ without touching CF
  271. adc edx,0
  272. add ebp,eax
  273. mov eax,DWORD [ecx*4+esi]  ; next n[j]
  274. adc edx,0
  275. cmp ecx,ebx
  276. mov DWORD [24+ecx*4+esp],ebp  ; store one word below the one just processed
  277. jl NEAR L$0102ndmadd
  ; Last word of the reduction and propagation into the top words.
  278. mov ebp,edx
  279. mul edi  ; n[N-1]*m
  280. add ebp,DWORD [32+ebx*4+esp]  ; + t[N-1]
  281. adc edx,0
  282. add ebp,eax
  283. adc edx,0
  284. mov DWORD [28+ebx*4+esp],ebp  ; t[N-2]
  285. xor eax,eax
  286. mov ecx,DWORD [12+esp]  ; cursor into b
  287. add edx,DWORD [36+ebx*4+esp]  ; carry + t[N]
  288. adc eax,DWORD [40+ebx*4+esp]  ; + t[N+1] + carry
  289. lea ecx,[4+ecx]  ; advance the cursor to the next word of b
  290. mov DWORD [32+ebx*4+esp],edx  ; t[N-1]
  291. cmp ecx,DWORD [28+esp]  ; reached the end of b?
  292. mov DWORD [36+ebx*4+esp],eax  ; t[N]
  293. je NEAR L$007common_tail
  ; Prepare the next outer pass.
  294. mov edi,DWORD [ecx]  ; edi = next b[i]
  295. mov esi,DWORD [8+esp]  ; esi = a
  296. mov DWORD [12+esp],ecx  ; save the cursor
  297. xor ecx,ecx  ; j = 0
  298. xor edx,edx  ; carry = 0
  299. mov eax,DWORD [esi]  ; a[0]
  300. jmp NEAR L$0111stmadd
  ; ---------------------------------------------------------------------
  ; Squaring path, entered from the integer-only code when N is even and
  ; both multiplicand pointers are equal.
  ;
  ; Notation: a = arg2 words, n = arg4 words, n0 = dword at [20+esp],
  ; t[] = dword vector at [32+esp], N = word count.
  ; On entry: esi = a, edi = a[0], ebx = N-1, ecx = 0.
  ; Frame slot [esp] keeps N-1 and [12+esp] keeps the outer index i.
  ; Each pass adds a[i]^2 once and the cross products a[j]*a[i] (j > i)
  ; doubled, then runs a reduction sweep that is unrolled by two words
  ; (which relies on N being even).
  ; ---------------------------------------------------------------------
  301. align 16
  302. L$008bn_sqr_mont:
  303. mov DWORD [esp],ebx  ; [esp] = N-1
  304. mov DWORD [12+esp],ecx  ; i = 0
  305. mov eax,edi
  306. mul edi  ; a[0]^2
  307. mov DWORD [32+esp],eax  ; t[0] = low word of the square
  308. mov ebx,edx
  309. shr edx,1  ; high word halved; it is doubled again with the cross products
  310. and ebx,1  ; ebx = bit shifted out, re-inserted as the low bit below
  311. inc ecx  ; j = 1
  312. align 16
  ; First pass: t[j] = low(2*(a[j]*a[0] + carry)) + carried bit, j = 1 .. N-2.
  313. L$012sqr:
  314. mov eax,DWORD [ecx*4+esi]  ; a[j]
  315. mov ebp,edx  ; incoming carry
  316. mul edi  ; a[j]*a[0]
  317. add eax,ebp
  318. lea ecx,[1+ecx]  ; j++ without touching CF
  319. adc edx,0
  320. lea ebp,[eax*2+ebx]  ; double and insert the bit carried from the previous word
  321. shr eax,31  ; bit lost by the doubling
  322. cmp ecx,DWORD [esp]
  323. mov ebx,eax  ; carry it into the next word
  324. mov DWORD [28+ecx*4+esp],ebp  ; t[j] of the word just processed
  325. jl NEAR L$012sqr
  ; Last word (j = N-1), top words, and set-up of the first reduction sweep.
  326. mov eax,DWORD [ecx*4+esi]  ; a[N-1]
  327. mov ebp,edx
  328. mul edi
  329. add eax,ebp
  330. mov edi,DWORD [20+esp]  ; edi = n0
  331. adc edx,0
  332. mov esi,DWORD [16+esp]  ; esi = n
  333. lea ebp,[eax*2+ebx]
  334. imul edi,DWORD [32+esp]  ; m = low 32 bits of n0*t[0]
  335. shr eax,31
  336. mov DWORD [32+ecx*4+esp],ebp  ; t[N-1]
  337. lea ebp,[edx*2+eax]  ; doubled high word plus carried bit
  338. mov eax,DWORD [esi]  ; n[0]
  339. shr edx,31
  340. mov DWORD [36+ecx*4+esp],ebp  ; t[N]
  341. mov DWORD [40+ecx*4+esp],edx  ; t[N+1]
  342. mul edi  ; n[0]*m
  343. add eax,DWORD [32+esp]  ; only the carry of low(n[0]*m) + t[0] is kept
  344. mov ebx,ecx  ; ebx = N-1
  345. adc edx,0
  346. mov eax,DWORD [4+esi]  ; n[1]
  347. mov ecx,1  ; j = 1
  348. align 16
  ; Reduction sweep, two words per iteration:
  ; t[j-1] = low(t[j] + n[j]*m + carry), then the same for j+1.
  349. L$0133rdmadd:
  350. mov ebp,edx
  351. mul edi  ; n[j]*m
  352. add ebp,DWORD [32+ecx*4+esp]  ; + t[j]
  353. adc edx,0
  354. add ebp,eax
  355. mov eax,DWORD [4+ecx*4+esi]  ; n[j+1]
  356. adc edx,0
  357. mov DWORD [28+ecx*4+esp],ebp  ; t[j-1]
  358. mov ebp,edx
  359. mul edi  ; n[j+1]*m
  360. add ebp,DWORD [36+ecx*4+esp]  ; + t[j+1]
  361. lea ecx,[2+ecx]  ; j += 2 without touching CF
  362. adc edx,0
  363. add ebp,eax
  364. mov eax,DWORD [ecx*4+esi]  ; next n[j]
  365. adc edx,0
  366. cmp ecx,ebx
  367. mov DWORD [24+ecx*4+esp],ebp  ; store one word below the second word processed
  368. jl NEAR L$0133rdmadd
  ; Last word of the reduction and propagation into the top words.
  369. mov ebp,edx
  370. mul edi  ; n[N-1]*m
  371. add ebp,DWORD [32+ebx*4+esp]  ; + t[N-1]
  372. adc edx,0
  373. add ebp,eax
  374. adc edx,0
  375. mov DWORD [28+ebx*4+esp],ebp  ; t[N-2]
  376. mov ecx,DWORD [12+esp]  ; ecx = i
  377. xor eax,eax
  378. mov esi,DWORD [8+esp]  ; esi = a
  379. add edx,DWORD [36+ebx*4+esp]  ; carry + t[N]
  380. adc eax,DWORD [40+ebx*4+esp]  ; + t[N+1] + carry
  381. mov DWORD [32+ebx*4+esp],edx  ; t[N-1]
  382. cmp ecx,ebx  ; was that the pass for i = N-1?
  383. mov DWORD [36+ebx*4+esp],eax  ; t[N]
  384. je NEAR L$007common_tail
  ; Next pass: add the square of a[i+1] at position i+1.
  385. mov edi,DWORD [4+ecx*4+esi]  ; edi = a[i+1]
  386. lea ecx,[1+ecx]  ; i++
  387. mov eax,edi
  388. mov DWORD [12+esp],ecx  ; save i
  389. mul edi  ; a[i]^2
  390. add eax,DWORD [32+ecx*4+esp]  ; + t[i]
  391. adc edx,0
  392. mov DWORD [32+ecx*4+esp],eax  ; t[i]
  393. xor ebp,ebp
  394. cmp ecx,ebx  ; i == N-1: there are no cross products
  395. lea ecx,[1+ecx]  ; j = i+1; lea keeps the flags of the cmp
  396. je NEAR L$014sqrlast
  397. mov ebx,edx
  398. shr edx,1  ; halve the carry; it is doubled again below
  399. and ebx,1  ; bit shifted out, added back as the low bit
  400. align 16
  ; Doubled cross products: t[j] += 2*(a[j]*a[i] + carry), j = i+1 .. N-1.
  401. L$015sqradd:
  402. mov eax,DWORD [ecx*4+esi]  ; a[j]
  403. mov ebp,edx
  404. mul edi  ; a[j]*a[i]
  405. add eax,ebp
  406. lea ebp,[eax*1+eax]  ; doubled low word; lea keeps CF from the add
  407. adc edx,0
  408. shr eax,31  ; bit lost by the doubling
  409. add ebp,DWORD [32+ecx*4+esp]  ; + t[j]
  410. lea ecx,[1+ecx]  ; j++ without touching CF
  411. adc eax,0
  412. add ebp,ebx  ; + bits carried from the previous word
  413. adc eax,0
  414. cmp ecx,DWORD [esp]
  415. mov DWORD [28+ecx*4+esp],ebp  ; t[j] of the word just processed
  416. mov ebx,eax  ; carry into the next word
  417. jle NEAR L$015sqradd
  418. mov ebp,edx
  419. add edx,edx  ; double the final high word
  420. shr ebp,31  ; bit lost by that doubling
  421. add edx,ebx
  422. adc ebp,0
  ; Fold the remaining carry into the two top words (ecx == N here) and
  ; set up the reduction sweep for this pass.
  423. L$014sqrlast:
  424. mov edi,DWORD [20+esp]  ; edi = n0
  425. mov esi,DWORD [16+esp]  ; esi = n
  426. imul edi,DWORD [32+esp]  ; m = low 32 bits of n0*t[0]
  427. add edx,DWORD [32+ecx*4+esp]  ; + t[N]
  428. mov eax,DWORD [esi]  ; n[0]
  429. adc ebp,0
  430. mov DWORD [32+ecx*4+esp],edx  ; t[N]
  431. mov DWORD [36+ecx*4+esp],ebp  ; t[N+1]
  432. mul edi  ; n[0]*m
  433. add eax,DWORD [32+esp]  ; only the carry of low(n[0]*m) + t[0] is kept
  434. lea ebx,[ecx-1]  ; ebx = N-1; lea keeps CF for the adc
  435. adc edx,0
  436. mov ecx,1  ; j = 1
  437. mov eax,DWORD [4+esi]  ; n[1]
  438. jmp NEAR L$0133rdmadd
  ; ---------------------------------------------------------------------
  ; Shared tail of all three paths. On entry ebx = N-1 and the temporary
  ; vector t[] at [32+esp] holds N+1 significant words.
  ;   1. Store t - n (N words, with borrow) through arg1.
  ;   2. Derive an all-ones/all-zeros mask from the top word and the final
  ;      borrow, then choose per word between t and the stored difference
  ;      using and/and/or, with no branch on the data.
  ;   3. Overwrite the consumed words of t[], restore the caller's esp,
  ;      set eax = 1 and return through the common epilogue.
  ; ---------------------------------------------------------------------
  439. align 16
  440. L$007common_tail:
  441. mov ebp,DWORD [16+esp]  ; ebp = arg4 (subtrahend words)
  442. mov edi,DWORD [4+esp]  ; edi = arg1 (destination)
  443. lea esi,[32+esp]  ; esi -> t[0]
  444. mov eax,DWORD [esi]  ; eax = t[0]
  445. mov ecx,ebx  ; counter = N-1
  446. xor edx,edx  ; index = 0; also clears CF for the first sbb
  447. align 16
  448. L$016sub:
  449. sbb eax,DWORD [edx*4+ebp]  ; t[k] - n[k] - borrow
  450. mov DWORD [edx*4+edi],eax  ; dest[k] = difference
  451. dec ecx  ; dec leaves CF (the borrow) untouched
  452. mov eax,DWORD [4+edx*4+esi]  ; t[k+1]
  453. lea edx,[1+edx]  ; k++ without touching the flags
  454. jge NEAR L$016sub  ; N iterations; ecx is -1 on exit
  455. sbb eax,0  ; t[N] - borrow: -1 if the subtraction underflowed, else 0
  456. mov edx,-1
  457. xor edx,eax  ; edx = complement of the mask in eax
  458. jmp NEAR L$017copy
  459. align 16
  ; Select from word N-1 down to word 0.
  460. L$017copy:
  461. mov esi,DWORD [32+ebx*4+esp]  ; t[k]
  462. mov ebp,DWORD [ebx*4+edi]  ; dest[k], the difference stored above
  463. mov DWORD [32+ebx*4+esp],ecx  ; overwrite the scratch word with ecx (-1)
  464. and esi,eax  ; keep t[k] when the subtraction underflowed
  465. and ebp,edx  ; otherwise keep the difference
  466. or ebp,esi
  467. mov DWORD [ebx*4+edi],ebp
  468. dec ebx
  469. jge NEAR L$017copy
  470. mov esp,DWORD [24+esp]  ; back to the caller's stack
  471. mov eax,1  ; return value 1
  ; Common exit, also the target of the early exit taken with eax = 0.
  472. L$000just_leave:
  473. pop edi
  474. pop esi
  475. pop ebx
  476. pop ebp
  477. ret
  ; NUL-terminated ASCII banner:
  ; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
  478. db 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
  479. db 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
  480. db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
  481. db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
  482. db 111,114,103,62,0
  ; Declare _OPENSSL_ia32cap_P as a 16-byte common symbol in .bss. The
  ; multiplication routine tests bit 26 of its first dword to choose
  ; between its MMX-register path and its integer-only path.
  483. segment .bss
  484. common _OPENSSL_ia32cap_P 16