// x86-mont.S

// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl bn_mul_mont
.hidden bn_mul_mont
.type bn_mul_mont,@function
.align 16
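// The C prototype is approximately
//   int bn_mul_mont(uint32_t *rp, const uint32_t *ap, const uint32_t *bp,
//                   const uint32_t *np, const uint32_t *n0, int num);
// The routine computes rp = ap * bp * 2^(-32*num) mod np (word-wise
// Montgomery multiplication), returning 1 on success and 0 (without
// touching rp) when num < 4. In outline, for each word bp[i]:
//   m  = (tp[0] + ap[0]*bp[i]) * n0 mod 2^32
//   tp = (tp + ap*bp[i] + m*np) / 2^32
// followed by a single conditional subtraction of np at the end.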
bn_mul_mont:
.L_bn_mul_mont_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
movl 40(%esp),%edi
cmpl $4,%edi
jl .L000just_leave
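// Carve out num+2 words of scratch (plus alignment slack) below the current
// stack. The and/xor arithmetic places that vector roughly 2KB away from the
// ap argument area, which the upstream Perl source does to reduce cache-set
// contention with the inputs, and aligns it to 64 bytes. The caller's %esp
// is preserved in %edx and later stored at 24(%esp) for the exit path.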
leal 20(%esp),%esi
leal 24(%esp),%edx
addl $2,%edi
negl %edi
leal -32(%esp,%edi,4),%ebp
negl %edi
movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
subl %eax,%ebp
xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
subl %edx,%ebp
andl $-64,%ebp
movl %esp,%eax
subl %ebp,%eax
andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
jmp .L002page_walk_done
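// If the new frame sits a page or more below the old %esp, walk down to it
// one 4KB page at a time, loading a word from each page, so the OS guard
// page can grow safely for large num.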
.align 16
.L001page_walk:
leal -4096(%esp),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
.L002page_walk_done:
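// Copy the arguments into the new frame (%esi still points at the argument
// block on the caller's stack). Frame layout from here on:
//   4(%esp)=rp   8(%esp)=ap   12(%esp)=bp   16(%esp)=np   20(%esp)=*n0
//   24(%esp)=caller's %esp    32(%esp)...=tp, the num+2 word scratch vector
// %ebx holds num-1 for the rest of the function.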
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
movl %edx,24(%esp)
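// The call/pop pair yields the current EIP so OPENSSL_ia32cap_P can be
// addressed position-independently. Bit 26 of the first capability word
// mirrors CPUID.1:EDX bit 26 (SSE2): if set, the pmuludq-based path below
// is used, otherwise the plain mull path at .L004non_sse2.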
call .L003PIC_me_up
.L003PIC_me_up:
popl %eax
leal OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L004non_sse2
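// SSE2/MMX path. %mm7 is a 0xffffffff mask used to extract low 32-bit
// halves, %mm4 = bp[0], %mm5 = ap[0], %mm3 = np[0]. The code forms
// ap[0]*bp[0], derives m = (low word * n0) mod 2^32 in %mm5 (pmuludq only
// reads the low halves of its operands), and starts accumulating m*np[0].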
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
movl 12(%esp),%edi
movl 16(%esp),%ebp
xorl %edx,%edx
xorl %ecx,%ecx
movd (%edi),%mm4
movd (%esi),%mm5
movd (%ebp),%mm3
pmuludq %mm4,%mm5
movq %mm5,%mm2
movq %mm5,%mm0
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
incl %ecx
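// First pass (i = 0): for j = 1..num-1 add ap[j]*bp[0] into the high carry
// chain (%mm2) and m*np[j] into the low chain (%mm3), writing the low 32
// bits of each sum as tp[j-1]. The code after the loop handles the final
// word and stores the top two words of tp.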
.align 16
.L0051st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl .L0051st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
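// Outer loop over the remaining words bp[i], i = 1..num-1 (index in %edx):
// recompute m = ((tp[0] + ap[0]*bp[i]) * n0) mod 2^32, then the inner loop
// folds tp[j] + ap[j]*bp[i] + m*np[j] back into tp, shifted down one word,
// which is the word-wise Montgomery reduction step. emms clears the MMX
// state before the integer tail code runs.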
.L006outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
movd 32(%esp),%mm6
movd (%ebp),%mm3
pmuludq %mm4,%mm5
paddq %mm6,%mm5
movq %mm5,%mm0
movq %mm5,%mm2
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 36(%esp),%mm6
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm6,%mm2
incl %ecx
decl %ebx
.L007inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
movd 36(%esp,%ecx,4),%mm6
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz .L007inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
movd 36(%esp,%ebx,4),%mm6
paddq %mm2,%mm3
paddq %mm6,%mm3
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle .L006outer
emms
jmp .L008common_tail
.align 16
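// Integer (non-SSE2) path using mull. When ap == bp and num is even the
// code branches to the dedicated squaring routine at .L009bn_sqr_mont;
// otherwise 28(%esp) is set to &bp[num] as the end marker for the outer
// loop and the general multiply starts at .L010mull.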
.L004non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
xorl %ecx,%ecx
movl %esi,%edx
andl $1,%ebp
subl %edi,%edx
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz .L009bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
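// First pass of the general multiply: tp[j] = ap[j]*bp[0] with the carry in
// %edx. The code after the loop forms m = tp[0]*n0 in %edi, stores the top
// words of tp, and jumps straight into the reduction loop at .L0112ndmadd.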
.L010mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L010mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
addl %ebp,%eax
movl 16(%esp),%esi
adcl $0,%edx
imull 32(%esp),%edi
movl %eax,32(%esp,%ebx,4)
xorl %ecx,%ecx
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
movl (%esi),%eax
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp .L0112ndmadd
.align 16
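// Outer iterations i >= 1, first pass: tp[j] += ap[j]*bp[i]. The tail adds
// the final word, recomputes m = tp[0]*n0, refreshes the top words of tp
// and falls through into the reduction pass below.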
.L0121stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L0121stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
addl %eax,%ebp
adcl $0,%edx
imull 32(%esp),%edi
xorl %ecx,%ecx
addl 36(%esp,%ebx,4),%edx
movl %ebp,32(%esp,%ebx,4)
adcl $0,%ecx
movl (%esi),%eax
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
movl $1,%ecx
.align 16
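// Reduction pass: tp[j] += m*np[j], with each word stored one slot lower so
// the vector is effectively divided by 2^32. The tail folds the top carries,
// then either jumps to the common tail (the bp cursor at 12(%esp) has
// reached &bp[num], kept at 28(%esp)) or loads the next bp word and loops
// back to .L0121stmadd.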
.L0112ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0112ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
xorl %eax,%eax
movl 12(%esp),%ecx
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
leal 4(%ecx),%ecx
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je .L008common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp .L0121stmadd
.align 16
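// Dedicated squaring path, used when ap == bp (and num is even). Cross
// products ap[i]*ap[j] are computed once and doubled, with the doubling
// carry bit tracked in %ebx, and the squares ap[i]^2 are added on the
// diagonal; this roughly halves the number of multiplications. (%esp)
// caches num-1, and 12(%esp), no longer needed as the bp cursor, now
// holds the outer index i.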
.L009bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
mull %edi
movl %eax,32(%esp)
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
incl %ecx
.align 16
.L013sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal 1(%ecx),%ecx
adcl $0,%edx
leal (%ebx,%eax,2),%ebp
shrl $31,%eax
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl .L013sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
leal (%ebx,%eax,2),%ebp
imull 32(%esp),%edi
shrl $31,%eax
movl %ebp,32(%esp,%ecx,4)
leal (%eax,%edx,2),%ebp
movl (%esi),%eax
shrl $31,%edx
movl %ebp,36(%esp,%ecx,4)
movl %edx,40(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
movl %ecx,%ebx
adcl $0,%edx
movl 4(%esi),%eax
movl $1,%ecx
.align 16
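// Reduction pass shared by the squaring code, unrolled two words per
// iteration: tp[j] += m*np[j], stored one slot lower. Its tail either
// finishes via the common tail or squares the next diagonal word and
// continues with the doubled cross products at .L016sqradd.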
.L0143rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
movl 4(%esi,%ecx,4),%eax
adcl $0,%edx
movl %ebp,28(%esp,%ecx,4)
movl %edx,%ebp
mull %edi
addl 36(%esp,%ecx,4),%ebp
leal 2(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0143rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
movl 12(%esp),%ecx
xorl %eax,%eax
movl 8(%esp),%esi
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je .L008common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
movl %ecx,12(%esp)
mull %edi
addl 32(%esp,%ecx,4),%eax
adcl $0,%edx
movl %eax,32(%esp,%ecx,4)
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je .L015sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
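// Add the doubled cross products ap[j]*ap[i] (j > i) into tp, carrying the
// doubling bit in %ebx. .L015sqrlast then recomputes m = tp[0]*n0, fixes up
// the top words and re-enters the reduction loop at .L0143rdmadd.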
.L016sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal (%eax,%eax,1),%ebp
adcl $0,%edx
shrl $31,%eax
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%eax
addl %ebx,%ebp
adcl $0,%eax
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle .L016sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
.L015sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
addl 32(%esp,%ecx,4),%edx
movl (%esi),%eax
adcl $0,%ebp
movl %edx,32(%esp,%ecx,4)
movl %ebp,36(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
leal -1(%ecx),%ebx
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp .L0143rdmadd
.align 16
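// Common tail for all paths: tp now holds the num+1 word Montgomery result.
// .L017sub subtracts np from tp into rp; the final borrow, folded into the
// top word, produces the all-ones or all-zeros masks in %eax/%edx that the
// copy loop uses to pick either tp or tp-np without branching.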
.L008common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
movl (%esi),%eax
movl %ebx,%ecx
xorl %edx,%edx
.align 16
.L017sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge .L017sub
sbbl $0,%eax
movl $-1,%edx
xorl %eax,%edx
jmp .L018copy
.align 16
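// Branchless select: rp[j] = (tp[j] & %eax) | ((tp - np)[j] & %edx), where
// %edx = ~%eax, so the subtracted value is kept only when tp >= np. Each tp
// word on the stack is overwritten as it is consumed, scrubbing the
// intermediate. Finally the caller's %esp is restored from 24(%esp) and the
// function returns 1.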
.L018copy:
movl 32(%esp,%ebx,4),%esi
movl (%edi,%ebx,4),%ebp
movl %ecx,32(%esp,%ebx,4)
andl %eax,%esi
andl %edx,%ebp
orl %esi,%ebp
movl %ebp,(%edi,%ebx,4)
decl %ebx
jge .L018copy
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_mul_mont,.-.L_bn_mul_mont_begin
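// The .byte data below encodes the NUL-terminated ASCII string
// "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".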
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits