// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _bn_mul_mont
.private_extern _bn_mul_mont
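// bn_mul_mont(rp, ap, bp, np, n0, num) computes the Montgomery product
// rp = ap*bp/2^(32*num) mod np; n0 points to -np^-1 mod 2^32.
// Returns 1, or 0 (leaving rp untouched) when num < 4.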
.align 4
_bn_mul_mont:
L_bn_mul_mont_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
movl 40(%esp),%edi
cmpl $4,%edi
jl L000just_leave
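// Reserve a 64-byte-aligned scratch frame below %esp, sized for the
// cached arguments plus a num+2 word temporary tp.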
leal 20(%esp),%esi
leal 24(%esp),%edx
addl $2,%edi
negl %edi
leal -32(%esp,%edi,4),%ebp
negl %edi
movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
subl %eax,%ebp
xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
subl %edx,%ebp
andl $-64,%ebp
movl %esp,%eax
subl %ebp,%eax
andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
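// Walk the newly reserved stack one page at a time so that no guard
// page is skipped.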
movl (%esp),%eax
cmpl %ebp,%esp
ja L001page_walk
jmp L002page_walk_done
.align 4,0x90
L001page_walk:
leal -4096(%esp),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja L001page_walk
L002page_walk_done:
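// Cache the arguments in the new frame: rp, ap, bp, np, n0[0] at
// 4..20(%esp); the caller's %esp is saved at 24(%esp); %ebx = num-1.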
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
movl %edx,24(%esp)
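// Locate OPENSSL_ia32cap_P (PIC) and test bit 26: is SSE2 available?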
call L003PIC_me_up
L003PIC_me_up:
popl %eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc L004non_sse2
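// SSE2 path: 32x32->64-bit products via pmuludq in MMX registers;
// %mm7 holds the 0xffffffff mask used to split low and high halves.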
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
movl 12(%esp),%edi
movl 16(%esp),%ebp
xorl %edx,%edx
xorl %ecx,%ecx
movd (%edi),%mm4
movd (%esi),%mm5
movd (%ebp),%mm3
pmuludq %mm4,%mm5
movq %mm5,%mm2
movq %mm5,%mm0
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
incl %ecx
.align 4,0x90
L0051st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl L0051st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
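// Outer loop: for each further word bp[i], fold ap[j]*bp[i] and
// m*np[j] into tp, where m = tp[0]*n0 mod 2^32.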
L006outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
movd 32(%esp),%mm6
movd (%ebp),%mm3
pmuludq %mm4,%mm5
paddq %mm6,%mm5
movq %mm5,%mm0
movq %mm5,%mm2
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 36(%esp),%mm6
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm6,%mm2
incl %ecx
decl %ebx
L007inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
movd 36(%esp,%ecx,4),%mm6
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz L007inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
movd 36(%esp,%ebx,4),%mm6
paddq %mm2,%mm3
paddq %mm6,%mm3
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle L006outer
emms
jmp L008common_tail
.align 4,0x90
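// Scalar fallback (no SSE2) using mull/adcl; when ap == bp and num is
// even, branch to the dedicated squaring code at L009bn_sqr_mont.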
L004non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
xorl %ecx,%ecx
movl %esi,%edx
andl $1,%ebp
subl %edi,%edx
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz L009bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 4,0x90
L010mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L010mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
addl %ebp,%eax
movl 16(%esp),%esi
adcl $0,%edx
imull 32(%esp),%edi
movl %eax,32(%esp,%ebx,4)
xorl %ecx,%ecx
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
movl (%esi),%eax
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp L0112ndmadd
.align 4,0x90
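// First pass of an outer iteration: tp[j] += ap[j]*bp[i], carry in %edx.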
L0121stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L0121stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
addl %eax,%ebp
adcl $0,%edx
imull 32(%esp),%edi
xorl %ecx,%ecx
addl 36(%esp,%ebx,4),%edx
movl %ebp,32(%esp,%ebx,4)
adcl $0,%ecx
movl (%esi),%eax
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
movl $1,%ecx
.align 4,0x90
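// Second pass: tp[j-1] = tp[j] + m*np[j] + carry, i.e. add m*np and
// shift tp down one word.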
L0112ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0112ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
xorl %eax,%eax
movl 12(%esp),%ecx
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
leal 4(%ecx),%ecx
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je L008common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp L0121stmadd
.align 4,0x90
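// Squaring path (ap == bp, num even): off-diagonal products are
// computed once and doubled.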
L009bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
mull %edi
movl %eax,32(%esp)
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
incl %ecx
.align 4,0x90
L013sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal 1(%ecx),%ecx
adcl $0,%edx
leal (%ebx,%eax,2),%ebp
shrl $31,%eax
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl L013sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
leal (%ebx,%eax,2),%ebp
imull 32(%esp),%edi
shrl $31,%eax
movl %ebp,32(%esp,%ecx,4)
leal (%eax,%edx,2),%ebp
movl (%esi),%eax
shrl $31,%edx
movl %ebp,36(%esp,%ecx,4)
movl %edx,40(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
movl %ecx,%ebx
adcl $0,%edx
movl 4(%esi),%eax
movl $1,%ecx
.align 4,0x90
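// Reduction pass used by the squaring code: tp[j-1] = tp[j] + m*np[j],
// unrolled two words per iteration.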
L0143rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
movl 4(%esi,%ecx,4),%eax
adcl $0,%edx
movl %ebp,28(%esp,%ecx,4)
movl %edx,%ebp
mull %edi
addl 36(%esp,%ecx,4),%ebp
leal 2(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0143rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
movl 12(%esp),%ecx
xorl %eax,%eax
movl 8(%esp),%esi
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je L008common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
movl %ecx,12(%esp)
mull %edi
addl 32(%esp,%ecx,4),%eax
adcl $0,%edx
movl %eax,32(%esp,%ecx,4)
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je L015sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 4,0x90
L016sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal (%eax,%eax,1),%ebp
adcl $0,%edx
shrl $31,%eax
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%eax
addl %ebx,%ebp
adcl $0,%eax
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle L016sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
L015sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
addl 32(%esp,%ecx,4),%edx
movl (%esi),%eax
adcl $0,%ebp
movl %edx,32(%esp,%ecx,4)
movl %ebp,36(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
leal -1(%ecx),%ebx
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp L0143rdmadd
.align 4,0x90
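// Final reduction: subtract np from tp, then use the borrow to copy
// either tp or tp-np to rp; restore the caller's %esp and return 1.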
L008common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
movl (%esi),%eax
movl %ebx,%ecx
xorl %edx,%edx
.align 4,0x90
L017sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge L017sub
sbbl $0,%eax
movl $-1,%edx
xorl %eax,%edx
jmp L018copy
.align 4,0x90
L018copy:
movl 32(%esp,%ebx,4),%esi
movl (%edi,%ebx,4),%ebp
movl %ecx,32(%esp,%ebx,4)
andl %eax,%esi
andl %edx,%ebp
orl %esi,%ebp
movl %ebp,(%edi,%ebx,4)
decl %ebx
jge L018copy
movl 24(%esp),%esp
movl $1,%eax
L000just_leave:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol _OPENSSL_ia32cap_P
.long 0
#endif