x86_64-mont5.asm 63 KB


  1. ; This file is generated from a similarly-named Perl script in the BoringSSL
  2. ; source tree. Do not edit by hand.
  3. default rel
  4. %define XMMWORD
  5. %define YMMWORD
  6. %define ZMMWORD
  7. %ifdef BORINGSSL_PREFIX
  8. %include "boringssl_prefix_symbols_nasm.inc"
  9. %endif
  10. section .text code align=64
  11. EXTERN OPENSSL_ia32cap_P
  12. global bn_mul_mont_gather5
  13. ALIGN 64
  ; bn_mul_mont_gather5 -- Win64 entry point.
  ; The routine remaps the Win64 argument registers/stack slots onto
  ; rdi, rsi, rdx, rcx, r8, r9 and then works with these roles (as used below):
  ;   rdi = destination word vector (written in $L$sub / $L$copy)
  ;   rsi = first operand word vector, read one word at a time
  ;   rdx = table base; one word is selected from each 256-byte row
  ;   rcx = modulus word vector (also the subtrahend in $L$sub)
  ;   r8  = pointer to a single word that is loaded and used as the
  ;         per-row reduction multiplier seed (presumably Montgomery n0 -- confirm)
  ;   r9  = word count (32-bit, zero-extended)
  ;   7th argument (DWORD at [56+rsp] on entry) = table column index
  ; Returns rax = 1.
  ; Counts that are a multiple of 8 are handed to $L$mul4x_enter; everything
  ; else uses the one-word-per-iteration loops in this routine.
  14. bn_mul_mont_gather5:
  ; rdi/rsi are callee-saved on Win64; park them in the caller's home space.
  15. mov QWORD[8+rsp],rdi ;WIN64 prologue
  16. mov QWORD[16+rsp],rsi
  17. mov rax,rsp
  18. $L$SEH_begin_bn_mul_mont_gather5:
  ; Remap Win64 arguments 1-6 onto rdi, rsi, rdx, rcx, r8, r9.
  19. mov rdi,rcx
  20. mov rsi,rdx
  21. mov rdx,r8
  22. mov rcx,r9
  23. mov r8,QWORD[40+rsp]
  24. mov r9,QWORD[48+rsp]
  ; Writing r9d zero-extends the 32-bit count into r9.
  25. mov r9d,r9d
  ; rax keeps the entry rsp; it is stored in the frame and used to unwind.
  26. mov rax,rsp
  ; Count not a multiple of 8 -> generic path below.
  27. test r9d,7
  28. jnz NEAR $L$mul_enter
  ; Otherwise load the dword at OPENSSL_ia32cap_P+8 into r11d, which
  ; $L$mul4x_enter tests, and continue there.
  29. lea r11,[OPENSSL_ia32cap_P]
  30. mov r11d,DWORD[8+r11]
  31. jmp NEAR $L$mul4x_enter
  32. ALIGN 16
  33. $L$mul_enter:
  ; xmm5 low dword = 7th argument (table column index).
  34. movd xmm5,DWORD[56+rsp]
  35. push rbx
  36. push rbp
  37. push r12
  38. push r13
  39. push r14
  40. push r15
  ; Frame: r10 = (rsp - 280 - 8*count) rounded down to a 1024-byte boundary.
  41. neg r9
  42. mov r11,rsp
  43. lea r10,[((-280))+r9*8+rsp]
  44. neg r9
  45. and r10,-1024
  ; Move rsp to r10 plus a whole number of 4096-byte pages, then step down one
  ; page at a time, reading each page, until rsp == r10 (stack probing).
  46. sub r11,r10
  47. and r11,-4096
  48. lea rsp,[r11*1+r10]
  49. mov r11,QWORD[rsp]
  50. cmp rsp,r10
  51. ja NEAR $L$mul_page_walk
  52. jmp NEAR $L$mul_page_walk_done
  53. $L$mul_page_walk:
  54. lea rsp,[((-4096))+rsp]
  55. mov r11,QWORD[rsp]
  56. cmp rsp,r10
  57. ja NEAR $L$mul_page_walk
  58. $L$mul_page_walk_done:
  ; r10 = address of the $L$inc constants (defined outside this routine).
  59. lea r10,[$L$inc]
  ; Save the entry rsp just above the (count+1)-word temporary at [rsp].
  60. mov QWORD[8+r9*8+rsp],rax
  61. $L$mul_body:
  ; r12 = table + 128 so that displacements -128..+112 span one 256-byte row.
  62. lea r12,[128+rdx]
  ; xmm0/xmm1 = first two 16-byte constants at $L$inc (values not visible here;
  ; presumably the starting lane counters and the per-step increment -- confirm).
  63. movdqa xmm0,XMMWORD[r10]
  64. movdqa xmm1,XMMWORD[16+r10]
  ; r10 = 16-byte aligned base; the 16 selection masks live at [112+r10..352+r10].
  65. lea r10,[((24-112))+r9*8+rsp]
  66. and r10,-16
  ; Broadcast the index to all four dword lanes of xmm5.
  67. pshufd xmm5,xmm5,0
  ; Mask generation: each step adds xmm4 to form the next counter vector and
  ; compares it with the broadcast index; a lane becomes all-ones where equal.
  ; Sixteen 16-byte masks are stored in total.
  68. movdqa xmm4,xmm1
  69. movdqa xmm2,xmm1
  70. paddd xmm1,xmm0
  71. pcmpeqd xmm0,xmm5
  ; DB 0x67 emits an address-size prefix byte ahead of the next instruction
  ; (presumably padding chosen by the generator).
  72. DB 0x67
  73. movdqa xmm3,xmm4
  74. paddd xmm2,xmm1
  75. pcmpeqd xmm1,xmm5
  76. movdqa XMMWORD[112+r10],xmm0
  77. movdqa xmm0,xmm4
  78. paddd xmm3,xmm2
  79. pcmpeqd xmm2,xmm5
  80. movdqa XMMWORD[128+r10],xmm1
  81. movdqa xmm1,xmm4
  82. paddd xmm0,xmm3
  83. pcmpeqd xmm3,xmm5
  84. movdqa XMMWORD[144+r10],xmm2
  85. movdqa xmm2,xmm4
  86. paddd xmm1,xmm0
  87. pcmpeqd xmm0,xmm5
  88. movdqa XMMWORD[160+r10],xmm3
  89. movdqa xmm3,xmm4
  90. paddd xmm2,xmm1
  91. pcmpeqd xmm1,xmm5
  92. movdqa XMMWORD[176+r10],xmm0
  93. movdqa xmm0,xmm4
  94. paddd xmm3,xmm2
  95. pcmpeqd xmm2,xmm5
  96. movdqa XMMWORD[192+r10],xmm1
  97. movdqa xmm1,xmm4
  98. paddd xmm0,xmm3
  99. pcmpeqd xmm3,xmm5
  100. movdqa XMMWORD[208+r10],xmm2
  101. movdqa xmm2,xmm4
  102. paddd xmm1,xmm0
  103. pcmpeqd xmm0,xmm5
  104. movdqa XMMWORD[224+r10],xmm3
  105. movdqa xmm3,xmm4
  106. paddd xmm2,xmm1
  107. pcmpeqd xmm1,xmm5
  108. movdqa XMMWORD[240+r10],xmm0
  109. movdqa xmm0,xmm4
  110. paddd xmm3,xmm2
  111. pcmpeqd xmm2,xmm5
  112. movdqa XMMWORD[256+r10],xmm1
  113. movdqa xmm1,xmm4
  114. paddd xmm0,xmm3
  115. pcmpeqd xmm3,xmm5
  116. movdqa XMMWORD[272+r10],xmm2
  117. movdqa xmm2,xmm4
  118. paddd xmm1,xmm0
  119. pcmpeqd xmm0,xmm5
  120. movdqa XMMWORD[288+r10],xmm3
  121. movdqa xmm3,xmm4
  122. paddd xmm2,xmm1
  123. pcmpeqd xmm1,xmm5
  124. movdqa XMMWORD[304+r10],xmm0
  125. paddd xmm3,xmm2
  126. DB 0x67
  127. pcmpeqd xmm2,xmm5
  128. movdqa XMMWORD[320+r10],xmm1
  129. pcmpeqd xmm3,xmm5
  130. movdqa XMMWORD[336+r10],xmm2
  ; Gather the first selected word: AND every 16-byte chunk of the first table
  ; row with its mask and OR the results together. The last four masks are
  ; still in xmm0..xmm3 and are applied directly to chunks +64..+112. All
  ; chunks are read regardless of the index value.
  131. pand xmm0,XMMWORD[64+r12]
  132. pand xmm1,XMMWORD[80+r12]
  133. pand xmm2,XMMWORD[96+r12]
  134. movdqa XMMWORD[352+r10],xmm3
  135. pand xmm3,XMMWORD[112+r12]
  136. por xmm0,xmm2
  137. por xmm1,xmm3
  138. movdqa xmm4,XMMWORD[((-128))+r12]
  139. movdqa xmm5,XMMWORD[((-112))+r12]
  140. movdqa xmm2,XMMWORD[((-96))+r12]
  141. pand xmm4,XMMWORD[112+r10]
  142. movdqa xmm3,XMMWORD[((-80))+r12]
  143. pand xmm5,XMMWORD[128+r10]
  144. por xmm0,xmm4
  145. pand xmm2,XMMWORD[144+r10]
  146. por xmm1,xmm5
  147. pand xmm3,XMMWORD[160+r10]
  148. por xmm0,xmm2
  149. por xmm1,xmm3
  150. movdqa xmm4,XMMWORD[((-64))+r12]
  151. movdqa xmm5,XMMWORD[((-48))+r12]
  152. movdqa xmm2,XMMWORD[((-32))+r12]
  153. pand xmm4,XMMWORD[176+r10]
  154. movdqa xmm3,XMMWORD[((-16))+r12]
  155. pand xmm5,XMMWORD[192+r10]
  156. por xmm0,xmm4
  157. pand xmm2,XMMWORD[208+r10]
  158. por xmm1,xmm5
  159. pand xmm3,XMMWORD[224+r10]
  160. por xmm0,xmm2
  161. por xmm1,xmm3
  162. movdqa xmm4,XMMWORD[r12]
  163. movdqa xmm5,XMMWORD[16+r12]
  164. movdqa xmm2,XMMWORD[32+r12]
  165. pand xmm4,XMMWORD[240+r10]
  166. movdqa xmm3,XMMWORD[48+r12]
  167. pand xmm5,XMMWORD[256+r10]
  168. por xmm0,xmm4
  169. pand xmm2,XMMWORD[272+r10]
  170. por xmm1,xmm5
  171. pand xmm3,XMMWORD[288+r10]
  172. por xmm0,xmm2
  173. por xmm1,xmm3
  174. por xmm0,xmm1
  ; pshufd 0x4e swaps the two qwords; OR folds both halves into the low qword.
  175. pshufd xmm1,xmm0,0x4e
  176. por xmm0,xmm1
  ; Advance to the next 256-byte table row.
  177. lea r12,[256+r12]
  ; Encoded bytes 66 48 0F 7E C3 = movq rbx,xmm0 (rbx = selected table word).
  178. DB 102,72,15,126,195
  ; First row: r8 = word pointed to by r8, rax = first operand word,
  ; r14 = outer (row) counter, r15 = inner (word) counter.
  179. mov r8,QWORD[r8]
  180. mov rax,QWORD[rsi]
  181. xor r14,r14
  182. xor r15,r15
  183. mov rbp,r8
  184. mul rbx
  185. mov r10,rax
  186. mov rax,QWORD[rcx]
  ; rbp = r8 * (low word of the product): the reduction multiplier for this row.
  187. imul rbp,r10
  188. mov r11,rdx
  ; Only the carry of this addition is kept; r10 itself is overwritten before
  ; being stored.
  189. mul rbp
  190. add r10,rax
  191. mov rax,QWORD[8+rsi]
  192. adc rdx,0
  193. mov r13,rdx
  194. lea r15,[1+r15]
  195. jmp NEAR $L$1st_enter
  196. ALIGN 16
  ; $L$1st: for each remaining word, accumulate operand_word*rbx and
  ; modulus_word*rbp with carries and store one result word into the
  ; temporary vector at [rsp].
  197. $L$1st:
  198. add r13,rax
  199. mov rax,QWORD[r15*8+rsi]
  200. adc rdx,0
  201. add r13,r11
  202. mov r11,r10
  203. adc rdx,0
  204. mov QWORD[((-16))+r15*8+rsp],r13
  205. mov r13,rdx
  206. $L$1st_enter:
  207. mul rbx
  208. add r11,rax
  209. mov rax,QWORD[r15*8+rcx]
  210. adc rdx,0
  211. lea r15,[1+r15]
  212. mov r10,rdx
  213. mul rbp
  214. cmp r15,r9
  215. jne NEAR $L$1st
  ; Row tail: store the last two words and the top carry word.
  216. add r13,rax
  217. adc rdx,0
  218. add r13,r11
  219. adc rdx,0
  220. mov QWORD[((-16))+r9*8+rsp],r13
  221. mov r13,rdx
  222. mov r11,r10
  223. xor rdx,rdx
  224. add r13,r11
  225. adc rdx,0
  226. mov QWORD[((-8))+r9*8+rsp],r13
  227. mov QWORD[r9*8+rsp],rdx
  228. lea r14,[1+r14]
  229. jmp NEAR $L$outer
  230. ALIGN 16
  ; $L$outer: one pass per remaining table row. rdx = mask base + 128, so the
  ; masks stored above are addressed as [-128+rdx .. 112+rdx].
  231. $L$outer:
  232. lea rdx,[((24+128))+r9*8+rsp]
  233. and rdx,-16
  234. pxor xmm4,xmm4
  235. pxor xmm5,xmm5
  236. movdqa xmm0,XMMWORD[((-128))+r12]
  237. movdqa xmm1,XMMWORD[((-112))+r12]
  238. movdqa xmm2,XMMWORD[((-96))+r12]
  239. movdqa xmm3,XMMWORD[((-80))+r12]
  240. pand xmm0,XMMWORD[((-128))+rdx]
  241. pand xmm1,XMMWORD[((-112))+rdx]
  242. por xmm4,xmm0
  243. pand xmm2,XMMWORD[((-96))+rdx]
  244. por xmm5,xmm1
  245. pand xmm3,XMMWORD[((-80))+rdx]
  246. por xmm4,xmm2
  247. por xmm5,xmm3
  248. movdqa xmm0,XMMWORD[((-64))+r12]
  249. movdqa xmm1,XMMWORD[((-48))+r12]
  250. movdqa xmm2,XMMWORD[((-32))+r12]
  251. movdqa xmm3,XMMWORD[((-16))+r12]
  252. pand xmm0,XMMWORD[((-64))+rdx]
  253. pand xmm1,XMMWORD[((-48))+rdx]
  254. por xmm4,xmm0
  255. pand xmm2,XMMWORD[((-32))+rdx]
  256. por xmm5,xmm1
  257. pand xmm3,XMMWORD[((-16))+rdx]
  258. por xmm4,xmm2
  259. por xmm5,xmm3
  260. movdqa xmm0,XMMWORD[r12]
  261. movdqa xmm1,XMMWORD[16+r12]
  262. movdqa xmm2,XMMWORD[32+r12]
  263. movdqa xmm3,XMMWORD[48+r12]
  264. pand xmm0,XMMWORD[rdx]
  265. pand xmm1,XMMWORD[16+rdx]
  266. por xmm4,xmm0
  267. pand xmm2,XMMWORD[32+rdx]
  268. por xmm5,xmm1
  269. pand xmm3,XMMWORD[48+rdx]
  270. por xmm4,xmm2
  271. por xmm5,xmm3
  272. movdqa xmm0,XMMWORD[64+r12]
  273. movdqa xmm1,XMMWORD[80+r12]
  274. movdqa xmm2,XMMWORD[96+r12]
  275. movdqa xmm3,XMMWORD[112+r12]
  276. pand xmm0,XMMWORD[64+rdx]
  277. pand xmm1,XMMWORD[80+rdx]
  278. por xmm4,xmm0
  279. pand xmm2,XMMWORD[96+rdx]
  280. por xmm5,xmm1
  281. pand xmm3,XMMWORD[112+rdx]
  282. por xmm4,xmm2
  283. por xmm5,xmm3
  284. por xmm4,xmm5
  285. pshufd xmm0,xmm4,0x4e
  286. por xmm0,xmm4
  287. lea r12,[256+r12]
  288. mov rax,QWORD[rsi]
  ; movq rbx,xmm0 -- the word selected from this row.
  289. DB 102,72,15,126,195
  290. xor r15,r15
  291. mov rbp,r8
  ; r10 = first word of the temporary accumulated so far.
  292. mov r10,QWORD[rsp]
  293. mul rbx
  294. add r10,rax
  295. mov rax,QWORD[rcx]
  296. adc rdx,0
  297. imul rbp,r10
  298. mov r11,rdx
  299. mul rbp
  300. add r10,rax
  301. mov rax,QWORD[8+rsi]
  302. adc rdx,0
  303. mov r10,QWORD[8+rsp]
  304. mov r13,rdx
  305. lea r15,[1+r15]
  306. jmp NEAR $L$inner_enter
  307. ALIGN 16
  ; $L$inner: same accumulation as $L$1st, additionally adding the word already
  ; held in the temporary vector.
  308. $L$inner:
  309. add r13,rax
  310. mov rax,QWORD[r15*8+rsi]
  311. adc rdx,0
  312. add r13,r10
  313. mov r10,QWORD[r15*8+rsp]
  314. adc rdx,0
  315. mov QWORD[((-16))+r15*8+rsp],r13
  316. mov r13,rdx
  317. $L$inner_enter:
  318. mul rbx
  319. add r11,rax
  320. mov rax,QWORD[r15*8+rcx]
  321. adc rdx,0
  322. add r10,r11
  323. mov r11,rdx
  324. adc r11,0
  325. lea r15,[1+r15]
  326. mul rbp
  327. cmp r15,r9
  328. jne NEAR $L$inner
  329. add r13,rax
  330. adc rdx,0
  331. add r13,r10
  332. mov r10,QWORD[r9*8+rsp]
  333. adc rdx,0
  334. mov QWORD[((-16))+r9*8+rsp],r13
  335. mov r13,rdx
  336. xor rdx,rdx
  337. add r13,r11
  338. adc rdx,0
  339. add r13,r10
  340. adc rdx,0
  341. mov QWORD[((-8))+r9*8+rsp],r13
  342. mov QWORD[r9*8+rsp],rdx
  ; Next row until r14 reaches the word count.
  343. lea r14,[1+r14]
  344. cmp r14,r9
  345. jb NEAR $L$outer
  ; Final step: destination = temporary - modulus, with a borrow chain.
  ; The xor clears CF before the first sbb; lea/mov/dec in the loop leave CF
  ; untouched.
  346. xor r14,r14
  347. mov rax,QWORD[rsp]
  348. lea rsi,[rsp]
  349. mov r15,r9
  350. jmp NEAR $L$sub
  351. ALIGN 16
  352. $L$sub: sbb rax,QWORD[r14*8+rcx]
  353. mov QWORD[r14*8+rdi],rax
  354. mov rax,QWORD[8+r14*8+rsi]
  355. lea r14,[1+r14]
  356. dec r15
  357. jnz NEAR $L$sub
  ; rax = top carry word minus the final borrow: all-ones when the subtraction
  ; underflowed, zero otherwise. rbx = bitwise complement of rax.
  358. sbb rax,0
  359. mov rbx,-1
  360. xor rbx,rax
  361. xor r14,r14
  362. mov r15,r9
  ; $L$copy: branch-free select per word: (difference AND rbx) OR
  ; (temporary AND rax). Each temporary word is overwritten with the loop index
  ; afterwards (presumably to avoid leaving the intermediate on the stack).
  363. $L$copy:
  364. mov rcx,QWORD[r14*8+rdi]
  365. mov rdx,QWORD[r14*8+rsp]
  366. and rcx,rbx
  367. and rdx,rax
  368. mov QWORD[r14*8+rsp],r14
  369. or rdx,rcx
  370. mov QWORD[r14*8+rdi],rdx
  371. lea r14,[1+r14]
  372. sub r15,1
  373. jnz NEAR $L$copy
  ; rsi = entry rsp saved by the prologue; return value is 1.
  374. mov rsi,QWORD[8+r9*8+rsp]
  375. mov rax,1
  ; Reload the six pushed registers from just below the entry rsp.
  376. mov r15,QWORD[((-48))+rsi]
  377. mov r14,QWORD[((-40))+rsi]
  378. mov r13,QWORD[((-32))+rsi]
  379. mov r12,QWORD[((-24))+rsi]
  380. mov rbp,QWORD[((-16))+rsi]
  381. mov rbx,QWORD[((-8))+rsi]
  382. lea rsp,[rsi]
  383. $L$mul_epilogue:
  384. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  385. mov rsi,QWORD[16+rsp]
  386. DB 0F3h,0C3h ;repret
  387. $L$SEH_end_bn_mul_mont_gather5:
  388. ALIGN 32
  ; bn_mul4x_mont_gather5 -- Win64 wrapper around mul4x_internal.
  ; Takes the same argument layout as bn_mul_mont_gather5, which jumps to
  ; $L$mul4x_enter below (with r11d preloaded from OPENSSL_ia32cap_P+8) when the
  ; word count is a multiple of 8. Builds a stack frame, calls mul4x_internal
  ; and returns rax = 1.
  389. bn_mul4x_mont_gather5:
  ; Park callee-saved rdi/rsi in the caller's home space.
  390. mov QWORD[8+rsp],rdi ;WIN64 prologue
  391. mov QWORD[16+rsp],rsi
  392. mov rax,rsp
  393. $L$SEH_begin_bn_mul4x_mont_gather5:
  ; Remap Win64 arguments 1-6 onto rdi, rsi, rdx, rcx, r8, r9.
  394. mov rdi,rcx
  395. mov rsi,rdx
  396. mov rdx,r8
  397. mov rcx,r9
  398. mov r8,QWORD[40+rsp]
  399. mov r9,QWORD[48+rsp]
  ; DB 0x67 emits an address-size prefix byte ahead of the next instruction
  ; (presumably padding chosen by the generator).
  400. DB 0x67
  ; rax = entry rsp, later stored at [40+rsp] of the new frame.
  401. mov rax,rsp
  402. $L$mul4x_enter:
  ; If every bit of 0x80108 is set in r11d, continue at $L$mulx4x_enter
  ; (defined outside this view; presumably the MULX/ADX code path -- confirm).
  403. and r11d,0x80108
  404. cmp r11d,0x80108
  405. je NEAR $L$mulx4x_enter
  406. push rbx
  407. push rbp
  408. push r12
  409. push r13
  410. push r14
  411. push r15
  412. $L$mul4x_prologue:
  413. DB 0x67
  ; r9 = byte length (count*8); r10 = 3*byte length; then r9 is negated.
  414. shl r9d,3
  415. lea r10,[r9*2+r9]
  416. neg r9
  ; r11 = (candidate frame address - rdi) mod 4096. The two branches below
  ; shift the frame by an amount derived from r11.
  ; NOTE(review): this looks like it keeps the frame from overlapping the
  ; destination modulo the page size -- confirm against the generating Perl
  ; script, which documents the intent.
  417. lea r11,[((-320))+r9*2+rsp]
  418. mov rbp,rsp
  419. sub r11,rdi
  420. and r11,4095
  421. cmp r10,r11
  422. jb NEAR $L$mul4xsp_alt
  423. sub rbp,r11
  424. lea rbp,[((-320))+r9*2+rbp]
  425. jmp NEAR $L$mul4xsp_done
  426. ALIGN 32
  427. $L$mul4xsp_alt:
  428. lea r10,[((4096-320))+r9*2]
  429. lea rbp,[((-320))+r9*2+rbp]
  430. sub r11,r10
  ; cmovc: clamp the adjustment to zero when the subtraction borrowed.
  431. mov r10,0
  432. cmovc r11,r10
  433. sub rbp,r11
  434. $L$mul4xsp_done:
  ; Align the frame to 64 bytes, then move rsp down to it one 4096-byte page
  ; at a time, reading each page on the way (stack probing).
  435. and rbp,-64
  436. mov r11,rsp
  437. sub r11,rbp
  438. and r11,-4096
  439. lea rsp,[rbp*1+r11]
  440. mov r10,QWORD[rsp]
  441. cmp rsp,rbp
  442. ja NEAR $L$mul4x_page_walk
  443. jmp NEAR $L$mul4x_page_walk_done
  444. $L$mul4x_page_walk:
  445. lea rsp,[((-4096))+rsp]
  446. mov r10,QWORD[rsp]
  447. cmp rsp,rbp
  448. ja NEAR $L$mul4x_page_walk
  449. $L$mul4x_page_walk_done:
  ; r9 back to the positive byte length; save the entry rsp in the frame.
  450. neg r9
  451. mov QWORD[40+rsp],rax
  452. $L$mul4x_body:
  ; mul4x_internal reads the 7th argument through rax (entry rsp) at [56+rax].
  453. call mul4x_internal
  ; rsi = saved entry rsp; return value is 1.
  454. mov rsi,QWORD[40+rsp]
  455. mov rax,1
  ; Reload the six pushed registers from just below the entry rsp.
  456. mov r15,QWORD[((-48))+rsi]
  457. mov r14,QWORD[((-40))+rsi]
  458. mov r13,QWORD[((-32))+rsi]
  459. mov r12,QWORD[((-24))+rsi]
  460. mov rbp,QWORD[((-16))+rsi]
  461. mov rbx,QWORD[((-8))+rsi]
  462. lea rsp,[rsi]
  463. $L$mul4x_epilogue:
  464. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  465. mov rsi,QWORD[16+rsp]
  466. DB 0F3h,0C3h ;repret
  467. $L$SEH_end_bn_mul4x_mont_gather5:
  468. ALIGN 32
  ; mul4x_internal -- shared core that handles four words per loop iteration.
  ; Register contract as used by the code below and by the visible callers:
  ;   rax = entry rsp of the exported caller (7th argument read at [56+rax])
  ;   rdi = destination pointer (saved at [(56+8)+rsp], reloaded at the end)
  ;   rsi = first operand word vector
  ;   rdx = table base (one word is gathered from each 256-byte row)
  ;   rcx = modulus word vector
  ;   r8  = pointer to the reduction seed word (dereferenced here)
  ;   r9  = byte length of the operands (word count * 8)
  ; Frame offsets carry an extra +8 because the return address sits at [rsp].
  ; The routine does not return directly: it ends by jumping to
  ; $L$sqr4x_sub_entry, which is defined outside this view.
  469. mul4x_internal:
  ; r13 = table + 128 + byte_length*32, the end marker compared against r12 at
  ; the bottom of the outer loop.
  470. shl r9,5
  471. movd xmm5,DWORD[56+rax]
  472. lea rax,[$L$inc]
  473. lea r13,[128+r9*1+rdx]
  474. shr r9,5
  ; xmm0/xmm1 = first two 16-byte constants at $L$inc (values not visible here).
  475. movdqa xmm0,XMMWORD[rax]
  476. movdqa xmm1,XMMWORD[16+rax]
  ; r10 = base for the 16 selection masks stored at [112+r10..352+r10].
  477. lea r10,[((88-112))+r9*1+rsp]
  ; r12 = table + 128 so displacements -128..+112 span one 256-byte row.
  478. lea r12,[128+rdx]
  ; Broadcast the index to all four dword lanes, then build the masks: each
  ; step adds xmm4 to form the next counter vector and compares it with the
  ; index, giving all-ones lanes where equal.
  479. pshufd xmm5,xmm5,0
  480. movdqa xmm4,xmm1
  ; DB 0x67 bytes are address-size prefixes emitted ahead of the next
  ; instruction (presumably padding chosen by the generator).
  481. DB 0x67,0x67
  482. movdqa xmm2,xmm1
  483. paddd xmm1,xmm0
  484. pcmpeqd xmm0,xmm5
  485. DB 0x67
  486. movdqa xmm3,xmm4
  487. paddd xmm2,xmm1
  488. pcmpeqd xmm1,xmm5
  489. movdqa XMMWORD[112+r10],xmm0
  490. movdqa xmm0,xmm4
  491. paddd xmm3,xmm2
  492. pcmpeqd xmm2,xmm5
  493. movdqa XMMWORD[128+r10],xmm1
  494. movdqa xmm1,xmm4
  495. paddd xmm0,xmm3
  496. pcmpeqd xmm3,xmm5
  497. movdqa XMMWORD[144+r10],xmm2
  498. movdqa xmm2,xmm4
  499. paddd xmm1,xmm0
  500. pcmpeqd xmm0,xmm5
  501. movdqa XMMWORD[160+r10],xmm3
  502. movdqa xmm3,xmm4
  503. paddd xmm2,xmm1
  504. pcmpeqd xmm1,xmm5
  505. movdqa XMMWORD[176+r10],xmm0
  506. movdqa xmm0,xmm4
  507. paddd xmm3,xmm2
  508. pcmpeqd xmm2,xmm5
  509. movdqa XMMWORD[192+r10],xmm1
  510. movdqa xmm1,xmm4
  511. paddd xmm0,xmm3
  512. pcmpeqd xmm3,xmm5
  513. movdqa XMMWORD[208+r10],xmm2
  514. movdqa xmm2,xmm4
  515. paddd xmm1,xmm0
  516. pcmpeqd xmm0,xmm5
  517. movdqa XMMWORD[224+r10],xmm3
  518. movdqa xmm3,xmm4
  519. paddd xmm2,xmm1
  520. pcmpeqd xmm1,xmm5
  521. movdqa XMMWORD[240+r10],xmm0
  522. movdqa xmm0,xmm4
  523. paddd xmm3,xmm2
  524. pcmpeqd xmm2,xmm5
  525. movdqa XMMWORD[256+r10],xmm1
  526. movdqa xmm1,xmm4
  527. paddd xmm0,xmm3
  528. pcmpeqd xmm3,xmm5
  529. movdqa XMMWORD[272+r10],xmm2
  530. movdqa xmm2,xmm4
  531. paddd xmm1,xmm0
  532. pcmpeqd xmm0,xmm5
  533. movdqa XMMWORD[288+r10],xmm3
  534. movdqa xmm3,xmm4
  535. paddd xmm2,xmm1
  536. pcmpeqd xmm1,xmm5
  537. movdqa XMMWORD[304+r10],xmm0
  538. paddd xmm3,xmm2
  539. DB 0x67
  540. pcmpeqd xmm2,xmm5
  541. movdqa XMMWORD[320+r10],xmm1
  542. pcmpeqd xmm3,xmm5
  543. movdqa XMMWORD[336+r10],xmm2
  ; Gather the first selected word: every 16-byte chunk of the row is read,
  ; ANDed with its mask and ORed together, independent of the index value.
  544. pand xmm0,XMMWORD[64+r12]
  545. pand xmm1,XMMWORD[80+r12]
  546. pand xmm2,XMMWORD[96+r12]
  547. movdqa XMMWORD[352+r10],xmm3
  548. pand xmm3,XMMWORD[112+r12]
  549. por xmm0,xmm2
  550. por xmm1,xmm3
  551. movdqa xmm4,XMMWORD[((-128))+r12]
  552. movdqa xmm5,XMMWORD[((-112))+r12]
  553. movdqa xmm2,XMMWORD[((-96))+r12]
  554. pand xmm4,XMMWORD[112+r10]
  555. movdqa xmm3,XMMWORD[((-80))+r12]
  556. pand xmm5,XMMWORD[128+r10]
  557. por xmm0,xmm4
  558. pand xmm2,XMMWORD[144+r10]
  559. por xmm1,xmm5
  560. pand xmm3,XMMWORD[160+r10]
  561. por xmm0,xmm2
  562. por xmm1,xmm3
  563. movdqa xmm4,XMMWORD[((-64))+r12]
  564. movdqa xmm5,XMMWORD[((-48))+r12]
  565. movdqa xmm2,XMMWORD[((-32))+r12]
  566. pand xmm4,XMMWORD[176+r10]
  567. movdqa xmm3,XMMWORD[((-16))+r12]
  568. pand xmm5,XMMWORD[192+r10]
  569. por xmm0,xmm4
  570. pand xmm2,XMMWORD[208+r10]
  571. por xmm1,xmm5
  572. pand xmm3,XMMWORD[224+r10]
  573. por xmm0,xmm2
  574. por xmm1,xmm3
  575. movdqa xmm4,XMMWORD[r12]
  576. movdqa xmm5,XMMWORD[16+r12]
  577. movdqa xmm2,XMMWORD[32+r12]
  578. pand xmm4,XMMWORD[240+r10]
  579. movdqa xmm3,XMMWORD[48+r12]
  580. pand xmm5,XMMWORD[256+r10]
  581. por xmm0,xmm4
  582. pand xmm2,XMMWORD[272+r10]
  583. por xmm1,xmm5
  584. pand xmm3,XMMWORD[288+r10]
  585. por xmm0,xmm2
  586. por xmm1,xmm3
  587. por xmm0,xmm1
  ; pshufd 0x4e swaps the two qwords; OR folds both halves into the low qword.
  588. pshufd xmm1,xmm0,0x4e
  589. por xmm0,xmm1
  590. lea r12,[256+r12]
  ; Encoded bytes 66 48 0F 7E C3 = movq rbx,xmm0 (rbx = selected table word).
  591. DB 102,72,15,126,195
  ; Spill the table end marker and the destination pointer into the frame.
  592. mov QWORD[((16+8))+rsp],r13
  593. mov QWORD[((56+8))+rsp],rdi
  ; r8 = reduction seed word; rsi is moved to the END of the operand and r9
  ; negated, so operand words are addressed as [rsi + negative offset].
  594. mov r8,QWORD[r8]
  595. mov rax,QWORD[rsi]
  596. lea rsi,[r9*1+rsi]
  597. neg r9
  598. mov rbp,r8
  599. mul rbx
  600. mov r10,rax
  601. mov rax,QWORD[rcx]
  ; rbp = seed * (low word of the product): reduction multiplier for this row.
  602. imul rbp,r10
  ; r14 = write cursor into the temporary vector, which starts at [(64+8)+rsp].
  603. lea r14,[((64+8))+rsp]
  604. mov r11,rdx
  605. mul rbp
  606. add r10,rax
  607. mov rax,QWORD[8+r9*1+rsi]
  608. adc rdx,0
  609. mov rdi,rdx
  610. mul rbx
  611. add r11,rax
  612. mov rax,QWORD[8+rcx]
  613. adc rdx,0
  614. mov r10,rdx
  615. mul rbp
  616. add rdi,rax
  617. mov rax,QWORD[16+r9*1+rsi]
  618. adc rdx,0
  619. add rdi,r11
  ; r15 = negative byte offset that counts up to zero in steps of 32.
  620. lea r15,[32+r9]
  621. lea rcx,[32+rcx]
  622. adc rdx,0
  623. mov QWORD[r14],rdi
  624. mov r13,rdx
  625. jmp NEAR $L$1st4x
  626. ALIGN 32
  ; $L$1st4x: first row, four result words per iteration. Each word combines
  ; operand_word*rbx and modulus_word*rbp with carries alternating through
  ; r10/r11 and r13/rdi.
  627. $L$1st4x:
  628. mul rbx
  629. add r10,rax
  630. mov rax,QWORD[((-16))+rcx]
  631. lea r14,[32+r14]
  632. adc rdx,0
  633. mov r11,rdx
  634. mul rbp
  635. add r13,rax
  636. mov rax,QWORD[((-8))+r15*1+rsi]
  637. adc rdx,0
  638. add r13,r10
  639. adc rdx,0
  640. mov QWORD[((-24))+r14],r13
  641. mov rdi,rdx
  642. mul rbx
  643. add r11,rax
  644. mov rax,QWORD[((-8))+rcx]
  645. adc rdx,0
  646. mov r10,rdx
  647. mul rbp
  648. add rdi,rax
  649. mov rax,QWORD[r15*1+rsi]
  650. adc rdx,0
  651. add rdi,r11
  652. adc rdx,0
  653. mov QWORD[((-16))+r14],rdi
  654. mov r13,rdx
  655. mul rbx
  656. add r10,rax
  657. mov rax,QWORD[rcx]
  658. adc rdx,0
  659. mov r11,rdx
  660. mul rbp
  661. add r13,rax
  662. mov rax,QWORD[8+r15*1+rsi]
  663. adc rdx,0
  664. add r13,r10
  665. adc rdx,0
  666. mov QWORD[((-8))+r14],r13
  667. mov rdi,rdx
  668. mul rbx
  669. add r11,rax
  670. mov rax,QWORD[8+rcx]
  671. adc rdx,0
  672. mov r10,rdx
  673. mul rbp
  674. add rdi,rax
  675. mov rax,QWORD[16+r15*1+rsi]
  676. adc rdx,0
  677. add rdi,r11
  678. lea rcx,[32+rcx]
  679. adc rdx,0
  680. mov QWORD[r14],rdi
  681. mov r13,rdx
  682. add r15,32
  683. jnz NEAR $L$1st4x
  ; Row tail: the last two words, then rewind rcx to the start of the modulus
  ; (r9 is negative here) and capture the top carry in rdi.
  684. mul rbx
  685. add r10,rax
  686. mov rax,QWORD[((-16))+rcx]
  687. lea r14,[32+r14]
  688. adc rdx,0
  689. mov r11,rdx
  690. mul rbp
  691. add r13,rax
  692. mov rax,QWORD[((-8))+rsi]
  693. adc rdx,0
  694. add r13,r10
  695. adc rdx,0
  696. mov QWORD[((-24))+r14],r13
  697. mov rdi,rdx
  698. mul rbx
  699. add r11,rax
  700. mov rax,QWORD[((-8))+rcx]
  701. adc rdx,0
  702. mov r10,rdx
  703. mul rbp
  704. add rdi,rax
  705. mov rax,QWORD[r9*1+rsi]
  706. adc rdx,0
  707. add rdi,r11
  708. adc rdx,0
  709. mov QWORD[((-16))+r14],rdi
  710. mov r13,rdx
  711. lea rcx,[r9*1+rcx]
  712. xor rdi,rdi
  713. add r13,r10
  714. adc rdi,0
  715. mov QWORD[((-8))+r14],r13
  716. jmp NEAR $L$outer4x
  717. ALIGN 32
  ; $L$outer4x: one pass per remaining table row. rdx is derived from r14 and
  ; addresses the masks stored above as [-128+rdx .. 112+rdx].
  718. $L$outer4x:
  719. lea rdx,[((16+128))+r14]
  720. pxor xmm4,xmm4
  721. pxor xmm5,xmm5
  722. movdqa xmm0,XMMWORD[((-128))+r12]
  723. movdqa xmm1,XMMWORD[((-112))+r12]
  724. movdqa xmm2,XMMWORD[((-96))+r12]
  725. movdqa xmm3,XMMWORD[((-80))+r12]
  726. pand xmm0,XMMWORD[((-128))+rdx]
  727. pand xmm1,XMMWORD[((-112))+rdx]
  728. por xmm4,xmm0
  729. pand xmm2,XMMWORD[((-96))+rdx]
  730. por xmm5,xmm1
  731. pand xmm3,XMMWORD[((-80))+rdx]
  732. por xmm4,xmm2
  733. por xmm5,xmm3
  734. movdqa xmm0,XMMWORD[((-64))+r12]
  735. movdqa xmm1,XMMWORD[((-48))+r12]
  736. movdqa xmm2,XMMWORD[((-32))+r12]
  737. movdqa xmm3,XMMWORD[((-16))+r12]
  738. pand xmm0,XMMWORD[((-64))+rdx]
  739. pand xmm1,XMMWORD[((-48))+rdx]
  740. por xmm4,xmm0
  741. pand xmm2,XMMWORD[((-32))+rdx]
  742. por xmm5,xmm1
  743. pand xmm3,XMMWORD[((-16))+rdx]
  744. por xmm4,xmm2
  745. por xmm5,xmm3
  746. movdqa xmm0,XMMWORD[r12]
  747. movdqa xmm1,XMMWORD[16+r12]
  748. movdqa xmm2,XMMWORD[32+r12]
  749. movdqa xmm3,XMMWORD[48+r12]
  750. pand xmm0,XMMWORD[rdx]
  751. pand xmm1,XMMWORD[16+rdx]
  752. por xmm4,xmm0
  753. pand xmm2,XMMWORD[32+rdx]
  754. por xmm5,xmm1
  755. pand xmm3,XMMWORD[48+rdx]
  756. por xmm4,xmm2
  757. por xmm5,xmm3
  758. movdqa xmm0,XMMWORD[64+r12]
  759. movdqa xmm1,XMMWORD[80+r12]
  760. movdqa xmm2,XMMWORD[96+r12]
  761. movdqa xmm3,XMMWORD[112+r12]
  762. pand xmm0,XMMWORD[64+rdx]
  763. pand xmm1,XMMWORD[80+rdx]
  764. por xmm4,xmm0
  765. pand xmm2,XMMWORD[96+rdx]
  766. por xmm5,xmm1
  767. pand xmm3,XMMWORD[112+rdx]
  768. por xmm4,xmm2
  769. por xmm5,xmm3
  770. por xmm4,xmm5
  771. pshufd xmm0,xmm4,0x4e
  772. por xmm0,xmm4
  773. lea r12,[256+r12]
  ; movq rbx,xmm0 -- the word selected from this row.
  774. DB 102,72,15,126,195
  ; r10 = first word of the temporary (r9 is negative, so [r9+r14] points back
  ; to its start).
  775. mov r10,QWORD[r9*1+r14]
  776. mov rbp,r8
  777. mul rbx
  778. add r10,rax
  779. mov rax,QWORD[rcx]
  780. adc rdx,0
  781. imul rbp,r10
  782. mov r11,rdx
  ; Store the previous row's top carry, then rewind r14 to the vector start.
  783. mov QWORD[r14],rdi
  784. lea r14,[r9*1+r14]
  785. mul rbp
  786. add r10,rax
  787. mov rax,QWORD[8+r9*1+rsi]
  788. adc rdx,0
  789. mov rdi,rdx
  790. mul rbx
  791. add r11,rax
  792. mov rax,QWORD[8+rcx]
  793. adc rdx,0
  794. add r11,QWORD[8+r14]
  795. adc rdx,0
  796. mov r10,rdx
  797. mul rbp
  798. add rdi,rax
  799. mov rax,QWORD[16+r9*1+rsi]
  800. adc rdx,0
  801. add rdi,r11
  802. lea r15,[32+r9]
  803. lea rcx,[32+rcx]
  804. adc rdx,0
  805. mov r13,rdx
  806. jmp NEAR $L$inner4x
  807. ALIGN 32
  ; $L$inner4x: same as $L$1st4x but also adds the word already held in the
  ; temporary vector; stores trail the computation by one word.
  808. $L$inner4x:
  809. mul rbx
  810. add r10,rax
  811. mov rax,QWORD[((-16))+rcx]
  812. adc rdx,0
  813. add r10,QWORD[16+r14]
  814. lea r14,[32+r14]
  815. adc rdx,0
  816. mov r11,rdx
  817. mul rbp
  818. add r13,rax
  819. mov rax,QWORD[((-8))+r15*1+rsi]
  820. adc rdx,0
  821. add r13,r10
  822. adc rdx,0
  823. mov QWORD[((-32))+r14],rdi
  824. mov rdi,rdx
  825. mul rbx
  826. add r11,rax
  827. mov rax,QWORD[((-8))+rcx]
  828. adc rdx,0
  829. add r11,QWORD[((-8))+r14]
  830. adc rdx,0
  831. mov r10,rdx
  832. mul rbp
  833. add rdi,rax
  834. mov rax,QWORD[r15*1+rsi]
  835. adc rdx,0
  836. add rdi,r11
  837. adc rdx,0
  838. mov QWORD[((-24))+r14],r13
  839. mov r13,rdx
  840. mul rbx
  841. add r10,rax
  842. mov rax,QWORD[rcx]
  843. adc rdx,0
  844. add r10,QWORD[r14]
  845. adc rdx,0
  846. mov r11,rdx
  847. mul rbp
  848. add r13,rax
  849. mov rax,QWORD[8+r15*1+rsi]
  850. adc rdx,0
  851. add r13,r10
  852. adc rdx,0
  853. mov QWORD[((-16))+r14],rdi
  854. mov rdi,rdx
  855. mul rbx
  856. add r11,rax
  857. mov rax,QWORD[8+rcx]
  858. adc rdx,0
  859. add r11,QWORD[8+r14]
  860. adc rdx,0
  861. mov r10,rdx
  862. mul rbp
  863. add rdi,rax
  864. mov rax,QWORD[16+r15*1+rsi]
  865. adc rdx,0
  866. add rdi,r11
  867. lea rcx,[32+rcx]
  868. adc rdx,0
  869. mov QWORD[((-8))+r14],r13
  870. mov r13,rdx
  871. add r15,32
  872. jnz NEAR $L$inner4x
  873. mul rbx
  874. add r10,rax
  875. mov rax,QWORD[((-16))+rcx]
  876. adc rdx,0
  877. add r10,QWORD[16+r14]
  878. lea r14,[32+r14]
  879. adc rdx,0
  880. mov r11,rdx
  881. mul rbp
  882. add r13,rax
  883. mov rax,QWORD[((-8))+rsi]
  884. adc rdx,0
  885. add r13,r10
  886. adc rdx,0
  887. mov QWORD[((-32))+r14],rdi
  888. mov rdi,rdx
  889. mul rbx
  890. add r11,rax
  ; Swap roles for the last product: rax takes the reduction multiplier and
  ; rbp the last modulus word, so that word is still in rbp after the loop.
  891. mov rax,rbp
  892. mov rbp,QWORD[((-8))+rcx]
  893. adc rdx,0
  894. add r11,QWORD[((-8))+r14]
  895. adc rdx,0
  896. mov r10,rdx
  897. mul rbp
  898. add rdi,rax
  899. mov rax,QWORD[r9*1+rsi]
  900. adc rdx,0
  901. add rdi,r11
  902. adc rdx,0
  903. mov QWORD[((-24))+r14],r13
  904. mov r13,rdx
  905. mov QWORD[((-16))+r14],rdi
  906. lea rcx,[r9*1+rcx]
  907. xor rdi,rdi
  908. add r13,r10
  909. adc rdi,0
  910. add r13,QWORD[r14]
  911. adc rdi,0
  912. mov QWORD[((-8))+r14],r13
  ; Loop while the table cursor r12 is below the saved end marker.
  913. cmp r12,QWORD[((16+8))+rsp]
  914. jb NEAR $L$outer4x
  ; Build the select mask in rax: CF from (last modulus word - top result word)
  ; is shifted into r15 (which is zero after the loop), ORed with the top carry
  ; in rdi, then negated -> rax is 0 or all-ones.
  915. xor rax,rax
  916. sub rbp,r13
  917. adc r15,r15
  918. or rdi,r15
  919. sub rax,rdi
  ; Set up registers for $L$sqr4x_sub_entry (code outside this view):
  ; rbx = start of the temporary, rbp = modulus pointer, rcx = negative byte
  ; length shifted right by 5 (a count of 4-word groups), rdi = destination,
  ; r12 = first modulus word minus one, r13..r15 = next three modulus words.
  920. lea rbx,[r9*1+r14]
  921. mov r12,QWORD[rcx]
  922. lea rbp,[rcx]
  923. mov rcx,r9
  924. sar rcx,3+2
  925. mov rdi,QWORD[((56+8))+rsp]
  926. dec r12
  927. xor r10,r10
  928. mov r13,QWORD[8+rbp]
  929. mov r14,QWORD[16+rbp]
  930. mov r15,QWORD[24+rbp]
  931. jmp NEAR $L$sqr4x_sub_entry
  932. global bn_power5
  933. ALIGN 32
  ; bn_power5 -- Win64 entry point.
  ; Remaps the arguments like bn_mul_mont_gather5, builds a stack frame, runs
  ; five rounds of __bn_sqr8x_internal + __bn_post4x_internal, then one
  ; mul4x_internal call with the table and the 7th argument. Returns rax = 1.
  ; NOTE(review): five squarings followed by one gathered multiply suggests
  ; result = operand^(2^5) * table[index] under the modulus -- confirm against
  ; the helper definitions, which are not fully visible here.
  934. bn_power5:
  ; Park callee-saved rdi/rsi in the caller's home space.
  935. mov QWORD[8+rsp],rdi ;WIN64 prologue
  936. mov QWORD[16+rsp],rsi
  937. mov rax,rsp
  938. $L$SEH_begin_bn_power5:
  ; Remap Win64 arguments 1-6 onto rdi, rsi, rdx, rcx, r8, r9.
  939. mov rdi,rcx
  940. mov rsi,rdx
  941. mov rdx,r8
  942. mov rcx,r9
  943. mov r8,QWORD[40+rsp]
  944. mov r9,QWORD[48+rsp]
  ; rax = entry rsp, stored in the frame below and used to unwind.
  945. mov rax,rsp
  ; If every bit of 0x80108 is set in the dword at OPENSSL_ia32cap_P+8,
  ; continue at $L$powerx5_enter (defined outside this view; presumably the
  ; MULX/ADX code path -- confirm).
  946. lea r11,[OPENSSL_ia32cap_P]
  947. mov r11d,DWORD[8+r11]
  948. and r11d,0x80108
  949. cmp r11d,0x80108
  950. je NEAR $L$powerx5_enter
  951. push rbx
  952. push rbp
  953. push r12
  954. push r13
  955. push r14
  956. push r15
  957. $L$power5_prologue:
  ; r9 = byte length (count*8); r10 = 3*byte length (the 32-bit write clears
  ; the upper half of r10); then r9 is negated.
  958. shl r9d,3
  959. lea r10d,[r9*2+r9]
  960. neg r9
  ; r8 = the word r8 pointed to (stored at [32+rsp] further down).
  961. mov r8,QWORD[r8]
  ; r11 = (candidate frame address - rdi) mod 4096. The two branches below
  ; shift the frame by an amount derived from r11.
  ; NOTE(review): this looks like it keeps the frame from overlapping the
  ; destination modulo the page size -- confirm against the generating Perl
  ; script.
  962. lea r11,[((-320))+r9*2+rsp]
  963. mov rbp,rsp
  964. sub r11,rdi
  965. and r11,4095
  966. cmp r10,r11
  967. jb NEAR $L$pwr_sp_alt
  968. sub rbp,r11
  969. lea rbp,[((-320))+r9*2+rbp]
  970. jmp NEAR $L$pwr_sp_done
  971. ALIGN 32
  972. $L$pwr_sp_alt:
  973. lea r10,[((4096-320))+r9*2]
  974. lea rbp,[((-320))+r9*2+rbp]
  975. sub r11,r10
  ; cmovc: clamp the adjustment to zero when the subtraction borrowed.
  976. mov r10,0
  977. cmovc r11,r10
  978. sub rbp,r11
  979. $L$pwr_sp_done:
  ; Align the frame to 64 bytes, then move rsp down to it one 4096-byte page
  ; at a time, reading each page on the way (stack probing).
  980. and rbp,-64
  981. mov r11,rsp
  982. sub r11,rbp
  983. and r11,-4096
  984. lea rsp,[rbp*1+r11]
  985. mov r10,QWORD[rsp]
  986. cmp rsp,rbp
  987. ja NEAR $L$pwr_page_walk
  988. jmp NEAR $L$pwr_page_walk_done
  989. $L$pwr_page_walk:
  990. lea rsp,[((-4096))+rsp]
  991. mov r10,QWORD[rsp]
  992. cmp rsp,rbp
  993. ja NEAR $L$pwr_page_walk
  994. $L$pwr_page_walk_done:
  ; r10 = negative byte length, r9 = positive byte length.
  995. mov r10,r9
  996. neg r9
  ; Frame slots: [32+rsp] = word loaded through r8, [40+rsp] = entry rsp.
  997. mov QWORD[32+rsp],r8
  998. mov QWORD[40+rsp],rax
  999. $L$power5_body:
  ; Park four registers in xmm1..xmm4 across the helper calls
  ; (66 REX.W 0F 6E /r encodings):
  ;   movq xmm1,rdi   movq xmm2,rcx   movq xmm3,r10   movq xmm4,rdx
  1000. DB 102,72,15,110,207
  1001. DB 102,72,15,110,209
  1002. DB 102,73,15,110,218
  1003. DB 102,72,15,110,226
  ; Five rounds of the squaring helper followed by the post-processing helper.
  1004. call __bn_sqr8x_internal
  1005. call __bn_post4x_internal
  1006. call __bn_sqr8x_internal
  1007. call __bn_post4x_internal
  1008. call __bn_sqr8x_internal
  1009. call __bn_post4x_internal
  1010. call __bn_sqr8x_internal
  1011. call __bn_post4x_internal
  1012. call __bn_sqr8x_internal
  1013. call __bn_post4x_internal
  ; Restore rcx and rdx from xmm2/xmm4 (66 REX.W 0F 7E /r: movq rcx,xmm2 and
  ; movq rdx,xmm4).
  1014. DB 102,72,15,126,209
  1015. DB 102,72,15,126,226
  ; Arguments for mul4x_internal: rdi = rsi, rax = saved entry rsp (it reads
  ; the 7th argument at [56+rax]), r8 = address of the frame slot holding the
  ; word stored above (mul4x_internal dereferences r8).
  1016. mov rdi,rsi
  1017. mov rax,QWORD[40+rsp]
  1018. lea r8,[32+rsp]
  1019. call mul4x_internal
  ; rsi = saved entry rsp; return value is 1.
  1020. mov rsi,QWORD[40+rsp]
  1021. mov rax,1
  ; Reload the six pushed registers from just below the entry rsp.
  1022. mov r15,QWORD[((-48))+rsi]
  1023. mov r14,QWORD[((-40))+rsi]
  1024. mov r13,QWORD[((-32))+rsi]
  1025. mov r12,QWORD[((-24))+rsi]
  1026. mov rbp,QWORD[((-16))+rsi]
  1027. mov rbx,QWORD[((-8))+rsi]
  1028. lea rsp,[rsi]
  1029. $L$power5_epilogue:
  1030. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  1031. mov rsi,QWORD[16+rsp]
  1032. DB 0F3h,0C3h ;repret
  1033. $L$SEH_end_bn_power5:
  1034. global bn_sqr8x_internal
  1035. ALIGN 32
  1036. bn_sqr8x_internal:
  1037. __bn_sqr8x_internal:
  1038. lea rbp,[32+r10]
  1039. lea rsi,[r9*1+rsi]
  1040. mov rcx,r9
  1041. mov r14,QWORD[((-32))+rbp*1+rsi]
  1042. lea rdi,[((48+8))+r9*2+rsp]
  1043. mov rax,QWORD[((-24))+rbp*1+rsi]
  1044. lea rdi,[((-32))+rbp*1+rdi]
  1045. mov rbx,QWORD[((-16))+rbp*1+rsi]
  1046. mov r15,rax
  1047. mul r14
  1048. mov r10,rax
  1049. mov rax,rbx
  1050. mov r11,rdx
  1051. mov QWORD[((-24))+rbp*1+rdi],r10
  1052. mul r14
  1053. add r11,rax
  1054. mov rax,rbx
  1055. adc rdx,0
  1056. mov QWORD[((-16))+rbp*1+rdi],r11
  1057. mov r10,rdx
  1058. mov rbx,QWORD[((-8))+rbp*1+rsi]
  1059. mul r15
  1060. mov r12,rax
  1061. mov rax,rbx
  1062. mov r13,rdx
  1063. lea rcx,[rbp]
  1064. mul r14
  1065. add r10,rax
  1066. mov rax,rbx
  1067. mov r11,rdx
  1068. adc r11,0
  1069. add r10,r12
  1070. adc r11,0
  1071. mov QWORD[((-8))+rcx*1+rdi],r10
  1072. jmp NEAR $L$sqr4x_1st
  1073. ALIGN 32
  1074. $L$sqr4x_1st:
  1075. mov rbx,QWORD[rcx*1+rsi]
  1076. mul r15
  1077. add r13,rax
  1078. mov rax,rbx
  1079. mov r12,rdx
  1080. adc r12,0
  1081. mul r14
  1082. add r11,rax
  1083. mov rax,rbx
  1084. mov rbx,QWORD[8+rcx*1+rsi]
  1085. mov r10,rdx
  1086. adc r10,0
  1087. add r11,r13
  1088. adc r10,0
  1089. mul r15
  1090. add r12,rax
  1091. mov rax,rbx
  1092. mov QWORD[rcx*1+rdi],r11
  1093. mov r13,rdx
  1094. adc r13,0
  1095. mul r14
  1096. add r10,rax
  1097. mov rax,rbx
  1098. mov rbx,QWORD[16+rcx*1+rsi]
  1099. mov r11,rdx
  1100. adc r11,0
  1101. add r10,r12
  1102. adc r11,0
  1103. mul r15
  1104. add r13,rax
  1105. mov rax,rbx
  1106. mov QWORD[8+rcx*1+rdi],r10
  1107. mov r12,rdx
  1108. adc r12,0
  1109. mul r14
  1110. add r11,rax
  1111. mov rax,rbx
  1112. mov rbx,QWORD[24+rcx*1+rsi]
  1113. mov r10,rdx
  1114. adc r10,0
  1115. add r11,r13
  1116. adc r10,0
  1117. mul r15
  1118. add r12,rax
  1119. mov rax,rbx
  1120. mov QWORD[16+rcx*1+rdi],r11
  1121. mov r13,rdx
  1122. adc r13,0
  1123. lea rcx,[32+rcx]
  1124. mul r14
  1125. add r10,rax
  1126. mov rax,rbx
  1127. mov r11,rdx
  1128. adc r11,0
  1129. add r10,r12
  1130. adc r11,0
  1131. mov QWORD[((-8))+rcx*1+rdi],r10
  1132. cmp rcx,0
  1133. jne NEAR $L$sqr4x_1st
  1134. mul r15
  1135. add r13,rax
  1136. lea rbp,[16+rbp]
  1137. adc rdx,0
  1138. add r13,r11
  1139. adc rdx,0
  1140. mov QWORD[rdi],r13
  1141. mov r12,rdx
  1142. mov QWORD[8+rdi],rdx
  1143. jmp NEAR $L$sqr4x_outer
  1144. ALIGN 32
  1145. $L$sqr4x_outer:
  1146. mov r14,QWORD[((-32))+rbp*1+rsi]
  1147. lea rdi,[((48+8))+r9*2+rsp]
  1148. mov rax,QWORD[((-24))+rbp*1+rsi]
  1149. lea rdi,[((-32))+rbp*1+rdi]
  1150. mov rbx,QWORD[((-16))+rbp*1+rsi]
  1151. mov r15,rax
  1152. mul r14
  1153. mov r10,QWORD[((-24))+rbp*1+rdi]
  1154. add r10,rax
  1155. mov rax,rbx
  1156. adc rdx,0
  1157. mov QWORD[((-24))+rbp*1+rdi],r10
  1158. mov r11,rdx
  1159. mul r14
  1160. add r11,rax
  1161. mov rax,rbx
  1162. adc rdx,0
  1163. add r11,QWORD[((-16))+rbp*1+rdi]
  1164. mov r10,rdx
  1165. adc r10,0
  1166. mov QWORD[((-16))+rbp*1+rdi],r11
  1167. xor r12,r12
  1168. mov rbx,QWORD[((-8))+rbp*1+rsi]
  1169. mul r15
  1170. add r12,rax
  1171. mov rax,rbx
  1172. adc rdx,0
  1173. add r12,QWORD[((-8))+rbp*1+rdi]
  1174. mov r13,rdx
  1175. adc r13,0
  1176. mul r14
  1177. add r10,rax
  1178. mov rax,rbx
  1179. adc rdx,0
  1180. add r10,r12
  1181. mov r11,rdx
  1182. adc r11,0
  1183. mov QWORD[((-8))+rbp*1+rdi],r10
  1184. lea rcx,[rbp]
  1185. jmp NEAR $L$sqr4x_inner
  1186. ALIGN 32
  1187. $L$sqr4x_inner:
  1188. mov rbx,QWORD[rcx*1+rsi]
  1189. mul r15
  1190. add r13,rax
  1191. mov rax,rbx
  1192. mov r12,rdx
  1193. adc r12,0
  1194. add r13,QWORD[rcx*1+rdi]
  1195. adc r12,0
  1196. DB 0x67
  1197. mul r14
  1198. add r11,rax
  1199. mov rax,rbx
  1200. mov rbx,QWORD[8+rcx*1+rsi]
  1201. mov r10,rdx
  1202. adc r10,0
  1203. add r11,r13
  1204. adc r10,0
  1205. mul r15
  1206. add r12,rax
  1207. mov QWORD[rcx*1+rdi],r11
  1208. mov rax,rbx
  1209. mov r13,rdx
  1210. adc r13,0
  1211. add r12,QWORD[8+rcx*1+rdi]
  1212. lea rcx,[16+rcx]
  1213. adc r13,0
  1214. mul r14
  1215. add r10,rax
  1216. mov rax,rbx
  1217. adc rdx,0
  1218. add r10,r12
  1219. mov r11,rdx
  1220. adc r11,0
  1221. mov QWORD[((-8))+rcx*1+rdi],r10
  1222. cmp rcx,0
  1223. jne NEAR $L$sqr4x_inner
  1224. DB 0x67
  1225. mul r15
  1226. add r13,rax
  1227. adc rdx,0
  1228. add r13,r11
  1229. adc rdx,0
  1230. mov QWORD[rdi],r13
  1231. mov r12,rdx
  1232. mov QWORD[8+rdi],rdx
  1233. add rbp,16
  1234. jnz NEAR $L$sqr4x_outer
  1235. mov r14,QWORD[((-32))+rsi]
  1236. lea rdi,[((48+8))+r9*2+rsp]
  1237. mov rax,QWORD[((-24))+rsi]
  1238. lea rdi,[((-32))+rbp*1+rdi]
  1239. mov rbx,QWORD[((-16))+rsi]
  1240. mov r15,rax
  1241. mul r14
  1242. add r10,rax
  1243. mov rax,rbx
  1244. mov r11,rdx
  1245. adc r11,0
  1246. mul r14
  1247. add r11,rax
  1248. mov rax,rbx
  1249. mov QWORD[((-24))+rdi],r10
  1250. mov r10,rdx
  1251. adc r10,0
  1252. add r11,r13
  1253. mov rbx,QWORD[((-8))+rsi]
  1254. adc r10,0
  1255. mul r15
  1256. add r12,rax
  1257. mov rax,rbx
  1258. mov QWORD[((-16))+rdi],r11
  1259. mov r13,rdx
  1260. adc r13,0
  1261. mul r14
  1262. add r10,rax
  1263. mov rax,rbx
  1264. mov r11,rdx
  1265. adc r11,0
  1266. add r10,r12
  1267. adc r11,0
  1268. mov QWORD[((-8))+rdi],r10
  1269. mul r15
  1270. add r13,rax
  1271. mov rax,QWORD[((-16))+rsi]
  1272. adc rdx,0
  1273. add r13,r11
  1274. adc rdx,0
  1275. mov QWORD[rdi],r13
  1276. mov r12,rdx
  1277. mov QWORD[8+rdi],rdx
  1278. mul rbx
  1279. add rbp,16
  1280. xor r14,r14
  1281. sub rbp,r9
  1282. xor r15,r15
  1283. add rax,r12
  1284. adc rdx,0
  1285. mov QWORD[8+rdi],rax
  1286. mov QWORD[16+rdi],rdx
  1287. mov QWORD[24+rdi],r15
  1288. mov rax,QWORD[((-16))+rbp*1+rsi]
  1289. lea rdi,[((48+8))+rsp]
  1290. xor r10,r10
  1291. mov r11,QWORD[8+rdi]
  1292. lea r12,[r10*2+r14]
  1293. shr r10,63
  1294. lea r13,[r11*2+rcx]
  1295. shr r11,63
  1296. or r13,r10
  1297. mov r10,QWORD[16+rdi]
  1298. mov r14,r11
  1299. mul rax
  1300. neg r15
  1301. mov r11,QWORD[24+rdi]
  1302. adc r12,rax
  1303. mov rax,QWORD[((-8))+rbp*1+rsi]
  1304. mov QWORD[rdi],r12
  1305. adc r13,rdx
  1306. lea rbx,[r10*2+r14]
  1307. mov QWORD[8+rdi],r13
  1308. sbb r15,r15
  1309. shr r10,63
  1310. lea r8,[r11*2+rcx]
  1311. shr r11,63
  1312. or r8,r10
  1313. mov r10,QWORD[32+rdi]
  1314. mov r14,r11
  1315. mul rax
  1316. neg r15
  1317. mov r11,QWORD[40+rdi]
  1318. adc rbx,rax
  1319. mov rax,QWORD[rbp*1+rsi]
  1320. mov QWORD[16+rdi],rbx
  1321. adc r8,rdx
  1322. lea rbp,[16+rbp]
  1323. mov QWORD[24+rdi],r8
  1324. sbb r15,r15
  1325. lea rdi,[64+rdi]
  1326. jmp NEAR $L$sqr4x_shift_n_add
  1327. ALIGN 32
  1328. $L$sqr4x_shift_n_add:
  1329. lea r12,[r10*2+r14]
  1330. shr r10,63
  1331. lea r13,[r11*2+rcx]
  1332. shr r11,63
  1333. or r13,r10
  1334. mov r10,QWORD[((-16))+rdi]
  1335. mov r14,r11
  1336. mul rax
  1337. neg r15
  1338. mov r11,QWORD[((-8))+rdi]
  1339. adc r12,rax
  1340. mov rax,QWORD[((-8))+rbp*1+rsi]
  1341. mov QWORD[((-32))+rdi],r12
  1342. adc r13,rdx
  1343. lea rbx,[r10*2+r14]
  1344. mov QWORD[((-24))+rdi],r13
  1345. sbb r15,r15
  1346. shr r10,63
  1347. lea r8,[r11*2+rcx]
  1348. shr r11,63
  1349. or r8,r10
  1350. mov r10,QWORD[rdi]
  1351. mov r14,r11
  1352. mul rax
  1353. neg r15
  1354. mov r11,QWORD[8+rdi]
  1355. adc rbx,rax
  1356. mov rax,QWORD[rbp*1+rsi]
  1357. mov QWORD[((-16))+rdi],rbx
  1358. adc r8,rdx
  1359. lea r12,[r10*2+r14]
  1360. mov QWORD[((-8))+rdi],r8
  1361. sbb r15,r15
  1362. shr r10,63
  1363. lea r13,[r11*2+rcx]
  1364. shr r11,63
  1365. or r13,r10
  1366. mov r10,QWORD[16+rdi]
  1367. mov r14,r11
  1368. mul rax
  1369. neg r15
  1370. mov r11,QWORD[24+rdi]
  1371. adc r12,rax
  1372. mov rax,QWORD[8+rbp*1+rsi]
  1373. mov QWORD[rdi],r12
  1374. adc r13,rdx
  1375. lea rbx,[r10*2+r14]
  1376. mov QWORD[8+rdi],r13
  1377. sbb r15,r15
  1378. shr r10,63
  1379. lea r8,[r11*2+rcx]
  1380. shr r11,63
  1381. or r8,r10
  1382. mov r10,QWORD[32+rdi]
  1383. mov r14,r11
  1384. mul rax
  1385. neg r15
  1386. mov r11,QWORD[40+rdi]
  1387. adc rbx,rax
  1388. mov rax,QWORD[16+rbp*1+rsi]
  1389. mov QWORD[16+rdi],rbx
  1390. adc r8,rdx
  1391. mov QWORD[24+rdi],r8
  1392. sbb r15,r15
  1393. lea rdi,[64+rdi]
  1394. add rbp,32
  1395. jnz NEAR $L$sqr4x_shift_n_add
  1396. lea r12,[r10*2+r14]
  1397. DB 0x67
  1398. shr r10,63
  1399. lea r13,[r11*2+rcx]
  1400. shr r11,63
  1401. or r13,r10
  1402. mov r10,QWORD[((-16))+rdi]
  1403. mov r14,r11
  1404. mul rax
  1405. neg r15
  1406. mov r11,QWORD[((-8))+rdi]
  1407. adc r12,rax
  1408. mov rax,QWORD[((-8))+rsi]
  1409. mov QWORD[((-32))+rdi],r12
  1410. adc r13,rdx
  1411. lea rbx,[r10*2+r14]
  1412. mov QWORD[((-24))+rdi],r13
  1413. sbb r15,r15
  1414. shr r10,63
  1415. lea r8,[r11*2+rcx]
  1416. shr r11,63
  1417. or r8,r10
  1418. mul rax
  1419. neg r15
  1420. adc rbx,rax
  1421. adc r8,rdx
  1422. mov QWORD[((-16))+rdi],rbx
  1423. mov QWORD[((-8))+rdi],r8
  1424. DB 102,72,15,126,213
  ; __bn_sqr8x_reduction -- internal helper, entered by `call` (see
  ; bn_from_mont8x) and by fall-through from the code just above.
  ; NOTE(review): presumably the word-by-word Montgomery reduction of the
  ; double-width stack buffer t[] -- confirm against the Perl generator.
  ; Reads on entry:
  ;   rbp      base of the vector that is multiplied 8 words at a time
  ;            (presumably the modulus n[])
  ;   r9       length of that vector in bytes
  ;   [40+rsp] 64-bit constant the caller stored at its own [32+rsp]
  ;            before the call pushed a return address (presumably n0)
  ;   xmm2, xmm3  copies that are moved back into rbp / r9 on every pass
  ; Stack slots used: [8+rsp] = rbp+r9, [16+rsp] = rsp+56+2*r9,
  ;   [56+rsp]..[112+rsp] = the eight multipliers of the current pass.
  ; Clobbers rax, rbx, rcx, rdx, rsi, rdi, r8-r15 and the flags.
  1425. __bn_sqr8x_reduction:
  1426. xor rax,rax ; running top-most carry starts at 0
  1427. lea rcx,[rbp*1+r9] ; rcx = end of the rbp vector
  1428. lea rdx,[((48+8))+r9*2+rsp] ; rdx = end of the 2*r9-byte buffer at [56+rsp]
  1429. mov QWORD[((0+8))+rsp],rcx
  1430. lea rdi,[((48+8))+r9*1+rsp] ; rdi = buffer base + r9 (rewound at loop top)
  1431. mov QWORD[((8+8))+rsp],rdx
  1432. neg r9 ; r9 = -length, used to rewind rdi
  1433. jmp NEAR $L$8x_reduction_loop
  1434. ALIGN 32
  ; One outer pass: take the eight words at the start of the current window.
  1435. $L$8x_reduction_loop:
  1436. lea rdi,[r9*1+rdi] ; step rdi back by the vector length
  1437. DB 0x66 ; operand-size prefix byte; REX.W on the next mov overrides it (presumably padding)
  1438. mov rbx,QWORD[rdi]
  1439. mov r9,QWORD[8+rdi]
  1440. mov r10,QWORD[16+rdi]
  1441. mov r11,QWORD[24+rdi]
  1442. mov r12,QWORD[32+rdi]
  1443. mov r13,QWORD[40+rdi]
  1444. mov r14,QWORD[48+rdi]
  1445. mov r15,QWORD[56+rdi]
  1446. mov QWORD[rdx],rax ; park the carry from the previous pass at the buffer end
  1447. lea rdi,[64+rdi]
  1448. DB 0x67 ; address-size prefix on a register-only mov: no effect (presumably padding)
  1449. mov r8,rbx ; r8 = lowest word of the window
  1450. imul rbx,QWORD[((32+8))+rsp] ; rbx = multiplier = low word * saved constant
  1451. mov rax,QWORD[rbp]
  1452. mov ecx,8 ; eight inner iterations
  1453. jmp NEAR $L$8x_reduce
  1454. ALIGN 32
  ; Inner loop: (r8..r15) = ((r8..r15) + rbx * [rbp..rbp+63]) >> 64.
  ; The low word is discarded; only its carry is kept.
  1455. $L$8x_reduce:
  1456. mul rbx ; rdx:rax = [rbp+0] * rbx
  1457. mov rax,QWORD[8+rbp]
  1458. neg r8 ; CF = (r8 != 0): carry out of the discarded low word
  1459. mov r8,rdx
  1460. adc r8,0
  1461. mul rbx
  1462. add r9,rax
  1463. mov rax,QWORD[16+rbp]
  1464. adc rdx,0
  1465. add r8,r9
  1466. mov QWORD[((48-8+8))+rcx*8+rsp],rbx ; save this iteration's multiplier for the tail loop
  1467. mov r9,rdx
  1468. adc r9,0
  1469. mul rbx
  1470. add r10,rax
  1471. mov rax,QWORD[24+rbp]
  1472. adc rdx,0
  1473. add r9,r10
  1474. mov rsi,QWORD[((32+8))+rsp] ; reload the saved constant ...
  1475. mov r10,rdx
  1476. adc r10,0
  1477. mul rbx
  1478. add r11,rax
  1479. mov rax,QWORD[32+rbp]
  1480. adc rdx,0
  1481. imul rsi,r8 ; ... rsi = next multiplier, computed early from the new low word
  1482. add r10,r11
  1483. mov r11,rdx
  1484. adc r11,0
  1485. mul rbx
  1486. add r12,rax
  1487. mov rax,QWORD[40+rbp]
  1488. adc rdx,0
  1489. add r11,r12
  1490. mov r12,rdx
  1491. adc r12,0
  1492. mul rbx
  1493. add r13,rax
  1494. mov rax,QWORD[48+rbp]
  1495. adc rdx,0
  1496. add r12,r13
  1497. mov r13,rdx
  1498. adc r13,0
  1499. mul rbx
  1500. add r14,rax
  1501. mov rax,QWORD[56+rbp]
  1502. adc rdx,0
  1503. add r13,r14
  1504. mov r14,rdx
  1505. adc r14,0
  1506. mul rbx
  1507. mov rbx,rsi ; switch to the next multiplier
  1508. add r15,rax
  1509. mov rax,QWORD[rbp]
  1510. adc rdx,0
  1511. add r14,r15
  1512. mov r15,rdx
  1513. adc r15,0
  1514. dec ecx
  1515. jnz NEAR $L$8x_reduce
  1516. lea rbp,[64+rbp] ; advance to the next 8 words of the rbp vector
  1517. xor rax,rax
  1518. mov rdx,QWORD[((8+8))+rsp] ; rdx = end of buffer
  1519. cmp rbp,QWORD[((0+8))+rsp]
  1520. jae NEAR $L$8x_no_tail ; vector exhausted (taken with CF = 0)
  1521. DB 0x66 ; operand-size prefix byte; REX.W on the next add overrides it (presumably padding)
  1522. add r8,QWORD[rdi] ; fold in the next eight buffer words
  1523. adc r9,QWORD[8+rdi]
  1524. adc r10,QWORD[16+rdi]
  1525. adc r11,QWORD[24+rdi]
  1526. adc r12,QWORD[32+rdi]
  1527. adc r13,QWORD[40+rdi]
  1528. adc r14,QWORD[48+rdi]
  1529. adc r15,QWORD[56+rdi]
  1530. sbb rsi,rsi ; rsi = 0 or -1: remembers the carry out
  1531. mov rbx,QWORD[((48+56+8))+rsp] ; first multiplier saved above (slot written with rcx = 8)
  1532. mov ecx,8
  1533. mov rax,QWORD[rbp]
  1534. jmp NEAR $L$8x_tail
  1535. ALIGN 32
  ; Tail loop: apply the eight saved multipliers to the remaining words of
  ; the rbp vector, storing one finished word to [rdi] per iteration.
  1536. $L$8x_tail:
  1537. mul rbx
  1538. add r8,rax
  1539. mov rax,QWORD[8+rbp]
  1540. mov QWORD[rdi],r8 ; store finished low word
  1541. mov r8,rdx
  1542. adc r8,0
  1543. mul rbx
  1544. add r9,rax
  1545. mov rax,QWORD[16+rbp]
  1546. adc rdx,0
  1547. add r8,r9
  1548. lea rdi,[8+rdi] ; advance output pointer by one word (flags untouched)
  1549. mov r9,rdx
  1550. adc r9,0
  1551. mul rbx
  1552. add r10,rax
  1553. mov rax,QWORD[24+rbp]
  1554. adc rdx,0
  1555. add r9,r10
  1556. mov r10,rdx
  1557. adc r10,0
  1558. mul rbx
  1559. add r11,rax
  1560. mov rax,QWORD[32+rbp]
  1561. adc rdx,0
  1562. add r10,r11
  1563. mov r11,rdx
  1564. adc r11,0
  1565. mul rbx
  1566. add r12,rax
  1567. mov rax,QWORD[40+rbp]
  1568. adc rdx,0
  1569. add r11,r12
  1570. mov r12,rdx
  1571. adc r12,0
  1572. mul rbx
  1573. add r13,rax
  1574. mov rax,QWORD[48+rbp]
  1575. adc rdx,0
  1576. add r12,r13
  1577. mov r13,rdx
  1578. adc r13,0
  1579. mul rbx
  1580. add r14,rax
  1581. mov rax,QWORD[56+rbp]
  1582. adc rdx,0
  1583. add r13,r14
  1584. mov r14,rdx
  1585. adc r14,0
  1586. mul rbx
  1587. mov rbx,QWORD[((48-16+8))+rcx*8+rsp] ; fetch the next saved multiplier
  1588. add r15,rax
  1589. adc rdx,0
  1590. add r14,r15
  1591. mov rax,QWORD[rbp]
  1592. mov r15,rdx
  1593. adc r15,0
  1594. dec ecx
  1595. jnz NEAR $L$8x_tail
  1596. lea rbp,[64+rbp]
  1597. mov rdx,QWORD[((8+8))+rsp]
  1598. cmp rbp,QWORD[((0+8))+rsp]
  1599. jae NEAR $L$8x_tail_done ; reached the end of the rbp vector
  1600. mov rbx,QWORD[((48+56+8))+rsp] ; restart with the first saved multiplier
  1601. neg rsi ; CF = carry remembered in rsi
  1602. mov rax,QWORD[rbp]
  1603. adc r8,QWORD[rdi]
  1604. adc r9,QWORD[8+rdi]
  1605. adc r10,QWORD[16+rdi]
  1606. adc r11,QWORD[24+rdi]
  1607. adc r12,QWORD[32+rdi]
  1608. adc r13,QWORD[40+rdi]
  1609. adc r14,QWORD[48+rdi]
  1610. adc r15,QWORD[56+rdi]
  1611. sbb rsi,rsi ; remember the new carry
  1612. mov ecx,8
  1613. jmp NEAR $L$8x_tail
  1614. ALIGN 32
  1615. $L$8x_tail_done:
  1616. xor rax,rax
  1617. add r8,QWORD[rdx] ; add the carry parked at the buffer end by the loop top
  1618. adc r9,0
  1619. adc r10,0
  1620. adc r11,0
  1621. adc r12,0
  1622. adc r13,0
  1623. adc r14,0
  1624. adc r15,0
  1625. adc rax,0
  1626. neg rsi ; CF = carry remembered in rsi
  ; Reached either from above (CF = saved carry) or from the jae after the
  ; reduce loop (CF = 0, rax = 0).
  1627. $L$8x_no_tail:
  1628. adc r8,QWORD[rdi]
  1629. adc r9,QWORD[8+rdi]
  1630. adc r10,QWORD[16+rdi]
  1631. adc r11,QWORD[24+rdi]
  1632. adc r12,QWORD[32+rdi]
  1633. adc r13,QWORD[40+rdi]
  1634. adc r14,QWORD[48+rdi]
  1635. adc r15,QWORD[56+rdi]
  1636. adc rax,0 ; rax = top-most carry of this pass
  1637. mov rcx,QWORD[((-8))+rbp] ; last word of the rbp vector (rbp is at its end here)
  1638. xor rsi,rsi
  1639. DB 102,72,15,126,213 ; movq rbp,xmm2 -- restore vector base
  1640. mov QWORD[rdi],r8 ; write back the top eight words
  1641. mov QWORD[8+rdi],r9
  1642. DB 102,73,15,126,217 ; movq r9,xmm3 -- restore r9 (r9 was in use as data until the store above)
  1643. mov QWORD[16+rdi],r10
  1644. mov QWORD[24+rdi],r11
  1645. mov QWORD[32+rdi],r12
  1646. mov QWORD[40+rdi],r13
  1647. mov QWORD[48+rdi],r14
  1648. mov QWORD[56+rdi],r15
  1649. lea rdi,[64+rdi]
  1650. cmp rdi,rdx
  1651. jb NEAR $L$8x_reduction_loop ; loop until rdi reaches the buffer end
  1652. DB 0F3h,0C3h ;repret
  ; __bn_post4x_internal -- internal helper, called right after
  ; __bn_sqr8x_reduction (see bn_from_mont8x).
  ; Four words per iteration it computes
  ;     out[i] = src[i] + (~v[i] & mask) + carry
  ; where mask = -rax and the first word of v is decremented before the
  ; NOT, so that with mask = all-ones the sum is src - v (two's complement)
  ; and with mask = 0 it is a plain copy of src.
  ; NOTE(review): presumably the constant-time final subtraction of the
  ; modulus -- confirm against the Perl generator.
  ; Reads: rbp = v, rdi + r9 = src, rax = 0 or 1 (negated into the mask),
  ;        r9 = byte count (must be negative: rcx = r9 >> 5 counts up to 0),
  ;        xmm1 = output pointer.
  ; On return: rsi = output pointer from xmm1, r10 = r9 as on entry, r9 negated.
  1653. ALIGN 32
  1654. __bn_post4x_internal:
  1655. mov r12,QWORD[rbp]
  1656. lea rbx,[r9*1+rdi] ; rbx = source words
  1657. mov rcx,r9
  1658. DB 102,72,15,126,207 ; movq rdi,xmm1 -- output pointer
  1659. neg rax ; rax = mask (0 or all-ones when rax was 0 or 1)
  1660. DB 102,72,15,126,206 ; movq rsi,xmm1 -- same pointer, left in rsi for the caller
  1661. sar rcx,3+2 ; rcx = r9 / 32 = -(number of 4-word groups)
  1662. dec r12 ; ~(v[0]-1) == -v[0]: supplies the +1 of the two's complement
  1663. xor r10,r10 ; r10 carries CF between iterations; starts clear
  1664. mov r13,QWORD[8+rbp]
  1665. mov r14,QWORD[16+rbp]
  1666. mov r15,QWORD[24+rbp]
  1667. jmp NEAR $L$sqr4x_sub_entry
  1668. ALIGN 16
  1669. $L$sqr4x_sub:
  1670. mov r12,QWORD[rbp]
  1671. mov r13,QWORD[8+rbp]
  1672. mov r14,QWORD[16+rbp]
  1673. mov r15,QWORD[24+rbp]
  1674. $L$sqr4x_sub_entry:
  1675. lea rbp,[32+rbp]
  1676. not r12
  1677. not r13
  1678. not r14
  1679. not r15
  1680. and r12,rax ; apply the mask
  1681. and r13,rax
  1682. and r14,rax
  1683. and r15,rax
  1684. neg r10 ; CF = carry saved from the previous group
  1685. adc r12,QWORD[rbx]
  1686. adc r13,QWORD[8+rbx]
  1687. adc r14,QWORD[16+rbx]
  1688. adc r15,QWORD[24+rbx]
  1689. mov QWORD[rdi],r12
  1690. lea rbx,[32+rbx]
  1691. mov QWORD[8+rdi],r13
  1692. sbb r10,r10 ; save CF (0 or -1) across the inc below, which would lose it
  1693. mov QWORD[16+rdi],r14
  1694. mov QWORD[24+rdi],r15
  1695. lea rdi,[32+rdi]
  1696. inc rcx
  1697. jnz NEAR $L$sqr4x_sub
  1698. mov r10,r9 ; leave r9's entry value in r10 ...
  1699. neg r9 ; ... and flip the sign of r9 for the next call in the sequence
  1700. DB 0F3h,0C3h ;repret
  ; bn_from_montgomery -- exported entry point (Win64 ABI).
  ; Dispatcher only: if the low three bits of the dword at [48+rsp] are
  ; zero it tail-jumps to bn_from_mont8x with registers and stack untouched;
  ; otherwise it returns 0 in eax.
  ; [48+rsp] is the same stack slot bn_from_mont8x later loads into r9,
  ; i.e. the sixth argument (presumably the word count `num`).
  1701. global bn_from_montgomery
  1702. ALIGN 32
  1703. bn_from_montgomery:
  1704. test DWORD[48+rsp],7 ; sixth argument a multiple of 8?
  1705. jz NEAR bn_from_mont8x ; yes: let the 8x code handle it
  1706. xor eax,eax ; no: return 0
  1707. DB 0F3h,0C3h ;repret
  ; bn_from_mont8x -- reached by the tail-jump in bn_from_montgomery.
  ; Win64 entry: the six arguments (rcx, rdx, r8, r9, [40+rsp], [48+rsp])
  ; are moved into rdi, rsi, rdx, rcx, r8, r9 and the caller's rdi/rsi are
  ; kept in the home slots at [8+rsp]/[16+rsp].
  ; Outline, as visible below:
  ;   1. carve a 64-byte-aligned scratch frame of about 320 + 2*r9*8 bytes
  ;      whose position depends on rdi mod 4096, touching every page on
  ;      the way down;
  ;   2. copy r9*8 bytes from rsi into the frame at [48+rsp] and zero the
  ;      following r9*8 bytes;
  ;   3. call a reduction routine and a post-processing routine (the
  ;      mulx/adx variants when the capability bits allow);
  ;   4. wipe the scratch area, restore callee-saved registers, return 1.
  ; NOTE(review): rsi is presumably the Montgomery-form input, rcx the
  ; modulus, r8 a pointer to n0 and rdi the result -- confirm with the C
  ; prototype.
  1708. ALIGN 32
  1709. bn_from_mont8x:
  1710. mov QWORD[8+rsp],rdi ;WIN64 prologue
  1711. mov QWORD[16+rsp],rsi
  1712. mov rax,rsp
  1713. $L$SEH_begin_bn_from_mont8x:
  1714. mov rdi,rcx
  1715. mov rsi,rdx
  1716. mov rdx,r8
  1717. mov rcx,r9
  1718. mov r8,QWORD[40+rsp]
  1719. mov r9,QWORD[48+rsp]
  1720. DB 0x67 ; address-size prefix on a register-only mov: no effect (presumably padding)
  1721. mov rax,rsp ; rax = entry rsp, used by the epilogue
  1722. push rbx
  1723. push rbp
  1724. push r12
  1725. push r13
  1726. push r14
  1727. push r15
  1728. $L$from_prologue:
  1729. shl r9d,3 ; r9 = count * 8 = length in bytes (upper half zeroed)
  1730. lea r10,[r9*2+r9] ; r10 = 3 * length
  1731. neg r9 ; r9 = -length
  1732. mov r8,QWORD[r8] ; dereference the fifth argument
  ; Choose the frame base in rbp from the distance (mod 4096) between the
  ; tentative frame and rdi.
  1733. lea r11,[((-320))+r9*2+rsp] ; tentative frame: rsp - 320 - 2*length
  1734. mov rbp,rsp
  1735. sub r11,rdi
  1736. and r11,4095 ; r11 = (frame - rdi) mod 4096
  1737. cmp r10,r11
  1738. jb NEAR $L$from_sp_alt ; 3*length < distance
  1739. sub rbp,r11 ; shift the frame down by that distance
  1740. lea rbp,[((-320))+r9*2+rbp]
  1741. jmp NEAR $L$from_sp_done
  1742. ALIGN 32
  1743. $L$from_sp_alt:
  1744. lea r10,[((4096-320))+r9*2] ; r10 = 4096 - 320 - 2*length
  1745. lea rbp,[((-320))+r9*2+rbp]
  1746. sub r11,r10
  1747. mov r10,0 ; mov (not xor) keeps CF from the sub for the cmovc
  1748. cmovc r11,r10 ; clamp a negative adjustment to 0
  1749. sub rbp,r11
  1750. $L$from_sp_done:
  1751. and rbp,-64 ; 64-byte align the frame
  ; Lower rsp to rbp in 4096-byte steps, reading one qword per step
  ; (presumably so stack guard pages are touched in order).
  1752. mov r11,rsp
  1753. sub r11,rbp
  1754. and r11,-4096
  1755. lea rsp,[rbp*1+r11]
  1756. mov r10,QWORD[rsp]
  1757. cmp rsp,rbp
  1758. ja NEAR $L$from_page_walk
  1759. jmp NEAR $L$from_page_walk_done
  1760. $L$from_page_walk:
  1761. lea rsp,[((-4096))+rsp]
  1762. mov r10,QWORD[rsp]
  1763. cmp rsp,rbp
  1764. ja NEAR $L$from_page_walk
  1765. $L$from_page_walk_done:
  1766. mov r10,r9 ; r10 = -length
  1767. neg r9 ; r9 = +length
  1768. mov QWORD[32+rsp],r8 ; frame slot: value loaded from *r8
  1769. mov QWORD[40+rsp],rax ; frame slot: entry rsp
  1770. $L$from_body:
  1771. mov r11,r9 ; bytes left to copy
  1772. lea rax,[48+rsp] ; destination buffer
  1773. pxor xmm0,xmm0
  1774. jmp NEAR $L$mul_by_1
  1775. ALIGN 32
  ; Copy 64 bytes per iteration from [rsi] to [rax] and store 64 zero bytes
  ; at [rax + r9], i.e. in the upper half of the buffer.
  1776. $L$mul_by_1:
  1777. movdqu xmm1,XMMWORD[rsi]
  1778. movdqu xmm2,XMMWORD[16+rsi]
  1779. movdqu xmm3,XMMWORD[32+rsi]
  1780. movdqa XMMWORD[r9*1+rax],xmm0
  1781. movdqu xmm4,XMMWORD[48+rsi]
  1782. movdqa XMMWORD[16+r9*1+rax],xmm0
  1783. DB 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 ; lea rsi,[64+rsi] encoded with a 32-bit displacement
  1784. movdqa XMMWORD[rax],xmm1
  1785. movdqa XMMWORD[32+r9*1+rax],xmm0
  1786. movdqa XMMWORD[16+rax],xmm2
  1787. movdqa XMMWORD[48+r9*1+rax],xmm0
  1788. movdqa XMMWORD[32+rax],xmm3
  1789. movdqa XMMWORD[48+rax],xmm4
  1790. lea rax,[64+rax]
  1791. sub r11,64
  1792. jnz NEAR $L$mul_by_1
  1793. DB 102,72,15,110,207 ; movq xmm1,rdi -- stash first argument (read back by the post routine)
  1794. DB 102,72,15,110,209 ; movq xmm2,rcx -- stash fourth argument
  1795. DB 0x67 ; address-size prefix on a register-only mov: no effect (presumably padding)
  1796. mov rbp,rcx ; rbp = fourth argument, the vector the reduction walks
  1797. DB 102,73,15,110,218 ; movq xmm3,r10 -- stash -length
  ; Capability test on the dword at OPENSSL_ia32cap_P+8.
  ; NOTE(review): mask 0x80108 is bits 3, 8 and 19, presumably the CPUID
  ; leaf-7 EBX flags BMI1, BMI2 and ADX -- confirm against the
  ; OPENSSL_ia32cap_P layout.
  1798. lea r11,[OPENSSL_ia32cap_P]
  1799. mov r11d,DWORD[8+r11]
  1800. and r11d,0x80108
  1801. cmp r11d,0x80108
  1802. jne NEAR $L$from_mont_nox ; not all three bits set: take the mul-based path
  1803. lea rdi,[r9*1+rax]
  1804. call __bn_sqrx8x_reduction
  1805. call __bn_postx4x_internal
  1806. pxor xmm0,xmm0
  1807. lea rax,[48+rsp]
  1808. jmp NEAR $L$from_mont_zero
  1809. ALIGN 32
  1810. $L$from_mont_nox:
  1811. call __bn_sqr8x_reduction
  1812. call __bn_post4x_internal
  1813. pxor xmm0,xmm0
  1814. lea rax,[48+rsp]
  1815. jmp NEAR $L$from_mont_zero
  1816. ALIGN 32
  ; Overwrite the scratch buffer with zeros: 64 bytes per iteration while
  ; r9 drops by 32, so 2*r9 bytes in total starting at [48+rsp].
  1817. $L$from_mont_zero:
  1818. mov rsi,QWORD[40+rsp] ; rsi = entry rsp saved in the frame
  1819. movdqa XMMWORD[rax],xmm0
  1820. movdqa XMMWORD[16+rax],xmm0
  1821. movdqa XMMWORD[32+rax],xmm0
  1822. movdqa XMMWORD[48+rax],xmm0
  1823. lea rax,[64+rax]
  1824. sub r9,32
  1825. jnz NEAR $L$from_mont_zero
  1826. mov rax,1 ; return value
  1827. mov r15,QWORD[((-48))+rsi] ; reload the six registers pushed in the prologue
  1828. mov r14,QWORD[((-40))+rsi]
  1829. mov r13,QWORD[((-32))+rsi]
  1830. mov r12,QWORD[((-24))+rsi]
  1831. mov rbp,QWORD[((-16))+rsi]
  1832. mov rbx,QWORD[((-8))+rsi]
  1833. lea rsp,[rsi] ; back to the entry rsp
  1834. $L$from_epilogue:
  1835. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  1836. mov rsi,QWORD[16+rsp]
  1837. DB 0F3h,0C3h ;repret
  1838. $L$SEH_end_bn_from_mont8x:
  ; bn_mulx4x_mont_gather5 -- frame set-up wrapper around mulx4x_internal.
  ; Win64 entry: the six arguments (rcx, rdx, r8, r9, [40+rsp], [48+rsp])
  ; are moved into rdi, rsi, rdx, rcx, r8, r9; the caller's rdi/rsi are
  ; kept in the home slots at [8+rsp]/[16+rsp].
  ; $L$mulx4x_enter is a second entry label that skips this argument
  ; shuffle; its users are outside this part of the file.
  ; The routine allocates a 64-byte-aligned scratch frame below rsp, saves
  ; *r8 at [32+rsp] and the entry rsp at [40+rsp], calls mulx4x_internal
  ; and returns 1.
  1839. ALIGN 32
  1840. bn_mulx4x_mont_gather5:
  1841. mov QWORD[8+rsp],rdi ;WIN64 prologue
  1842. mov QWORD[16+rsp],rsi
  1843. mov rax,rsp
  1844. $L$SEH_begin_bn_mulx4x_mont_gather5:
  1845. mov rdi,rcx
  1846. mov rsi,rdx
  1847. mov rdx,r8
  1848. mov rcx,r9
  1849. mov r8,QWORD[40+rsp]
  1850. mov r9,QWORD[48+rsp]
  1851. mov rax,rsp ; rax = entry rsp; mulx4x_internal reads [56+rax] through it
  1852. $L$mulx4x_enter:
  1853. push rbx
  1854. push rbp
  1855. push r12
  1856. push r13
  1857. push r14
  1858. push r15
  1859. $L$mulx4x_prologue:
  1860. shl r9d,3 ; r9 = count * 8 = length in bytes (upper half zeroed)
  1861. lea r10,[r9*2+r9] ; r10 = 3 * length
  1862. neg r9 ; r9 = -length (mulx4x_internal expects it negated)
  1863. mov r8,QWORD[r8] ; dereference the fifth argument
  ; Choose the frame base in rbp from the distance (mod 4096) between the
  ; tentative frame and rdi.
  1864. lea r11,[((-320))+r9*2+rsp] ; tentative frame: rsp - 320 - 2*length
  1865. mov rbp,rsp
  1866. sub r11,rdi
  1867. and r11,4095 ; r11 = (frame - rdi) mod 4096
  1868. cmp r10,r11
  1869. jb NEAR $L$mulx4xsp_alt ; 3*length < distance
  1870. sub rbp,r11
  1871. lea rbp,[((-320))+r9*2+rbp]
  1872. jmp NEAR $L$mulx4xsp_done
  1873. $L$mulx4xsp_alt:
  1874. lea r10,[((4096-320))+r9*2] ; r10 = 4096 - 320 - 2*length
  1875. lea rbp,[((-320))+r9*2+rbp]
  1876. sub r11,r10
  1877. mov r10,0 ; mov (not xor) keeps CF from the sub for the cmovc
  1878. cmovc r11,r10 ; clamp a negative adjustment to 0
  1879. sub rbp,r11
  1880. $L$mulx4xsp_done:
  1881. and rbp,-64 ; 64-byte align the frame
  ; Lower rsp to rbp in 4096-byte steps, reading one qword per step
  ; (presumably so stack guard pages are touched in order).
  1882. mov r11,rsp
  1883. sub r11,rbp
  1884. and r11,-4096
  1885. lea rsp,[rbp*1+r11]
  1886. mov r10,QWORD[rsp]
  1887. cmp rsp,rbp
  1888. ja NEAR $L$mulx4x_page_walk
  1889. jmp NEAR $L$mulx4x_page_walk_done
  1890. $L$mulx4x_page_walk:
  1891. lea rsp,[((-4096))+rsp]
  1892. mov r10,QWORD[rsp]
  1893. cmp rsp,rbp
  1894. ja NEAR $L$mulx4x_page_walk
  1895. $L$mulx4x_page_walk_done:
  1896. mov QWORD[32+rsp],r8 ; frame slot: value loaded from *r8 (read as [40+rsp] inside the callee)
  1897. mov QWORD[40+rsp],rax ; frame slot: entry rsp
  1898. $L$mulx4x_body:
  1899. call mulx4x_internal
  1900. mov rsi,QWORD[40+rsp] ; rsi = entry rsp
  1901. mov rax,1 ; return value
  1902. mov r15,QWORD[((-48))+rsi] ; reload the six registers pushed in the prologue
  1903. mov r14,QWORD[((-40))+rsi]
  1904. mov r13,QWORD[((-32))+rsi]
  1905. mov r12,QWORD[((-24))+rsi]
  1906. mov rbp,QWORD[((-16))+rsi]
  1907. mov rbx,QWORD[((-8))+rsi]
  1908. lea rsp,[rsi]
  1909. $L$mulx4x_epilogue:
  1910. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  1911. mov rsi,QWORD[16+rsp]
  1912. DB 0F3h,0C3h ;repret
  1913. $L$SEH_end_bn_mulx4x_mont_gather5:
  ; mulx4x_internal -- worker called from bn_mulx4x_mont_gather5 and
  ; bn_powerx5 once their stack frame is in place.
  ; Registers read on entry:
  ;   rdi  output pointer (saved at [64+rsp], reloaded into rdx at the end)
  ;   rsi  first multiplicand, consumed 4 words at a time
  ;   rdx  base of a table of 256-byte rows; one qword is selected per row
  ;   rcx  second vector, multiplied by the per-row reduction factor
  ;        (presumably the modulus n[])
  ;   r9   minus the operand length in bytes
  ;   rax  entry rsp of the public function; the dword at [56+rax]
  ;        (seventh Win64 argument) is the table index
  ;   [40+rsp]  constant the caller stored at its [32+rsp] (presumably n0)
  ; The row element is picked with compare masks rather than an indexed
  ; load, so every table row is read in full regardless of the index.
  ; NOTE(review): presumably a Montgomery multiplication using MULX/ADCX/
  ; ADOX -- confirm against the Perl generator.
  ; Leaves by jumping to $L$sqrx4x_sub_entry (not defined in this part of
  ; the file); the ret executed there returns to this routine's caller.
  1914. ALIGN 32
  1915. mulx4x_internal:
  1916. mov QWORD[8+rsp],r9 ; save -length
  1917. mov r10,r9
  1918. neg r9 ; r9 = +length
  1919. shl r9,5 ; r9 = 32 * length
  1920. neg r10 ; r10 = +length
  1921. lea r13,[128+r9*1+rdx] ; r13 = table base + 128 + 32*length: end marker for the row pointer
  1922. shr r9,5+5 ; r9 = length / 32 = number of 4-word groups
  1923. movd xmm5,DWORD[56+rax] ; table index
  1924. sub r9,1 ; inner loops run groups-1 times (first group is peeled)
  1925. lea rax,[$L$inc]
  1926. mov QWORD[((16+8))+rsp],r13
  1927. mov QWORD[((24+8))+rsp],r9 ; inner loop counter
  1928. mov QWORD[((56+8))+rsp],rdi ; output pointer
  ; Build sixteen 16-byte compare masks at [112+r10]..[352+r10]: a running
  ; counter vector seeded from $L$inc is compared (pcmpeqd) against the
  ; broadcast index, so the lanes whose counter equals the index become
  ; all-ones.  ($L$inc itself is defined outside this part of the file.)
  1929. movdqa xmm0,XMMWORD[rax]
  1930. movdqa xmm1,XMMWORD[16+rax]
  1931. lea r10,[((88-112))+r10*1+rsp] ; mask area sits above the length-sized region of the frame
  1932. lea rdi,[128+rdx] ; rdi = row pointer, biased by 128
  1933. pshufd xmm5,xmm5,0 ; broadcast the index to all four lanes
  1934. movdqa xmm4,xmm1 ; xmm4 = counter increment
  1935. DB 0x67 ; address-size prefix on a register-only instruction: no effect (presumably padding)
  1936. movdqa xmm2,xmm1
  1937. DB 0x67 ; as above
  1938. paddd xmm1,xmm0
  1939. pcmpeqd xmm0,xmm5
  1940. movdqa xmm3,xmm4
  1941. paddd xmm2,xmm1
  1942. pcmpeqd xmm1,xmm5
  1943. movdqa XMMWORD[112+r10],xmm0
  1944. movdqa xmm0,xmm4
  1945. paddd xmm3,xmm2
  1946. pcmpeqd xmm2,xmm5
  1947. movdqa XMMWORD[128+r10],xmm1
  1948. movdqa xmm1,xmm4
  1949. paddd xmm0,xmm3
  1950. pcmpeqd xmm3,xmm5
  1951. movdqa XMMWORD[144+r10],xmm2
  1952. movdqa xmm2,xmm4
  1953. paddd xmm1,xmm0
  1954. pcmpeqd xmm0,xmm5
  1955. movdqa XMMWORD[160+r10],xmm3
  1956. movdqa xmm3,xmm4
  1957. paddd xmm2,xmm1
  1958. pcmpeqd xmm1,xmm5
  1959. movdqa XMMWORD[176+r10],xmm0
  1960. movdqa xmm0,xmm4
  1961. paddd xmm3,xmm2
  1962. pcmpeqd xmm2,xmm5
  1963. movdqa XMMWORD[192+r10],xmm1
  1964. movdqa xmm1,xmm4
  1965. paddd xmm0,xmm3
  1966. pcmpeqd xmm3,xmm5
  1967. movdqa XMMWORD[208+r10],xmm2
  1968. movdqa xmm2,xmm4
  1969. paddd xmm1,xmm0
  1970. pcmpeqd xmm0,xmm5
  1971. movdqa XMMWORD[224+r10],xmm3
  1972. movdqa xmm3,xmm4
  1973. paddd xmm2,xmm1
  1974. pcmpeqd xmm1,xmm5
  1975. movdqa XMMWORD[240+r10],xmm0
  1976. movdqa xmm0,xmm4
  1977. paddd xmm3,xmm2
  1978. pcmpeqd xmm2,xmm5
  1979. movdqa XMMWORD[256+r10],xmm1
  1980. movdqa xmm1,xmm4
  1981. paddd xmm0,xmm3
  1982. pcmpeqd xmm3,xmm5
  1983. movdqa XMMWORD[272+r10],xmm2
  1984. movdqa xmm2,xmm4
  1985. paddd xmm1,xmm0
  1986. pcmpeqd xmm0,xmm5
  1987. movdqa XMMWORD[288+r10],xmm3
  1988. movdqa xmm3,xmm4
  1989. DB 0x67 ; address-size prefix on a register-only instruction: no effect (presumably padding)
  1990. paddd xmm2,xmm1
  1991. pcmpeqd xmm1,xmm5
  1992. movdqa XMMWORD[304+r10],xmm0
  1993. paddd xmm3,xmm2
  1994. pcmpeqd xmm2,xmm5
  1995. movdqa XMMWORD[320+r10],xmm1
  1996. pcmpeqd xmm3,xmm5
  1997. movdqa XMMWORD[336+r10],xmm2
  ; Gather from the first row: AND each 16-byte chunk of the row with its
  ; mask and OR everything together.  The last four masks are still in
  ; registers, the other twelve are read back from the frame.
  1998. pand xmm0,XMMWORD[64+rdi]
  1999. pand xmm1,XMMWORD[80+rdi]
  2000. pand xmm2,XMMWORD[96+rdi]
  2001. movdqa XMMWORD[352+r10],xmm3
  2002. pand xmm3,XMMWORD[112+rdi]
  2003. por xmm0,xmm2
  2004. por xmm1,xmm3
  2005. movdqa xmm4,XMMWORD[((-128))+rdi]
  2006. movdqa xmm5,XMMWORD[((-112))+rdi]
  2007. movdqa xmm2,XMMWORD[((-96))+rdi]
  2008. pand xmm4,XMMWORD[112+r10]
  2009. movdqa xmm3,XMMWORD[((-80))+rdi]
  2010. pand xmm5,XMMWORD[128+r10]
  2011. por xmm0,xmm4
  2012. pand xmm2,XMMWORD[144+r10]
  2013. por xmm1,xmm5
  2014. pand xmm3,XMMWORD[160+r10]
  2015. por xmm0,xmm2
  2016. por xmm1,xmm3
  2017. movdqa xmm4,XMMWORD[((-64))+rdi]
  2018. movdqa xmm5,XMMWORD[((-48))+rdi]
  2019. movdqa xmm2,XMMWORD[((-32))+rdi]
  2020. pand xmm4,XMMWORD[176+r10]
  2021. movdqa xmm3,XMMWORD[((-16))+rdi]
  2022. pand xmm5,XMMWORD[192+r10]
  2023. por xmm0,xmm4
  2024. pand xmm2,XMMWORD[208+r10]
  2025. por xmm1,xmm5
  2026. pand xmm3,XMMWORD[224+r10]
  2027. por xmm0,xmm2
  2028. por xmm1,xmm3
  2029. movdqa xmm4,XMMWORD[rdi]
  2030. movdqa xmm5,XMMWORD[16+rdi]
  2031. movdqa xmm2,XMMWORD[32+rdi]
  2032. pand xmm4,XMMWORD[240+r10]
  2033. movdqa xmm3,XMMWORD[48+rdi]
  2034. pand xmm5,XMMWORD[256+r10]
  2035. por xmm0,xmm4
  2036. pand xmm2,XMMWORD[272+r10]
  2037. por xmm1,xmm5
  2038. pand xmm3,XMMWORD[288+r10]
  2039. por xmm0,xmm2
  2040. por xmm1,xmm3
  2041. pxor xmm0,xmm1 ; merge the two accumulators
  2042. pshufd xmm1,xmm0,0x4e ; swap the 64-bit halves ...
  2043. por xmm0,xmm1 ; ... so the selected qword ends up in the low half
  2044. lea rdi,[256+rdi] ; next table row
  2045. DB 102,72,15,126,194 ; movq rdx,xmm0 -- rdx = selected multiplier word
  ; First row, first four words.  In the NASM form `mulx hi,lo,src`
  ; computes hi:lo = rdx * src without touching the flags.
  2046. lea rbx,[((64+32+8))+rsp] ; rbx = accumulator pointer, 32 bytes past its first word at [72+rsp]
  2047. mov r9,rdx ; keep the multiplier word; rdx alternates with r8 below
  2048. mulx rax,r8,QWORD[rsi]
  2049. mulx r12,r11,QWORD[8+rsi]
  2050. add r11,rax
  2051. mulx r13,rax,QWORD[16+rsi]
  2052. adc r12,rax
  2053. adc r13,0
  2054. mulx r14,rax,QWORD[24+rsi]
  2055. mov r15,r8 ; r15 = lowest product word
  2056. imul r8,QWORD[((32+8))+rsp] ; r8 = reduction factor = low word * saved constant
  2057. xor rbp,rbp ; rbp = 0 and CF = OF = 0
  2058. mov rdx,r8
  2059. mov QWORD[((8+8))+rsp],rdi ; park the row pointer; rdi becomes the loop counter
  2060. lea rsi,[32+rsi]
  2061. adcx r13,rax
  2062. adcx r14,rbp
  2063. mulx r10,rax,QWORD[rcx]
  2064. adcx r15,rax ; sum is discarded; only the carry out is wanted
  2065. adox r10,r11
  2066. mulx r11,rax,QWORD[8+rcx]
  2067. adcx r10,rax
  2068. adox r11,r12
  2069. mulx r12,rax,QWORD[16+rcx]
  2070. mov rdi,QWORD[((24+8))+rsp] ; rdi = inner loop counter
  2071. mov QWORD[((-32))+rbx],r10
  2072. adcx r11,rax
  2073. adox r12,r13
  2074. mulx r15,rax,QWORD[24+rcx]
  2075. mov rdx,r9 ; back to the multiplier word
  2076. mov QWORD[((-24))+rbx],r11
  2077. adcx r12,rax
  2078. adox r15,rbp
  2079. lea rcx,[32+rcx]
  2080. mov QWORD[((-16))+rbx],r12
  2081. jmp NEAR $L$mulx4x_1st
  2082. ALIGN 32
  ; First row, remaining groups of four words.  Products with [rsi] ride
  ; the CF chain, products with [rcx] use both CF and OF.
  2083. $L$mulx4x_1st:
  2084. adcx r15,rbp ; absorb the CF left over from the previous group
  2085. mulx rax,r10,QWORD[rsi]
  2086. adcx r10,r14
  2087. mulx r14,r11,QWORD[8+rsi]
  2088. adcx r11,rax
  2089. mulx rax,r12,QWORD[16+rsi]
  2090. adcx r12,r14
  2091. mulx r14,r13,QWORD[24+rsi]
  2092. DB 0x67,0x67 ; address-size prefixes on a register-only mov: no effect (presumably padding)
  2093. mov rdx,r8 ; switch to the reduction factor
  2094. adcx r13,rax
  2095. adcx r14,rbp
  2096. lea rsi,[32+rsi]
  2097. lea rbx,[32+rbx]
  2098. adox r10,r15
  2099. mulx r15,rax,QWORD[rcx]
  2100. adcx r10,rax
  2101. adox r11,r15
  2102. mulx r15,rax,QWORD[8+rcx]
  2103. adcx r11,rax
  2104. adox r12,r15
  2105. mulx r15,rax,QWORD[16+rcx]
  2106. mov QWORD[((-40))+rbx],r10
  2107. adcx r12,rax
  2108. mov QWORD[((-32))+rbx],r11
  2109. adox r13,r15
  2110. mulx r15,rax,QWORD[24+rcx]
  2111. mov rdx,r9 ; back to the multiplier word
  2112. mov QWORD[((-24))+rbx],r12
  2113. adcx r13,rax
  2114. adox r15,rbp
  2115. lea rcx,[32+rcx]
  2116. mov QWORD[((-16))+rbx],r13
  2117. dec rdi ; dec leaves CF alone, so the carry survives into the next group
  2118. jnz NEAR $L$mulx4x_1st
  2119. mov rax,QWORD[8+rsp] ; rax = -length
  2120. adc r15,rbp
  2121. lea rsi,[rax*1+rsi] ; rewind rsi to the start of the multiplicand
  2122. add r14,r15
  2123. mov rdi,QWORD[((8+8))+rsp] ; row pointer back into rdi
  2124. adc rbp,rbp ; rbp = top-most carry (0 or 1)
  2125. mov QWORD[((-8))+rbx],r14
  2126. jmp NEAR $L$mulx4x_outer
  2127. ALIGN 32
  ; One pass per remaining table row: gather the row's selected word with
  ; all sixteen stored masks, then multiply-accumulate into the buffer.
  2128. $L$mulx4x_outer:
  2129. lea r10,[((16-256))+rbx] ; mask area sits right after the accumulator: [rbx+16]..[rbx+256]
  2130. pxor xmm4,xmm4
  2131. DB 0x67,0x67 ; address-size prefixes on a register-only instruction: no effect (presumably padding)
  2132. pxor xmm5,xmm5
  2133. movdqa xmm0,XMMWORD[((-128))+rdi]
  2134. movdqa xmm1,XMMWORD[((-112))+rdi]
  2135. movdqa xmm2,XMMWORD[((-96))+rdi]
  2136. pand xmm0,XMMWORD[256+r10]
  2137. movdqa xmm3,XMMWORD[((-80))+rdi]
  2138. pand xmm1,XMMWORD[272+r10]
  2139. por xmm4,xmm0
  2140. pand xmm2,XMMWORD[288+r10]
  2141. por xmm5,xmm1
  2142. pand xmm3,XMMWORD[304+r10]
  2143. por xmm4,xmm2
  2144. por xmm5,xmm3
  2145. movdqa xmm0,XMMWORD[((-64))+rdi]
  2146. movdqa xmm1,XMMWORD[((-48))+rdi]
  2147. movdqa xmm2,XMMWORD[((-32))+rdi]
  2148. pand xmm0,XMMWORD[320+r10]
  2149. movdqa xmm3,XMMWORD[((-16))+rdi]
  2150. pand xmm1,XMMWORD[336+r10]
  2151. por xmm4,xmm0
  2152. pand xmm2,XMMWORD[352+r10]
  2153. por xmm5,xmm1
  2154. pand xmm3,XMMWORD[368+r10]
  2155. por xmm4,xmm2
  2156. por xmm5,xmm3
  2157. movdqa xmm0,XMMWORD[rdi]
  2158. movdqa xmm1,XMMWORD[16+rdi]
  2159. movdqa xmm2,XMMWORD[32+rdi]
  2160. pand xmm0,XMMWORD[384+r10]
  2161. movdqa xmm3,XMMWORD[48+rdi]
  2162. pand xmm1,XMMWORD[400+r10]
  2163. por xmm4,xmm0
  2164. pand xmm2,XMMWORD[416+r10]
  2165. por xmm5,xmm1
  2166. pand xmm3,XMMWORD[432+r10]
  2167. por xmm4,xmm2
  2168. por xmm5,xmm3
  2169. movdqa xmm0,XMMWORD[64+rdi]
  2170. movdqa xmm1,XMMWORD[80+rdi]
  2171. movdqa xmm2,XMMWORD[96+rdi]
  2172. pand xmm0,XMMWORD[448+r10]
  2173. movdqa xmm3,XMMWORD[112+rdi]
  2174. pand xmm1,XMMWORD[464+r10]
  2175. por xmm4,xmm0
  2176. pand xmm2,XMMWORD[480+r10]
  2177. por xmm5,xmm1
  2178. pand xmm3,XMMWORD[496+r10]
  2179. por xmm4,xmm2
  2180. por xmm5,xmm3
  2181. por xmm4,xmm5
  2182. pshufd xmm0,xmm4,0x4e ; swap the 64-bit halves ...
  2183. por xmm0,xmm4 ; ... so the selected qword ends up in the low half
  2184. lea rdi,[256+rdi] ; next table row
  2185. DB 102,72,15,126,194 ; movq rdx,xmm0 -- rdx = selected multiplier word
  2186. mov QWORD[rbx],rbp ; store the top-most carry of the previous pass
  2187. lea rbx,[32+rax*1+rbx] ; rewind rbx (rax = -length) to 32 bytes past the accumulator start
  2188. mulx r11,r8,QWORD[rsi]
  2189. xor rbp,rbp ; rbp = 0 and CF = OF = 0
  2190. mov r9,rdx ; keep the multiplier word
  2191. mulx r12,r14,QWORD[8+rsi]
  2192. adox r8,QWORD[((-32))+rbx] ; OF chain adds the previous accumulator words
  2193. adcx r11,r14
  2194. mulx r13,r15,QWORD[16+rsi]
  2195. adox r11,QWORD[((-24))+rbx]
  2196. adcx r12,r15
  2197. mulx r14,rdx,QWORD[24+rsi]
  2198. adox r12,QWORD[((-16))+rbx]
  2199. adcx r13,rdx
  2200. lea rcx,[rax*1+rcx] ; rewind rcx to the start of its vector
  2201. lea rsi,[32+rsi]
  2202. adox r13,QWORD[((-8))+rbx]
  2203. adcx r14,rbp
  2204. adox r14,rbp
  2205. mov r15,r8 ; r15 = lowest word
  2206. imul r8,QWORD[((32+8))+rsp] ; r8 = reduction factor for this pass
  2207. mov rdx,r8
  2208. xor rbp,rbp ; clear CF and OF again
  2209. mov QWORD[((8+8))+rsp],rdi ; park the row pointer
  2210. mulx r10,rax,QWORD[rcx]
  2211. adcx r15,rax ; sum is discarded; only the carry out is wanted
  2212. adox r10,r11
  2213. mulx r11,rax,QWORD[8+rcx]
  2214. adcx r10,rax
  2215. adox r11,r12
  2216. mulx r12,rax,QWORD[16+rcx]
  2217. adcx r11,rax
  2218. adox r12,r13
  2219. mulx r15,rax,QWORD[24+rcx]
  2220. mov rdx,r9 ; back to the multiplier word
  2221. mov rdi,QWORD[((24+8))+rsp] ; rdi = inner loop counter
  2222. mov QWORD[((-32))+rbx],r10
  2223. adcx r12,rax
  2224. mov QWORD[((-24))+rbx],r11
  2225. adox r15,rbp
  2226. mov QWORD[((-16))+rbx],r12
  2227. lea rcx,[32+rcx]
  2228. jmp NEAR $L$mulx4x_inner
  2229. ALIGN 32
  ; Remaining groups of the pass: products with [rsi] go through OF, the
  ; old accumulator words at [rbx] are added through CF, then the products
  ; with [rcx] are folded in and the result is stored one group lower.
  2230. $L$mulx4x_inner:
  2231. mulx rax,r10,QWORD[rsi]
  2232. adcx r15,rbp ; absorb the CF left over from the previous group
  2233. adox r10,r14
  2234. mulx r14,r11,QWORD[8+rsi]
  2235. adcx r10,QWORD[rbx]
  2236. adox r11,rax
  2237. mulx rax,r12,QWORD[16+rsi]
  2238. adcx r11,QWORD[8+rbx]
  2239. adox r12,r14
  2240. mulx r14,r13,QWORD[24+rsi]
  2241. mov rdx,r8 ; switch to the reduction factor
  2242. adcx r12,QWORD[16+rbx]
  2243. adox r13,rax
  2244. adcx r13,QWORD[24+rbx]
  2245. adox r14,rbp
  2246. lea rsi,[32+rsi]
  2247. lea rbx,[32+rbx]
  2248. adcx r14,rbp
  2249. adox r10,r15
  2250. mulx r15,rax,QWORD[rcx]
  2251. adcx r10,rax
  2252. adox r11,r15
  2253. mulx r15,rax,QWORD[8+rcx]
  2254. adcx r11,rax
  2255. adox r12,r15
  2256. mulx r15,rax,QWORD[16+rcx]
  2257. mov QWORD[((-40))+rbx],r10
  2258. adcx r12,rax
  2259. adox r13,r15
  2260. mov QWORD[((-32))+rbx],r11
  2261. mulx r15,rax,QWORD[24+rcx]
  2262. mov rdx,r9 ; back to the multiplier word
  2263. lea rcx,[32+rcx]
  2264. mov QWORD[((-24))+rbx],r12
  2265. adcx r13,rax
  2266. adox r15,rbp
  2267. mov QWORD[((-16))+rbx],r13
  2268. dec rdi ; dec leaves CF alone, so the carry survives into the next group
  2269. jnz NEAR $L$mulx4x_inner
  2270. mov rax,QWORD[((0+8))+rsp] ; rax = -length
  2271. adc r15,rbp
  2272. sub rdi,QWORD[rbx] ; rdi is 0 here: CF = (saved top-most carry != 0); result unused
  2273. mov rdi,QWORD[((8+8))+rsp] ; row pointer back into rdi
  2274. mov r10,QWORD[((16+8))+rsp] ; r10 = row end marker
  2275. adc r14,r15
  2276. lea rsi,[rax*1+rsi] ; rewind rsi
  2277. adc rbp,rbp ; rbp = new top-most carry
  2278. mov QWORD[((-8))+rbx],r14
  2279. cmp rdi,r10
  2280. jb NEAR $L$mulx4x_outer ; more table rows to process
  ; Set up registers for the shared tail at $L$sqrx4x_sub_entry.
  2281. mov r10,QWORD[((-8))+rcx] ; last word of the rcx vector
  2282. mov r8,rbp ; top-most carry
  2283. mov r12,QWORD[rax*1+rcx] ; first word of the rcx vector
  2284. lea rbp,[rax*1+rcx] ; rbp = start of the rcx vector
  2285. mov rcx,rax
  2286. lea rdi,[rax*1+rbx] ; rdi = rbx rewound by the length
  2287. xor eax,eax
  2288. xor r15,r15
  2289. sub r10,r14 ; CF = (last vector word < top accumulator word)
  2290. adc r15,r15
  2291. or r8,r15 ; r8 = carry OR comparison result
  2292. sar rcx,3+2 ; rcx = -(number of 4-word groups)
  2293. sub rax,r8 ; rax = -r8: 0 or all-ones mask
  2294. mov rdx,QWORD[((56+8))+rsp] ; rdx = output pointer saved on entry
  2295. dec r12 ; first word minus 1, as in __bn_post4x_internal (a later NOT then yields its negation)
  2296. mov r13,QWORD[8+rbp]
  2297. xor r8,r8
  2298. mov r14,QWORD[16+rbp]
  2299. mov r15,QWORD[24+rbp]
  2300. jmp NEAR $L$sqrx4x_sub_entry
  ; bn_powerx5 -- Win64 entry: the six arguments (rcx, rdx, r8, r9,
  ; [40+rsp], [48+rsp]) are moved into rdi, rsi, rdx, rcx, r8, r9 and the
  ; caller's rdi/rsi are kept in the home slots at [8+rsp]/[16+rsp].
  ; $L$powerx5_enter is a second entry label that skips this shuffle; its
  ; users are outside this part of the file.
  ; After building a 64-byte-aligned scratch frame the routine calls
  ; __bn_sqrx8x_internal + __bn_postx4x_internal five times in a row, then
  ; mulx4x_internal once, and returns 1.
  ; NOTE(review): presumably five Montgomery squarings followed by one
  ; multiplication by a gathered table entry -- confirm against the
  ; generator.
  2301. ALIGN 32
  2302. bn_powerx5:
  2303. mov QWORD[8+rsp],rdi ;WIN64 prologue
  2304. mov QWORD[16+rsp],rsi
  2305. mov rax,rsp
  2306. $L$SEH_begin_bn_powerx5:
  2307. mov rdi,rcx
  2308. mov rsi,rdx
  2309. mov rdx,r8
  2310. mov rcx,r9
  2311. mov r8,QWORD[40+rsp]
  2312. mov r9,QWORD[48+rsp]
  2313. mov rax,rsp ; rax = entry rsp
  2314. $L$powerx5_enter:
  2315. push rbx
  2316. push rbp
  2317. push r12
  2318. push r13
  2319. push r14
  2320. push r15
  2321. $L$powerx5_prologue:
  2322. shl r9d,3 ; r9 = count * 8 = length in bytes (upper half zeroed)
  2323. lea r10,[r9*2+r9] ; r10 = 3 * length
  2324. neg r9 ; r9 = -length
  2325. mov r8,QWORD[r8] ; dereference the fifth argument
  ; Choose the frame base in rbp from the distance (mod 4096) between the
  ; tentative frame and rdi.
  2326. lea r11,[((-320))+r9*2+rsp] ; tentative frame: rsp - 320 - 2*length
  2327. mov rbp,rsp
  2328. sub r11,rdi
  2329. and r11,4095 ; r11 = (frame - rdi) mod 4096
  2330. cmp r10,r11
  2331. jb NEAR $L$pwrx_sp_alt ; 3*length < distance
  2332. sub rbp,r11
  2333. lea rbp,[((-320))+r9*2+rbp]
  2334. jmp NEAR $L$pwrx_sp_done
  2335. ALIGN 32
  2336. $L$pwrx_sp_alt:
  2337. lea r10,[((4096-320))+r9*2] ; r10 = 4096 - 320 - 2*length
  2338. lea rbp,[((-320))+r9*2+rbp]
  2339. sub r11,r10
  2340. mov r10,0 ; mov (not xor) keeps CF from the sub for the cmovc
  2341. cmovc r11,r10 ; clamp a negative adjustment to 0
  2342. sub rbp,r11
  2343. $L$pwrx_sp_done:
  2344. and rbp,-64 ; 64-byte align the frame
  ; Lower rsp to rbp in 4096-byte steps, reading one qword per step
  ; (presumably so stack guard pages are touched in order).
  2345. mov r11,rsp
  2346. sub r11,rbp
  2347. and r11,-4096
  2348. lea rsp,[rbp*1+r11]
  2349. mov r10,QWORD[rsp]
  2350. cmp rsp,rbp
  2351. ja NEAR $L$pwrx_page_walk
  2352. jmp NEAR $L$pwrx_page_walk_done
  2353. $L$pwrx_page_walk:
  2354. lea rsp,[((-4096))+rsp]
  2355. mov r10,QWORD[rsp]
  2356. cmp rsp,rbp
  2357. ja NEAR $L$pwrx_page_walk
  2358. $L$pwrx_page_walk_done:
  2359. mov r10,r9 ; r10 = -length
  2360. neg r9 ; r9 = +length
  2361. pxor xmm0,xmm0 ; zero vector (the squaring routine stores it to clear its buffer)
  2362. DB 102,72,15,110,207 ; movq xmm1,rdi -- stash first argument
  2363. DB 102,72,15,110,209 ; movq xmm2,rcx -- stash fourth argument
  2364. DB 102,73,15,110,218 ; movq xmm3,r10 -- stash -length
  2365. DB 102,72,15,110,226 ; movq xmm4,rdx -- stash third argument
  2366. mov QWORD[32+rsp],r8 ; frame slot: value loaded from *r8
  2367. mov QWORD[40+rsp],rax ; frame slot: entry rsp
  2368. $L$powerx5_body:
  2369. call __bn_sqrx8x_internal
  2370. call __bn_postx4x_internal
  2371. call __bn_sqrx8x_internal
  2372. call __bn_postx4x_internal
  2373. call __bn_sqrx8x_internal
  2374. call __bn_postx4x_internal
  2375. call __bn_sqrx8x_internal
  2376. call __bn_postx4x_internal
  2377. call __bn_sqrx8x_internal
  2378. call __bn_postx4x_internal
  ; Re-create the register state mulx4x_internal reads on entry.
  2379. mov r9,r10 ; r9 = r10 as left by the last helper call (presumably -length)
  2380. mov rdi,rsi ; rdi = rsi as left by the last helper call (presumably the result buffer)
  2381. DB 102,72,15,126,209 ; movq rcx,xmm2 -- fourth argument back into rcx
  2382. DB 102,72,15,126,226 ; movq rdx,xmm4 -- third argument back into rdx
  2383. mov rax,QWORD[40+rsp] ; rax = entry rsp, so [56+rax] is the seventh argument
  2384. call mulx4x_internal
  2385. mov rsi,QWORD[40+rsp] ; rsi = entry rsp
  2386. mov rax,1 ; return value
  2387. mov r15,QWORD[((-48))+rsi] ; reload the six registers pushed in the prologue
  2388. mov r14,QWORD[((-40))+rsi]
  2389. mov r13,QWORD[((-32))+rsi]
  2390. mov r12,QWORD[((-24))+rsi]
  2391. mov rbp,QWORD[((-16))+rsi]
  2392. mov rbx,QWORD[((-8))+rsi]
  2393. lea rsp,[rsi]
  2394. $L$powerx5_epilogue:
  2395. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  2396. mov rsi,QWORD[16+rsp]
  2397. DB 0F3h,0C3h ;repret
  2398. $L$SEH_end_bn_powerx5:
  2399. global bn_sqrx8x_internal
  2400. ALIGN 32
  2401. bn_sqrx8x_internal:
  2402. __bn_sqrx8x_internal:
  2403. lea rdi,[((48+8))+rsp]
  2404. lea rbp,[r9*1+rsi]
  2405. mov QWORD[((0+8))+rsp],r9
  2406. mov QWORD[((8+8))+rsp],rbp
  2407. jmp NEAR $L$sqr8x_zero_start
  2408. ALIGN 32
  2409. DB 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
  2410. $L$sqrx8x_zero:
  2411. DB 0x3e
  2412. movdqa XMMWORD[rdi],xmm0
  2413. movdqa XMMWORD[16+rdi],xmm0
  2414. movdqa XMMWORD[32+rdi],xmm0
  2415. movdqa XMMWORD[48+rdi],xmm0
  2416. $L$sqr8x_zero_start:
  2417. movdqa XMMWORD[64+rdi],xmm0
  2418. movdqa XMMWORD[80+rdi],xmm0
  2419. movdqa XMMWORD[96+rdi],xmm0
  2420. movdqa XMMWORD[112+rdi],xmm0
  2421. lea rdi,[128+rdi]
  2422. sub r9,64
  2423. jnz NEAR $L$sqrx8x_zero
  2424. mov rdx,QWORD[rsi]
  2425. xor r10,r10
  2426. xor r11,r11
  2427. xor r12,r12
  2428. xor r13,r13
  2429. xor r14,r14
  2430. xor r15,r15
  2431. lea rdi,[((48+8))+rsp]
  2432. xor rbp,rbp
  2433. jmp NEAR $L$sqrx8x_outer_loop
  2434. ALIGN 32
  2435. $L$sqrx8x_outer_loop:
  2436. mulx rax,r8,QWORD[8+rsi]
  2437. adcx r8,r9
  2438. adox r10,rax
  2439. mulx rax,r9,QWORD[16+rsi]
  2440. adcx r9,r10
  2441. adox r11,rax
  2442. DB 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
  2443. adcx r10,r11
  2444. adox r12,rax
  2445. DB 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
  2446. adcx r11,r12
  2447. adox r13,rax
  2448. mulx rax,r12,QWORD[40+rsi]
  2449. adcx r12,r13
  2450. adox r14,rax
  2451. mulx rax,r13,QWORD[48+rsi]
  2452. adcx r13,r14
  2453. adox rax,r15
  2454. mulx r15,r14,QWORD[56+rsi]
  2455. mov rdx,QWORD[8+rsi]
  2456. adcx r14,rax
  2457. adox r15,rbp
  2458. adc r15,QWORD[64+rdi]
  2459. mov QWORD[8+rdi],r8
  2460. mov QWORD[16+rdi],r9
  2461. sbb rcx,rcx
  2462. xor rbp,rbp
  2463. mulx rbx,r8,QWORD[16+rsi]
  2464. mulx rax,r9,QWORD[24+rsi]
  2465. adcx r8,r10
  2466. adox r9,rbx
  2467. mulx rbx,r10,QWORD[32+rsi]
  2468. adcx r9,r11
  2469. adox r10,rax
  2470. DB 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
  2471. adcx r10,r12
  2472. adox r11,rbx
  2473. DB 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
  2474. adcx r11,r13
  2475. adox r12,r14
  2476. DB 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
  2477. mov rdx,QWORD[16+rsi]
  2478. adcx r12,rax
  2479. adox r13,rbx
  2480. adcx r13,r15
  2481. adox r14,rbp
  2482. adcx r14,rbp
  2483. mov QWORD[24+rdi],r8
  2484. mov QWORD[32+rdi],r9
  2485. mulx rbx,r8,QWORD[24+rsi]
  2486. mulx rax,r9,QWORD[32+rsi]
  2487. adcx r8,r10
  2488. adox r9,rbx
  2489. mulx rbx,r10,QWORD[40+rsi]
  2490. adcx r9,r11
  2491. adox r10,rax
  2492. DB 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
  2493. adcx r10,r12
  2494. adox r11,r13
  2495. DB 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
  2496. DB 0x3e
  2497. mov rdx,QWORD[24+rsi]
  2498. adcx r11,rbx
  2499. adox r12,rax
  2500. adcx r12,r14
  2501. mov QWORD[40+rdi],r8
  2502. mov QWORD[48+rdi],r9
  2503. mulx rax,r8,QWORD[32+rsi]
  2504. adox r13,rbp
  2505. adcx r13,rbp
  2506. mulx rbx,r9,QWORD[40+rsi]
  2507. adcx r8,r10
  2508. adox r9,rax
  2509. mulx rax,r10,QWORD[48+rsi]
  2510. adcx r9,r11
  2511. adox r10,r12
  2512. mulx r12,r11,QWORD[56+rsi]
  2513. mov rdx,QWORD[32+rsi]
  2514. mov r14,QWORD[40+rsi]
  2515. adcx r10,rbx
  2516. adox r11,rax
  2517. mov r15,QWORD[48+rsi]
  2518. adcx r11,r13
  2519. adox r12,rbp
  2520. adcx r12,rbp
  2521. mov QWORD[56+rdi],r8
  2522. mov QWORD[64+rdi],r9
  2523. mulx rax,r9,r14
  2524. mov r8,QWORD[56+rsi]
  2525. adcx r9,r10
  2526. mulx rbx,r10,r15
  2527. adox r10,rax
  2528. adcx r10,r11
  2529. mulx rax,r11,r8
  2530. mov rdx,r14
  2531. adox r11,rbx
  2532. adcx r11,r12
  2533. adcx rax,rbp
  2534. mulx rbx,r14,r15
  2535. mulx r13,r12,r8
  2536. mov rdx,r15
  2537. lea rsi,[64+rsi]
  2538. adcx r11,r14
  2539. adox r12,rbx
  2540. adcx r12,rax
  2541. adox r13,rbp
  2542. DB 0x67,0x67
  2543. mulx r14,r8,r8
  2544. adcx r13,r8
  2545. adcx r14,rbp
  2546. cmp rsi,QWORD[((8+8))+rsp]
  2547. je NEAR $L$sqrx8x_outer_break
  2548. neg rcx
  2549. mov rcx,-8
  2550. mov r15,rbp
  2551. mov r8,QWORD[64+rdi]
  2552. adcx r9,QWORD[72+rdi]
  2553. adcx r10,QWORD[80+rdi]
  2554. adcx r11,QWORD[88+rdi]
  2555. adc r12,QWORD[96+rdi]
  2556. adc r13,QWORD[104+rdi]
  2557. adc r14,QWORD[112+rdi]
  2558. adc r15,QWORD[120+rdi]
  2559. lea rbp,[rsi]
  2560. lea rdi,[128+rdi]
  2561. sbb rax,rax
  2562. mov rdx,QWORD[((-64))+rsi]
  2563. mov QWORD[((16+8))+rsp],rax
  2564. mov QWORD[((24+8))+rsp],rdi
  2565. xor eax,eax
  2566. jmp NEAR $L$sqrx8x_loop
  2567. ALIGN 32
  2568. $L$sqrx8x_loop:
  2569. mov rbx,r8
  2570. mulx r8,rax,QWORD[rbp]
  2571. adcx rbx,rax
  2572. adox r8,r9
  2573. mulx r9,rax,QWORD[8+rbp]
  2574. adcx r8,rax
  2575. adox r9,r10
  2576. mulx r10,rax,QWORD[16+rbp]
  2577. adcx r9,rax
  2578. adox r10,r11
  2579. mulx r11,rax,QWORD[24+rbp]
  2580. adcx r10,rax
  2581. adox r11,r12
  2582. DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
  2583. adcx r11,rax
  2584. adox r12,r13
  2585. mulx r13,rax,QWORD[40+rbp]
  2586. adcx r12,rax
  2587. adox r13,r14
  2588. mulx r14,rax,QWORD[48+rbp]
  2589. mov QWORD[rcx*8+rdi],rbx
  2590. mov ebx,0
  2591. adcx r13,rax
  2592. adox r14,r15
  2593. DB 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
  2594. mov rdx,QWORD[8+rcx*8+rsi]
  2595. adcx r14,rax
  2596. adox r15,rbx
  2597. adcx r15,rbx
  2598. DB 0x67
  2599. inc rcx
  2600. jnz NEAR $L$sqrx8x_loop
  2601. lea rbp,[64+rbp]
  2602. mov rcx,-8
  2603. cmp rbp,QWORD[((8+8))+rsp]
  2604. je NEAR $L$sqrx8x_break
  2605. sub rbx,QWORD[((16+8))+rsp]
  2606. DB 0x66
  2607. mov rdx,QWORD[((-64))+rsi]
  2608. adcx r8,QWORD[rdi]
  2609. adcx r9,QWORD[8+rdi]
  2610. adc r10,QWORD[16+rdi]
  2611. adc r11,QWORD[24+rdi]
  2612. adc r12,QWORD[32+rdi]
  2613. adc r13,QWORD[40+rdi]
  2614. adc r14,QWORD[48+rdi]
  2615. adc r15,QWORD[56+rdi]
  2616. lea rdi,[64+rdi]
  2617. DB 0x67
  2618. sbb rax,rax
  2619. xor ebx,ebx
  2620. mov QWORD[((16+8))+rsp],rax
  2621. jmp NEAR $L$sqrx8x_loop
  2622. ALIGN 32
  2623. $L$sqrx8x_break:
  2624. xor rbp,rbp
  2625. sub rbx,QWORD[((16+8))+rsp]
  2626. adcx r8,rbp
  2627. mov rcx,QWORD[((24+8))+rsp]
  2628. adcx r9,rbp
  2629. mov rdx,QWORD[rsi]
  2630. adc r10,0
  2631. mov QWORD[rdi],r8
  2632. adc r11,0
  2633. adc r12,0
  2634. adc r13,0
  2635. adc r14,0
  2636. adc r15,0
  2637. cmp rdi,rcx
  2638. je NEAR $L$sqrx8x_outer_loop
  2639. mov QWORD[8+rdi],r9
  2640. mov r9,QWORD[8+rcx]
  2641. mov QWORD[16+rdi],r10
  2642. mov r10,QWORD[16+rcx]
  2643. mov QWORD[24+rdi],r11
  2644. mov r11,QWORD[24+rcx]
  2645. mov QWORD[32+rdi],r12
  2646. mov r12,QWORD[32+rcx]
  2647. mov QWORD[40+rdi],r13
  2648. mov r13,QWORD[40+rcx]
  2649. mov QWORD[48+rdi],r14
  2650. mov r14,QWORD[48+rcx]
  2651. mov QWORD[56+rdi],r15
  2652. mov r15,QWORD[56+rcx]
  2653. mov rdi,rcx
  2654. jmp NEAR $L$sqrx8x_outer_loop
  2655. ALIGN 32
  2656. $L$sqrx8x_outer_break:
  2657. mov QWORD[72+rdi],r9
  2658. DB 102,72,15,126,217
  2659. mov QWORD[80+rdi],r10
  2660. mov QWORD[88+rdi],r11
  2661. mov QWORD[96+rdi],r12
  2662. mov QWORD[104+rdi],r13
  2663. mov QWORD[112+rdi],r14
  2664. lea rdi,[((48+8))+rsp]
  2665. mov rdx,QWORD[rcx*1+rsi]
  2666. mov r11,QWORD[8+rdi]
  2667. xor r10,r10
  2668. mov r9,QWORD[((0+8))+rsp]
  2669. adox r11,r11
  2670. mov r12,QWORD[16+rdi]
  2671. mov r13,QWORD[24+rdi]
  2672. ALIGN 32
  2673. $L$sqrx4x_shift_n_add:
  2674. mulx rbx,rax,rdx
  2675. adox r12,r12
  2676. adcx rax,r10
  2677. DB 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
  2678. DB 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
  2679. adox r13,r13
  2680. adcx rbx,r11
  2681. mov r11,QWORD[40+rdi]
  2682. mov QWORD[rdi],rax
  2683. mov QWORD[8+rdi],rbx
  2684. mulx rbx,rax,rdx
  2685. adox r10,r10
  2686. adcx rax,r12
  2687. mov rdx,QWORD[16+rcx*1+rsi]
  2688. mov r12,QWORD[48+rdi]
  2689. adox r11,r11
  2690. adcx rbx,r13
  2691. mov r13,QWORD[56+rdi]
  2692. mov QWORD[16+rdi],rax
  2693. mov QWORD[24+rdi],rbx
  2694. mulx rbx,rax,rdx
  2695. adox r12,r12
  2696. adcx rax,r10
  2697. mov rdx,QWORD[24+rcx*1+rsi]
  2698. lea rcx,[32+rcx]
  2699. mov r10,QWORD[64+rdi]
  2700. adox r13,r13
  2701. adcx rbx,r11
  2702. mov r11,QWORD[72+rdi]
  2703. mov QWORD[32+rdi],rax
  2704. mov QWORD[40+rdi],rbx
  2705. mulx rbx,rax,rdx
  2706. adox r10,r10
  2707. adcx rax,r12
  2708. jrcxz $L$sqrx4x_shift_n_add_break
  2709. DB 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
  2710. adox r11,r11
  2711. adcx rbx,r13
  2712. mov r12,QWORD[80+rdi]
  2713. mov r13,QWORD[88+rdi]
  2714. mov QWORD[48+rdi],rax
  2715. mov QWORD[56+rdi],rbx
  2716. lea rdi,[64+rdi]
  2717. nop
  2718. jmp NEAR $L$sqrx4x_shift_n_add
  2719. ALIGN 32
  2720. $L$sqrx4x_shift_n_add_break:
  2721. adcx rbx,r13
  2722. mov QWORD[48+rdi],rax
  2723. mov QWORD[56+rdi],rbx
  2724. lea rdi,[64+rdi]
  2725. DB 102,72,15,126,213
  ; __bn_sqrx8x_reduction: reduction stage. The code directly above falls
  ; through into this label; presumably it is also entered directly by other
  ; code in the file (TODO confirm against the generating Perl script).
  ; NOTE(review): the roles below are read off the instructions in this block;
  ; the names n[], t[] and n0 are the usual Montgomery-reduction vocabulary and
  ; are an assumption, not something this block states.
  ;   In:  rbp        = pointer to the first 64-byte group of n[]
  ;        r9         = byte count; rbp+r9-64 becomes the "last group" limit
  ;        rdi        = limit for the outer loop (saved at [8+8+rsp])
  ;        [32+8+rsp] = per-word multiplier factor (presumably n0)
  ;        [48+8+rsp] = start of the work buffer t[]
  ;        xmm2, xmm3 = values moved back into rbp / rcx at $L$sqrx8x_no_tail
  ;   Stack slots written here: [0+8+rsp], [8+8+rsp], [16+8+rsp], [24+8+rsp]
  ;   and the 8 qwords starting at [48+8+rsp] (saved multipliers).
  ;   Uses MULX with ADCX (CF chain) and ADOX (OF chain) as two independent
  ;   carry chains, so the exact instruction order matters.
  2726. __bn_sqrx8x_reduction:
  2727. xor eax,eax ; rax = 0: initial carry word, stored at [24+8+rsp] below
  2728. mov rbx,QWORD[((32+8))+rsp] ; rbx = multiplier factor (presumably n0)
  2729. mov rdx,QWORD[((48+8))+rsp] ; rdx = first word of the work buffer
  2730. lea rcx,[((-64))+r9*1+rbp] ; rcx = rbp + r9 - 64
  2731. mov QWORD[((0+8))+rsp],rcx ; save it: compared against rbp to detect the last group
  2732. mov QWORD[((8+8))+rsp],rdi ; save incoming rdi: outer-loop end limit
  2733. lea rdi,[((48+8))+rsp] ; rdi = first 64-byte window of the work buffer
  2734. jmp NEAR $L$sqrx8x_reduction_loop
  2735. ALIGN 32
  ; Outer loop: one iteration per 64-byte window at rdi. On entry rdx holds the
  ; window's first word and rax the carry word from the previous window.
  2736. $L$sqrx8x_reduction_loop:
  2737. mov r9,QWORD[8+rdi]
  2738. mov r10,QWORD[16+rdi]
  2739. mov r11,QWORD[24+rdi]
  2740. mov r12,QWORD[32+rdi]
  2741. mov r8,rdx ; r8 = word 0 of the window (already in rdx)
  2742. imul rdx,rbx ; rdx = low 64 bits of word0 * rbx: multiplier for the first pass
  2743. mov r13,QWORD[40+rdi]
  2744. mov r14,QWORD[48+rdi]
  2745. mov r15,QWORD[56+rdi]
  2746. mov QWORD[((24+8))+rsp],rax ; park the incoming carry word; re-added at $L$sqrx8x_tail_done
  2747. lea rdi,[64+rdi] ; rdi -> next window
  2748. xor rsi,rsi ; rsi = 0, and CF = OF = 0 for the adcx/adox chains
  2749. mov rcx,-8 ; 8 passes, counted up to zero
  2750. jmp NEAR $L$sqrx8x_reduce
  2751. ALIGN 32
  ; Inner loop, 8 passes: r8..r15 += rdx * (8 qwords at rbp), shifted down one
  ; word per pass; the multiplier for the next pass is derived mid-pass.
  2752. $L$sqrx8x_reduce:
  2753. mov rbx,r8
  2754. mulx r8,rax,QWORD[rbp] ; r8:rax = rdx * [rbp]
  2755. adcx rax,rbx ; low-word sum is never stored; only its carry (CF) is used
  2756. adox r8,r9
  2757. mulx r9,rbx,QWORD[8+rbp]
  2758. adcx r8,rbx
  2759. adox r9,r10
  2760. mulx r10,rbx,QWORD[16+rbp]
  2761. adcx r9,rbx
  2762. adox r10,r11
  2763. mulx r11,rbx,QWORD[24+rbp]
  2764. adcx r10,rbx
  2765. adox r11,r12
  2766. DB 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 ; hand-encoded: mulx r12,rbx,QWORD[32+rbp] (disp32 form)
  2767. mov rax,rdx ; keep the current multiplier in rax
  2768. mov rdx,r8 ; rdx = new low word, input for the next multiplier
  2769. adcx r11,rbx
  2770. adox r12,r13
  2771. mulx rdx,rbx,QWORD[((32+8))+rsp] ; rbx = low 64 bits of r8 * [32+8+rsp]; high half in rdx is discarded
  2772. mov rdx,rax ; restore the current multiplier
  2773. mov QWORD[((64+48+8))+rcx*8+rsp],rax ; save it at [48+8+rsp + 8*(rcx+8)] for the tail loop
  2774. mulx r13,rax,QWORD[40+rbp]
  2775. adcx r12,rax
  2776. adox r13,r14
  2777. mulx r14,rax,QWORD[48+rbp]
  2778. adcx r13,rax
  2779. adox r14,r15
  2780. mulx r15,rax,QWORD[56+rbp]
  2781. mov rdx,rbx ; switch to the multiplier for the next pass
  2782. adcx r14,rax
  2783. adox r15,rsi ; rsi is 0: folds the OF chain into r15
  2784. adcx r15,rsi ; rsi is 0: folds the CF chain into r15
  2785. DB 0x67,0x67,0x67 ; three address-size prefix bytes ahead of the inc (presumably padding)
  2786. inc rcx ; inc leaves CF untouched
  2787. jnz NEAR $L$sqrx8x_reduce
  2788. mov rax,rsi ; rax = 0
  2789. cmp rbp,QWORD[((0+8))+rsp] ; was this the last 64-byte group at rbp?
  2790. jae NEAR $L$sqrx8x_no_tail ; yes (CF is 0 on this path)
  2791. mov rdx,QWORD[((48+8))+rsp] ; reload the first saved multiplier
  2792. add r8,QWORD[rdi] ; accumulate the next window of the work buffer into r8..r15
  2793. lea rbp,[64+rbp] ; advance to the next 64-byte group
  2794. mov rcx,-8
  2795. adcx r9,QWORD[8+rdi]
  2796. adcx r10,QWORD[16+rdi]
  2797. adc r11,QWORD[24+rdi]
  2798. adc r12,QWORD[32+rdi]
  2799. adc r13,QWORD[40+rdi]
  2800. adc r14,QWORD[48+rdi]
  2801. adc r15,QWORD[56+rdi]
  2802. lea rdi,[64+rdi]
  2803. sbb rax,rax ; rax = 0 or -1: carry out of the additions above
  2804. xor rsi,rsi ; rsi = 0, CF = OF = 0
  2805. mov QWORD[((16+8))+rsp],rax ; park that carry across the tail loop
  2806. jmp NEAR $L$sqrx8x_tail
  2807. ALIGN 32
  ; Tail loop, 8 passes per group: replays the 8 saved multipliers against the
  ; next 64-byte group at rbp, storing one finished word per pass.
  2808. $L$sqrx8x_tail:
  2809. mov rbx,r8
  2810. mulx r8,rax,QWORD[rbp]
  2811. adcx rbx,rax ; rbx = finished low word for this pass
  2812. adox r8,r9
  2813. mulx r9,rax,QWORD[8+rbp]
  2814. adcx r8,rax
  2815. adox r9,r10
  2816. mulx r10,rax,QWORD[16+rbp]
  2817. adcx r9,rax
  2818. adox r10,r11
  2819. mulx r11,rax,QWORD[24+rbp]
  2820. adcx r10,rax
  2821. adox r11,r12
  2822. DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 ; hand-encoded: mulx r12,rax,QWORD[32+rbp] (disp32 form)
  2823. adcx r11,rax
  2824. adox r12,r13
  2825. mulx r13,rax,QWORD[40+rbp]
  2826. adcx r12,rax
  2827. adox r13,r14
  2828. mulx r14,rax,QWORD[48+rbp]
  2829. adcx r13,rax
  2830. adox r14,r15
  2831. mulx r15,rax,QWORD[56+rbp]
  2832. mov rdx,QWORD[((72+48+8))+rcx*8+rsp] ; fetch the saved multiplier for the next pass
  2833. adcx r14,rax
  2834. adox r15,rsi ; rsi is 0
  2835. mov QWORD[rcx*8+rdi],rbx ; store the finished word (rcx is negative: lands below rdi)
  2836. mov rbx,r8
  2837. adcx r15,rsi ; rsi is 0
  2838. inc rcx
  2839. jnz NEAR $L$sqrx8x_tail
  2840. cmp rbp,QWORD[((0+8))+rsp] ; last 64-byte group processed?
  2841. jae NEAR $L$sqrx8x_tail_done
  2842. sub rsi,QWORD[((16+8))+rsp] ; 0 - (0 or -1): turns the parked carry back into CF
  2843. mov rdx,QWORD[((48+8))+rsp] ; restart from the first saved multiplier
  2844. lea rbp,[64+rbp]
  2845. adc r8,QWORD[rdi]
  2846. adc r9,QWORD[8+rdi]
  2847. adc r10,QWORD[16+rdi]
  2848. adc r11,QWORD[24+rdi]
  2849. adc r12,QWORD[32+rdi]
  2850. adc r13,QWORD[40+rdi]
  2851. adc r14,QWORD[48+rdi]
  2852. adc r15,QWORD[56+rdi]
  2853. lea rdi,[64+rdi]
  2854. sbb rax,rax ; capture the new carry as 0 / -1
  2855. sub rcx,8 ; rcx was 0 after the loop, so this sets it to -8
  2856. xor rsi,rsi ; rsi = 0, CF = OF = 0
  2857. mov QWORD[((16+8))+rsp],rax
  2858. jmp NEAR $L$sqrx8x_tail
  2859. ALIGN 32
  2860. $L$sqrx8x_tail_done:
  2861. xor rax,rax
  2862. add r8,QWORD[((24+8))+rsp] ; add the carry word parked at the top of the outer loop
  2863. adc r9,0
  2864. adc r10,0
  2865. adc r11,0
  2866. adc r12,0
  2867. adc r13,0
  2868. adc r14,0
  2869. adc r15,0
  2870. adc rax,0 ; rax = carry out of that propagation
  2871. sub rsi,QWORD[((16+8))+rsp] ; rsi is 0 here: restores the parked carry into CF
  2872. $L$sqrx8x_no_tail:
  2873. adc r8,QWORD[rdi] ; add the window at rdi into r8..r15 (CF is 0 when jumped to directly)
  2874. DB 102,72,15,126,217 ; hand-encoded: movq rcx,xmm3
  2875. adc r9,QWORD[8+rdi]
  2876. mov rsi,QWORD[56+rbp] ; read the qword at [56+rbp] before rbp is replaced
  2877. DB 102,72,15,126,213 ; hand-encoded: movq rbp,xmm2
  2878. adc r10,QWORD[16+rdi]
  2879. adc r11,QWORD[24+rdi]
  2880. adc r12,QWORD[32+rdi]
  2881. adc r13,QWORD[40+rdi]
  2882. adc r14,QWORD[48+rdi]
  2883. adc r15,QWORD[56+rdi]
  2884. adc rax,0 ; rax = carry word handed to the next outer iteration
  2885. mov rbx,QWORD[((32+8))+rsp] ; reload the multiplier factor (rbx was used as scratch)
  2886. mov rdx,QWORD[64+rcx*1+rdi] ; preload word 0 of the next window (rcx now holds the xmm3 value)
  2887. mov QWORD[rdi],r8 ; store the 8 result words of this window
  2888. lea r8,[64+rdi] ; r8 reused as a pointer: end of the words just stored
  2889. mov QWORD[8+rdi],r9
  2890. mov QWORD[16+rdi],r10
  2891. mov QWORD[24+rdi],r11
  2892. mov QWORD[32+rdi],r12
  2893. mov QWORD[40+rdi],r13
  2894. mov QWORD[48+rdi],r14
  2895. mov QWORD[56+rdi],r15
  2896. lea rdi,[64+rcx*1+rdi] ; rdi -> start of the next window
  2897. cmp r8,QWORD[((8+8))+rsp] ; reached the limit saved on entry?
  2898. jb NEAR $L$sqrx8x_reduction_loop
  2899. DB 0F3h,0C3h ;repret
  2900. ALIGN 32
  ; __bn_postx4x_internal: masked final subtraction, four qwords per iteration.
  ; For every word i: out[i] = src[i] + (~n[i] AND mask) + carry, where word 0
  ; uses ~(n[0]-1), i.e. the two's complement, so with mask = all-ones the sum
  ; is src - n and with mask = 0 the source is copied unchanged.
  ;   In:  rbp  = pointer to the words n[] being (conditionally) subtracted
  ;        rdi  = pointer to the source words
  ;        xmm1 = destination pointer (moved into rdx, and also into rsi)
  ;        rcx  = negative byte count; rcx >> 5 is the 32-byte iteration counter
  ;        rax  = selector, negated into the mask (presumably 0 or 1 -- verify
  ;               against the callers, which are outside this chunk)
  ;   Out: r9 = -(incoming rcx), r10 = incoming rcx, rsi = value from xmm1;
  ;        rbp, rdi and rdx are advanced past the processed words.
  ;   Uses ANDN (BMI1). r8 carries the CF between iterations as 0 / -1.
  2901. __bn_postx4x_internal:
  2902. mov r12,QWORD[rbp]
  2903. mov r10,rcx ; keep two copies of the incoming count
  2904. mov r9,rcx
  2905. neg rax ; selector -> mask (1 becomes all-ones, 0 stays 0)
  2906. sar rcx,3+2 ; bytes -> groups of four qwords (still negative)
  2907. DB 102,72,15,126,202 ; hand-encoded: movq rdx,xmm1 (destination pointer)
  2908. DB 102,72,15,126,206 ; hand-encoded: movq rsi,xmm1
  2909. dec r12 ; so that the NOT inside andn yields -n[0] for word 0
  2910. mov r13,QWORD[8+rbp]
  2911. xor r8,r8 ; no carry into the first group
  2912. mov r14,QWORD[16+rbp]
  2913. mov r15,QWORD[24+rbp]
  2914. jmp NEAR $L$sqrx4x_sub_entry
  2915. ALIGN 16
  2916. $L$sqrx4x_sub:
  2917. mov r12,QWORD[rbp]
  2918. mov r13,QWORD[8+rbp]
  2919. mov r14,QWORD[16+rbp]
  2920. mov r15,QWORD[24+rbp]
  2921. $L$sqrx4x_sub_entry:
  2922. andn r12,r12,rax ; r12 = ~r12 AND mask
  2923. lea rbp,[32+rbp]
  2924. andn r13,r13,rax
  2925. andn r14,r14,rax
  2926. andn r15,r15,rax
  2927. neg r8 ; r8 is 0 or -1: re-creates the saved carry in CF
  2928. adc r12,QWORD[rdi]
  2929. adc r13,QWORD[8+rdi]
  2930. adc r14,QWORD[16+rdi]
  2931. adc r15,QWORD[24+rdi]
  2932. mov QWORD[rdx],r12
  2933. lea rdi,[32+rdi]
  2934. mov QWORD[8+rdx],r13
  2935. sbb r8,r8 ; save CF as 0 / -1 (the inc below preserves CF but the loop re-derives it anyway)
  2936. mov QWORD[16+rdx],r14
  2937. mov QWORD[24+rdx],r15
  2938. lea rdx,[32+rdx]
  2939. inc rcx ; counts up towards zero
  2940. jnz NEAR $L$sqrx4x_sub
  2941. neg r9 ; r9 = positive byte count on return
  2942. DB 0F3h,0C3h ;repret
  ; bn_scatter5: copy edx qwords from [rcx] into a strided table.
  ; Registers follow the Win64 argument order (rcx, rdx, r8, r9):
  ;   rcx = source words, edx = word count (only the low 32 bits are used),
  ;   r8  = table base, r9 = slot index within each 256-byte row.
  ; Word i is written to r8 + r9*8 + i*256. Returns immediately when edx is 0.
  ; Clobbers rax, rcx, rdx, r8 and flags; touches no stack.
  2943. global bn_scatter5
  2944. ALIGN 16
  2945. bn_scatter5:
  2946. cmp edx,0
  2947. jz NEAR $L$scatter_epilogue ; nothing to copy
  2948. lea r8,[r9*8+r8] ; r8 = address of the selected slot in row 0
  2949. $L$scatter:
  2950. mov rax,QWORD[rcx]
  2951. lea rcx,[8+rcx] ; next source word
  2952. mov QWORD[r8],rax
  2953. lea r8,[256+r8] ; same slot, next 256-byte row
  2954. sub edx,1
  2955. jnz NEAR $L$scatter
  2956. $L$scatter_epilogue:
  2957. DB 0F3h,0C3h ;repret
  ; bn_gather5: read back one slot from the table layout written by bn_scatter5
  ; (256-byte rows, 32 qword slots per row).
  ; Registers follow the Win64 argument order (rcx, rdx, r8, r9):
  ;   rcx = output words, edx = word count (low 32 bits), r8 = table base,
  ;   r9d = slot index.
  ; Method: build 16 XMM masks on the stack (one 64-bit lane per slot, all-ones
  ; only where the slot number equals r9d), then for every output word load the
  ; whole 256-byte row, AND it with the masks and OR everything together. Every
  ; slot of the row is read whatever the index is, so the addresses accessed do
  ; not depend on r9d.
  ; movdqa is used on the table, so r8 must be 16-byte aligned.
  ; NOTE(review): the loop is do-while style with no zero check (unlike
  ; bn_scatter5); edx = 0 would wrap -- presumably callers never pass 0.
  ; Stack: 0x108 bytes reserved and rsp realigned to 16; the original rsp is
  ; kept in r10, matching the unwind codes in $L$SEH_info_bn_gather5.
  2958. global bn_gather5
  2959. ALIGN 32
  2960. bn_gather5:
  2961. $L$SEH_begin_bn_gather5:
  2962. DB 0x4c,0x8d,0x14,0x24 ; hand-encoded: lea r10,[rsp] (fixed length for the unwind info)
  2963. DB 0x48,0x81,0xec,0x08,0x01,0x00,0x00 ; hand-encoded: sub rsp,0x108
  2964. lea rax,[$L$inc]
  2965. and rsp,-16 ; mask area must be 16-byte aligned for movdqa
  2966. movd xmm5,r9d
  2967. movdqa xmm0,XMMWORD[rax] ; counters {0,0,1,1}: slot numbers of the two 64-bit lanes
  2968. movdqa xmm1,XMMWORD[16+rax] ; step {2,2,2,2}
  2969. lea r11,[128+r8] ; bias by 128 so the row is addressed with -128..+112
  2970. lea rax,[128+rsp] ; same bias for the mask area
  2971. pshufd xmm5,xmm5,0 ; broadcast the index to all four dwords
  2972. movdqa xmm4,xmm1 ; xmm4 keeps the step for the whole sequence
  ; Unrolled mask generation: each group advances the counters by the step,
  ; compares the previous counters with the index and stores the resulting mask.
  2973. movdqa xmm2,xmm1
  2974. paddd xmm1,xmm0
  2975. pcmpeqd xmm0,xmm5
  2976. movdqa xmm3,xmm4
  2977. paddd xmm2,xmm1
  2978. pcmpeqd xmm1,xmm5
  2979. movdqa XMMWORD[(-128)+rax],xmm0 ; mask for slots 0-1
  2980. movdqa xmm0,xmm4
  2981. paddd xmm3,xmm2
  2982. pcmpeqd xmm2,xmm5
  2983. movdqa XMMWORD[(-112)+rax],xmm1
  2984. movdqa xmm1,xmm4
  2985. paddd xmm0,xmm3
  2986. pcmpeqd xmm3,xmm5
  2987. movdqa XMMWORD[(-96)+rax],xmm2
  2988. movdqa xmm2,xmm4
  2989. paddd xmm1,xmm0
  2990. pcmpeqd xmm0,xmm5
  2991. movdqa XMMWORD[(-80)+rax],xmm3
  2992. movdqa xmm3,xmm4
  2993. paddd xmm2,xmm1
  2994. pcmpeqd xmm1,xmm5
  2995. movdqa XMMWORD[(-64)+rax],xmm0
  2996. movdqa xmm0,xmm4
  2997. paddd xmm3,xmm2
  2998. pcmpeqd xmm2,xmm5
  2999. movdqa XMMWORD[(-48)+rax],xmm1
  3000. movdqa xmm1,xmm4
  3001. paddd xmm0,xmm3
  3002. pcmpeqd xmm3,xmm5
  3003. movdqa XMMWORD[(-32)+rax],xmm2
  3004. movdqa xmm2,xmm4
  3005. paddd xmm1,xmm0
  3006. pcmpeqd xmm0,xmm5
  3007. movdqa XMMWORD[(-16)+rax],xmm3
  3008. movdqa xmm3,xmm4
  3009. paddd xmm2,xmm1
  3010. pcmpeqd xmm1,xmm5
  3011. movdqa XMMWORD[rax],xmm0
  3012. movdqa xmm0,xmm4
  3013. paddd xmm3,xmm2
  3014. pcmpeqd xmm2,xmm5
  3015. movdqa XMMWORD[16+rax],xmm1
  3016. movdqa xmm1,xmm4
  3017. paddd xmm0,xmm3
  3018. pcmpeqd xmm3,xmm5
  3019. movdqa XMMWORD[32+rax],xmm2
  3020. movdqa xmm2,xmm4
  3021. paddd xmm1,xmm0
  3022. pcmpeqd xmm0,xmm5
  3023. movdqa XMMWORD[48+rax],xmm3
  3024. movdqa xmm3,xmm4
  3025. paddd xmm2,xmm1
  3026. pcmpeqd xmm1,xmm5
  3027. movdqa XMMWORD[64+rax],xmm0
  3028. movdqa xmm0,xmm4
  3029. paddd xmm3,xmm2
  3030. pcmpeqd xmm2,xmm5
  3031. movdqa XMMWORD[80+rax],xmm1
  3032. movdqa xmm1,xmm4
  3033. paddd xmm0,xmm3
  3034. pcmpeqd xmm3,xmm5
  3035. movdqa XMMWORD[96+rax],xmm2
  3036. movdqa xmm2,xmm4
  3037. movdqa XMMWORD[112+rax],xmm3 ; mask for slots 30-31
  3038. jmp NEAR $L$gather
  3039. ALIGN 32
  ; One iteration per output word: masked OR-reduction over one 256-byte row.
  3040. $L$gather:
  3041. pxor xmm4,xmm4 ; two accumulators, cleared per word
  3042. pxor xmm5,xmm5
  3043. movdqa xmm0,XMMWORD[((-128))+r11]
  3044. movdqa xmm1,XMMWORD[((-112))+r11]
  3045. movdqa xmm2,XMMWORD[((-96))+r11]
  3046. pand xmm0,XMMWORD[((-128))+rax]
  3047. movdqa xmm3,XMMWORD[((-80))+r11]
  3048. pand xmm1,XMMWORD[((-112))+rax]
  3049. por xmm4,xmm0
  3050. pand xmm2,XMMWORD[((-96))+rax]
  3051. por xmm5,xmm1
  3052. pand xmm3,XMMWORD[((-80))+rax]
  3053. por xmm4,xmm2
  3054. por xmm5,xmm3
  3055. movdqa xmm0,XMMWORD[((-64))+r11]
  3056. movdqa xmm1,XMMWORD[((-48))+r11]
  3057. movdqa xmm2,XMMWORD[((-32))+r11]
  3058. pand xmm0,XMMWORD[((-64))+rax]
  3059. movdqa xmm3,XMMWORD[((-16))+r11]
  3060. pand xmm1,XMMWORD[((-48))+rax]
  3061. por xmm4,xmm0
  3062. pand xmm2,XMMWORD[((-32))+rax]
  3063. por xmm5,xmm1
  3064. pand xmm3,XMMWORD[((-16))+rax]
  3065. por xmm4,xmm2
  3066. por xmm5,xmm3
  3067. movdqa xmm0,XMMWORD[r11]
  3068. movdqa xmm1,XMMWORD[16+r11]
  3069. movdqa xmm2,XMMWORD[32+r11]
  3070. pand xmm0,XMMWORD[rax]
  3071. movdqa xmm3,XMMWORD[48+r11]
  3072. pand xmm1,XMMWORD[16+rax]
  3073. por xmm4,xmm0
  3074. pand xmm2,XMMWORD[32+rax]
  3075. por xmm5,xmm1
  3076. pand xmm3,XMMWORD[48+rax]
  3077. por xmm4,xmm2
  3078. por xmm5,xmm3
  3079. movdqa xmm0,XMMWORD[64+r11]
  3080. movdqa xmm1,XMMWORD[80+r11]
  3081. movdqa xmm2,XMMWORD[96+r11]
  3082. pand xmm0,XMMWORD[64+rax]
  3083. movdqa xmm3,XMMWORD[112+r11]
  3084. pand xmm1,XMMWORD[80+rax]
  3085. por xmm4,xmm0
  3086. pand xmm2,XMMWORD[96+rax]
  3087. por xmm5,xmm1
  3088. pand xmm3,XMMWORD[112+rax]
  3089. por xmm4,xmm2
  3090. por xmm5,xmm3
  3091. por xmm4,xmm5 ; merge the two accumulators
  3092. lea r11,[256+r11] ; next row
  3093. pshufd xmm0,xmm4,0x4e ; swap the 64-bit halves ...
  3094. por xmm0,xmm4 ; ... and OR them: the selected qword ends up in the low lane
  3095. movq QWORD[rcx],xmm0
  3096. lea rcx,[8+rcx]
  3097. sub edx,1
  3098. jnz NEAR $L$gather
  3099. lea rsp,[r10] ; restore the rsp captured in the prologue
  3100. DB 0F3h,0C3h ;repret
  3101. $L$SEH_end_bn_gather5:
  ; $L$inc: constants for the index-mask generators (loaded with movdqa, hence
  ; the alignment). First row = starting counters, second row = per-step
  ; increment. The DB bytes that follow are an embedded, NUL-terminated ASCII
  ; identification string.
  3102. ALIGN 64
  3103. $L$inc:
  3104. DD 0,0,1,1 ; initial slot counters: lane 0 = slot 0, lane 1 = slot 1
  3105. DD 2,2,2,2 ; step added to the counters for each successive mask
  ; "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>",0
  3106. DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
  3107. DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
  3108. DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
  3109. DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
  3110. DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
  3111. DB 112,101,110,115,115,108,46,111,114,103,62,0
  ; mul_handler: Win64 language-specific exception handler referenced from the
  ; .xdata records of the functions in this file.
  ; Arguments per the Win64 handler convention: rcx = exception record,
  ; rdx = establisher frame, r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*.
  ; Field names in the comments below come from the documented Windows x64
  ; CONTEXT / DISPATCHER_CONTEXT layouts for the offsets used.
  ; HandlerData is the three image-relative addresses stored after the handler
  ; in each .xdata record; they split the function into
  ;   Rip <  HandlerData[0]                  -> nothing pushed yet
  ;   Rip <  HandlerData[1]                  -> registers pushed, rsp not moved
  ;   Rip <  HandlerData[2]                  -> in the body, rsp was relocated
  ;   Rip >= HandlerData[2]                  -> epilogue, registers already restored
  ; The handler recovers the caller's rsp and non-volatile registers into the
  ; CONTEXT, then calls RtlVirtualUnwind and returns 1.
  3112. EXTERN __imp_RtlVirtualUnwind
  3113. ALIGN 16
  3114. mul_handler:
  3115. push rsi
  3116. push rdi
  3117. push rbx
  3118. push rbp
  3119. push r12
  3120. push r13
  3121. push r14
  3122. push r15
  3123. pushfq
  3124. sub rsp,64 ; 32-byte home area + 4 stack arguments for RtlVirtualUnwind
  3125. mov rax,QWORD[120+r8] ; context->Rax (functions here copy rsp into rax on entry)
  3126. mov rbx,QWORD[248+r8] ; context->Rip
  3127. mov rsi,QWORD[8+r9] ; disp->ImageBase
  3128. mov r11,QWORD[56+r9] ; disp->HandlerData
  3129. mov r10d,DWORD[r11] ; HandlerData[0]
  3130. lea r10,[r10*1+rsi] ; image-relative -> absolute address
  3131. cmp rbx,r10
  3132. jb NEAR $L$common_seh_tail ; before the first label: only rsi/rdi need restoring
  3133. mov r10d,DWORD[4+r11] ; HandlerData[1]
  3134. lea r10,[r10*1+rsi]
  3135. cmp rbx,r10
  3136. jb NEAR $L$common_pop_regs ; registers pushed, rax still holds the entry rsp
  3137. mov rax,QWORD[152+r8] ; context->Rsp
  3138. mov r10d,DWORD[8+r11] ; HandlerData[2]
  3139. lea r10,[r10*1+rsi]
  3140. cmp rbx,r10
  3141. jae NEAR $L$common_seh_tail ; at or past the epilogue label
  3142. lea r10,[$L$mul_epilogue]
  3143. cmp rbx,r10
  3144. ja NEAR $L$body_40 ; Rip beyond bn_mul_mont_gather5: one of the later functions
  3145. mov r10,QWORD[192+r8] ; context->R9 (the word count in bn_mul_mont_gather5)
  3146. mov rax,QWORD[8+r10*8+rax] ; entry rsp saved at [8+r9*8+rsp] by bn_mul_mont_gather5
  3147. jmp NEAR $L$common_pop_regs
  3148. $L$body_40:
  3149. mov rax,QWORD[40+rax] ; entry rsp read from [40+rsp] (presumably where the other functions save it)
  3150. $L$common_pop_regs:
  3151. mov rbx,QWORD[((-8))+rax] ; the six pushes sit just below the entry rsp
  3152. mov rbp,QWORD[((-16))+rax]
  3153. mov r12,QWORD[((-24))+rax]
  3154. mov r13,QWORD[((-32))+rax]
  3155. mov r14,QWORD[((-40))+rax]
  3156. mov r15,QWORD[((-48))+rax]
  3157. mov QWORD[144+r8],rbx ; context->Rbx
  3158. mov QWORD[160+r8],rbp ; context->Rbp
  3159. mov QWORD[216+r8],r12 ; context->R12
  3160. mov QWORD[224+r8],r13 ; context->R13
  3161. mov QWORD[232+r8],r14 ; context->R14
  3162. mov QWORD[240+r8],r15 ; context->R15
  3163. $L$common_seh_tail:
  3164. mov rdi,QWORD[8+rax] ; rdi/rsi were spilled to the caller's home slots by the WIN64 prologue
  3165. mov rsi,QWORD[16+rax]
  3166. mov QWORD[152+r8],rax ; context->Rsp
  3167. mov QWORD[168+r8],rsi ; context->Rsi
  3168. mov QWORD[176+r8],rdi ; context->Rdi
  3169. mov rdi,QWORD[40+r9] ; disp->ContextRecord (copy destination)
  3170. mov rsi,r8 ; copy source: the CONTEXT we just patched
  3171. mov ecx,154 ; qword count: 154*8 = 1232 bytes
  3172. DD 0xa548f3fc ; bytes FC F3 48 A5 = cld; rep movsq
  3173. mov rsi,r9 ; rsi = DISPATCHER_CONTEXT* (r9 is about to be reused)
  3174. xor rcx,rcx ; arg1: handler type 0 (UNW_FLAG_NHANDLER)
  3175. mov rdx,QWORD[8+rsi] ; arg2: disp->ImageBase
  3176. mov r8,QWORD[rsi] ; arg3: disp->ControlPc
  3177. mov r9,QWORD[16+rsi] ; arg4: disp->FunctionEntry
  3178. mov r10,QWORD[40+rsi] ; disp->ContextRecord
  3179. lea r11,[56+rsi] ; &disp->HandlerData
  3180. lea r12,[24+rsi] ; &disp->EstablisherFrame
  3181. mov QWORD[32+rsp],r10 ; arg5
  3182. mov QWORD[40+rsp],r11 ; arg6
  3183. mov QWORD[48+rsp],r12 ; arg7
  3184. mov QWORD[56+rsp],rcx ; arg8: NULL
  3185. call QWORD[__imp_RtlVirtualUnwind]
  3186. mov eax,1 ; return ExceptionContinueSearch
  3187. add rsp,64
  3188. popfq
  3189. pop r15
  3190. pop r14
  3191. pop r13
  3192. pop r12
  3193. pop rbp
  3194. pop rbx
  3195. pop rdi
  3196. pop rsi
  3197. DB 0F3h,0C3h ;repret
  ; .pdata: Win64 function table. Each entry is three image-relative addresses
  ; (function begin, function end, unwind info), i.e. one RUNTIME_FUNCTION
  ; record per function in this file that has unwind data.
  3198. section .pdata rdata align=4
  3199. ALIGN 4
  ; bn_mul_mont_gather5
  3200. DD $L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase
  3201. DD $L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase
  3202. DD $L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase
  ; bn_mul4x_mont_gather5
  3203. DD $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase
  3204. DD $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase
  3205. DD $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase
  ; bn_power5
  3206. DD $L$SEH_begin_bn_power5 wrt ..imagebase
  3207. DD $L$SEH_end_bn_power5 wrt ..imagebase
  3208. DD $L$SEH_info_bn_power5 wrt ..imagebase
  ; bn_from_mont8x
  3209. DD $L$SEH_begin_bn_from_mont8x wrt ..imagebase
  3210. DD $L$SEH_end_bn_from_mont8x wrt ..imagebase
  3211. DD $L$SEH_info_bn_from_mont8x wrt ..imagebase
  ; bn_mulx4x_mont_gather5
  3212. DD $L$SEH_begin_bn_mulx4x_mont_gather5 wrt ..imagebase
  3213. DD $L$SEH_end_bn_mulx4x_mont_gather5 wrt ..imagebase
  3214. DD $L$SEH_info_bn_mulx4x_mont_gather5 wrt ..imagebase
  ; bn_powerx5
  3215. DD $L$SEH_begin_bn_powerx5 wrt ..imagebase
  3216. DD $L$SEH_end_bn_powerx5 wrt ..imagebase
  3217. DD $L$SEH_info_bn_powerx5 wrt ..imagebase
  ; bn_gather5
  3218. DD $L$SEH_begin_bn_gather5 wrt ..imagebase
  3219. DD $L$SEH_end_bn_gather5 wrt ..imagebase
  3220. DD $L$SEH_info_bn_gather5 wrt ..imagebase
  ; .xdata: unwind information referenced by the .pdata entries.
  ; Records that begin with DB 9,0,0,0 are UNWIND_INFO headers with version 1,
  ; flag 1 (exception handler present), no prologue bytes and no unwind codes;
  ; they are followed by the handler address and by the three image-relative
  ; labels that mul_handler reads as HandlerData[0..2].
  ; bn_gather5 instead uses plain unwind codes and has no handler.
  3221. section .xdata rdata align=8
  3222. ALIGN 8
  3223. $L$SEH_info_bn_mul_mont_gather5:
  3224. DB 9,0,0,0
  3225. DD mul_handler wrt ..imagebase
  3226. DD $L$mul_body wrt ..imagebase,$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase ; first two entries are both $L$mul_body
  3227. ALIGN 8
  3228. $L$SEH_info_bn_mul4x_mont_gather5:
  3229. DB 9,0,0,0
  3230. DD mul_handler wrt ..imagebase
  3231. DD $L$mul4x_prologue wrt ..imagebase,$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
  3232. ALIGN 8
  3233. $L$SEH_info_bn_power5:
  3234. DB 9,0,0,0
  3235. DD mul_handler wrt ..imagebase
  3236. DD $L$power5_prologue wrt ..imagebase,$L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase
  3237. ALIGN 8
  3238. $L$SEH_info_bn_from_mont8x:
  3239. DB 9,0,0,0
  3240. DD mul_handler wrt ..imagebase
  3241. DD $L$from_prologue wrt ..imagebase,$L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase
  3242. ALIGN 8
  3243. $L$SEH_info_bn_mulx4x_mont_gather5:
  3244. DB 9,0,0,0
  3245. DD mul_handler wrt ..imagebase
  3246. DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase
  3247. ALIGN 8
  3248. $L$SEH_info_bn_powerx5:
  3249. DB 9,0,0,0
  3250. DD mul_handler wrt ..imagebase
  3251. DD $L$powerx5_prologue wrt ..imagebase,$L$powerx5_body wrt ..imagebase,$L$powerx5_epilogue wrt ..imagebase
  3252. ALIGN 8
  3253. $L$SEH_info_bn_gather5:
  3254. DB 0x01,0x0b,0x03,0x0a ; version 1, no flags; prologue is 0x0b bytes; 3 code slots; frame register 0x0a = r10, offset 0
  3255. DB 0x0b,0x01,0x21,0x00 ; at prologue offset 0x0b: large allocation of 0x21*8 = 0x108 bytes (the sub rsp,0x108)
  3256. DB 0x04,0xa3,0x00,0x00 ; at prologue offset 0x04: set frame register to r10 (the lea r10,[rsp]); last two bytes pad the slot count to even
  3257. ALIGN 8