sha256-x86_64.asm 60 KB


  1. ; This file is generated from a similarly-named Perl script in the BoringSSL
  2. ; source tree. Do not edit by hand.
  3. default rel
  4. %define XMMWORD
  5. %define YMMWORD
  6. %define ZMMWORD
  7. %ifdef BORINGSSL_PREFIX
  8. %include "boringssl_prefix_symbols_nasm.inc"
  9. %endif
  10. section .text code align=64
  11. EXTERN OPENSSL_ia32cap_P
  12. global sha256_block_data_order
  13. ALIGN 16
  14. sha256_block_data_order:
  ; Integer-only SHA-256 block function, Win64 entry point.
  ; Arguments arrive in the Microsoft x64 registers and are remapped below:
  ;   rcx to rdi : pointer to the eight 32-bit state words (loaded at $L$prologue)
  ;   rdx to rsi : input pointer; 64 bytes are consumed per outer-loop iteration
  ;   r8  to rdx : block count; scaled by 64 below to form the end-of-input pointer
  ; rdi and rsi are callee-saved under the Microsoft x64 ABI, so their incoming
  ; values are parked at [8+rsp]/[16+rsp] and reloaded at $L$epilogue.
  15. mov QWORD[8+rsp],rdi ;WIN64 prologue
  16. mov QWORD[16+rsp],rsi
  17. mov rax,rsp
  18. $L$SEH_begin_sha256_block_data_order:
  19. mov rdi,rcx
  20. mov rsi,rdx
  21. mov rdx,r8
  ; CPU dispatch on OPENSSL_ia32cap_P. Keep bit 30 (1073741824) of word 0 and
  ; bits 28 and 9 (268435968) of word 1. If all three are set, jump to the AVX
  ; entry; otherwise if bit 9 of word 1 is set, jump to the SSSE3 entry;
  ; otherwise fall through to the scalar code below.
  ; NOTE(review): the meaning of these bits is defined where OPENSSL_ia32cap_P
  ; is populated, outside this file -- confirm there.
  ; Word 2 is loaded into r11d but is not tested before r11d is overwritten at
  ; $L$prologue on the scalar path.
  22. lea r11,[OPENSSL_ia32cap_P]
  23. mov r9d,DWORD[r11]
  24. mov r10d,DWORD[4+r11]
  25. mov r11d,DWORD[8+r11]
  26. and r9d,1073741824
  27. and r10d,268435968
  28. or r10d,r9d
  29. cmp r10d,1342177792
  30. je NEAR $L$avx_shortcut
  31. test r10d,512
  32. jnz NEAR $L$ssse3_shortcut
  ; Scalar path: remember the incoming rsp in rax, then save the six
  ; callee-saved registers this code uses as scratch.
  33. mov rax,rsp
  34. push rbx
  35. push rbp
  36. push r12
  37. push r13
  38. push r14
  39. push r15
  ; rdx = rsi + blocks*64 (shl by 4, then scaled by 4 in the lea).
  ; Frame after the 64-byte alignment:
  ;   [rsp+0 .. rsp+63]  16-entry message schedule
  ;   [rsp+64]           state pointer
  ;   [rsp+72]           input pointer as passed in
  ;   [rsp+80]           end-of-input pointer
  ;   [rsp+88]           rsp as it was before the pushes (used by the epilogue)
  40. shl rdx,4
  41. sub rsp,16*4+4*8
  42. lea rdx,[rdx*4+rsi]
  43. and rsp,-64
  44. mov QWORD[((64+0))+rsp],rdi
  45. mov QWORD[((64+8))+rsp],rsi
  46. mov QWORD[((64+16))+rsp],rdx
  47. mov QWORD[88+rsp],rax
  48. $L$prologue:
  ; Load the eight state words into eax, ebx, ecx, edx, r8d, r9d, r10d, r11d.
  49. mov eax,DWORD[rdi]
  50. mov ebx,DWORD[4+rdi]
  51. mov ecx,DWORD[8+rdi]
  52. mov edx,DWORD[12+rdi]
  53. mov r8d,DWORD[16+rdi]
  54. mov r9d,DWORD[20+rdi]
  55. mov r10d,DWORD[24+rdi]
  56. mov r11d,DWORD[28+rdi]
  57. jmp NEAR $L$loop
  58. ALIGN 16
  59. $L$loop:
  ; Rounds 0-15 of one 64-byte block, fully unrolled.
  ; Register roles:
  ;   eax,ebx,ecx,edx,r8d,r9d,r10d,r11d = working variables a..h at round 0;
  ;       instead of moving values, each round shifts the register naming by one.
  ;   r12d = T1: byte-swapped input word + h + Ch(e,f,g) + K + Sigma1(e)
  ;   r13d = Sigma1(e) scratch: ((e ror 14 ^ e) ror 5 ^ e) ror 6
  ;   r14d = Sigma0(a) scratch: ((a ror 9 ^ a) ror 11 ^ a) ror 2
  ;   r15d / edi = alternate between Ch scratch, ((f ^ g) & e) ^ g, and the
  ;       a^b term carried into the next round for Maj = b ^ ((a^b) & (b^c))
  ;   rbp  = pointer to the current round constant inside K256
  ;   rsi  = input block, [rsp+4*i] = schedule slot i
  ; rbp advances by 4 per round and by 20 on every fourth round, which steps
  ; over the duplicated copy of each four-constant row in K256.
  ; The last round leaves its Sigma0 term in r14d; it is added by the first
  ; instruction group at $L$rounds_16_xx.
  ; ---- round 0 (also seeds edi = b ^ c and rbp = K256) ----
  60. mov edi,ebx
  61. lea rbp,[K256]
  62. xor edi,ecx
  63. mov r12d,DWORD[rsi]
  64. mov r13d,r8d
  65. mov r14d,eax
  66. bswap r12d
  67. ror r13d,14
  68. mov r15d,r9d
  69. xor r13d,r8d
  70. ror r14d,9
  71. xor r15d,r10d
  72. mov DWORD[rsp],r12d
  73. xor r14d,eax
  74. and r15d,r8d
  75. ror r13d,5
  76. add r12d,r11d
  77. xor r15d,r10d
  78. ror r14d,11
  79. xor r13d,r8d
  80. add r12d,r15d
  81. mov r15d,eax
  82. add r12d,DWORD[rbp]
  83. xor r14d,eax
  84. xor r15d,ebx
  85. ror r13d,6
  86. mov r11d,ebx
  87. and edi,r15d
  88. ror r14d,2
  89. add r12d,r13d
  90. xor r11d,edi
  91. add edx,r12d
  92. add r11d,r12d
  93. lea rbp,[4+rbp]
  94. add r11d,r14d
  ; ---- round 1 ----
  95. mov r12d,DWORD[4+rsi]
  96. mov r13d,edx
  97. mov r14d,r11d
  98. bswap r12d
  99. ror r13d,14
  100. mov edi,r8d
  101. xor r13d,edx
  102. ror r14d,9
  103. xor edi,r9d
  104. mov DWORD[4+rsp],r12d
  105. xor r14d,r11d
  106. and edi,edx
  107. ror r13d,5
  108. add r12d,r10d
  109. xor edi,r9d
  110. ror r14d,11
  111. xor r13d,edx
  112. add r12d,edi
  113. mov edi,r11d
  114. add r12d,DWORD[rbp]
  115. xor r14d,r11d
  116. xor edi,eax
  117. ror r13d,6
  118. mov r10d,eax
  119. and r15d,edi
  120. ror r14d,2
  121. add r12d,r13d
  122. xor r10d,r15d
  123. add ecx,r12d
  124. add r10d,r12d
  125. lea rbp,[4+rbp]
  126. add r10d,r14d
  ; ---- round 2 ----
  127. mov r12d,DWORD[8+rsi]
  128. mov r13d,ecx
  129. mov r14d,r10d
  130. bswap r12d
  131. ror r13d,14
  132. mov r15d,edx
  133. xor r13d,ecx
  134. ror r14d,9
  135. xor r15d,r8d
  136. mov DWORD[8+rsp],r12d
  137. xor r14d,r10d
  138. and r15d,ecx
  139. ror r13d,5
  140. add r12d,r9d
  141. xor r15d,r8d
  142. ror r14d,11
  143. xor r13d,ecx
  144. add r12d,r15d
  145. mov r15d,r10d
  146. add r12d,DWORD[rbp]
  147. xor r14d,r10d
  148. xor r15d,r11d
  149. ror r13d,6
  150. mov r9d,r11d
  151. and edi,r15d
  152. ror r14d,2
  153. add r12d,r13d
  154. xor r9d,edi
  155. add ebx,r12d
  156. add r9d,r12d
  157. lea rbp,[4+rbp]
  158. add r9d,r14d
  ; ---- round 3 (rbp += 20: skip the duplicated K256 row) ----
  159. mov r12d,DWORD[12+rsi]
  160. mov r13d,ebx
  161. mov r14d,r9d
  162. bswap r12d
  163. ror r13d,14
  164. mov edi,ecx
  165. xor r13d,ebx
  166. ror r14d,9
  167. xor edi,edx
  168. mov DWORD[12+rsp],r12d
  169. xor r14d,r9d
  170. and edi,ebx
  171. ror r13d,5
  172. add r12d,r8d
  173. xor edi,edx
  174. ror r14d,11
  175. xor r13d,ebx
  176. add r12d,edi
  177. mov edi,r9d
  178. add r12d,DWORD[rbp]
  179. xor r14d,r9d
  180. xor edi,r10d
  181. ror r13d,6
  182. mov r8d,r10d
  183. and r15d,edi
  184. ror r14d,2
  185. add r12d,r13d
  186. xor r8d,r15d
  187. add eax,r12d
  188. add r8d,r12d
  189. lea rbp,[20+rbp]
  190. add r8d,r14d
  ; ---- round 4 ----
  191. mov r12d,DWORD[16+rsi]
  192. mov r13d,eax
  193. mov r14d,r8d
  194. bswap r12d
  195. ror r13d,14
  196. mov r15d,ebx
  197. xor r13d,eax
  198. ror r14d,9
  199. xor r15d,ecx
  200. mov DWORD[16+rsp],r12d
  201. xor r14d,r8d
  202. and r15d,eax
  203. ror r13d,5
  204. add r12d,edx
  205. xor r15d,ecx
  206. ror r14d,11
  207. xor r13d,eax
  208. add r12d,r15d
  209. mov r15d,r8d
  210. add r12d,DWORD[rbp]
  211. xor r14d,r8d
  212. xor r15d,r9d
  213. ror r13d,6
  214. mov edx,r9d
  215. and edi,r15d
  216. ror r14d,2
  217. add r12d,r13d
  218. xor edx,edi
  219. add r11d,r12d
  220. add edx,r12d
  221. lea rbp,[4+rbp]
  222. add edx,r14d
  ; ---- round 5 ----
  223. mov r12d,DWORD[20+rsi]
  224. mov r13d,r11d
  225. mov r14d,edx
  226. bswap r12d
  227. ror r13d,14
  228. mov edi,eax
  229. xor r13d,r11d
  230. ror r14d,9
  231. xor edi,ebx
  232. mov DWORD[20+rsp],r12d
  233. xor r14d,edx
  234. and edi,r11d
  235. ror r13d,5
  236. add r12d,ecx
  237. xor edi,ebx
  238. ror r14d,11
  239. xor r13d,r11d
  240. add r12d,edi
  241. mov edi,edx
  242. add r12d,DWORD[rbp]
  243. xor r14d,edx
  244. xor edi,r8d
  245. ror r13d,6
  246. mov ecx,r8d
  247. and r15d,edi
  248. ror r14d,2
  249. add r12d,r13d
  250. xor ecx,r15d
  251. add r10d,r12d
  252. add ecx,r12d
  253. lea rbp,[4+rbp]
  254. add ecx,r14d
  ; ---- round 6 ----
  255. mov r12d,DWORD[24+rsi]
  256. mov r13d,r10d
  257. mov r14d,ecx
  258. bswap r12d
  259. ror r13d,14
  260. mov r15d,r11d
  261. xor r13d,r10d
  262. ror r14d,9
  263. xor r15d,eax
  264. mov DWORD[24+rsp],r12d
  265. xor r14d,ecx
  266. and r15d,r10d
  267. ror r13d,5
  268. add r12d,ebx
  269. xor r15d,eax
  270. ror r14d,11
  271. xor r13d,r10d
  272. add r12d,r15d
  273. mov r15d,ecx
  274. add r12d,DWORD[rbp]
  275. xor r14d,ecx
  276. xor r15d,edx
  277. ror r13d,6
  278. mov ebx,edx
  279. and edi,r15d
  280. ror r14d,2
  281. add r12d,r13d
  282. xor ebx,edi
  283. add r9d,r12d
  284. add ebx,r12d
  285. lea rbp,[4+rbp]
  286. add ebx,r14d
  ; ---- round 7 (rbp += 20: skip the duplicated K256 row) ----
  287. mov r12d,DWORD[28+rsi]
  288. mov r13d,r9d
  289. mov r14d,ebx
  290. bswap r12d
  291. ror r13d,14
  292. mov edi,r10d
  293. xor r13d,r9d
  294. ror r14d,9
  295. xor edi,r11d
  296. mov DWORD[28+rsp],r12d
  297. xor r14d,ebx
  298. and edi,r9d
  299. ror r13d,5
  300. add r12d,eax
  301. xor edi,r11d
  302. ror r14d,11
  303. xor r13d,r9d
  304. add r12d,edi
  305. mov edi,ebx
  306. add r12d,DWORD[rbp]
  307. xor r14d,ebx
  308. xor edi,ecx
  309. ror r13d,6
  310. mov eax,ecx
  311. and r15d,edi
  312. ror r14d,2
  313. add r12d,r13d
  314. xor eax,r15d
  315. add r8d,r12d
  316. add eax,r12d
  317. lea rbp,[20+rbp]
  318. add eax,r14d
  ; ---- round 8 ----
  319. mov r12d,DWORD[32+rsi]
  320. mov r13d,r8d
  321. mov r14d,eax
  322. bswap r12d
  323. ror r13d,14
  324. mov r15d,r9d
  325. xor r13d,r8d
  326. ror r14d,9
  327. xor r15d,r10d
  328. mov DWORD[32+rsp],r12d
  329. xor r14d,eax
  330. and r15d,r8d
  331. ror r13d,5
  332. add r12d,r11d
  333. xor r15d,r10d
  334. ror r14d,11
  335. xor r13d,r8d
  336. add r12d,r15d
  337. mov r15d,eax
  338. add r12d,DWORD[rbp]
  339. xor r14d,eax
  340. xor r15d,ebx
  341. ror r13d,6
  342. mov r11d,ebx
  343. and edi,r15d
  344. ror r14d,2
  345. add r12d,r13d
  346. xor r11d,edi
  347. add edx,r12d
  348. add r11d,r12d
  349. lea rbp,[4+rbp]
  350. add r11d,r14d
  ; ---- round 9 ----
  351. mov r12d,DWORD[36+rsi]
  352. mov r13d,edx
  353. mov r14d,r11d
  354. bswap r12d
  355. ror r13d,14
  356. mov edi,r8d
  357. xor r13d,edx
  358. ror r14d,9
  359. xor edi,r9d
  360. mov DWORD[36+rsp],r12d
  361. xor r14d,r11d
  362. and edi,edx
  363. ror r13d,5
  364. add r12d,r10d
  365. xor edi,r9d
  366. ror r14d,11
  367. xor r13d,edx
  368. add r12d,edi
  369. mov edi,r11d
  370. add r12d,DWORD[rbp]
  371. xor r14d,r11d
  372. xor edi,eax
  373. ror r13d,6
  374. mov r10d,eax
  375. and r15d,edi
  376. ror r14d,2
  377. add r12d,r13d
  378. xor r10d,r15d
  379. add ecx,r12d
  380. add r10d,r12d
  381. lea rbp,[4+rbp]
  382. add r10d,r14d
  ; ---- round 10 ----
  383. mov r12d,DWORD[40+rsi]
  384. mov r13d,ecx
  385. mov r14d,r10d
  386. bswap r12d
  387. ror r13d,14
  388. mov r15d,edx
  389. xor r13d,ecx
  390. ror r14d,9
  391. xor r15d,r8d
  392. mov DWORD[40+rsp],r12d
  393. xor r14d,r10d
  394. and r15d,ecx
  395. ror r13d,5
  396. add r12d,r9d
  397. xor r15d,r8d
  398. ror r14d,11
  399. xor r13d,ecx
  400. add r12d,r15d
  401. mov r15d,r10d
  402. add r12d,DWORD[rbp]
  403. xor r14d,r10d
  404. xor r15d,r11d
  405. ror r13d,6
  406. mov r9d,r11d
  407. and edi,r15d
  408. ror r14d,2
  409. add r12d,r13d
  410. xor r9d,edi
  411. add ebx,r12d
  412. add r9d,r12d
  413. lea rbp,[4+rbp]
  414. add r9d,r14d
  ; ---- round 11 (rbp += 20: skip the duplicated K256 row) ----
  415. mov r12d,DWORD[44+rsi]
  416. mov r13d,ebx
  417. mov r14d,r9d
  418. bswap r12d
  419. ror r13d,14
  420. mov edi,ecx
  421. xor r13d,ebx
  422. ror r14d,9
  423. xor edi,edx
  424. mov DWORD[44+rsp],r12d
  425. xor r14d,r9d
  426. and edi,ebx
  427. ror r13d,5
  428. add r12d,r8d
  429. xor edi,edx
  430. ror r14d,11
  431. xor r13d,ebx
  432. add r12d,edi
  433. mov edi,r9d
  434. add r12d,DWORD[rbp]
  435. xor r14d,r9d
  436. xor edi,r10d
  437. ror r13d,6
  438. mov r8d,r10d
  439. and r15d,edi
  440. ror r14d,2
  441. add r12d,r13d
  442. xor r8d,r15d
  443. add eax,r12d
  444. add r8d,r12d
  445. lea rbp,[20+rbp]
  446. add r8d,r14d
  ; ---- round 12 ----
  447. mov r12d,DWORD[48+rsi]
  448. mov r13d,eax
  449. mov r14d,r8d
  450. bswap r12d
  451. ror r13d,14
  452. mov r15d,ebx
  453. xor r13d,eax
  454. ror r14d,9
  455. xor r15d,ecx
  456. mov DWORD[48+rsp],r12d
  457. xor r14d,r8d
  458. and r15d,eax
  459. ror r13d,5
  460. add r12d,edx
  461. xor r15d,ecx
  462. ror r14d,11
  463. xor r13d,eax
  464. add r12d,r15d
  465. mov r15d,r8d
  466. add r12d,DWORD[rbp]
  467. xor r14d,r8d
  468. xor r15d,r9d
  469. ror r13d,6
  470. mov edx,r9d
  471. and edi,r15d
  472. ror r14d,2
  473. add r12d,r13d
  474. xor edx,edi
  475. add r11d,r12d
  476. add edx,r12d
  477. lea rbp,[4+rbp]
  478. add edx,r14d
  ; ---- round 13 ----
  479. mov r12d,DWORD[52+rsi]
  480. mov r13d,r11d
  481. mov r14d,edx
  482. bswap r12d
  483. ror r13d,14
  484. mov edi,eax
  485. xor r13d,r11d
  486. ror r14d,9
  487. xor edi,ebx
  488. mov DWORD[52+rsp],r12d
  489. xor r14d,edx
  490. and edi,r11d
  491. ror r13d,5
  492. add r12d,ecx
  493. xor edi,ebx
  494. ror r14d,11
  495. xor r13d,r11d
  496. add r12d,edi
  497. mov edi,edx
  498. add r12d,DWORD[rbp]
  499. xor r14d,edx
  500. xor edi,r8d
  501. ror r13d,6
  502. mov ecx,r8d
  503. and r15d,edi
  504. ror r14d,2
  505. add r12d,r13d
  506. xor ecx,r15d
  507. add r10d,r12d
  508. add ecx,r12d
  509. lea rbp,[4+rbp]
  510. add ecx,r14d
  ; ---- round 14 ----
  511. mov r12d,DWORD[56+rsi]
  512. mov r13d,r10d
  513. mov r14d,ecx
  514. bswap r12d
  515. ror r13d,14
  516. mov r15d,r11d
  517. xor r13d,r10d
  518. ror r14d,9
  519. xor r15d,eax
  520. mov DWORD[56+rsp],r12d
  521. xor r14d,ecx
  522. and r15d,r10d
  523. ror r13d,5
  524. add r12d,ebx
  525. xor r15d,eax
  526. ror r14d,11
  527. xor r13d,r10d
  528. add r12d,r15d
  529. mov r15d,ecx
  530. add r12d,DWORD[rbp]
  531. xor r14d,ecx
  532. xor r15d,edx
  533. ror r13d,6
  534. mov ebx,edx
  535. and edi,r15d
  536. ror r14d,2
  537. add r12d,r13d
  538. xor ebx,edi
  539. add r9d,r12d
  540. add ebx,r12d
  541. lea rbp,[4+rbp]
  542. add ebx,r14d
  ; ---- round 15 (rbp += 20; the Sigma0 add into eax is deferred) ----
  543. mov r12d,DWORD[60+rsi]
  544. mov r13d,r9d
  545. mov r14d,ebx
  546. bswap r12d
  547. ror r13d,14
  548. mov edi,r10d
  549. xor r13d,r9d
  550. ror r14d,9
  551. xor edi,r11d
  552. mov DWORD[60+rsp],r12d
  553. xor r14d,ebx
  554. and edi,r9d
  555. ror r13d,5
  556. add r12d,eax
  557. xor edi,r11d
  558. ror r14d,11
  559. xor r13d,r9d
  560. add r12d,edi
  561. mov edi,ebx
  562. add r12d,DWORD[rbp]
  563. xor r14d,ebx
  564. xor edi,ecx
  565. ror r13d,6
  566. mov eax,ecx
  567. and r15d,edi
  568. ror r14d,2
  569. add r12d,r13d
  570. xor eax,r15d
  571. add r8d,r12d
  572. add eax,r12d
  573. lea rbp,[20+rbp]
  574. jmp NEAR $L$rounds_16_xx
  575. ALIGN 16
  576. $L$rounds_16_xx:
  ; Rounds 16-63: sixteen unrolled rounds, executed three times.
  ; Each group first refreshes schedule slot i in place (indices mod 16):
  ;   W[i] = W[i] + sigma0(W[i+1]) + W[i+9] + sigma1(W[i+14])
  ;   sigma0(x) = ((x ror 11 ^ x) ror 7) ^ (x shr 3)      -- built in r13d/r12d
  ;   sigma1(x) = ((x ror 2 ^ x) ror 17) ^ (x shr 10)     -- built in r15d or edi, with r14d
  ; and then runs the same compression round as rounds 0-15 on the new W[i].
  ; The Sigma0 add left pending by the previous round (held in r14d) is done a
  ; few instructions into each group, before r14d is reused.
  ; Exit test at the bottom: byte 3 of the dword at rbp is zero only once rbp
  ; has reached the 0x00010203 row that follows the 64 constants in K256.
  ; ---- schedule slot 0 ----
  577. mov r13d,DWORD[4+rsp]
  578. mov r15d,DWORD[56+rsp]
  579. mov r12d,r13d
  580. ror r13d,11
  581. add eax,r14d
  582. mov r14d,r15d
  583. ror r15d,2
  584. xor r13d,r12d
  585. shr r12d,3
  586. ror r13d,7
  587. xor r15d,r14d
  588. shr r14d,10
  589. ror r15d,17
  590. xor r12d,r13d
  591. xor r15d,r14d
  592. add r12d,DWORD[36+rsp]
  593. add r12d,DWORD[rsp]
  594. mov r13d,r8d
  595. add r12d,r15d
  596. mov r14d,eax
  597. ror r13d,14
  598. mov r15d,r9d
  599. xor r13d,r8d
  600. ror r14d,9
  601. xor r15d,r10d
  602. mov DWORD[rsp],r12d
  603. xor r14d,eax
  604. and r15d,r8d
  605. ror r13d,5
  606. add r12d,r11d
  607. xor r15d,r10d
  608. ror r14d,11
  609. xor r13d,r8d
  610. add r12d,r15d
  611. mov r15d,eax
  612. add r12d,DWORD[rbp]
  613. xor r14d,eax
  614. xor r15d,ebx
  615. ror r13d,6
  616. mov r11d,ebx
  617. and edi,r15d
  618. ror r14d,2
  619. add r12d,r13d
  620. xor r11d,edi
  621. add edx,r12d
  622. add r11d,r12d
  623. lea rbp,[4+rbp]
  ; ---- schedule slot 1 ----
  624. mov r13d,DWORD[8+rsp]
  625. mov edi,DWORD[60+rsp]
  626. mov r12d,r13d
  627. ror r13d,11
  628. add r11d,r14d
  629. mov r14d,edi
  630. ror edi,2
  631. xor r13d,r12d
  632. shr r12d,3
  633. ror r13d,7
  634. xor edi,r14d
  635. shr r14d,10
  636. ror edi,17
  637. xor r12d,r13d
  638. xor edi,r14d
  639. add r12d,DWORD[40+rsp]
  640. add r12d,DWORD[4+rsp]
  641. mov r13d,edx
  642. add r12d,edi
  643. mov r14d,r11d
  644. ror r13d,14
  645. mov edi,r8d
  646. xor r13d,edx
  647. ror r14d,9
  648. xor edi,r9d
  649. mov DWORD[4+rsp],r12d
  650. xor r14d,r11d
  651. and edi,edx
  652. ror r13d,5
  653. add r12d,r10d
  654. xor edi,r9d
  655. ror r14d,11
  656. xor r13d,edx
  657. add r12d,edi
  658. mov edi,r11d
  659. add r12d,DWORD[rbp]
  660. xor r14d,r11d
  661. xor edi,eax
  662. ror r13d,6
  663. mov r10d,eax
  664. and r15d,edi
  665. ror r14d,2
  666. add r12d,r13d
  667. xor r10d,r15d
  668. add ecx,r12d
  669. add r10d,r12d
  670. lea rbp,[4+rbp]
  ; ---- schedule slot 2 ----
  671. mov r13d,DWORD[12+rsp]
  672. mov r15d,DWORD[rsp]
  673. mov r12d,r13d
  674. ror r13d,11
  675. add r10d,r14d
  676. mov r14d,r15d
  677. ror r15d,2
  678. xor r13d,r12d
  679. shr r12d,3
  680. ror r13d,7
  681. xor r15d,r14d
  682. shr r14d,10
  683. ror r15d,17
  684. xor r12d,r13d
  685. xor r15d,r14d
  686. add r12d,DWORD[44+rsp]
  687. add r12d,DWORD[8+rsp]
  688. mov r13d,ecx
  689. add r12d,r15d
  690. mov r14d,r10d
  691. ror r13d,14
  692. mov r15d,edx
  693. xor r13d,ecx
  694. ror r14d,9
  695. xor r15d,r8d
  696. mov DWORD[8+rsp],r12d
  697. xor r14d,r10d
  698. and r15d,ecx
  699. ror r13d,5
  700. add r12d,r9d
  701. xor r15d,r8d
  702. ror r14d,11
  703. xor r13d,ecx
  704. add r12d,r15d
  705. mov r15d,r10d
  706. add r12d,DWORD[rbp]
  707. xor r14d,r10d
  708. xor r15d,r11d
  709. ror r13d,6
  710. mov r9d,r11d
  711. and edi,r15d
  712. ror r14d,2
  713. add r12d,r13d
  714. xor r9d,edi
  715. add ebx,r12d
  716. add r9d,r12d
  717. lea rbp,[4+rbp]
  ; ---- schedule slot 3 (rbp += 20: skip the duplicated K256 row) ----
  718. mov r13d,DWORD[16+rsp]
  719. mov edi,DWORD[4+rsp]
  720. mov r12d,r13d
  721. ror r13d,11
  722. add r9d,r14d
  723. mov r14d,edi
  724. ror edi,2
  725. xor r13d,r12d
  726. shr r12d,3
  727. ror r13d,7
  728. xor edi,r14d
  729. shr r14d,10
  730. ror edi,17
  731. xor r12d,r13d
  732. xor edi,r14d
  733. add r12d,DWORD[48+rsp]
  734. add r12d,DWORD[12+rsp]
  735. mov r13d,ebx
  736. add r12d,edi
  737. mov r14d,r9d
  738. ror r13d,14
  739. mov edi,ecx
  740. xor r13d,ebx
  741. ror r14d,9
  742. xor edi,edx
  743. mov DWORD[12+rsp],r12d
  744. xor r14d,r9d
  745. and edi,ebx
  746. ror r13d,5
  747. add r12d,r8d
  748. xor edi,edx
  749. ror r14d,11
  750. xor r13d,ebx
  751. add r12d,edi
  752. mov edi,r9d
  753. add r12d,DWORD[rbp]
  754. xor r14d,r9d
  755. xor edi,r10d
  756. ror r13d,6
  757. mov r8d,r10d
  758. and r15d,edi
  759. ror r14d,2
  760. add r12d,r13d
  761. xor r8d,r15d
  762. add eax,r12d
  763. add r8d,r12d
  764. lea rbp,[20+rbp]
  ; ---- schedule slot 4 ----
  765. mov r13d,DWORD[20+rsp]
  766. mov r15d,DWORD[8+rsp]
  767. mov r12d,r13d
  768. ror r13d,11
  769. add r8d,r14d
  770. mov r14d,r15d
  771. ror r15d,2
  772. xor r13d,r12d
  773. shr r12d,3
  774. ror r13d,7
  775. xor r15d,r14d
  776. shr r14d,10
  777. ror r15d,17
  778. xor r12d,r13d
  779. xor r15d,r14d
  780. add r12d,DWORD[52+rsp]
  781. add r12d,DWORD[16+rsp]
  782. mov r13d,eax
  783. add r12d,r15d
  784. mov r14d,r8d
  785. ror r13d,14
  786. mov r15d,ebx
  787. xor r13d,eax
  788. ror r14d,9
  789. xor r15d,ecx
  790. mov DWORD[16+rsp],r12d
  791. xor r14d,r8d
  792. and r15d,eax
  793. ror r13d,5
  794. add r12d,edx
  795. xor r15d,ecx
  796. ror r14d,11
  797. xor r13d,eax
  798. add r12d,r15d
  799. mov r15d,r8d
  800. add r12d,DWORD[rbp]
  801. xor r14d,r8d
  802. xor r15d,r9d
  803. ror r13d,6
  804. mov edx,r9d
  805. and edi,r15d
  806. ror r14d,2
  807. add r12d,r13d
  808. xor edx,edi
  809. add r11d,r12d
  810. add edx,r12d
  811. lea rbp,[4+rbp]
  ; ---- schedule slot 5 ----
  812. mov r13d,DWORD[24+rsp]
  813. mov edi,DWORD[12+rsp]
  814. mov r12d,r13d
  815. ror r13d,11
  816. add edx,r14d
  817. mov r14d,edi
  818. ror edi,2
  819. xor r13d,r12d
  820. shr r12d,3
  821. ror r13d,7
  822. xor edi,r14d
  823. shr r14d,10
  824. ror edi,17
  825. xor r12d,r13d
  826. xor edi,r14d
  827. add r12d,DWORD[56+rsp]
  828. add r12d,DWORD[20+rsp]
  829. mov r13d,r11d
  830. add r12d,edi
  831. mov r14d,edx
  832. ror r13d,14
  833. mov edi,eax
  834. xor r13d,r11d
  835. ror r14d,9
  836. xor edi,ebx
  837. mov DWORD[20+rsp],r12d
  838. xor r14d,edx
  839. and edi,r11d
  840. ror r13d,5
  841. add r12d,ecx
  842. xor edi,ebx
  843. ror r14d,11
  844. xor r13d,r11d
  845. add r12d,edi
  846. mov edi,edx
  847. add r12d,DWORD[rbp]
  848. xor r14d,edx
  849. xor edi,r8d
  850. ror r13d,6
  851. mov ecx,r8d
  852. and r15d,edi
  853. ror r14d,2
  854. add r12d,r13d
  855. xor ecx,r15d
  856. add r10d,r12d
  857. add ecx,r12d
  858. lea rbp,[4+rbp]
  ; ---- schedule slot 6 ----
  859. mov r13d,DWORD[28+rsp]
  860. mov r15d,DWORD[16+rsp]
  861. mov r12d,r13d
  862. ror r13d,11
  863. add ecx,r14d
  864. mov r14d,r15d
  865. ror r15d,2
  866. xor r13d,r12d
  867. shr r12d,3
  868. ror r13d,7
  869. xor r15d,r14d
  870. shr r14d,10
  871. ror r15d,17
  872. xor r12d,r13d
  873. xor r15d,r14d
  874. add r12d,DWORD[60+rsp]
  875. add r12d,DWORD[24+rsp]
  876. mov r13d,r10d
  877. add r12d,r15d
  878. mov r14d,ecx
  879. ror r13d,14
  880. mov r15d,r11d
  881. xor r13d,r10d
  882. ror r14d,9
  883. xor r15d,eax
  884. mov DWORD[24+rsp],r12d
  885. xor r14d,ecx
  886. and r15d,r10d
  887. ror r13d,5
  888. add r12d,ebx
  889. xor r15d,eax
  890. ror r14d,11
  891. xor r13d,r10d
  892. add r12d,r15d
  893. mov r15d,ecx
  894. add r12d,DWORD[rbp]
  895. xor r14d,ecx
  896. xor r15d,edx
  897. ror r13d,6
  898. mov ebx,edx
  899. and edi,r15d
  900. ror r14d,2
  901. add r12d,r13d
  902. xor ebx,edi
  903. add r9d,r12d
  904. add ebx,r12d
  905. lea rbp,[4+rbp]
  ; ---- schedule slot 7 (rbp += 20: skip the duplicated K256 row) ----
  906. mov r13d,DWORD[32+rsp]
  907. mov edi,DWORD[20+rsp]
  908. mov r12d,r13d
  909. ror r13d,11
  910. add ebx,r14d
  911. mov r14d,edi
  912. ror edi,2
  913. xor r13d,r12d
  914. shr r12d,3
  915. ror r13d,7
  916. xor edi,r14d
  917. shr r14d,10
  918. ror edi,17
  919. xor r12d,r13d
  920. xor edi,r14d
  921. add r12d,DWORD[rsp]
  922. add r12d,DWORD[28+rsp]
  923. mov r13d,r9d
  924. add r12d,edi
  925. mov r14d,ebx
  926. ror r13d,14
  927. mov edi,r10d
  928. xor r13d,r9d
  929. ror r14d,9
  930. xor edi,r11d
  931. mov DWORD[28+rsp],r12d
  932. xor r14d,ebx
  933. and edi,r9d
  934. ror r13d,5
  935. add r12d,eax
  936. xor edi,r11d
  937. ror r14d,11
  938. xor r13d,r9d
  939. add r12d,edi
  940. mov edi,ebx
  941. add r12d,DWORD[rbp]
  942. xor r14d,ebx
  943. xor edi,ecx
  944. ror r13d,6
  945. mov eax,ecx
  946. and r15d,edi
  947. ror r14d,2
  948. add r12d,r13d
  949. xor eax,r15d
  950. add r8d,r12d
  951. add eax,r12d
  952. lea rbp,[20+rbp]
  ; ---- schedule slot 8 ----
  953. mov r13d,DWORD[36+rsp]
  954. mov r15d,DWORD[24+rsp]
  955. mov r12d,r13d
  956. ror r13d,11
  957. add eax,r14d
  958. mov r14d,r15d
  959. ror r15d,2
  960. xor r13d,r12d
  961. shr r12d,3
  962. ror r13d,7
  963. xor r15d,r14d
  964. shr r14d,10
  965. ror r15d,17
  966. xor r12d,r13d
  967. xor r15d,r14d
  968. add r12d,DWORD[4+rsp]
  969. add r12d,DWORD[32+rsp]
  970. mov r13d,r8d
  971. add r12d,r15d
  972. mov r14d,eax
  973. ror r13d,14
  974. mov r15d,r9d
  975. xor r13d,r8d
  976. ror r14d,9
  977. xor r15d,r10d
  978. mov DWORD[32+rsp],r12d
  979. xor r14d,eax
  980. and r15d,r8d
  981. ror r13d,5
  982. add r12d,r11d
  983. xor r15d,r10d
  984. ror r14d,11
  985. xor r13d,r8d
  986. add r12d,r15d
  987. mov r15d,eax
  988. add r12d,DWORD[rbp]
  989. xor r14d,eax
  990. xor r15d,ebx
  991. ror r13d,6
  992. mov r11d,ebx
  993. and edi,r15d
  994. ror r14d,2
  995. add r12d,r13d
  996. xor r11d,edi
  997. add edx,r12d
  998. add r11d,r12d
  999. lea rbp,[4+rbp]
  ; ---- schedule slot 9 ----
  1000. mov r13d,DWORD[40+rsp]
  1001. mov edi,DWORD[28+rsp]
  1002. mov r12d,r13d
  1003. ror r13d,11
  1004. add r11d,r14d
  1005. mov r14d,edi
  1006. ror edi,2
  1007. xor r13d,r12d
  1008. shr r12d,3
  1009. ror r13d,7
  1010. xor edi,r14d
  1011. shr r14d,10
  1012. ror edi,17
  1013. xor r12d,r13d
  1014. xor edi,r14d
  1015. add r12d,DWORD[8+rsp]
  1016. add r12d,DWORD[36+rsp]
  1017. mov r13d,edx
  1018. add r12d,edi
  1019. mov r14d,r11d
  1020. ror r13d,14
  1021. mov edi,r8d
  1022. xor r13d,edx
  1023. ror r14d,9
  1024. xor edi,r9d
  1025. mov DWORD[36+rsp],r12d
  1026. xor r14d,r11d
  1027. and edi,edx
  1028. ror r13d,5
  1029. add r12d,r10d
  1030. xor edi,r9d
  1031. ror r14d,11
  1032. xor r13d,edx
  1033. add r12d,edi
  1034. mov edi,r11d
  1035. add r12d,DWORD[rbp]
  1036. xor r14d,r11d
  1037. xor edi,eax
  1038. ror r13d,6
  1039. mov r10d,eax
  1040. and r15d,edi
  1041. ror r14d,2
  1042. add r12d,r13d
  1043. xor r10d,r15d
  1044. add ecx,r12d
  1045. add r10d,r12d
  1046. lea rbp,[4+rbp]
  ; ---- schedule slot 10 ----
  1047. mov r13d,DWORD[44+rsp]
  1048. mov r15d,DWORD[32+rsp]
  1049. mov r12d,r13d
  1050. ror r13d,11
  1051. add r10d,r14d
  1052. mov r14d,r15d
  1053. ror r15d,2
  1054. xor r13d,r12d
  1055. shr r12d,3
  1056. ror r13d,7
  1057. xor r15d,r14d
  1058. shr r14d,10
  1059. ror r15d,17
  1060. xor r12d,r13d
  1061. xor r15d,r14d
  1062. add r12d,DWORD[12+rsp]
  1063. add r12d,DWORD[40+rsp]
  1064. mov r13d,ecx
  1065. add r12d,r15d
  1066. mov r14d,r10d
  1067. ror r13d,14
  1068. mov r15d,edx
  1069. xor r13d,ecx
  1070. ror r14d,9
  1071. xor r15d,r8d
  1072. mov DWORD[40+rsp],r12d
  1073. xor r14d,r10d
  1074. and r15d,ecx
  1075. ror r13d,5
  1076. add r12d,r9d
  1077. xor r15d,r8d
  1078. ror r14d,11
  1079. xor r13d,ecx
  1080. add r12d,r15d
  1081. mov r15d,r10d
  1082. add r12d,DWORD[rbp]
  1083. xor r14d,r10d
  1084. xor r15d,r11d
  1085. ror r13d,6
  1086. mov r9d,r11d
  1087. and edi,r15d
  1088. ror r14d,2
  1089. add r12d,r13d
  1090. xor r9d,edi
  1091. add ebx,r12d
  1092. add r9d,r12d
  1093. lea rbp,[4+rbp]
  ; ---- schedule slot 11 (rbp += 20: skip the duplicated K256 row) ----
  1094. mov r13d,DWORD[48+rsp]
  1095. mov edi,DWORD[36+rsp]
  1096. mov r12d,r13d
  1097. ror r13d,11
  1098. add r9d,r14d
  1099. mov r14d,edi
  1100. ror edi,2
  1101. xor r13d,r12d
  1102. shr r12d,3
  1103. ror r13d,7
  1104. xor edi,r14d
  1105. shr r14d,10
  1106. ror edi,17
  1107. xor r12d,r13d
  1108. xor edi,r14d
  1109. add r12d,DWORD[16+rsp]
  1110. add r12d,DWORD[44+rsp]
  1111. mov r13d,ebx
  1112. add r12d,edi
  1113. mov r14d,r9d
  1114. ror r13d,14
  1115. mov edi,ecx
  1116. xor r13d,ebx
  1117. ror r14d,9
  1118. xor edi,edx
  1119. mov DWORD[44+rsp],r12d
  1120. xor r14d,r9d
  1121. and edi,ebx
  1122. ror r13d,5
  1123. add r12d,r8d
  1124. xor edi,edx
  1125. ror r14d,11
  1126. xor r13d,ebx
  1127. add r12d,edi
  1128. mov edi,r9d
  1129. add r12d,DWORD[rbp]
  1130. xor r14d,r9d
  1131. xor edi,r10d
  1132. ror r13d,6
  1133. mov r8d,r10d
  1134. and r15d,edi
  1135. ror r14d,2
  1136. add r12d,r13d
  1137. xor r8d,r15d
  1138. add eax,r12d
  1139. add r8d,r12d
  1140. lea rbp,[20+rbp]
  ; ---- schedule slot 12 ----
  1141. mov r13d,DWORD[52+rsp]
  1142. mov r15d,DWORD[40+rsp]
  1143. mov r12d,r13d
  1144. ror r13d,11
  1145. add r8d,r14d
  1146. mov r14d,r15d
  1147. ror r15d,2
  1148. xor r13d,r12d
  1149. shr r12d,3
  1150. ror r13d,7
  1151. xor r15d,r14d
  1152. shr r14d,10
  1153. ror r15d,17
  1154. xor r12d,r13d
  1155. xor r15d,r14d
  1156. add r12d,DWORD[20+rsp]
  1157. add r12d,DWORD[48+rsp]
  1158. mov r13d,eax
  1159. add r12d,r15d
  1160. mov r14d,r8d
  1161. ror r13d,14
  1162. mov r15d,ebx
  1163. xor r13d,eax
  1164. ror r14d,9
  1165. xor r15d,ecx
  1166. mov DWORD[48+rsp],r12d
  1167. xor r14d,r8d
  1168. and r15d,eax
  1169. ror r13d,5
  1170. add r12d,edx
  1171. xor r15d,ecx
  1172. ror r14d,11
  1173. xor r13d,eax
  1174. add r12d,r15d
  1175. mov r15d,r8d
  1176. add r12d,DWORD[rbp]
  1177. xor r14d,r8d
  1178. xor r15d,r9d
  1179. ror r13d,6
  1180. mov edx,r9d
  1181. and edi,r15d
  1182. ror r14d,2
  1183. add r12d,r13d
  1184. xor edx,edi
  1185. add r11d,r12d
  1186. add edx,r12d
  1187. lea rbp,[4+rbp]
  ; ---- schedule slot 13 ----
  1188. mov r13d,DWORD[56+rsp]
  1189. mov edi,DWORD[44+rsp]
  1190. mov r12d,r13d
  1191. ror r13d,11
  1192. add edx,r14d
  1193. mov r14d,edi
  1194. ror edi,2
  1195. xor r13d,r12d
  1196. shr r12d,3
  1197. ror r13d,7
  1198. xor edi,r14d
  1199. shr r14d,10
  1200. ror edi,17
  1201. xor r12d,r13d
  1202. xor edi,r14d
  1203. add r12d,DWORD[24+rsp]
  1204. add r12d,DWORD[52+rsp]
  1205. mov r13d,r11d
  1206. add r12d,edi
  1207. mov r14d,edx
  1208. ror r13d,14
  1209. mov edi,eax
  1210. xor r13d,r11d
  1211. ror r14d,9
  1212. xor edi,ebx
  1213. mov DWORD[52+rsp],r12d
  1214. xor r14d,edx
  1215. and edi,r11d
  1216. ror r13d,5
  1217. add r12d,ecx
  1218. xor edi,ebx
  1219. ror r14d,11
  1220. xor r13d,r11d
  1221. add r12d,edi
  1222. mov edi,edx
  1223. add r12d,DWORD[rbp]
  1224. xor r14d,edx
  1225. xor edi,r8d
  1226. ror r13d,6
  1227. mov ecx,r8d
  1228. and r15d,edi
  1229. ror r14d,2
  1230. add r12d,r13d
  1231. xor ecx,r15d
  1232. add r10d,r12d
  1233. add ecx,r12d
  1234. lea rbp,[4+rbp]
  ; ---- schedule slot 14 ----
  1235. mov r13d,DWORD[60+rsp]
  1236. mov r15d,DWORD[48+rsp]
  1237. mov r12d,r13d
  1238. ror r13d,11
  1239. add ecx,r14d
  1240. mov r14d,r15d
  1241. ror r15d,2
  1242. xor r13d,r12d
  1243. shr r12d,3
  1244. ror r13d,7
  1245. xor r15d,r14d
  1246. shr r14d,10
  1247. ror r15d,17
  1248. xor r12d,r13d
  1249. xor r15d,r14d
  1250. add r12d,DWORD[28+rsp]
  1251. add r12d,DWORD[56+rsp]
  1252. mov r13d,r10d
  1253. add r12d,r15d
  1254. mov r14d,ecx
  1255. ror r13d,14
  1256. mov r15d,r11d
  1257. xor r13d,r10d
  1258. ror r14d,9
  1259. xor r15d,eax
  1260. mov DWORD[56+rsp],r12d
  1261. xor r14d,ecx
  1262. and r15d,r10d
  1263. ror r13d,5
  1264. add r12d,ebx
  1265. xor r15d,eax
  1266. ror r14d,11
  1267. xor r13d,r10d
  1268. add r12d,r15d
  1269. mov r15d,ecx
  1270. add r12d,DWORD[rbp]
  1271. xor r14d,ecx
  1272. xor r15d,edx
  1273. ror r13d,6
  1274. mov ebx,edx
  1275. and edi,r15d
  1276. ror r14d,2
  1277. add r12d,r13d
  1278. xor ebx,edi
  1279. add r9d,r12d
  1280. add ebx,r12d
  1281. lea rbp,[4+rbp]
  ; ---- schedule slot 15 (rbp += 20; the Sigma0 add into eax stays pending in r14d) ----
  1282. mov r13d,DWORD[rsp]
  1283. mov edi,DWORD[52+rsp]
  1284. mov r12d,r13d
  1285. ror r13d,11
  1286. add ebx,r14d
  1287. mov r14d,edi
  1288. ror edi,2
  1289. xor r13d,r12d
  1290. shr r12d,3
  1291. ror r13d,7
  1292. xor edi,r14d
  1293. shr r14d,10
  1294. ror edi,17
  1295. xor r12d,r13d
  1296. xor edi,r14d
  1297. add r12d,DWORD[32+rsp]
  1298. add r12d,DWORD[60+rsp]
  1299. mov r13d,r9d
  1300. add r12d,edi
  1301. mov r14d,ebx
  1302. ror r13d,14
  1303. mov edi,r10d
  1304. xor r13d,r9d
  1305. ror r14d,9
  1306. xor edi,r11d
  1307. mov DWORD[60+rsp],r12d
  1308. xor r14d,ebx
  1309. and edi,r9d
  1310. ror r13d,5
  1311. add r12d,eax
  1312. xor edi,r11d
  1313. ror r14d,11
  1314. xor r13d,r9d
  1315. add r12d,edi
  1316. mov edi,ebx
  1317. add r12d,DWORD[rbp]
  1318. xor r14d,ebx
  1319. xor edi,ecx
  1320. ror r13d,6
  1321. mov eax,ecx
  1322. and r15d,edi
  1323. ror r14d,2
  1324. add r12d,r13d
  1325. xor eax,r15d
  1326. add r8d,r12d
  1327. add eax,r12d
  1328. lea rbp,[20+rbp]
  ; Loop while the dword at rbp still has a non-zero top byte, i.e. rbp has
  ; not yet reached the row after the last round constant.
  1329. cmp BYTE[3+rbp],0
  1330. jnz NEAR $L$rounds_16_xx
  1331. mov rdi,QWORD[((64+0))+rsp]
  ; End of one block. rdi was used as round scratch (edi), so the state pointer
  ; is reloaded from the frame above. The next add completes the Sigma0 term
  ; left pending by the final round, and rsi moves on to the next 64-byte block.
  1332. add eax,r14d
  1333. lea rsi,[64+rsi]
  ; Feed-forward: add the previous state words to the working variables.
  1334. add eax,DWORD[rdi]
  1335. add ebx,DWORD[4+rdi]
  1336. add ecx,DWORD[8+rdi]
  1337. add edx,DWORD[12+rdi]
  1338. add r8d,DWORD[16+rdi]
  1339. add r9d,DWORD[20+rdi]
  1340. add r10d,DWORD[24+rdi]
  1341. add r11d,DWORD[28+rdi]
  ; Compare the input pointer with the end pointer saved at [rsp+80]. The mov
  ; stores that follow do not modify flags, so the jb below still tests this cmp.
  ; The test sits at the bottom of the loop, so one block is always processed
  ; before it is evaluated.
  ; NOTE(review): callers are presumably expected never to pass a block count of 0 -- confirm.
  1342. cmp rsi,QWORD[((64+16))+rsp]
  1343. mov DWORD[rdi],eax
  1344. mov DWORD[4+rdi],ebx
  1345. mov DWORD[8+rdi],ecx
  1346. mov DWORD[12+rdi],edx
  1347. mov DWORD[16+rdi],r8d
  1348. mov DWORD[20+rdi],r9d
  1349. mov DWORD[24+rdi],r10d
  1350. mov DWORD[28+rdi],r11d
  1351. jb NEAR $L$loop
  ; Unwind: rsi = the rsp value recorded before the six pushes. Reload the
  ; callee-saved registers from the slots just below it (r15 lowest, rbx
  ; highest, matching the push order), then restore rsp itself.
  1352. mov rsi,QWORD[88+rsp]
  1353. mov r15,QWORD[((-48))+rsi]
  1354. mov r14,QWORD[((-40))+rsi]
  1355. mov r13,QWORD[((-32))+rsi]
  1356. mov r12,QWORD[((-24))+rsi]
  1357. mov rbp,QWORD[((-16))+rsi]
  1358. mov rbx,QWORD[((-8))+rsi]
  1359. lea rsp,[rsi]
  1360. $L$epilogue:
  ; Restore the caller's rdi/rsi from the slots written at function entry,
  ; then return (the bytes F3 C3 encode "rep ret").
  1361. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  1362. mov rsi,QWORD[16+rsp]
  1363. DB 0F3h,0C3h ;repret
  1364. $L$SEH_end_sha256_block_data_order:
  1365. ALIGN 64
  1366. K256:
  ; SHA-256 round-constant table, 64-byte aligned.
  ; Layout: each row of four 32-bit constants appears twice in a row, so
  ; consecutive distinct rows are 32 bytes apart and the 64 constants occupy
  ; offsets 0..511. The scalar code steps 4,4,4,20 bytes to walk only the
  ; first copy of each row; the SSSE3 code loads rows at 0/32/64/96 from rbp.
  ; NOTE(review): the duplicated copy presumably serves a wider-vector path
  ; that is outside this view -- confirm.
  1367. DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
  1368. DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
  1369. DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
  1370. DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
  1371. DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
  1372. DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
  1373. DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
  1374. DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
  1375. DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
  1376. DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
  1377. DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
  1378. DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
  1379. DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
  1380. DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
  1381. DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
  1382. DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
  1383. DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
  1384. DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
  1385. DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
  1386. DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
  1387. DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
  1388. DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
  1389. DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
  1390. DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
  1391. DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
  1392. DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
  1393. DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
  1394. DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
  1395. DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
  1396. DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
  1397. DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
  1398. DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
  ; Offset 512: this row does double duty. Its top byte is 0x00, which the
  ; scalar loop's "cmp BYTE[3+rbp],0" uses as the end-of-table marker; every
  ; row the scalar loop tests before this one has a non-zero top byte in its
  ; first dword. The SSSE3 code also loads it from K256+512 into xmm7 as the
  ; shuffle mask for its byte-swap of the input words.
  1399. DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
  1400. DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
  ; Two further 16-byte patterns, each stored twice.
  ; NOTE(review): presumably shuffle masks for the vector message-schedule
  ; code; their use is outside this view -- confirm.
  1401. DD 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
  1402. DD 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
  1403. DD 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
  1404. DD 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
  ; NUL-terminated ASCII credit string:
  ; "SHA256 block transform for x86_64, CRYPTOGAMS by (appro at openssl.org)"
  ; (the address is enclosed in angle brackets in the actual bytes).
  1405. DB 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
  1406. DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54
  1407. DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
  1408. DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
  1409. DB 111,114,103,62,0
  1410. ALIGN 64
  ;-----------------------------------------------------------------------
  ; sha256_block_data_order_ssse3 -- SSSE3 code path, Win64 entry sequence.
  ;
  ; In (Win64 registers): rcx, rdx, r8. They are moved to rdi, rsi, rdx.
  ;   rdi -> eight 32-bit state words (loaded below, updated by the loop)
  ;   rsi -> input data, consumed 64 bytes at a time by the loop
  ;   rdx  = count, scaled by 64 below to form the end-of-input pointer
  ;
  ; $L$ssse3_shortcut is a second entry point: sha256_block_data_order
  ; jumps here after it has already moved the arguments, which is why
  ; rax is reloaded from rsp at the shortcut label.
  ;
  ; Frame after `and rsp,-64` (rsp is 64-byte aligned):
  ;   [rsp+  0 ..  63]  16 dwords of scratch, filled and read by the loop
  ;   [rsp+ 64]         rdi (state pointer)
  ;   [rsp+ 72]         rsi (input pointer at entry)
  ;   [rsp+ 80]         rsi + count*64 (end-of-input pointer)
  ;   [rsp+ 88]         rsp as it was before the six pushes
  ;   [rsp+ 96 .. 159]  saved xmm6, xmm7, xmm8, xmm9
  ;-----------------------------------------------------------------------
  1411. sha256_block_data_order_ssse3:
  1412. mov QWORD[8+rsp],rdi ;WIN64 prologue
  1413. mov QWORD[16+rsp],rsi
  1414. mov rax,rsp
  1415. $L$SEH_begin_sha256_block_data_order_ssse3:
  1416. mov rdi,rcx
  1417. mov rsi,rdx
  1418. mov rdx,r8
  1419. $L$ssse3_shortcut:
  1420. mov rax,rsp ; remember the pre-push stack pointer for the epilogue
  1421. push rbx
  1422. push rbp
  1423. push r12
  1424. push r13
  1425. push r14
  1426. push r15
  1427. shl rdx,4 ; rdx = count*16 ...
  1428. sub rsp,160
  1429. lea rdx,[rdx*4+rsi] ; ... then rdx = rsi + count*64
  1430. and rsp,-64
  1431. mov QWORD[((64+0))+rsp],rdi
  1432. mov QWORD[((64+8))+rsp],rsi
  1433. mov QWORD[((64+16))+rsp],rdx
  1434. mov QWORD[88+rsp],rax
  1435. movaps XMMWORD[(64+32)+rsp],xmm6 ; xmm6-xmm9 are saved here and restored before return
  1436. movaps XMMWORD[(64+48)+rsp],xmm7
  1437. movaps XMMWORD[(64+64)+rsp],xmm8
  1438. movaps XMMWORD[(64+80)+rsp],xmm9
  1439. $L$prologue_ssse3:
  1440. mov eax,DWORD[rdi] ; load the eight state words into eax,ebx,ecx,edx,r8d-r11d
  1441. mov ebx,DWORD[4+rdi]
  1442. mov ecx,DWORD[8+rdi]
  1443. mov edx,DWORD[12+rdi]
  1444. mov r8d,DWORD[16+rdi]
  1445. mov r9d,DWORD[20+rdi]
  1446. mov r10d,DWORD[24+rdi]
  1447. mov r11d,DWORD[28+rdi]
  1448. jmp NEAR $L$loop_ssse3
  1449. ALIGN 16
  ; Per-block loop head. Loads one 64-byte input block from [rsi] into
  ; xmm0-xmm3, runs each register through pshufb with the mask at K256+512
  ; (presumably a per-dword byte swap -- confirm against the table contents),
  ; adds the first four constant vectors and parks the 16 sums in
  ; [rsp+0..63] for the scalar rounds. Constants are read at a 32-byte stride
  ; from rbp. The DB lines are hand-encoded SSSE3 instructions.
  1450. $L$loop_ssse3:
  1451. movdqa xmm7,XMMWORD[((K256+512))] ; shuffle mask for the four pshufb below
  1452. movdqu xmm0,XMMWORD[rsi]
  1453. movdqu xmm1,XMMWORD[16+rsi]
  1454. movdqu xmm2,XMMWORD[32+rsi]
  1455. DB 102,15,56,0,199 ; pshufb xmm0,xmm7
  1456. movdqu xmm3,XMMWORD[48+rsi]
  1457. lea rbp,[K256] ; rbp = cursor into the constant table
  1458. DB 102,15,56,0,207 ; pshufb xmm1,xmm7
  1459. movdqa xmm4,XMMWORD[rbp]
  1460. movdqa xmm5,XMMWORD[32+rbp]
  1461. DB 102,15,56,0,215 ; pshufb xmm2,xmm7
  1462. paddd xmm4,xmm0
  1463. movdqa xmm6,XMMWORD[64+rbp]
  1464. DB 102,15,56,0,223 ; pshufb xmm3,xmm7
  1465. movdqa xmm7,XMMWORD[96+rbp] ; mask is dead after the last pshufb; xmm7 is reused
  1466. paddd xmm5,xmm1
  1467. paddd xmm6,xmm2
  1468. paddd xmm7,xmm3
  1469. movdqa XMMWORD[rsp],xmm4 ; [rsp+0..63] = message words + constants
  1470. mov r14d,eax
  1471. movdqa XMMWORD[16+rsp],xmm5
  1472. mov edi,ebx
  1473. movdqa XMMWORD[32+rsp],xmm6
  1474. xor edi,ecx ; edi = ebx ^ ecx, consumed by the first round's `and edi,r15d`
  1475. movdqa XMMWORD[48+rsp],xmm7
  1476. mov r13d,r8d ; seed for the first round's rotate/xor chain
  1477. jmp NEAR $L$ssse3_00_47
  1478. ALIGN 16
  ; $L$ssse3_00_47, quarter 1 of 4. Each pass of this loop runs 16 scalar
  ; rounds and, interleaved with them, derives the next 16 schedule words
  ; in xmm0-xmm3. This quarter: four rounds consuming the dwords at
  ; [rsp+0],[rsp+4],[rsp+8],[rsp+12], while xmm0 is updated in place,
  ; summed with the constants at [rbp] and written back to [rsp+0..15].
  ;
  ; Scalar pattern used by every round (registers rotate roles per round):
  ;   r13d: ror 14 / xor x / ror 5 / xor x / ror 6  = ror(x,6)^ror(x,11)^ror(x,25)
  ;   r14d: ror 9  / xor y / ror 11 / xor y / ror 2 = ror(y,2)^ror(y,13)^ror(y,22)
  ;   r12d: ((p ^ q) & x) ^ q
  ;   edi and r15d alternate: one holds the xor of two neighbouring state
  ;   words from the previous round, the other is rebuilt for the next.
  ; SIMD pattern: xmm4 gets (v>>3)^(v>>7)^(v<<25)^(v>>18)^(v<<14) per dword;
  ; xmm7 gets (v>>10) xor two 64-bit right shifts (17, then 19 in total) of
  ; duplicated dwords, done in two halves (pshufd 250 then pshufd 80).
  1479. $L$ssse3_00_47:
  1480. sub rbp,-128 ; rbp += 128: step the constant cursor to the next group
  1481. ror r13d,14
  1482. movdqa xmm4,xmm1
  1483. mov eax,r14d
  1484. mov r12d,r9d
  1485. movdqa xmm7,xmm3
  1486. ror r14d,9
  1487. xor r13d,r8d
  1488. xor r12d,r10d
  1489. ror r13d,5
  1490. xor r14d,eax
  1491. DB 102,15,58,15,224,4 ; palignr xmm4,xmm0,4
  1492. and r12d,r8d
  1493. xor r13d,r8d
  1494. DB 102,15,58,15,250,4 ; palignr xmm7,xmm2,4
  1495. add r11d,DWORD[rsp]
  1496. mov r15d,eax
  1497. xor r12d,r10d
  1498. ror r14d,11
  1499. movdqa xmm5,xmm4
  1500. xor r15d,ebx
  1501. add r11d,r12d
  1502. movdqa xmm6,xmm4
  1503. ror r13d,6
  1504. and edi,r15d
  1505. psrld xmm4,3
  1506. xor r14d,eax
  1507. add r11d,r13d
  1508. xor edi,ebx
  1509. paddd xmm0,xmm7
  1510. ror r14d,2
  1511. add edx,r11d
  1512. psrld xmm6,7
  1513. add r11d,edi
  1514. mov r13d,edx
  1515. pshufd xmm7,xmm3,250
  1516. add r14d,r11d
  1517. ror r13d,14
  1518. pslld xmm5,14
  1519. mov r11d,r14d
  1520. mov r12d,r8d
  1521. pxor xmm4,xmm6
  1522. ror r14d,9
  1523. xor r13d,edx
  1524. xor r12d,r9d
  1525. ror r13d,5
  1526. psrld xmm6,11
  1527. xor r14d,r11d
  1528. pxor xmm4,xmm5
  1529. and r12d,edx
  1530. xor r13d,edx
  1531. pslld xmm5,11
  1532. add r10d,DWORD[4+rsp]
  1533. mov edi,r11d
  1534. pxor xmm4,xmm6
  1535. xor r12d,r9d
  1536. ror r14d,11
  1537. movdqa xmm6,xmm7
  1538. xor edi,eax
  1539. add r10d,r12d
  1540. pxor xmm4,xmm5
  1541. ror r13d,6
  1542. and r15d,edi
  1543. xor r14d,r11d
  1544. psrld xmm7,10
  1545. add r10d,r13d
  1546. xor r15d,eax
  1547. paddd xmm0,xmm4
  1548. ror r14d,2
  1549. add ecx,r10d
  1550. psrlq xmm6,17
  1551. add r10d,r15d
  1552. mov r13d,ecx
  1553. add r14d,r10d
  1554. pxor xmm7,xmm6
  1555. ror r13d,14
  1556. mov r10d,r14d
  1557. mov r12d,edx
  1558. ror r14d,9
  1559. psrlq xmm6,2
  1560. xor r13d,ecx
  1561. xor r12d,r8d
  1562. pxor xmm7,xmm6
  1563. ror r13d,5
  1564. xor r14d,r10d
  1565. and r12d,ecx
  1566. pshufd xmm7,xmm7,128
  1567. xor r13d,ecx
  1568. add r9d,DWORD[8+rsp]
  1569. mov r15d,r10d
  1570. psrldq xmm7,8
  1571. xor r12d,r8d
  1572. ror r14d,11
  1573. xor r15d,r11d
  1574. add r9d,r12d
  1575. ror r13d,6
  1576. paddd xmm0,xmm7
  1577. and edi,r15d
  1578. xor r14d,r10d
  1579. add r9d,r13d
  1580. pshufd xmm7,xmm0,80
  1581. xor edi,r11d
  1582. ror r14d,2
  1583. add ebx,r9d
  1584. movdqa xmm6,xmm7
  1585. add r9d,edi
  1586. mov r13d,ebx
  1587. psrld xmm7,10
  1588. add r14d,r9d
  1589. ror r13d,14
  1590. psrlq xmm6,17
  1591. mov r9d,r14d
  1592. mov r12d,ecx
  1593. pxor xmm7,xmm6
  1594. ror r14d,9
  1595. xor r13d,ebx
  1596. xor r12d,edx
  1597. ror r13d,5
  1598. xor r14d,r9d
  1599. psrlq xmm6,2
  1600. and r12d,ebx
  1601. xor r13d,ebx
  1602. add r8d,DWORD[12+rsp]
  1603. pxor xmm7,xmm6
  1604. mov edi,r9d
  1605. xor r12d,edx
  1606. ror r14d,11
  1607. pshufd xmm7,xmm7,8
  1608. xor edi,r10d
  1609. add r8d,r12d
  1610. movdqa xmm6,XMMWORD[rbp] ; constants for the four words just produced
  1611. ror r13d,6
  1612. and r15d,edi
  1613. pslldq xmm7,8
  1614. xor r14d,r9d
  1615. add r8d,r13d
  1616. xor r15d,r10d
  1617. paddd xmm0,xmm7
  1618. ror r14d,2
  1619. add eax,r8d
  1620. add r8d,r15d
  1621. paddd xmm6,xmm0
  1622. mov r13d,eax
  1623. add r14d,r8d
  1624. movdqa XMMWORD[rsp],xmm6 ; refill [rsp+0..15]; all four old dwords were read above
  ; $L$ssse3_00_47, quarter 2 of 4: four scalar rounds consuming the dwords
  ; at [rsp+16],[rsp+20],[rsp+24],[rsp+28], interleaved with the in-place
  ; update of xmm1 (inputs: xmm1/xmm2 and xmm3/xmm0 via palignr, plus the
  ; top dwords of xmm0 via pshufd). The new xmm1 is summed with the
  ; constants at [32+rbp] and written back to [rsp+16..31].
  1625. ror r13d,14
  1626. movdqa xmm4,xmm2
  1627. mov r8d,r14d
  1628. mov r12d,ebx
  1629. movdqa xmm7,xmm0
  1630. ror r14d,9
  1631. xor r13d,eax
  1632. xor r12d,ecx
  1633. ror r13d,5
  1634. xor r14d,r8d
  1635. DB 102,15,58,15,225,4 ; palignr xmm4,xmm1,4
  1636. and r12d,eax
  1637. xor r13d,eax
  1638. DB 102,15,58,15,251,4 ; palignr xmm7,xmm3,4
  1639. add edx,DWORD[16+rsp]
  1640. mov r15d,r8d
  1641. xor r12d,ecx
  1642. ror r14d,11
  1643. movdqa xmm5,xmm4
  1644. xor r15d,r9d
  1645. add edx,r12d
  1646. movdqa xmm6,xmm4
  1647. ror r13d,6
  1648. and edi,r15d
  1649. psrld xmm4,3
  1650. xor r14d,r8d
  1651. add edx,r13d
  1652. xor edi,r9d
  1653. paddd xmm1,xmm7
  1654. ror r14d,2
  1655. add r11d,edx
  1656. psrld xmm6,7
  1657. add edx,edi
  1658. mov r13d,r11d
  1659. pshufd xmm7,xmm0,250
  1660. add r14d,edx
  1661. ror r13d,14
  1662. pslld xmm5,14
  1663. mov edx,r14d
  1664. mov r12d,eax
  1665. pxor xmm4,xmm6
  1666. ror r14d,9
  1667. xor r13d,r11d
  1668. xor r12d,ebx
  1669. ror r13d,5
  1670. psrld xmm6,11
  1671. xor r14d,edx
  1672. pxor xmm4,xmm5
  1673. and r12d,r11d
  1674. xor r13d,r11d
  1675. pslld xmm5,11
  1676. add ecx,DWORD[20+rsp]
  1677. mov edi,edx
  1678. pxor xmm4,xmm6
  1679. xor r12d,ebx
  1680. ror r14d,11
  1681. movdqa xmm6,xmm7
  1682. xor edi,r8d
  1683. add ecx,r12d
  1684. pxor xmm4,xmm5
  1685. ror r13d,6
  1686. and r15d,edi
  1687. xor r14d,edx
  1688. psrld xmm7,10
  1689. add ecx,r13d
  1690. xor r15d,r8d
  1691. paddd xmm1,xmm4
  1692. ror r14d,2
  1693. add r10d,ecx
  1694. psrlq xmm6,17
  1695. add ecx,r15d
  1696. mov r13d,r10d
  1697. add r14d,ecx
  1698. pxor xmm7,xmm6
  1699. ror r13d,14
  1700. mov ecx,r14d
  1701. mov r12d,r11d
  1702. ror r14d,9
  1703. psrlq xmm6,2
  1704. xor r13d,r10d
  1705. xor r12d,eax
  1706. pxor xmm7,xmm6
  1707. ror r13d,5
  1708. xor r14d,ecx
  1709. and r12d,r10d
  1710. pshufd xmm7,xmm7,128
  1711. xor r13d,r10d
  1712. add ebx,DWORD[24+rsp]
  1713. mov r15d,ecx
  1714. psrldq xmm7,8
  1715. xor r12d,eax
  1716. ror r14d,11
  1717. xor r15d,edx
  1718. add ebx,r12d
  1719. ror r13d,6
  1720. paddd xmm1,xmm7
  1721. and edi,r15d
  1722. xor r14d,ecx
  1723. add ebx,r13d
  1724. pshufd xmm7,xmm1,80
  1725. xor edi,edx
  1726. ror r14d,2
  1727. add r9d,ebx
  1728. movdqa xmm6,xmm7
  1729. add ebx,edi
  1730. mov r13d,r9d
  1731. psrld xmm7,10
  1732. add r14d,ebx
  1733. ror r13d,14
  1734. psrlq xmm6,17
  1735. mov ebx,r14d
  1736. mov r12d,r10d
  1737. pxor xmm7,xmm6
  1738. ror r14d,9
  1739. xor r13d,r9d
  1740. xor r12d,r11d
  1741. ror r13d,5
  1742. xor r14d,ebx
  1743. psrlq xmm6,2
  1744. and r12d,r9d
  1745. xor r13d,r9d
  1746. add eax,DWORD[28+rsp]
  1747. pxor xmm7,xmm6
  1748. mov edi,ebx
  1749. xor r12d,r11d
  1750. ror r14d,11
  1751. pshufd xmm7,xmm7,8
  1752. xor edi,ecx
  1753. add eax,r12d
  1754. movdqa xmm6,XMMWORD[32+rbp] ; constants for the four words just produced
  1755. ror r13d,6
  1756. and r15d,edi
  1757. pslldq xmm7,8
  1758. xor r14d,ebx
  1759. add eax,r13d
  1760. xor r15d,ecx
  1761. paddd xmm1,xmm7
  1762. ror r14d,2
  1763. add r8d,eax
  1764. add eax,r15d
  1765. paddd xmm6,xmm1
  1766. mov r13d,r8d
  1767. add r14d,eax
  1768. movdqa XMMWORD[16+rsp],xmm6 ; refill [rsp+16..31]
  ; $L$ssse3_00_47, quarter 3 of 4: four scalar rounds consuming the dwords
  ; at [rsp+32],[rsp+36],[rsp+40],[rsp+44], interleaved with the in-place
  ; update of xmm2 (inputs: xmm2/xmm3 and xmm0/xmm1 via palignr, plus the
  ; top dwords of xmm1 via pshufd). The new xmm2 is summed with the
  ; constants at [64+rbp] and written back to [rsp+32..47].
  1769. ror r13d,14
  1770. movdqa xmm4,xmm3
  1771. mov eax,r14d
  1772. mov r12d,r9d
  1773. movdqa xmm7,xmm1
  1774. ror r14d,9
  1775. xor r13d,r8d
  1776. xor r12d,r10d
  1777. ror r13d,5
  1778. xor r14d,eax
  1779. DB 102,15,58,15,226,4 ; palignr xmm4,xmm2,4
  1780. and r12d,r8d
  1781. xor r13d,r8d
  1782. DB 102,15,58,15,248,4 ; palignr xmm7,xmm0,4
  1783. add r11d,DWORD[32+rsp]
  1784. mov r15d,eax
  1785. xor r12d,r10d
  1786. ror r14d,11
  1787. movdqa xmm5,xmm4
  1788. xor r15d,ebx
  1789. add r11d,r12d
  1790. movdqa xmm6,xmm4
  1791. ror r13d,6
  1792. and edi,r15d
  1793. psrld xmm4,3
  1794. xor r14d,eax
  1795. add r11d,r13d
  1796. xor edi,ebx
  1797. paddd xmm2,xmm7
  1798. ror r14d,2
  1799. add edx,r11d
  1800. psrld xmm6,7
  1801. add r11d,edi
  1802. mov r13d,edx
  1803. pshufd xmm7,xmm1,250
  1804. add r14d,r11d
  1805. ror r13d,14
  1806. pslld xmm5,14
  1807. mov r11d,r14d
  1808. mov r12d,r8d
  1809. pxor xmm4,xmm6
  1810. ror r14d,9
  1811. xor r13d,edx
  1812. xor r12d,r9d
  1813. ror r13d,5
  1814. psrld xmm6,11
  1815. xor r14d,r11d
  1816. pxor xmm4,xmm5
  1817. and r12d,edx
  1818. xor r13d,edx
  1819. pslld xmm5,11
  1820. add r10d,DWORD[36+rsp]
  1821. mov edi,r11d
  1822. pxor xmm4,xmm6
  1823. xor r12d,r9d
  1824. ror r14d,11
  1825. movdqa xmm6,xmm7
  1826. xor edi,eax
  1827. add r10d,r12d
  1828. pxor xmm4,xmm5
  1829. ror r13d,6
  1830. and r15d,edi
  1831. xor r14d,r11d
  1832. psrld xmm7,10
  1833. add r10d,r13d
  1834. xor r15d,eax
  1835. paddd xmm2,xmm4
  1836. ror r14d,2
  1837. add ecx,r10d
  1838. psrlq xmm6,17
  1839. add r10d,r15d
  1840. mov r13d,ecx
  1841. add r14d,r10d
  1842. pxor xmm7,xmm6
  1843. ror r13d,14
  1844. mov r10d,r14d
  1845. mov r12d,edx
  1846. ror r14d,9
  1847. psrlq xmm6,2
  1848. xor r13d,ecx
  1849. xor r12d,r8d
  1850. pxor xmm7,xmm6
  1851. ror r13d,5
  1852. xor r14d,r10d
  1853. and r12d,ecx
  1854. pshufd xmm7,xmm7,128
  1855. xor r13d,ecx
  1856. add r9d,DWORD[40+rsp]
  1857. mov r15d,r10d
  1858. psrldq xmm7,8
  1859. xor r12d,r8d
  1860. ror r14d,11
  1861. xor r15d,r11d
  1862. add r9d,r12d
  1863. ror r13d,6
  1864. paddd xmm2,xmm7
  1865. and edi,r15d
  1866. xor r14d,r10d
  1867. add r9d,r13d
  1868. pshufd xmm7,xmm2,80
  1869. xor edi,r11d
  1870. ror r14d,2
  1871. add ebx,r9d
  1872. movdqa xmm6,xmm7
  1873. add r9d,edi
  1874. mov r13d,ebx
  1875. psrld xmm7,10
  1876. add r14d,r9d
  1877. ror r13d,14
  1878. psrlq xmm6,17
  1879. mov r9d,r14d
  1880. mov r12d,ecx
  1881. pxor xmm7,xmm6
  1882. ror r14d,9
  1883. xor r13d,ebx
  1884. xor r12d,edx
  1885. ror r13d,5
  1886. xor r14d,r9d
  1887. psrlq xmm6,2
  1888. and r12d,ebx
  1889. xor r13d,ebx
  1890. add r8d,DWORD[44+rsp]
  1891. pxor xmm7,xmm6
  1892. mov edi,r9d
  1893. xor r12d,edx
  1894. ror r14d,11
  1895. pshufd xmm7,xmm7,8
  1896. xor edi,r10d
  1897. add r8d,r12d
  1898. movdqa xmm6,XMMWORD[64+rbp] ; constants for the four words just produced
  1899. ror r13d,6
  1900. and r15d,edi
  1901. pslldq xmm7,8
  1902. xor r14d,r9d
  1903. add r8d,r13d
  1904. xor r15d,r10d
  1905. paddd xmm2,xmm7
  1906. ror r14d,2
  1907. add eax,r8d
  1908. add r8d,r15d
  1909. paddd xmm6,xmm2
  1910. mov r13d,eax
  1911. add r14d,r8d
  1912. movdqa XMMWORD[32+rsp],xmm6 ; refill [rsp+32..47]
  ; $L$ssse3_00_47, quarter 4 of 4: four scalar rounds consuming the dwords
  ; at [rsp+48],[rsp+52],[rsp+56],[rsp+60], interleaved with the in-place
  ; update of xmm3 (inputs: xmm3/xmm0 and xmm1/xmm2 via palignr, plus the
  ; top dwords of xmm2 via pshufd). The new xmm3 is summed with the
  ; constants at [96+rbp] and written back to [rsp+48..63].
  ; The closing cmp/jne repeats the whole $L$ssse3_00_47 body until the
  ; byte at [rbp+131] reads as zero.
  1913. ror r13d,14
  1914. movdqa xmm4,xmm0
  1915. mov r8d,r14d
  1916. mov r12d,ebx
  1917. movdqa xmm7,xmm2
  1918. ror r14d,9
  1919. xor r13d,eax
  1920. xor r12d,ecx
  1921. ror r13d,5
  1922. xor r14d,r8d
  1923. DB 102,15,58,15,227,4 ; palignr xmm4,xmm3,4
  1924. and r12d,eax
  1925. xor r13d,eax
  1926. DB 102,15,58,15,249,4 ; palignr xmm7,xmm1,4
  1927. add edx,DWORD[48+rsp]
  1928. mov r15d,r8d
  1929. xor r12d,ecx
  1930. ror r14d,11
  1931. movdqa xmm5,xmm4
  1932. xor r15d,r9d
  1933. add edx,r12d
  1934. movdqa xmm6,xmm4
  1935. ror r13d,6
  1936. and edi,r15d
  1937. psrld xmm4,3
  1938. xor r14d,r8d
  1939. add edx,r13d
  1940. xor edi,r9d
  1941. paddd xmm3,xmm7
  1942. ror r14d,2
  1943. add r11d,edx
  1944. psrld xmm6,7
  1945. add edx,edi
  1946. mov r13d,r11d
  1947. pshufd xmm7,xmm2,250
  1948. add r14d,edx
  1949. ror r13d,14
  1950. pslld xmm5,14
  1951. mov edx,r14d
  1952. mov r12d,eax
  1953. pxor xmm4,xmm6
  1954. ror r14d,9
  1955. xor r13d,r11d
  1956. xor r12d,ebx
  1957. ror r13d,5
  1958. psrld xmm6,11
  1959. xor r14d,edx
  1960. pxor xmm4,xmm5
  1961. and r12d,r11d
  1962. xor r13d,r11d
  1963. pslld xmm5,11
  1964. add ecx,DWORD[52+rsp]
  1965. mov edi,edx
  1966. pxor xmm4,xmm6
  1967. xor r12d,ebx
  1968. ror r14d,11
  1969. movdqa xmm6,xmm7
  1970. xor edi,r8d
  1971. add ecx,r12d
  1972. pxor xmm4,xmm5
  1973. ror r13d,6
  1974. and r15d,edi
  1975. xor r14d,edx
  1976. psrld xmm7,10
  1977. add ecx,r13d
  1978. xor r15d,r8d
  1979. paddd xmm3,xmm4
  1980. ror r14d,2
  1981. add r10d,ecx
  1982. psrlq xmm6,17
  1983. add ecx,r15d
  1984. mov r13d,r10d
  1985. add r14d,ecx
  1986. pxor xmm7,xmm6
  1987. ror r13d,14
  1988. mov ecx,r14d
  1989. mov r12d,r11d
  1990. ror r14d,9
  1991. psrlq xmm6,2
  1992. xor r13d,r10d
  1993. xor r12d,eax
  1994. pxor xmm7,xmm6
  1995. ror r13d,5
  1996. xor r14d,ecx
  1997. and r12d,r10d
  1998. pshufd xmm7,xmm7,128
  1999. xor r13d,r10d
  2000. add ebx,DWORD[56+rsp]
  2001. mov r15d,ecx
  2002. psrldq xmm7,8
  2003. xor r12d,eax
  2004. ror r14d,11
  2005. xor r15d,edx
  2006. add ebx,r12d
  2007. ror r13d,6
  2008. paddd xmm3,xmm7
  2009. and edi,r15d
  2010. xor r14d,ecx
  2011. add ebx,r13d
  2012. pshufd xmm7,xmm3,80
  2013. xor edi,edx
  2014. ror r14d,2
  2015. add r9d,ebx
  2016. movdqa xmm6,xmm7
  2017. add ebx,edi
  2018. mov r13d,r9d
  2019. psrld xmm7,10
  2020. add r14d,ebx
  2021. ror r13d,14
  2022. psrlq xmm6,17
  2023. mov ebx,r14d
  2024. mov r12d,r10d
  2025. pxor xmm7,xmm6
  2026. ror r14d,9
  2027. xor r13d,r9d
  2028. xor r12d,r11d
  2029. ror r13d,5
  2030. xor r14d,ebx
  2031. psrlq xmm6,2
  2032. and r12d,r9d
  2033. xor r13d,r9d
  2034. add eax,DWORD[60+rsp]
  2035. pxor xmm7,xmm6
  2036. mov edi,ebx
  2037. xor r12d,r11d
  2038. ror r14d,11
  2039. pshufd xmm7,xmm7,8
  2040. xor edi,ecx
  2041. add eax,r12d
  2042. movdqa xmm6,XMMWORD[96+rbp] ; constants for the four words just produced
  2043. ror r13d,6
  2044. and r15d,edi
  2045. pslldq xmm7,8
  2046. xor r14d,ebx
  2047. add eax,r13d
  2048. xor r15d,ecx
  2049. paddd xmm3,xmm7
  2050. ror r14d,2
  2051. add r8d,eax
  2052. add eax,r15d
  2053. paddd xmm6,xmm3
  2054. mov r13d,r8d
  2055. add r14d,eax
  2056. movdqa XMMWORD[48+rsp],xmm6 ; refill [rsp+48..63]
  2057. cmp BYTE[131+rbp],0 ; sentinel test: presumably the top byte of the first dword after the round constants -- confirm against K256 layout
  2058. jne NEAR $L$ssse3_00_47
  ; Trailing rounds, group 1 of 4. Reached once the $L$ssse3_00_47 loop
  ; falls through; no SIMD work is interleaved from here on. Four scalar
  ; rounds consume the dwords at [rsp+0],[rsp+4],[rsp+8],[rsp+12] that the
  ; last loop pass stored. Each round uses the same pattern:
  ;   r13d: ror 14 / xor x / ror 5 / xor x / ror 6  = ror(x,6)^ror(x,11)^ror(x,25)
  ;   r14d: ror 9  / xor y / ror 11 / xor y / ror 2 = ror(y,2)^ror(y,13)^ror(y,22)
  ;   r12d: ((p ^ q) & x) ^ q
  ; The r14d term is added to the round's result at the end of the round and
  ; only moved into its state register at the top of the next one.
  2059. ror r13d,14
  2060. mov eax,r14d
  2061. mov r12d,r9d
  2062. ror r14d,9
  2063. xor r13d,r8d
  2064. xor r12d,r10d
  2065. ror r13d,5
  2066. xor r14d,eax
  2067. and r12d,r8d
  2068. xor r13d,r8d
  2069. add r11d,DWORD[rsp]
  2070. mov r15d,eax
  2071. xor r12d,r10d
  2072. ror r14d,11
  2073. xor r15d,ebx
  2074. add r11d,r12d
  2075. ror r13d,6
  2076. and edi,r15d
  2077. xor r14d,eax
  2078. add r11d,r13d
  2079. xor edi,ebx
  2080. ror r14d,2
  2081. add edx,r11d
  2082. add r11d,edi
  2083. mov r13d,edx
  2084. add r14d,r11d
  2085. ror r13d,14
  2086. mov r11d,r14d
  2087. mov r12d,r8d
  2088. ror r14d,9
  2089. xor r13d,edx
  2090. xor r12d,r9d
  2091. ror r13d,5
  2092. xor r14d,r11d
  2093. and r12d,edx
  2094. xor r13d,edx
  2095. add r10d,DWORD[4+rsp]
  2096. mov edi,r11d
  2097. xor r12d,r9d
  2098. ror r14d,11
  2099. xor edi,eax
  2100. add r10d,r12d
  2101. ror r13d,6
  2102. and r15d,edi
  2103. xor r14d,r11d
  2104. add r10d,r13d
  2105. xor r15d,eax
  2106. ror r14d,2
  2107. add ecx,r10d
  2108. add r10d,r15d
  2109. mov r13d,ecx
  2110. add r14d,r10d
  2111. ror r13d,14
  2112. mov r10d,r14d
  2113. mov r12d,edx
  2114. ror r14d,9
  2115. xor r13d,ecx
  2116. xor r12d,r8d
  2117. ror r13d,5
  2118. xor r14d,r10d
  2119. and r12d,ecx
  2120. xor r13d,ecx
  2121. add r9d,DWORD[8+rsp]
  2122. mov r15d,r10d
  2123. xor r12d,r8d
  2124. ror r14d,11
  2125. xor r15d,r11d
  2126. add r9d,r12d
  2127. ror r13d,6
  2128. and edi,r15d
  2129. xor r14d,r10d
  2130. add r9d,r13d
  2131. xor edi,r11d
  2132. ror r14d,2
  2133. add ebx,r9d
  2134. add r9d,edi
  2135. mov r13d,ebx
  2136. add r14d,r9d
  2137. ror r13d,14
  2138. mov r9d,r14d
  2139. mov r12d,ecx
  2140. ror r14d,9
  2141. xor r13d,ebx
  2142. xor r12d,edx
  2143. ror r13d,5
  2144. xor r14d,r9d
  2145. and r12d,ebx
  2146. xor r13d,ebx
  2147. add r8d,DWORD[12+rsp]
  2148. mov edi,r9d
  2149. xor r12d,edx
  2150. ror r14d,11
  2151. xor edi,r10d
  2152. add r8d,r12d
  2153. ror r13d,6
  2154. and r15d,edi
  2155. xor r14d,r9d
  2156. add r8d,r13d
  2157. xor r15d,r10d
  2158. ror r14d,2
  2159. add eax,r8d
  2160. add r8d,r15d
  2161. mov r13d,eax
  2162. add r14d,r8d
  ; Trailing rounds, group 2 of 4: four scalar rounds consuming the dwords
  ; at [rsp+16],[rsp+20],[rsp+24],[rsp+28]. No SIMD work is interleaved.
  ; Per round: r13d builds ror(x,6)^ror(x,11)^ror(x,25) through the
  ; ror 14 / ror 5 / ror 6 chain, r14d builds ror(y,2)^ror(y,13)^ror(y,22)
  ; through ror 9 / ror 11 / ror 2, and r12d = ((p ^ q) & x) ^ q.
  2163. ror r13d,14
  2164. mov r8d,r14d
  2165. mov r12d,ebx
  2166. ror r14d,9
  2167. xor r13d,eax
  2168. xor r12d,ecx
  2169. ror r13d,5
  2170. xor r14d,r8d
  2171. and r12d,eax
  2172. xor r13d,eax
  2173. add edx,DWORD[16+rsp]
  2174. mov r15d,r8d
  2175. xor r12d,ecx
  2176. ror r14d,11
  2177. xor r15d,r9d
  2178. add edx,r12d
  2179. ror r13d,6
  2180. and edi,r15d
  2181. xor r14d,r8d
  2182. add edx,r13d
  2183. xor edi,r9d
  2184. ror r14d,2
  2185. add r11d,edx
  2186. add edx,edi
  2187. mov r13d,r11d
  2188. add r14d,edx
  2189. ror r13d,14
  2190. mov edx,r14d
  2191. mov r12d,eax
  2192. ror r14d,9
  2193. xor r13d,r11d
  2194. xor r12d,ebx
  2195. ror r13d,5
  2196. xor r14d,edx
  2197. and r12d,r11d
  2198. xor r13d,r11d
  2199. add ecx,DWORD[20+rsp]
  2200. mov edi,edx
  2201. xor r12d,ebx
  2202. ror r14d,11
  2203. xor edi,r8d
  2204. add ecx,r12d
  2205. ror r13d,6
  2206. and r15d,edi
  2207. xor r14d,edx
  2208. add ecx,r13d
  2209. xor r15d,r8d
  2210. ror r14d,2
  2211. add r10d,ecx
  2212. add ecx,r15d
  2213. mov r13d,r10d
  2214. add r14d,ecx
  2215. ror r13d,14
  2216. mov ecx,r14d
  2217. mov r12d,r11d
  2218. ror r14d,9
  2219. xor r13d,r10d
  2220. xor r12d,eax
  2221. ror r13d,5
  2222. xor r14d,ecx
  2223. and r12d,r10d
  2224. xor r13d,r10d
  2225. add ebx,DWORD[24+rsp]
  2226. mov r15d,ecx
  2227. xor r12d,eax
  2228. ror r14d,11
  2229. xor r15d,edx
  2230. add ebx,r12d
  2231. ror r13d,6
  2232. and edi,r15d
  2233. xor r14d,ecx
  2234. add ebx,r13d
  2235. xor edi,edx
  2236. ror r14d,2
  2237. add r9d,ebx
  2238. add ebx,edi
  2239. mov r13d,r9d
  2240. add r14d,ebx
  2241. ror r13d,14
  2242. mov ebx,r14d
  2243. mov r12d,r10d
  2244. ror r14d,9
  2245. xor r13d,r9d
  2246. xor r12d,r11d
  2247. ror r13d,5
  2248. xor r14d,ebx
  2249. and r12d,r9d
  2250. xor r13d,r9d
  2251. add eax,DWORD[28+rsp]
  2252. mov edi,ebx
  2253. xor r12d,r11d
  2254. ror r14d,11
  2255. xor edi,ecx
  2256. add eax,r12d
  2257. ror r13d,6
  2258. and r15d,edi
  2259. xor r14d,ebx
  2260. add eax,r13d
  2261. xor r15d,ecx
  2262. ror r14d,2
  2263. add r8d,eax
  2264. add eax,r15d
  2265. mov r13d,r8d
  2266. add r14d,eax
  ; Trailing rounds, group 3 of 4: four scalar rounds consuming the dwords
  ; at [rsp+32],[rsp+36],[rsp+40],[rsp+44]. No SIMD work is interleaved.
  ; Per round: r13d builds ror(x,6)^ror(x,11)^ror(x,25) through the
  ; ror 14 / ror 5 / ror 6 chain, r14d builds ror(y,2)^ror(y,13)^ror(y,22)
  ; through ror 9 / ror 11 / ror 2, and r12d = ((p ^ q) & x) ^ q.
  2267. ror r13d,14
  2268. mov eax,r14d
  2269. mov r12d,r9d
  2270. ror r14d,9
  2271. xor r13d,r8d
  2272. xor r12d,r10d
  2273. ror r13d,5
  2274. xor r14d,eax
  2275. and r12d,r8d
  2276. xor r13d,r8d
  2277. add r11d,DWORD[32+rsp]
  2278. mov r15d,eax
  2279. xor r12d,r10d
  2280. ror r14d,11
  2281. xor r15d,ebx
  2282. add r11d,r12d
  2283. ror r13d,6
  2284. and edi,r15d
  2285. xor r14d,eax
  2286. add r11d,r13d
  2287. xor edi,ebx
  2288. ror r14d,2
  2289. add edx,r11d
  2290. add r11d,edi
  2291. mov r13d,edx
  2292. add r14d,r11d
  2293. ror r13d,14
  2294. mov r11d,r14d
  2295. mov r12d,r8d
  2296. ror r14d,9
  2297. xor r13d,edx
  2298. xor r12d,r9d
  2299. ror r13d,5
  2300. xor r14d,r11d
  2301. and r12d,edx
  2302. xor r13d,edx
  2303. add r10d,DWORD[36+rsp]
  2304. mov edi,r11d
  2305. xor r12d,r9d
  2306. ror r14d,11
  2307. xor edi,eax
  2308. add r10d,r12d
  2309. ror r13d,6
  2310. and r15d,edi
  2311. xor r14d,r11d
  2312. add r10d,r13d
  2313. xor r15d,eax
  2314. ror r14d,2
  2315. add ecx,r10d
  2316. add r10d,r15d
  2317. mov r13d,ecx
  2318. add r14d,r10d
  2319. ror r13d,14
  2320. mov r10d,r14d
  2321. mov r12d,edx
  2322. ror r14d,9
  2323. xor r13d,ecx
  2324. xor r12d,r8d
  2325. ror r13d,5
  2326. xor r14d,r10d
  2327. and r12d,ecx
  2328. xor r13d,ecx
  2329. add r9d,DWORD[40+rsp]
  2330. mov r15d,r10d
  2331. xor r12d,r8d
  2332. ror r14d,11
  2333. xor r15d,r11d
  2334. add r9d,r12d
  2335. ror r13d,6
  2336. and edi,r15d
  2337. xor r14d,r10d
  2338. add r9d,r13d
  2339. xor edi,r11d
  2340. ror r14d,2
  2341. add ebx,r9d
  2342. add r9d,edi
  2343. mov r13d,ebx
  2344. add r14d,r9d
  2345. ror r13d,14
  2346. mov r9d,r14d
  2347. mov r12d,ecx
  2348. ror r14d,9
  2349. xor r13d,ebx
  2350. xor r12d,edx
  2351. ror r13d,5
  2352. xor r14d,r9d
  2353. and r12d,ebx
  2354. xor r13d,ebx
  2355. add r8d,DWORD[44+rsp]
  2356. mov edi,r9d
  2357. xor r12d,edx
  2358. ror r14d,11
  2359. xor edi,r10d
  2360. add r8d,r12d
  2361. ror r13d,6
  2362. and r15d,edi
  2363. xor r14d,r9d
  2364. add r8d,r13d
  2365. xor r15d,r10d
  2366. ror r14d,2
  2367. add eax,r8d
  2368. add r8d,r15d
  2369. mov r13d,eax
  2370. add r14d,r8d
  ; Trailing rounds, group 4 of 4: four scalar rounds consuming the dwords
  ; at [rsp+48],[rsp+52],[rsp+56],[rsp+60]. No SIMD work is interleaved.
  ; Per round: r13d builds ror(x,6)^ror(x,11)^ror(x,25) through the
  ; ror 14 / ror 5 / ror 6 chain, r14d builds ror(y,2)^ror(y,13)^ror(y,22)
  ; through ror 9 / ror 11 / ror 2, and r12d = ((p ^ q) & x) ^ q.
  ; On exit the newest state word is still pending in r14d (see the final
  ; `add r14d,eax`); the code that follows copies it into eax.
  2371. ror r13d,14
  2372. mov r8d,r14d
  2373. mov r12d,ebx
  2374. ror r14d,9
  2375. xor r13d,eax
  2376. xor r12d,ecx
  2377. ror r13d,5
  2378. xor r14d,r8d
  2379. and r12d,eax
  2380. xor r13d,eax
  2381. add edx,DWORD[48+rsp]
  2382. mov r15d,r8d
  2383. xor r12d,ecx
  2384. ror r14d,11
  2385. xor r15d,r9d
  2386. add edx,r12d
  2387. ror r13d,6
  2388. and edi,r15d
  2389. xor r14d,r8d
  2390. add edx,r13d
  2391. xor edi,r9d
  2392. ror r14d,2
  2393. add r11d,edx
  2394. add edx,edi
  2395. mov r13d,r11d
  2396. add r14d,edx
  2397. ror r13d,14
  2398. mov edx,r14d
  2399. mov r12d,eax
  2400. ror r14d,9
  2401. xor r13d,r11d
  2402. xor r12d,ebx
  2403. ror r13d,5
  2404. xor r14d,edx
  2405. and r12d,r11d
  2406. xor r13d,r11d
  2407. add ecx,DWORD[52+rsp]
  2408. mov edi,edx
  2409. xor r12d,ebx
  2410. ror r14d,11
  2411. xor edi,r8d
  2412. add ecx,r12d
  2413. ror r13d,6
  2414. and r15d,edi
  2415. xor r14d,edx
  2416. add ecx,r13d
  2417. xor r15d,r8d
  2418. ror r14d,2
  2419. add r10d,ecx
  2420. add ecx,r15d
  2421. mov r13d,r10d
  2422. add r14d,ecx
  2423. ror r13d,14
  2424. mov ecx,r14d
  2425. mov r12d,r11d
  2426. ror r14d,9
  2427. xor r13d,r10d
  2428. xor r12d,eax
  2429. ror r13d,5
  2430. xor r14d,ecx
  2431. and r12d,r10d
  2432. xor r13d,r10d
  2433. add ebx,DWORD[56+rsp]
  2434. mov r15d,ecx
  2435. xor r12d,eax
  2436. ror r14d,11
  2437. xor r15d,edx
  2438. add ebx,r12d
  2439. ror r13d,6
  2440. and edi,r15d
  2441. xor r14d,ecx
  2442. add ebx,r13d
  2443. xor edi,edx
  2444. ror r14d,2
  2445. add r9d,ebx
  2446. add ebx,edi
  2447. mov r13d,r9d
  2448. add r14d,ebx
  2449. ror r13d,14
  2450. mov ebx,r14d
  2451. mov r12d,r10d
  2452. ror r14d,9
  2453. xor r13d,r9d
  2454. xor r12d,r11d
  2455. ror r13d,5
  2456. xor r14d,ebx
  2457. and r12d,r9d
  2458. xor r13d,r9d
  2459. add eax,DWORD[60+rsp]
  2460. mov edi,ebx
  2461. xor r12d,r11d
  2462. ror r14d,11
  2463. xor edi,ecx
  2464. add eax,r12d
  2465. ror r13d,6
  2466. and r15d,edi
  2467. xor r14d,ebx
  2468. add eax,r13d
  2469. xor r15d,ecx
  2470. ror r14d,2
  2471. add r8d,eax
  2472. add eax,r15d
  2473. mov r13d,r8d
  2474. add r14d,eax
  ; End of one 64-byte block: add the previous state (in memory at the
  ; pointer saved at [rsp+64]) into the eight working registers, store the
  ; sums back, and advance rsi by 64. The cmp against the end pointer at
  ; [rsp+80] is issued before the stores; the mov stores that follow it do
  ; not modify flags, so the closing jb still sees that comparison.
  2475. mov rdi,QWORD[((64+0))+rsp] ; rdi was used as round scratch; reload the state pointer
  2476. mov eax,r14d ; pick up the word left pending in r14d by the last round
  2477. add eax,DWORD[rdi]
  2478. lea rsi,[64+rsi] ; next input block
  2479. add ebx,DWORD[4+rdi]
  2480. add ecx,DWORD[8+rdi]
  2481. add edx,DWORD[12+rdi]
  2482. add r8d,DWORD[16+rdi]
  2483. add r9d,DWORD[20+rdi]
  2484. add r10d,DWORD[24+rdi]
  2485. add r11d,DWORD[28+rdi]
  2486. cmp rsi,QWORD[((64+16))+rsp]
  2487. mov DWORD[rdi],eax
  2488. mov DWORD[4+rdi],ebx
  2489. mov DWORD[8+rdi],ecx
  2490. mov DWORD[12+rdi],edx
  2491. mov DWORD[16+rdi],r8d
  2492. mov DWORD[20+rdi],r9d
  2493. mov DWORD[24+rdi],r10d
  2494. mov DWORD[28+rdi],r11d
  2495. jb NEAR $L$loop_ssse3 ; unsigned compare: loop while rsi is below the end pointer
  ; Function exit. rsi is loaded with the stack pointer value recorded at
  ; [rsp+88] on entry (taken before the six pushes), so the pushed registers
  ; sit at [rsi-8] (rbx) down to [rsi-48] (r15). xmm6-xmm9 are reloaded from
  ; the frame first, while rsp still addresses it; then rsp is reset and the
  ; Win64 epilogue reloads rdi/rsi from the slots written at function entry.
  2496. mov rsi,QWORD[88+rsp]
  2497. movaps xmm6,XMMWORD[((64+32))+rsp]
  2498. movaps xmm7,XMMWORD[((64+48))+rsp]
  2499. movaps xmm8,XMMWORD[((64+64))+rsp]
  2500. movaps xmm9,XMMWORD[((64+80))+rsp]
  2501. mov r15,QWORD[((-48))+rsi]
  2502. mov r14,QWORD[((-40))+rsi]
  2503. mov r13,QWORD[((-32))+rsi]
  2504. mov r12,QWORD[((-24))+rsi]
  2505. mov rbp,QWORD[((-16))+rsi]
  2506. mov rbx,QWORD[((-8))+rsi]
  2507. lea rsp,[rsi] ; discard the frame and the six pushes in one step
  2508. $L$epilogue_ssse3:
  2509. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  2510. mov rsi,QWORD[16+rsp]
  2511. DB 0F3h,0C3h ;repret
  2512. $L$SEH_end_sha256_block_data_order_ssse3:
  2513. ALIGN 64
  2514. sha256_block_data_order_avx:
  2515. mov QWORD[8+rsp],rdi ;WIN64 prologue
  2516. mov QWORD[16+rsp],rsi
  2517. mov rax,rsp
  2518. $L$SEH_begin_sha256_block_data_order_avx:
  2519. mov rdi,rcx
  2520. mov rsi,rdx
  2521. mov rdx,r8
  2522. $L$avx_shortcut:
  2523. mov rax,rsp
  2524. push rbx
  2525. push rbp
  2526. push r12
  2527. push r13
  2528. push r14
  2529. push r15
  2530. shl rdx,4
  2531. sub rsp,160
  2532. lea rdx,[rdx*4+rsi]
  2533. and rsp,-64
  2534. mov QWORD[((64+0))+rsp],rdi
  2535. mov QWORD[((64+8))+rsp],rsi
  2536. mov QWORD[((64+16))+rsp],rdx
  2537. mov QWORD[88+rsp],rax
  2538. movaps XMMWORD[(64+32)+rsp],xmm6
  2539. movaps XMMWORD[(64+48)+rsp],xmm7
  2540. movaps XMMWORD[(64+64)+rsp],xmm8
  2541. movaps XMMWORD[(64+80)+rsp],xmm9
  2542. $L$prologue_avx:
  2543. vzeroupper
  2544. mov eax,DWORD[rdi]
  2545. mov ebx,DWORD[4+rdi]
  2546. mov ecx,DWORD[8+rdi]
  2547. mov edx,DWORD[12+rdi]
  2548. mov r8d,DWORD[16+rdi]
  2549. mov r9d,DWORD[20+rdi]
  2550. mov r10d,DWORD[24+rdi]
  2551. mov r11d,DWORD[28+rdi]
  2552. vmovdqa xmm8,XMMWORD[((K256+512+32))]
  2553. vmovdqa xmm9,XMMWORD[((K256+512+64))]
  2554. jmp NEAR $L$loop_avx
  2555. ALIGN 16
  2556. $L$loop_avx:
  2557. vmovdqa xmm7,XMMWORD[((K256+512))]
  2558. vmovdqu xmm0,XMMWORD[rsi]
  2559. vmovdqu xmm1,XMMWORD[16+rsi]
  2560. vmovdqu xmm2,XMMWORD[32+rsi]
  2561. vmovdqu xmm3,XMMWORD[48+rsi]
  2562. vpshufb xmm0,xmm0,xmm7
  2563. lea rbp,[K256]
  2564. vpshufb xmm1,xmm1,xmm7
  2565. vpshufb xmm2,xmm2,xmm7
  2566. vpaddd xmm4,xmm0,XMMWORD[rbp]
  2567. vpshufb xmm3,xmm3,xmm7
  2568. vpaddd xmm5,xmm1,XMMWORD[32+rbp]
  2569. vpaddd xmm6,xmm2,XMMWORD[64+rbp]
  2570. vpaddd xmm7,xmm3,XMMWORD[96+rbp]
  2571. vmovdqa XMMWORD[rsp],xmm4
  2572. mov r14d,eax
  2573. vmovdqa XMMWORD[16+rsp],xmm5
  2574. mov edi,ebx
  2575. vmovdqa XMMWORD[32+rsp],xmm6
  2576. xor edi,ecx
  2577. vmovdqa XMMWORD[48+rsp],xmm7
  2578. mov r13d,r8d
  2579. jmp NEAR $L$avx_00_47
  2580. ALIGN 16
  2581. $L$avx_00_47:
  2582. sub rbp,-128
  2583. vpalignr xmm4,xmm1,xmm0,4
  2584. shrd r13d,r13d,14
  2585. mov eax,r14d
  2586. mov r12d,r9d
  2587. vpalignr xmm7,xmm3,xmm2,4
  2588. shrd r14d,r14d,9
  2589. xor r13d,r8d
  2590. xor r12d,r10d
  2591. vpsrld xmm6,xmm4,7
  2592. shrd r13d,r13d,5
  2593. xor r14d,eax
  2594. and r12d,r8d
  2595. vpaddd xmm0,xmm0,xmm7
  2596. xor r13d,r8d
  2597. add r11d,DWORD[rsp]
  2598. mov r15d,eax
  2599. vpsrld xmm7,xmm4,3
  2600. xor r12d,r10d
  2601. shrd r14d,r14d,11
  2602. xor r15d,ebx
  2603. vpslld xmm5,xmm4,14
  2604. add r11d,r12d
  2605. shrd r13d,r13d,6
  2606. and edi,r15d
  2607. vpxor xmm4,xmm7,xmm6
  2608. xor r14d,eax
  2609. add r11d,r13d
  2610. xor edi,ebx
  2611. vpshufd xmm7,xmm3,250
  2612. shrd r14d,r14d,2
  2613. add edx,r11d
  2614. add r11d,edi
  2615. vpsrld xmm6,xmm6,11
  2616. mov r13d,edx
  2617. add r14d,r11d
  2618. shrd r13d,r13d,14
  2619. vpxor xmm4,xmm4,xmm5
  2620. mov r11d,r14d
  2621. mov r12d,r8d
  2622. shrd r14d,r14d,9
  2623. vpslld xmm5,xmm5,11
  2624. xor r13d,edx
  2625. xor r12d,r9d
  2626. shrd r13d,r13d,5
  2627. vpxor xmm4,xmm4,xmm6
  2628. xor r14d,r11d
  2629. and r12d,edx
  2630. xor r13d,edx
  2631. vpsrld xmm6,xmm7,10
  2632. add r10d,DWORD[4+rsp]
  2633. mov edi,r11d
  2634. xor r12d,r9d
  2635. vpxor xmm4,xmm4,xmm5
  2636. shrd r14d,r14d,11
  2637. xor edi,eax
  2638. add r10d,r12d
  2639. vpsrlq xmm7,xmm7,17
  2640. shrd r13d,r13d,6
  2641. and r15d,edi
  2642. xor r14d,r11d
  2643. vpaddd xmm0,xmm0,xmm4
  2644. add r10d,r13d
  2645. xor r15d,eax
  2646. shrd r14d,r14d,2
  2647. vpxor xmm6,xmm6,xmm7
  2648. add ecx,r10d
  2649. add r10d,r15d
  2650. mov r13d,ecx
  2651. vpsrlq xmm7,xmm7,2
  2652. add r14d,r10d
  2653. shrd r13d,r13d,14
  2654. mov r10d,r14d
  2655. vpxor xmm6,xmm6,xmm7
  2656. mov r12d,edx
  2657. shrd r14d,r14d,9
  2658. xor r13d,ecx
  2659. vpshufb xmm6,xmm6,xmm8
  2660. xor r12d,r8d
  2661. shrd r13d,r13d,5
  2662. xor r14d,r10d
  2663. vpaddd xmm0,xmm0,xmm6
  2664. and r12d,ecx
  2665. xor r13d,ecx
  2666. add r9d,DWORD[8+rsp]
  2667. vpshufd xmm7,xmm0,80
  2668. mov r15d,r10d
  2669. xor r12d,r8d
  2670. shrd r14d,r14d,11
  2671. vpsrld xmm6,xmm7,10
  2672. xor r15d,r11d
  2673. add r9d,r12d
  2674. shrd r13d,r13d,6
  2675. vpsrlq xmm7,xmm7,17
  2676. and edi,r15d
  2677. xor r14d,r10d
  2678. add r9d,r13d
  2679. vpxor xmm6,xmm6,xmm7
  2680. xor edi,r11d
  2681. shrd r14d,r14d,2
  2682. add ebx,r9d
  2683. vpsrlq xmm7,xmm7,2
  2684. add r9d,edi
  2685. mov r13d,ebx
  2686. add r14d,r9d
  2687. vpxor xmm6,xmm6,xmm7
  2688. shrd r13d,r13d,14
  2689. mov r9d,r14d
  2690. mov r12d,ecx
  2691. vpshufb xmm6,xmm6,xmm9
  2692. shrd r14d,r14d,9
  2693. xor r13d,ebx
  2694. xor r12d,edx
  2695. vpaddd xmm0,xmm0,xmm6
  2696. shrd r13d,r13d,5
  2697. xor r14d,r9d
  2698. and r12d,ebx
  2699. vpaddd xmm6,xmm0,XMMWORD[rbp]
  2700. xor r13d,ebx
  2701. add r8d,DWORD[12+rsp]
  2702. mov edi,r9d
  2703. xor r12d,edx
  2704. shrd r14d,r14d,11
  2705. xor edi,r10d
  2706. add r8d,r12d
  2707. shrd r13d,r13d,6
  2708. and r15d,edi
  2709. xor r14d,r9d
  2710. add r8d,r13d
  2711. xor r15d,r10d
  2712. shrd r14d,r14d,2
  2713. add eax,r8d
  2714. add r8d,r15d
  2715. mov r13d,eax
  2716. add r14d,r8d
  2717. vmovdqa XMMWORD[rsp],xmm6
  2718. vpalignr xmm4,xmm2,xmm1,4
  2719. shrd r13d,r13d,14
  2720. mov r8d,r14d
  2721. mov r12d,ebx
  2722. vpalignr xmm7,xmm0,xmm3,4
  2723. shrd r14d,r14d,9
  2724. xor r13d,eax
  2725. xor r12d,ecx
  2726. vpsrld xmm6,xmm4,7
  2727. shrd r13d,r13d,5
  2728. xor r14d,r8d
  2729. and r12d,eax
  2730. vpaddd xmm1,xmm1,xmm7
  2731. xor r13d,eax
  2732. add edx,DWORD[16+rsp]
  2733. mov r15d,r8d
  2734. vpsrld xmm7,xmm4,3
  2735. xor r12d,ecx
  2736. shrd r14d,r14d,11
  2737. xor r15d,r9d
  2738. vpslld xmm5,xmm4,14
  2739. add edx,r12d
  2740. shrd r13d,r13d,6
  2741. and edi,r15d
  2742. vpxor xmm4,xmm7,xmm6
  2743. xor r14d,r8d
  2744. add edx,r13d
  2745. xor edi,r9d
  2746. vpshufd xmm7,xmm0,250
  2747. shrd r14d,r14d,2
  2748. add r11d,edx
  2749. add edx,edi
  2750. vpsrld xmm6,xmm6,11
  2751. mov r13d,r11d
  2752. add r14d,edx
  2753. shrd r13d,r13d,14
  2754. vpxor xmm4,xmm4,xmm5
  2755. mov edx,r14d
  2756. mov r12d,eax
  2757. shrd r14d,r14d,9
  2758. vpslld xmm5,xmm5,11
  2759. xor r13d,r11d
  2760. xor r12d,ebx
  2761. shrd r13d,r13d,5
  2762. vpxor xmm4,xmm4,xmm6
  2763. xor r14d,edx
  2764. and r12d,r11d
  2765. xor r13d,r11d
  2766. vpsrld xmm6,xmm7,10
  2767. add ecx,DWORD[20+rsp]
  2768. mov edi,edx
  2769. xor r12d,ebx
  2770. vpxor xmm4,xmm4,xmm5
  2771. shrd r14d,r14d,11
  2772. xor edi,r8d
  2773. add ecx,r12d
  2774. vpsrlq xmm7,xmm7,17
  2775. shrd r13d,r13d,6
  2776. and r15d,edi
  2777. xor r14d,edx
  2778. vpaddd xmm1,xmm1,xmm4
  2779. add ecx,r13d
  2780. xor r15d,r8d
  2781. shrd r14d,r14d,2
  2782. vpxor xmm6,xmm6,xmm7
  2783. add r10d,ecx
  2784. add ecx,r15d
  2785. mov r13d,r10d
  2786. vpsrlq xmm7,xmm7,2
  2787. add r14d,ecx
  2788. shrd r13d,r13d,14
  2789. mov ecx,r14d
  2790. vpxor xmm6,xmm6,xmm7
  2791. mov r12d,r11d
  2792. shrd r14d,r14d,9
  2793. xor r13d,r10d
  2794. vpshufb xmm6,xmm6,xmm8
  2795. xor r12d,eax
  2796. shrd r13d,r13d,5
  2797. xor r14d,ecx
  2798. vpaddd xmm1,xmm1,xmm6
  2799. and r12d,r10d
  2800. xor r13d,r10d
  2801. add ebx,DWORD[24+rsp]
  2802. vpshufd xmm7,xmm1,80
  2803. mov r15d,ecx
  2804. xor r12d,eax
  2805. shrd r14d,r14d,11
  2806. vpsrld xmm6,xmm7,10
  2807. xor r15d,edx
  2808. add ebx,r12d
  2809. shrd r13d,r13d,6
  2810. vpsrlq xmm7,xmm7,17
  2811. and edi,r15d
  2812. xor r14d,ecx
  2813. add ebx,r13d
  2814. vpxor xmm6,xmm6,xmm7
  2815. xor edi,edx
  2816. shrd r14d,r14d,2
  2817. add r9d,ebx
  2818. vpsrlq xmm7,xmm7,2
  2819. add ebx,edi
  2820. mov r13d,r9d
  2821. add r14d,ebx
  2822. vpxor xmm6,xmm6,xmm7
  2823. shrd r13d,r13d,14
  2824. mov ebx,r14d
  2825. mov r12d,r10d
  2826. vpshufb xmm6,xmm6,xmm9
  2827. shrd r14d,r14d,9
  2828. xor r13d,r9d
  2829. xor r12d,r11d
  2830. vpaddd xmm1,xmm1,xmm6
  2831. shrd r13d,r13d,5
  2832. xor r14d,ebx
  2833. and r12d,r9d
  2834. vpaddd xmm6,xmm1,XMMWORD[32+rbp]
  2835. xor r13d,r9d
  2836. add eax,DWORD[28+rsp]
  2837. mov edi,ebx
  2838. xor r12d,r11d
  2839. shrd r14d,r14d,11
  2840. xor edi,ecx
  2841. add eax,r12d
  2842. shrd r13d,r13d,6
  2843. and r15d,edi
  2844. xor r14d,ebx
  2845. add eax,r13d
  2846. xor r15d,ecx
  2847. shrd r14d,r14d,2
  2848. add r8d,eax
  2849. add eax,r15d
  2850. mov r13d,r8d
  2851. add r14d,eax
  2852. vmovdqa XMMWORD[16+rsp],xmm6
  2853. vpalignr xmm4,xmm3,xmm2,4
  2854. shrd r13d,r13d,14
  2855. mov eax,r14d
  2856. mov r12d,r9d
  2857. vpalignr xmm7,xmm1,xmm0,4
  2858. shrd r14d,r14d,9
  2859. xor r13d,r8d
  2860. xor r12d,r10d
  2861. vpsrld xmm6,xmm4,7
  2862. shrd r13d,r13d,5
  2863. xor r14d,eax
  2864. and r12d,r8d
  2865. vpaddd xmm2,xmm2,xmm7
  2866. xor r13d,r8d
  2867. add r11d,DWORD[32+rsp]
  2868. mov r15d,eax
  2869. vpsrld xmm7,xmm4,3
  2870. xor r12d,r10d
  2871. shrd r14d,r14d,11
  2872. xor r15d,ebx
  2873. vpslld xmm5,xmm4,14
  2874. add r11d,r12d
  2875. shrd r13d,r13d,6
  2876. and edi,r15d
  2877. vpxor xmm4,xmm7,xmm6
  2878. xor r14d,eax
  2879. add r11d,r13d
  2880. xor edi,ebx
  2881. vpshufd xmm7,xmm1,250
  2882. shrd r14d,r14d,2
  2883. add edx,r11d
  2884. add r11d,edi
  2885. vpsrld xmm6,xmm6,11
  2886. mov r13d,edx
  2887. add r14d,r11d
  2888. shrd r13d,r13d,14
  2889. vpxor xmm4,xmm4,xmm5
  2890. mov r11d,r14d
  2891. mov r12d,r8d
  2892. shrd r14d,r14d,9
  2893. vpslld xmm5,xmm5,11
  2894. xor r13d,edx
  2895. xor r12d,r9d
  2896. shrd r13d,r13d,5
  2897. vpxor xmm4,xmm4,xmm6
  2898. xor r14d,r11d
  2899. and r12d,edx
  2900. xor r13d,edx
  2901. vpsrld xmm6,xmm7,10
  2902. add r10d,DWORD[36+rsp]
  2903. mov edi,r11d
  2904. xor r12d,r9d
  2905. vpxor xmm4,xmm4,xmm5
  2906. shrd r14d,r14d,11
  2907. xor edi,eax
  2908. add r10d,r12d
  2909. vpsrlq xmm7,xmm7,17
  2910. shrd r13d,r13d,6
  2911. and r15d,edi
  2912. xor r14d,r11d
  2913. vpaddd xmm2,xmm2,xmm4
  2914. add r10d,r13d
  2915. xor r15d,eax
  2916. shrd r14d,r14d,2
  2917. vpxor xmm6,xmm6,xmm7
  2918. add ecx,r10d
  2919. add r10d,r15d
  2920. mov r13d,ecx
  2921. vpsrlq xmm7,xmm7,2
  2922. add r14d,r10d
  2923. shrd r13d,r13d,14
  2924. mov r10d,r14d
  2925. vpxor xmm6,xmm6,xmm7
  2926. mov r12d,edx
  2927. shrd r14d,r14d,9
  2928. xor r13d,ecx
  2929. vpshufb xmm6,xmm6,xmm8
  2930. xor r12d,r8d
  2931. shrd r13d,r13d,5
  2932. xor r14d,r10d
  2933. vpaddd xmm2,xmm2,xmm6
  2934. and r12d,ecx
  2935. xor r13d,ecx
  2936. add r9d,DWORD[40+rsp]
  2937. vpshufd xmm7,xmm2,80
  2938. mov r15d,r10d
  2939. xor r12d,r8d
  2940. shrd r14d,r14d,11
  2941. vpsrld xmm6,xmm7,10
  2942. xor r15d,r11d
  2943. add r9d,r12d
  2944. shrd r13d,r13d,6
  2945. vpsrlq xmm7,xmm7,17
  2946. and edi,r15d
  2947. xor r14d,r10d
  2948. add r9d,r13d
  2949. vpxor xmm6,xmm6,xmm7
  2950. xor edi,r11d
  2951. shrd r14d,r14d,2
  2952. add ebx,r9d
  2953. vpsrlq xmm7,xmm7,2
  2954. add r9d,edi
  2955. mov r13d,ebx
  2956. add r14d,r9d
  2957. vpxor xmm6,xmm6,xmm7
  2958. shrd r13d,r13d,14
  2959. mov r9d,r14d
  2960. mov r12d,ecx
  2961. vpshufb xmm6,xmm6,xmm9
  2962. shrd r14d,r14d,9
  2963. xor r13d,ebx
  2964. xor r12d,edx
  2965. vpaddd xmm2,xmm2,xmm6
  2966. shrd r13d,r13d,5
  2967. xor r14d,r9d
  2968. and r12d,ebx
  2969. vpaddd xmm6,xmm2,XMMWORD[64+rbp]
  2970. xor r13d,ebx
  2971. add r8d,DWORD[44+rsp]
  2972. mov edi,r9d
  2973. xor r12d,edx
  2974. shrd r14d,r14d,11
  2975. xor edi,r10d
  2976. add r8d,r12d
  2977. shrd r13d,r13d,6
  2978. and r15d,edi
  2979. xor r14d,r9d
  2980. add r8d,r13d
  2981. xor r15d,r10d
  2982. shrd r14d,r14d,2
  2983. add eax,r8d
  2984. add r8d,r15d
  2985. mov r13d,eax
  2986. add r14d,r8d
  2987. vmovdqa XMMWORD[32+rsp],xmm6
  2988. vpalignr xmm4,xmm0,xmm3,4
  2989. shrd r13d,r13d,14
  2990. mov r8d,r14d
  2991. mov r12d,ebx
  2992. vpalignr xmm7,xmm2,xmm1,4
  2993. shrd r14d,r14d,9
  2994. xor r13d,eax
  2995. xor r12d,ecx
  2996. vpsrld xmm6,xmm4,7
  2997. shrd r13d,r13d,5
  2998. xor r14d,r8d
  2999. and r12d,eax
  3000. vpaddd xmm3,xmm3,xmm7
  3001. xor r13d,eax
  3002. add edx,DWORD[48+rsp]
  3003. mov r15d,r8d
  3004. vpsrld xmm7,xmm4,3
  3005. xor r12d,ecx
  3006. shrd r14d,r14d,11
  3007. xor r15d,r9d
  3008. vpslld xmm5,xmm4,14
  3009. add edx,r12d
  3010. shrd r13d,r13d,6
  3011. and edi,r15d
  3012. vpxor xmm4,xmm7,xmm6
  3013. xor r14d,r8d
  3014. add edx,r13d
  3015. xor edi,r9d
  3016. vpshufd xmm7,xmm2,250
  3017. shrd r14d,r14d,2
  3018. add r11d,edx
  3019. add edx,edi
  3020. vpsrld xmm6,xmm6,11
  3021. mov r13d,r11d
  3022. add r14d,edx
  3023. shrd r13d,r13d,14
  3024. vpxor xmm4,xmm4,xmm5
  3025. mov edx,r14d
  3026. mov r12d,eax
  3027. shrd r14d,r14d,9
  3028. vpslld xmm5,xmm5,11
  3029. xor r13d,r11d
  3030. xor r12d,ebx
  3031. shrd r13d,r13d,5
  3032. vpxor xmm4,xmm4,xmm6
  3033. xor r14d,edx
  3034. and r12d,r11d
  3035. xor r13d,r11d
  3036. vpsrld xmm6,xmm7,10
  3037. add ecx,DWORD[52+rsp]
  3038. mov edi,edx
  3039. xor r12d,ebx
  3040. vpxor xmm4,xmm4,xmm5
  3041. shrd r14d,r14d,11
  3042. xor edi,r8d
  3043. add ecx,r12d
  3044. vpsrlq xmm7,xmm7,17
  3045. shrd r13d,r13d,6
  3046. and r15d,edi
  3047. xor r14d,edx
  3048. vpaddd xmm3,xmm3,xmm4
  3049. add ecx,r13d
  3050. xor r15d,r8d
  3051. shrd r14d,r14d,2
  3052. vpxor xmm6,xmm6,xmm7
  3053. add r10d,ecx
  3054. add ecx,r15d
  3055. mov r13d,r10d
  3056. vpsrlq xmm7,xmm7,2
  3057. add r14d,ecx
  3058. shrd r13d,r13d,14
  3059. mov ecx,r14d
  3060. vpxor xmm6,xmm6,xmm7
  3061. mov r12d,r11d
  3062. shrd r14d,r14d,9
  3063. xor r13d,r10d
  3064. vpshufb xmm6,xmm6,xmm8
  3065. xor r12d,eax
  3066. shrd r13d,r13d,5
  3067. xor r14d,ecx
  3068. vpaddd xmm3,xmm3,xmm6
  3069. and r12d,r10d
  3070. xor r13d,r10d
  3071. add ebx,DWORD[56+rsp]
  3072. vpshufd xmm7,xmm3,80
  3073. mov r15d,ecx
  3074. xor r12d,eax
  3075. shrd r14d,r14d,11
  3076. vpsrld xmm6,xmm7,10
  3077. xor r15d,edx
  3078. add ebx,r12d
  3079. shrd r13d,r13d,6
  3080. vpsrlq xmm7,xmm7,17
  3081. and edi,r15d
  3082. xor r14d,ecx
  3083. add ebx,r13d
  3084. vpxor xmm6,xmm6,xmm7
  3085. xor edi,edx
  3086. shrd r14d,r14d,2
  3087. add r9d,ebx
  3088. vpsrlq xmm7,xmm7,2
  3089. add ebx,edi
  3090. mov r13d,r9d
  3091. add r14d,ebx
  3092. vpxor xmm6,xmm6,xmm7
  3093. shrd r13d,r13d,14
  3094. mov ebx,r14d
  3095. mov r12d,r10d
  3096. vpshufb xmm6,xmm6,xmm9
  3097. shrd r14d,r14d,9
  3098. xor r13d,r9d
  3099. xor r12d,r11d
  3100. vpaddd xmm3,xmm3,xmm6
  3101. shrd r13d,r13d,5
  3102. xor r14d,ebx
  3103. and r12d,r9d
  3104. vpaddd xmm6,xmm3,XMMWORD[96+rbp]
  3105. xor r13d,r9d
  3106. add eax,DWORD[60+rsp]
  3107. mov edi,ebx
  3108. xor r12d,r11d
  3109. shrd r14d,r14d,11
  3110. xor edi,ecx
  3111. add eax,r12d
  3112. shrd r13d,r13d,6
  3113. and r15d,edi
  3114. xor r14d,ebx
  3115. add eax,r13d
  3116. xor r15d,ecx
  3117. shrd r14d,r14d,2
  3118. add r8d,eax
  3119. add eax,r15d
  3120. mov r13d,r8d
  3121. add r14d,eax
  3122. vmovdqa XMMWORD[48+rsp],xmm6
  3123. cmp BYTE[131+rbp],0
  3124. jne NEAR $L$avx_00_47
  3125. shrd r13d,r13d,14
  3126. mov eax,r14d
  3127. mov r12d,r9d
  3128. shrd r14d,r14d,9
  3129. xor r13d,r8d
  3130. xor r12d,r10d
  3131. shrd r13d,r13d,5
  3132. xor r14d,eax
  3133. and r12d,r8d
  3134. xor r13d,r8d
  3135. add r11d,DWORD[rsp]
  3136. mov r15d,eax
  3137. xor r12d,r10d
  3138. shrd r14d,r14d,11
  3139. xor r15d,ebx
  3140. add r11d,r12d
  3141. shrd r13d,r13d,6
  3142. and edi,r15d
  3143. xor r14d,eax
  3144. add r11d,r13d
  3145. xor edi,ebx
  3146. shrd r14d,r14d,2
  3147. add edx,r11d
  3148. add r11d,edi
  3149. mov r13d,edx
  3150. add r14d,r11d
  3151. shrd r13d,r13d,14
  3152. mov r11d,r14d
  3153. mov r12d,r8d
  3154. shrd r14d,r14d,9
  3155. xor r13d,edx
  3156. xor r12d,r9d
  3157. shrd r13d,r13d,5
  3158. xor r14d,r11d
  3159. and r12d,edx
  3160. xor r13d,edx
  3161. add r10d,DWORD[4+rsp]
  3162. mov edi,r11d
  3163. xor r12d,r9d
  3164. shrd r14d,r14d,11
  3165. xor edi,eax
  3166. add r10d,r12d
  3167. shrd r13d,r13d,6
  3168. and r15d,edi
  3169. xor r14d,r11d
  3170. add r10d,r13d
  3171. xor r15d,eax
  3172. shrd r14d,r14d,2
  3173. add ecx,r10d
  3174. add r10d,r15d
  3175. mov r13d,ecx
  3176. add r14d,r10d
  3177. shrd r13d,r13d,14
  3178. mov r10d,r14d
  3179. mov r12d,edx
  3180. shrd r14d,r14d,9
  3181. xor r13d,ecx
  3182. xor r12d,r8d
  3183. shrd r13d,r13d,5
  3184. xor r14d,r10d
  3185. and r12d,ecx
  3186. xor r13d,ecx
  3187. add r9d,DWORD[8+rsp]
  3188. mov r15d,r10d
  3189. xor r12d,r8d
  3190. shrd r14d,r14d,11
  3191. xor r15d,r11d
  3192. add r9d,r12d
  3193. shrd r13d,r13d,6
  3194. and edi,r15d
  3195. xor r14d,r10d
  3196. add r9d,r13d
  3197. xor edi,r11d
  3198. shrd r14d,r14d,2
  3199. add ebx,r9d
  3200. add r9d,edi
  3201. mov r13d,ebx
  3202. add r14d,r9d
  3203. shrd r13d,r13d,14
  3204. mov r9d,r14d
  3205. mov r12d,ecx
  3206. shrd r14d,r14d,9
  3207. xor r13d,ebx
  3208. xor r12d,edx
  3209. shrd r13d,r13d,5
  3210. xor r14d,r9d
  3211. and r12d,ebx
  3212. xor r13d,ebx
  3213. add r8d,DWORD[12+rsp]
  3214. mov edi,r9d
  3215. xor r12d,edx
  3216. shrd r14d,r14d,11
  3217. xor edi,r10d
  3218. add r8d,r12d
  3219. shrd r13d,r13d,6
  3220. and r15d,edi
  3221. xor r14d,r9d
  3222. add r8d,r13d
  3223. xor r15d,r10d
  3224. shrd r14d,r14d,2
  3225. add eax,r8d
  3226. add r8d,r15d
  3227. mov r13d,eax
  3228. add r14d,r8d
  3229. shrd r13d,r13d,14
  3230. mov r8d,r14d
  3231. mov r12d,ebx
  3232. shrd r14d,r14d,9
  3233. xor r13d,eax
  3234. xor r12d,ecx
  3235. shrd r13d,r13d,5
  3236. xor r14d,r8d
  3237. and r12d,eax
  3238. xor r13d,eax
  3239. add edx,DWORD[16+rsp]
  3240. mov r15d,r8d
  3241. xor r12d,ecx
  3242. shrd r14d,r14d,11
  3243. xor r15d,r9d
  3244. add edx,r12d
  3245. shrd r13d,r13d,6
  3246. and edi,r15d
  3247. xor r14d,r8d
  3248. add edx,r13d
  3249. xor edi,r9d
  3250. shrd r14d,r14d,2
  3251. add r11d,edx
  3252. add edx,edi
  3253. mov r13d,r11d
  3254. add r14d,edx
  3255. shrd r13d,r13d,14
  3256. mov edx,r14d
  3257. mov r12d,eax
  3258. shrd r14d,r14d,9
  3259. xor r13d,r11d
  3260. xor r12d,ebx
  3261. shrd r13d,r13d,5
  3262. xor r14d,edx
  3263. and r12d,r11d
  3264. xor r13d,r11d
  3265. add ecx,DWORD[20+rsp]
  3266. mov edi,edx
  3267. xor r12d,ebx
  3268. shrd r14d,r14d,11
  3269. xor edi,r8d
  3270. add ecx,r12d
  3271. shrd r13d,r13d,6
  3272. and r15d,edi
  3273. xor r14d,edx
  3274. add ecx,r13d
  3275. xor r15d,r8d
  3276. shrd r14d,r14d,2
  3277. add r10d,ecx
  3278. add ecx,r15d
  3279. mov r13d,r10d
  3280. add r14d,ecx
  3281. shrd r13d,r13d,14
  3282. mov ecx,r14d
  3283. mov r12d,r11d
  3284. shrd r14d,r14d,9
  3285. xor r13d,r10d
  3286. xor r12d,eax
  3287. shrd r13d,r13d,5
  3288. xor r14d,ecx
  3289. and r12d,r10d
  3290. xor r13d,r10d
  3291. add ebx,DWORD[24+rsp]
  3292. mov r15d,ecx
  3293. xor r12d,eax
  3294. shrd r14d,r14d,11
  3295. xor r15d,edx
  3296. add ebx,r12d
  3297. shrd r13d,r13d,6
  3298. and edi,r15d
  3299. xor r14d,ecx
  3300. add ebx,r13d
  3301. xor edi,edx
  3302. shrd r14d,r14d,2
  3303. add r9d,ebx
  3304. add ebx,edi
  3305. mov r13d,r9d
  3306. add r14d,ebx
  3307. shrd r13d,r13d,14
  3308. mov ebx,r14d
  3309. mov r12d,r10d
  3310. shrd r14d,r14d,9
  3311. xor r13d,r9d
  3312. xor r12d,r11d
  3313. shrd r13d,r13d,5
  3314. xor r14d,ebx
  3315. and r12d,r9d
  3316. xor r13d,r9d
  3317. add eax,DWORD[28+rsp]
  3318. mov edi,ebx
  3319. xor r12d,r11d
  3320. shrd r14d,r14d,11
  3321. xor edi,ecx
  3322. add eax,r12d
  3323. shrd r13d,r13d,6
  3324. and r15d,edi
  3325. xor r14d,ebx
  3326. add eax,r13d
  3327. xor r15d,ecx
  3328. shrd r14d,r14d,2
  3329. add r8d,eax
  3330. add eax,r15d
  3331. mov r13d,r8d
  3332. add r14d,eax
  3333. shrd r13d,r13d,14
  3334. mov eax,r14d
  3335. mov r12d,r9d
  3336. shrd r14d,r14d,9
  3337. xor r13d,r8d
  3338. xor r12d,r10d
  3339. shrd r13d,r13d,5
  3340. xor r14d,eax
  3341. and r12d,r8d
  3342. xor r13d,r8d
  3343. add r11d,DWORD[32+rsp]
  3344. mov r15d,eax
  3345. xor r12d,r10d
  3346. shrd r14d,r14d,11
  3347. xor r15d,ebx
  3348. add r11d,r12d
  3349. shrd r13d,r13d,6
  3350. and edi,r15d
  3351. xor r14d,eax
  3352. add r11d,r13d
  3353. xor edi,ebx
  3354. shrd r14d,r14d,2
  3355. add edx,r11d
  3356. add r11d,edi
  3357. mov r13d,edx
  3358. add r14d,r11d
  3359. shrd r13d,r13d,14
  3360. mov r11d,r14d
  3361. mov r12d,r8d
  3362. shrd r14d,r14d,9
  3363. xor r13d,edx
  3364. xor r12d,r9d
  3365. shrd r13d,r13d,5
  3366. xor r14d,r11d
  3367. and r12d,edx
  3368. xor r13d,edx
  3369. add r10d,DWORD[36+rsp]
  3370. mov edi,r11d
  3371. xor r12d,r9d
  3372. shrd r14d,r14d,11
  3373. xor edi,eax
  3374. add r10d,r12d
  3375. shrd r13d,r13d,6
  3376. and r15d,edi
  3377. xor r14d,r11d
  3378. add r10d,r13d
  3379. xor r15d,eax
  3380. shrd r14d,r14d,2
  3381. add ecx,r10d
  3382. add r10d,r15d
  3383. mov r13d,ecx
  3384. add r14d,r10d
  3385. shrd r13d,r13d,14
  3386. mov r10d,r14d
  3387. mov r12d,edx
  3388. shrd r14d,r14d,9
  3389. xor r13d,ecx
  3390. xor r12d,r8d
  3391. shrd r13d,r13d,5
  3392. xor r14d,r10d
  3393. and r12d,ecx
  3394. xor r13d,ecx
  3395. add r9d,DWORD[40+rsp]
  3396. mov r15d,r10d
  3397. xor r12d,r8d
  3398. shrd r14d,r14d,11
  3399. xor r15d,r11d
  3400. add r9d,r12d
  3401. shrd r13d,r13d,6
  3402. and edi,r15d
  3403. xor r14d,r10d
  3404. add r9d,r13d
  3405. xor edi,r11d
  3406. shrd r14d,r14d,2
  3407. add ebx,r9d
  3408. add r9d,edi
  3409. mov r13d,ebx
  3410. add r14d,r9d
  3411. shrd r13d,r13d,14
  3412. mov r9d,r14d
  3413. mov r12d,ecx
  3414. shrd r14d,r14d,9
  3415. xor r13d,ebx
  3416. xor r12d,edx
  3417. shrd r13d,r13d,5
  3418. xor r14d,r9d
  3419. and r12d,ebx
  3420. xor r13d,ebx
  3421. add r8d,DWORD[44+rsp]
  3422. mov edi,r9d
  3423. xor r12d,edx
  3424. shrd r14d,r14d,11
  3425. xor edi,r10d
  3426. add r8d,r12d
  3427. shrd r13d,r13d,6
  3428. and r15d,edi
  3429. xor r14d,r9d
  3430. add r8d,r13d
  3431. xor r15d,r10d
  3432. shrd r14d,r14d,2
  3433. add eax,r8d
  3434. add r8d,r15d
  3435. mov r13d,eax
  3436. add r14d,r8d
  3437. shrd r13d,r13d,14
  3438. mov r8d,r14d
  3439. mov r12d,ebx
  3440. shrd r14d,r14d,9
  3441. xor r13d,eax
  3442. xor r12d,ecx
  3443. shrd r13d,r13d,5
  3444. xor r14d,r8d
  3445. and r12d,eax
  3446. xor r13d,eax
  3447. add edx,DWORD[48+rsp]
  3448. mov r15d,r8d
  3449. xor r12d,ecx
  3450. shrd r14d,r14d,11
  3451. xor r15d,r9d
  3452. add edx,r12d
  3453. shrd r13d,r13d,6
  3454. and edi,r15d
  3455. xor r14d,r8d
  3456. add edx,r13d
  3457. xor edi,r9d
  3458. shrd r14d,r14d,2
  3459. add r11d,edx
  3460. add edx,edi
  3461. mov r13d,r11d
  3462. add r14d,edx
  3463. shrd r13d,r13d,14
  3464. mov edx,r14d
  3465. mov r12d,eax
  3466. shrd r14d,r14d,9
  3467. xor r13d,r11d
  3468. xor r12d,ebx
  3469. shrd r13d,r13d,5
  3470. xor r14d,edx
  3471. and r12d,r11d
  3472. xor r13d,r11d
  3473. add ecx,DWORD[52+rsp]
  3474. mov edi,edx
  3475. xor r12d,ebx
  3476. shrd r14d,r14d,11
  3477. xor edi,r8d
  3478. add ecx,r12d
  3479. shrd r13d,r13d,6
  3480. and r15d,edi
  3481. xor r14d,edx
  3482. add ecx,r13d
  3483. xor r15d,r8d
  3484. shrd r14d,r14d,2
  3485. add r10d,ecx
  3486. add ecx,r15d
  3487. mov r13d,r10d
  3488. add r14d,ecx
  3489. shrd r13d,r13d,14
  3490. mov ecx,r14d
  3491. mov r12d,r11d
  3492. shrd r14d,r14d,9
  3493. xor r13d,r10d
  3494. xor r12d,eax
  3495. shrd r13d,r13d,5
  3496. xor r14d,ecx
  3497. and r12d,r10d
  3498. xor r13d,r10d
  3499. add ebx,DWORD[56+rsp]
  3500. mov r15d,ecx
  3501. xor r12d,eax
  3502. shrd r14d,r14d,11
  3503. xor r15d,edx
  3504. add ebx,r12d
  3505. shrd r13d,r13d,6
  3506. and edi,r15d
  3507. xor r14d,ecx
  3508. add ebx,r13d
  3509. xor edi,edx
  3510. shrd r14d,r14d,2
  3511. add r9d,ebx
  3512. add ebx,edi
  3513. mov r13d,r9d
  3514. add r14d,ebx
  3515. shrd r13d,r13d,14
  3516. mov ebx,r14d
  3517. mov r12d,r10d
  3518. shrd r14d,r14d,9
  3519. xor r13d,r9d
  3520. xor r12d,r11d
  3521. shrd r13d,r13d,5
  3522. xor r14d,ebx
  3523. and r12d,r9d
  3524. xor r13d,r9d
  3525. add eax,DWORD[60+rsp]
  3526. mov edi,ebx
  3527. xor r12d,r11d
  3528. shrd r14d,r14d,11
  3529. xor edi,ecx
  3530. add eax,r12d
  3531. shrd r13d,r13d,6
  3532. and r15d,edi
  3533. xor r14d,ebx
  3534. add eax,r13d
  3535. xor r15d,ecx
  3536. shrd r14d,r14d,2
  3537. add r8d,eax
  3538. add eax,r15d
  3539. mov r13d,r8d
  3540. add r14d,eax
  3541. mov rdi,QWORD[((64+0))+rsp]
  3542. mov eax,r14d
  3543. add eax,DWORD[rdi]
  3544. lea rsi,[64+rsi]
  3545. add ebx,DWORD[4+rdi]
  3546. add ecx,DWORD[8+rdi]
  3547. add edx,DWORD[12+rdi]
  3548. add r8d,DWORD[16+rdi]
  3549. add r9d,DWORD[20+rdi]
  3550. add r10d,DWORD[24+rdi]
  3551. add r11d,DWORD[28+rdi]
  3552. cmp rsi,QWORD[((64+16))+rsp]
  3553. mov DWORD[rdi],eax
  3554. mov DWORD[4+rdi],ebx
  3555. mov DWORD[8+rdi],ecx
  3556. mov DWORD[12+rdi],edx
  3557. mov DWORD[16+rdi],r8d
  3558. mov DWORD[20+rdi],r9d
  3559. mov DWORD[24+rdi],r10d
  3560. mov DWORD[28+rdi],r11d
  3561. jb NEAR $L$loop_avx
  3562. mov rsi,QWORD[88+rsp]
  3563. vzeroupper
  3564. movaps xmm6,XMMWORD[((64+32))+rsp]
  3565. movaps xmm7,XMMWORD[((64+48))+rsp]
  3566. movaps xmm8,XMMWORD[((64+64))+rsp]
  3567. movaps xmm9,XMMWORD[((64+80))+rsp]
  3568. mov r15,QWORD[((-48))+rsi]
  3569. mov r14,QWORD[((-40))+rsi]
  3570. mov r13,QWORD[((-32))+rsi]
  3571. mov r12,QWORD[((-24))+rsi]
  3572. mov rbp,QWORD[((-16))+rsi]
  3573. mov rbx,QWORD[((-8))+rsi]
  3574. lea rsp,[rsi]
  3575. $L$epilogue_avx:
  3576. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  3577. mov rsi,QWORD[16+rsp]
  3578. DB 0F3h,0C3h ;repret
  3579. $L$SEH_end_sha256_block_data_order_avx:
  3580. EXTERN __imp_RtlVirtualUnwind
  ; ---------------------------------------------------------------------
  ; se_handler -- Win64 language-specific exception handler referenced by
  ; the three $L$SEH_info_* records in .xdata (scalar, SSSE3 and AVX
  ; variants of sha256_block_data_order).
  ;
  ; In (as used below; names per the documented Win64 handler signature
  ; and CONTEXT / DISPATCHER_CONTEXT layouts -- verify against winnt.h):
  ;   r8 = CONTEXT*             (120=Rax 144=Rbx 152=Rsp 160=Rbp 168=Rsi
  ;                              176=Rdi 216..240=R12..R15 248=Rip 512=Xmm6)
  ;   r9 = DISPATCHER_CONTEXT*  (0=ControlPc 8=ImageBase 16=FunctionEntry
  ;                              24=EstablisherFrame 40=ContextRecord
  ;                              56=HandlerData)
  ; HandlerData points at two image-relative DWORDs: the function's
  ; "prologue done" label and its epilogue label.
  ;
  ; The handler rewrites the CONTEXT so it looks as if the interrupted
  ; function had just been entered, hands the result to RtlVirtualUnwind,
  ; and returns 1 in eax.
  ; ---------------------------------------------------------------------
  3581. ALIGN 16
  3582. se_handler:
  3583. push rsi
  3584. push rdi
  3585. push rbx
  3586. push rbp
  3587. push r12
  3588. push r13
  3589. push r14
  3590. push r15
  3591. pushfq
  3592. sub rsp,64 ; 32 bytes home space + 4 stack arguments for the call below
  3593. mov rax,QWORD[120+r8] ; rax = context Rax
  3594. mov rbx,QWORD[248+r8] ; rbx = context Rip (faulting address)
  3595. mov rsi,QWORD[8+r9] ; rsi = image base
  3596. mov r11,QWORD[56+r9] ; r11 = handler data (two RVAs)
  3597. mov r10d,DWORD[r11] ; HandlerData[0]: prologue label RVA
  3598. lea r10,[r10*1+rsi] ; RVA -> absolute address
  3599. cmp rbx,r10
  3600. jb NEAR $L$in_prologue ; Rip before prologue label: frame not set up, use context Rax
  3601. mov rax,QWORD[152+r8] ; rax = context Rsp
  3602. mov r10d,DWORD[4+r11] ; HandlerData[1]: epilogue label RVA
  3603. lea r10,[r10*1+rsi]
  3604. cmp rbx,r10
  3605. jae NEAR $L$in_prologue ; Rip at/after epilogue label: use context Rsp as is
  3606. mov rsi,rax ; keep the function's aligned frame pointer
  3607. mov rax,QWORD[((64+24))+rax] ; rax = stack pointer value the function stashed at frame+88
  3608. mov rbx,QWORD[((-8))+rax] ; NOTE: rbx no longer holds Rip from here on
  3609. mov rbp,QWORD[((-16))+rax]
  3610. mov r12,QWORD[((-24))+rax]
  3611. mov r13,QWORD[((-32))+rax]
  3612. mov r14,QWORD[((-40))+rax]
  3613. mov r15,QWORD[((-48))+rax]
  3614. mov QWORD[144+r8],rbx ; write the six saved registers back into the CONTEXT
  3615. mov QWORD[160+r8],rbp
  3616. mov QWORD[216+r8],r12
  3617. mov QWORD[224+r8],r13
  3618. mov QWORD[232+r8],r14
  3619. mov QWORD[240+r8],r15
  3620. lea r10,[$L$epilogue]
  ; Compare the faulting Rip straight from the CONTEXT.  The original code
  ; compared rbx here, but rbx was overwritten above with the interrupted
  ; function's saved rbx, so the test no longer looked at the Rip.
  3621. cmp QWORD[248+r8],r10
  3622. jb NEAR $L$in_prologue ; Rip below $L$epilogue: skip the xmm restore
  3623. lea rsi,[((64+32))+rsi] ; source: frame+96, where the epilogue above reloads xmm6..xmm9 from
  3624. lea rdi,[512+r8] ; destination: context Xmm6
  3625. mov ecx,8 ; 8 qwords = 4 xmm registers
  3626. DD 0xa548f3fc ; bytes FC F3 48 A5 = cld ; rep movsq
  3627. $L$in_prologue:
  3628. mov rdi,QWORD[8+rax] ; rdi/rsi spilled by the ";WIN64 prologue" stores
  3629. mov rsi,QWORD[16+rax]
  3630. mov QWORD[152+r8],rax ; context Rsp = recovered stack pointer
  3631. mov QWORD[168+r8],rsi ; context Rsi
  3632. mov QWORD[176+r8],rdi ; context Rdi
  3633. mov rdi,QWORD[40+r9] ; destination: dispatcher's ContextRecord
  3634. mov rsi,r8 ; source: the CONTEXT patched above
  3635. mov ecx,154 ; 154 qwords = 1232 bytes
  3636. DD 0xa548f3fc ; cld ; rep movsq
  3637. mov rsi,r9 ; rsi = DISPATCHER_CONTEXT*
  3638. xor rcx,rcx ; arg1 = 0
  3639. mov rdx,QWORD[8+rsi] ; arg2 = ImageBase
  3640. mov r8,QWORD[rsi] ; arg3 = ControlPc
  3641. mov r9,QWORD[16+rsi] ; arg4 = FunctionEntry
  3642. mov r10,QWORD[40+rsi] ; arg5 = ContextRecord
  3643. lea r11,[56+rsi] ; arg6 = &HandlerData
  3644. lea r12,[24+rsi] ; arg7 = &EstablisherFrame
  3645. mov QWORD[32+rsp],r10
  3646. mov QWORD[40+rsp],r11
  3647. mov QWORD[48+rsp],r12
  3648. mov QWORD[56+rsp],rcx ; arg8 = 0 (NULL)
  3649. call QWORD[__imp_RtlVirtualUnwind]
  3650. mov eax,1 ; return value 1 (presumably ExceptionContinueSearch -- confirm)
  3651. add rsp,64
  3652. popfq
  3653. pop r15
  3654. pop r14
  3655. pop r13
  3656. pop r12
  3657. pop rbp
  3658. pop rbx
  3659. pop rdi
  3660. pop rsi
  3661. DB 0F3h,0C3h ;repret
  ; Function table: one (begin, end, unwind-info) triple of image-relative
  ; addresses per code range -- the scalar, SSSE3 and AVX variants of
  ; sha256_block_data_order.  Each third entry names a record in .xdata.
  3662. section .pdata rdata align=4
  3663. ALIGN 4
  3664. DD $L$SEH_begin_sha256_block_data_order wrt ..imagebase
  3665. DD $L$SEH_end_sha256_block_data_order wrt ..imagebase
  3666. DD $L$SEH_info_sha256_block_data_order wrt ..imagebase
  3667. DD $L$SEH_begin_sha256_block_data_order_ssse3 wrt ..imagebase
  3668. DD $L$SEH_end_sha256_block_data_order_ssse3 wrt ..imagebase
  3669. DD $L$SEH_info_sha256_block_data_order_ssse3 wrt ..imagebase
  3670. DD $L$SEH_begin_sha256_block_data_order_avx wrt ..imagebase
  3671. DD $L$SEH_end_sha256_block_data_order_avx wrt ..imagebase
  3672. DD $L$SEH_info_sha256_block_data_order_avx wrt ..imagebase
  ; Unwind-info records referenced from .pdata.  Every record has the same
  ; shape:
  ;   DB 9,0,0,0   header; per the documented UNWIND_INFO layout 9 would be
  ;                version 1 with the "has exception handler" flag, and the
  ;                zero bytes mean no prologue size, unwind codes or frame
  ;                register -- confirm against the Win64 unwind docs
  ;   DD handler   image-relative address of se_handler
  ;   DD a, b      handler data: se_handler reads these two image-relative
  ;                DWORDs as the bounds of the range in which the function's
  ;                stack frame is fully set up
  3673. section .xdata rdata align=8
  3674. ALIGN 8
  3675. $L$SEH_info_sha256_block_data_order:
  3676. DB 9,0,0,0
  3677. DD se_handler wrt ..imagebase
  3678. DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase
  3679. $L$SEH_info_sha256_block_data_order_ssse3:
  3680. DB 9,0,0,0
  3681. DD se_handler wrt ..imagebase
  3682. DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase
  3683. $L$SEH_info_sha256_block_data_order_avx:
  3684. DB 9,0,0,0
  3685. DD se_handler wrt ..imagebase
  3686. DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase