chacha-x86.asm 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975
  1. ; This file is generated from a similarly-named Perl script in the BoringSSL
  2. ; source tree. Do not edit by hand.
  ; Optional symbol-prefixing: when BORINGSSL_PREFIX is defined, pull in the
  ; include file that renames the exported symbols.
  3. %ifdef BORINGSSL_PREFIX
  4. %include "boringssl_prefix_symbols_nasm.inc"
  5. %endif
  ; Select the code section according to the NASM output format.
  ; The obj and win32 formats request 64-byte section alignment; any other
  ; format falls back to a plain .text code section.
  6. %ifidn __OUTPUT_FORMAT__,obj
  7. section code use32 class=code align=64
  8. %elifidn __OUTPUT_FORMAT__,win32
  ; NOTE(review): presumably the @feat.00 marker consumed by the Microsoft
  ; linker (SAFESEH) -- confirm against the NASM win32 documentation.
  9. $@feat.00 equ 1
  10. section .text code align=64
  11. %else
  12. section .text code
  13. %endif
  ; _ChaCha20_ctr32 -- ChaCha20 keystream XOR, 32-bit x86 entry point.
  ;
  ; Arguments are read from the stack. After the four register pushes below,
  ; the slots are used as follows (names are descriptive, inferred from use):
  ;   [20+esp]  destination pointer (64-byte results are stored through it)
  ;   [24+esp]  source pointer      (data is XORed in from it)
  ;   [28+esp]  byte length         (compared with 0 and 64, decremented by 64)
  ;   [32+esp]  pointer to 8 dwords copied into the state (presumably the key)
  ;   [36+esp]  pointer to 4 dwords copied into the state; the first dword is
  ;             incremented once per 64-byte block (block counter)
  ; ebp, ebx, esi and edi are saved on entry and restored before ret.
  ; A zero length returns immediately. Otherwise the routine either jumps to
  ; L$ssse3_shortcut (inside _ChaCha20_ssse3) or runs the scalar code below.
  14. global _ChaCha20_ctr32
  15. align 16
  16. _ChaCha20_ctr32:
  17. L$_ChaCha20_ctr32_begin:
  18. push ebp
  19. push ebx
  20. push esi
  21. push edi
  ; Return early when the length argument is zero.
  22. xor eax,eax
  23. cmp eax,DWORD [28+esp]
  24. je NEAR L$000no_data
  ; call/pop leaves the run-time address of L$pic_point in eax. The SSSE3
  ; path uses eax to locate L$ssse3_data relative to this label.
  25. call L$pic_point
  26. L$pic_point:
  27. pop eax
  ; Capability check: bit 24 (16777216) of dword 0 and bit 9 (512) of dword 1
  ; of _OPENSSL_ia32cap_P must both be set to take the SSSE3 path.
  ; NOTE(review): presumably the FXSR and SSSE3 feature bits -- confirm
  ; against the ia32cap definition.
  28. lea ebp,[_OPENSSL_ia32cap_P]
  29. test DWORD [ebp],16777216
  30. jz NEAR L$001x86
  31. test DWORD [4+ebp],512
  32. jz NEAR L$001x86
  ; Registers are already pushed and eax = address of L$pic_point, which is
  ; what the code at L$ssse3_shortcut expects.
  33. jmp NEAR L$ssse3_shortcut
  ; ---- Scalar path --------------------------------------------------------
  ; Stack frame after "sub esp,132":
  ;   [0..60+esp]     16-dword working state (word i at [4*i+esp])
  ;   [80..108+esp]   copy of the 8 dwords from the [32+esp] argument (words 4-11)
  ;   [112..124+esp]  copy of the 4 dwords from the [36+esp] argument (words 12-15)
  ;   [128+esp]       saved round-loop counter
  ;   [152/156/160+esp] the caller's destination / source / length slots
  34. L$001x86:
  35. mov esi,DWORD [32+esp]
  36. mov edi,DWORD [36+esp]
  37. sub esp,132
  38. mov eax,DWORD [esi]
  39. mov ebx,DWORD [4+esi]
  40. mov ecx,DWORD [8+esi]
  41. mov edx,DWORD [12+esi]
  42. mov DWORD [80+esp],eax
  43. mov DWORD [84+esp],ebx
  44. mov DWORD [88+esp],ecx
  45. mov DWORD [92+esp],edx
  46. mov eax,DWORD [16+esi]
  47. mov ebx,DWORD [20+esi]
  48. mov ecx,DWORD [24+esi]
  49. mov edx,DWORD [28+esi]
  50. mov DWORD [96+esp],eax
  51. mov DWORD [100+esp],ebx
  52. mov DWORD [104+esp],ecx
  53. mov DWORD [108+esp],edx
  54. mov eax,DWORD [edi]
  55. mov ebx,DWORD [4+edi]
  56. mov ecx,DWORD [8+edi]
  57. mov edx,DWORD [12+edi]
  ; The counter word is stored decremented by one because L$002entry adds one
  ; before every block, including the first.
  58. sub eax,1
  59. mov DWORD [112+esp],eax
  60. mov DWORD [116+esp],ebx
  61. mov DWORD [120+esp],ecx
  62. mov DWORD [124+esp],edx
  63. jmp NEAR L$002entry
  64. align 16
  ; Per-block loop. On re-entry ebx/eax/ecx hold the advanced source pointer,
  ; destination pointer and remaining length; they are written back to the
  ; argument slots so the block body can reuse the registers.
  65. L$003outer_loop:
  66. mov DWORD [156+esp],ebx
  67. mov DWORD [152+esp],eax
  68. mov DWORD [160+esp],ecx
  ; Build the working state for one block. Words 0-3 are the four immediates
  ; 1634760805, 857760878, 2036477234, 1797285236 (0x61707865, 0x3320646e,
  ; 0x79622d32, 0x6b206574: "expand 32-byte k"). Word 0 stays in eax.
  69. L$002entry:
  70. mov eax,1634760805
  71. mov DWORD [4+esp],857760878
  72. mov DWORD [8+esp],2036477234
  73. mov DWORD [12+esp],1797285236
  74. mov ebx,DWORD [84+esp]
  75. mov ebp,DWORD [88+esp]
  76. mov ecx,DWORD [104+esp]
  77. mov esi,DWORD [108+esp]
  78. mov edx,DWORD [116+esp]
  79. mov edi,DWORD [120+esp]
  80. mov DWORD [20+esp],ebx
  81. mov DWORD [24+esp],ebp
  82. mov DWORD [40+esp],ecx
  83. mov DWORD [44+esp],esi
  84. mov DWORD [52+esp],edx
  85. mov DWORD [56+esp],edi
  ; Words 4, 8, 9 and 12 are left in ebp, ecx, esi and edx for the first
  ; quarter-round instead of being stored.
  86. mov ebx,DWORD [92+esp]
  87. mov edi,DWORD [124+esp]
  88. mov edx,DWORD [112+esp]
  89. mov ebp,DWORD [80+esp]
  90. mov ecx,DWORD [96+esp]
  91. mov esi,DWORD [100+esp]
  ; Advance the block counter and keep the new value for the next block.
  92. add edx,1
  93. mov DWORD [28+esp],ebx
  94. mov DWORD [60+esp],edi
  95. mov DWORD [112+esp],edx
  ; 10 loop iterations; each iteration below contains eight
  ; add/xor/rol sequences using rotate counts 16, 12, 8 and 7.
  96. mov ebx,10
  97. jmp NEAR L$004loop
  98. align 16
  ; Round loop. In every quarter-round the "a" word is in eax, "b" in ebx or
  ; ebp, "c" in ecx or esi and "d" in edx or edi; loads and stores for the
  ; neighbouring quarter-rounds are interleaved with the arithmetic.
  ; ebx doubles as the loop counter, so it is parked in [128+esp] meanwhile.
  99. L$004loop:
  100. add eax,ebp
  101. mov DWORD [128+esp],ebx
  102. mov ebx,ebp
  103. xor edx,eax
  104. rol edx,16
  105. add ecx,edx
  106. xor ebx,ecx
  107. mov edi,DWORD [52+esp]
  108. rol ebx,12
  109. mov ebp,DWORD [20+esp]
  110. add eax,ebx
  111. xor edx,eax
  112. mov DWORD [esp],eax
  113. rol edx,8
  114. mov eax,DWORD [4+esp]
  115. add ecx,edx
  116. mov DWORD [48+esp],edx
  117. xor ebx,ecx
  118. add eax,ebp
  119. rol ebx,7
  120. xor edi,eax
  121. mov DWORD [32+esp],ecx
  122. rol edi,16
  123. mov DWORD [16+esp],ebx
  124. add esi,edi
  125. mov ecx,DWORD [40+esp]
  126. xor ebp,esi
  127. mov edx,DWORD [56+esp]
  128. rol ebp,12
  129. mov ebx,DWORD [24+esp]
  130. add eax,ebp
  131. xor edi,eax
  132. mov DWORD [4+esp],eax
  133. rol edi,8
  134. mov eax,DWORD [8+esp]
  135. add esi,edi
  136. mov DWORD [52+esp],edi
  137. xor ebp,esi
  138. add eax,ebx
  139. rol ebp,7
  140. xor edx,eax
  141. mov DWORD [36+esp],esi
  142. rol edx,16
  143. mov DWORD [20+esp],ebp
  144. add ecx,edx
  145. mov esi,DWORD [44+esp]
  146. xor ebx,ecx
  147. mov edi,DWORD [60+esp]
  148. rol ebx,12
  149. mov ebp,DWORD [28+esp]
  150. add eax,ebx
  151. xor edx,eax
  152. mov DWORD [8+esp],eax
  153. rol edx,8
  154. mov eax,DWORD [12+esp]
  155. add ecx,edx
  156. mov DWORD [56+esp],edx
  157. xor ebx,ecx
  158. add eax,ebp
  159. rol ebx,7
  160. xor edi,eax
  161. rol edi,16
  162. mov DWORD [24+esp],ebx
  163. add esi,edi
  164. xor ebp,esi
  165. rol ebp,12
  166. mov ebx,DWORD [20+esp]
  167. add eax,ebp
  168. xor edi,eax
  169. mov DWORD [12+esp],eax
  170. rol edi,8
  171. mov eax,DWORD [esp]
  172. add esi,edi
  173. mov edx,edi
  174. xor ebp,esi
  175. add eax,ebx
  176. rol ebp,7
  177. xor edx,eax
  178. rol edx,16
  179. mov DWORD [28+esp],ebp
  180. add ecx,edx
  181. xor ebx,ecx
  182. mov edi,DWORD [48+esp]
  183. rol ebx,12
  184. mov ebp,DWORD [24+esp]
  185. add eax,ebx
  186. xor edx,eax
  187. mov DWORD [esp],eax
  188. rol edx,8
  189. mov eax,DWORD [4+esp]
  190. add ecx,edx
  191. mov DWORD [60+esp],edx
  192. xor ebx,ecx
  193. add eax,ebp
  194. rol ebx,7
  195. xor edi,eax
  196. mov DWORD [40+esp],ecx
  197. rol edi,16
  198. mov DWORD [20+esp],ebx
  199. add esi,edi
  200. mov ecx,DWORD [32+esp]
  201. xor ebp,esi
  202. mov edx,DWORD [52+esp]
  203. rol ebp,12
  204. mov ebx,DWORD [28+esp]
  205. add eax,ebp
  206. xor edi,eax
  207. mov DWORD [4+esp],eax
  208. rol edi,8
  209. mov eax,DWORD [8+esp]
  210. add esi,edi
  211. mov DWORD [48+esp],edi
  212. xor ebp,esi
  213. add eax,ebx
  214. rol ebp,7
  215. xor edx,eax
  216. mov DWORD [44+esp],esi
  217. rol edx,16
  218. mov DWORD [24+esp],ebp
  219. add ecx,edx
  220. mov esi,DWORD [36+esp]
  221. xor ebx,ecx
  222. mov edi,DWORD [56+esp]
  223. rol ebx,12
  224. mov ebp,DWORD [16+esp]
  225. add eax,ebx
  226. xor edx,eax
  227. mov DWORD [8+esp],eax
  228. rol edx,8
  229. mov eax,DWORD [12+esp]
  230. add ecx,edx
  231. mov DWORD [52+esp],edx
  232. xor ebx,ecx
  233. add eax,ebp
  234. rol ebx,7
  235. xor edi,eax
  236. rol edi,16
  237. mov DWORD [28+esp],ebx
  238. add esi,edi
  239. xor ebp,esi
  240. mov edx,DWORD [48+esp]
  241. rol ebp,12
  ; Reload the loop counter parked at the top of the iteration.
  242. mov ebx,DWORD [128+esp]
  243. add eax,ebp
  244. xor edi,eax
  245. mov DWORD [12+esp],eax
  246. rol edi,8
  247. mov eax,DWORD [esp]
  248. add esi,edi
  249. mov DWORD [56+esp],edi
  250. xor ebp,esi
  251. rol ebp,7
  252. dec ebx
  253. jnz NEAR L$004loop
  ; Rounds finished. Add the original input words back into the result
  ; (words 0, 4, 8, 9 are still live in eax, ebp, ecx, esi) and pick the
  ; full-block or partial-block output path from the remaining length.
  254. mov ebx,DWORD [160+esp]
  255. add eax,1634760805
  256. add ebp,DWORD [80+esp]
  257. add ecx,DWORD [96+esp]
  258. add esi,DWORD [100+esp]
  259. cmp ebx,64
  260. jb NEAR L$005tail
  ; Full block: ebx = source pointer, eax = destination pointer.
  ; Words are processed in three batches of five (plus word 0, parked at
  ; [esp] until the end): add input word, XOR with source, store.
  261. mov ebx,DWORD [156+esp]
  262. add edx,DWORD [112+esp]
  263. add edi,DWORD [120+esp]
  264. xor eax,DWORD [ebx]
  265. xor ebp,DWORD [16+ebx]
  266. mov DWORD [esp],eax
  267. mov eax,DWORD [152+esp]
  268. xor ecx,DWORD [32+ebx]
  269. xor esi,DWORD [36+ebx]
  270. xor edx,DWORD [48+ebx]
  271. xor edi,DWORD [56+ebx]
  272. mov DWORD [16+eax],ebp
  273. mov DWORD [32+eax],ecx
  274. mov DWORD [36+eax],esi
  275. mov DWORD [48+eax],edx
  276. mov DWORD [56+eax],edi
  277. mov ebp,DWORD [4+esp]
  278. mov ecx,DWORD [8+esp]
  279. mov esi,DWORD [12+esp]
  280. mov edx,DWORD [20+esp]
  281. mov edi,DWORD [24+esp]
  282. add ebp,857760878
  283. add ecx,2036477234
  284. add esi,1797285236
  285. add edx,DWORD [84+esp]
  286. add edi,DWORD [88+esp]
  287. xor ebp,DWORD [4+ebx]
  288. xor ecx,DWORD [8+ebx]
  289. xor esi,DWORD [12+ebx]
  290. xor edx,DWORD [20+ebx]
  291. xor edi,DWORD [24+ebx]
  292. mov DWORD [4+eax],ebp
  293. mov DWORD [8+eax],ecx
  294. mov DWORD [12+eax],esi
  295. mov DWORD [20+eax],edx
  296. mov DWORD [24+eax],edi
  297. mov ebp,DWORD [28+esp]
  298. mov ecx,DWORD [40+esp]
  299. mov esi,DWORD [44+esp]
  300. mov edx,DWORD [52+esp]
  301. mov edi,DWORD [60+esp]
  302. add ebp,DWORD [92+esp]
  303. add ecx,DWORD [104+esp]
  304. add esi,DWORD [108+esp]
  305. add edx,DWORD [116+esp]
  306. add edi,DWORD [124+esp]
  307. xor ebp,DWORD [28+ebx]
  308. xor ecx,DWORD [40+ebx]
  309. xor esi,DWORD [44+ebx]
  310. xor edx,DWORD [52+ebx]
  311. xor edi,DWORD [60+ebx]
  ; Advance source and destination by one block, subtract 64 from the
  ; remaining length and loop while any bytes are left.
  312. lea ebx,[64+ebx]
  313. mov DWORD [28+eax],ebp
  314. mov ebp,DWORD [esp]
  315. mov DWORD [40+eax],ecx
  316. mov ecx,DWORD [160+esp]
  317. mov DWORD [44+eax],esi
  318. mov DWORD [52+eax],edx
  319. mov DWORD [60+eax],edi
  320. mov DWORD [eax],ebp
  321. lea eax,[64+eax]
  322. sub ecx,64
  323. jnz NEAR L$003outer_loop
  324. jmp NEAR L$006done
  ; Partial final block (fewer than 64 bytes remain, count in ebx).
  ; Complete the keystream block in [0..60+esp] without touching the source,
  ; then XOR it into the output one byte at a time.
  325. L$005tail:
  326. add edx,DWORD [112+esp]
  327. add edi,DWORD [120+esp]
  328. mov DWORD [esp],eax
  329. mov DWORD [16+esp],ebp
  330. mov DWORD [32+esp],ecx
  331. mov DWORD [36+esp],esi
  332. mov DWORD [48+esp],edx
  333. mov DWORD [56+esp],edi
  334. mov ebp,DWORD [4+esp]
  335. mov ecx,DWORD [8+esp]
  336. mov esi,DWORD [12+esp]
  337. mov edx,DWORD [20+esp]
  338. mov edi,DWORD [24+esp]
  339. add ebp,857760878
  340. add ecx,2036477234
  341. add esi,1797285236
  342. add edx,DWORD [84+esp]
  343. add edi,DWORD [88+esp]
  344. mov DWORD [4+esp],ebp
  345. mov DWORD [8+esp],ecx
  346. mov DWORD [12+esp],esi
  347. mov DWORD [20+esp],edx
  348. mov DWORD [24+esp],edi
  349. mov ebp,DWORD [28+esp]
  350. mov ecx,DWORD [40+esp]
  351. mov esi,DWORD [44+esp]
  352. mov edx,DWORD [52+esp]
  353. mov edi,DWORD [60+esp]
  354. add ebp,DWORD [92+esp]
  355. add ecx,DWORD [104+esp]
  356. add esi,DWORD [108+esp]
  357. add edx,DWORD [116+esp]
  358. add edi,DWORD [124+esp]
  ; From here: ebp = source pointer, ecx = destination pointer, esi = byte index.
  359. mov DWORD [28+esp],ebp
  360. mov ebp,DWORD [156+esp]
  361. mov DWORD [40+esp],ecx
  362. mov ecx,DWORD [152+esp]
  363. mov DWORD [44+esp],esi
  364. xor esi,esi
  365. mov DWORD [52+esp],edx
  366. mov DWORD [60+esp],edi
  367. xor eax,eax
  368. xor edx,edx
  ; Byte loop: out[i] = in[i] XOR keystream[i]. The index is bumped with lea
  ; before the store, hence the -1 displacement on the store address.
  369. L$007tail_loop:
  370. mov al,BYTE [ebp*1+esi]
  371. mov dl,BYTE [esi*1+esp]
  372. lea esi,[1+esi]
  373. xor al,dl
  374. mov BYTE [esi*1+ecx-1],al
  375. dec ebx
  376. jnz NEAR L$007tail_loop
  ; Release the scalar-path frame.
  377. L$006done:
  378. add esp,132
  ; Common exit: restore the saved registers in reverse push order.
  379. L$000no_data:
  380. pop edi
  381. pop esi
  382. pop ebx
  383. pop ebp
  384. ret
  ; _ChaCha20_ssse3 -- SSSE3 implementation; also entered from
  ; _ChaCha20_ctr32 via L$ssse3_shortcut with the four registers already
  ; pushed and eax = run-time address of L$pic_point.
  ;
  ; Stack arguments after the pushes (same slots as _ChaCha20_ctr32):
  ;   [20+esp] -> edi destination, [24+esp] -> esi source,
  ;   [28+esp] -> ecx byte length, [32+esp] -> edx pointer to 32 bytes loaded
  ;   into the state, [36+esp] -> ebx pointer to the 16-byte counter block.
  ;
  ; NOTE(review): when entered through the global label, nothing below sets
  ; eax before it is used to form the address of L$ssse3_data, and a zero
  ; length is not filtered (the 1x tail loop would then start from ecx = 0).
  ; It looks like only the shortcut entry is safe -- confirm there are no
  ; direct callers.
  385. global _ChaCha20_ssse3
  386. align 16
  387. _ChaCha20_ssse3:
  388. L$_ChaCha20_ssse3_begin:
  389. push ebp
  390. push ebx
  391. push esi
  392. push edi
  393. L$ssse3_shortcut:
  394. mov edi,DWORD [20+esp]
  395. mov esi,DWORD [24+esp]
  396. mov ecx,DWORD [28+esp]
  397. mov edx,DWORD [32+esp]
  398. mov ebx,DWORD [36+esp]
  ; Build a 64-byte-aligned frame of at least 524 bytes. The caller's esp is
  ; kept in ebp and saved at [512+esp] so L$011done can restore it.
  399. mov ebp,esp
  400. sub esp,524
  401. and esp,-64
  402. mov DWORD [512+esp],ebp
  ; eax = address of L$ssse3_data (computed relative to L$pic_point).
  403. lea eax,[(L$ssse3_data-L$pic_point)+eax]
  ; xmm3 = 16-byte counter block.
  404. movdqu xmm3,[ebx]
  ; Fewer than 256 bytes: use the one-block-at-a-time path.
  405. cmp ecx,256
  406. jb NEAR L$0081x
  ; ---- Four-blocks-at-a-time path -----------------------------------------
  ; The key and counter pointers are saved at [516+esp] / [520+esp] for the
  ; hand-over to the 1x path. ecx is biased by -256 for the loop test.
  ; ebp = esp+384 addresses the broadcast input state: word i lives at
  ; [ebp-128+16*i], one dword lane per block.
  407. mov DWORD [516+esp],edx
  408. mov DWORD [520+esp],ebx
  409. sub ecx,256
  410. lea ebp,[384+esp]
  411. movdqu xmm7,[edx]
  412. pshufd xmm0,xmm3,0
  413. pshufd xmm1,xmm3,85
  414. pshufd xmm2,xmm3,170
  415. pshufd xmm3,xmm3,255
  ; Counter lanes: add (0,1,2,3) from [48+eax], then subtract (4,4,4,4) from
  ; [64+eax], because the outer loop adds 4 before every batch, first included.
  416. paddd xmm0,[48+eax]
  417. pshufd xmm4,xmm7,0
  418. pshufd xmm5,xmm7,85
  419. psubd xmm0,[64+eax]
  420. pshufd xmm6,xmm7,170
  421. pshufd xmm7,xmm7,255
  422. movdqa [64+ebp],xmm0
  423. movdqa [80+ebp],xmm1
  424. movdqa [96+ebp],xmm2
  425. movdqa [112+ebp],xmm3
  426. movdqu xmm3,[16+edx]
  427. movdqa [ebp-64],xmm4
  428. movdqa [ebp-48],xmm5
  429. movdqa [ebp-32],xmm6
  430. movdqa [ebp-16],xmm7
  ; xmm7 = the four constant words at [32+eax], broadcast below into words 0-3.
  431. movdqa xmm7,[32+eax]
  ; ebx = esp+128 addresses the working state, laid out like the ebp area.
  432. lea ebx,[128+esp]
  433. pshufd xmm0,xmm3,0
  434. pshufd xmm1,xmm3,85
  435. pshufd xmm2,xmm3,170
  436. pshufd xmm3,xmm3,255
  437. pshufd xmm4,xmm7,0
  438. pshufd xmm5,xmm7,85
  439. pshufd xmm6,xmm7,170
  440. pshufd xmm7,xmm7,255
  441. movdqa [ebp],xmm0
  442. movdqa [16+ebp],xmm1
  443. movdqa [32+ebp],xmm2
  444. movdqa [48+ebp],xmm3
  445. movdqa [ebp-128],xmm4
  446. movdqa [ebp-112],xmm5
  447. movdqa [ebp-96],xmm6
  448. movdqa [ebp-80],xmm7
  ; Source and destination pointers are biased by +128; the output code below
  ; addresses the four blocks of a batch at offsets -128, -64, 0 and +64.
  449. lea esi,[128+esi]
  450. lea edi,[128+edi]
  451. jmp NEAR L$009outer_loop
  452. align 16
  ; One batch of four 64-byte blocks. Copy the input state into the working
  ; area, except for words 0, 4, 8, 9 and 12, which start the rounds in
  ; xmm0, xmm3, xmm4, xmm5 and xmm6.
  453. L$009outer_loop:
  454. movdqa xmm1,[ebp-112]
  455. movdqa xmm2,[ebp-96]
  456. movdqa xmm3,[ebp-80]
  457. movdqa xmm5,[ebp-48]
  458. movdqa xmm6,[ebp-32]
  459. movdqa xmm7,[ebp-16]
  460. movdqa [ebx-112],xmm1
  461. movdqa [ebx-96],xmm2
  462. movdqa [ebx-80],xmm3
  463. movdqa [ebx-48],xmm5
  464. movdqa [ebx-32],xmm6
  465. movdqa [ebx-16],xmm7
  466. movdqa xmm2,[32+ebp]
  467. movdqa xmm3,[48+ebp]
  468. movdqa xmm4,[64+ebp]
  469. movdqa xmm5,[80+ebp]
  470. movdqa xmm6,[96+ebp]
  471. movdqa xmm7,[112+ebp]
  ; Advance all four counter lanes by 4 and keep the result as the input
  ; counters for this batch.
  472. paddd xmm4,[64+eax]
  473. movdqa [32+ebx],xmm2
  474. movdqa [48+ebx],xmm3
  475. movdqa [64+ebx],xmm4
  476. movdqa [80+ebx],xmm5
  477. movdqa [96+ebx],xmm6
  478. movdqa [112+ebx],xmm7
  479. movdqa [64+ebp],xmm4
  480. movdqa xmm0,[ebp-128]
  481. movdqa xmm6,xmm4
  482. movdqa xmm3,[ebp-64]
  483. movdqa xmm4,[ebp]
  484. movdqa xmm5,[16+ebp]
  ; 10 iterations of the round loop.
  485. mov edx,10
  486. nop
  487. align 16
  ; Round loop. Rotations by 16 and 8 use pshufb with the byte masks at [eax]
  ; and [16+eax]; rotations by 12 and 7 use a pslld/psrld/por triple.
  ; Loads and stores of neighbouring state words are interleaved throughout.
  488. L$010loop:
  489. paddd xmm0,xmm3
  490. movdqa xmm2,xmm3
  491. pxor xmm6,xmm0
  492. pshufb xmm6,[eax]
  493. paddd xmm4,xmm6
  494. pxor xmm2,xmm4
  495. movdqa xmm3,[ebx-48]
  496. movdqa xmm1,xmm2
  497. pslld xmm2,12
  498. psrld xmm1,20
  499. por xmm2,xmm1
  500. movdqa xmm1,[ebx-112]
  501. paddd xmm0,xmm2
  502. movdqa xmm7,[80+ebx]
  503. pxor xmm6,xmm0
  504. movdqa [ebx-128],xmm0
  505. pshufb xmm6,[16+eax]
  506. paddd xmm4,xmm6
  507. movdqa [64+ebx],xmm6
  508. pxor xmm2,xmm4
  509. paddd xmm1,xmm3
  510. movdqa xmm0,xmm2
  511. pslld xmm2,7
  512. psrld xmm0,25
  513. pxor xmm7,xmm1
  514. por xmm2,xmm0
  515. movdqa [ebx],xmm4
  516. pshufb xmm7,[eax]
  517. movdqa [ebx-64],xmm2
  518. paddd xmm5,xmm7
  519. movdqa xmm4,[32+ebx]
  520. pxor xmm3,xmm5
  521. movdqa xmm2,[ebx-32]
  522. movdqa xmm0,xmm3
  523. pslld xmm3,12
  524. psrld xmm0,20
  525. por xmm3,xmm0
  526. movdqa xmm0,[ebx-96]
  527. paddd xmm1,xmm3
  528. movdqa xmm6,[96+ebx]
  529. pxor xmm7,xmm1
  530. movdqa [ebx-112],xmm1
  531. pshufb xmm7,[16+eax]
  532. paddd xmm5,xmm7
  533. movdqa [80+ebx],xmm7
  534. pxor xmm3,xmm5
  535. paddd xmm0,xmm2
  536. movdqa xmm1,xmm3
  537. pslld xmm3,7
  538. psrld xmm1,25
  539. pxor xmm6,xmm0
  540. por xmm3,xmm1
  541. movdqa [16+ebx],xmm5
  542. pshufb xmm6,[eax]
  543. movdqa [ebx-48],xmm3
  544. paddd xmm4,xmm6
  545. movdqa xmm5,[48+ebx]
  546. pxor xmm2,xmm4
  547. movdqa xmm3,[ebx-16]
  548. movdqa xmm1,xmm2
  549. pslld xmm2,12
  550. psrld xmm1,20
  551. por xmm2,xmm1
  552. movdqa xmm1,[ebx-80]
  553. paddd xmm0,xmm2
  554. movdqa xmm7,[112+ebx]
  555. pxor xmm6,xmm0
  556. movdqa [ebx-96],xmm0
  557. pshufb xmm6,[16+eax]
  558. paddd xmm4,xmm6
  559. movdqa [96+ebx],xmm6
  560. pxor xmm2,xmm4
  561. paddd xmm1,xmm3
  562. movdqa xmm0,xmm2
  563. pslld xmm2,7
  564. psrld xmm0,25
  565. pxor xmm7,xmm1
  566. por xmm2,xmm0
  567. pshufb xmm7,[eax]
  568. movdqa [ebx-32],xmm2
  569. paddd xmm5,xmm7
  570. pxor xmm3,xmm5
  571. movdqa xmm2,[ebx-48]
  572. movdqa xmm0,xmm3
  573. pslld xmm3,12
  574. psrld xmm0,20
  575. por xmm3,xmm0
  576. movdqa xmm0,[ebx-128]
  577. paddd xmm1,xmm3
  578. pxor xmm7,xmm1
  579. movdqa [ebx-80],xmm1
  580. pshufb xmm7,[16+eax]
  581. paddd xmm5,xmm7
  582. movdqa xmm6,xmm7
  583. pxor xmm3,xmm5
  584. paddd xmm0,xmm2
  585. movdqa xmm1,xmm3
  586. pslld xmm3,7
  587. psrld xmm1,25
  588. pxor xmm6,xmm0
  589. por xmm3,xmm1
  590. pshufb xmm6,[eax]
  591. movdqa [ebx-16],xmm3
  592. paddd xmm4,xmm6
  593. pxor xmm2,xmm4
  594. movdqa xmm3,[ebx-32]
  595. movdqa xmm1,xmm2
  596. pslld xmm2,12
  597. psrld xmm1,20
  598. por xmm2,xmm1
  599. movdqa xmm1,[ebx-112]
  600. paddd xmm0,xmm2
  601. movdqa xmm7,[64+ebx]
  602. pxor xmm6,xmm0
  603. movdqa [ebx-128],xmm0
  604. pshufb xmm6,[16+eax]
  605. paddd xmm4,xmm6
  606. movdqa [112+ebx],xmm6
  607. pxor xmm2,xmm4
  608. paddd xmm1,xmm3
  609. movdqa xmm0,xmm2
  610. pslld xmm2,7
  611. psrld xmm0,25
  612. pxor xmm7,xmm1
  613. por xmm2,xmm0
  614. movdqa [32+ebx],xmm4
  615. pshufb xmm7,[eax]
  616. movdqa [ebx-48],xmm2
  617. paddd xmm5,xmm7
  618. movdqa xmm4,[ebx]
  619. pxor xmm3,xmm5
  620. movdqa xmm2,[ebx-16]
  621. movdqa xmm0,xmm3
  622. pslld xmm3,12
  623. psrld xmm0,20
  624. por xmm3,xmm0
  625. movdqa xmm0,[ebx-96]
  626. paddd xmm1,xmm3
  627. movdqa xmm6,[80+ebx]
  628. pxor xmm7,xmm1
  629. movdqa [ebx-112],xmm1
  630. pshufb xmm7,[16+eax]
  631. paddd xmm5,xmm7
  632. movdqa [64+ebx],xmm7
  633. pxor xmm3,xmm5
  634. paddd xmm0,xmm2
  635. movdqa xmm1,xmm3
  636. pslld xmm3,7
  637. psrld xmm1,25
  638. pxor xmm6,xmm0
  639. por xmm3,xmm1
  640. movdqa [48+ebx],xmm5
  641. pshufb xmm6,[eax]
  642. movdqa [ebx-32],xmm3
  643. paddd xmm4,xmm6
  644. movdqa xmm5,[16+ebx]
  645. pxor xmm2,xmm4
  646. movdqa xmm3,[ebx-64]
  647. movdqa xmm1,xmm2
  648. pslld xmm2,12
  649. psrld xmm1,20
  650. por xmm2,xmm1
  651. movdqa xmm1,[ebx-80]
  652. paddd xmm0,xmm2
  653. movdqa xmm7,[96+ebx]
  654. pxor xmm6,xmm0
  655. movdqa [ebx-96],xmm0
  656. pshufb xmm6,[16+eax]
  657. paddd xmm4,xmm6
  658. movdqa [80+ebx],xmm6
  659. pxor xmm2,xmm4
  660. paddd xmm1,xmm3
  661. movdqa xmm0,xmm2
  662. pslld xmm2,7
  663. psrld xmm0,25
  664. pxor xmm7,xmm1
  665. por xmm2,xmm0
  666. pshufb xmm7,[eax]
  667. movdqa [ebx-16],xmm2
  668. paddd xmm5,xmm7
  669. pxor xmm3,xmm5
  670. movdqa xmm0,xmm3
  671. pslld xmm3,12
  672. psrld xmm0,20
  673. por xmm3,xmm0
  674. movdqa xmm0,[ebx-128]
  675. paddd xmm1,xmm3
  676. movdqa xmm6,[64+ebx]
  677. pxor xmm7,xmm1
  678. movdqa [ebx-80],xmm1
  679. pshufb xmm7,[16+eax]
  680. paddd xmm5,xmm7
  681. movdqa [96+ebx],xmm7
  682. pxor xmm3,xmm5
  683. movdqa xmm1,xmm3
  684. pslld xmm3,7
  685. psrld xmm1,25
  686. por xmm3,xmm1
  687. dec edx
  688. jnz NEAR L$010loop
  ; Rounds done: spill the state words still held in registers.
  689. movdqa [ebx-64],xmm3
  690. movdqa [ebx],xmm4
  691. movdqa [16+ebx],xmm5
  692. movdqa [64+ebx],xmm6
  693. movdqa [96+ebx],xmm7
  ; Output, words 0-3: add the input state, transpose the 4x4 dword matrix
  ; with punpck* so each register holds 16 consecutive bytes of one block,
  ; XOR with the source and store. The same pattern repeats for words 4-7,
  ; 8-11 and 12-15, advancing esi/edi by 16 between groups.
  694. movdqa xmm1,[ebx-112]
  695. movdqa xmm2,[ebx-96]
  696. movdqa xmm3,[ebx-80]
  697. paddd xmm0,[ebp-128]
  698. paddd xmm1,[ebp-112]
  699. paddd xmm2,[ebp-96]
  700. paddd xmm3,[ebp-80]
  701. movdqa xmm6,xmm0
  702. punpckldq xmm0,xmm1
  703. movdqa xmm7,xmm2
  704. punpckldq xmm2,xmm3
  705. punpckhdq xmm6,xmm1
  706. punpckhdq xmm7,xmm3
  707. movdqa xmm1,xmm0
  708. punpcklqdq xmm0,xmm2
  709. movdqa xmm3,xmm6
  710. punpcklqdq xmm6,xmm7
  711. punpckhqdq xmm1,xmm2
  712. punpckhqdq xmm3,xmm7
  713. movdqu xmm4,[esi-128]
  714. movdqu xmm5,[esi-64]
  715. movdqu xmm2,[esi]
  716. movdqu xmm7,[64+esi]
  717. lea esi,[16+esi]
  718. pxor xmm4,xmm0
  719. movdqa xmm0,[ebx-64]
  720. pxor xmm5,xmm1
  721. movdqa xmm1,[ebx-48]
  722. pxor xmm6,xmm2
  723. movdqa xmm2,[ebx-32]
  724. pxor xmm7,xmm3
  725. movdqa xmm3,[ebx-16]
  726. movdqu [edi-128],xmm4
  727. movdqu [edi-64],xmm5
  728. movdqu [edi],xmm6
  729. movdqu [64+edi],xmm7
  730. lea edi,[16+edi]
  ; Words 4-7.
  731. paddd xmm0,[ebp-64]
  732. paddd xmm1,[ebp-48]
  733. paddd xmm2,[ebp-32]
  734. paddd xmm3,[ebp-16]
  735. movdqa xmm6,xmm0
  736. punpckldq xmm0,xmm1
  737. movdqa xmm7,xmm2
  738. punpckldq xmm2,xmm3
  739. punpckhdq xmm6,xmm1
  740. punpckhdq xmm7,xmm3
  741. movdqa xmm1,xmm0
  742. punpcklqdq xmm0,xmm2
  743. movdqa xmm3,xmm6
  744. punpcklqdq xmm6,xmm7
  745. punpckhqdq xmm1,xmm2
  746. punpckhqdq xmm3,xmm7
  747. movdqu xmm4,[esi-128]
  748. movdqu xmm5,[esi-64]
  749. movdqu xmm2,[esi]
  750. movdqu xmm7,[64+esi]
  751. lea esi,[16+esi]
  752. pxor xmm4,xmm0
  753. movdqa xmm0,[ebx]
  754. pxor xmm5,xmm1
  755. movdqa xmm1,[16+ebx]
  756. pxor xmm6,xmm2
  757. movdqa xmm2,[32+ebx]
  758. pxor xmm7,xmm3
  759. movdqa xmm3,[48+ebx]
  760. movdqu [edi-128],xmm4
  761. movdqu [edi-64],xmm5
  762. movdqu [edi],xmm6
  763. movdqu [64+edi],xmm7
  764. lea edi,[16+edi]
  ; Words 8-11.
  765. paddd xmm0,[ebp]
  766. paddd xmm1,[16+ebp]
  767. paddd xmm2,[32+ebp]
  768. paddd xmm3,[48+ebp]
  769. movdqa xmm6,xmm0
  770. punpckldq xmm0,xmm1
  771. movdqa xmm7,xmm2
  772. punpckldq xmm2,xmm3
  773. punpckhdq xmm6,xmm1
  774. punpckhdq xmm7,xmm3
  775. movdqa xmm1,xmm0
  776. punpcklqdq xmm0,xmm2
  777. movdqa xmm3,xmm6
  778. punpcklqdq xmm6,xmm7
  779. punpckhqdq xmm1,xmm2
  780. punpckhqdq xmm3,xmm7
  781. movdqu xmm4,[esi-128]
  782. movdqu xmm5,[esi-64]
  783. movdqu xmm2,[esi]
  784. movdqu xmm7,[64+esi]
  785. lea esi,[16+esi]
  786. pxor xmm4,xmm0
  787. movdqa xmm0,[64+ebx]
  788. pxor xmm5,xmm1
  789. movdqa xmm1,[80+ebx]
  790. pxor xmm6,xmm2
  791. movdqa xmm2,[96+ebx]
  792. pxor xmm7,xmm3
  793. movdqa xmm3,[112+ebx]
  794. movdqu [edi-128],xmm4
  795. movdqu [edi-64],xmm5
  796. movdqu [edi],xmm6
  797. movdqu [64+edi],xmm7
  798. lea edi,[16+edi]
  ; Words 12-15.
  799. paddd xmm0,[64+ebp]
  800. paddd xmm1,[80+ebp]
  801. paddd xmm2,[96+ebp]
  802. paddd xmm3,[112+ebp]
  803. movdqa xmm6,xmm0
  804. punpckldq xmm0,xmm1
  805. movdqa xmm7,xmm2
  806. punpckldq xmm2,xmm3
  807. punpckhdq xmm6,xmm1
  808. punpckhdq xmm7,xmm3
  809. movdqa xmm1,xmm0
  810. punpcklqdq xmm0,xmm2
  811. movdqa xmm3,xmm6
  812. punpcklqdq xmm6,xmm7
  813. punpckhqdq xmm1,xmm2
  814. punpckhqdq xmm3,xmm7
  815. movdqu xmm4,[esi-128]
  816. movdqu xmm5,[esi-64]
  817. movdqu xmm2,[esi]
  818. movdqu xmm7,[64+esi]
  ; 3*16 + 208 = 256: esi (and edi below) now point at the next batch.
  819. lea esi,[208+esi]
  820. pxor xmm4,xmm0
  821. pxor xmm5,xmm1
  822. pxor xmm6,xmm2
  823. pxor xmm7,xmm3
  824. movdqu [edi-128],xmm4
  825. movdqu [edi-64],xmm5
  826. movdqu [edi],xmm6
  827. movdqu [64+edi],xmm7
  828. lea edi,[208+edi]
  ; ecx is biased by -256, so "no borrow" means another full batch remains.
  829. sub ecx,256
  830. jnc NEAR L$009outer_loop
  ; Undo the bias; finish if nothing is left.
  831. add ecx,256
  832. jz NEAR L$011done
  ; Hand the remaining (<256) bytes to the 1x path: reload the counter and
  ; key pointers, remove the +128 pointer bias and rebuild the counter block
  ; in xmm3 as (lane-0 counter of the last batch + 4) merged with words 1-3
  ; of the caller's counter block (mask 0,-1,-1,-1 at [112+eax]).
  833. mov ebx,DWORD [520+esp]
  834. lea esi,[esi-128]
  835. mov edx,DWORD [516+esp]
  836. lea edi,[edi-128]
  837. movd xmm2,DWORD [64+ebp]
  838. movdqu xmm3,[ebx]
  839. paddd xmm2,[96+eax]
  840. pand xmm3,[112+eax]
  841. por xmm3,xmm2
  ; ---- One-block-at-a-time path -------------------------------------------
  ; xmm0..xmm3 hold the four 16-byte rows of the state (constants, the two
  ; halves of the 32 bytes at edx, counter block); a copy is kept at
  ; [0..63+esp]. xmm6 / xmm7 hold the rotate-by-16 / rotate-by-8 byte masks.
  842. L$0081x:
  843. movdqa xmm0,[32+eax]
  844. movdqu xmm1,[edx]
  845. movdqu xmm2,[16+edx]
  846. movdqa xmm6,[eax]
  847. movdqa xmm7,[16+eax]
  ; NOTE(review): this dword is overwritten by the 16-byte store of xmm3 to
  ; [48+esp] four instructions below -- looks like a dead store; confirm.
  848. mov DWORD [48+esp],ebp
  849. movdqa [esp],xmm0
  850. movdqa [16+esp],xmm1
  851. movdqa [32+esp],xmm2
  852. movdqa [48+esp],xmm3
  853. mov edx,10
  854. jmp NEAR L$012loop1x
  855. align 16
  ; Next block: reload rows 0-2 and add (1,0,0,0) from [80+eax] to the saved
  ; counter row.
  856. L$013outer1x:
  857. movdqa xmm3,[80+eax]
  858. movdqa xmm0,[esp]
  859. movdqa xmm1,[16+esp]
  860. movdqa xmm2,[32+esp]
  861. paddd xmm3,[48+esp]
  862. mov edx,10
  863. movdqa [48+esp],xmm3
  864. jmp NEAR L$012loop1x
  865. align 16
  ; Round loop, 10 iterations of two rounds each. The db sequences are
  ; hand-encoded 66 0F 38 00 /r instructions:
  ;   102,15,56,0,222 = pshufb xmm3,xmm6   (rotate left 16)
  ;   102,15,56,0,223 = pshufb xmm3,xmm7   (rotate left 8)
  ; The pshufd triples between the two halves rotate rows 1-3 by one, two
  ; and three dwords, and the triple at the end rotates them back.
  866. L$012loop1x:
  867. paddd xmm0,xmm1
  868. pxor xmm3,xmm0
  869. db 102,15,56,0,222
  870. paddd xmm2,xmm3
  871. pxor xmm1,xmm2
  872. movdqa xmm4,xmm1
  873. psrld xmm1,20
  874. pslld xmm4,12
  875. por xmm1,xmm4
  876. paddd xmm0,xmm1
  877. pxor xmm3,xmm0
  878. db 102,15,56,0,223
  879. paddd xmm2,xmm3
  880. pxor xmm1,xmm2
  881. movdqa xmm4,xmm1
  882. psrld xmm1,25
  883. pslld xmm4,7
  884. por xmm1,xmm4
  885. pshufd xmm2,xmm2,78
  886. pshufd xmm1,xmm1,57
  887. pshufd xmm3,xmm3,147
  888. nop
  889. paddd xmm0,xmm1
  890. pxor xmm3,xmm0
  891. db 102,15,56,0,222
  892. paddd xmm2,xmm3
  893. pxor xmm1,xmm2
  894. movdqa xmm4,xmm1
  895. psrld xmm1,20
  896. pslld xmm4,12
  897. por xmm1,xmm4
  898. paddd xmm0,xmm1
  899. pxor xmm3,xmm0
  900. db 102,15,56,0,223
  901. paddd xmm2,xmm3
  902. pxor xmm1,xmm2
  903. movdqa xmm4,xmm1
  904. psrld xmm1,25
  905. pslld xmm4,7
  906. por xmm1,xmm4
  907. pshufd xmm2,xmm2,78
  908. pshufd xmm1,xmm1,147
  909. pshufd xmm3,xmm3,57
  910. dec edx
  911. jnz NEAR L$012loop1x
  ; Add the saved input state, then emit a full block or fall to the tail.
  912. paddd xmm0,[esp]
  913. paddd xmm1,[16+esp]
  914. paddd xmm2,[32+esp]
  915. paddd xmm3,[48+esp]
  916. cmp ecx,64
  917. jb NEAR L$014tail
  918. movdqu xmm4,[esi]
  919. movdqu xmm5,[16+esi]
  920. pxor xmm0,xmm4
  921. movdqu xmm4,[32+esi]
  922. pxor xmm1,xmm5
  923. movdqu xmm5,[48+esi]
  924. pxor xmm2,xmm4
  925. pxor xmm3,xmm5
  926. lea esi,[64+esi]
  927. movdqu [edi],xmm0
  928. movdqu [16+edi],xmm1
  929. movdqu [32+edi],xmm2
  930. movdqu [48+edi],xmm3
  931. lea edi,[64+edi]
  932. sub ecx,64
  933. jnz NEAR L$013outer1x
  934. jmp NEAR L$011done
  ; Partial final block: park the keystream at [0..63+esp] (overwriting the
  ; saved input state, which is no longer needed) and XOR ecx bytes one at a
  ; time, with ebp as the byte index.
  935. L$014tail:
  936. movdqa [esp],xmm0
  937. movdqa [16+esp],xmm1
  938. movdqa [32+esp],xmm2
  939. movdqa [48+esp],xmm3
  940. xor eax,eax
  941. xor edx,edx
  942. xor ebp,ebp
  943. L$015tail_loop:
  944. mov al,BYTE [ebp*1+esp]
  945. mov dl,BYTE [ebp*1+esi]
  946. lea ebp,[1+ebp]
  947. xor al,dl
  948. mov BYTE [ebp*1+edi-1],al
  949. dec ecx
  950. jnz NEAR L$015tail_loop
  ; Restore the caller's esp saved at frame setup, then the pushed registers.
  951. L$011done:
  952. mov esp,DWORD [512+esp]
  953. pop edi
  954. pop esi
  955. pop ebx
  956. pop ebp
  957. ret
  ; Constant table for the SSSE3 code, addressed through eax.
  ; Row offsets as used by the code:
  ;   +0    pshufb mask: rotate every dword left by 16 bits
  ;   +16   pshufb mask: rotate every dword left by 8 bits
  ;   +32   the four constant state words (0x61707865, 0x3320646e,
  ;         0x79622d32, 0x6b206574)
  ;   +48   (0,1,2,3)   per-lane counter offsets for the 4-block path
  ;   +64   (4,4,4,4)   counter step per 4-block batch
  ;   +80   (1,0,0,0)   counter step per block in the 1x path
  ;   +96   (4,0,0,0)   counter step when handing over from 4x to 1x
  ;   +112  (0,-1,-1,-1) mask that clears the counter word, keeps words 1-3
  958. align 64
  959. L$ssse3_data:
  960. db 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
  961. db 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
  962. dd 1634760805,857760878,2036477234,1797285236
  963. dd 0,1,2,3
  964. dd 4,4,4,4
  965. dd 1,0,0,0
  966. dd 4,0,0,0
  967. dd 0,-1,-1,-1
  ; NUL-terminated ASCII identification string:
  ; "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
  968. align 64
  969. db 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
  970. db 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
  971. db 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
  972. db 114,103,62,0
  ; 16-byte common symbol holding the capability words that
  ; _ChaCha20_ctr32 tests (dwords 0 and 1) to choose between the scalar and
  ; SSSE3 paths. It is declared "common", so a definition elsewhere in the
  ; program can be merged with it at link time.
  973. segment .bss
  974. common _OPENSSL_ia32cap_P 16