// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _ChaCha20_ctr32
.private_extern _ChaCha20_ctr32
.align 4
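// void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
//                     const uint32_t key[8], const uint32_t counter[4])
// (argument order assumed from the BoringSSL C prototype; after the four
// pushes below, in_len sits at 28(%esp), key at 32(%esp), counter at 36(%esp))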
_ChaCha20_ctr32:
L_ChaCha20_ctr32_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	xorl %eax,%eax
	cmpl 28(%esp),%eax
	je L000no_data
	call Lpic_point
Lpic_point:
	popl %eax
	movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp
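// Dispatch: take the SSSE3 code path only when OPENSSL_ia32cap_P reports both
// FXSR (bit 24 of word 0) and SSSE3 (bit 9 of word 1); otherwise fall through
// to the integer-only implementation below. (Bit meanings assumed from the
// usual OPENSSL_ia32cap_P layout.)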
	testl $16777216,(%ebp)
	jz L001x86
	testl $512,4(%ebp)
	jz L001x86
	jmp Lssse3_shortcut
L001x86:
	movl 32(%esp),%esi
	movl 36(%esp),%edi
	subl $132,%esp
	movl (%esi),%eax
	movl 4(%esi),%ebx
	movl 8(%esi),%ecx
	movl 12(%esi),%edx
	movl %eax,80(%esp)
	movl %ebx,84(%esp)
	movl %ecx,88(%esp)
	movl %edx,92(%esp)
	movl 16(%esi),%eax
	movl 20(%esi),%ebx
	movl 24(%esi),%ecx
	movl 28(%esi),%edx
	movl %eax,96(%esp)
	movl %ebx,100(%esp)
	movl %ecx,104(%esp)
	movl %edx,108(%esp)
	movl (%edi),%eax
	movl 4(%edi),%ebx
	movl 8(%edi),%ecx
	movl 12(%edi),%edx
	subl $1,%eax
	movl %eax,112(%esp)
	movl %ebx,116(%esp)
	movl %ecx,120(%esp)
	movl %edx,124(%esp)
	jmp L002entry
.align 4,0x90
L003outer_loop:
	movl %ebx,156(%esp)
	movl %eax,152(%esp)
	movl %ecx,160(%esp)
L002entry:
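// Load the four ChaCha constant words "expa" "nd 3" "2-by" "te k"
// (0x61707865, 0x3320646e, 0x79622d32, 0x6b206574) into state words 0-3.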
	movl $1634760805,%eax
	movl $857760878,4(%esp)
	movl $2036477234,8(%esp)
	movl $1797285236,12(%esp)
	movl 84(%esp),%ebx
	movl 88(%esp),%ebp
	movl 104(%esp),%ecx
	movl 108(%esp),%esi
	movl 116(%esp),%edx
	movl 120(%esp),%edi
	movl %ebx,20(%esp)
	movl %ebp,24(%esp)
	movl %ecx,40(%esp)
	movl %esi,44(%esp)
	movl %edx,52(%esp)
	movl %edi,56(%esp)
	movl 92(%esp),%ebx
	movl 124(%esp),%edi
	movl 112(%esp),%edx
	movl 80(%esp),%ebp
	movl 96(%esp),%ecx
	movl 100(%esp),%esi
	addl $1,%edx
	movl %ebx,28(%esp)
	movl %edi,60(%esp)
	movl %edx,112(%esp)
	movl $10,%ebx
	jmp L004loop
.align 4,0x90
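// Main round loop: %ebx counts 10 iterations, each performing one column
// round and one diagonal round (a "double round"), i.e. the usual 20 ChaCha
// rounds. The 16-word working state lives partly in registers and partly at
// 0..60(%esp); the 32-bit rotates are done with roll $16/$12/$8/$7.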
L004loop:
	addl %ebp,%eax
	movl %ebx,128(%esp)
	movl %ebp,%ebx
	xorl %eax,%edx
	roll $16,%edx
	addl %edx,%ecx
	xorl %ecx,%ebx
	movl 52(%esp),%edi
	roll $12,%ebx
	movl 20(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,(%esp)
	roll $8,%edx
	movl 4(%esp),%eax
	addl %edx,%ecx
	movl %edx,48(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	movl %ecx,32(%esp)
	roll $16,%edi
	movl %ebx,16(%esp)
	addl %edi,%esi
	movl 40(%esp),%ecx
	xorl %esi,%ebp
	movl 56(%esp),%edx
	roll $12,%ebp
	movl 24(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,4(%esp)
	roll $8,%edi
	movl 8(%esp),%eax
	addl %edi,%esi
	movl %edi,52(%esp)
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	movl %esi,36(%esp)
	roll $16,%edx
	movl %ebp,20(%esp)
	addl %edx,%ecx
	movl 44(%esp),%esi
	xorl %ecx,%ebx
	movl 60(%esp),%edi
	roll $12,%ebx
	movl 28(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,8(%esp)
	roll $8,%edx
	movl 12(%esp),%eax
	addl %edx,%ecx
	movl %edx,56(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	roll $16,%edi
	movl %ebx,24(%esp)
	addl %edi,%esi
	xorl %esi,%ebp
	roll $12,%ebp
	movl 20(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,12(%esp)
	roll $8,%edi
	movl (%esp),%eax
	addl %edi,%esi
	movl %edi,%edx
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	roll $16,%edx
	movl %ebp,28(%esp)
	addl %edx,%ecx
	xorl %ecx,%ebx
	movl 48(%esp),%edi
	roll $12,%ebx
	movl 24(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,(%esp)
	roll $8,%edx
	movl 4(%esp),%eax
	addl %edx,%ecx
	movl %edx,60(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	movl %ecx,40(%esp)
	roll $16,%edi
	movl %ebx,20(%esp)
	addl %edi,%esi
	movl 32(%esp),%ecx
	xorl %esi,%ebp
	movl 52(%esp),%edx
	roll $12,%ebp
	movl 28(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,4(%esp)
	roll $8,%edi
	movl 8(%esp),%eax
	addl %edi,%esi
	movl %edi,48(%esp)
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	movl %esi,44(%esp)
	roll $16,%edx
	movl %ebp,24(%esp)
	addl %edx,%ecx
	movl 36(%esp),%esi
	xorl %ecx,%ebx
	movl 56(%esp),%edi
	roll $12,%ebx
	movl 16(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,8(%esp)
	roll $8,%edx
	movl 12(%esp),%eax
	addl %edx,%ecx
	movl %edx,52(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	roll $16,%edi
	movl %ebx,28(%esp)
	addl %edi,%esi
	xorl %esi,%ebp
	movl 48(%esp),%edx
	roll $12,%ebp
	movl 128(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,12(%esp)
	roll $8,%edi
	movl (%esp),%eax
	addl %edi,%esi
	movl %edi,56(%esp)
	xorl %esi,%ebp
	roll $7,%ebp
	decl %ebx
	jnz L004loop
	movl 160(%esp),%ebx
	addl $1634760805,%eax
	addl 80(%esp),%ebp
	addl 96(%esp),%ecx
	addl 100(%esp),%esi
	cmpl $64,%ebx
	jb L005tail
	movl 156(%esp),%ebx
	addl 112(%esp),%edx
	addl 120(%esp),%edi
	xorl (%ebx),%eax
	xorl 16(%ebx),%ebp
	movl %eax,(%esp)
	movl 152(%esp),%eax
	xorl 32(%ebx),%ecx
	xorl 36(%ebx),%esi
	xorl 48(%ebx),%edx
	xorl 56(%ebx),%edi
	movl %ebp,16(%eax)
	movl %ecx,32(%eax)
	movl %esi,36(%eax)
	movl %edx,48(%eax)
	movl %edi,56(%eax)
	movl 4(%esp),%ebp
	movl 8(%esp),%ecx
	movl 12(%esp),%esi
	movl 20(%esp),%edx
	movl 24(%esp),%edi
	addl $857760878,%ebp
	addl $2036477234,%ecx
	addl $1797285236,%esi
	addl 84(%esp),%edx
	addl 88(%esp),%edi
	xorl 4(%ebx),%ebp
	xorl 8(%ebx),%ecx
	xorl 12(%ebx),%esi
	xorl 20(%ebx),%edx
	xorl 24(%ebx),%edi
	movl %ebp,4(%eax)
	movl %ecx,8(%eax)
	movl %esi,12(%eax)
	movl %edx,20(%eax)
	movl %edi,24(%eax)
	movl 28(%esp),%ebp
	movl 40(%esp),%ecx
	movl 44(%esp),%esi
	movl 52(%esp),%edx
	movl 60(%esp),%edi
	addl 92(%esp),%ebp
	addl 104(%esp),%ecx
	addl 108(%esp),%esi
	addl 116(%esp),%edx
	addl 124(%esp),%edi
	xorl 28(%ebx),%ebp
	xorl 40(%ebx),%ecx
	xorl 44(%ebx),%esi
	xorl 52(%ebx),%edx
	xorl 60(%ebx),%edi
	leal 64(%ebx),%ebx
	movl %ebp,28(%eax)
	movl (%esp),%ebp
	movl %ecx,40(%eax)
	movl 160(%esp),%ecx
	movl %esi,44(%eax)
	movl %edx,52(%eax)
	movl %edi,60(%eax)
	movl %ebp,(%eax)
	leal 64(%eax),%eax
	subl $64,%ecx
	jnz L003outer_loop
	jmp L006done
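// Fewer than 64 bytes remain: finish adding the input state into the
// keystream block, park it at 0..60(%esp), then XOR it into the output one
// byte at a time.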
L005tail:
	addl 112(%esp),%edx
	addl 120(%esp),%edi
	movl %eax,(%esp)
	movl %ebp,16(%esp)
	movl %ecx,32(%esp)
	movl %esi,36(%esp)
	movl %edx,48(%esp)
	movl %edi,56(%esp)
	movl 4(%esp),%ebp
	movl 8(%esp),%ecx
	movl 12(%esp),%esi
	movl 20(%esp),%edx
	movl 24(%esp),%edi
	addl $857760878,%ebp
	addl $2036477234,%ecx
	addl $1797285236,%esi
	addl 84(%esp),%edx
	addl 88(%esp),%edi
	movl %ebp,4(%esp)
	movl %ecx,8(%esp)
	movl %esi,12(%esp)
	movl %edx,20(%esp)
	movl %edi,24(%esp)
	movl 28(%esp),%ebp
	movl 40(%esp),%ecx
	movl 44(%esp),%esi
	movl 52(%esp),%edx
	movl 60(%esp),%edi
	addl 92(%esp),%ebp
	addl 104(%esp),%ecx
	addl 108(%esp),%esi
	addl 116(%esp),%edx
	addl 124(%esp),%edi
	movl %ebp,28(%esp)
	movl 156(%esp),%ebp
	movl %ecx,40(%esp)
	movl 152(%esp),%ecx
	movl %esi,44(%esp)
	xorl %esi,%esi
	movl %edx,52(%esp)
	movl %edi,60(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
L007tail_loop:
	movb (%esi,%ebp,1),%al
	movb (%esp,%esi,1),%dl
	leal 1(%esi),%esi
	xorb %dl,%al
	movb %al,-1(%ecx,%esi,1)
	decl %ebx
	jnz L007tail_loop
L006done:
	addl $132,%esp
L000no_data:
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
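// SSSE3 implementation. For inputs of 256 bytes or more it computes four
// 64-byte blocks in parallel, holding the state one word per xmm lane;
// shorter inputs and the final leftover bytes go through the single-block
// (1x) path at L0081x below.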
.globl _ChaCha20_ssse3
.private_extern _ChaCha20_ssse3
.align 4
_ChaCha20_ssse3:
L_ChaCha20_ssse3_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
Lssse3_shortcut:
	movl 20(%esp),%edi
	movl 24(%esp),%esi
	movl 28(%esp),%ecx
	movl 32(%esp),%edx
	movl 36(%esp),%ebx
	movl %esp,%ebp
	subl $524,%esp
	andl $-64,%esp
	movl %ebp,512(%esp)
	leal Lssse3_data-Lpic_point(%eax),%eax
	movdqu (%ebx),%xmm3
	cmpl $256,%ecx
	jb L0081x
	movl %edx,516(%esp)
	movl %ebx,520(%esp)
	subl $256,%ecx
	leal 384(%esp),%ebp
	movdqu (%edx),%xmm7
	pshufd $0,%xmm3,%xmm0
	pshufd $85,%xmm3,%xmm1
	pshufd $170,%xmm3,%xmm2
	pshufd $255,%xmm3,%xmm3
	paddd 48(%eax),%xmm0
	pshufd $0,%xmm7,%xmm4
	pshufd $85,%xmm7,%xmm5
	psubd 64(%eax),%xmm0
	pshufd $170,%xmm7,%xmm6
	pshufd $255,%xmm7,%xmm7
	movdqa %xmm0,64(%ebp)
	movdqa %xmm1,80(%ebp)
	movdqa %xmm2,96(%ebp)
	movdqa %xmm3,112(%ebp)
	movdqu 16(%edx),%xmm3
	movdqa %xmm4,-64(%ebp)
	movdqa %xmm5,-48(%ebp)
	movdqa %xmm6,-32(%ebp)
	movdqa %xmm7,-16(%ebp)
	movdqa 32(%eax),%xmm7
	leal 128(%esp),%ebx
	pshufd $0,%xmm3,%xmm0
	pshufd $85,%xmm3,%xmm1
	pshufd $170,%xmm3,%xmm2
	pshufd $255,%xmm3,%xmm3
	pshufd $0,%xmm7,%xmm4
	pshufd $85,%xmm7,%xmm5
	pshufd $170,%xmm7,%xmm6
	pshufd $255,%xmm7,%xmm7
	movdqa %xmm0,(%ebp)
	movdqa %xmm1,16(%ebp)
	movdqa %xmm2,32(%ebp)
	movdqa %xmm3,48(%ebp)
	movdqa %xmm4,-128(%ebp)
	movdqa %xmm5,-112(%ebp)
	movdqa %xmm6,-96(%ebp)
	movdqa %xmm7,-80(%ebp)
	leal 128(%esi),%esi
	leal 128(%edi),%edi
	jmp L009outer_loop
.align 4,0x90
L009outer_loop:
	movdqa -112(%ebp),%xmm1
	movdqa -96(%ebp),%xmm2
	movdqa -80(%ebp),%xmm3
	movdqa -48(%ebp),%xmm5
	movdqa -32(%ebp),%xmm6
	movdqa -16(%ebp),%xmm7
	movdqa %xmm1,-112(%ebx)
	movdqa %xmm2,-96(%ebx)
	movdqa %xmm3,-80(%ebx)
	movdqa %xmm5,-48(%ebx)
	movdqa %xmm6,-32(%ebx)
	movdqa %xmm7,-16(%ebx)
	movdqa 32(%ebp),%xmm2
	movdqa 48(%ebp),%xmm3
	movdqa 64(%ebp),%xmm4
	movdqa 80(%ebp),%xmm5
	movdqa 96(%ebp),%xmm6
	movdqa 112(%ebp),%xmm7
	paddd 64(%eax),%xmm4
	movdqa %xmm2,32(%ebx)
	movdqa %xmm3,48(%ebx)
	movdqa %xmm4,64(%ebx)
	movdqa %xmm5,80(%ebx)
	movdqa %xmm6,96(%ebx)
	movdqa %xmm7,112(%ebx)
	movdqa %xmm4,64(%ebp)
	movdqa -128(%ebp),%xmm0
	movdqa %xmm4,%xmm6
	movdqa -64(%ebp),%xmm3
	movdqa (%ebp),%xmm4
	movdqa 16(%ebp),%xmm5
	movl $10,%edx
	nop
.align 4,0x90
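// Four-block double-round loop (10 iterations = 20 rounds). Rotations by 16
// and 8 use the pshufb masks at (%eax) and 16(%eax); rotations by 12 and 7
// are built from pslld/psrld/por. State words not currently in registers are
// spilled to the scratch area addressed by %ebx.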
L010loop:
	paddd %xmm3,%xmm0
	movdqa %xmm3,%xmm2
	pxor %xmm0,%xmm6
	pshufb (%eax),%xmm6
	paddd %xmm6,%xmm4
	pxor %xmm4,%xmm2
	movdqa -48(%ebx),%xmm3
	movdqa %xmm2,%xmm1
	pslld $12,%xmm2
	psrld $20,%xmm1
	por %xmm1,%xmm2
	movdqa -112(%ebx),%xmm1
	paddd %xmm2,%xmm0
	movdqa 80(%ebx),%xmm7
	pxor %xmm0,%xmm6
	movdqa %xmm0,-128(%ebx)
	pshufb 16(%eax),%xmm6
	paddd %xmm6,%xmm4
	movdqa %xmm6,64(%ebx)
	pxor %xmm4,%xmm2
	paddd %xmm3,%xmm1
	movdqa %xmm2,%xmm0
	pslld $7,%xmm2
	psrld $25,%xmm0
	pxor %xmm1,%xmm7
	por %xmm0,%xmm2
	movdqa %xmm4,(%ebx)
	pshufb (%eax),%xmm7
	movdqa %xmm2,-64(%ebx)
	paddd %xmm7,%xmm5
	movdqa 32(%ebx),%xmm4
	pxor %xmm5,%xmm3
	movdqa -32(%ebx),%xmm2
	movdqa %xmm3,%xmm0
	pslld $12,%xmm3
	psrld $20,%xmm0
	por %xmm0,%xmm3
	movdqa -96(%ebx),%xmm0
	paddd %xmm3,%xmm1
	movdqa 96(%ebx),%xmm6
	pxor %xmm1,%xmm7
	movdqa %xmm1,-112(%ebx)
	pshufb 16(%eax),%xmm7
	paddd %xmm7,%xmm5
	movdqa %xmm7,80(%ebx)
	pxor %xmm5,%xmm3
	paddd %xmm2,%xmm0
	movdqa %xmm3,%xmm1
	pslld $7,%xmm3
	psrld $25,%xmm1
	pxor %xmm0,%xmm6
	por %xmm1,%xmm3
	movdqa %xmm5,16(%ebx)
	pshufb (%eax),%xmm6
	movdqa %xmm3,-48(%ebx)
	paddd %xmm6,%xmm4
	movdqa 48(%ebx),%xmm5
	pxor %xmm4,%xmm2
	movdqa -16(%ebx),%xmm3
	movdqa %xmm2,%xmm1
	pslld $12,%xmm2
	psrld $20,%xmm1
	por %xmm1,%xmm2
	movdqa -80(%ebx),%xmm1
	paddd %xmm2,%xmm0
	movdqa 112(%ebx),%xmm7
	pxor %xmm0,%xmm6
	movdqa %xmm0,-96(%ebx)
	pshufb 16(%eax),%xmm6
	paddd %xmm6,%xmm4
	movdqa %xmm6,96(%ebx)
	pxor %xmm4,%xmm2
	paddd %xmm3,%xmm1
	movdqa %xmm2,%xmm0
	pslld $7,%xmm2
	psrld $25,%xmm0
	pxor %xmm1,%xmm7
	por %xmm0,%xmm2
	pshufb (%eax),%xmm7
	movdqa %xmm2,-32(%ebx)
	paddd %xmm7,%xmm5
	pxor %xmm5,%xmm3
	movdqa -48(%ebx),%xmm2
	movdqa %xmm3,%xmm0
	pslld $12,%xmm3
	psrld $20,%xmm0
	por %xmm0,%xmm3
	movdqa -128(%ebx),%xmm0
	paddd %xmm3,%xmm1
	pxor %xmm1,%xmm7
	movdqa %xmm1,-80(%ebx)
	pshufb 16(%eax),%xmm7
	paddd %xmm7,%xmm5
	movdqa %xmm7,%xmm6
	pxor %xmm5,%xmm3
	paddd %xmm2,%xmm0
	movdqa %xmm3,%xmm1
	pslld $7,%xmm3
	psrld $25,%xmm1
	pxor %xmm0,%xmm6
	por %xmm1,%xmm3
	pshufb (%eax),%xmm6
	movdqa %xmm3,-16(%ebx)
	paddd %xmm6,%xmm4
	pxor %xmm4,%xmm2
	movdqa -32(%ebx),%xmm3
	movdqa %xmm2,%xmm1
	pslld $12,%xmm2
	psrld $20,%xmm1
	por %xmm1,%xmm2
	movdqa -112(%ebx),%xmm1
	paddd %xmm2,%xmm0
	movdqa 64(%ebx),%xmm7
	pxor %xmm0,%xmm6
	movdqa %xmm0,-128(%ebx)
	pshufb 16(%eax),%xmm6
	paddd %xmm6,%xmm4
	movdqa %xmm6,112(%ebx)
	pxor %xmm4,%xmm2
	paddd %xmm3,%xmm1
	movdqa %xmm2,%xmm0
	pslld $7,%xmm2
	psrld $25,%xmm0
	pxor %xmm1,%xmm7
	por %xmm0,%xmm2
	movdqa %xmm4,32(%ebx)
	pshufb (%eax),%xmm7
	movdqa %xmm2,-48(%ebx)
	paddd %xmm7,%xmm5
	movdqa (%ebx),%xmm4
	pxor %xmm5,%xmm3
	movdqa -16(%ebx),%xmm2
	movdqa %xmm3,%xmm0
	pslld $12,%xmm3
	psrld $20,%xmm0
	por %xmm0,%xmm3
	movdqa -96(%ebx),%xmm0
	paddd %xmm3,%xmm1
	movdqa 80(%ebx),%xmm6
	pxor %xmm1,%xmm7
	movdqa %xmm1,-112(%ebx)
	pshufb 16(%eax),%xmm7
	paddd %xmm7,%xmm5
	movdqa %xmm7,64(%ebx)
	pxor %xmm5,%xmm3
	paddd %xmm2,%xmm0
	movdqa %xmm3,%xmm1
	pslld $7,%xmm3
	psrld $25,%xmm1
	pxor %xmm0,%xmm6
	por %xmm1,%xmm3
	movdqa %xmm5,48(%ebx)
	pshufb (%eax),%xmm6
	movdqa %xmm3,-32(%ebx)
	paddd %xmm6,%xmm4
	movdqa 16(%ebx),%xmm5
	pxor %xmm4,%xmm2
	movdqa -64(%ebx),%xmm3
	movdqa %xmm2,%xmm1
	pslld $12,%xmm2
	psrld $20,%xmm1
	por %xmm1,%xmm2
	movdqa -80(%ebx),%xmm1
	paddd %xmm2,%xmm0
	movdqa 96(%ebx),%xmm7
	pxor %xmm0,%xmm6
	movdqa %xmm0,-96(%ebx)
	pshufb 16(%eax),%xmm6
	paddd %xmm6,%xmm4
	movdqa %xmm6,80(%ebx)
	pxor %xmm4,%xmm2
	paddd %xmm3,%xmm1
	movdqa %xmm2,%xmm0
	pslld $7,%xmm2
	psrld $25,%xmm0
	pxor %xmm1,%xmm7
	por %xmm0,%xmm2
	pshufb (%eax),%xmm7
	movdqa %xmm2,-16(%ebx)
	paddd %xmm7,%xmm5
	pxor %xmm5,%xmm3
	movdqa %xmm3,%xmm0
	pslld $12,%xmm3
	psrld $20,%xmm0
	por %xmm0,%xmm3
	movdqa -128(%ebx),%xmm0
	paddd %xmm3,%xmm1
	movdqa 64(%ebx),%xmm6
	pxor %xmm1,%xmm7
	movdqa %xmm1,-80(%ebx)
	pshufb 16(%eax),%xmm7
	paddd %xmm7,%xmm5
	movdqa %xmm7,96(%ebx)
	pxor %xmm5,%xmm3
	movdqa %xmm3,%xmm1
	pslld $7,%xmm3
	psrld $25,%xmm1
	por %xmm1,%xmm3
	decl %edx
	jnz L010loop
	movdqa %xmm3,-64(%ebx)
	movdqa %xmm4,(%ebx)
	movdqa %xmm5,16(%ebx)
	movdqa %xmm6,64(%ebx)
	movdqa %xmm7,96(%ebx)
	movdqa -112(%ebx),%xmm1
	movdqa -96(%ebx),%xmm2
	movdqa -80(%ebx),%xmm3
	paddd -128(%ebp),%xmm0
	paddd -112(%ebp),%xmm1
	paddd -96(%ebp),%xmm2
	paddd -80(%ebp),%xmm3
	movdqa %xmm0,%xmm6
	punpckldq %xmm1,%xmm0
	movdqa %xmm2,%xmm7
	punpckldq %xmm3,%xmm2
	punpckhdq %xmm1,%xmm6
	punpckhdq %xmm3,%xmm7
	movdqa %xmm0,%xmm1
	punpcklqdq %xmm2,%xmm0
	movdqa %xmm6,%xmm3
	punpcklqdq %xmm7,%xmm6
	punpckhqdq %xmm2,%xmm1
	punpckhqdq %xmm7,%xmm3
	movdqu -128(%esi),%xmm4
	movdqu -64(%esi),%xmm5
	movdqu (%esi),%xmm2
	movdqu 64(%esi),%xmm7
	leal 16(%esi),%esi
	pxor %xmm0,%xmm4
	movdqa -64(%ebx),%xmm0
	pxor %xmm1,%xmm5
	movdqa -48(%ebx),%xmm1
	pxor %xmm2,%xmm6
	movdqa -32(%ebx),%xmm2
	pxor %xmm3,%xmm7
	movdqa -16(%ebx),%xmm3
	movdqu %xmm4,-128(%edi)
	movdqu %xmm5,-64(%edi)
	movdqu %xmm6,(%edi)
	movdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	paddd -64(%ebp),%xmm0
	paddd -48(%ebp),%xmm1
	paddd -32(%ebp),%xmm2
	paddd -16(%ebp),%xmm3
	movdqa %xmm0,%xmm6
	punpckldq %xmm1,%xmm0
	movdqa %xmm2,%xmm7
	punpckldq %xmm3,%xmm2
	punpckhdq %xmm1,%xmm6
	punpckhdq %xmm3,%xmm7
	movdqa %xmm0,%xmm1
	punpcklqdq %xmm2,%xmm0
	movdqa %xmm6,%xmm3
	punpcklqdq %xmm7,%xmm6
	punpckhqdq %xmm2,%xmm1
	punpckhqdq %xmm7,%xmm3
	movdqu -128(%esi),%xmm4
	movdqu -64(%esi),%xmm5
	movdqu (%esi),%xmm2
	movdqu 64(%esi),%xmm7
	leal 16(%esi),%esi
	pxor %xmm0,%xmm4
	movdqa (%ebx),%xmm0
	pxor %xmm1,%xmm5
	movdqa 16(%ebx),%xmm1
	pxor %xmm2,%xmm6
	movdqa 32(%ebx),%xmm2
	pxor %xmm3,%xmm7
	movdqa 48(%ebx),%xmm3
	movdqu %xmm4,-128(%edi)
	movdqu %xmm5,-64(%edi)
	movdqu %xmm6,(%edi)
	movdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	paddd (%ebp),%xmm0
	paddd 16(%ebp),%xmm1
	paddd 32(%ebp),%xmm2
	paddd 48(%ebp),%xmm3
	movdqa %xmm0,%xmm6
	punpckldq %xmm1,%xmm0
	movdqa %xmm2,%xmm7
	punpckldq %xmm3,%xmm2
	punpckhdq %xmm1,%xmm6
	punpckhdq %xmm3,%xmm7
	movdqa %xmm0,%xmm1
	punpcklqdq %xmm2,%xmm0
	movdqa %xmm6,%xmm3
	punpcklqdq %xmm7,%xmm6
	punpckhqdq %xmm2,%xmm1
	punpckhqdq %xmm7,%xmm3
	movdqu -128(%esi),%xmm4
	movdqu -64(%esi),%xmm5
	movdqu (%esi),%xmm2
	movdqu 64(%esi),%xmm7
	leal 16(%esi),%esi
	pxor %xmm0,%xmm4
	movdqa 64(%ebx),%xmm0
	pxor %xmm1,%xmm5
	movdqa 80(%ebx),%xmm1
	pxor %xmm2,%xmm6
	movdqa 96(%ebx),%xmm2
	pxor %xmm3,%xmm7
	movdqa 112(%ebx),%xmm3
	movdqu %xmm4,-128(%edi)
	movdqu %xmm5,-64(%edi)
	movdqu %xmm6,(%edi)
	movdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	paddd 64(%ebp),%xmm0
	paddd 80(%ebp),%xmm1
	paddd 96(%ebp),%xmm2
	paddd 112(%ebp),%xmm3
	movdqa %xmm0,%xmm6
	punpckldq %xmm1,%xmm0
	movdqa %xmm2,%xmm7
	punpckldq %xmm3,%xmm2
	punpckhdq %xmm1,%xmm6
	punpckhdq %xmm3,%xmm7
	movdqa %xmm0,%xmm1
	punpcklqdq %xmm2,%xmm0
	movdqa %xmm6,%xmm3
	punpcklqdq %xmm7,%xmm6
	punpckhqdq %xmm2,%xmm1
	punpckhqdq %xmm7,%xmm3
	movdqu -128(%esi),%xmm4
	movdqu -64(%esi),%xmm5
	movdqu (%esi),%xmm2
	movdqu 64(%esi),%xmm7
	leal 208(%esi),%esi
	pxor %xmm0,%xmm4
	pxor %xmm1,%xmm5
	pxor %xmm2,%xmm6
	pxor %xmm3,%xmm7
	movdqu %xmm4,-128(%edi)
	movdqu %xmm5,-64(%edi)
	movdqu %xmm6,(%edi)
	movdqu %xmm7,64(%edi)
	leal 208(%edi),%edi
	subl $256,%ecx
	jnc L009outer_loop
	addl $256,%ecx
	jz L011done
	movl 520(%esp),%ebx
	leal -128(%esi),%esi
	movl 516(%esp),%edx
	leal -128(%edi),%edi
	movd 64(%ebp),%xmm2
	movdqu (%ebx),%xmm3
	paddd 96(%eax),%xmm2
	pand 112(%eax),%xmm3
	por %xmm2,%xmm3
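// Single-block (1x) path: one ChaCha state in %xmm0..%xmm3 (constants, key
// words 0-3, key words 4-7, counter/nonce), used for inputs below 256 bytes
// and for whatever is left over after the four-block loop.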
L0081x:
	movdqa 32(%eax),%xmm0
	movdqu (%edx),%xmm1
	movdqu 16(%edx),%xmm2
	movdqa (%eax),%xmm6
	movdqa 16(%eax),%xmm7
	movl %ebp,48(%esp)
	movdqa %xmm0,(%esp)
	movdqa %xmm1,16(%esp)
	movdqa %xmm2,32(%esp)
	movdqa %xmm3,48(%esp)
	movl $10,%edx
	jmp L012loop1x
.align 4,0x90
L013outer1x:
	movdqa 80(%eax),%xmm3
	movdqa (%esp),%xmm0
	movdqa 16(%esp),%xmm1
	movdqa 32(%esp),%xmm2
	paddd 48(%esp),%xmm3
	movl $10,%edx
	movdqa %xmm3,48(%esp)
	jmp L012loop1x
.align 4,0x90
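// 1x double-round. The .byte sequences below appear to be byte-encoded SSSE3
// shuffles: 102,15,56,0,222 is pshufb %xmm6,%xmm3 and 102,15,56,0,223 is
// pshufb %xmm7,%xmm3, applying the rotate-by-16 and rotate-by-8 masks held in
// %xmm6/%xmm7. The pshufd instructions rotate the state lanes between the
// column and diagonal halves of each double round.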
L012loop1x:
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,222
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $20,%xmm1
	pslld $12,%xmm4
	por %xmm4,%xmm1
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,223
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $25,%xmm1
	pslld $7,%xmm4
	por %xmm4,%xmm1
	pshufd $78,%xmm2,%xmm2
	pshufd $57,%xmm1,%xmm1
	pshufd $147,%xmm3,%xmm3
	nop
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,222
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $20,%xmm1
	pslld $12,%xmm4
	por %xmm4,%xmm1
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,223
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $25,%xmm1
	pslld $7,%xmm4
	por %xmm4,%xmm1
	pshufd $78,%xmm2,%xmm2
	pshufd $147,%xmm1,%xmm1
	pshufd $57,%xmm3,%xmm3
	decl %edx
	jnz L012loop1x
	paddd (%esp),%xmm0
	paddd 16(%esp),%xmm1
	paddd 32(%esp),%xmm2
	paddd 48(%esp),%xmm3
	cmpl $64,%ecx
	jb L014tail
	movdqu (%esi),%xmm4
	movdqu 16(%esi),%xmm5
	pxor %xmm4,%xmm0
	movdqu 32(%esi),%xmm4
	pxor %xmm5,%xmm1
	movdqu 48(%esi),%xmm5
	pxor %xmm4,%xmm2
	pxor %xmm5,%xmm3
	leal 64(%esi),%esi
	movdqu %xmm0,(%edi)
	movdqu %xmm1,16(%edi)
	movdqu %xmm2,32(%edi)
	movdqu %xmm3,48(%edi)
	leal 64(%edi),%edi
	subl $64,%ecx
	jnz L013outer1x
	jmp L011done
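// Partial final block: spill the keystream block to the stack and XOR it into
// the output one byte at a time.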
L014tail:
	movdqa %xmm0,(%esp)
	movdqa %xmm1,16(%esp)
	movdqa %xmm2,32(%esp)
	movdqa %xmm3,48(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
	xorl %ebp,%ebp
L015tail_loop:
	movb (%esp,%ebp,1),%al
	movb (%esi,%ebp,1),%dl
	leal 1(%ebp),%ebp
	xorb %dl,%al
	movb %al,-1(%edi,%ebp,1)
	decl %ecx
	jnz L015tail_loop
L011done:
	movl 512(%esp),%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.align 6,0x90
Lssse3_data:
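// Constant table (offsets are from %eax, which points at Lssse3_data):
//   +0   pshufb mask rotating each 32-bit lane left by 16
//   +16  pshufb mask rotating each 32-bit lane left by 8
//   +32  ChaCha constants "expa" "nd 3" "2-by" "te k"
//   +48  per-lane block-counter offsets 0,1,2,3 for the four-block path
//   +64  counter increment 4,4,4,4 applied once per four-block iteration
//   +80  counter increment 1 for the one-block outer loop
//   +96  counter adjustment used when falling back from the 4x to the 1x path
//   +112 mask that clears the counter word while keeping the nonce words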
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 6,0x90
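// ASCII identity string: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"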
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol _OPENSSL_ia32cap_P
.long 0
#endif