# chacha-x86.S — ChaCha20 for 32-bit x86 (scalar + SSSE3 paths).
# (Code-viewer header and stray line-number column removed from this paste.)
  1. // This file is generated from a similarly-named Perl script in the BoringSSL
  2. // source tree. Do not edit by hand.
  3. #if defined(__i386__)
  4. #if defined(BORINGSSL_PREFIX)
  5. #include <boringssl_prefix_symbols_asm.h>
  6. #endif
  7. .text
  8. .globl ChaCha20_ctr32
  9. .hidden ChaCha20_ctr32
  10. .type ChaCha20_ctr32,@function
  11. .align 16
  12. ChaCha20_ctr32:
  13. .L_ChaCha20_ctr32_begin:
  14. pushl %ebp
  15. pushl %ebx
  16. pushl %esi
  17. pushl %edi
  18. xorl %eax,%eax
  19. cmpl 28(%esp),%eax
  20. je .L000no_data
  21. call .Lpic_point
  22. .Lpic_point:
  23. popl %eax
  24. leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
  25. testl $16777216,(%ebp)
  26. jz .L001x86
  27. testl $512,4(%ebp)
  28. jz .L001x86
  29. jmp .Lssse3_shortcut
  30. .L001x86:
  31. movl 32(%esp),%esi
  32. movl 36(%esp),%edi
  33. subl $132,%esp
  34. movl (%esi),%eax
  35. movl 4(%esi),%ebx
  36. movl 8(%esi),%ecx
  37. movl 12(%esi),%edx
  38. movl %eax,80(%esp)
  39. movl %ebx,84(%esp)
  40. movl %ecx,88(%esp)
  41. movl %edx,92(%esp)
  42. movl 16(%esi),%eax
  43. movl 20(%esi),%ebx
  44. movl 24(%esi),%ecx
  45. movl 28(%esi),%edx
  46. movl %eax,96(%esp)
  47. movl %ebx,100(%esp)
  48. movl %ecx,104(%esp)
  49. movl %edx,108(%esp)
  50. movl (%edi),%eax
  51. movl 4(%edi),%ebx
  52. movl 8(%edi),%ecx
  53. movl 12(%edi),%edx
  54. subl $1,%eax
  55. movl %eax,112(%esp)
  56. movl %ebx,116(%esp)
  57. movl %ecx,120(%esp)
  58. movl %edx,124(%esp)
  59. jmp .L002entry
  60. .align 16
  61. .L003outer_loop:
  62. movl %ebx,156(%esp)
  63. movl %eax,152(%esp)
  64. movl %ecx,160(%esp)
  65. .L002entry:
  66. movl $1634760805,%eax
  67. movl $857760878,4(%esp)
  68. movl $2036477234,8(%esp)
  69. movl $1797285236,12(%esp)
  70. movl 84(%esp),%ebx
  71. movl 88(%esp),%ebp
  72. movl 104(%esp),%ecx
  73. movl 108(%esp),%esi
  74. movl 116(%esp),%edx
  75. movl 120(%esp),%edi
  76. movl %ebx,20(%esp)
  77. movl %ebp,24(%esp)
  78. movl %ecx,40(%esp)
  79. movl %esi,44(%esp)
  80. movl %edx,52(%esp)
  81. movl %edi,56(%esp)
  82. movl 92(%esp),%ebx
  83. movl 124(%esp),%edi
  84. movl 112(%esp),%edx
  85. movl 80(%esp),%ebp
  86. movl 96(%esp),%ecx
  87. movl 100(%esp),%esi
  88. addl $1,%edx
  89. movl %ebx,28(%esp)
  90. movl %edi,60(%esp)
  91. movl %edx,112(%esp)
  92. movl $10,%ebx
  93. jmp .L004loop
  94. .align 16
  95. .L004loop:
  96. addl %ebp,%eax
  97. movl %ebx,128(%esp)
  98. movl %ebp,%ebx
  99. xorl %eax,%edx
  100. roll $16,%edx
  101. addl %edx,%ecx
  102. xorl %ecx,%ebx
  103. movl 52(%esp),%edi
  104. roll $12,%ebx
  105. movl 20(%esp),%ebp
  106. addl %ebx,%eax
  107. xorl %eax,%edx
  108. movl %eax,(%esp)
  109. roll $8,%edx
  110. movl 4(%esp),%eax
  111. addl %edx,%ecx
  112. movl %edx,48(%esp)
  113. xorl %ecx,%ebx
  114. addl %ebp,%eax
  115. roll $7,%ebx
  116. xorl %eax,%edi
  117. movl %ecx,32(%esp)
  118. roll $16,%edi
  119. movl %ebx,16(%esp)
  120. addl %edi,%esi
  121. movl 40(%esp),%ecx
  122. xorl %esi,%ebp
  123. movl 56(%esp),%edx
  124. roll $12,%ebp
  125. movl 24(%esp),%ebx
  126. addl %ebp,%eax
  127. xorl %eax,%edi
  128. movl %eax,4(%esp)
  129. roll $8,%edi
  130. movl 8(%esp),%eax
  131. addl %edi,%esi
  132. movl %edi,52(%esp)
  133. xorl %esi,%ebp
  134. addl %ebx,%eax
  135. roll $7,%ebp
  136. xorl %eax,%edx
  137. movl %esi,36(%esp)
  138. roll $16,%edx
  139. movl %ebp,20(%esp)
  140. addl %edx,%ecx
  141. movl 44(%esp),%esi
  142. xorl %ecx,%ebx
  143. movl 60(%esp),%edi
  144. roll $12,%ebx
  145. movl 28(%esp),%ebp
  146. addl %ebx,%eax
  147. xorl %eax,%edx
  148. movl %eax,8(%esp)
  149. roll $8,%edx
  150. movl 12(%esp),%eax
  151. addl %edx,%ecx
  152. movl %edx,56(%esp)
  153. xorl %ecx,%ebx
  154. addl %ebp,%eax
  155. roll $7,%ebx
  156. xorl %eax,%edi
  157. roll $16,%edi
  158. movl %ebx,24(%esp)
  159. addl %edi,%esi
  160. xorl %esi,%ebp
  161. roll $12,%ebp
  162. movl 20(%esp),%ebx
  163. addl %ebp,%eax
  164. xorl %eax,%edi
  165. movl %eax,12(%esp)
  166. roll $8,%edi
  167. movl (%esp),%eax
  168. addl %edi,%esi
  169. movl %edi,%edx
  170. xorl %esi,%ebp
  171. addl %ebx,%eax
  172. roll $7,%ebp
  173. xorl %eax,%edx
  174. roll $16,%edx
  175. movl %ebp,28(%esp)
  176. addl %edx,%ecx
  177. xorl %ecx,%ebx
  178. movl 48(%esp),%edi
  179. roll $12,%ebx
  180. movl 24(%esp),%ebp
  181. addl %ebx,%eax
  182. xorl %eax,%edx
  183. movl %eax,(%esp)
  184. roll $8,%edx
  185. movl 4(%esp),%eax
  186. addl %edx,%ecx
  187. movl %edx,60(%esp)
  188. xorl %ecx,%ebx
  189. addl %ebp,%eax
  190. roll $7,%ebx
  191. xorl %eax,%edi
  192. movl %ecx,40(%esp)
  193. roll $16,%edi
  194. movl %ebx,20(%esp)
  195. addl %edi,%esi
  196. movl 32(%esp),%ecx
  197. xorl %esi,%ebp
  198. movl 52(%esp),%edx
  199. roll $12,%ebp
  200. movl 28(%esp),%ebx
  201. addl %ebp,%eax
  202. xorl %eax,%edi
  203. movl %eax,4(%esp)
  204. roll $8,%edi
  205. movl 8(%esp),%eax
  206. addl %edi,%esi
  207. movl %edi,48(%esp)
  208. xorl %esi,%ebp
  209. addl %ebx,%eax
  210. roll $7,%ebp
  211. xorl %eax,%edx
  212. movl %esi,44(%esp)
  213. roll $16,%edx
  214. movl %ebp,24(%esp)
  215. addl %edx,%ecx
  216. movl 36(%esp),%esi
  217. xorl %ecx,%ebx
  218. movl 56(%esp),%edi
  219. roll $12,%ebx
  220. movl 16(%esp),%ebp
  221. addl %ebx,%eax
  222. xorl %eax,%edx
  223. movl %eax,8(%esp)
  224. roll $8,%edx
  225. movl 12(%esp),%eax
  226. addl %edx,%ecx
  227. movl %edx,52(%esp)
  228. xorl %ecx,%ebx
  229. addl %ebp,%eax
  230. roll $7,%ebx
  231. xorl %eax,%edi
  232. roll $16,%edi
  233. movl %ebx,28(%esp)
  234. addl %edi,%esi
  235. xorl %esi,%ebp
  236. movl 48(%esp),%edx
  237. roll $12,%ebp
  238. movl 128(%esp),%ebx
  239. addl %ebp,%eax
  240. xorl %eax,%edi
  241. movl %eax,12(%esp)
  242. roll $8,%edi
  243. movl (%esp),%eax
  244. addl %edi,%esi
  245. movl %edi,56(%esp)
  246. xorl %esi,%ebp
  247. roll $7,%ebp
  248. decl %ebx
  249. jnz .L004loop
  250. movl 160(%esp),%ebx
  251. addl $1634760805,%eax
  252. addl 80(%esp),%ebp
  253. addl 96(%esp),%ecx
  254. addl 100(%esp),%esi
  255. cmpl $64,%ebx
  256. jb .L005tail
  257. movl 156(%esp),%ebx
  258. addl 112(%esp),%edx
  259. addl 120(%esp),%edi
  260. xorl (%ebx),%eax
  261. xorl 16(%ebx),%ebp
  262. movl %eax,(%esp)
  263. movl 152(%esp),%eax
  264. xorl 32(%ebx),%ecx
  265. xorl 36(%ebx),%esi
  266. xorl 48(%ebx),%edx
  267. xorl 56(%ebx),%edi
  268. movl %ebp,16(%eax)
  269. movl %ecx,32(%eax)
  270. movl %esi,36(%eax)
  271. movl %edx,48(%eax)
  272. movl %edi,56(%eax)
  273. movl 4(%esp),%ebp
  274. movl 8(%esp),%ecx
  275. movl 12(%esp),%esi
  276. movl 20(%esp),%edx
  277. movl 24(%esp),%edi
  278. addl $857760878,%ebp
  279. addl $2036477234,%ecx
  280. addl $1797285236,%esi
  281. addl 84(%esp),%edx
  282. addl 88(%esp),%edi
  283. xorl 4(%ebx),%ebp
  284. xorl 8(%ebx),%ecx
  285. xorl 12(%ebx),%esi
  286. xorl 20(%ebx),%edx
  287. xorl 24(%ebx),%edi
  288. movl %ebp,4(%eax)
  289. movl %ecx,8(%eax)
  290. movl %esi,12(%eax)
  291. movl %edx,20(%eax)
  292. movl %edi,24(%eax)
  293. movl 28(%esp),%ebp
  294. movl 40(%esp),%ecx
  295. movl 44(%esp),%esi
  296. movl 52(%esp),%edx
  297. movl 60(%esp),%edi
  298. addl 92(%esp),%ebp
  299. addl 104(%esp),%ecx
  300. addl 108(%esp),%esi
  301. addl 116(%esp),%edx
  302. addl 124(%esp),%edi
  303. xorl 28(%ebx),%ebp
  304. xorl 40(%ebx),%ecx
  305. xorl 44(%ebx),%esi
  306. xorl 52(%ebx),%edx
  307. xorl 60(%ebx),%edi
  308. leal 64(%ebx),%ebx
  309. movl %ebp,28(%eax)
  310. movl (%esp),%ebp
  311. movl %ecx,40(%eax)
  312. movl 160(%esp),%ecx
  313. movl %esi,44(%eax)
  314. movl %edx,52(%eax)
  315. movl %edi,60(%eax)
  316. movl %ebp,(%eax)
  317. leal 64(%eax),%eax
  318. subl $64,%ecx
  319. jnz .L003outer_loop
  320. jmp .L006done
  321. .L005tail:
  322. addl 112(%esp),%edx
  323. addl 120(%esp),%edi
  324. movl %eax,(%esp)
  325. movl %ebp,16(%esp)
  326. movl %ecx,32(%esp)
  327. movl %esi,36(%esp)
  328. movl %edx,48(%esp)
  329. movl %edi,56(%esp)
  330. movl 4(%esp),%ebp
  331. movl 8(%esp),%ecx
  332. movl 12(%esp),%esi
  333. movl 20(%esp),%edx
  334. movl 24(%esp),%edi
  335. addl $857760878,%ebp
  336. addl $2036477234,%ecx
  337. addl $1797285236,%esi
  338. addl 84(%esp),%edx
  339. addl 88(%esp),%edi
  340. movl %ebp,4(%esp)
  341. movl %ecx,8(%esp)
  342. movl %esi,12(%esp)
  343. movl %edx,20(%esp)
  344. movl %edi,24(%esp)
  345. movl 28(%esp),%ebp
  346. movl 40(%esp),%ecx
  347. movl 44(%esp),%esi
  348. movl 52(%esp),%edx
  349. movl 60(%esp),%edi
  350. addl 92(%esp),%ebp
  351. addl 104(%esp),%ecx
  352. addl 108(%esp),%esi
  353. addl 116(%esp),%edx
  354. addl 124(%esp),%edi
  355. movl %ebp,28(%esp)
  356. movl 156(%esp),%ebp
  357. movl %ecx,40(%esp)
  358. movl 152(%esp),%ecx
  359. movl %esi,44(%esp)
  360. xorl %esi,%esi
  361. movl %edx,52(%esp)
  362. movl %edi,60(%esp)
  363. xorl %eax,%eax
  364. xorl %edx,%edx
  365. .L007tail_loop:
  366. movb (%esi,%ebp,1),%al
  367. movb (%esp,%esi,1),%dl
  368. leal 1(%esi),%esi
  369. xorb %dl,%al
  370. movb %al,-1(%ecx,%esi,1)
  371. decl %ebx
  372. jnz .L007tail_loop
  373. .L006done:
  374. addl $132,%esp
  375. .L000no_data:
  376. popl %edi
  377. popl %esi
  378. popl %ebx
  379. popl %ebp
  380. ret
  381. .size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
  382. .globl ChaCha20_ssse3
  383. .hidden ChaCha20_ssse3
  384. .type ChaCha20_ssse3,@function
  385. .align 16
  386. ChaCha20_ssse3:
  387. .L_ChaCha20_ssse3_begin:
  388. pushl %ebp
  389. pushl %ebx
  390. pushl %esi
  391. pushl %edi
  392. .Lssse3_shortcut:
  393. movl 20(%esp),%edi
  394. movl 24(%esp),%esi
  395. movl 28(%esp),%ecx
  396. movl 32(%esp),%edx
  397. movl 36(%esp),%ebx
  398. movl %esp,%ebp
  399. subl $524,%esp
  400. andl $-64,%esp
  401. movl %ebp,512(%esp)
  402. leal .Lssse3_data-.Lpic_point(%eax),%eax
  403. movdqu (%ebx),%xmm3
  404. cmpl $256,%ecx
  405. jb .L0081x
  406. movl %edx,516(%esp)
  407. movl %ebx,520(%esp)
  408. subl $256,%ecx
  409. leal 384(%esp),%ebp
  410. movdqu (%edx),%xmm7
  411. pshufd $0,%xmm3,%xmm0
  412. pshufd $85,%xmm3,%xmm1
  413. pshufd $170,%xmm3,%xmm2
  414. pshufd $255,%xmm3,%xmm3
  415. paddd 48(%eax),%xmm0
  416. pshufd $0,%xmm7,%xmm4
  417. pshufd $85,%xmm7,%xmm5
  418. psubd 64(%eax),%xmm0
  419. pshufd $170,%xmm7,%xmm6
  420. pshufd $255,%xmm7,%xmm7
  421. movdqa %xmm0,64(%ebp)
  422. movdqa %xmm1,80(%ebp)
  423. movdqa %xmm2,96(%ebp)
  424. movdqa %xmm3,112(%ebp)
  425. movdqu 16(%edx),%xmm3
  426. movdqa %xmm4,-64(%ebp)
  427. movdqa %xmm5,-48(%ebp)
  428. movdqa %xmm6,-32(%ebp)
  429. movdqa %xmm7,-16(%ebp)
  430. movdqa 32(%eax),%xmm7
  431. leal 128(%esp),%ebx
  432. pshufd $0,%xmm3,%xmm0
  433. pshufd $85,%xmm3,%xmm1
  434. pshufd $170,%xmm3,%xmm2
  435. pshufd $255,%xmm3,%xmm3
  436. pshufd $0,%xmm7,%xmm4
  437. pshufd $85,%xmm7,%xmm5
  438. pshufd $170,%xmm7,%xmm6
  439. pshufd $255,%xmm7,%xmm7
  440. movdqa %xmm0,(%ebp)
  441. movdqa %xmm1,16(%ebp)
  442. movdqa %xmm2,32(%ebp)
  443. movdqa %xmm3,48(%ebp)
  444. movdqa %xmm4,-128(%ebp)
  445. movdqa %xmm5,-112(%ebp)
  446. movdqa %xmm6,-96(%ebp)
  447. movdqa %xmm7,-80(%ebp)
  448. leal 128(%esi),%esi
  449. leal 128(%edi),%edi
  450. jmp .L009outer_loop
  451. .align 16
  452. .L009outer_loop:
  453. movdqa -112(%ebp),%xmm1
  454. movdqa -96(%ebp),%xmm2
  455. movdqa -80(%ebp),%xmm3
  456. movdqa -48(%ebp),%xmm5
  457. movdqa -32(%ebp),%xmm6
  458. movdqa -16(%ebp),%xmm7
  459. movdqa %xmm1,-112(%ebx)
  460. movdqa %xmm2,-96(%ebx)
  461. movdqa %xmm3,-80(%ebx)
  462. movdqa %xmm5,-48(%ebx)
  463. movdqa %xmm6,-32(%ebx)
  464. movdqa %xmm7,-16(%ebx)
  465. movdqa 32(%ebp),%xmm2
  466. movdqa 48(%ebp),%xmm3
  467. movdqa 64(%ebp),%xmm4
  468. movdqa 80(%ebp),%xmm5
  469. movdqa 96(%ebp),%xmm6
  470. movdqa 112(%ebp),%xmm7
  471. paddd 64(%eax),%xmm4
  472. movdqa %xmm2,32(%ebx)
  473. movdqa %xmm3,48(%ebx)
  474. movdqa %xmm4,64(%ebx)
  475. movdqa %xmm5,80(%ebx)
  476. movdqa %xmm6,96(%ebx)
  477. movdqa %xmm7,112(%ebx)
  478. movdqa %xmm4,64(%ebp)
  479. movdqa -128(%ebp),%xmm0
  480. movdqa %xmm4,%xmm6
  481. movdqa -64(%ebp),%xmm3
  482. movdqa (%ebp),%xmm4
  483. movdqa 16(%ebp),%xmm5
  484. movl $10,%edx
  485. nop
  486. .align 16
  487. .L010loop:
  488. paddd %xmm3,%xmm0
  489. movdqa %xmm3,%xmm2
  490. pxor %xmm0,%xmm6
  491. pshufb (%eax),%xmm6
  492. paddd %xmm6,%xmm4
  493. pxor %xmm4,%xmm2
  494. movdqa -48(%ebx),%xmm3
  495. movdqa %xmm2,%xmm1
  496. pslld $12,%xmm2
  497. psrld $20,%xmm1
  498. por %xmm1,%xmm2
  499. movdqa -112(%ebx),%xmm1
  500. paddd %xmm2,%xmm0
  501. movdqa 80(%ebx),%xmm7
  502. pxor %xmm0,%xmm6
  503. movdqa %xmm0,-128(%ebx)
  504. pshufb 16(%eax),%xmm6
  505. paddd %xmm6,%xmm4
  506. movdqa %xmm6,64(%ebx)
  507. pxor %xmm4,%xmm2
  508. paddd %xmm3,%xmm1
  509. movdqa %xmm2,%xmm0
  510. pslld $7,%xmm2
  511. psrld $25,%xmm0
  512. pxor %xmm1,%xmm7
  513. por %xmm0,%xmm2
  514. movdqa %xmm4,(%ebx)
  515. pshufb (%eax),%xmm7
  516. movdqa %xmm2,-64(%ebx)
  517. paddd %xmm7,%xmm5
  518. movdqa 32(%ebx),%xmm4
  519. pxor %xmm5,%xmm3
  520. movdqa -32(%ebx),%xmm2
  521. movdqa %xmm3,%xmm0
  522. pslld $12,%xmm3
  523. psrld $20,%xmm0
  524. por %xmm0,%xmm3
  525. movdqa -96(%ebx),%xmm0
  526. paddd %xmm3,%xmm1
  527. movdqa 96(%ebx),%xmm6
  528. pxor %xmm1,%xmm7
  529. movdqa %xmm1,-112(%ebx)
  530. pshufb 16(%eax),%xmm7
  531. paddd %xmm7,%xmm5
  532. movdqa %xmm7,80(%ebx)
  533. pxor %xmm5,%xmm3
  534. paddd %xmm2,%xmm0
  535. movdqa %xmm3,%xmm1
  536. pslld $7,%xmm3
  537. psrld $25,%xmm1
  538. pxor %xmm0,%xmm6
  539. por %xmm1,%xmm3
  540. movdqa %xmm5,16(%ebx)
  541. pshufb (%eax),%xmm6
  542. movdqa %xmm3,-48(%ebx)
  543. paddd %xmm6,%xmm4
  544. movdqa 48(%ebx),%xmm5
  545. pxor %xmm4,%xmm2
  546. movdqa -16(%ebx),%xmm3
  547. movdqa %xmm2,%xmm1
  548. pslld $12,%xmm2
  549. psrld $20,%xmm1
  550. por %xmm1,%xmm2
  551. movdqa -80(%ebx),%xmm1
  552. paddd %xmm2,%xmm0
  553. movdqa 112(%ebx),%xmm7
  554. pxor %xmm0,%xmm6
  555. movdqa %xmm0,-96(%ebx)
  556. pshufb 16(%eax),%xmm6
  557. paddd %xmm6,%xmm4
  558. movdqa %xmm6,96(%ebx)
  559. pxor %xmm4,%xmm2
  560. paddd %xmm3,%xmm1
  561. movdqa %xmm2,%xmm0
  562. pslld $7,%xmm2
  563. psrld $25,%xmm0
  564. pxor %xmm1,%xmm7
  565. por %xmm0,%xmm2
  566. pshufb (%eax),%xmm7
  567. movdqa %xmm2,-32(%ebx)
  568. paddd %xmm7,%xmm5
  569. pxor %xmm5,%xmm3
  570. movdqa -48(%ebx),%xmm2
  571. movdqa %xmm3,%xmm0
  572. pslld $12,%xmm3
  573. psrld $20,%xmm0
  574. por %xmm0,%xmm3
  575. movdqa -128(%ebx),%xmm0
  576. paddd %xmm3,%xmm1
  577. pxor %xmm1,%xmm7
  578. movdqa %xmm1,-80(%ebx)
  579. pshufb 16(%eax),%xmm7
  580. paddd %xmm7,%xmm5
  581. movdqa %xmm7,%xmm6
  582. pxor %xmm5,%xmm3
  583. paddd %xmm2,%xmm0
  584. movdqa %xmm3,%xmm1
  585. pslld $7,%xmm3
  586. psrld $25,%xmm1
  587. pxor %xmm0,%xmm6
  588. por %xmm1,%xmm3
  589. pshufb (%eax),%xmm6
  590. movdqa %xmm3,-16(%ebx)
  591. paddd %xmm6,%xmm4
  592. pxor %xmm4,%xmm2
  593. movdqa -32(%ebx),%xmm3
  594. movdqa %xmm2,%xmm1
  595. pslld $12,%xmm2
  596. psrld $20,%xmm1
  597. por %xmm1,%xmm2
  598. movdqa -112(%ebx),%xmm1
  599. paddd %xmm2,%xmm0
  600. movdqa 64(%ebx),%xmm7
  601. pxor %xmm0,%xmm6
  602. movdqa %xmm0,-128(%ebx)
  603. pshufb 16(%eax),%xmm6
  604. paddd %xmm6,%xmm4
  605. movdqa %xmm6,112(%ebx)
  606. pxor %xmm4,%xmm2
  607. paddd %xmm3,%xmm1
  608. movdqa %xmm2,%xmm0
  609. pslld $7,%xmm2
  610. psrld $25,%xmm0
  611. pxor %xmm1,%xmm7
  612. por %xmm0,%xmm2
  613. movdqa %xmm4,32(%ebx)
  614. pshufb (%eax),%xmm7
  615. movdqa %xmm2,-48(%ebx)
  616. paddd %xmm7,%xmm5
  617. movdqa (%ebx),%xmm4
  618. pxor %xmm5,%xmm3
  619. movdqa -16(%ebx),%xmm2
  620. movdqa %xmm3,%xmm0
  621. pslld $12,%xmm3
  622. psrld $20,%xmm0
  623. por %xmm0,%xmm3
  624. movdqa -96(%ebx),%xmm0
  625. paddd %xmm3,%xmm1
  626. movdqa 80(%ebx),%xmm6
  627. pxor %xmm1,%xmm7
  628. movdqa %xmm1,-112(%ebx)
  629. pshufb 16(%eax),%xmm7
  630. paddd %xmm7,%xmm5
  631. movdqa %xmm7,64(%ebx)
  632. pxor %xmm5,%xmm3
  633. paddd %xmm2,%xmm0
  634. movdqa %xmm3,%xmm1
  635. pslld $7,%xmm3
  636. psrld $25,%xmm1
  637. pxor %xmm0,%xmm6
  638. por %xmm1,%xmm3
  639. movdqa %xmm5,48(%ebx)
  640. pshufb (%eax),%xmm6
  641. movdqa %xmm3,-32(%ebx)
  642. paddd %xmm6,%xmm4
  643. movdqa 16(%ebx),%xmm5
  644. pxor %xmm4,%xmm2
  645. movdqa -64(%ebx),%xmm3
  646. movdqa %xmm2,%xmm1
  647. pslld $12,%xmm2
  648. psrld $20,%xmm1
  649. por %xmm1,%xmm2
  650. movdqa -80(%ebx),%xmm1
  651. paddd %xmm2,%xmm0
  652. movdqa 96(%ebx),%xmm7
  653. pxor %xmm0,%xmm6
  654. movdqa %xmm0,-96(%ebx)
  655. pshufb 16(%eax),%xmm6
  656. paddd %xmm6,%xmm4
  657. movdqa %xmm6,80(%ebx)
  658. pxor %xmm4,%xmm2
  659. paddd %xmm3,%xmm1
  660. movdqa %xmm2,%xmm0
  661. pslld $7,%xmm2
  662. psrld $25,%xmm0
  663. pxor %xmm1,%xmm7
  664. por %xmm0,%xmm2
  665. pshufb (%eax),%xmm7
  666. movdqa %xmm2,-16(%ebx)
  667. paddd %xmm7,%xmm5
  668. pxor %xmm5,%xmm3
  669. movdqa %xmm3,%xmm0
  670. pslld $12,%xmm3
  671. psrld $20,%xmm0
  672. por %xmm0,%xmm3
  673. movdqa -128(%ebx),%xmm0
  674. paddd %xmm3,%xmm1
  675. movdqa 64(%ebx),%xmm6
  676. pxor %xmm1,%xmm7
  677. movdqa %xmm1,-80(%ebx)
  678. pshufb 16(%eax),%xmm7
  679. paddd %xmm7,%xmm5
  680. movdqa %xmm7,96(%ebx)
  681. pxor %xmm5,%xmm3
  682. movdqa %xmm3,%xmm1
  683. pslld $7,%xmm3
  684. psrld $25,%xmm1
  685. por %xmm1,%xmm3
  686. decl %edx
  687. jnz .L010loop
  688. movdqa %xmm3,-64(%ebx)
  689. movdqa %xmm4,(%ebx)
  690. movdqa %xmm5,16(%ebx)
  691. movdqa %xmm6,64(%ebx)
  692. movdqa %xmm7,96(%ebx)
  693. movdqa -112(%ebx),%xmm1
  694. movdqa -96(%ebx),%xmm2
  695. movdqa -80(%ebx),%xmm3
  696. paddd -128(%ebp),%xmm0
  697. paddd -112(%ebp),%xmm1
  698. paddd -96(%ebp),%xmm2
  699. paddd -80(%ebp),%xmm3
  700. movdqa %xmm0,%xmm6
  701. punpckldq %xmm1,%xmm0
  702. movdqa %xmm2,%xmm7
  703. punpckldq %xmm3,%xmm2
  704. punpckhdq %xmm1,%xmm6
  705. punpckhdq %xmm3,%xmm7
  706. movdqa %xmm0,%xmm1
  707. punpcklqdq %xmm2,%xmm0
  708. movdqa %xmm6,%xmm3
  709. punpcklqdq %xmm7,%xmm6
  710. punpckhqdq %xmm2,%xmm1
  711. punpckhqdq %xmm7,%xmm3
  712. movdqu -128(%esi),%xmm4
  713. movdqu -64(%esi),%xmm5
  714. movdqu (%esi),%xmm2
  715. movdqu 64(%esi),%xmm7
  716. leal 16(%esi),%esi
  717. pxor %xmm0,%xmm4
  718. movdqa -64(%ebx),%xmm0
  719. pxor %xmm1,%xmm5
  720. movdqa -48(%ebx),%xmm1
  721. pxor %xmm2,%xmm6
  722. movdqa -32(%ebx),%xmm2
  723. pxor %xmm3,%xmm7
  724. movdqa -16(%ebx),%xmm3
  725. movdqu %xmm4,-128(%edi)
  726. movdqu %xmm5,-64(%edi)
  727. movdqu %xmm6,(%edi)
  728. movdqu %xmm7,64(%edi)
  729. leal 16(%edi),%edi
  730. paddd -64(%ebp),%xmm0
  731. paddd -48(%ebp),%xmm1
  732. paddd -32(%ebp),%xmm2
  733. paddd -16(%ebp),%xmm3
  734. movdqa %xmm0,%xmm6
  735. punpckldq %xmm1,%xmm0
  736. movdqa %xmm2,%xmm7
  737. punpckldq %xmm3,%xmm2
  738. punpckhdq %xmm1,%xmm6
  739. punpckhdq %xmm3,%xmm7
  740. movdqa %xmm0,%xmm1
  741. punpcklqdq %xmm2,%xmm0
  742. movdqa %xmm6,%xmm3
  743. punpcklqdq %xmm7,%xmm6
  744. punpckhqdq %xmm2,%xmm1
  745. punpckhqdq %xmm7,%xmm3
  746. movdqu -128(%esi),%xmm4
  747. movdqu -64(%esi),%xmm5
  748. movdqu (%esi),%xmm2
  749. movdqu 64(%esi),%xmm7
  750. leal 16(%esi),%esi
  751. pxor %xmm0,%xmm4
  752. movdqa (%ebx),%xmm0
  753. pxor %xmm1,%xmm5
  754. movdqa 16(%ebx),%xmm1
  755. pxor %xmm2,%xmm6
  756. movdqa 32(%ebx),%xmm2
  757. pxor %xmm3,%xmm7
  758. movdqa 48(%ebx),%xmm3
  759. movdqu %xmm4,-128(%edi)
  760. movdqu %xmm5,-64(%edi)
  761. movdqu %xmm6,(%edi)
  762. movdqu %xmm7,64(%edi)
  763. leal 16(%edi),%edi
  764. paddd (%ebp),%xmm0
  765. paddd 16(%ebp),%xmm1
  766. paddd 32(%ebp),%xmm2
  767. paddd 48(%ebp),%xmm3
  768. movdqa %xmm0,%xmm6
  769. punpckldq %xmm1,%xmm0
  770. movdqa %xmm2,%xmm7
  771. punpckldq %xmm3,%xmm2
  772. punpckhdq %xmm1,%xmm6
  773. punpckhdq %xmm3,%xmm7
  774. movdqa %xmm0,%xmm1
  775. punpcklqdq %xmm2,%xmm0
  776. movdqa %xmm6,%xmm3
  777. punpcklqdq %xmm7,%xmm6
  778. punpckhqdq %xmm2,%xmm1
  779. punpckhqdq %xmm7,%xmm3
  780. movdqu -128(%esi),%xmm4
  781. movdqu -64(%esi),%xmm5
  782. movdqu (%esi),%xmm2
  783. movdqu 64(%esi),%xmm7
  784. leal 16(%esi),%esi
  785. pxor %xmm0,%xmm4
  786. movdqa 64(%ebx),%xmm0
  787. pxor %xmm1,%xmm5
  788. movdqa 80(%ebx),%xmm1
  789. pxor %xmm2,%xmm6
  790. movdqa 96(%ebx),%xmm2
  791. pxor %xmm3,%xmm7
  792. movdqa 112(%ebx),%xmm3
  793. movdqu %xmm4,-128(%edi)
  794. movdqu %xmm5,-64(%edi)
  795. movdqu %xmm6,(%edi)
  796. movdqu %xmm7,64(%edi)
  797. leal 16(%edi),%edi
  798. paddd 64(%ebp),%xmm0
  799. paddd 80(%ebp),%xmm1
  800. paddd 96(%ebp),%xmm2
  801. paddd 112(%ebp),%xmm3
  802. movdqa %xmm0,%xmm6
  803. punpckldq %xmm1,%xmm0
  804. movdqa %xmm2,%xmm7
  805. punpckldq %xmm3,%xmm2
  806. punpckhdq %xmm1,%xmm6
  807. punpckhdq %xmm3,%xmm7
  808. movdqa %xmm0,%xmm1
  809. punpcklqdq %xmm2,%xmm0
  810. movdqa %xmm6,%xmm3
  811. punpcklqdq %xmm7,%xmm6
  812. punpckhqdq %xmm2,%xmm1
  813. punpckhqdq %xmm7,%xmm3
  814. movdqu -128(%esi),%xmm4
  815. movdqu -64(%esi),%xmm5
  816. movdqu (%esi),%xmm2
  817. movdqu 64(%esi),%xmm7
  818. leal 208(%esi),%esi
  819. pxor %xmm0,%xmm4
  820. pxor %xmm1,%xmm5
  821. pxor %xmm2,%xmm6
  822. pxor %xmm3,%xmm7
  823. movdqu %xmm4,-128(%edi)
  824. movdqu %xmm5,-64(%edi)
  825. movdqu %xmm6,(%edi)
  826. movdqu %xmm7,64(%edi)
  827. leal 208(%edi),%edi
  828. subl $256,%ecx
  829. jnc .L009outer_loop
  830. addl $256,%ecx
  831. jz .L011done
  832. movl 520(%esp),%ebx
  833. leal -128(%esi),%esi
  834. movl 516(%esp),%edx
  835. leal -128(%edi),%edi
  836. movd 64(%ebp),%xmm2
  837. movdqu (%ebx),%xmm3
  838. paddd 96(%eax),%xmm2
  839. pand 112(%eax),%xmm3
  840. por %xmm2,%xmm3
  841. .L0081x:
  842. movdqa 32(%eax),%xmm0
  843. movdqu (%edx),%xmm1
  844. movdqu 16(%edx),%xmm2
  845. movdqa (%eax),%xmm6
  846. movdqa 16(%eax),%xmm7
  847. movl %ebp,48(%esp)
  848. movdqa %xmm0,(%esp)
  849. movdqa %xmm1,16(%esp)
  850. movdqa %xmm2,32(%esp)
  851. movdqa %xmm3,48(%esp)
  852. movl $10,%edx
  853. jmp .L012loop1x
  854. .align 16
  855. .L013outer1x:
  856. movdqa 80(%eax),%xmm3
  857. movdqa (%esp),%xmm0
  858. movdqa 16(%esp),%xmm1
  859. movdqa 32(%esp),%xmm2
  860. paddd 48(%esp),%xmm3
  861. movl $10,%edx
  862. movdqa %xmm3,48(%esp)
  863. jmp .L012loop1x
  864. .align 16
  865. .L012loop1x:
  866. paddd %xmm1,%xmm0
  867. pxor %xmm0,%xmm3
  868. .byte 102,15,56,0,222
  869. paddd %xmm3,%xmm2
  870. pxor %xmm2,%xmm1
  871. movdqa %xmm1,%xmm4
  872. psrld $20,%xmm1
  873. pslld $12,%xmm4
  874. por %xmm4,%xmm1
  875. paddd %xmm1,%xmm0
  876. pxor %xmm0,%xmm3
  877. .byte 102,15,56,0,223
  878. paddd %xmm3,%xmm2
  879. pxor %xmm2,%xmm1
  880. movdqa %xmm1,%xmm4
  881. psrld $25,%xmm1
  882. pslld $7,%xmm4
  883. por %xmm4,%xmm1
  884. pshufd $78,%xmm2,%xmm2
  885. pshufd $57,%xmm1,%xmm1
  886. pshufd $147,%xmm3,%xmm3
  887. nop
  888. paddd %xmm1,%xmm0
  889. pxor %xmm0,%xmm3
  890. .byte 102,15,56,0,222
  891. paddd %xmm3,%xmm2
  892. pxor %xmm2,%xmm1
  893. movdqa %xmm1,%xmm4
  894. psrld $20,%xmm1
  895. pslld $12,%xmm4
  896. por %xmm4,%xmm1
  897. paddd %xmm1,%xmm0
  898. pxor %xmm0,%xmm3
  899. .byte 102,15,56,0,223
  900. paddd %xmm3,%xmm2
  901. pxor %xmm2,%xmm1
  902. movdqa %xmm1,%xmm4
  903. psrld $25,%xmm1
  904. pslld $7,%xmm4
  905. por %xmm4,%xmm1
  906. pshufd $78,%xmm2,%xmm2
  907. pshufd $147,%xmm1,%xmm1
  908. pshufd $57,%xmm3,%xmm3
  909. decl %edx
  910. jnz .L012loop1x
  911. paddd (%esp),%xmm0
  912. paddd 16(%esp),%xmm1
  913. paddd 32(%esp),%xmm2
  914. paddd 48(%esp),%xmm3
  915. cmpl $64,%ecx
  916. jb .L014tail
  917. movdqu (%esi),%xmm4
  918. movdqu 16(%esi),%xmm5
  919. pxor %xmm4,%xmm0
  920. movdqu 32(%esi),%xmm4
  921. pxor %xmm5,%xmm1
  922. movdqu 48(%esi),%xmm5
  923. pxor %xmm4,%xmm2
  924. pxor %xmm5,%xmm3
  925. leal 64(%esi),%esi
  926. movdqu %xmm0,(%edi)
  927. movdqu %xmm1,16(%edi)
  928. movdqu %xmm2,32(%edi)
  929. movdqu %xmm3,48(%edi)
  930. leal 64(%edi),%edi
  931. subl $64,%ecx
  932. jnz .L013outer1x
  933. jmp .L011done
  934. .L014tail:
  935. movdqa %xmm0,(%esp)
  936. movdqa %xmm1,16(%esp)
  937. movdqa %xmm2,32(%esp)
  938. movdqa %xmm3,48(%esp)
  939. xorl %eax,%eax
  940. xorl %edx,%edx
  941. xorl %ebp,%ebp
  942. .L015tail_loop:
  943. movb (%esp,%ebp,1),%al
  944. movb (%esi,%ebp,1),%dl
  945. leal 1(%ebp),%ebp
  946. xorb %dl,%al
  947. movb %al,-1(%edi,%ebp,1)
  948. decl %ecx
  949. jnz .L015tail_loop
  950. .L011done:
  951. movl 512(%esp),%esp
  952. popl %edi
  953. popl %esi
  954. popl %ebx
  955. popl %ebp
  956. ret
  957. .size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
  958. .align 64
  959. .Lssse3_data:
  960. .byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
  961. .byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
  962. .long 1634760805,857760878,2036477234,1797285236
  963. .long 0,1,2,3
  964. .long 4,4,4,4
  965. .long 1,0,0,0
  966. .long 4,0,0,0
  967. .long 0,-1,-1,-1
  968. .align 64
  969. .byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
  970. .byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
  971. .byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
  972. .byte 114,103,62,0
  973. #endif
  974. .section .note.GNU-stack,"",@progbits