// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
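// gcm_init_clmul(Htable, H): expand the GHASH key for the CLMUL code path.
// It computes H<<1 mod P (using the 0xc2... reduction constant at .Lbswap+16),
// squares it, and stores both powers plus the xor of their 64-bit halves (used
// for Karatsuba) at offsets 0, 16 and 32 of Htable. Argument names here follow
// the usual BoringSSL GHASH interface and are descriptive only; the prototype
// lives in the C code, not in this file.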
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
call .L000pic
.L000pic:
popl %ecx
leal .Lbswap-.L000pic(%ecx),%ecx
movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
psrlq $63,%xmm3
pcmpgtd %xmm4,%xmm5
pslldq $8,%xmm3
por %xmm3,%xmm2
pand 16(%ecx),%xmm5
pxor %xmm5,%xmm2
movdqa %xmm2,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0  // pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17  // pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,220,0  // pclmulqdq $0x00,%xmm4,%xmm3
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,(%edx)
pxor %xmm0,%xmm4
movdqu %xmm0,16(%edx)
.byte 102,15,58,15,227,8  // palignr $8,%xmm3,%xmm4
movdqu %xmm4,32(%edx)
ret
.size gcm_init_clmul,.-.L_gcm_init_clmul_begin
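// gcm_gmult_clmul(Xi, Htable): one GHASH multiplication. Xi is byte-swapped,
// multiplied by H from Htable via PCLMULQDQ (Karatsuba plus the shift-based
// reduction), byte-swapped back and written in place.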
.globl gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax
movl 8(%esp),%edx
call .L001pic
.L001pic:
popl %ecx
leal .Lbswap-.L001pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
.byte 102,15,56,0,197  // pshufb %xmm5,%xmm0
movups 32(%edx),%xmm4
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0  // pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17  // pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,220,0  // pclmulqdq $0x00,%xmm4,%xmm3
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,56,0,197  // pshufb %xmm5,%xmm0
movdqu %xmm0,(%eax)
ret
.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
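// gcm_ghash_clmul(Xi, Htable, inp, len): fold len bytes from inp into the
// running hash Xi (len is expected to be a multiple of the 16-byte block
// size). The main loop consumes two blocks per iteration using H and H^2
// from Htable; a single trailing block is handled by the odd_tail path.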
.globl gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebx
call .L002pic
.L002pic:
popl %ecx
leal .Lbswap-.L002pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2
.byte 102,15,56,0,197  // pshufb %xmm5,%xmm0
subl $16,%ebx
jz .L003odd_tail
movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221  // pshufb %xmm5,%xmm3
.byte 102,15,56,0,245  // pshufb %xmm5,%xmm6
movdqu 32(%edx),%xmm5
pxor %xmm3,%xmm0
pshufd $78,%xmm6,%xmm3
movdqa %xmm6,%xmm7
pxor %xmm6,%xmm3
leal 32(%esi),%esi
.byte 102,15,58,68,242,0  // pclmulqdq $0x00,%xmm2,%xmm6
.byte 102,15,58,68,250,17  // pclmulqdq $0x11,%xmm2,%xmm7
.byte 102,15,58,68,221,0  // pclmulqdq $0x00,%xmm5,%xmm3
movups 16(%edx),%xmm2
nop
subl $32,%ebx
jbe .L004even_tail
jmp .L005mod_loop
.align 32
.L005mod_loop:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
nop
.byte 102,15,58,68,194,0  // pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17  // pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,229,16  // pclmulqdq $0x10,%xmm5,%xmm4
movups (%edx),%xmm2
xorps %xmm6,%xmm0
movdqa (%ecx),%xmm5
xorps %xmm7,%xmm1
movdqu (%esi),%xmm7
pxor %xmm0,%xmm3
movdqu 16(%esi),%xmm6
pxor %xmm1,%xmm3
.byte 102,15,56,0,253  // pshufb %xmm5,%xmm7
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
.byte 102,15,56,0,245  // pshufb %xmm5,%xmm6
pxor %xmm7,%xmm1
movdqa %xmm6,%xmm7
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0  // pclmulqdq $0x00,%xmm2,%xmm6
movups 32(%edx),%xmm5
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
pshufd $78,%xmm7,%xmm3
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm7,%xmm3
pxor %xmm4,%xmm1
.byte 102,15,58,68,250,17  // pclmulqdq $0x11,%xmm2,%xmm7
movups 16(%edx),%xmm2
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0  // pclmulqdq $0x00,%xmm5,%xmm3
leal 32(%esi),%esi
subl $32,%ebx
ja .L005mod_loop
.L004even_tail:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0  // pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17  // pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,229,16  // pclmulqdq $0x10,%xmm5,%xmm4
movdqa (%ecx),%xmm5
xorps %xmm6,%xmm0
xorps %xmm7,%xmm1
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz .L006done
movups (%edx),%xmm2
.L003odd_tail:
movdqu (%esi),%xmm3
.byte 102,15,56,0,221  // pshufb %xmm5,%xmm3
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0  // pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17  // pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,220,0  // pclmulqdq $0x00,%xmm4,%xmm3
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.L006done:
.byte 102,15,56,0,197  // pshufb %xmm5,%xmm0
movdqu %xmm0,(%eax)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
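// Constant pool: a pshufb mask that reverses the byte order of a 128-bit
// value, the GHASH reduction constant 0xc2...01 used when computing
// H<<1 mod P, and the ASCII credit string
// "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>".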
.align 64
.Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif
.section .note.GNU-stack,"",@progbits