ghash-x86.S

// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
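// GHASH (the GCM universal hash) for 32-bit x86 using the PCLMULQDQ
// carry-less multiply extension (CRYPTOGAMS code; see the credit string after
// Lbswap). Instructions newer than some assemblers understand are hand-encoded
// as .byte sequences: 102,15,58,68,... is pclmulqdq, 102,15,56,0,... is pshufb
// (byte reflection through the Lbswap mask), and 102,15,58,15,... is palignr.
//
// gcm_init_clmul(Htable, H): the first argument receives the key table, the
// second points to the raw hash key H. The key is converted to the internal
// representation (qword swap, then a 128-bit left shift by one with a
// conditional xor of the reduction constant), squared, and stored as H at
// offset 0, H^2 at offset 16, and the xor-folded halves of both (the
// Karatsuba middle-term inputs) at offset 32 of Htable.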
.globl _gcm_init_clmul
.private_extern _gcm_init_clmul
.align 4
_gcm_init_clmul:
L_gcm_init_clmul_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
call L000pic
L000pic:
popl %ecx
leal Lbswap-L000pic(%ecx),%ecx
movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
psrlq $63,%xmm3
pcmpgtd %xmm4,%xmm5
pslldq $8,%xmm3
por %xmm3,%xmm2
pand 16(%ecx),%xmm5
pxor %xmm5,%xmm2
movdqa %xmm2,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,(%edx)
pxor %xmm0,%xmm4
movdqu %xmm0,16(%edx)
.byte 102,15,58,15,227,8
movdqu %xmm4,32(%edx)
ret
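// gcm_gmult_clmul(Xi, Htable): multiplies the 16-byte accumulator Xi by the
// hash key H from Htable and reduces the 256-bit product modulo the GHASH
// polynomial x^128 + x^7 + x^2 + x + 1, writing the result back to Xi. Xi is
// byte-reflected through Lbswap on entry and again before the store.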
.globl _gcm_gmult_clmul
.private_extern _gcm_gmult_clmul
.align 4
_gcm_gmult_clmul:
L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax
movl 8(%esp),%edx
call L001pic
L001pic:
popl %ecx
leal Lbswap-L001pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
.byte 102,15,56,0,197
movups 32(%edx),%xmm4
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
ret
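// gcm_ghash_clmul(Xi, Htable, inp, len): folds len bytes of input into the
// accumulator, i.e. Xi = (Xi ^ block) * H for each 16-byte block. The main
// loop (L005mod_loop) consumes two blocks per iteration using H and H^2 from
// Htable; L004even_tail completes the final pair of blocks and L003odd_tail
// handles a single trailing block.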
.globl _gcm_ghash_clmul
.private_extern _gcm_ghash_clmul
.align 4
_gcm_ghash_clmul:
L_gcm_ghash_clmul_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebx
call L002pic
L002pic:
popl %ecx
leal Lbswap-L002pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2
.byte 102,15,56,0,197
subl $16,%ebx
jz L003odd_tail
movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
.byte 102,15,56,0,245
movdqu 32(%edx),%xmm5
pxor %xmm3,%xmm0
pshufd $78,%xmm6,%xmm3
movdqa %xmm6,%xmm7
pxor %xmm6,%xmm3
leal 32(%esi),%esi
.byte 102,15,58,68,242,0
.byte 102,15,58,68,250,17
.byte 102,15,58,68,221,0
movups 16(%edx),%xmm2
nop
subl $32,%ebx
jbe L004even_tail
jmp L005mod_loop
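// Main loop: two blocks per iteration. The running value (already xored with
// the first block of the pair) is multiplied by H^2 and the second block by
// H; the two products are summed and reduced modulo the GHASH polynomial.
// Loads, byte swaps, multiplies and the reduction steps are interleaved to
// hide instruction latency.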
.align 5,0x90
L005mod_loop:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
nop
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movups (%edx),%xmm2
xorps %xmm6,%xmm0
movdqa (%ecx),%xmm5
xorps %xmm7,%xmm1
movdqu (%esi),%xmm7
pxor %xmm0,%xmm3
movdqu 16(%esi),%xmm6
pxor %xmm1,%xmm3
.byte 102,15,56,0,253
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
.byte 102,15,56,0,245
pxor %xmm7,%xmm1
movdqa %xmm6,%xmm7
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0
movups 32(%edx),%xmm5
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
pshufd $78,%xmm7,%xmm3
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm7,%xmm3
pxor %xmm4,%xmm1
.byte 102,15,58,68,250,17
movups 16(%edx),%xmm2
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0
leal 32(%esi),%esi
subl $32,%ebx
ja L005mod_loop
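// Even tail: combine the products of the last pair of blocks (the accumulator
// times H^2 and the final block times H), reduce, then either finish or fall
// through to the single-block tail below.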
L004even_tail:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movdqa (%ecx),%xmm5
xorps %xmm6,%xmm0
xorps %xmm7,%xmm1
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz L006done
movups (%edx),%xmm2
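// Odd tail: a single 16-byte block remains; xor it into the accumulator,
// multiply by H (Karatsuba with three pclmulqdq) and reduce.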
L003odd_tail:
movdqu (%esi),%xmm3
.byte 102,15,56,0,221
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
L006done:
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
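// Constant pool: Lbswap is the byte-reflection mask for pshufb, followed by
// the reduction constant (0xc2 in the most significant byte, 1 in the least)
// and the ASCII string "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>".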
.align 6,0x90
Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif