// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
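// gcm_gmult_ssse3 computes a single GHASH multiplication, Xi = Xi * H, using
// constant-time SSSE3 table lookups (pshufb) into the 16-row precomputed
// table Htable. Assumed C prototype, matching BoringSSL's declaration of
// this routine (not part of this file):
//   void gcm_gmult_ssse3(uint64_t Xi[2], const u128 Htable[16]);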
.globl gcm_gmult_ssse3
.hidden gcm_gmult_ssse3
.type gcm_gmult_ssse3,@function
.align 16
gcm_gmult_ssse3:
.L_gcm_gmult_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi // %edi = Xi
movl 24(%esp),%esi // %esi = Htable
movdqu (%edi),%xmm0 // load the current hash value, Xi
call .L000pic_point
.L000pic_point:
popl %eax // %eax = PC, for position-independent addressing
movdqa .Lreverse_bytes-.L000pic_point(%eax),%xmm7
movdqa .Llow4_mask-.L000pic_point(%eax),%xmm2
.byte 102,15,56,0,199 // pshufb %xmm7,%xmm0 (byte-reverse Xi)
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1 // %xmm1 = high nibbles of Xi
pand %xmm2,%xmm0 // %xmm0 = low nibbles of Xi
pxor %xmm2,%xmm2 // clear the accumulator %xmm2:%xmm3
pxor %xmm3,%xmm3
movl $5,%eax
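// The 16 rows of Htable are processed in three groups of 5, 5, and 6
// iterations, with a reduction step after each group. Each iteration shifts
// the 256-bit accumulator %xmm2:%xmm3 right by one byte, XORs in the Htable
// row selected by the high nibbles (pshufb), and XORs in the row selected by
// the low nibbles shifted right by four bits; bits shifted out of %xmm2
// collect in %xmm3 for the reduction step.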
.L001loop_row_1:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1 // palignr $1,%xmm3,%xmm6
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224 // pshufb %xmm0,%xmm4
.byte 102,15,56,0,233 // pshufb %xmm1,%xmm5
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L001loop_row_1
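// Reduce: fold %xmm3 back into %xmm2. The cumulative right shifts of
// 0, 1, 2, and 7 bits multiply %xmm3 by the GHASH reduction polynomial
// x^7 + x^2 + x + 1.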
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L002loop_row_2:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1 // palignr $1,%xmm3,%xmm6
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224 // pshufb %xmm0,%xmm4
.byte 102,15,56,0,233 // pshufb %xmm1,%xmm5
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L002loop_row_2
// Reduce: fold %xmm3 into %xmm2 (multiply by x^7 + x^2 + x + 1).
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
.L003loop_row_3:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1 // palignr $1,%xmm3,%xmm6
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224 // pshufb %xmm0,%xmm4
.byte 102,15,56,0,233 // pshufb %xmm1,%xmm5
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L003loop_row_3
// Reduce: fold %xmm3 into %xmm2 (multiply by x^7 + x^2 + x + 1).
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,0,215 // pshufb %xmm7,%xmm2 (byte-reverse the result)
movdqu %xmm2,(%edi) // store the updated Xi
// Zero the XMM registers to avoid leaving sensitive data behind.
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_gmult_ssse3,.-.L_gcm_gmult_ssse3_begin
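// gcm_ghash_ssse3 hashes len bytes (rounded down to a multiple of 16) from
// in into Xi: for each 16-byte block, Xi = (Xi ^ block) * H. Assumed C
// prototype, matching BoringSSL's declaration of this routine (not part of
// this file):
//   void gcm_ghash_ssse3(uint64_t Xi[2], const u128 Htable[16],
//                        const uint8_t *in, size_t len);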
.globl gcm_ghash_ssse3
.hidden gcm_ghash_ssse3
.type gcm_ghash_ssse3,@function
.align 16
gcm_ghash_ssse3:
.L_gcm_ghash_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi // %edi = Xi
movl 24(%esp),%esi // %esi = Htable
movl 28(%esp),%edx // %edx = in
movl 32(%esp),%ecx // %ecx = len
movdqu (%edi),%xmm0 // load the current hash value, Xi
call .L004pic_point
.L004pic_point:
popl %ebx // %ebx = PC, for position-independent addressing
movdqa .Lreverse_bytes-.L004pic_point(%ebx),%xmm7
andl $-16,%ecx // round len down to a multiple of 16 bytes
.byte 102,15,56,0,199 // pshufb %xmm7,%xmm0 (byte-reverse Xi)
pxor %xmm3,%xmm3
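// Main loop: each iteration absorbs one 16-byte block (Xi ^= block), then
// multiplies by H with the same three row loops used in gcm_gmult_ssse3.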
.L005loop_ghash:
movdqa .Llow4_mask-.L004pic_point(%ebx),%xmm2
movdqu (%edx),%xmm1 // load the next input block
.byte 102,15,56,0,207 // pshufb %xmm7,%xmm1 (byte-reverse the block)
pxor %xmm1,%xmm0 // Xi ^= block
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1 // %xmm1 = high nibbles of Xi
pand %xmm2,%xmm0 // %xmm0 = low nibbles of Xi
pxor %xmm2,%xmm2 // clear the accumulator %xmm2:%xmm3
movl $5,%eax
.L006loop_row_4:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1 // palignr $1,%xmm3,%xmm6
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224 // pshufb %xmm0,%xmm4
.byte 102,15,56,0,233 // pshufb %xmm1,%xmm5
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L006loop_row_4
// Reduce: fold %xmm3 into %xmm2 (multiply by x^7 + x^2 + x + 1).
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L007loop_row_5:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1 // palignr $1,%xmm3,%xmm6
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224 // pshufb %xmm0,%xmm4
.byte 102,15,56,0,233 // pshufb %xmm1,%xmm5
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L007loop_row_5
// Reduce: fold %xmm3 into %xmm2 (multiply by x^7 + x^2 + x + 1).
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
.L008loop_row_6:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1 // palignr $1,%xmm3,%xmm6
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224 // pshufb %xmm0,%xmm4
.byte 102,15,56,0,233 // pshufb %xmm1,%xmm5
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L008loop_row_6
// Reduce: fold %xmm3 into %xmm2 (multiply by x^7 + x^2 + x + 1).
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,%xmm0 // Xi = the product
leal -256(%esi),%esi // rewind Htable (16 rows of 16 bytes were consumed)
leal 16(%edx),%edx // advance to the next block
subl $16,%ecx
jnz .L005loop_ghash
.byte 102,15,56,0,199 // pshufb %xmm7,%xmm0 (byte-reverse the result)
movdqu %xmm0,(%edi) // store the updated Xi
// Zero the XMM registers to avoid leaving sensitive data behind.
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_ssse3,.-.L_gcm_ghash_ssse3_begin
.align 16
// pshufb mask that reverses the byte order of a 128-bit vector.
.Lreverse_bytes:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.align 16
// 0x0f0f0f0f repeated: selects the low nibble of each byte.
.Llow4_mask:
.long 252645135,252645135,252645135,252645135
#endif
.section .note.GNU-stack,"",@progbits