// ghash-ssse3-x86.S
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
// void gcm_gmult_ssse3(uint64_t Xi[2], const u128 Htable[16]);
//
// Multiplies the 128-bit GHASH state Xi by the hash key H (pre-expanded into
// the 16-row table Htable) using only SSSE3 instructions. Table rows are
// selected with pshufb on nibble values rather than data-dependent memory
// indexing, so the routine is constant time with respect to Xi.
//
// cdecl (i386): after the four pushes below, 20(%esp) = Xi, 24(%esp) = Htable.
.globl _gcm_gmult_ssse3
.private_extern _gcm_gmult_ssse3
.align 4
_gcm_gmult_ssse3:
L_gcm_gmult_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi              // %edi = Xi
movl 24(%esp),%esi              // %esi = Htable (advanced row by row below)
movdqu (%edi),%xmm0             // xmm0 = Xi (unaligned load)
call L000pic_point              // PIC idiom: load own address into %eax
L000pic_point:
popl %eax
movdqa Lreverse_bytes-L000pic_point(%eax),%xmm7 // xmm7 = byte-reversal shuffle mask
movdqa Llow4_mask-L000pic_point(%eax),%xmm2     // xmm2 = 0x0f in every byte
.byte 102,15,56,0,199           // pshufb %xmm7,%xmm0: byte-swap Xi
// Split Xi into per-byte nibbles: xmm0 = low nibbles, xmm1 = high nibbles.
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1               // xmm1 = high nibble of each byte ...
psrld $4,%xmm1                  // ... moved down into low-nibble position
pand %xmm2,%xmm0                // xmm0 = low nibble of each byte
pxor %xmm2,%xmm2                // xmm2 = accumulator, cleared
pxor %xmm3,%xmm3                // xmm3 = overflow/carry accumulator, cleared
// The 16 Htable rows are consumed in groups of 5, 5 and 6, with a
// polynomial reduction of the carry word after each group.
movl $5,%eax                    // group 1: 5 rows
L001loop_row_1:
movdqa (%esi),%xmm4             // xmm4 = current Htable row
leal 16(%esi),%esi              // advance to next row
// Shift the 256-bit accumulator pair (xmm3:xmm2) right by one byte.
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1        // palignr $1,%xmm3,%xmm6
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
// Constant-time row lookup: pshufb selects a row byte per nibble value.
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224           // pshufb %xmm0,%xmm4: select by low nibbles
.byte 102,15,56,0,233           // pshufb %xmm1,%xmm5: select by high nibbles
pxor %xmm5,%xmm2                // fold high-nibble contribution in
// Fold xmm4 in shifted right by 4 bits; the bits shifted out of each
// 64-bit lane (<<60) are re-injected into the neighboring lane / carry.
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3                // cross-lane carry into high accumulator
psrldq $8,%xmm5
pxor %xmm5,%xmm2                // cross-lane carry within low accumulator
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L001loop_row_1
// Reduce the carry word xmm3 modulo the GHASH polynomial
// (fold in at shifts 0, 1, 2 and 7), then clear it for the next group.
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax                    // group 2: 5 rows (same body as group 1)
L002loop_row_2:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1        // palignr $1,%xmm3,%xmm6
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224           // pshufb %xmm0,%xmm4
.byte 102,15,56,0,233           // pshufb %xmm1,%xmm5
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L002loop_row_2
// Reduce the carry word again (shifts 0, 1, 2, 7).
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax                    // group 3: final 6 rows (5+5+6 = 16 total)
L003loop_row_3:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1        // palignr $1,%xmm3,%xmm6
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224           // pshufb %xmm0,%xmm4
.byte 102,15,56,0,233           // pshufb %xmm1,%xmm5
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L003loop_row_3
// Final reduction of the carry word.
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,0,215           // pshufb %xmm7,%xmm2: byte-swap result back
movdqu %xmm2,(%edi)             // store new Xi
// Scrub XMM registers so no key-derived material is left behind.
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
// void gcm_ghash_ssse3(uint64_t Xi[2], const u128 Htable[16],
//                      const uint8_t *in, size_t len);
//
// Folds |len| bytes at |in| into the GHASH state Xi: for each 16-byte block,
// Xi = (Xi ^ block) * H, using the same constant-time SSSE3 nibble-table
// multiply as gcm_gmult_ssse3 above. |len| is rounded down to a multiple of
// 16 and is expected to be non-zero.
//
// cdecl (i386): after the four pushes, 20(%esp) = Xi, 24(%esp) = Htable,
// 28(%esp) = in, 32(%esp) = len.
.globl _gcm_ghash_ssse3
.private_extern _gcm_ghash_ssse3
.align 4
_gcm_ghash_ssse3:
L_gcm_ghash_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi              // %edi = Xi
movl 24(%esp),%esi              // %esi = Htable
movl 28(%esp),%edx              // %edx = in
movl 32(%esp),%ecx              // %ecx = len
movdqu (%edi),%xmm0             // xmm0 = Xi (unaligned load)
call L004pic_point              // PIC idiom: load own address into %ebx
L004pic_point:
popl %ebx
movdqa Lreverse_bytes-L004pic_point(%ebx),%xmm7 // xmm7 = byte-reversal mask
andl $-16,%ecx                  // round len down to a multiple of 16
.byte 102,15,56,0,199           // pshufb %xmm7,%xmm0: byte-swap Xi
pxor %xmm3,%xmm3                // clear carry accumulator
L005loop_ghash:
movdqa Llow4_mask-L004pic_point(%ebx),%xmm2     // xmm2 = 0x0f in every byte
movdqu (%edx),%xmm1             // xmm1 = next 16-byte input block
.byte 102,15,56,0,207           // pshufb %xmm7,%xmm1: byte-swap the block
pxor %xmm1,%xmm0                // Xi ^= block
// Split into per-byte nibbles: xmm0 = low nibbles, xmm1 = high nibbles.
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2                // xmm2 = accumulator, cleared
// Multiply by H: 16 Htable rows in groups of 5, 5 and 6, with a
// polynomial reduction of the carry word after each group.
movl $5,%eax                    // group 1: 5 rows
L006loop_row_4:
movdqa (%esi),%xmm4             // xmm4 = current Htable row
leal 16(%esi),%esi
// Shift the accumulator pair (xmm3:xmm2) right by one byte.
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1        // palignr $1,%xmm3,%xmm6
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
// Constant-time row lookup via pshufb on the nibble values.
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224           // pshufb %xmm0,%xmm4: select by low nibbles
.byte 102,15,56,0,233           // pshufb %xmm1,%xmm5: select by high nibbles
pxor %xmm5,%xmm2
// Fold xmm4 in shifted right 4 bits, propagating cross-lane carries.
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L006loop_row_4
// Reduce the carry word modulo the GHASH polynomial (shifts 0, 1, 2, 7).
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax                    // group 2: 5 rows (same body as group 1)
L007loop_row_5:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1        // palignr $1,%xmm3,%xmm6
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224           // pshufb %xmm0,%xmm4
.byte 102,15,56,0,233           // pshufb %xmm1,%xmm5
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L007loop_row_5
// Reduce the carry word again.
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax                    // group 3: final 6 rows (5+5+6 = 16 total)
L008loop_row_6:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1        // palignr $1,%xmm3,%xmm6
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224           // pshufb %xmm0,%xmm4
.byte 102,15,56,0,233           // pshufb %xmm1,%xmm5
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L008loop_row_6
// Final reduction of the carry word.
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,%xmm0              // Xi = product, kept byte-swapped in xmm0
leal -256(%esi),%esi            // rewind Htable (16 rows x 16 bytes)
leal 16(%edx),%edx              // advance input pointer
subl $16,%ecx                   // consume one block
jnz L005loop_ghash
.byte 102,15,56,0,199           // pshufb %xmm7,%xmm0: byte-swap Xi back
movdqu %xmm0,(%edi)             // store final Xi
// Scrub XMM registers so no key-derived material is left behind.
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
// pshufb control mask that reverses the byte order of a 128-bit value
// (used to convert Xi and input blocks between memory and working order).
.align 4,0x90
Lreverse_bytes:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
// 0x0f in every byte (252645135 = 0x0f0f0f0f): mask that isolates the
// low nibble of each byte for the table-lookup split.
.align 4,0x90
Llow4_mask:
.long 252645135,252645135,252645135,252645135
#endif