// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
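// gcm_gmult_ssse3 computes a single GHASH multiplication, Xi <- Xi * H in
// GF(2^128), without relying on PCLMULQDQ. Judging by the register usage, the
// arguments are roughly: %rdi points to the 16-byte hash state Xi and %rsi to
// a 256-byte precomputed table for H (16 rows of 16 bytes). Table rows are
// combined via pshufb-based nibble lookups rather than secret-dependent
// loads, so the routine runs in constant time.
//
// The .byte sequences below are SSSE3 instructions emitted as raw machine
// code; they decode as follows:
//   102,65,15,56,0,194  ->  pshufb %xmm10,%xmm0
//   102,65,15,56,0,202  ->  pshufb %xmm10,%xmm1
//   102,65,15,56,0,210  ->  pshufb %xmm10,%xmm2
//   102,15,56,0,224     ->  pshufb %xmm0,%xmm4
//   102,15,56,0,233     ->  pshufb %xmm1,%xmm5
//   102,15,58,15,243,1  ->  palignr $1,%xmm3,%xmm6
//   0xf3,0xc3           ->  rep ret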
.type gcm_gmult_ssse3, @function
.globl gcm_gmult_ssse3
.hidden gcm_gmult_ssse3
.align 16
gcm_gmult_ssse3:
.cfi_startproc
.Lgmult_seh_begin:
movdqu (%rdi),%xmm0
movdqa .Lreverse_bytes(%rip),%xmm10
movdqa .Llow4_mask(%rip),%xmm2
.byte 102,65,15,56,0,194
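// %xmm0 now holds Xi with its bytes reversed. Split it into per-byte high
// nibbles (%xmm1) and low nibbles (%xmm0); these serve as pshufb indices into
// the table rows below.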
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
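// The 16 table rows are processed in three groups of 5, 5 and 6 rows, with a
// partial reduction after each group. Each iteration loads one row, selects
// its bytes with pshufb using the high and low nibbles of Xi as indices, and
// XORs the selections (the low-nibble one shifted right by 4 bits) into a
// 256-bit accumulator held in %xmm2:%xmm3, which is itself shifted down by
// one byte per row.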
.Loop_row_1:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_1
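// Fold the low half of the accumulator (%xmm3) back into the high half
// (%xmm2). The shift amounts (0, 1, 2 and 7 bits in total) correspond to
// reducing modulo the GHASH polynomial x^128 + x^7 + x^2 + x + 1. The same
// folding step follows each group of rows.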
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
.Loop_row_2:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_2
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $6,%rax
.Loop_row_3:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
.byte 102,65,15,56,0,210
movdqu %xmm2,(%rdi)
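// Zero the SSE registers that held key- or data-dependent values before
// returning. The final .byte 0xf3,0xc3 encodes "rep ret".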
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
.Lgmult_seh_end:
.cfi_endproc
.size gcm_gmult_ssse3,.-gcm_gmult_ssse3
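// gcm_ghash_ssse3 folds a buffer of data into the hash state. Judging by the
// register usage, the arguments are roughly: %rdi points to the 16-byte state
// Xi, %rsi to the same 256-byte table as above, %rdx to the input buffer, and
// %rcx holds its length in bytes, which is rounded down to a multiple of 16.
// Each 16-byte block is byte-reversed, XORed into Xi, and then multiplied by
// H using the same three-pass row loop as gcm_gmult_ssse3.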
.type gcm_ghash_ssse3, @function
.globl gcm_ghash_ssse3
.hidden gcm_ghash_ssse3
.align 16
gcm_ghash_ssse3:
.Lghash_seh_begin:
.cfi_startproc
movdqu (%rdi),%xmm0
movdqa .Lreverse_bytes(%rip),%xmm10
movdqa .Llow4_mask(%rip),%xmm11
andq $-16,%rcx
.byte 102,65,15,56,0,194
pxor %xmm3,%xmm3
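// Main loop: load the next block, byte-reverse it, XOR it into the running
// state in %xmm0, then split into nibbles and multiply by H as above.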
.Loop_ghash:
movdqu (%rdx),%xmm1
.byte 102,65,15,56,0,202
pxor %xmm1,%xmm0
movdqa %xmm11,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm11,%xmm0
pxor %xmm2,%xmm2
movq $5,%rax
.Loop_row_4:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_4
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
.Loop_row_5:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_5
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $6,%rax
.Loop_row_6:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_6
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
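// Commit the product as the new state, rewind the table pointer by the 256
// bytes (16 rows) consumed above, and advance to the next input block.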
movdqa %xmm2,%xmm0
leaq -256(%rsi),%rsi
leaq 16(%rdx),%rdx
subq $16,%rcx
jnz .Loop_ghash
.byte 102,65,15,56,0,194
movdqu %xmm0,(%rdi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
.Lghash_seh_end:
.cfi_endproc
.size gcm_ghash_ssse3,.-gcm_ghash_ssse3
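// Constants: .Lreverse_bytes is a pshufb mask that reverses the byte order of
// a 128-bit value; .Llow4_mask has 0x0f in every byte and selects the low
// nibble of each byte.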
.align 16
.Lreverse_bytes:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.Llow4_mask:
.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
#endif
.section .note.GNU-stack,"",@progbits