// ghash-ssse3-x86_64.S
  1. // This file is generated from a similarly-named Perl script in the BoringSSL
  2. // source tree. Do not edit by hand.
  3. #if defined(__has_feature)
  4. #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
  5. #define OPENSSL_NO_ASM
  6. #endif
  7. #endif
  8. #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
  9. #if defined(BORINGSSL_PREFIX)
  10. #include <boringssl_prefix_symbols_asm.h>
  11. #endif
  12. .text
  13. .globl _gcm_gmult_ssse3
  14. .private_extern _gcm_gmult_ssse3
  15. .p2align 4
  16. _gcm_gmult_ssse3:
  17. L$gmult_seh_begin:
  18. movdqu (%rdi),%xmm0
  19. movdqa L$reverse_bytes(%rip),%xmm10
  20. movdqa L$low4_mask(%rip),%xmm2
  21. .byte 102,65,15,56,0,194
  22. movdqa %xmm2,%xmm1
  23. pandn %xmm0,%xmm1
  24. psrld $4,%xmm1
  25. pand %xmm2,%xmm0
  26. pxor %xmm2,%xmm2
  27. pxor %xmm3,%xmm3
  28. movq $5,%rax
  29. L$oop_row_1:
  30. movdqa (%rsi),%xmm4
  31. leaq 16(%rsi),%rsi
  32. movdqa %xmm2,%xmm6
  33. .byte 102,15,58,15,243,1
  34. movdqa %xmm6,%xmm3
  35. psrldq $1,%xmm2
  36. movdqa %xmm4,%xmm5
  37. .byte 102,15,56,0,224
  38. .byte 102,15,56,0,233
  39. pxor %xmm5,%xmm2
  40. movdqa %xmm4,%xmm5
  41. psllq $60,%xmm5
  42. movdqa %xmm5,%xmm6
  43. pslldq $8,%xmm6
  44. pxor %xmm6,%xmm3
  45. psrldq $8,%xmm5
  46. pxor %xmm5,%xmm2
  47. psrlq $4,%xmm4
  48. pxor %xmm4,%xmm2
  49. subq $1,%rax
  50. jnz L$oop_row_1
  51. pxor %xmm3,%xmm2
  52. psrlq $1,%xmm3
  53. pxor %xmm3,%xmm2
  54. psrlq $1,%xmm3
  55. pxor %xmm3,%xmm2
  56. psrlq $5,%xmm3
  57. pxor %xmm3,%xmm2
  58. pxor %xmm3,%xmm3
  59. movq $5,%rax
  60. L$oop_row_2:
  61. movdqa (%rsi),%xmm4
  62. leaq 16(%rsi),%rsi
  63. movdqa %xmm2,%xmm6
  64. .byte 102,15,58,15,243,1
  65. movdqa %xmm6,%xmm3
  66. psrldq $1,%xmm2
  67. movdqa %xmm4,%xmm5
  68. .byte 102,15,56,0,224
  69. .byte 102,15,56,0,233
  70. pxor %xmm5,%xmm2
  71. movdqa %xmm4,%xmm5
  72. psllq $60,%xmm5
  73. movdqa %xmm5,%xmm6
  74. pslldq $8,%xmm6
  75. pxor %xmm6,%xmm3
  76. psrldq $8,%xmm5
  77. pxor %xmm5,%xmm2
  78. psrlq $4,%xmm4
  79. pxor %xmm4,%xmm2
  80. subq $1,%rax
  81. jnz L$oop_row_2
  82. pxor %xmm3,%xmm2
  83. psrlq $1,%xmm3
  84. pxor %xmm3,%xmm2
  85. psrlq $1,%xmm3
  86. pxor %xmm3,%xmm2
  87. psrlq $5,%xmm3
  88. pxor %xmm3,%xmm2
  89. pxor %xmm3,%xmm3
  90. movq $6,%rax
  91. L$oop_row_3:
  92. movdqa (%rsi),%xmm4
  93. leaq 16(%rsi),%rsi
  94. movdqa %xmm2,%xmm6
  95. .byte 102,15,58,15,243,1
  96. movdqa %xmm6,%xmm3
  97. psrldq $1,%xmm2
  98. movdqa %xmm4,%xmm5
  99. .byte 102,15,56,0,224
  100. .byte 102,15,56,0,233
  101. pxor %xmm5,%xmm2
  102. movdqa %xmm4,%xmm5
  103. psllq $60,%xmm5
  104. movdqa %xmm5,%xmm6
  105. pslldq $8,%xmm6
  106. pxor %xmm6,%xmm3
  107. psrldq $8,%xmm5
  108. pxor %xmm5,%xmm2
  109. psrlq $4,%xmm4
  110. pxor %xmm4,%xmm2
  111. subq $1,%rax
  112. jnz L$oop_row_3
  113. pxor %xmm3,%xmm2
  114. psrlq $1,%xmm3
  115. pxor %xmm3,%xmm2
  116. psrlq $1,%xmm3
  117. pxor %xmm3,%xmm2
  118. psrlq $5,%xmm3
  119. pxor %xmm3,%xmm2
  120. pxor %xmm3,%xmm3
  121. .byte 102,65,15,56,0,210
  122. movdqu %xmm2,(%rdi)
  123. pxor %xmm0,%xmm0
  124. pxor %xmm1,%xmm1
  125. pxor %xmm2,%xmm2
  126. pxor %xmm3,%xmm3
  127. pxor %xmm4,%xmm4
  128. pxor %xmm5,%xmm5
  129. pxor %xmm6,%xmm6
  130. .byte 0xf3,0xc3
  131. L$gmult_seh_end:
  132. .globl _gcm_ghash_ssse3
  133. .private_extern _gcm_ghash_ssse3
  134. .p2align 4
  135. _gcm_ghash_ssse3:
  136. L$ghash_seh_begin:
  137. movdqu (%rdi),%xmm0
  138. movdqa L$reverse_bytes(%rip),%xmm10
  139. movdqa L$low4_mask(%rip),%xmm11
  140. andq $-16,%rcx
  141. .byte 102,65,15,56,0,194
  142. pxor %xmm3,%xmm3
  143. L$oop_ghash:
  144. movdqu (%rdx),%xmm1
  145. .byte 102,65,15,56,0,202
  146. pxor %xmm1,%xmm0
  147. movdqa %xmm11,%xmm1
  148. pandn %xmm0,%xmm1
  149. psrld $4,%xmm1
  150. pand %xmm11,%xmm0
  151. pxor %xmm2,%xmm2
  152. movq $5,%rax
  153. L$oop_row_4:
  154. movdqa (%rsi),%xmm4
  155. leaq 16(%rsi),%rsi
  156. movdqa %xmm2,%xmm6
  157. .byte 102,15,58,15,243,1
  158. movdqa %xmm6,%xmm3
  159. psrldq $1,%xmm2
  160. movdqa %xmm4,%xmm5
  161. .byte 102,15,56,0,224
  162. .byte 102,15,56,0,233
  163. pxor %xmm5,%xmm2
  164. movdqa %xmm4,%xmm5
  165. psllq $60,%xmm5
  166. movdqa %xmm5,%xmm6
  167. pslldq $8,%xmm6
  168. pxor %xmm6,%xmm3
  169. psrldq $8,%xmm5
  170. pxor %xmm5,%xmm2
  171. psrlq $4,%xmm4
  172. pxor %xmm4,%xmm2
  173. subq $1,%rax
  174. jnz L$oop_row_4
  175. pxor %xmm3,%xmm2
  176. psrlq $1,%xmm3
  177. pxor %xmm3,%xmm2
  178. psrlq $1,%xmm3
  179. pxor %xmm3,%xmm2
  180. psrlq $5,%xmm3
  181. pxor %xmm3,%xmm2
  182. pxor %xmm3,%xmm3
  183. movq $5,%rax
  184. L$oop_row_5:
  185. movdqa (%rsi),%xmm4
  186. leaq 16(%rsi),%rsi
  187. movdqa %xmm2,%xmm6
  188. .byte 102,15,58,15,243,1
  189. movdqa %xmm6,%xmm3
  190. psrldq $1,%xmm2
  191. movdqa %xmm4,%xmm5
  192. .byte 102,15,56,0,224
  193. .byte 102,15,56,0,233
  194. pxor %xmm5,%xmm2
  195. movdqa %xmm4,%xmm5
  196. psllq $60,%xmm5
  197. movdqa %xmm5,%xmm6
  198. pslldq $8,%xmm6
  199. pxor %xmm6,%xmm3
  200. psrldq $8,%xmm5
  201. pxor %xmm5,%xmm2
  202. psrlq $4,%xmm4
  203. pxor %xmm4,%xmm2
  204. subq $1,%rax
  205. jnz L$oop_row_5
  206. pxor %xmm3,%xmm2
  207. psrlq $1,%xmm3
  208. pxor %xmm3,%xmm2
  209. psrlq $1,%xmm3
  210. pxor %xmm3,%xmm2
  211. psrlq $5,%xmm3
  212. pxor %xmm3,%xmm2
  213. pxor %xmm3,%xmm3
  214. movq $6,%rax
  215. L$oop_row_6:
  216. movdqa (%rsi),%xmm4
  217. leaq 16(%rsi),%rsi
  218. movdqa %xmm2,%xmm6
  219. .byte 102,15,58,15,243,1
  220. movdqa %xmm6,%xmm3
  221. psrldq $1,%xmm2
  222. movdqa %xmm4,%xmm5
  223. .byte 102,15,56,0,224
  224. .byte 102,15,56,0,233
  225. pxor %xmm5,%xmm2
  226. movdqa %xmm4,%xmm5
  227. psllq $60,%xmm5
  228. movdqa %xmm5,%xmm6
  229. pslldq $8,%xmm6
  230. pxor %xmm6,%xmm3
  231. psrldq $8,%xmm5
  232. pxor %xmm5,%xmm2
  233. psrlq $4,%xmm4
  234. pxor %xmm4,%xmm2
  235. subq $1,%rax
  236. jnz L$oop_row_6
  237. pxor %xmm3,%xmm2
  238. psrlq $1,%xmm3
  239. pxor %xmm3,%xmm2
  240. psrlq $1,%xmm3
  241. pxor %xmm3,%xmm2
  242. psrlq $5,%xmm3
  243. pxor %xmm3,%xmm2
  244. pxor %xmm3,%xmm3
  245. movdqa %xmm2,%xmm0
  246. leaq -256(%rsi),%rsi
  247. leaq 16(%rdx),%rdx
  248. subq $16,%rcx
  249. jnz L$oop_ghash
  250. .byte 102,65,15,56,0,194
  251. movdqu %xmm0,(%rdi)
  252. pxor %xmm0,%xmm0
  253. pxor %xmm1,%xmm1
  254. pxor %xmm2,%xmm2
  255. pxor %xmm3,%xmm3
  256. pxor %xmm4,%xmm4
  257. pxor %xmm5,%xmm5
  258. pxor %xmm6,%xmm6
  259. .byte 0xf3,0xc3
  260. L$ghash_seh_end:
  261. .p2align 4
  262. L$reverse_bytes:
  263. .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
  264. L$low4_mask:
  265. .quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
  266. #endif