ghash-ssse3-x86_64.asm 6.1 KB


  1. ; This file is generated from a similarly-named Perl script in the BoringSSL
  2. ; source tree. Do not edit by hand.
  ; Address memory operands RIP-relative by default (used below for the
  ; $L$reverse_bytes / $L$low4_mask loads).
  3. default rel
  ; The size keywords are defined as empty macros, so an operand written as
  ; XMMWORD[...] reaches the assembler as a plain [...] reference.
  ; Only XMMWORD is used by the code visible in this file.
  4. %define XMMWORD
  5. %define YMMWORD
  6. %define ZMMWORD
  ; The symbol-prefix include is pulled in only when the build defines
  ; BORINGSSL_PREFIX.
  7. %ifdef BORINGSSL_PREFIX
  8. %include "boringssl_prefix_symbols_nasm.inc"
  9. %endif
  ; Everything up to the .pdata section (both functions and the two constants)
  ; is emitted into this 64-byte-aligned code section.
  10. section .text code align=64
  ; ---------------------------------------------------------------------------
  ; gcm_gmult_ssse3
  ; ABI:   Microsoft x64 (the function spills xmm6/xmm10 and has SEH unwind
  ;        data in .pdata/.xdata).
  ; In:    rcx -> 16-byte block; loaded unaligned at entry and overwritten with
  ;               the result on exit.
  ;        rdx -> table of 16 rows x 16 bytes; read with movdqa, so it must be
  ;               16-byte aligned.
  ; Out:   16 bytes stored to [rcx]. rax is 0 on exit only as a side effect of
  ;        the loop counter.
  ; Clobb: rax, rdx (left 256 bytes past the table start), flags,
  ;        xmm0-xmm5 (all zeroed before returning).
  ; Saved: xmm6, xmm10 (spilled to the 40-byte frame and reloaded).
  ; NOTE(review): judging by the name, rcx is presumably the GHASH state and
  ; rdx the precomputed key table — confirm against the C prototype.
  ; ---------------------------------------------------------------------------
  11. global gcm_gmult_ssse3
  12. ALIGN 16
  13. gcm_gmult_ssse3:
  ; The $L$gmult_seh_* labels mark prologue offsets referenced by the unwind
  ; codes in $L$gmult_seh_info; they must stay directly after the instruction
  ; they describe.
  14. $L$gmult_seh_begin:
  15. sub rsp,40                          ; 40-byte frame; 8 (return address) + 40 = 48, so rsp is 16-byte aligned if the caller aligned the call
  16. $L$gmult_seh_allocstack:
  17. movdqa XMMWORD[rsp],xmm6            ; spill xmm6 (callee-saved on Win64)
  18. $L$gmult_seh_save_xmm6:
  19. movdqa XMMWORD[16+rsp],xmm10        ; spill xmm10 (callee-saved on Win64)
  20. $L$gmult_seh_save_xmm10:
  21. $L$gmult_seh_prolog_end:
  22. movdqu xmm0,XMMWORD[rcx]            ; xmm0 = 16-byte block at [rcx]
  23. movdqa xmm10,XMMWORD[$L$reverse_bytes] ; xmm10 = byte-reversal shuffle mask
  24. movdqa xmm2,XMMWORD[$L$low4_mask]   ; xmm2 = 0x0f in every byte
  25. DB 102,65,15,56,0,194               ; hand-encoded pshufb xmm0,xmm10: reverse the block's byte order
  ; Split every byte of the block into its two nibbles:
  ; xmm1 = high nibbles (moved down to bits 0-3), xmm0 = low nibbles.
  26. movdqa xmm1,xmm2
  27. pandn xmm1,xmm0                     ; xmm1 = block & ~0x0f..0f (high nibbles only)
  28. psrld xmm1,4                        ; low nibbles are already zero, so the dword shift cannot leak bits between bytes
  29. pand xmm0,xmm2                      ; xmm0 = block & 0x0f..0f
  ; xmm2:xmm3 is the accumulator for the row loop (xmm2 = upper part,
  ; xmm3 = bits shifted out below it).
  30. pxor xmm2,xmm2
  31. pxor xmm3,xmm3
  32. mov rax,5                           ; table rows 0-4
  33. $L$oop_row_1:
  34. movdqa xmm4,XMMWORD[rdx]            ; xmm4 = current table row
  35. lea rdx,[16+rdx]                    ; advance to the next row
  ; Shift the 256-bit pair xmm2:xmm3 right by one byte.
  36. movdqa xmm6,xmm2
  37. DB 102,15,58,15,243,1               ; hand-encoded palignr xmm6,xmm3,1
  38. movdqa xmm3,xmm6
  39. psrldq xmm2,1
  ; Two 16-way byte lookups into the row, indexed by the nibble vectors.
  40. movdqa xmm5,xmm4
  41. DB 102,15,56,0,224                  ; hand-encoded pshufb xmm4,xmm0: row bytes selected by the low nibbles
  42. DB 102,15,56,0,233                  ; hand-encoded pshufb xmm5,xmm1: row bytes selected by the high nibbles
  43. pxor xmm2,xmm5                      ; fold in the high-nibble lookup unshifted
  ; Fold in the low-nibble lookup shifted right by 4 bits as one 128-bit value;
  ; the 4 bits that fall off the bottom go to the top of xmm3.
  44. movdqa xmm5,xmm4
  45. psllq xmm5,60                       ; isolate the low 4 bits of each qword at its top
  46. movdqa xmm6,xmm5
  47. pslldq xmm6,8                       ; bits leaving the low qword -> top of xmm3
  48. pxor xmm3,xmm6
  49. psrldq xmm5,8                       ; bits leaving the high qword -> top of the low qword
  50. pxor xmm2,xmm5
  51. psrlq xmm4,4                        ; per-qword shift supplies the remaining bits
  52. pxor xmm2,xmm4
  53. sub rax,1
  54. jnz NEAR $L$oop_row_1
  ; Fold the spilled bits back: xmm2 ^= xmm3 ^ (xmm3>>1) ^ (xmm3>>2) ^ (xmm3>>7)
  ; with per-qword shifts, then clear xmm3.
  ; NOTE(review): presumably the reduction by the GHASH polynomial — confirm.
  55. pxor xmm2,xmm3
  56. psrlq xmm3,1
  57. pxor xmm2,xmm3
  58. psrlq xmm3,1
  59. pxor xmm2,xmm3
  60. psrlq xmm3,5
  61. pxor xmm2,xmm3
  62. pxor xmm3,xmm3
  63. mov rax,5                           ; table rows 5-9; same row step as $L$oop_row_1
  64. $L$oop_row_2:
  65. movdqa xmm4,XMMWORD[rdx]
  66. lea rdx,[16+rdx]
  67. movdqa xmm6,xmm2
  68. DB 102,15,58,15,243,1               ; palignr xmm6,xmm3,1
  69. movdqa xmm3,xmm6
  70. psrldq xmm2,1
  71. movdqa xmm5,xmm4
  72. DB 102,15,56,0,224                  ; pshufb xmm4,xmm0
  73. DB 102,15,56,0,233                  ; pshufb xmm5,xmm1
  74. pxor xmm2,xmm5
  75. movdqa xmm5,xmm4
  76. psllq xmm5,60
  77. movdqa xmm6,xmm5
  78. pslldq xmm6,8
  79. pxor xmm3,xmm6
  80. psrldq xmm5,8
  81. pxor xmm2,xmm5
  82. psrlq xmm4,4
  83. pxor xmm2,xmm4
  84. sub rax,1
  85. jnz NEAR $L$oop_row_2
  ; Same fold of xmm3 into xmm2 as after $L$oop_row_1.
  86. pxor xmm2,xmm3
  87. psrlq xmm3,1
  88. pxor xmm2,xmm3
  89. psrlq xmm3,1
  90. pxor xmm2,xmm3
  91. psrlq xmm3,5
  92. pxor xmm2,xmm3
  93. pxor xmm3,xmm3
  94. mov rax,6                           ; table rows 10-15; same row step as $L$oop_row_1
  95. $L$oop_row_3:
  96. movdqa xmm4,XMMWORD[rdx]
  97. lea rdx,[16+rdx]
  98. movdqa xmm6,xmm2
  99. DB 102,15,58,15,243,1               ; palignr xmm6,xmm3,1
  100. movdqa xmm3,xmm6
  101. psrldq xmm2,1
  102. movdqa xmm5,xmm4
  103. DB 102,15,56,0,224                 ; pshufb xmm4,xmm0
  104. DB 102,15,56,0,233                 ; pshufb xmm5,xmm1
  105. pxor xmm2,xmm5
  106. movdqa xmm5,xmm4
  107. psllq xmm5,60
  108. movdqa xmm6,xmm5
  109. pslldq xmm6,8
  110. pxor xmm3,xmm6
  111. psrldq xmm5,8
  112. pxor xmm2,xmm5
  113. psrlq xmm4,4
  114. pxor xmm2,xmm4
  115. sub rax,1
  116. jnz NEAR $L$oop_row_3
  ; Final fold of xmm3 into xmm2; xmm2 now holds the result in reversed byte order.
  117. pxor xmm2,xmm3
  118. psrlq xmm3,1
  119. pxor xmm2,xmm3
  120. psrlq xmm3,1
  121. pxor xmm2,xmm3
  122. psrlq xmm3,5
  123. pxor xmm2,xmm3
  124. pxor xmm3,xmm3
  125. DB 102,65,15,56,0,210              ; hand-encoded pshufb xmm2,xmm10: restore the original byte order
  126. movdqu XMMWORD[rcx],xmm2           ; store the result over the input block
  ; Zero every scratch vector register before returning.
  ; NOTE(review): presumably so that no state-derived values are left behind — confirm intent.
  127. pxor xmm0,xmm0
  128. pxor xmm1,xmm1
  129. pxor xmm2,xmm2
  130. pxor xmm3,xmm3
  131. pxor xmm4,xmm4
  132. pxor xmm5,xmm5
  133. pxor xmm6,xmm6
  ; Epilogue: reload the callee-saved registers and release the frame.
  134. movdqa xmm6,XMMWORD[rsp]
  135. movdqa xmm10,XMMWORD[16+rsp]
  136. add rsp,40
  137. DB 0F3h,0C3h ;repret
  138. $L$gmult_seh_end:
  ; ---------------------------------------------------------------------------
  ; gcm_ghash_ssse3
  ; ABI:   Microsoft x64 (the function spills xmm6/xmm10/xmm11 and has SEH
  ;        unwind data in .pdata/.xdata).
  ; In:    rcx -> 16-byte block; loaded unaligned at entry and overwritten with
  ;               the result on exit.
  ;        rdx -> table of 16 rows x 16 bytes; read with movdqa, so it must be
  ;               16-byte aligned.
  ;        r8  -> input data, consumed 16 bytes at a time (unaligned loads).
  ;        r9  =  input length in bytes; rounded down to a multiple of 16.
  ; Out:   16 bytes stored to [rcx].
  ; Clobb: rax, r8 (advanced past the consumed input), r9, flags,
  ;        xmm0-xmm5 (all zeroed before returning). rdx is rewound after every
  ;        block and so ends at its entry value.
  ; Saved: xmm6, xmm10, xmm11 (spilled to the 56-byte frame and reloaded).
  ; NOTE(review): the block loop is bottom-tested. A length below 16 becomes 0
  ; after masking, yet one block is still processed and `sub r9,16` then wraps
  ; instead of reaching zero. Callers presumably guarantee len >= 16 — confirm.
  ; NOTE(review): judging by the name, rcx is presumably the GHASH state and
  ; rdx the precomputed key table — confirm against the C prototype.
  ; ---------------------------------------------------------------------------
  139. global gcm_ghash_ssse3
  140. ALIGN 16
  141. gcm_ghash_ssse3:
  ; The $L$ghash_seh_* labels mark prologue offsets referenced by the unwind
  ; codes in $L$ghash_seh_info; they must stay directly after the instruction
  ; they describe.
  142. $L$ghash_seh_begin:
  143. sub rsp,56                         ; 56-byte frame; 8 (return address) + 56 = 64, so rsp is 16-byte aligned if the caller aligned the call
  144. $L$ghash_seh_allocstack:
  145. movdqa XMMWORD[rsp],xmm6           ; spill xmm6 (callee-saved on Win64)
  146. $L$ghash_seh_save_xmm6:
  147. movdqa XMMWORD[16+rsp],xmm10       ; spill xmm10 (callee-saved on Win64)
  148. $L$ghash_seh_save_xmm10:
  149. movdqa XMMWORD[32+rsp],xmm11       ; spill xmm11 (callee-saved on Win64)
  150. $L$ghash_seh_save_xmm11:
  151. $L$ghash_seh_prolog_end:
  152. movdqu xmm0,XMMWORD[rcx]           ; xmm0 = 16-byte block at [rcx]
  153. movdqa xmm10,XMMWORD[$L$reverse_bytes] ; xmm10 = byte-reversal shuffle mask
  154. movdqa xmm11,XMMWORD[$L$low4_mask] ; xmm11 = 0x0f in every byte (kept live across all blocks)
  155. and r9,-16                         ; ignore any trailing partial block
  156. DB 102,65,15,56,0,194              ; hand-encoded pshufb xmm0,xmm10: reverse the block's byte order
  157. pxor xmm3,xmm3                     ; xmm3 (spill half of the accumulator) starts at zero and is re-zeroed after each fold
  158. $L$oop_ghash:
  159. movdqu xmm1,XMMWORD[r8]            ; next 16 input bytes
  160. DB 102,65,15,56,0,202              ; hand-encoded pshufb xmm1,xmm10: reverse their byte order
  161. pxor xmm0,xmm1                     ; mix the input block into the running value
  ; Split every byte of xmm0 into its two nibbles:
  ; xmm1 = high nibbles (moved down to bits 0-3), xmm0 = low nibbles.
  162. movdqa xmm1,xmm11
  163. pandn xmm1,xmm0                    ; xmm1 = value & ~0x0f..0f (high nibbles only)
  164. psrld xmm1,4                       ; low nibbles are already zero, so the dword shift cannot leak bits between bytes
  165. pand xmm0,xmm11                    ; xmm0 = value & 0x0f..0f
  166. pxor xmm2,xmm2                     ; xmm2 = upper half of the accumulator for this block
  167. mov rax,5                          ; table rows 0-4
  168. $L$oop_row_4:
  169. movdqa xmm4,XMMWORD[rdx]           ; xmm4 = current table row
  170. lea rdx,[16+rdx]                   ; advance to the next row
  ; Shift the 256-bit pair xmm2:xmm3 right by one byte.
  171. movdqa xmm6,xmm2
  172. DB 102,15,58,15,243,1              ; hand-encoded palignr xmm6,xmm3,1
  173. movdqa xmm3,xmm6
  174. psrldq xmm2,1
  ; Two 16-way byte lookups into the row, indexed by the nibble vectors.
  175. movdqa xmm5,xmm4
  176. DB 102,15,56,0,224                 ; hand-encoded pshufb xmm4,xmm0: row bytes selected by the low nibbles
  177. DB 102,15,56,0,233                 ; hand-encoded pshufb xmm5,xmm1: row bytes selected by the high nibbles
  178. pxor xmm2,xmm5                     ; fold in the high-nibble lookup unshifted
  ; Fold in the low-nibble lookup shifted right by 4 bits as one 128-bit value;
  ; the 4 bits that fall off the bottom go to the top of xmm3.
  179. movdqa xmm5,xmm4
  180. psllq xmm5,60                      ; isolate the low 4 bits of each qword at its top
  181. movdqa xmm6,xmm5
  182. pslldq xmm6,8                      ; bits leaving the low qword -> top of xmm3
  183. pxor xmm3,xmm6
  184. psrldq xmm5,8                      ; bits leaving the high qword -> top of the low qword
  185. pxor xmm2,xmm5
  186. psrlq xmm4,4                       ; per-qword shift supplies the remaining bits
  187. pxor xmm2,xmm4
  188. sub rax,1
  189. jnz NEAR $L$oop_row_4
  ; Fold the spilled bits back: xmm2 ^= xmm3 ^ (xmm3>>1) ^ (xmm3>>2) ^ (xmm3>>7)
  ; with per-qword shifts, then clear xmm3.
  ; NOTE(review): presumably the reduction by the GHASH polynomial — confirm.
  190. pxor xmm2,xmm3
  191. psrlq xmm3,1
  192. pxor xmm2,xmm3
  193. psrlq xmm3,1
  194. pxor xmm2,xmm3
  195. psrlq xmm3,5
  196. pxor xmm2,xmm3
  197. pxor xmm3,xmm3
  198. mov rax,5                          ; table rows 5-9; same row step as $L$oop_row_4
  199. $L$oop_row_5:
  200. movdqa xmm4,XMMWORD[rdx]
  201. lea rdx,[16+rdx]
  202. movdqa xmm6,xmm2
  203. DB 102,15,58,15,243,1              ; palignr xmm6,xmm3,1
  204. movdqa xmm3,xmm6
  205. psrldq xmm2,1
  206. movdqa xmm5,xmm4
  207. DB 102,15,56,0,224                 ; pshufb xmm4,xmm0
  208. DB 102,15,56,0,233                 ; pshufb xmm5,xmm1
  209. pxor xmm2,xmm5
  210. movdqa xmm5,xmm4
  211. psllq xmm5,60
  212. movdqa xmm6,xmm5
  213. pslldq xmm6,8
  214. pxor xmm3,xmm6
  215. psrldq xmm5,8
  216. pxor xmm2,xmm5
  217. psrlq xmm4,4
  218. pxor xmm2,xmm4
  219. sub rax,1
  220. jnz NEAR $L$oop_row_5
  ; Same fold of xmm3 into xmm2 as after $L$oop_row_4.
  221. pxor xmm2,xmm3
  222. psrlq xmm3,1
  223. pxor xmm2,xmm3
  224. psrlq xmm3,1
  225. pxor xmm2,xmm3
  226. psrlq xmm3,5
  227. pxor xmm2,xmm3
  228. pxor xmm3,xmm3
  229. mov rax,6                          ; table rows 10-15; same row step as $L$oop_row_4
  230. $L$oop_row_6:
  231. movdqa xmm4,XMMWORD[rdx]
  232. lea rdx,[16+rdx]
  233. movdqa xmm6,xmm2
  234. DB 102,15,58,15,243,1              ; palignr xmm6,xmm3,1
  235. movdqa xmm3,xmm6
  236. psrldq xmm2,1
  237. movdqa xmm5,xmm4
  238. DB 102,15,56,0,224                 ; pshufb xmm4,xmm0
  239. DB 102,15,56,0,233                 ; pshufb xmm5,xmm1
  240. pxor xmm2,xmm5
  241. movdqa xmm5,xmm4
  242. psllq xmm5,60
  243. movdqa xmm6,xmm5
  244. pslldq xmm6,8
  245. pxor xmm3,xmm6
  246. psrldq xmm5,8
  247. pxor xmm2,xmm5
  248. psrlq xmm4,4
  249. pxor xmm2,xmm4
  250. sub rax,1
  251. jnz NEAR $L$oop_row_6
  ; Final fold for this block; xmm3 is left at zero for the next iteration.
  252. pxor xmm2,xmm3
  253. psrlq xmm3,1
  254. pxor xmm2,xmm3
  255. psrlq xmm3,1
  256. pxor xmm2,xmm3
  257. psrlq xmm3,5
  258. pxor xmm2,xmm3
  259. pxor xmm3,xmm3
  260. movdqa xmm0,xmm2                   ; the block result becomes the running value
  261. lea rdx,[((-256))+rdx]             ; rewind the table pointer (16 rows x 16 bytes)
  262. lea r8,[16+r8]                     ; advance to the next input block
  263. sub r9,16                          ; one block consumed
  264. jnz NEAR $L$oop_ghash
  265. DB 102,65,15,56,0,194              ; hand-encoded pshufb xmm0,xmm10: restore the original byte order
  266. movdqu XMMWORD[rcx],xmm0           ; store the result over the input block
  ; Zero every scratch vector register before returning.
  ; NOTE(review): presumably so that no state-derived values are left behind — confirm intent.
  267. pxor xmm0,xmm0
  268. pxor xmm1,xmm1
  269. pxor xmm2,xmm2
  270. pxor xmm3,xmm3
  271. pxor xmm4,xmm4
  272. pxor xmm5,xmm5
  273. pxor xmm6,xmm6
  ; Epilogue: reload the callee-saved registers and release the frame.
  274. movdqa xmm6,XMMWORD[rsp]
  275. movdqa xmm10,XMMWORD[16+rsp]
  276. movdqa xmm11,XMMWORD[32+rsp]
  277. add rsp,56
  278. DB 0F3h,0C3h ;repret
  279. $L$ghash_seh_end:
  ; Read-only 16-byte constants, placed in the code section right after the
  ; functions. Both are loaded with movdqa, hence the 16-byte alignment.
  280. ALIGN 16
  ; pshufb control mask: result byte i takes source byte 15-i, i.e. a full
  ; byte reversal of the 128-bit register.
  281. $L$reverse_bytes:
  282. DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
  ; 0x0f in every byte: selects the low nibble of each byte (used with
  ; pand/pandn in both functions).
  283. $L$low4_mask:
  284. DQ 0x0f0f0f0f0f0f0f0f,0x0f0f0f0f0f0f0f0f
  ; Win64 function table: one entry per function, each made of three
  ; image-relative addresses (function start, function end, unwind info).
  285. section .pdata rdata align=4
  286. ALIGN 4
  287. DD $L$gmult_seh_begin wrt ..imagebase ; gcm_gmult_ssse3: start
  288. DD $L$gmult_seh_end wrt ..imagebase   ; gcm_gmult_ssse3: end
  289. DD $L$gmult_seh_info wrt ..imagebase  ; gcm_gmult_ssse3: unwind info in .xdata
  290. DD $L$ghash_seh_begin wrt ..imagebase ; gcm_ghash_ssse3: start
  291. DD $L$ghash_seh_end wrt ..imagebase   ; gcm_ghash_ssse3: end
  292. DD $L$ghash_seh_info wrt ..imagebase  ; gcm_ghash_ssse3: unwind info in .xdata
  ; Unwind information for gcm_gmult_ssse3. The codes describe the prologue in
  ; reverse order; each begins with the prologue offset just past the
  ; instruction it describes. They must match the prologue exactly
  ; (sub rsp,40; xmm6 -> [rsp]; xmm10 -> [16+rsp]).
  293. section .xdata rdata align=8
  294. ALIGN 8
  295. $L$gmult_seh_info:
  296. DB 1                               ; version 1, no flags
  297. DB $L$gmult_seh_prolog_end-$L$gmult_seh_begin ; prologue size in bytes
  298. DB 5                               ; unwind-code slots: 2 + 2 + 1
  299. DB 0                               ; no frame register
  300. DB $L$gmult_seh_save_xmm10-$L$gmult_seh_begin
  301. DB 168                             ; 0xA8 = UWOP_SAVE_XMM128 (8), register xmm10
  302. DW 1                               ; save slot offset / 16 -> [16+rsp]
  303. DB $L$gmult_seh_save_xmm6-$L$gmult_seh_begin
  304. DB 104                             ; 0x68 = UWOP_SAVE_XMM128 (8), register xmm6
  305. DW 0                               ; save slot offset / 16 -> [rsp]
  306. DB $L$gmult_seh_allocstack-$L$gmult_seh_begin
  307. DB 66                              ; 0x42 = UWOP_ALLOC_SMALL (2), info 4 -> 4*8+8 = 40 bytes
  ; Unwind information for gcm_ghash_ssse3. The codes describe the prologue in
  ; reverse order; each begins with the prologue offset just past the
  ; instruction it describes. They must match the prologue exactly
  ; (sub rsp,56; xmm6 -> [rsp]; xmm10 -> [16+rsp]; xmm11 -> [32+rsp]).
  308. ALIGN 8
  309. $L$ghash_seh_info:
  310. DB 1                               ; version 1, no flags
  311. DB $L$ghash_seh_prolog_end-$L$ghash_seh_begin ; prologue size in bytes
  312. DB 7                               ; unwind-code slots: 2 + 2 + 2 + 1
  313. DB 0                               ; no frame register
  314. DB $L$ghash_seh_save_xmm11-$L$ghash_seh_begin
  315. DB 184                             ; 0xB8 = UWOP_SAVE_XMM128 (8), register xmm11
  316. DW 2                               ; save slot offset / 16 -> [32+rsp]
  317. DB $L$ghash_seh_save_xmm10-$L$ghash_seh_begin
  318. DB 168                             ; 0xA8 = UWOP_SAVE_XMM128 (8), register xmm10
  319. DW 1                               ; save slot offset / 16 -> [16+rsp]
  320. DB $L$ghash_seh_save_xmm6-$L$ghash_seh_begin
  321. DB 104                             ; 0x68 = UWOP_SAVE_XMM128 (8), register xmm6
  322. DW 0                               ; save slot offset / 16 -> [rsp]
  323. DB $L$ghash_seh_allocstack-$L$ghash_seh_begin
  324. DB 98                              ; 0x62 = UWOP_ALLOC_SMALL (2), info 6 -> 6*8+8 = 56 bytes