ghash-ssse3-x86.asm 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. ; This file is generated from a similarly-named Perl script in the BoringSSL
  2. ; source tree. Do not edit by hand.
  ; Optional symbol prefixing: only when BORINGSSL_PREFIX is defined, include
  ; boringssl_prefix_symbols_nasm.inc (presumably it renames the exported
  ; symbols -- confirm against that include file).
  3. %ifdef BORINGSSL_PREFIX
  4. %include "boringssl_prefix_symbols_nasm.inc"
  5. %endif
  ; Choose the code-section declaration that matches NASM's output format:
  ; "obj" gets a 32-bit, 64-byte-aligned segment of class "code"; "win32" gets a
  ; 64-byte-aligned .text; every other format gets a plain .text code section.
  6. %ifidn __OUTPUT_FORMAT__,obj
  7. section code use32 class=code align=64
  8. %elifidn __OUTPUT_FORMAT__,win32
  ; NOTE(review): $@feat.00 looks like the absolute marker symbol the Microsoft
  ; linker checks for SAFESEH compatibility -- confirm before changing it.
  9. $@feat.00 equ 1
  10. section .text code align=64
  11. %else
  12. section .text code
  13. %endif
  ; ---------------------------------------------------------------------------
  ; _gcm_gmult_ssse3(arg1, arg2) -- 32-bit code, both arguments on the stack.
  ;   arg1 (-> edi): pointer to a 16-byte block. It is loaded with movdqu,
  ;                  transformed, and stored back in place with movdqu, so no
  ;                  alignment is required.
  ;   arg2 (-> esi): pointer to a table read as 16 consecutive 16-byte rows
  ;                  (256 bytes). Rows are read with movdqa, so the table must
  ;                  be 16-byte aligned.
  ; Presumably this is the GHASH multiply of the block by the hash key, with
  ; arg2 being the key's precomputed table (inferred from the symbol name --
  ; confirm against the C prototype).
  ; Register roles:
  ;   xmm0 = low nibble of every byte of the byte-reversed block
  ;   xmm1 = high nibble of every byte of the byte-reversed block
  ;   xmm2 = accumulator; xmm3 = carry (what is shifted out below xmm2)
  ;   xmm4-xmm6 = scratch; xmm7 = L$reverse_bytes shuffle mask
  ;   eax  = PIC base for the two constant loads, then the row counter
  ; ebp, ebx, esi and edi are saved and restored; xmm0-xmm6 are zeroed before
  ; returning. SSSE3 instructions are emitted as raw bytes with db; each one is
  ; decoded in the comment above it.
  ; ---------------------------------------------------------------------------
  14. global _gcm_gmult_ssse3
  15. align 16
  16. _gcm_gmult_ssse3:
  17. L$_gcm_gmult_ssse3_begin:
  18. push ebp
  19. push ebx
  20. push esi
  21. push edi
  ; Four pushes (16 bytes) plus the return address put arg1 at [esp+20] and
  ; arg2 at [esp+24].
  22. mov edi,DWORD [20+esp]
  23. mov esi,DWORD [24+esp]
  24. movdqu xmm0,[edi]
  ; call/pop leaves the address of L$000pic_point in eax so the constants can
  ; be addressed position-independently.
  25. call L$000pic_point
  26. L$000pic_point:
  27. pop eax
  28. movdqa xmm7,[(L$reverse_bytes-L$000pic_point)+eax]
  29. movdqa xmm2,[(L$low4_mask-L$000pic_point)+eax]
  ; 66 0F 38 00 C7 = pshufb xmm0,xmm7 : reverse the byte order of the block.
  30. db 102,15,56,0,199
  ; Split every byte into nibbles: xmm1 = (xmm0 & ~mask) >> 4, xmm0 = xmm0 & mask.
  ; The masked-off low nibbles are zero, so the per-dword shift moves nothing
  ; across a byte boundary.
  31. movdqa xmm1,xmm2
  32. pandn xmm1,xmm0
  33. psrld xmm1,4
  34. pand xmm0,xmm2
  ; Clear the accumulator (xmm2) and the carry (xmm3).
  35. pxor xmm2,xmm2
  36. pxor xmm3,xmm3
  ; The 16 table rows are consumed in three passes of 5, 5 and 6 rows, with a
  ; carry fold after each pass. Pass 1: rows 0-4.
  37. mov eax,5
  38. L$001loop_row_1:
  ; xmm4 = next table row; advance the row pointer.
  39. movdqa xmm4,[esi]
  40. lea esi,[16+esi]
  ; Shift the 256-bit value xmm2:xmm3 right by one byte: the low byte of xmm2
  ; becomes the top byte of xmm3.
  41. movdqa xmm6,xmm2
  ; 66 0F 3A 0F F3 01 = palignr xmm6,xmm3,1
  42. db 102,15,58,15,243,1
  43. movdqa xmm3,xmm6
  44. psrldq xmm2,1
  ; Use the row as a 16-entry byte lookup table, indexed once by the low
  ; nibbles (result in xmm4) and once by the high nibbles (result in xmm5).
  45. movdqa xmm5,xmm4
  ; 66 0F 38 00 E0 = pshufb xmm4,xmm0
  46. db 102,15,56,0,224
  ; 66 0F 38 00 E9 = pshufb xmm5,xmm1
  47. db 102,15,56,0,233
  ; Accumulate the high-nibble lookup as is.
  48. pxor xmm2,xmm5
  ; Accumulate the low-nibble lookup shifted right by 4 bits as one 128-bit
  ; value. psrlq only shifts within 64-bit lanes, so the 4 bits leaving each
  ; lane are isolated with psllq 60 and re-inserted by hand: the low lane's
  ; bits go to the top of the carry (xmm3), the high lane's bits go to the top
  ; of the low lane of xmm2.
  49. movdqa xmm5,xmm4
  50. psllq xmm5,60
  51. movdqa xmm6,xmm5
  52. pslldq xmm6,8
  53. pxor xmm3,xmm6
  54. psrldq xmm5,8
  55. pxor xmm2,xmm5
  56. psrlq xmm4,4
  57. pxor xmm2,xmm4
  58. sub eax,1
  59. jnz NEAR L$001loop_row_1
  ; Fold the carry into the accumulator:
  ;   xmm2 ^= xmm3 ^ (xmm3 >> 1) ^ (xmm3 >> 2) ^ (xmm3 >> 7)
  ; with the shifts applied per 64-bit lane, then clear the carry.
  ; (Presumably the reduction by the GHASH polynomial -- confirm against the
  ; generating script.)
  60. pxor xmm2,xmm3
  61. psrlq xmm3,1
  62. pxor xmm2,xmm3
  63. psrlq xmm3,1
  64. pxor xmm2,xmm3
  65. psrlq xmm3,5
  66. pxor xmm2,xmm3
  67. pxor xmm3,xmm3
  ; Pass 2: rows 5-9. Body identical to L$001loop_row_1.
  68. mov eax,5
  69. L$002loop_row_2:
  70. movdqa xmm4,[esi]
  71. lea esi,[16+esi]
  72. movdqa xmm6,xmm2
  ; palignr xmm6,xmm3,1
  73. db 102,15,58,15,243,1
  74. movdqa xmm3,xmm6
  75. psrldq xmm2,1
  76. movdqa xmm5,xmm4
  ; pshufb xmm4,xmm0
  77. db 102,15,56,0,224
  ; pshufb xmm5,xmm1
  78. db 102,15,56,0,233
  79. pxor xmm2,xmm5
  80. movdqa xmm5,xmm4
  81. psllq xmm5,60
  82. movdqa xmm6,xmm5
  83. pslldq xmm6,8
  84. pxor xmm3,xmm6
  85. psrldq xmm5,8
  86. pxor xmm2,xmm5
  87. psrlq xmm4,4
  88. pxor xmm2,xmm4
  89. sub eax,1
  90. jnz NEAR L$002loop_row_2
  ; Carry fold, same sequence as after pass 1.
  91. pxor xmm2,xmm3
  92. psrlq xmm3,1
  93. pxor xmm2,xmm3
  94. psrlq xmm3,1
  95. pxor xmm2,xmm3
  96. psrlq xmm3,5
  97. pxor xmm2,xmm3
  98. pxor xmm3,xmm3
  ; Pass 3: rows 10-15 (six rows). Body identical to L$001loop_row_1.
  99. mov eax,6
  100. L$003loop_row_3:
  101. movdqa xmm4,[esi]
  102. lea esi,[16+esi]
  103. movdqa xmm6,xmm2
  ; palignr xmm6,xmm3,1
  104. db 102,15,58,15,243,1
  105. movdqa xmm3,xmm6
  106. psrldq xmm2,1
  107. movdqa xmm5,xmm4
  ; pshufb xmm4,xmm0
  108. db 102,15,56,0,224
  ; pshufb xmm5,xmm1
  109. db 102,15,56,0,233
  110. pxor xmm2,xmm5
  111. movdqa xmm5,xmm4
  112. psllq xmm5,60
  113. movdqa xmm6,xmm5
  114. pslldq xmm6,8
  115. pxor xmm3,xmm6
  116. psrldq xmm5,8
  117. pxor xmm2,xmm5
  118. psrlq xmm4,4
  119. pxor xmm2,xmm4
  120. sub eax,1
  121. jnz NEAR L$003loop_row_3
  ; Final carry fold.
  122. pxor xmm2,xmm3
  123. psrlq xmm3,1
  124. pxor xmm2,xmm3
  125. psrlq xmm3,1
  126. pxor xmm2,xmm3
  127. psrlq xmm3,5
  128. pxor xmm2,xmm3
  129. pxor xmm3,xmm3
  ; 66 0F 38 00 D7 = pshufb xmm2,xmm7 : restore the original byte order, then
  ; write the result back over the input block.
  130. db 102,15,56,0,215
  131. movdqu [edi],xmm2
  ; Zero xmm0-xmm6, presumably so no input-derived values stay in registers.
  ; xmm7 is left alone; it only ever held the L$reverse_bytes constant.
  132. pxor xmm0,xmm0
  133. pxor xmm1,xmm1
  134. pxor xmm2,xmm2
  135. pxor xmm3,xmm3
  136. pxor xmm4,xmm4
  137. pxor xmm5,xmm5
  138. pxor xmm6,xmm6
  139. pop edi
  140. pop esi
  141. pop ebx
  142. pop ebp
  143. ret
  ; ---------------------------------------------------------------------------
  ; _gcm_ghash_ssse3(arg1, arg2, arg3, arg4) -- 32-bit code, arguments on the
  ; stack.
  ;   arg1 (-> edi): pointer to a 16-byte state block; loaded once on entry and
  ;                  stored once on exit, both with movdqu (no alignment needed).
  ;   arg2 (-> esi): pointer to a table read as 16 consecutive 16-byte rows
  ;                  (256 bytes) with movdqa, so it must be 16-byte aligned.
  ;   arg3 (-> edx): input data, consumed 16 bytes per iteration with movdqu.
  ;   arg4 (-> ecx): byte count; rounded down to a multiple of 16.
  ; For each 16-byte input block the byte-reversed block is XORed into the
  ; state and the state is then run through the same 16-row table pass that
  ; _gcm_gmult_ssse3 performs. Presumably this is the GHASH update over whole
  ; blocks (inferred from the symbol name -- confirm against the C prototype).
  ; NOTE(review): the loop is bottom-tested. If arg4 is below 16, ecx is 0
  ; after the rounding, one block is still processed, and "sub ecx,16" then
  ; wraps to a non-zero value so the loop keeps running. Callers must pass at
  ; least 16 bytes.
  ; Register roles:
  ;   xmm0 = state (byte-reversed), then its low nibbles during the row passes
  ;   xmm1 = input block, then the high nibbles of the state
  ;   xmm2 = nibble mask, then accumulator; xmm3 = carry
  ;   xmm4-xmm6 = scratch; xmm7 = L$reverse_bytes shuffle mask
  ;   ebx  = PIC base (kept for the whole loop); eax = row counter
  ; ebp, ebx, esi and edi are saved and restored; xmm0-xmm6 are zeroed before
  ; returning. SSSE3 instructions are emitted as raw bytes with db; each one is
  ; decoded in the comment above it.
  ; ---------------------------------------------------------------------------
  144. global _gcm_ghash_ssse3
  145. align 16
  146. _gcm_ghash_ssse3:
  147. L$_gcm_ghash_ssse3_begin:
  148. push ebp
  149. push ebx
  150. push esi
  151. push edi
  ; Four pushes (16 bytes) plus the return address put arg1..arg4 at
  ; [esp+20], [esp+24], [esp+28] and [esp+32].
  152. mov edi,DWORD [20+esp]
  153. mov esi,DWORD [24+esp]
  154. mov edx,DWORD [28+esp]
  155. mov ecx,DWORD [32+esp]
  156. movdqu xmm0,[edi]
  ; call/pop leaves the address of L$004pic_point in ebx so the constants can
  ; be addressed position-independently, including inside the loop.
  157. call L$004pic_point
  158. L$004pic_point:
  159. pop ebx
  160. movdqa xmm7,[(L$reverse_bytes-L$004pic_point)+ebx]
  ; Drop any trailing partial block from the byte count.
  161. and ecx,-16
  ; 66 0F 38 00 C7 = pshufb xmm0,xmm7 : reverse the byte order of the state.
  162. db 102,15,56,0,199
  ; Carry starts at zero; every block below also ends with it cleared.
  163. pxor xmm3,xmm3
  164. L$005loop_ghash:
  ; The nibble mask is reloaded per block because xmm2 is reused as the
  ; accumulator further down.
  165. movdqa xmm2,[(L$low4_mask-L$004pic_point)+ebx]
  166. movdqu xmm1,[edx]
  ; 66 0F 38 00 CF = pshufb xmm1,xmm7 : byte-reverse the input block, then XOR
  ; it into the state.
  167. db 102,15,56,0,207
  168. pxor xmm0,xmm1
  ; Split every byte into nibbles: xmm1 = (xmm0 & ~mask) >> 4, xmm0 = xmm0 & mask.
  169. movdqa xmm1,xmm2
  170. pandn xmm1,xmm0
  171. psrld xmm1,4
  172. pand xmm0,xmm2
  ; Clear the accumulator.
  173. pxor xmm2,xmm2
  ; The 16 table rows are consumed in three passes of 5, 5 and 6 rows, with a
  ; carry fold after each pass. Pass 1: rows 0-4.
  174. mov eax,5
  175. L$006loop_row_4:
  ; xmm4 = next table row; advance the row pointer.
  176. movdqa xmm4,[esi]
  177. lea esi,[16+esi]
  ; Shift the 256-bit value xmm2:xmm3 right by one byte: the low byte of xmm2
  ; becomes the top byte of xmm3.
  178. movdqa xmm6,xmm2
  ; 66 0F 3A 0F F3 01 = palignr xmm6,xmm3,1
  179. db 102,15,58,15,243,1
  180. movdqa xmm3,xmm6
  181. psrldq xmm2,1
  ; Use the row as a 16-entry byte lookup table, indexed once by the low
  ; nibbles (result in xmm4) and once by the high nibbles (result in xmm5).
  182. movdqa xmm5,xmm4
  ; 66 0F 38 00 E0 = pshufb xmm4,xmm0
  183. db 102,15,56,0,224
  ; 66 0F 38 00 E9 = pshufb xmm5,xmm1
  184. db 102,15,56,0,233
  ; Accumulate the high-nibble lookup as is.
  185. pxor xmm2,xmm5
  ; Accumulate the low-nibble lookup shifted right by 4 bits as one 128-bit
  ; value. psrlq only shifts within 64-bit lanes, so the 4 bits leaving each
  ; lane are isolated with psllq 60 and re-inserted by hand: the low lane's
  ; bits go to the top of the carry (xmm3), the high lane's bits go to the top
  ; of the low lane of xmm2.
  186. movdqa xmm5,xmm4
  187. psllq xmm5,60
  188. movdqa xmm6,xmm5
  189. pslldq xmm6,8
  190. pxor xmm3,xmm6
  191. psrldq xmm5,8
  192. pxor xmm2,xmm5
  193. psrlq xmm4,4
  194. pxor xmm2,xmm4
  195. sub eax,1
  196. jnz NEAR L$006loop_row_4
  ; Fold the carry into the accumulator:
  ;   xmm2 ^= xmm3 ^ (xmm3 >> 1) ^ (xmm3 >> 2) ^ (xmm3 >> 7)
  ; with the shifts applied per 64-bit lane, then clear the carry.
  ; (Presumably the reduction by the GHASH polynomial -- confirm against the
  ; generating script.)
  197. pxor xmm2,xmm3
  198. psrlq xmm3,1
  199. pxor xmm2,xmm3
  200. psrlq xmm3,1
  201. pxor xmm2,xmm3
  202. psrlq xmm3,5
  203. pxor xmm2,xmm3
  204. pxor xmm3,xmm3
  ; Pass 2: rows 5-9. Body identical to L$006loop_row_4.
  205. mov eax,5
  206. L$007loop_row_5:
  207. movdqa xmm4,[esi]
  208. lea esi,[16+esi]
  209. movdqa xmm6,xmm2
  ; palignr xmm6,xmm3,1
  210. db 102,15,58,15,243,1
  211. movdqa xmm3,xmm6
  212. psrldq xmm2,1
  213. movdqa xmm5,xmm4
  ; pshufb xmm4,xmm0
  214. db 102,15,56,0,224
  ; pshufb xmm5,xmm1
  215. db 102,15,56,0,233
  216. pxor xmm2,xmm5
  217. movdqa xmm5,xmm4
  218. psllq xmm5,60
  219. movdqa xmm6,xmm5
  220. pslldq xmm6,8
  221. pxor xmm3,xmm6
  222. psrldq xmm5,8
  223. pxor xmm2,xmm5
  224. psrlq xmm4,4
  225. pxor xmm2,xmm4
  226. sub eax,1
  227. jnz NEAR L$007loop_row_5
  ; Carry fold, same sequence as after pass 1.
  228. pxor xmm2,xmm3
  229. psrlq xmm3,1
  230. pxor xmm2,xmm3
  231. psrlq xmm3,1
  232. pxor xmm2,xmm3
  233. psrlq xmm3,5
  234. pxor xmm2,xmm3
  235. pxor xmm3,xmm3
  ; Pass 3: rows 10-15 (six rows). Body identical to L$006loop_row_4.
  236. mov eax,6
  237. L$008loop_row_6:
  238. movdqa xmm4,[esi]
  239. lea esi,[16+esi]
  240. movdqa xmm6,xmm2
  ; palignr xmm6,xmm3,1
  241. db 102,15,58,15,243,1
  242. movdqa xmm3,xmm6
  243. psrldq xmm2,1
  244. movdqa xmm5,xmm4
  ; pshufb xmm4,xmm0
  245. db 102,15,56,0,224
  ; pshufb xmm5,xmm1
  246. db 102,15,56,0,233
  247. pxor xmm2,xmm5
  248. movdqa xmm5,xmm4
  249. psllq xmm5,60
  250. movdqa xmm6,xmm5
  251. pslldq xmm6,8
  252. pxor xmm3,xmm6
  253. psrldq xmm5,8
  254. pxor xmm2,xmm5
  255. psrlq xmm4,4
  256. pxor xmm2,xmm4
  257. sub eax,1
  258. jnz NEAR L$008loop_row_6
  ; Final carry fold for this block; the carry is left at zero for the next one.
  259. pxor xmm2,xmm3
  260. psrlq xmm3,1
  261. pxor xmm2,xmm3
  262. psrlq xmm3,1
  263. pxor xmm2,xmm3
  264. psrlq xmm3,5
  265. pxor xmm2,xmm3
  266. pxor xmm3,xmm3
  ; The accumulator becomes the new state. Rewind the table pointer by the
  ; 16 rows x 16 bytes just consumed, step to the next input block, and loop
  ; while bytes remain.
  267. movdqa xmm0,xmm2
  268. lea esi,[esi-256]
  269. lea edx,[16+edx]
  270. sub ecx,16
  271. jnz NEAR L$005loop_ghash
  ; 66 0F 38 00 C7 = pshufb xmm0,xmm7 : restore the original byte order, then
  ; write the state back.
  272. db 102,15,56,0,199
  273. movdqu [edi],xmm0
  ; Zero xmm0-xmm6, presumably so no input-derived values stay in registers.
  ; xmm7 is left alone; it only ever held the L$reverse_bytes constant.
  274. pxor xmm0,xmm0
  275. pxor xmm1,xmm1
  276. pxor xmm2,xmm2
  277. pxor xmm3,xmm3
  278. pxor xmm4,xmm4
  279. pxor xmm5,xmm5
  280. pxor xmm6,xmm6
  281. pop edi
  282. pop esi
  283. pop ebx
  284. pop ebp
  285. ret
  ; Constants for the two routines above. They are addressed relative to the
  ; pic_point labels and loaded with movdqa, hence the 16-byte alignment.
  286. align 16
  ; Shuffle-control bytes 15..0: used as a pshufb mask, output byte i takes
  ; input byte 15-i, i.e. the 16 bytes are reversed.
  287. L$reverse_bytes:
  288. db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
  289. align 16
  ; 252645135 = 0x0F0F0F0F in each dword: keeps the low nibble of every byte.
  290. L$low4_mask:
  291. dd 252645135,252645135,252645135,252645135