ghash-x86.asm 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. ; This file is generated from a similarly-named Perl script in the BoringSSL
  2. ; source tree. Do not edit by hand.
  ; Optional symbol prefixing: when BORINGSSL_PREFIX is defined, the include
  ; file named below is pulled in before any symbol is declared.
  3. %ifdef BORINGSSL_PREFIX
  4. %include "boringssl_prefix_symbols_nasm.inc"
  5. %endif
  ; Choose the code-section declaration that matches the NASM output format
  ; (obj, win32, or anything else).
  6. %ifidn __OUTPUT_FORMAT__,obj
  7. section code use32 class=code align=64
  8. %elifidn __OUTPUT_FORMAT__,win32
  ; NOTE(review): @feat.00 = 1 is presumably the SafeSEH-compatibility marker
  ; expected by the Microsoft linker -- confirm.
  9. $@feat.00 equ 1
  10. section .text code align=64
  11. %else
  12. section .text code
  13. %endif
  ;-----------------------------------------------------------------------
  ; _gcm_init_clmul -- derive a 48-byte table from a 16-byte input value.
  ; ABI:   32-bit, both arguments on the stack; no registers are saved.
  ; In:    [esp+4] -> output table (stores at +0, +16 and +32)
  ;        [esp+8] -> 16-byte input (one unaligned load)
  ; Out:   table+0  = input with its 64-bit halves swapped, shifted left by
  ;                   one bit, XORed with the constant at L$bswap+16 when the
  ;                   bit shifted out was set
  ;        table+16 = carry-less square of table+0 after the two folds below
  ;        table+32 = (hi^lo) of table+0 in the low qword,
  ;                   (hi^lo) of table+16 in the high qword
  ; Clobb: eax, ecx, edx, xmm0-xmm5
  ; NOTE(review): the table is presumably the one consumed by
  ; _gcm_gmult_clmul / _gcm_ghash_clmul -- confirm against callers.
  ;-----------------------------------------------------------------------
  14. global _gcm_init_clmul
  15. align 16
  16. _gcm_init_clmul:
  17. L$_gcm_init_clmul_begin:
  18. mov edx,DWORD [4+esp]    ; edx = first stack argument (output table)
  19. mov eax,DWORD [8+esp]    ; eax = second stack argument (16-byte input)
  ; Position-independent address of L$bswap: call pushes the address of
  ; L$000pic, pop retrieves it, lea adds the assemble-time distance.
  20. call L$000pic
  21. L$000pic:
  22. pop ecx
  23. lea ecx,[(L$bswap-L$000pic)+ecx]
  24. movdqu xmm2,[eax]
  25. pshufd xmm2,xmm2,78    ; swap the two 64-bit halves
  26. pshufd xmm4,xmm2,255    ; broadcast the top dword to all four lanes
  ; 128-bit left shift by one: shift each qword, then carry bit 63 of the
  ; low qword into bit 0 of the high qword.
  27. movdqa xmm3,xmm2
  28. psllq xmm2,1
  29. pxor xmm5,xmm5
  30. psrlq xmm3,63
  31. pcmpgtd xmm5,xmm4    ; all-ones when the pre-shift top bit was set (0 > signed dword)
  32. pslldq xmm3,8
  33. por xmm2,xmm3
  34. pand xmm5,[16+ecx]    ; select the constant at L$bswap+16 under that mask
  35. pxor xmm2,xmm5
  ; Square xmm2 with three carry-less multiplies:
  ; lo*lo -> xmm0, hi*hi -> xmm1, (hi^lo)*(hi^lo) -> xmm3.
  36. movdqa xmm0,xmm2
  37. movdqa xmm1,xmm0
  38. pshufd xmm3,xmm0,78
  39. pshufd xmm4,xmm2,78
  40. pxor xmm3,xmm0
  41. pxor xmm4,xmm2
  42. db 102,15,58,68,194,0    ; pclmulqdq xmm0,xmm2,0x00
  43. db 102,15,58,68,202,17    ; pclmulqdq xmm1,xmm2,0x11
  44. db 102,15,58,68,220,0    ; pclmulqdq xmm3,xmm4,0x00
  ; Middle term = xmm3 ^ xmm0 ^ xmm1; add it across the xmm1:xmm0 boundary.
  45. xorps xmm3,xmm0
  46. xorps xmm3,xmm1
  47. movdqa xmm4,xmm3
  48. psrldq xmm3,8
  49. pslldq xmm4,8
  50. pxor xmm1,xmm3
  51. pxor xmm0,xmm4
  ; Fold the 256-bit value xmm1:xmm0 down to 128 bits in xmm0.
  ; NOTE(review): presumably the reduction modulo the GHASH polynomial -- confirm.
  ; First fold: left shifts by 5, 1 and 57.
  52. movdqa xmm4,xmm0
  53. movdqa xmm3,xmm0
  54. psllq xmm0,5
  55. pxor xmm3,xmm0
  56. psllq xmm0,1
  57. pxor xmm0,xmm3
  58. psllq xmm0,57
  59. movdqa xmm3,xmm0
  60. pslldq xmm0,8
  61. psrldq xmm3,8
  62. pxor xmm0,xmm4
  63. pxor xmm1,xmm3
  ; Second fold: right shifts by 1, 5 and 1, then merge the high half.
  64. movdqa xmm4,xmm0
  65. psrlq xmm0,1
  66. pxor xmm1,xmm4
  67. pxor xmm4,xmm0
  68. psrlq xmm0,5
  69. pxor xmm0,xmm4
  70. psrlq xmm0,1
  71. pxor xmm0,xmm1
  ; Emit the table: the adjusted input, its square, and the packed hi^lo terms.
  72. pshufd xmm3,xmm2,78
  73. pshufd xmm4,xmm0,78
  74. pxor xmm3,xmm2    ; both qwords = hi^lo of the value stored at table+0
  75. movdqu [edx],xmm2
  76. pxor xmm4,xmm0    ; both qwords = hi^lo of the value stored at table+16
  77. movdqu [16+edx],xmm0
  78. db 102,15,58,15,227,8    ; palignr xmm4,xmm3,8: low qword from xmm3, high qword from xmm4
  79. movdqu [32+edx],xmm4
  80. ret
  ;-----------------------------------------------------------------------
  ; _gcm_gmult_clmul -- multiply one 16-byte value by a table entry, in place.
  ; ABI:   32-bit, both arguments on the stack; no registers are saved.
  ; In:    [esp+4] -> 16-byte value, read and then overwritten
  ;        [esp+8] -> table (reads 16 bytes at +0 and 16 bytes at +32)
  ; Out:   the 16-byte value is replaced by the folded carry-less product of
  ;        its byte-reversed form with table+0, byte-reversed again
  ; Clobb: eax, ecx, edx, xmm0-xmm5
  ; NOTE(review): the table layout matches what _gcm_init_clmul stores;
  ; presumably that routine produces it -- confirm against callers.
  ;-----------------------------------------------------------------------
  81. global _gcm_gmult_clmul
  82. align 16
  83. _gcm_gmult_clmul:
  84. L$_gcm_gmult_clmul_begin:
  85. mov eax,DWORD [4+esp]    ; eax = first stack argument (value, in/out)
  86. mov edx,DWORD [8+esp]    ; edx = second stack argument (table)
  ; Position-independent address of L$bswap via call/pop.
  87. call L$001pic
  88. L$001pic:
  89. pop ecx
  90. lea ecx,[(L$bswap-L$001pic)+ecx]
  91. movdqu xmm0,[eax]
  92. movdqa xmm5,[ecx]    ; byte-reversal mask (first row of L$bswap); aligned load
  93. movups xmm2,[edx]    ; table+0
  94. db 102,15,56,0,197    ; pshufb xmm0,xmm5: reverse the byte order of the value
  95. movups xmm4,[32+edx]    ; table+32; only its low qword is used by the multiply below
  ; Three carry-less multiplies: lo*lo -> xmm0, hi*hi -> xmm1,
  ; (value hi^lo) * (low qword of table+32) -> xmm3.
  96. movdqa xmm1,xmm0
  97. pshufd xmm3,xmm0,78
  98. pxor xmm3,xmm0
  99. db 102,15,58,68,194,0    ; pclmulqdq xmm0,xmm2,0x00
  100. db 102,15,58,68,202,17    ; pclmulqdq xmm1,xmm2,0x11
  101. db 102,15,58,68,220,0    ; pclmulqdq xmm3,xmm4,0x00
  ; Middle term = xmm3 ^ xmm0 ^ xmm1; add it across the xmm1:xmm0 boundary.
  102. xorps xmm3,xmm0
  103. xorps xmm3,xmm1
  104. movdqa xmm4,xmm3
  105. psrldq xmm3,8
  106. pslldq xmm4,8
  107. pxor xmm1,xmm3
  108. pxor xmm0,xmm4
  ; Fold xmm1:xmm0 down to 128 bits in xmm0.
  ; NOTE(review): presumably the reduction modulo the GHASH polynomial -- confirm.
  ; First fold: left shifts by 5, 1 and 57.
  109. movdqa xmm4,xmm0
  110. movdqa xmm3,xmm0
  111. psllq xmm0,5
  112. pxor xmm3,xmm0
  113. psllq xmm0,1
  114. pxor xmm0,xmm3
  115. psllq xmm0,57
  116. movdqa xmm3,xmm0
  117. pslldq xmm0,8
  118. psrldq xmm3,8
  119. pxor xmm0,xmm4
  120. pxor xmm1,xmm3
  ; Second fold: right shifts by 1, 5 and 1, then merge the high half.
  121. movdqa xmm4,xmm0
  122. psrlq xmm0,1
  123. pxor xmm1,xmm4
  124. pxor xmm4,xmm0
  125. psrlq xmm0,5
  126. pxor xmm0,xmm4
  127. psrlq xmm0,1
  128. pxor xmm0,xmm1
  129. db 102,15,56,0,197    ; pshufb xmm0,xmm5: restore the original byte order
  130. movdqu [eax],xmm0    ; write the result over the input value
  131. ret
  ;-----------------------------------------------------------------------
  ; _gcm_ghash_clmul -- fold a buffer of 16-byte blocks into a 16-byte
  ; accumulator, two blocks at a time where possible.
  ; ABI:   32-bit, four arguments on the stack.
  ; In:    arg1 -> 16-byte accumulator, read at entry and written at exit
  ;        arg2 -> table (reads 16 bytes at +0, +16 and +32)
  ;        arg3 -> input bytes (unaligned loads of 16 bytes each)
  ;        arg4 =  input length in bytes
  ; Saved: ebp, ebx, esi, edi are pushed on entry and popped before ret
  ;        (only ebx and esi are modified in between).
  ; Clobb: eax, ecx, edx, xmm0-xmm7, flags
  ; NOTE(review): the length handling below appears to assume arg4 is a
  ; non-zero multiple of 16 -- confirm against callers.
  ; NOTE(review): the table layout matches what _gcm_init_clmul stores;
  ; presumably that routine produces it -- confirm.
  ;-----------------------------------------------------------------------
  132. global _gcm_ghash_clmul
  133. align 16
  134. _gcm_ghash_clmul:
  135. L$_gcm_ghash_clmul_begin:
  136. push ebp
  137. push ebx
  138. push esi
  139. push edi
  ; Four pushes moved the arguments from [esp+4..16] to [esp+20..32].
  140. mov eax,DWORD [20+esp]    ; eax = arg1 (accumulator)
  141. mov edx,DWORD [24+esp]    ; edx = arg2 (table)
  142. mov esi,DWORD [28+esp]    ; esi = arg3 (input pointer)
  143. mov ebx,DWORD [32+esp]    ; ebx = arg4 (byte count)
  ; Position-independent address of L$bswap via call/pop.
  144. call L$002pic
  145. L$002pic:
  146. pop ecx
  147. lea ecx,[(L$bswap-L$002pic)+ecx]
  148. movdqu xmm0,[eax]
  149. movdqa xmm5,[ecx]    ; byte-reversal mask (first row of L$bswap); aligned load
  150. movdqu xmm2,[edx]    ; table+0
  151. db 102,15,56,0,197    ; pshufb xmm0,xmm5: byte-reverse the accumulator
  152. sub ebx,16
  153. jz NEAR L$003odd_tail    ; exactly 16 bytes: single-block path
  ; Load and byte-reverse the first pair of blocks.
  154. movdqu xmm3,[esi]
  155. movdqu xmm6,[16+esi]
  156. db 102,15,56,0,221    ; pshufb xmm3,xmm5
  157. db 102,15,56,0,245    ; pshufb xmm6,xmm5
  158. movdqu xmm5,[32+edx]    ; table+32 (xmm5 no longer holds the byte-reversal mask)
  159. pxor xmm0,xmm3    ; accumulator ^= first block of the pair
  ; Multiply the second block of the pair by table+0 (three partial products
  ; kept in xmm6, xmm7 and xmm3 until they are combined later).
  160. pshufd xmm3,xmm6,78
  161. movdqa xmm7,xmm6
  162. pxor xmm3,xmm6
  163. lea esi,[32+esi]    ; advance past the pair
  164. db 102,15,58,68,242,0    ; pclmulqdq xmm6,xmm2,0x00
  165. db 102,15,58,68,250,17    ; pclmulqdq xmm7,xmm2,0x11
  166. db 102,15,58,68,221,0    ; pclmulqdq xmm3,xmm5,0x00
  167. movups xmm2,[16+edx]    ; table+16, the multiplier for the accumulator
  168. nop
  169. sub ebx,32
  170. jbe NEAR L$004even_tail    ; taken when the original length was <= 48
  171. jmp NEAR L$005mod_loop    ; jump over the alignment padding
  172. align 32
  ; Main loop: consumes 32 input bytes per iteration. The multiply and fold
  ; of the current pair are interleaved with loading, byte-reversing and
  ; pre-multiplying the next pair.
  173. L$005mod_loop:
  174. pshufd xmm4,xmm0,78
  175. movdqa xmm1,xmm0
  176. pxor xmm4,xmm0
  177. nop
  178. db 102,15,58,68,194,0    ; pclmulqdq xmm0,xmm2,0x00
  179. db 102,15,58,68,202,17    ; pclmulqdq xmm1,xmm2,0x11
  180. db 102,15,58,68,229,16    ; pclmulqdq xmm4,xmm5,0x10: low qword of xmm4 * high qword of table+32
  181. movups xmm2,[edx]    ; table+0 for the next pair's second block
  182. xorps xmm0,xmm6    ; add the pending low product
  183. movdqa xmm5,[ecx]    ; reload the byte-reversal mask
  184. xorps xmm1,xmm7    ; add the pending high product
  185. movdqu xmm7,[esi]    ; next pair, first block
  186. pxor xmm3,xmm0
  187. movdqu xmm6,[16+esi]    ; next pair, second block
  188. pxor xmm3,xmm1
  189. db 102,15,56,0,253    ; pshufb xmm7,xmm5
  190. pxor xmm4,xmm3    ; combined middle term
  191. movdqa xmm3,xmm4
  192. psrldq xmm4,8
  193. pslldq xmm3,8
  194. pxor xmm1,xmm4
  195. pxor xmm0,xmm3
  196. db 102,15,56,0,245    ; pshufb xmm6,xmm5
  197. pxor xmm1,xmm7    ; fold in the next first block; it reaches xmm0 via the final pxor xmm0,xmm1
  198. movdqa xmm7,xmm6
  ; First fold of xmm1:xmm0 (left shifts by 5, 1 and 57).
  199. movdqa xmm4,xmm0
  200. movdqa xmm3,xmm0
  201. psllq xmm0,5
  202. pxor xmm3,xmm0
  203. psllq xmm0,1
  204. pxor xmm0,xmm3
  205. db 102,15,58,68,242,0    ; pclmulqdq xmm6,xmm2,0x00 (next pair, low product)
  206. movups xmm5,[32+edx]    ; table+32 again
  207. psllq xmm0,57
  208. movdqa xmm3,xmm0
  209. pslldq xmm0,8
  210. psrldq xmm3,8
  211. pxor xmm0,xmm4
  212. pxor xmm1,xmm3
  213. pshufd xmm3,xmm7,78
  ; Second fold (right shifts by 1, 5 and 1), interleaved with the next pair.
  214. movdqa xmm4,xmm0
  215. psrlq xmm0,1
  216. pxor xmm3,xmm7
  217. pxor xmm1,xmm4
  218. db 102,15,58,68,250,17    ; pclmulqdq xmm7,xmm2,0x11 (next pair, high product)
  219. movups xmm2,[16+edx]    ; table+16 for the next accumulator multiply
  220. pxor xmm4,xmm0
  221. psrlq xmm0,5
  222. pxor xmm0,xmm4
  223. psrlq xmm0,1
  224. pxor xmm0,xmm1
  225. db 102,15,58,68,221,0    ; pclmulqdq xmm3,xmm5,0x00 (next pair, middle product)
  226. lea esi,[32+esi]
  227. sub ebx,32
  228. ja NEAR L$005mod_loop    ; unsigned: loop while more than 32 bytes remained
  ; Finish the pair whose second-block products are pending in xmm6/xmm7/xmm3.
  229. L$004even_tail:
  230. pshufd xmm4,xmm0,78
  231. movdqa xmm1,xmm0
  232. pxor xmm4,xmm0
  233. db 102,15,58,68,194,0    ; pclmulqdq xmm0,xmm2,0x00
  234. db 102,15,58,68,202,17    ; pclmulqdq xmm1,xmm2,0x11
  235. db 102,15,58,68,229,16    ; pclmulqdq xmm4,xmm5,0x10
  236. movdqa xmm5,[ecx]    ; reload the byte-reversal mask for the exit path
  237. xorps xmm0,xmm6
  238. xorps xmm1,xmm7
  239. pxor xmm3,xmm0
  240. pxor xmm3,xmm1
  241. pxor xmm4,xmm3
  242. movdqa xmm3,xmm4
  243. psrldq xmm4,8
  244. pslldq xmm3,8
  245. pxor xmm1,xmm4
  246. pxor xmm0,xmm3
  ; First fold (left shifts by 5, 1 and 57).
  247. movdqa xmm4,xmm0
  248. movdqa xmm3,xmm0
  249. psllq xmm0,5
  250. pxor xmm3,xmm0
  251. psllq xmm0,1
  252. pxor xmm0,xmm3
  253. psllq xmm0,57
  254. movdqa xmm3,xmm0
  255. pslldq xmm0,8
  256. psrldq xmm3,8
  257. pxor xmm0,xmm4
  258. pxor xmm1,xmm3
  ; Second fold (right shifts by 1, 5 and 1).
  259. movdqa xmm4,xmm0
  260. psrlq xmm0,1
  261. pxor xmm1,xmm4
  262. pxor xmm4,xmm0
  263. psrlq xmm0,5
  264. pxor xmm0,xmm4
  265. psrlq xmm0,1
  266. pxor xmm0,xmm1
  267. test ebx,ebx
  268. jnz NEAR L$006done    ; ebx != 0: nothing left; ebx == 0: one more block follows
  269. movups xmm2,[edx]    ; table+0 for the single trailing block
  ; Single-block path: accumulator = fold((accumulator ^ block) * table+0).
  270. L$003odd_tail:
  271. movdqu xmm3,[esi]
  272. db 102,15,56,0,221    ; pshufb xmm3,xmm5
  273. pxor xmm0,xmm3
  274. movdqa xmm1,xmm0
  275. pshufd xmm3,xmm0,78
  276. pshufd xmm4,xmm2,78
  277. pxor xmm3,xmm0
  278. pxor xmm4,xmm2    ; hi^lo of table+0 computed here instead of read from table+32
  279. db 102,15,58,68,194,0    ; pclmulqdq xmm0,xmm2,0x00
  280. db 102,15,58,68,202,17    ; pclmulqdq xmm1,xmm2,0x11
  281. db 102,15,58,68,220,0    ; pclmulqdq xmm3,xmm4,0x00
  282. xorps xmm3,xmm0
  283. xorps xmm3,xmm1
  284. movdqa xmm4,xmm3
  285. psrldq xmm3,8
  286. pslldq xmm4,8
  287. pxor xmm1,xmm3
  288. pxor xmm0,xmm4
  ; First fold (left shifts by 5, 1 and 57).
  289. movdqa xmm4,xmm0
  290. movdqa xmm3,xmm0
  291. psllq xmm0,5
  292. pxor xmm3,xmm0
  293. psllq xmm0,1
  294. pxor xmm0,xmm3
  295. psllq xmm0,57
  296. movdqa xmm3,xmm0
  297. pslldq xmm0,8
  298. psrldq xmm3,8
  299. pxor xmm0,xmm4
  300. pxor xmm1,xmm3
  ; Second fold (right shifts by 1, 5 and 1).
  301. movdqa xmm4,xmm0
  302. psrlq xmm0,1
  303. pxor xmm1,xmm4
  304. pxor xmm4,xmm0
  305. psrlq xmm0,5
  306. pxor xmm0,xmm4
  307. psrlq xmm0,1
  308. pxor xmm0,xmm1
  ; Common exit: restore byte order, store the accumulator, unwind the pushes.
  309. L$006done:
  310. db 102,15,56,0,197    ; pshufb xmm0,xmm5
  311. movdqu [eax],xmm0
  312. pop edi
  313. pop esi
  314. pop ebx
  315. pop ebp
  316. ret
  ; Constant data addressed PC-relatively from the routines above.
  317. align 64
  318. L$bswap:
  ; +0:  byte indices 15..0, loaded as the pshufb control that reverses the
  ;      16 bytes of a register.
  319. db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
  ; +16: 128-bit constant with 0x01 in the lowest byte and 0xC2 (194) in the
  ;      highest byte; _gcm_init_clmul ANDs it with a mask via [16+ecx].
  320. db 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
  ; ASCII banner, NUL-terminated:
  ; "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
  321. db 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
  322. db 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
  323. db 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
  324. db 0