// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
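// The routines below implement GHASH (the GF(2^128) universal hash used by
// AES-GCM) with the PCLMULQDQ carry-less multiply instruction, plus AVX
// variants. A sketch of the C prototypes they are assumed to satisfy, taken
// from the conventional BoringSSL/OpenSSL GCM interface (consult the GCM
// headers for the authoritative declarations):
//
//   void gcm_init_clmul(u128 Htable[16], const uint64_t H[2]);
//   void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
//   void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16],
//                        const uint8_t *inp, size_t len);
//
// and likewise for the _avx variants. Under the SysV AMD64 ABI the arguments
// arrive in %rdi, %rsi, %rdx and %rcx, which matches the register usage seen
// throughout the code below.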
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.cfi_startproc
.L_init_clmul:
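// gcm_init_clmul: from the hash key H (at %rsi), precompute a table of
// powers of H (at %rdi) for the gmult/ghash routines. The prologue converts
// H into the shifted representation (H<<1 modulo the GHASH polynomial,
// folding the carried-out bit back in via the .L0x1c2_polynomial constant);
// the code then squares and multiplies to obtain H^2, H^3 and H^4, storing
// each pair of powers together with a precomputed (lo^hi) value for the
// Karatsuba middle term.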
movdqu (%rsi),%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
psrlq $63,%xmm3
pcmpgtd %xmm4,%xmm5
pslldq $8,%xmm3
por %xmm3,%xmm2
pand .L0x1c2_polynomial(%rip),%xmm5
pxor %xmm5,%xmm2
pshufd $78,%xmm2,%xmm6
movdqa %xmm2,%xmm0
pxor %xmm2,%xmm6
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
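// Decoding the .byte sequences above (perlasm emits raw opcodes because
// PCLMULQDQ predates some assemblers):
//   102,15,58,68,194,0   = pclmulqdq $0x00,%xmm2,%xmm0   // low  x low
//   102,15,58,68,202,17  = pclmulqdq $0x11,%xmm2,%xmm1   // high x high
//   102,15,58,68,222,0   = pclmulqdq $0x00,%xmm6,%xmm3   // Karatsuba middle
// i.e. a Karatsuba 128x128->256 carry-less multiply: the middle term is
// (a.lo^a.hi)*(b.lo^b.hi), fixed up with the outer products just below.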
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
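// The sequence above is the standard two-phase reduction of the 256-bit
// product (xmm1:xmm0) modulo the GHASH polynomial x^128+x^7+x^2+x+1 in its
// bit-reflected form: phase one multiplies the low half by x^57^x^62^x^63
// (the chained psllq $5/$1/$57), phase two folds back with right shifts by
// 1, 2 and 7 (the chained psrlq $1/$5/$1). xmm0 now holds H^2.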
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,0(%rdi)
pxor %xmm0,%xmm4
movdqu %xmm0,16(%rdi)
.byte 102,15,58,15,227,8
movdqu %xmm4,32(%rdi)
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
movdqa %xmm0,%xmm5
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $78,%xmm5,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm5,%xmm3
movdqu %xmm5,48(%rdi)
pxor %xmm0,%xmm4
movdqu %xmm0,64(%rdi)
.byte 102,15,58,15,227,8
movdqu %xmm4,80(%rdi)
.byte 0xf3,0xc3
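// 0xf3,0xc3 encodes "rep ret", a two-byte return that sidesteps the branch
// misprediction penalty a plain "ret" incurs on some AMD cores; the same
// encoding is used at every return below.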
.cfi_endproc
.size gcm_init_clmul,.-gcm_init_clmul
.globl gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.cfi_startproc
.L_gmult_clmul:
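// gcm_gmult_clmul: multiply the 128-bit accumulator Xi (at %rdi) by H (from
// the table at %rsi) and reduce. The .byte sequence 102,15,56,0,197 decodes
// to "pshufb %xmm5,%xmm0": it byte-swaps Xi into the reflected bit order on
// entry, and swaps it back just before the final store.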
movdqu (%rdi),%xmm0
movdqa .Lbswap_mask(%rip),%xmm5
movdqu (%rsi),%xmm2
movdqu 32(%rsi),%xmm4
.byte 102,15,56,0,197
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_clmul,.-gcm_gmult_clmul
.globl gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 32
gcm_ghash_clmul:
.cfi_startproc
.L_ghash_clmul:
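// gcm_ghash_clmul: fold len (%rcx) bytes of input (%rdx) into the
// accumulator Xi (%rdi) using the Htable at %rsi. Per 16-byte block it
// computes Xi = (Xi ^ block) * H; for long inputs the .Lmod4_loop path
// aggregates four blocks per reduction as
//   (Xi^m0)*H^4 ^ m1*H^3 ^ m2*H^2 ^ m3*H,
// paying for four multiplies but only one polynomial reduction.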
movdqa .Lbswap_mask(%rip),%xmm10
movdqu (%rdi),%xmm0
movdqu (%rsi),%xmm2
movdqu 32(%rsi),%xmm7
.byte 102,65,15,56,0,194
subq $0x10,%rcx
jz .Lodd_tail
movdqu 16(%rsi),%xmm6
leaq OPENSSL_ia32cap_P(%rip),%rax
movl 4(%rax),%eax
cmpq $0x30,%rcx
jb .Lskip4x
andl $71303168,%eax
cmpl $4194304,%eax
je .Lskip4x
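// The two decimal constants above isolate and test CPUID feature bits
// (mirroring the check in the upstream perlasm source): 71303168 is
// 1<<22 | 1<<26 (MOVBE and XSAVE) and 4194304 is 1<<22, so the
// 4x-aggregated path is skipped on parts reporting MOVBE without XSAVE,
// i.e. Atom/Silvermont-class cores where it is not a win.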
subq $0x30,%rcx
movq $0xA040608020C0E000,%rax
movdqu 48(%rsi),%xmm14
movdqu 64(%rsi),%xmm15
movdqu 48(%rdx),%xmm3
movdqu 32(%rdx),%xmm11
.byte 102,65,15,56,0,218
.byte 102,69,15,56,0,218
movdqa %xmm3,%xmm5
pshufd $78,%xmm3,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0
movdqa %xmm11,%xmm13
pshufd $78,%xmm11,%xmm12
pxor %xmm11,%xmm12
.byte 102,68,15,58,68,222,0
.byte 102,68,15,58,68,238,17
.byte 102,68,15,58,68,231,16
xorps %xmm11,%xmm3
xorps %xmm13,%xmm5
movups 80(%rsi),%xmm7
xorps %xmm12,%xmm4
movdqu 16(%rdx),%xmm11
movdqu 0(%rdx),%xmm8
.byte 102,69,15,56,0,218
.byte 102,69,15,56,0,194
movdqa %xmm11,%xmm13
pshufd $78,%xmm11,%xmm12
pxor %xmm8,%xmm0
pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm8
pxor %xmm0,%xmm8
.byte 102,69,15,58,68,238,17
.byte 102,68,15,58,68,231,0
xorps %xmm11,%xmm3
xorps %xmm13,%xmm5
leaq 64(%rdx),%rdx
subq $0x40,%rcx
jc .Ltail4x
jmp .Lmod4_loop
.align 32
.Lmod4_loop:
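// Main 4x loop: each iteration multiplies the running sums against
// H^4..H (Htable entries loaded via %rsi) while folding in four fresh
// blocks, with the reduction of the previous iteration interleaved
// between the multiplies to hide pclmulqdq latency.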
.byte 102,65,15,58,68,199,0
xorps %xmm12,%xmm4
movdqu 48(%rdx),%xmm11
.byte 102,69,15,56,0,218
.byte 102,65,15,58,68,207,17
xorps %xmm3,%xmm0
movdqu 32(%rdx),%xmm3
movdqa %xmm11,%xmm13
.byte 102,68,15,58,68,199,16
pshufd $78,%xmm11,%xmm12
xorps %xmm5,%xmm1
pxor %xmm11,%xmm12
.byte 102,65,15,56,0,218
movups 32(%rsi),%xmm7
xorps %xmm4,%xmm8
.byte 102,68,15,58,68,218,0
pshufd $78,%xmm3,%xmm4
pxor %xmm0,%xmm8
movdqa %xmm3,%xmm5
pxor %xmm1,%xmm8
pxor %xmm3,%xmm4
movdqa %xmm8,%xmm9
.byte 102,68,15,58,68,234,17
pslldq $8,%xmm8
psrldq $8,%xmm9
pxor %xmm8,%xmm0
movdqa .L7_mask(%rip),%xmm8
pxor %xmm9,%xmm1
.byte 102,76,15,110,200
pand %xmm0,%xmm8
.byte 102,69,15,56,0,200
pxor %xmm0,%xmm9
.byte 102,68,15,58,68,231,0
psllq $57,%xmm9
movdqa %xmm9,%xmm8
pslldq $8,%xmm9
.byte 102,15,58,68,222,0
psrldq $8,%xmm8
pxor %xmm9,%xmm0
pxor %xmm8,%xmm1
movdqu 0(%rdx),%xmm8
movdqa %xmm0,%xmm9
psrlq $1,%xmm0
.byte 102,15,58,68,238,17
xorps %xmm11,%xmm3
movdqu 16(%rdx),%xmm11
.byte 102,69,15,56,0,218
.byte 102,15,58,68,231,16
xorps %xmm13,%xmm5
movups 80(%rsi),%xmm7
.byte 102,69,15,56,0,194
pxor %xmm9,%xmm1
pxor %xmm0,%xmm9
psrlq $5,%xmm0
movdqa %xmm11,%xmm13
pxor %xmm12,%xmm4
pshufd $78,%xmm11,%xmm12
pxor %xmm9,%xmm0
pxor %xmm8,%xmm1
pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
movdqa %xmm0,%xmm1
.byte 102,69,15,58,68,238,17
xorps %xmm11,%xmm3
pshufd $78,%xmm0,%xmm8
pxor %xmm0,%xmm8
.byte 102,68,15,58,68,231,0
xorps %xmm13,%xmm5
leaq 64(%rdx),%rdx
subq $0x40,%rcx
jnc .Lmod4_loop
.Ltail4x:
.byte 102,65,15,58,68,199,0
.byte 102,65,15,58,68,207,17
.byte 102,68,15,58,68,199,16
xorps %xmm12,%xmm4
xorps %xmm3,%xmm0
xorps %xmm5,%xmm1
pxor %xmm0,%xmm1
pxor %xmm4,%xmm8
pxor %xmm1,%xmm8
pxor %xmm0,%xmm1
movdqa %xmm8,%xmm9
psrldq $8,%xmm8
pslldq $8,%xmm9
pxor %xmm8,%xmm1
pxor %xmm9,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
addq $0x40,%rcx
jz .Ldone
movdqu 32(%rsi),%xmm7
subq $0x10,%rcx
jz .Lodd_tail
.Lskip4x:
movdqu (%rdx),%xmm8
movdqu 16(%rdx),%xmm3
.byte 102,69,15,56,0,194
.byte 102,65,15,56,0,218
pxor %xmm8,%xmm0
movdqa %xmm3,%xmm5
pshufd $78,%xmm3,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0
leaq 32(%rdx),%rdx
nop
subq $0x20,%rcx
jbe .Leven_tail
nop
jmp .Lmod_loop
.align 32
.Lmod_loop:
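// 2x path: two blocks per iteration (subq $0x20 below) with a single
// reduction, used when the 4x path is skipped or fewer than four blocks
// remain.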
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm8
pshufd $78,%xmm0,%xmm4
pxor %xmm0,%xmm4
.byte 102,15,58,68,198,0
.byte 102,15,58,68,206,17
.byte 102,15,58,68,231,16
pxor %xmm3,%xmm0
pxor %xmm5,%xmm1
movdqu (%rdx),%xmm9
pxor %xmm0,%xmm8
.byte 102,69,15,56,0,202
movdqu 16(%rdx),%xmm3
pxor %xmm1,%xmm8
pxor %xmm9,%xmm1
pxor %xmm8,%xmm4
.byte 102,65,15,56,0,218
movdqa %xmm4,%xmm8
psrldq $8,%xmm8
pslldq $8,%xmm4
pxor %xmm8,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm3,%xmm5
movdqa %xmm0,%xmm9
movdqa %xmm0,%xmm8
psllq $5,%xmm0
pxor %xmm0,%xmm8
.byte 102,15,58,68,218,0
psllq $1,%xmm0
pxor %xmm8,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm8
pslldq $8,%xmm0
psrldq $8,%xmm8
pxor %xmm9,%xmm0
pshufd $78,%xmm5,%xmm4
pxor %xmm8,%xmm1
pxor %xmm5,%xmm4
movdqa %xmm0,%xmm9
psrlq $1,%xmm0
.byte 102,15,58,68,234,17
pxor %xmm9,%xmm1
pxor %xmm0,%xmm9
psrlq $5,%xmm0
pxor %xmm9,%xmm0
leaq 32(%rdx),%rdx
psrlq $1,%xmm0
.byte 102,15,58,68,231,0
pxor %xmm1,%xmm0
subq $0x20,%rcx
ja .Lmod_loop
.Leven_tail:
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm8
pshufd $78,%xmm0,%xmm4
pxor %xmm0,%xmm4
.byte 102,15,58,68,198,0
.byte 102,15,58,68,206,17
.byte 102,15,58,68,231,16
pxor %xmm3,%xmm0
pxor %xmm5,%xmm1
pxor %xmm0,%xmm8
pxor %xmm1,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm8
psrldq $8,%xmm8
pslldq $8,%xmm4
pxor %xmm8,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testq %rcx,%rcx
jnz .Ldone
.Lodd_tail:
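// Single trailing block: one multiply by H and a final reduction.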
movdqu (%rdx),%xmm8
.byte 102,69,15,56,0,194
pxor %xmm8,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,223,0
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.Ldone:
.byte 102,65,15,56,0,194
movdqu %xmm0,(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_clmul,.-gcm_ghash_clmul
.globl gcm_init_avx
.hidden gcm_init_avx
.type gcm_init_avx,@function
.align 32
gcm_init_avx:
.cfi_startproc
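// gcm_init_avx: AVX counterpart of gcm_init_clmul. The loop (%r10 = 4
// iterations, 48 bytes of table per iteration) precomputes eight powers
// H^1..H^8, stored in pairs with each pair followed by the Karatsuba
// (lo^hi) combination built via vpalignr, enabling the
// eight-blocks-per-reduction loop in gcm_ghash_avx below.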
vzeroupper
vmovdqu (%rsi),%xmm2
vpshufd $78,%xmm2,%xmm2
vpshufd $255,%xmm2,%xmm4
vpsrlq $63,%xmm2,%xmm3
vpsllq $1,%xmm2,%xmm2
vpxor %xmm5,%xmm5,%xmm5
vpcmpgtd %xmm4,%xmm5,%xmm5
vpslldq $8,%xmm3,%xmm3
vpor %xmm3,%xmm2,%xmm2
vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm2,%xmm2,%xmm6
vmovdqa %xmm2,%xmm0
vpxor %xmm2,%xmm6,%xmm6
movq $4,%r10
jmp .Linit_start_avx
.align 32
.Linit_loop_avx:
vpalignr $8,%xmm3,%xmm4,%xmm5
vmovdqu %xmm5,-16(%rdi)
vpunpckhqdq %xmm0,%xmm0,%xmm3
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
vpxor %xmm0,%xmm1,%xmm4
vpxor %xmm4,%xmm3,%xmm3
vpslldq $8,%xmm3,%xmm4
vpsrldq $8,%xmm3,%xmm3
vpxor %xmm4,%xmm0,%xmm0
vpxor %xmm3,%xmm1,%xmm1
vpsllq $57,%xmm0,%xmm3
vpsllq $62,%xmm0,%xmm4
vpxor %xmm3,%xmm4,%xmm4
vpsllq $63,%xmm0,%xmm3
vpxor %xmm3,%xmm4,%xmm4
vpslldq $8,%xmm4,%xmm3
vpsrldq $8,%xmm4,%xmm4
vpxor %xmm3,%xmm0,%xmm0
vpxor %xmm4,%xmm1,%xmm1
vpsrlq $1,%xmm0,%xmm4
vpxor %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $5,%xmm4,%xmm4
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $1,%xmm0,%xmm0
vpxor %xmm1,%xmm0,%xmm0
.Linit_start_avx:
vmovdqa %xmm0,%xmm5
vpunpckhqdq %xmm0,%xmm0,%xmm3
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
vpxor %xmm0,%xmm1,%xmm4
vpxor %xmm4,%xmm3,%xmm3
vpslldq $8,%xmm3,%xmm4
vpsrldq $8,%xmm3,%xmm3
vpxor %xmm4,%xmm0,%xmm0
vpxor %xmm3,%xmm1,%xmm1
vpsllq $57,%xmm0,%xmm3
vpsllq $62,%xmm0,%xmm4
vpxor %xmm3,%xmm4,%xmm4
vpsllq $63,%xmm0,%xmm3
vpxor %xmm3,%xmm4,%xmm4
vpslldq $8,%xmm4,%xmm3
vpsrldq $8,%xmm4,%xmm4
vpxor %xmm3,%xmm0,%xmm0
vpxor %xmm4,%xmm1,%xmm1
vpsrlq $1,%xmm0,%xmm4
vpxor %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $5,%xmm4,%xmm4
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $1,%xmm0,%xmm0
vpxor %xmm1,%xmm0,%xmm0
vpshufd $78,%xmm5,%xmm3
vpshufd $78,%xmm0,%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqu %xmm5,0(%rdi)
vpxor %xmm0,%xmm4,%xmm4
vmovdqu %xmm0,16(%rdi)
leaq 48(%rdi),%rdi
subq $1,%r10
jnz .Linit_loop_avx
vpalignr $8,%xmm4,%xmm3,%xmm5
vmovdqu %xmm5,-16(%rdi)
vzeroupper
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_avx,.-gcm_init_avx
.globl gcm_gmult_avx
.hidden gcm_gmult_avx
.type gcm_gmult_avx,@function
.align 32
gcm_gmult_avx:
.cfi_startproc
jmp .L_gmult_clmul
.cfi_endproc
.size gcm_gmult_avx,.-gcm_gmult_avx
.globl gcm_ghash_avx
.hidden gcm_ghash_avx
.type gcm_ghash_avx,@function
.align 32
gcm_ghash_avx:
.cfi_startproc
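// gcm_ghash_avx: same contract as gcm_ghash_clmul, but aggregates eight
// blocks per reduction (.Loop8x_avx) against H^1..H^8 from the gcm_init_avx
// table; inputs shorter than 0x80 bytes take the block-at-a-time
// .Lshort_avx path instead.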
vzeroupper
vmovdqu (%rdi),%xmm10
leaq .L0x1c2_polynomial(%rip),%r10
leaq 64(%rsi),%rsi
vmovdqu .Lbswap_mask(%rip),%xmm13
vpshufb %xmm13,%xmm10,%xmm10
cmpq $0x80,%rcx
jb .Lshort_avx
subq $0x80,%rcx
vmovdqu 112(%rdx),%xmm14
vmovdqu 0-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm14
vmovdqu 32-64(%rsi),%xmm7
vpunpckhqdq %xmm14,%xmm14,%xmm9
vmovdqu 96(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm14,%xmm9,%xmm9
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 16-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 80(%rdx),%xmm14
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 48-64(%rsi),%xmm6
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 64(%rdx),%xmm15
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 80-64(%rsi),%xmm7
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vmovdqu 48(%rdx),%xmm14
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm4,%xmm1,%xmm1
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 96-64(%rsi),%xmm6
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 128-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 32(%rdx),%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vmovdqu 16(%rdx),%xmm14
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm4,%xmm1,%xmm1
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 144-64(%rsi),%xmm6
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 176-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vmovdqu (%rdx),%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 160-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
leaq 128(%rdx),%rdx
cmpq $0x80,%rcx
jb .Ltail_avx
vpxor %xmm10,%xmm15,%xmm15
subq $0x80,%rcx
jmp .Loop8x_avx
.align 32
.Loop8x_avx:
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 112(%rdx),%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpxor %xmm15,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
vmovdqu 0-64(%rsi),%xmm6
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
vmovdqu 32-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 96(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm3,%xmm10,%xmm10
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vxorps %xmm4,%xmm11,%xmm11
vmovdqu 16-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm5,%xmm12,%xmm12
vxorps %xmm15,%xmm8,%xmm8
vmovdqu 80(%rdx),%xmm14
vpxor %xmm10,%xmm12,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm11,%xmm12,%xmm12
vpslldq $8,%xmm12,%xmm9
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vpsrldq $8,%xmm12,%xmm12
vpxor %xmm9,%xmm10,%xmm10
vmovdqu 48-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm14
vxorps %xmm12,%xmm11,%xmm11
vpxor %xmm1,%xmm4,%xmm4
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 80-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 64(%rdx),%xmm15
vpalignr $8,%xmm10,%xmm10,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm4,%xmm1,%xmm1
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vxorps %xmm15,%xmm8,%xmm8
vpxor %xmm5,%xmm2,%xmm2
vmovdqu 48(%rdx),%xmm14
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 96-64(%rsi),%xmm6
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 128-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 32(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm4,%xmm1,%xmm1
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vpxor %xmm5,%xmm2,%xmm2
vxorps %xmm12,%xmm10,%xmm10
vmovdqu 16(%rdx),%xmm14
vpalignr $8,%xmm10,%xmm10,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 144-64(%rsi),%xmm6
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
vxorps %xmm11,%xmm12,%xmm12
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 176-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vmovdqu (%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 160-64(%rsi),%xmm6
vpxor %xmm12,%xmm15,%xmm15
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
vpxor %xmm10,%xmm15,%xmm15
leaq 128(%rdx),%rdx
subq $0x80,%rcx
jnc .Loop8x_avx
addq $0x80,%rcx
jmp .Ltail_no_xor_avx
.align 32
.Lshort_avx:
vmovdqu -16(%rdx,%rcx,1),%xmm14
leaq (%rdx,%rcx,1),%rdx
vmovdqu 0-64(%rsi),%xmm6
vmovdqu 32-64(%rsi),%xmm7
vpshufb %xmm13,%xmm14,%xmm15
vmovdqa %xmm0,%xmm3
vmovdqa %xmm1,%xmm4
vmovdqa %xmm2,%xmm5
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -32(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 16-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -48(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 48-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu 80-64(%rsi),%xmm7
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -64(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -80(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 96-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu 128-64(%rsi),%xmm7
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -96(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -112(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 144-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovq 184-64(%rsi),%xmm7
subq $0x10,%rcx
jmp .Ltail_avx
.align 32
.Ltail_avx:
vpxor %xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu (%r10),%xmm12
vpxor %xmm0,%xmm3,%xmm10
vpxor %xmm1,%xmm4,%xmm11
vpxor %xmm2,%xmm5,%xmm5
vpxor %xmm10,%xmm5,%xmm5
vpxor %xmm11,%xmm5,%xmm5
vpslldq $8,%xmm5,%xmm9
vpsrldq $8,%xmm5,%xmm5
vpxor %xmm9,%xmm10,%xmm10
vpxor %xmm5,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
vpalignr $8,%xmm10,%xmm10,%xmm10
vpxor %xmm9,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
vpalignr $8,%xmm10,%xmm10,%xmm10
vpxor %xmm11,%xmm10,%xmm10
vpxor %xmm9,%xmm10,%xmm10
cmpq $0,%rcx
jne .Lshort_avx
vpshufb %xmm13,%xmm10,%xmm10
vmovdqu %xmm10,(%rdi)
vzeroupper
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_avx,.-gcm_ghash_avx
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long 7,0,7,0
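// Constant pool: .Lbswap_mask is the pshufb pattern that reverses byte
// order (GHASH operates on bit-reflected values); .L0x1c2_polynomial is the
// reduction constant 0xc2000000000000000000000000000001 used by the
// vpclmulqdq-based reductions; .L7_mask extracts the low three bits of each
// qword so the reduction in .Lmod4_loop can use a pshufb table lookup on
// the 0xA040608020C0E000 constant loaded into %rax.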
.align 64
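// The .byte string below spells the CRYPTOGAMS banner:
// "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"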
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits