12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127 |
- // This file is generated from a similarly-named Perl script in the BoringSSL
- // source tree. Do not edit by hand.
- #if defined(__has_feature)
- #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
- #define OPENSSL_NO_ASM
- #endif
- #endif
- #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
- #if defined(BORINGSSL_PREFIX)
- #include <boringssl_prefix_symbols_asm.h>
- #endif
- .text
- .extern OPENSSL_ia32cap_P
- .hidden OPENSSL_ia32cap_P
- .globl gcm_init_clmul
- .hidden gcm_init_clmul
- .type gcm_init_clmul,@function
- .align 16
- gcm_init_clmul:
- .cfi_startproc
- .L_init_clmul:
- movdqu (%rsi),%xmm2
- pshufd $78,%xmm2,%xmm2
- pshufd $255,%xmm2,%xmm4
- movdqa %xmm2,%xmm3
- psllq $1,%xmm2
- pxor %xmm5,%xmm5
- psrlq $63,%xmm3
- pcmpgtd %xmm4,%xmm5
- pslldq $8,%xmm3
- por %xmm3,%xmm2
- pand .L0x1c2_polynomial(%rip),%xmm5
- pxor %xmm5,%xmm2
- pshufd $78,%xmm2,%xmm6
- movdqa %xmm2,%xmm0
- pxor %xmm2,%xmm6
- movdqa %xmm0,%xmm1
- pshufd $78,%xmm0,%xmm3
- pxor %xmm0,%xmm3
- .byte 102,15,58,68,194,0
- .byte 102,15,58,68,202,17
- .byte 102,15,58,68,222,0
- pxor %xmm0,%xmm3
- pxor %xmm1,%xmm3
- movdqa %xmm3,%xmm4
- psrldq $8,%xmm3
- pslldq $8,%xmm4
- pxor %xmm3,%xmm1
- pxor %xmm4,%xmm0
- movdqa %xmm0,%xmm4
- movdqa %xmm0,%xmm3
- psllq $5,%xmm0
- pxor %xmm0,%xmm3
- psllq $1,%xmm0
- pxor %xmm3,%xmm0
- psllq $57,%xmm0
- movdqa %xmm0,%xmm3
- pslldq $8,%xmm0
- psrldq $8,%xmm3
- pxor %xmm4,%xmm0
- pxor %xmm3,%xmm1
- movdqa %xmm0,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm1
- pxor %xmm0,%xmm4
- psrlq $5,%xmm0
- pxor %xmm4,%xmm0
- psrlq $1,%xmm0
- pxor %xmm1,%xmm0
- pshufd $78,%xmm2,%xmm3
- pshufd $78,%xmm0,%xmm4
- pxor %xmm2,%xmm3
- movdqu %xmm2,0(%rdi)
- pxor %xmm0,%xmm4
- movdqu %xmm0,16(%rdi)
- .byte 102,15,58,15,227,8
- movdqu %xmm4,32(%rdi)
- movdqa %xmm0,%xmm1
- pshufd $78,%xmm0,%xmm3
- pxor %xmm0,%xmm3
- .byte 102,15,58,68,194,0
- .byte 102,15,58,68,202,17
- .byte 102,15,58,68,222,0
- pxor %xmm0,%xmm3
- pxor %xmm1,%xmm3
- movdqa %xmm3,%xmm4
- psrldq $8,%xmm3
- pslldq $8,%xmm4
- pxor %xmm3,%xmm1
- pxor %xmm4,%xmm0
- movdqa %xmm0,%xmm4
- movdqa %xmm0,%xmm3
- psllq $5,%xmm0
- pxor %xmm0,%xmm3
- psllq $1,%xmm0
- pxor %xmm3,%xmm0
- psllq $57,%xmm0
- movdqa %xmm0,%xmm3
- pslldq $8,%xmm0
- psrldq $8,%xmm3
- pxor %xmm4,%xmm0
- pxor %xmm3,%xmm1
- movdqa %xmm0,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm1
- pxor %xmm0,%xmm4
- psrlq $5,%xmm0
- pxor %xmm4,%xmm0
- psrlq $1,%xmm0
- pxor %xmm1,%xmm0
- movdqa %xmm0,%xmm5
- movdqa %xmm0,%xmm1
- pshufd $78,%xmm0,%xmm3
- pxor %xmm0,%xmm3
- .byte 102,15,58,68,194,0
- .byte 102,15,58,68,202,17
- .byte 102,15,58,68,222,0
- pxor %xmm0,%xmm3
- pxor %xmm1,%xmm3
- movdqa %xmm3,%xmm4
- psrldq $8,%xmm3
- pslldq $8,%xmm4
- pxor %xmm3,%xmm1
- pxor %xmm4,%xmm0
- movdqa %xmm0,%xmm4
- movdqa %xmm0,%xmm3
- psllq $5,%xmm0
- pxor %xmm0,%xmm3
- psllq $1,%xmm0
- pxor %xmm3,%xmm0
- psllq $57,%xmm0
- movdqa %xmm0,%xmm3
- pslldq $8,%xmm0
- psrldq $8,%xmm3
- pxor %xmm4,%xmm0
- pxor %xmm3,%xmm1
- movdqa %xmm0,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm1
- pxor %xmm0,%xmm4
- psrlq $5,%xmm0
- pxor %xmm4,%xmm0
- psrlq $1,%xmm0
- pxor %xmm1,%xmm0
- pshufd $78,%xmm5,%xmm3
- pshufd $78,%xmm0,%xmm4
- pxor %xmm5,%xmm3
- movdqu %xmm5,48(%rdi)
- pxor %xmm0,%xmm4
- movdqu %xmm0,64(%rdi)
- .byte 102,15,58,15,227,8
- movdqu %xmm4,80(%rdi)
- .byte 0xf3,0xc3
- .cfi_endproc
- .size gcm_init_clmul,.-gcm_init_clmul
- .globl gcm_gmult_clmul
- .hidden gcm_gmult_clmul
- .type gcm_gmult_clmul,@function
- .align 16
- gcm_gmult_clmul:
- .cfi_startproc
- .L_gmult_clmul:
- movdqu (%rdi),%xmm0
- movdqa .Lbswap_mask(%rip),%xmm5
- movdqu (%rsi),%xmm2
- movdqu 32(%rsi),%xmm4
- .byte 102,15,56,0,197
- movdqa %xmm0,%xmm1
- pshufd $78,%xmm0,%xmm3
- pxor %xmm0,%xmm3
- .byte 102,15,58,68,194,0
- .byte 102,15,58,68,202,17
- .byte 102,15,58,68,220,0
- pxor %xmm0,%xmm3
- pxor %xmm1,%xmm3
- movdqa %xmm3,%xmm4
- psrldq $8,%xmm3
- pslldq $8,%xmm4
- pxor %xmm3,%xmm1
- pxor %xmm4,%xmm0
- movdqa %xmm0,%xmm4
- movdqa %xmm0,%xmm3
- psllq $5,%xmm0
- pxor %xmm0,%xmm3
- psllq $1,%xmm0
- pxor %xmm3,%xmm0
- psllq $57,%xmm0
- movdqa %xmm0,%xmm3
- pslldq $8,%xmm0
- psrldq $8,%xmm3
- pxor %xmm4,%xmm0
- pxor %xmm3,%xmm1
- movdqa %xmm0,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm1
- pxor %xmm0,%xmm4
- psrlq $5,%xmm0
- pxor %xmm4,%xmm0
- psrlq $1,%xmm0
- pxor %xmm1,%xmm0
- .byte 102,15,56,0,197
- movdqu %xmm0,(%rdi)
- .byte 0xf3,0xc3
- .cfi_endproc
- .size gcm_gmult_clmul,.-gcm_gmult_clmul
- .globl gcm_ghash_clmul
- .hidden gcm_ghash_clmul
- .type gcm_ghash_clmul,@function
- .align 32
- gcm_ghash_clmul:
- .cfi_startproc
- .L_ghash_clmul:
- movdqa .Lbswap_mask(%rip),%xmm10
- movdqu (%rdi),%xmm0
- movdqu (%rsi),%xmm2
- movdqu 32(%rsi),%xmm7
- .byte 102,65,15,56,0,194
- subq $0x10,%rcx
- jz .Lodd_tail
- movdqu 16(%rsi),%xmm6
- leaq OPENSSL_ia32cap_P(%rip),%rax
- movl 4(%rax),%eax
- cmpq $0x30,%rcx
- jb .Lskip4x
- andl $71303168,%eax
- cmpl $4194304,%eax
- je .Lskip4x
- subq $0x30,%rcx
- movq $0xA040608020C0E000,%rax
- movdqu 48(%rsi),%xmm14
- movdqu 64(%rsi),%xmm15
- movdqu 48(%rdx),%xmm3
- movdqu 32(%rdx),%xmm11
- .byte 102,65,15,56,0,218
- .byte 102,69,15,56,0,218
- movdqa %xmm3,%xmm5
- pshufd $78,%xmm3,%xmm4
- pxor %xmm3,%xmm4
- .byte 102,15,58,68,218,0
- .byte 102,15,58,68,234,17
- .byte 102,15,58,68,231,0
- movdqa %xmm11,%xmm13
- pshufd $78,%xmm11,%xmm12
- pxor %xmm11,%xmm12
- .byte 102,68,15,58,68,222,0
- .byte 102,68,15,58,68,238,17
- .byte 102,68,15,58,68,231,16
- xorps %xmm11,%xmm3
- xorps %xmm13,%xmm5
- movups 80(%rsi),%xmm7
- xorps %xmm12,%xmm4
- movdqu 16(%rdx),%xmm11
- movdqu 0(%rdx),%xmm8
- .byte 102,69,15,56,0,218
- .byte 102,69,15,56,0,194
- movdqa %xmm11,%xmm13
- pshufd $78,%xmm11,%xmm12
- pxor %xmm8,%xmm0
- pxor %xmm11,%xmm12
- .byte 102,69,15,58,68,222,0
- movdqa %xmm0,%xmm1
- pshufd $78,%xmm0,%xmm8
- pxor %xmm0,%xmm8
- .byte 102,69,15,58,68,238,17
- .byte 102,68,15,58,68,231,0
- xorps %xmm11,%xmm3
- xorps %xmm13,%xmm5
- leaq 64(%rdx),%rdx
- subq $0x40,%rcx
- jc .Ltail4x
- jmp .Lmod4_loop
- .align 32
- .Lmod4_loop:
- .byte 102,65,15,58,68,199,0
- xorps %xmm12,%xmm4
- movdqu 48(%rdx),%xmm11
- .byte 102,69,15,56,0,218
- .byte 102,65,15,58,68,207,17
- xorps %xmm3,%xmm0
- movdqu 32(%rdx),%xmm3
- movdqa %xmm11,%xmm13
- .byte 102,68,15,58,68,199,16
- pshufd $78,%xmm11,%xmm12
- xorps %xmm5,%xmm1
- pxor %xmm11,%xmm12
- .byte 102,65,15,56,0,218
- movups 32(%rsi),%xmm7
- xorps %xmm4,%xmm8
- .byte 102,68,15,58,68,218,0
- pshufd $78,%xmm3,%xmm4
- pxor %xmm0,%xmm8
- movdqa %xmm3,%xmm5
- pxor %xmm1,%xmm8
- pxor %xmm3,%xmm4
- movdqa %xmm8,%xmm9
- .byte 102,68,15,58,68,234,17
- pslldq $8,%xmm8
- psrldq $8,%xmm9
- pxor %xmm8,%xmm0
- movdqa .L7_mask(%rip),%xmm8
- pxor %xmm9,%xmm1
- .byte 102,76,15,110,200
- pand %xmm0,%xmm8
- .byte 102,69,15,56,0,200
- pxor %xmm0,%xmm9
- .byte 102,68,15,58,68,231,0
- psllq $57,%xmm9
- movdqa %xmm9,%xmm8
- pslldq $8,%xmm9
- .byte 102,15,58,68,222,0
- psrldq $8,%xmm8
- pxor %xmm9,%xmm0
- pxor %xmm8,%xmm1
- movdqu 0(%rdx),%xmm8
- movdqa %xmm0,%xmm9
- psrlq $1,%xmm0
- .byte 102,15,58,68,238,17
- xorps %xmm11,%xmm3
- movdqu 16(%rdx),%xmm11
- .byte 102,69,15,56,0,218
- .byte 102,15,58,68,231,16
- xorps %xmm13,%xmm5
- movups 80(%rsi),%xmm7
- .byte 102,69,15,56,0,194
- pxor %xmm9,%xmm1
- pxor %xmm0,%xmm9
- psrlq $5,%xmm0
- movdqa %xmm11,%xmm13
- pxor %xmm12,%xmm4
- pshufd $78,%xmm11,%xmm12
- pxor %xmm9,%xmm0
- pxor %xmm8,%xmm1
- pxor %xmm11,%xmm12
- .byte 102,69,15,58,68,222,0
- psrlq $1,%xmm0
- pxor %xmm1,%xmm0
- movdqa %xmm0,%xmm1
- .byte 102,69,15,58,68,238,17
- xorps %xmm11,%xmm3
- pshufd $78,%xmm0,%xmm8
- pxor %xmm0,%xmm8
- .byte 102,68,15,58,68,231,0
- xorps %xmm13,%xmm5
- leaq 64(%rdx),%rdx
- subq $0x40,%rcx
- jnc .Lmod4_loop
- .Ltail4x:
- .byte 102,65,15,58,68,199,0
- .byte 102,65,15,58,68,207,17
- .byte 102,68,15,58,68,199,16
- xorps %xmm12,%xmm4
- xorps %xmm3,%xmm0
- xorps %xmm5,%xmm1
- pxor %xmm0,%xmm1
- pxor %xmm4,%xmm8
- pxor %xmm1,%xmm8
- pxor %xmm0,%xmm1
- movdqa %xmm8,%xmm9
- psrldq $8,%xmm8
- pslldq $8,%xmm9
- pxor %xmm8,%xmm1
- pxor %xmm9,%xmm0
- movdqa %xmm0,%xmm4
- movdqa %xmm0,%xmm3
- psllq $5,%xmm0
- pxor %xmm0,%xmm3
- psllq $1,%xmm0
- pxor %xmm3,%xmm0
- psllq $57,%xmm0
- movdqa %xmm0,%xmm3
- pslldq $8,%xmm0
- psrldq $8,%xmm3
- pxor %xmm4,%xmm0
- pxor %xmm3,%xmm1
- movdqa %xmm0,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm1
- pxor %xmm0,%xmm4
- psrlq $5,%xmm0
- pxor %xmm4,%xmm0
- psrlq $1,%xmm0
- pxor %xmm1,%xmm0
- addq $0x40,%rcx
- jz .Ldone
- movdqu 32(%rsi),%xmm7
- subq $0x10,%rcx
- jz .Lodd_tail
- .Lskip4x:
- movdqu (%rdx),%xmm8
- movdqu 16(%rdx),%xmm3
- .byte 102,69,15,56,0,194
- .byte 102,65,15,56,0,218
- pxor %xmm8,%xmm0
- movdqa %xmm3,%xmm5
- pshufd $78,%xmm3,%xmm4
- pxor %xmm3,%xmm4
- .byte 102,15,58,68,218,0
- .byte 102,15,58,68,234,17
- .byte 102,15,58,68,231,0
- leaq 32(%rdx),%rdx
- nop
- subq $0x20,%rcx
- jbe .Leven_tail
- nop
- jmp .Lmod_loop
- .align 32
- .Lmod_loop:
- movdqa %xmm0,%xmm1
- movdqa %xmm4,%xmm8
- pshufd $78,%xmm0,%xmm4
- pxor %xmm0,%xmm4
- .byte 102,15,58,68,198,0
- .byte 102,15,58,68,206,17
- .byte 102,15,58,68,231,16
- pxor %xmm3,%xmm0
- pxor %xmm5,%xmm1
- movdqu (%rdx),%xmm9
- pxor %xmm0,%xmm8
- .byte 102,69,15,56,0,202
- movdqu 16(%rdx),%xmm3
- pxor %xmm1,%xmm8
- pxor %xmm9,%xmm1
- pxor %xmm8,%xmm4
- .byte 102,65,15,56,0,218
- movdqa %xmm4,%xmm8
- psrldq $8,%xmm8
- pslldq $8,%xmm4
- pxor %xmm8,%xmm1
- pxor %xmm4,%xmm0
- movdqa %xmm3,%xmm5
- movdqa %xmm0,%xmm9
- movdqa %xmm0,%xmm8
- psllq $5,%xmm0
- pxor %xmm0,%xmm8
- .byte 102,15,58,68,218,0
- psllq $1,%xmm0
- pxor %xmm8,%xmm0
- psllq $57,%xmm0
- movdqa %xmm0,%xmm8
- pslldq $8,%xmm0
- psrldq $8,%xmm8
- pxor %xmm9,%xmm0
- pshufd $78,%xmm5,%xmm4
- pxor %xmm8,%xmm1
- pxor %xmm5,%xmm4
- movdqa %xmm0,%xmm9
- psrlq $1,%xmm0
- .byte 102,15,58,68,234,17
- pxor %xmm9,%xmm1
- pxor %xmm0,%xmm9
- psrlq $5,%xmm0
- pxor %xmm9,%xmm0
- leaq 32(%rdx),%rdx
- psrlq $1,%xmm0
- .byte 102,15,58,68,231,0
- pxor %xmm1,%xmm0
- subq $0x20,%rcx
- ja .Lmod_loop
- .Leven_tail:
- movdqa %xmm0,%xmm1
- movdqa %xmm4,%xmm8
- pshufd $78,%xmm0,%xmm4
- pxor %xmm0,%xmm4
- .byte 102,15,58,68,198,0
- .byte 102,15,58,68,206,17
- .byte 102,15,58,68,231,16
- pxor %xmm3,%xmm0
- pxor %xmm5,%xmm1
- pxor %xmm0,%xmm8
- pxor %xmm1,%xmm8
- pxor %xmm8,%xmm4
- movdqa %xmm4,%xmm8
- psrldq $8,%xmm8
- pslldq $8,%xmm4
- pxor %xmm8,%xmm1
- pxor %xmm4,%xmm0
- movdqa %xmm0,%xmm4
- movdqa %xmm0,%xmm3
- psllq $5,%xmm0
- pxor %xmm0,%xmm3
- psllq $1,%xmm0
- pxor %xmm3,%xmm0
- psllq $57,%xmm0
- movdqa %xmm0,%xmm3
- pslldq $8,%xmm0
- psrldq $8,%xmm3
- pxor %xmm4,%xmm0
- pxor %xmm3,%xmm1
- movdqa %xmm0,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm1
- pxor %xmm0,%xmm4
- psrlq $5,%xmm0
- pxor %xmm4,%xmm0
- psrlq $1,%xmm0
- pxor %xmm1,%xmm0
- testq %rcx,%rcx
- jnz .Ldone
- .Lodd_tail:
- movdqu (%rdx),%xmm8
- .byte 102,69,15,56,0,194
- pxor %xmm8,%xmm0
- movdqa %xmm0,%xmm1
- pshufd $78,%xmm0,%xmm3
- pxor %xmm0,%xmm3
- .byte 102,15,58,68,194,0
- .byte 102,15,58,68,202,17
- .byte 102,15,58,68,223,0
- pxor %xmm0,%xmm3
- pxor %xmm1,%xmm3
- movdqa %xmm3,%xmm4
- psrldq $8,%xmm3
- pslldq $8,%xmm4
- pxor %xmm3,%xmm1
- pxor %xmm4,%xmm0
- movdqa %xmm0,%xmm4
- movdqa %xmm0,%xmm3
- psllq $5,%xmm0
- pxor %xmm0,%xmm3
- psllq $1,%xmm0
- pxor %xmm3,%xmm0
- psllq $57,%xmm0
- movdqa %xmm0,%xmm3
- pslldq $8,%xmm0
- psrldq $8,%xmm3
- pxor %xmm4,%xmm0
- pxor %xmm3,%xmm1
- movdqa %xmm0,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm1
- pxor %xmm0,%xmm4
- psrlq $5,%xmm0
- pxor %xmm4,%xmm0
- psrlq $1,%xmm0
- pxor %xmm1,%xmm0
- .Ldone:
- .byte 102,65,15,56,0,194
- movdqu %xmm0,(%rdi)
- .byte 0xf3,0xc3
- .cfi_endproc
- .size gcm_ghash_clmul,.-gcm_ghash_clmul
- .globl gcm_init_avx
- .hidden gcm_init_avx
- .type gcm_init_avx,@function
- .align 32
- gcm_init_avx:
- .cfi_startproc
- vzeroupper
- vmovdqu (%rsi),%xmm2
- vpshufd $78,%xmm2,%xmm2
- vpshufd $255,%xmm2,%xmm4
- vpsrlq $63,%xmm2,%xmm3
- vpsllq $1,%xmm2,%xmm2
- vpxor %xmm5,%xmm5,%xmm5
- vpcmpgtd %xmm4,%xmm5,%xmm5
- vpslldq $8,%xmm3,%xmm3
- vpor %xmm3,%xmm2,%xmm2
- vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5
- vpxor %xmm5,%xmm2,%xmm2
- vpunpckhqdq %xmm2,%xmm2,%xmm6
- vmovdqa %xmm2,%xmm0
- vpxor %xmm2,%xmm6,%xmm6
- movq $4,%r10
- jmp .Linit_start_avx
- .align 32
- .Linit_loop_avx:
- vpalignr $8,%xmm3,%xmm4,%xmm5
- vmovdqu %xmm5,-16(%rdi)
- vpunpckhqdq %xmm0,%xmm0,%xmm3
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
- vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
- vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
- vpxor %xmm0,%xmm1,%xmm4
- vpxor %xmm4,%xmm3,%xmm3
- vpslldq $8,%xmm3,%xmm4
- vpsrldq $8,%xmm3,%xmm3
- vpxor %xmm4,%xmm0,%xmm0
- vpxor %xmm3,%xmm1,%xmm1
- vpsllq $57,%xmm0,%xmm3
- vpsllq $62,%xmm0,%xmm4
- vpxor %xmm3,%xmm4,%xmm4
- vpsllq $63,%xmm0,%xmm3
- vpxor %xmm3,%xmm4,%xmm4
- vpslldq $8,%xmm4,%xmm3
- vpsrldq $8,%xmm4,%xmm4
- vpxor %xmm3,%xmm0,%xmm0
- vpxor %xmm4,%xmm1,%xmm1
- vpsrlq $1,%xmm0,%xmm4
- vpxor %xmm0,%xmm1,%xmm1
- vpxor %xmm4,%xmm0,%xmm0
- vpsrlq $5,%xmm4,%xmm4
- vpxor %xmm4,%xmm0,%xmm0
- vpsrlq $1,%xmm0,%xmm0
- vpxor %xmm1,%xmm0,%xmm0
- .Linit_start_avx:
- vmovdqa %xmm0,%xmm5
- vpunpckhqdq %xmm0,%xmm0,%xmm3
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
- vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
- vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
- vpxor %xmm0,%xmm1,%xmm4
- vpxor %xmm4,%xmm3,%xmm3
- vpslldq $8,%xmm3,%xmm4
- vpsrldq $8,%xmm3,%xmm3
- vpxor %xmm4,%xmm0,%xmm0
- vpxor %xmm3,%xmm1,%xmm1
- vpsllq $57,%xmm0,%xmm3
- vpsllq $62,%xmm0,%xmm4
- vpxor %xmm3,%xmm4,%xmm4
- vpsllq $63,%xmm0,%xmm3
- vpxor %xmm3,%xmm4,%xmm4
- vpslldq $8,%xmm4,%xmm3
- vpsrldq $8,%xmm4,%xmm4
- vpxor %xmm3,%xmm0,%xmm0
- vpxor %xmm4,%xmm1,%xmm1
- vpsrlq $1,%xmm0,%xmm4
- vpxor %xmm0,%xmm1,%xmm1
- vpxor %xmm4,%xmm0,%xmm0
- vpsrlq $5,%xmm4,%xmm4
- vpxor %xmm4,%xmm0,%xmm0
- vpsrlq $1,%xmm0,%xmm0
- vpxor %xmm1,%xmm0,%xmm0
- vpshufd $78,%xmm5,%xmm3
- vpshufd $78,%xmm0,%xmm4
- vpxor %xmm5,%xmm3,%xmm3
- vmovdqu %xmm5,0(%rdi)
- vpxor %xmm0,%xmm4,%xmm4
- vmovdqu %xmm0,16(%rdi)
- leaq 48(%rdi),%rdi
- subq $1,%r10
- jnz .Linit_loop_avx
- vpalignr $8,%xmm4,%xmm3,%xmm5
- vmovdqu %xmm5,-16(%rdi)
- vzeroupper
- .byte 0xf3,0xc3
- .cfi_endproc
- .size gcm_init_avx,.-gcm_init_avx
- .globl gcm_gmult_avx
- .hidden gcm_gmult_avx
- .type gcm_gmult_avx,@function
- .align 32
- gcm_gmult_avx:
- .cfi_startproc
- jmp .L_gmult_clmul
- .cfi_endproc
- .size gcm_gmult_avx,.-gcm_gmult_avx
- .globl gcm_ghash_avx
- .hidden gcm_ghash_avx
- .type gcm_ghash_avx,@function
- .align 32
- gcm_ghash_avx:
- .cfi_startproc
- vzeroupper
- vmovdqu (%rdi),%xmm10
- leaq .L0x1c2_polynomial(%rip),%r10
- leaq 64(%rsi),%rsi
- vmovdqu .Lbswap_mask(%rip),%xmm13
- vpshufb %xmm13,%xmm10,%xmm10
- cmpq $0x80,%rcx
- jb .Lshort_avx
- subq $0x80,%rcx
- vmovdqu 112(%rdx),%xmm14
- vmovdqu 0-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm14
- vmovdqu 32-64(%rsi),%xmm7
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vmovdqu 96(%rdx),%xmm15
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpxor %xmm14,%xmm9,%xmm9
- vpshufb %xmm13,%xmm15,%xmm15
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 16-64(%rsi),%xmm6
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vmovdqu 80(%rdx),%xmm14
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vpxor %xmm15,%xmm8,%xmm8
- vpshufb %xmm13,%xmm14,%xmm14
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vmovdqu 48-64(%rsi),%xmm6
- vpxor %xmm14,%xmm9,%xmm9
- vmovdqu 64(%rdx),%xmm15
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 80-64(%rsi),%xmm7
- vpshufb %xmm13,%xmm15,%xmm15
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpxor %xmm1,%xmm4,%xmm4
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 64-64(%rsi),%xmm6
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu 48(%rdx),%xmm14
- vpxor %xmm3,%xmm0,%xmm0
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpxor %xmm4,%xmm1,%xmm1
- vpshufb %xmm13,%xmm14,%xmm14
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vmovdqu 96-64(%rsi),%xmm6
- vpxor %xmm5,%xmm2,%xmm2
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 128-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
- vmovdqu 32(%rdx),%xmm15
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpxor %xmm1,%xmm4,%xmm4
- vpshufb %xmm13,%xmm15,%xmm15
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 112-64(%rsi),%xmm6
- vpxor %xmm2,%xmm5,%xmm5
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu 16(%rdx),%xmm14
- vpxor %xmm3,%xmm0,%xmm0
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpxor %xmm4,%xmm1,%xmm1
- vpshufb %xmm13,%xmm14,%xmm14
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vmovdqu 144-64(%rsi),%xmm6
- vpxor %xmm5,%xmm2,%xmm2
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 176-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
- vmovdqu (%rdx),%xmm15
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpxor %xmm1,%xmm4,%xmm4
- vpshufb %xmm13,%xmm15,%xmm15
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 160-64(%rsi),%xmm6
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
- leaq 128(%rdx),%rdx
- cmpq $0x80,%rcx
- jb .Ltail_avx
- vpxor %xmm10,%xmm15,%xmm15
- subq $0x80,%rcx
- jmp .Loop8x_avx
- .align 32
- .Loop8x_avx:
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vmovdqu 112(%rdx),%xmm14
- vpxor %xmm0,%xmm3,%xmm3
- vpxor %xmm15,%xmm8,%xmm8
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
- vpshufb %xmm13,%xmm14,%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
- vmovdqu 0-64(%rsi),%xmm6
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
- vmovdqu 32-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
- vmovdqu 96(%rdx),%xmm15
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpxor %xmm3,%xmm10,%xmm10
- vpshufb %xmm13,%xmm15,%xmm15
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vxorps %xmm4,%xmm11,%xmm11
- vmovdqu 16-64(%rsi),%xmm6
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vpxor %xmm5,%xmm12,%xmm12
- vxorps %xmm15,%xmm8,%xmm8
- vmovdqu 80(%rdx),%xmm14
- vpxor %xmm10,%xmm12,%xmm12
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpxor %xmm11,%xmm12,%xmm12
- vpslldq $8,%xmm12,%xmm9
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vpsrldq $8,%xmm12,%xmm12
- vpxor %xmm9,%xmm10,%xmm10
- vmovdqu 48-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm14
- vxorps %xmm12,%xmm11,%xmm11
- vpxor %xmm1,%xmm4,%xmm4
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 80-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
- vpxor %xmm2,%xmm5,%xmm5
- vmovdqu 64(%rdx),%xmm15
- vpalignr $8,%xmm10,%xmm10,%xmm12
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpshufb %xmm13,%xmm15,%xmm15
- vpxor %xmm3,%xmm0,%xmm0
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 64-64(%rsi),%xmm6
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm4,%xmm1,%xmm1
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vxorps %xmm15,%xmm8,%xmm8
- vpxor %xmm5,%xmm2,%xmm2
- vmovdqu 48(%rdx),%xmm14
- vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpshufb %xmm13,%xmm14,%xmm14
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vmovdqu 96-64(%rsi),%xmm6
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 128-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
- vpxor %xmm2,%xmm5,%xmm5
- vmovdqu 32(%rdx),%xmm15
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpshufb %xmm13,%xmm15,%xmm15
- vpxor %xmm3,%xmm0,%xmm0
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 112-64(%rsi),%xmm6
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm4,%xmm1,%xmm1
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vpxor %xmm15,%xmm8,%xmm8
- vpxor %xmm5,%xmm2,%xmm2
- vxorps %xmm12,%xmm10,%xmm10
- vmovdqu 16(%rdx),%xmm14
- vpalignr $8,%xmm10,%xmm10,%xmm12
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpshufb %xmm13,%xmm14,%xmm14
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vmovdqu 144-64(%rsi),%xmm6
- vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
- vxorps %xmm11,%xmm12,%xmm12
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 176-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
- vpxor %xmm2,%xmm5,%xmm5
- vmovdqu (%rdx),%xmm15
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpshufb %xmm13,%xmm15,%xmm15
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 160-64(%rsi),%xmm6
- vpxor %xmm12,%xmm15,%xmm15
- vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
- vpxor %xmm10,%xmm15,%xmm15
- leaq 128(%rdx),%rdx
- subq $0x80,%rcx
- jnc .Loop8x_avx
- addq $0x80,%rcx
- jmp .Ltail_no_xor_avx
- .align 32
- .Lshort_avx:
- vmovdqu -16(%rdx,%rcx,1),%xmm14
- leaq (%rdx,%rcx,1),%rdx
- vmovdqu 0-64(%rsi),%xmm6
- vmovdqu 32-64(%rsi),%xmm7
- vpshufb %xmm13,%xmm14,%xmm15
- vmovdqa %xmm0,%xmm3
- vmovdqa %xmm1,%xmm4
- vmovdqa %xmm2,%xmm5
- subq $0x10,%rcx
- jz .Ltail_avx
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -32(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 16-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vpsrldq $8,%xmm7,%xmm7
- subq $0x10,%rcx
- jz .Ltail_avx
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -48(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 48-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vmovdqu 80-64(%rsi),%xmm7
- subq $0x10,%rcx
- jz .Ltail_avx
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -64(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 64-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vpsrldq $8,%xmm7,%xmm7
- subq $0x10,%rcx
- jz .Ltail_avx
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -80(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 96-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vmovdqu 128-64(%rsi),%xmm7
- subq $0x10,%rcx
- jz .Ltail_avx
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -96(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 112-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vpsrldq $8,%xmm7,%xmm7
- subq $0x10,%rcx
- jz .Ltail_avx
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -112(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 144-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vmovq 184-64(%rsi),%xmm7
- subq $0x10,%rcx
- jmp .Ltail_avx
- .align 32
- .Ltail_avx:
- vpxor %xmm10,%xmm15,%xmm15
- .Ltail_no_xor_avx:
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vmovdqu (%r10),%xmm12
- vpxor %xmm0,%xmm3,%xmm10
- vpxor %xmm1,%xmm4,%xmm11
- vpxor %xmm2,%xmm5,%xmm5
- vpxor %xmm10,%xmm5,%xmm5
- vpxor %xmm11,%xmm5,%xmm5
- vpslldq $8,%xmm5,%xmm9
- vpsrldq $8,%xmm5,%xmm5
- vpxor %xmm9,%xmm10,%xmm10
- vpxor %xmm5,%xmm11,%xmm11
- vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
- vpalignr $8,%xmm10,%xmm10,%xmm10
- vpxor %xmm9,%xmm10,%xmm10
- vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
- vpalignr $8,%xmm10,%xmm10,%xmm10
- vpxor %xmm11,%xmm10,%xmm10
- vpxor %xmm9,%xmm10,%xmm10
- cmpq $0,%rcx
- jne .Lshort_avx
- vpshufb %xmm13,%xmm10,%xmm10
- vmovdqu %xmm10,(%rdi)
- vzeroupper
- .byte 0xf3,0xc3
- .cfi_endproc
- .size gcm_ghash_avx,.-gcm_ghash_avx
- .align 64
- .Lbswap_mask:
- .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
- .L0x1c2_polynomial:
- .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
- .L7_mask:
- .long 7,0,7,0
- .align 64
- .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
- .align 64
- #endif
- .section .note.GNU-stack,"",@progbits
|