vpaes-x86_64.S 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133
  1. // This file is generated from a similarly-named Perl script in the BoringSSL
  2. // source tree. Do not edit by hand.
  3. #if defined(__has_feature)
  4. #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
  5. #define OPENSSL_NO_ASM
  6. #endif
  7. #endif
  8. #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
  9. #if defined(BORINGSSL_PREFIX)
  10. #include <boringssl_prefix_symbols_asm.h>
  11. #endif
  12. .text
  // _vpaes_encrypt_core: encrypt the single 16-byte block held in %xmm0.
  //
  // In:   %xmm0 = input block.
  //       %rdx  = key schedule; the 32-bit value at 240(%rdx) is the number of
  //               .Lenc_loop iterations to run.
  //       %xmm9-%xmm15 = constants in the layout loaded by _vpaes_preheat.
  // Out:  %xmm0 = output block.
  // Clobbers: %rax, %r9, %r10, %r11, %xmm1-%xmm5, flags.
  //
  // Every ".byte 102,15,56,0,N" is a hand-encoded pshufb (66 0F 38 00 /r); the
  // decoded operands are given in the comment above it. ".byte 0xf3,0xc3" is
  // "rep ret".
  13. .type _vpaes_encrypt_core,@function
  14. .align 16
  15. _vpaes_encrypt_core:
  16. .cfi_startproc
  // r9 = round-key cursor, r11 = row offset into the 64-byte permutation
  // tables (starts at 16), eax = remaining loop iterations.
  17. movq %rdx,%r9
  18. movq $16,%r11
  19. movl 240(%rdx),%eax
  // Input transform: xmm1 = high nibble of each byte, xmm0 = low nibble
  // (xmm9 is the per-byte 0x0F mask), xmm5 = first round key.
  20. movdqa %xmm9,%xmm1
  21. movdqa .Lk_ipt(%rip),%xmm2
  22. pandn %xmm0,%xmm1
  23. movdqu (%r9),%xmm5
  24. psrld $4,%xmm1
  25. pand %xmm9,%xmm0
  // pshufb %xmm0,%xmm2: index .Lk_ipt with the low nibbles.
  26. .byte 102,15,56,0,208
  27. movdqa .Lk_ipt+16(%rip),%xmm0
  // pshufb %xmm1,%xmm0: index .Lk_ipt+16 with the high nibbles.
  28. .byte 102,15,56,0,193
  // xmm0 = both lookups xor the first round key; r9 moves to the next key.
  29. pxor %xmm5,%xmm2
  30. addq $16,%r9
  31. pxor %xmm2,%xmm0
  // r10 anchors the table accesses below by fixed displacement:
  // -96/-80 = .Lk_sbo, -64 = .Lk_mc_forward, 0 = .Lk_mc_backward, +64 = .Lk_sr.
  32. leaq .Lk_mc_backward(%rip),%r10
  33. jmp .Lenc_entry
  34. .align 16
  35. .Lenc_loop:
  // Second half of a round. xmm2/xmm3 are the index vectors produced at
  // .Lenc_entry, xmm5 is the current round key. Per _vpaes_preheat:
  // xmm13/xmm12 = .Lk_sb1 rows, xmm15/xmm14 = .Lk_sb2 rows.
  36. movdqa %xmm13,%xmm4
  37. movdqa %xmm12,%xmm0
  // pshufb %xmm2,%xmm4: .Lk_sb1 lookup.
  38. .byte 102,15,56,0,226
  // pshufb %xmm3,%xmm0: .Lk_sb1+16 lookup.
  39. .byte 102,15,56,0,195
  // xmm0 = sb1 lookups xor round key.
  40. pxor %xmm5,%xmm4
  41. movdqa %xmm15,%xmm5
  42. pxor %xmm4,%xmm0
  // xmm1 = row r11 of .Lk_mc_forward.
  43. movdqa -64(%r11,%r10,1),%xmm1
  // pshufb %xmm2,%xmm5: .Lk_sb2 lookup.
  44. .byte 102,15,56,0,234
  // xmm4 = row r11 of .Lk_mc_backward.
  45. movdqa (%r11,%r10,1),%xmm4
  46. movdqa %xmm14,%xmm2
  // pshufb %xmm3,%xmm2: .Lk_sb2+16 lookup.
  47. .byte 102,15,56,0,211
  // xmm3 = copy of the sb1 result; xmm2 = combined sb2 result.
  48. movdqa %xmm0,%xmm3
  49. pxor %xmm5,%xmm2
  // pshufb %xmm1,%xmm0: permute the sb1 result by the mc_forward row.
  50. .byte 102,15,56,0,193
  51. addq $16,%r9
  52. pxor %xmm2,%xmm0
  // pshufb %xmm4,%xmm3: permute the sb1 copy by the mc_backward row.
  53. .byte 102,15,56,0,220
  54. addq $16,%r11
  55. pxor %xmm0,%xmm3
  // pshufb %xmm1,%xmm0: apply the mc_forward row a second time.
  56. .byte 102,15,56,0,193
  // Keep r11 within the four 16-byte rows. The subq sets ZF for the jnz at
  // the end of .Lenc_entry; no instruction in between modifies flags.
  57. andq $0x30,%r11
  58. subq $1,%rax
  59. pxor %xmm3,%xmm0
  60. .Lenc_entry:
  // First half of a round: split xmm0 into nibbles and run them through the
  // .Lk_inv tables (xmm10 = .Lk_inv, xmm11 = .Lk_inv+16 per _vpaes_preheat).
  // Leaves index vectors in xmm2 and xmm3 and the next round key in xmm5.
  61. movdqa %xmm9,%xmm1
  62. movdqa %xmm11,%xmm5
  63. pandn %xmm0,%xmm1
  64. psrld $4,%xmm1
  65. pand %xmm9,%xmm0
  // pshufb %xmm0,%xmm5: .Lk_inv+16 indexed by the low nibbles.
  66. .byte 102,15,56,0,232
  67. movdqa %xmm10,%xmm3
  // xmm0 = low nibbles xor high nibbles.
  68. pxor %xmm1,%xmm0
  // pshufb %xmm1,%xmm3: .Lk_inv indexed by the high nibbles.
  69. .byte 102,15,56,0,217
  70. movdqa %xmm10,%xmm4
  71. pxor %xmm5,%xmm3
  // pshufb %xmm0,%xmm4: .Lk_inv indexed by the xor of the nibbles.
  72. .byte 102,15,56,0,224
  73. movdqa %xmm10,%xmm2
  74. pxor %xmm5,%xmm4
  // pshufb %xmm3,%xmm2: second-level .Lk_inv lookup.
  75. .byte 102,15,56,0,211
  76. movdqa %xmm10,%xmm3
  77. pxor %xmm0,%xmm2
  // pshufb %xmm4,%xmm3: second-level .Lk_inv lookup.
  78. .byte 102,15,56,0,220
  // Fetch the round key for the coming step.
  79. movdqu (%r9),%xmm5
  80. pxor %xmm1,%xmm3
  // ZF comes from subq $1,%rax in the loop body, or on the first pass from
  // the addq $16,%r9 in the prologue.
  81. jnz .Lenc_loop
  // Last round: .Lk_sbo lookups (rows at -96 and -80 from r10) xor the last
  // round key, followed by a byte permutation by row r11 of .Lk_sr.
  82. movdqa -96(%r10),%xmm4
  83. movdqa -80(%r10),%xmm0
  // pshufb %xmm2,%xmm4
  84. .byte 102,15,56,0,226
  85. pxor %xmm5,%xmm4
  // pshufb %xmm3,%xmm0
  86. .byte 102,15,56,0,195
  87. movdqa 64(%r11,%r10,1),%xmm1
  88. pxor %xmm4,%xmm0
  // pshufb %xmm1,%xmm0
  89. .byte 102,15,56,0,193
  // rep ret
  90. .byte 0xf3,0xc3
  91. .cfi_endproc
  92. .size _vpaes_encrypt_core,.-_vpaes_encrypt_core
  // _vpaes_encrypt_core_2x: encrypt two 16-byte blocks at once, mirroring
  // _vpaes_encrypt_core instruction for instruction on a second set of
  // registers.
  //
  // In:   %xmm0 = first block, %xmm6 = second block.
  //       %rdx  = key schedule; loop count at 240(%rdx).
  //       %xmm9 = 0x0F byte mask and %xmm10 = .Lk_inv as loaded by
  //               _vpaes_preheat.
  // Out:  %xmm0 = first result, %xmm6 = second result.
  // Clobbers: %rax, %r9, %r10, %r11, %xmm1-%xmm5, %xmm7, %xmm8,
  //           %xmm11-%xmm13, flags.
  //
  // Register pairing (first block / second block): xmm0/xmm6, xmm1/xmm7,
  // xmm2/xmm8, xmm3/xmm11, xmm4/xmm12, xmm5/xmm13. Because xmm11-xmm13 are
  // used as scratch here, the .Lk_sb1, .Lk_sb2 and .Lk_inv+16 tables are
  // reloaded from memory instead of being taken from the preheated registers.
  // xmm14 and xmm15 are not touched.
  //
  // ".byte 102,[REX,]15,56,0,N" sequences are hand-encoded pshufb; decoded
  // operands are given in the comments. ".byte 0xf3,0xc3" is "rep ret".
  93. .type _vpaes_encrypt_core_2x,@function
  94. .align 16
  95. _vpaes_encrypt_core_2x:
  96. .cfi_startproc
  // r9 = round-key cursor, r11 = permutation-table row offset, eax = loop count.
  97. movq %rdx,%r9
  98. movq $16,%r11
  99. movl 240(%rdx),%eax
  // Input transform for both blocks: high nibbles to xmm1/xmm7, low nibbles
  // stay in xmm0/xmm6, xmm5 = first round key (shared by both blocks).
  100. movdqa %xmm9,%xmm1
  101. movdqa %xmm9,%xmm7
  102. movdqa .Lk_ipt(%rip),%xmm2
  103. movdqa %xmm2,%xmm8
  104. pandn %xmm0,%xmm1
  105. pandn %xmm6,%xmm7
  106. movdqu (%r9),%xmm5
  107. psrld $4,%xmm1
  108. psrld $4,%xmm7
  109. pand %xmm9,%xmm0
  110. pand %xmm9,%xmm6
  // pshufb %xmm0,%xmm2 ; pshufb %xmm6,%xmm8: .Lk_ipt by low nibbles.
  111. .byte 102,15,56,0,208
  112. .byte 102,68,15,56,0,198
  113. movdqa .Lk_ipt+16(%rip),%xmm0
  114. movdqa %xmm0,%xmm6
  // pshufb %xmm1,%xmm0 ; pshufb %xmm7,%xmm6: .Lk_ipt+16 by high nibbles.
  115. .byte 102,15,56,0,193
  116. .byte 102,15,56,0,247
  117. pxor %xmm5,%xmm2
  118. pxor %xmm5,%xmm8
  119. addq $16,%r9
  120. pxor %xmm2,%xmm0
  121. pxor %xmm8,%xmm6
  // r10 anchors .Lk_sbo (-96/-80), .Lk_mc_forward (-64), .Lk_mc_backward (0)
  // and .Lk_sr (+64).
  122. leaq .Lk_mc_backward(%rip),%r10
  123. jmp .Lenc2x_entry
  124. .align 16
  125. .Lenc2x_loop:
  // Second half of a round. Index vectors: xmm2/xmm3 (first block) and
  // xmm8/xmm11 (second block); xmm5 = current round key.
  126. movdqa .Lk_sb1(%rip),%xmm4
  127. movdqa .Lk_sb1+16(%rip),%xmm0
  128. movdqa %xmm4,%xmm12
  129. movdqa %xmm0,%xmm6
  // pshufb %xmm2,%xmm4 ; pshufb %xmm8,%xmm12: .Lk_sb1 lookups.
  130. .byte 102,15,56,0,226
  131. .byte 102,69,15,56,0,224
  // pshufb %xmm3,%xmm0 ; pshufb %xmm11,%xmm6: .Lk_sb1+16 lookups.
  132. .byte 102,15,56,0,195
  133. .byte 102,65,15,56,0,243
  // Mix in the round key before xmm5 is reused for .Lk_sb2.
  134. pxor %xmm5,%xmm4
  135. pxor %xmm5,%xmm12
  136. movdqa .Lk_sb2(%rip),%xmm5
  137. movdqa %xmm5,%xmm13
  138. pxor %xmm4,%xmm0
  139. pxor %xmm12,%xmm6
  // xmm1 = row r11 of .Lk_mc_forward (shared by both blocks).
  140. movdqa -64(%r11,%r10,1),%xmm1
  // pshufb %xmm2,%xmm5 ; pshufb %xmm8,%xmm13: .Lk_sb2 lookups.
  141. .byte 102,15,56,0,234
  142. .byte 102,69,15,56,0,232
  // xmm4 = row r11 of .Lk_mc_backward (shared by both blocks).
  143. movdqa (%r11,%r10,1),%xmm4
  144. movdqa .Lk_sb2+16(%rip),%xmm2
  145. movdqa %xmm2,%xmm8
  // pshufb %xmm3,%xmm2 ; pshufb %xmm11,%xmm8: .Lk_sb2+16 lookups.
  146. .byte 102,15,56,0,211
  147. .byte 102,69,15,56,0,195
  // Keep copies of the sb1 results; combine the sb2 halves.
  148. movdqa %xmm0,%xmm3
  149. movdqa %xmm6,%xmm11
  150. pxor %xmm5,%xmm2
  151. pxor %xmm13,%xmm8
  // pshufb %xmm1,%xmm0 ; pshufb %xmm1,%xmm6: permute by the mc_forward row.
  152. .byte 102,15,56,0,193
  153. .byte 102,15,56,0,241
  154. addq $16,%r9
  155. pxor %xmm2,%xmm0
  156. pxor %xmm8,%xmm6
  // pshufb %xmm4,%xmm3 ; pshufb %xmm4,%xmm11: permute by the mc_backward row.
  157. .byte 102,15,56,0,220
  158. .byte 102,68,15,56,0,220
  159. addq $16,%r11
  160. pxor %xmm0,%xmm3
  161. pxor %xmm6,%xmm11
  // pshufb %xmm1,%xmm0 ; pshufb %xmm1,%xmm6: mc_forward row a second time.
  162. .byte 102,15,56,0,193
  163. .byte 102,15,56,0,241
  // Wrap r11 within four rows; subq sets ZF for the jnz at the end of
  // .Lenc2x_entry (nothing in between modifies flags).
  164. andq $0x30,%r11
  165. subq $1,%rax
  166. pxor %xmm3,%xmm0
  167. pxor %xmm11,%xmm6
  168. .Lenc2x_entry:
  // First half of a round for both blocks: nibble split, then .Lk_inv
  // lookups (xmm10 = .Lk_inv; .Lk_inv+16 is loaded into xmm5/xmm13).
  169. movdqa %xmm9,%xmm1
  170. movdqa %xmm9,%xmm7
  171. movdqa .Lk_inv+16(%rip),%xmm5
  172. movdqa %xmm5,%xmm13
  173. pandn %xmm0,%xmm1
  174. pandn %xmm6,%xmm7
  175. psrld $4,%xmm1
  176. psrld $4,%xmm7
  177. pand %xmm9,%xmm0
  178. pand %xmm9,%xmm6
  // pshufb %xmm0,%xmm5 ; pshufb %xmm6,%xmm13: .Lk_inv+16 by low nibbles.
  179. .byte 102,15,56,0,232
  180. .byte 102,68,15,56,0,238
  181. movdqa %xmm10,%xmm3
  182. movdqa %xmm10,%xmm11
  // xmm0/xmm6 = low nibbles xor high nibbles.
  183. pxor %xmm1,%xmm0
  184. pxor %xmm7,%xmm6
  // pshufb %xmm1,%xmm3 ; pshufb %xmm7,%xmm11: .Lk_inv by high nibbles.
  185. .byte 102,15,56,0,217
  186. .byte 102,68,15,56,0,223
  187. movdqa %xmm10,%xmm4
  188. movdqa %xmm10,%xmm12
  189. pxor %xmm5,%xmm3
  190. pxor %xmm13,%xmm11
  // pshufb %xmm0,%xmm4 ; pshufb %xmm6,%xmm12: .Lk_inv by the nibble xor.
  191. .byte 102,15,56,0,224
  192. .byte 102,68,15,56,0,230
  193. movdqa %xmm10,%xmm2
  194. movdqa %xmm10,%xmm8
  195. pxor %xmm5,%xmm4
  196. pxor %xmm13,%xmm12
  // pshufb %xmm3,%xmm2 ; pshufb %xmm11,%xmm8: second-level .Lk_inv lookups.
  197. .byte 102,15,56,0,211
  198. .byte 102,69,15,56,0,195
  199. movdqa %xmm10,%xmm3
  200. movdqa %xmm10,%xmm11
  201. pxor %xmm0,%xmm2
  202. pxor %xmm6,%xmm8
  // pshufb %xmm4,%xmm3 ; pshufb %xmm12,%xmm11: second-level .Lk_inv lookups.
  203. .byte 102,15,56,0,220
  204. .byte 102,69,15,56,0,220
  // Next round key, shared by both blocks.
  205. movdqu (%r9),%xmm5
  206. pxor %xmm1,%xmm3
  207. pxor %xmm7,%xmm11
  // ZF comes from subq $1,%rax in the loop body, or on the first pass from
  // the addq $16,%r9 in the prologue.
  208. jnz .Lenc2x_loop
  // Last round: .Lk_sbo lookups xor the last round key, then a byte
  // permutation of both results by row r11 of .Lk_sr.
  209. movdqa -96(%r10),%xmm4
  210. movdqa -80(%r10),%xmm0
  211. movdqa %xmm4,%xmm12
  212. movdqa %xmm0,%xmm6
  // pshufb %xmm2,%xmm4 ; pshufb %xmm8,%xmm12
  213. .byte 102,15,56,0,226
  214. .byte 102,69,15,56,0,224
  215. pxor %xmm5,%xmm4
  216. pxor %xmm5,%xmm12
  // pshufb %xmm3,%xmm0 ; pshufb %xmm11,%xmm6
  217. .byte 102,15,56,0,195
  218. .byte 102,65,15,56,0,243
  219. movdqa 64(%r11,%r10,1),%xmm1
  220. pxor %xmm4,%xmm0
  221. pxor %xmm12,%xmm6
  // pshufb %xmm1,%xmm0 ; pshufb %xmm1,%xmm6
  222. .byte 102,15,56,0,193
  223. .byte 102,15,56,0,241
  // rep ret
  224. .byte 0xf3,0xc3
  225. .cfi_endproc
  226. .size _vpaes_encrypt_core_2x,.-_vpaes_encrypt_core_2x
  // _vpaes_decrypt_core: decrypt the single 16-byte block held in %xmm0.
  //
  // In:   %xmm0 = input block.
  //       %rdx  = key schedule; the 32-bit value at 240(%rdx) is the number of
  //               .Ldec_loop iterations to run.
  //       %xmm9 = 0x0F byte mask, %xmm10 = .Lk_inv, %xmm11 = .Lk_inv+16, as
  //               loaded by _vpaes_preheat.
  // Out:  %xmm0 = output block.
  // Clobbers: %rax, %r9, %r10, %r11, %xmm1-%xmm5, flags.
  //
  // ".byte 102,15,56,0,N" is a hand-encoded pshufb and
  // ".byte 102,15,58,15,N,imm" a hand-encoded palignr; decoded operands are
  // given in the comments. ".byte 0xf3,0xc3" is "rep ret".
  227. .type _vpaes_decrypt_core,@function
  228. .align 16
  229. _vpaes_decrypt_core:
  230. .cfi_startproc
  // r9 = round-key cursor, eax = loop count. The scalar instructions that
  // follow are interleaved with the input transform; together they compute
  // r11 = .Lk_dsbd + (((count * 16) xor 0x30) and 0x30).
  231. movq %rdx,%r9
  232. movl 240(%rdx),%eax
  // Input transform: xmm1 = high nibbles, xmm0 = low nibbles, looked up in
  // the two .Lk_dipt rows and xored with the first round key (xmm5).
  233. movdqa %xmm9,%xmm1
  234. movdqa .Lk_dipt(%rip),%xmm2
  235. pandn %xmm0,%xmm1
  236. movq %rax,%r11
  237. psrld $4,%xmm1
  238. movdqu (%r9),%xmm5
  239. shlq $4,%r11
  240. pand %xmm9,%xmm0
  // pshufb %xmm0,%xmm2: .Lk_dipt by low nibbles.
  241. .byte 102,15,56,0,208
  242. movdqa .Lk_dipt+16(%rip),%xmm0
  243. xorq $0x30,%r11
  // r10 anchors the decryption tables by fixed displacement:
  // -32 = .Lk_dsb9, 0 = .Lk_dsbd, +32 = .Lk_dsbb, +64 = .Lk_dsbe, +96 = .Lk_dsbo.
  244. leaq .Lk_dsbd(%rip),%r10
  // pshufb %xmm1,%xmm0: .Lk_dipt+16 by high nibbles.
  245. .byte 102,15,56,0,193
  246. andq $0x30,%r11
  247. pxor %xmm5,%xmm2
  // xmm5 = last row of .Lk_mc_forward; it is rotated once per loop iteration.
  248. movdqa .Lk_mc_forward+48(%rip),%xmm5
  249. pxor %xmm2,%xmm0
  250. addq $16,%r9
  251. addq %r10,%r11
  252. jmp .Ldec_entry
  253. .align 16
  254. .Ldec_loop:
  // Second half of a round. xmm2/xmm3 are the index vectors from .Ldec_entry
  // and xmm0 holds the round key loaded there. Four table stages follow
  // (.Lk_dsb9, .Lk_dsbd, .Lk_dsbb, .Lk_dsbe); before each stage after the
  // first, the accumulator xmm0 is permuted by xmm5.
  // Stage .Lk_dsb9:
  255. movdqa -32(%r10),%xmm4
  256. movdqa -16(%r10),%xmm1
  // pshufb %xmm2,%xmm4
  257. .byte 102,15,56,0,226
  // pshufb %xmm3,%xmm1
  258. .byte 102,15,56,0,203
  259. pxor %xmm4,%xmm0
  // Stage .Lk_dsbd:
  260. movdqa 0(%r10),%xmm4
  261. pxor %xmm1,%xmm0
  262. movdqa 16(%r10),%xmm1
  // pshufb %xmm2,%xmm4
  263. .byte 102,15,56,0,226
  // pshufb %xmm5,%xmm0: permute the accumulator.
  264. .byte 102,15,56,0,197
  // pshufb %xmm3,%xmm1
  265. .byte 102,15,56,0,203
  266. pxor %xmm4,%xmm0
  // Stage .Lk_dsbb:
  267. movdqa 32(%r10),%xmm4
  268. pxor %xmm1,%xmm0
  269. movdqa 48(%r10),%xmm1
  // pshufb %xmm2,%xmm4
  270. .byte 102,15,56,0,226
  // pshufb %xmm5,%xmm0: permute the accumulator.
  271. .byte 102,15,56,0,197
  // pshufb %xmm3,%xmm1
  272. .byte 102,15,56,0,203
  273. pxor %xmm4,%xmm0
  // Stage .Lk_dsbe:
  274. movdqa 64(%r10),%xmm4
  275. pxor %xmm1,%xmm0
  276. movdqa 80(%r10),%xmm1
  // pshufb %xmm2,%xmm4
  277. .byte 102,15,56,0,226
  // pshufb %xmm5,%xmm0: permute the accumulator.
  278. .byte 102,15,56,0,197
  // pshufb %xmm3,%xmm1
  279. .byte 102,15,56,0,203
  280. pxor %xmm4,%xmm0
  281. addq $16,%r9
  // palignr $12,%xmm5,%xmm5: rotate the permutation in xmm5 right by 12 bytes.
  282. .byte 102,15,58,15,237,12
  283. pxor %xmm1,%xmm0
  // Sets ZF for the jnz at the end of .Ldec_entry; nothing in between
  // modifies flags.
  284. subq $1,%rax
  285. .Ldec_entry:
  // First half of a round: nibble split and .Lk_inv lookups, leaving index
  // vectors in xmm2/xmm3 and the next round key in xmm0.
  286. movdqa %xmm9,%xmm1
  287. pandn %xmm0,%xmm1
  288. movdqa %xmm11,%xmm2
  289. psrld $4,%xmm1
  290. pand %xmm9,%xmm0
  // pshufb %xmm0,%xmm2: .Lk_inv+16 by low nibbles.
  291. .byte 102,15,56,0,208
  292. movdqa %xmm10,%xmm3
  // xmm0 = low nibbles xor high nibbles.
  293. pxor %xmm1,%xmm0
  // pshufb %xmm1,%xmm3: .Lk_inv by high nibbles.
  294. .byte 102,15,56,0,217
  295. movdqa %xmm10,%xmm4
  296. pxor %xmm2,%xmm3
  // pshufb %xmm0,%xmm4: .Lk_inv by the nibble xor.
  297. .byte 102,15,56,0,224
  298. pxor %xmm2,%xmm4
  299. movdqa %xmm10,%xmm2
  // pshufb %xmm3,%xmm2: second-level .Lk_inv lookup.
  300. .byte 102,15,56,0,211
  301. movdqa %xmm10,%xmm3
  302. pxor %xmm0,%xmm2
  // pshufb %xmm4,%xmm3: second-level .Lk_inv lookup.
  303. .byte 102,15,56,0,220
  // xmm0 is free now; reuse it for the next round key.
  304. movdqu (%r9),%xmm0
  305. pxor %xmm1,%xmm3
  // ZF comes from subq $1,%rax in the loop body, or on the first pass from
  // the addq %r10,%r11 in the prologue.
  306. jnz .Ldec_loop
  // Last round: .Lk_dsbo lookups (+96/+112 from r10) xor the last round key,
  // then a byte permutation loaded from -352(%r11). .Lk_dsbd - 352 is the
  // address of .Lk_sr, so this selects the .Lk_sr row chosen in the prologue.
  307. movdqa 96(%r10),%xmm4
  // pshufb %xmm2,%xmm4
  308. .byte 102,15,56,0,226
  309. pxor %xmm0,%xmm4
  310. movdqa 112(%r10),%xmm0
  311. movdqa -352(%r11),%xmm2
  // pshufb %xmm3,%xmm0
  312. .byte 102,15,56,0,195
  313. pxor %xmm4,%xmm0
  // pshufb %xmm2,%xmm0
  314. .byte 102,15,56,0,194
  // rep ret
  315. .byte 0xf3,0xc3
  316. .cfi_endproc
  317. .size _vpaes_decrypt_core,.-_vpaes_decrypt_core
  // _vpaes_schedule_core: expand a user key into a key schedule.
  //
  // In:   %rdi = user key bytes.
  //       %esi = key size in bits (compared against 192 to pick the path).
  //       %rdx = where the first schedule entry is written; moved by +16 or
  //              -16 per entry by _vpaes_schedule_mangle.
  //       %rcx = zero when building an encryption schedule, non-zero for
  //              decryption.
  //       %r8  = initial byte offset into .Lk_sr (kept within 0..0x30).
  // Out:  schedule entries stored through %rdx; %xmm0-%xmm7 are zeroed
  //       before returning.
  // Clobbers: %rsi, %rdx, %r8, %r10, %r11, %xmm0-%xmm15, flags.
  //
  // Register roles while running: xmm7 = previous schedule value, xmm8 =
  // round-constant vector from .Lk_rcon, xmm6 = extra key material on the
  // 192- and 256-bit paths, r10 = address of .Lk_sr.
  // ".byte 102,15,56,0,N" is pshufb, ".byte 102,15,58,15,N,imm" is palignr,
  // ".byte 0xf3,0xc3" is "rep ret".
  318. .type _vpaes_schedule_core,@function
  319. .align 16
  320. _vpaes_schedule_core:
  321. .cfi_startproc
  // Load the shared constants, the round-constant vector and the first 16 key
  // bytes. xmm3 keeps the untransformed key for the decryption case.
  322. call _vpaes_preheat
  323. movdqa .Lk_rcon(%rip),%xmm8
  324. movdqu (%rdi),%xmm0
  325. movdqa %xmm0,%xmm3
  // Transform the key through the .Lk_ipt tables; xmm7 = transformed key.
  326. leaq .Lk_ipt(%rip),%r11
  327. call _vpaes_schedule_transform
  328. movdqa %xmm0,%xmm7
  329. leaq .Lk_sr(%rip),%r10
  330. testq %rcx,%rcx
  331. jnz .Lschedule_am_decrypting
  // Encrypting: the first schedule entry is the transformed key.
  332. movdqu %xmm0,(%rdx)
  333. jmp .Lschedule_go
  334. .Lschedule_am_decrypting:
  // Decrypting: the first entry written is the untransformed key permuted by
  // row r8 of .Lk_sr; r8 is then flipped with xor 0x30.
  335. movdqa (%r8,%r10,1),%xmm1
  // pshufb %xmm1,%xmm3
  336. .byte 102,15,56,0,217
  337. movdqu %xmm3,(%rdx)
  338. xorq $0x30,%r8
  339. .Lschedule_go:
  // Dispatch on key size: above 192 bits, exactly 192 bits, otherwise fall
  // through to the 128-bit path.
  340. cmpl $192,%esi
  341. ja .Lschedule_256
  342. je .Lschedule_192
  343. .Lschedule_128:
  // 128-bit path: 10 rounds; every round but the last is written out by
  // _vpaes_schedule_mangle, the last one by .Lschedule_mangle_last.
  344. movl $10,%esi
  345. .Loop_schedule_128:
  346. call _vpaes_schedule_round
  347. decq %rsi
  348. jz .Lschedule_mangle_last
  349. call _vpaes_schedule_mangle
  350. jmp .Loop_schedule_128
  351. .align 16
  352. .Lschedule_192:
  // 192-bit path: load key bytes 8..23, transform them (r11 still points at
  // .Lk_ipt), and keep the result in xmm6 with its low 8 bytes cleared.
  353. movdqu 8(%rdi),%xmm0
  354. call _vpaes_schedule_transform
  355. movdqa %xmm0,%xmm6
  356. pxor %xmm4,%xmm4
  357. movhlps %xmm4,%xmm6
  // Four passes; each pass runs two schedule rounds and, except for the final
  // pass, writes three schedule entries.
  358. movl $4,%esi
  359. .Loop_schedule_192:
  360. call _vpaes_schedule_round
  // palignr $8,%xmm6,%xmm0: xmm0 = high half of xmm6 followed by low half of
  // xmm0.
  361. .byte 102,15,58,15,198,8
  362. call _vpaes_schedule_mangle
  363. call _vpaes_schedule_192_smear
  364. call _vpaes_schedule_mangle
  365. call _vpaes_schedule_round
  366. decq %rsi
  367. jz .Lschedule_mangle_last
  368. call _vpaes_schedule_mangle
  369. call _vpaes_schedule_192_smear
  370. jmp .Loop_schedule_192
  371. .align 16
  372. .Lschedule_256:
  // 256-bit path: load and transform key bytes 16..31, then 7 passes.
  373. movdqu 16(%rdi),%xmm0
  374. call _vpaes_schedule_transform
  375. movl $7,%esi
  376. .Loop_schedule_256:
  // Write out the current value and remember it in xmm6, then run a full
  // round.
  377. call _vpaes_schedule_mangle
  378. movdqa %xmm0,%xmm6
  379. call _vpaes_schedule_round
  380. decq %rsi
  381. jz .Lschedule_mangle_last
  382. call _vpaes_schedule_mangle
  // Second step of the pass: broadcast the top dword of xmm0, run the low
  // round with xmm7 temporarily replaced by xmm6, then restore xmm7 from
  // xmm5.
  383. pshufd $0xFF,%xmm0,%xmm0
  384. movdqa %xmm7,%xmm5
  385. movdqa %xmm6,%xmm7
  386. call _vpaes_schedule_low_round
  387. movdqa %xmm5,%xmm7
  388. jmp .Loop_schedule_256
  389. .align 16
  390. .Lschedule_mangle_last:
  // Write the final schedule entry. Decryption uses the .Lk_deskew tables
  // and steps rdx back by 16. Encryption first permutes xmm0 by row r8 of
  // .Lk_sr, uses the .Lk_opt tables, and steps rdx forward by 16 (+32 then
  // -16).
  391. leaq .Lk_deskew(%rip),%r11
  392. testq %rcx,%rcx
  393. jnz .Lschedule_mangle_last_dec
  394. movdqa (%r8,%r10,1),%xmm1
  // pshufb %xmm1,%xmm0
  395. .byte 102,15,56,0,193
  396. leaq .Lk_opt(%rip),%r11
  397. addq $32,%rdx
  398. .Lschedule_mangle_last_dec:
  399. addq $-16,%rdx
  400. pxor .Lk_s63(%rip),%xmm0
  401. call _vpaes_schedule_transform
  402. movdqu %xmm0,(%rdx)
  // Clear the registers that held key material before returning.
  403. pxor %xmm0,%xmm0
  404. pxor %xmm1,%xmm1
  405. pxor %xmm2,%xmm2
  406. pxor %xmm3,%xmm3
  407. pxor %xmm4,%xmm4
  408. pxor %xmm5,%xmm5
  409. pxor %xmm6,%xmm6
  410. pxor %xmm7,%xmm7
  // rep ret
  411. .byte 0xf3,0xc3
  412. .cfi_endproc
  413. .size _vpaes_schedule_core,.-_vpaes_schedule_core
  // _vpaes_schedule_192_smear: helper for the 192-bit key schedule path.
  //
  // In:   %xmm6 = current extra key material, %xmm7 = previous schedule value.
  // Out:  %xmm0 = xmm6 xor (dwords 0,0,0,2 of xmm6) xor (dwords 2,3,3,3 of
  //               xmm7), listed from lowest dword to highest;
  //       %xmm6 = the same value with its low 8 bytes cleared.
  // Clobbers: %xmm1 (left as zero).
  414. .type _vpaes_schedule_192_smear,@function
  415. .align 16
  416. _vpaes_schedule_192_smear:
  417. .cfi_startproc
  // xmm1 = dwords {0,0,0,2} of xmm6; xmm0 = dwords {2,3,3,3} of xmm7.
  418. pshufd $0x80,%xmm6,%xmm1
  419. pshufd $0xFE,%xmm7,%xmm0
  420. pxor %xmm1,%xmm6
  // xmm1 is zeroed here so movhlps below can clear the low half of xmm6.
  421. pxor %xmm1,%xmm1
  422. pxor %xmm0,%xmm6
  423. movdqa %xmm6,%xmm0
  424. movhlps %xmm1,%xmm6
  // rep ret
  425. .byte 0xf3,0xc3
  426. .cfi_endproc
  427. .size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
  // _vpaes_schedule_round: one step of the key schedule, with a second entry
  // point _vpaes_schedule_low_round that skips the round-constant and
  // rotation prologue.
  //
  // In:   %xmm0 = input value, %xmm7 = previous schedule value,
  //       %xmm8 = round-constant vector (used by _vpaes_schedule_round only),
  //       %xmm9-%xmm13 = constants as loaded by _vpaes_preheat.
  // Out:  %xmm0 = %xmm7 = next schedule value.
  // Clobbers: %xmm1-%xmm4; _vpaes_schedule_round also rotates %xmm8.
  //
  // ".byte 102,[REX,]15,58,15,N,imm" is palignr, ".byte 102,15,56,0,N" is
  // pshufb, ".byte 0xf3,0xc3" is "rep ret".
  428. .type _vpaes_schedule_round,@function
  429. .align 16
  430. _vpaes_schedule_round:
  431. .cfi_startproc
  // Take the top byte of xmm8 into the low byte of an otherwise zero xmm1,
  // rotate xmm8 so the next byte is on top, and xor the extracted byte into
  // xmm7.
  432. pxor %xmm1,%xmm1
  // palignr $15,%xmm8,%xmm1
  433. .byte 102,65,15,58,15,200,15
  // palignr $15,%xmm8,%xmm8
  434. .byte 102,69,15,58,15,192,15
  435. pxor %xmm1,%xmm7
  // Broadcast the top dword of xmm0, then rotate the register right by one
  // byte.
  436. pshufd $0xFF,%xmm0,%xmm0
  // palignr $1,%xmm0,%xmm0
  437. .byte 102,15,58,15,192,1
  438. _vpaes_schedule_low_round:
  // Smear xmm7: xor it with itself shifted left by 4 bytes, then xor that
  // with itself shifted left by 8 bytes, then xor in .Lk_s63.
  439. movdqa %xmm7,%xmm1
  440. pslldq $4,%xmm7
  441. pxor %xmm1,%xmm7
  442. movdqa %xmm7,%xmm1
  443. pslldq $8,%xmm7
  444. pxor %xmm1,%xmm7
  445. pxor .Lk_s63(%rip),%xmm7
  // Same nibble split and .Lk_inv lookup sequence as the cipher cores:
  // xmm1 = high nibbles, xmm0 = low nibbles, xmm10 = .Lk_inv,
  // xmm11 = .Lk_inv+16. Leaves index vectors in xmm2 and xmm3.
  446. movdqa %xmm9,%xmm1
  447. pandn %xmm0,%xmm1
  448. psrld $4,%xmm1
  449. pand %xmm9,%xmm0
  450. movdqa %xmm11,%xmm2
  // pshufb %xmm0,%xmm2
  451. .byte 102,15,56,0,208
  452. pxor %xmm1,%xmm0
  453. movdqa %xmm10,%xmm3
  // pshufb %xmm1,%xmm3
  454. .byte 102,15,56,0,217
  455. pxor %xmm2,%xmm3
  456. movdqa %xmm10,%xmm4
  // pshufb %xmm0,%xmm4
  457. .byte 102,15,56,0,224
  458. pxor %xmm2,%xmm4
  459. movdqa %xmm10,%xmm2
  // pshufb %xmm3,%xmm2
  460. .byte 102,15,56,0,211
  461. pxor %xmm0,%xmm2
  462. movdqa %xmm10,%xmm3
  // pshufb %xmm4,%xmm3
  463. .byte 102,15,56,0,220
  464. pxor %xmm1,%xmm3
  // .Lk_sb1 lookups (xmm13/xmm12 per _vpaes_preheat) by the index vectors.
  465. movdqa %xmm13,%xmm4
  // pshufb %xmm2,%xmm4
  466. .byte 102,15,56,0,226
  467. movdqa %xmm12,%xmm0
  // pshufb %xmm3,%xmm0
  468. .byte 102,15,56,0,195
  469. pxor %xmm4,%xmm0
  // Combine with the smeared previous value and keep the result in both
  // xmm0 and xmm7.
  470. pxor %xmm7,%xmm0
  471. movdqa %xmm0,%xmm7
  // rep ret
  472. .byte 0xf3,0xc3
  473. .cfi_endproc
  474. .size _vpaes_schedule_round,.-_vpaes_schedule_round
  // _vpaes_schedule_transform: apply a two-table nibble transform to %xmm0.
  //
  // In:   %xmm0 = value to transform.
  //       %r11  = address of two consecutive 16-byte tables (16-byte aligned,
  //               since they are read with movdqa).
  //       %xmm9 = 0x0F byte mask as loaded by _vpaes_preheat.
  // Out:  %xmm0 = table0[low nibble] xor table1[high nibble], per byte.
  // Clobbers: %xmm1, %xmm2. %r11 is preserved.
  475. .type _vpaes_schedule_transform,@function
  476. .align 16
  477. _vpaes_schedule_transform:
  478. .cfi_startproc
  // xmm1 = high nibbles, xmm0 = low nibbles.
  479. movdqa %xmm9,%xmm1
  480. pandn %xmm0,%xmm1
  481. psrld $4,%xmm1
  482. pand %xmm9,%xmm0
  483. movdqa (%r11),%xmm2
  // pshufb %xmm0,%xmm2: first table indexed by low nibbles.
  484. .byte 102,15,56,0,208
  485. movdqa 16(%r11),%xmm0
  // pshufb %xmm1,%xmm0: second table indexed by high nibbles.
  486. .byte 102,15,56,0,193
  487. pxor %xmm2,%xmm0
  // rep ret
  488. .byte 0xf3,0xc3
  489. .cfi_endproc
  490. .size _vpaes_schedule_transform,.-_vpaes_schedule_transform
  // _vpaes_schedule_mangle: convert the schedule value in %xmm0 to its stored
  // form and write it to the key schedule.
  //
  // In:   %xmm0 = schedule value (left unmodified).
  //       %rcx  = zero for an encryption schedule, non-zero for decryption.
  //       %rdx  = schedule write cursor.
  //       %r8   = byte offset of the current row in .Lk_sr, %r10 = address of
  //               .Lk_sr.
  //       %xmm9 = 0x0F byte mask (used on the decryption path).
  // Out:  16 bytes stored at the updated %rdx (+16 when encrypting, -16 when
  //       decrypting); %r8 = (%r8 - 16) and 0x30.
  // Clobbers: %xmm1-%xmm5, flags; %r11 on the decryption path.
  //
  // ".byte 102,15,56,0,N" is pshufb; ".byte 0xf3,0xc3" is "rep ret".
  491. .type _vpaes_schedule_mangle,@function
  492. .align 16
  493. _vpaes_schedule_mangle:
  494. .cfi_startproc
  // Work on a copy in xmm4; xmm5 = first row of .Lk_mc_forward.
  495. movdqa %xmm0,%xmm4
  496. movdqa .Lk_mc_forward(%rip),%xmm5
  497. testq %rcx,%rcx
  498. jnz .Lschedule_mangle_dec
  // Encryption: xor with .Lk_s63, then xmm3 = xor of the value permuted by
  // xmm5 once, twice and three times.
  499. addq $16,%rdx
  500. pxor .Lk_s63(%rip),%xmm4
  // pshufb %xmm5,%xmm4
  501. .byte 102,15,56,0,229
  502. movdqa %xmm4,%xmm3
  // pshufb %xmm5,%xmm4
  503. .byte 102,15,56,0,229
  504. pxor %xmm4,%xmm3
  // pshufb %xmm5,%xmm4
  505. .byte 102,15,56,0,229
  506. pxor %xmm4,%xmm3
  507. jmp .Lschedule_mangle_both
  508. .align 16
  509. .Lschedule_mangle_dec:
  // Decryption: four table stages reached from r11 = .Lk_dksd
  // (+0 .Lk_dksd, +32 .Lk_dksb, +64 .Lk_dkse, +96 .Lk_dks9). Each stage xors
  // in a low-nibble and a high-nibble lookup; between stages the accumulator
  // xmm3 is permuted by xmm5.
  510. leaq .Lk_dksd(%rip),%r11
  // xmm1 = high nibbles, xmm4 = low nibbles.
  511. movdqa %xmm9,%xmm1
  512. pandn %xmm4,%xmm1
  513. psrld $4,%xmm1
  514. pand %xmm9,%xmm4
  // Stage .Lk_dksd:
  515. movdqa 0(%r11),%xmm2
  // pshufb %xmm4,%xmm2
  516. .byte 102,15,56,0,212
  517. movdqa 16(%r11),%xmm3
  // pshufb %xmm1,%xmm3
  518. .byte 102,15,56,0,217
  519. pxor %xmm2,%xmm3
  // pshufb %xmm5,%xmm3
  520. .byte 102,15,56,0,221
  // Stage .Lk_dksb:
  521. movdqa 32(%r11),%xmm2
  // pshufb %xmm4,%xmm2
  522. .byte 102,15,56,0,212
  523. pxor %xmm3,%xmm2
  524. movdqa 48(%r11),%xmm3
  // pshufb %xmm1,%xmm3
  525. .byte 102,15,56,0,217
  526. pxor %xmm2,%xmm3
  // pshufb %xmm5,%xmm3
  527. .byte 102,15,56,0,221
  // Stage .Lk_dkse:
  528. movdqa 64(%r11),%xmm2
  // pshufb %xmm4,%xmm2
  529. .byte 102,15,56,0,212
  530. pxor %xmm3,%xmm2
  531. movdqa 80(%r11),%xmm3
  // pshufb %xmm1,%xmm3
  532. .byte 102,15,56,0,217
  533. pxor %xmm2,%xmm3
  // pshufb %xmm5,%xmm3
  534. .byte 102,15,56,0,221
  // Stage .Lk_dks9 (no permutation afterwards):
  535. movdqa 96(%r11),%xmm2
  // pshufb %xmm4,%xmm2
  536. .byte 102,15,56,0,212
  537. pxor %xmm3,%xmm2
  538. movdqa 112(%r11),%xmm3
  // pshufb %xmm1,%xmm3
  539. .byte 102,15,56,0,217
  540. pxor %xmm2,%xmm3
  // The decryption schedule is written from high addresses downwards.
  541. addq $-16,%rdx
  542. .Lschedule_mangle_both:
  // Permute the result by the current .Lk_sr row, step r8 to the previous
  // row (wrapping within 0..0x30), and store the entry.
  543. movdqa (%r8,%r10,1),%xmm1
  // pshufb %xmm1,%xmm3
  544. .byte 102,15,56,0,217
  545. addq $-16,%r8
  546. andq $0x30,%r8
  547. movdqu %xmm3,(%rdx)
  // rep ret
  548. .byte 0xf3,0xc3
  549. .cfi_endproc
  550. .size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
  // vpaes_set_encrypt_key: build an encryption key schedule.
  //
  // In:   %rdi = user key bytes, %esi = key size in bits, %rdx = key
  //       schedule to fill (both passed through to _vpaes_schedule_core).
  // Out:  %eax = 0. The value (bits / 32) + 5 is stored as a 32-bit integer
  //       at 240(%rdx); the cipher cores read it as their loop count.
  // Clobbers: whatever _vpaes_schedule_core clobbers, plus %rcx and %r8.
  551. .globl vpaes_set_encrypt_key
  552. .hidden vpaes_set_encrypt_key
  553. .type vpaes_set_encrypt_key,@function
  554. .align 16
  555. vpaes_set_encrypt_key:
  556. .cfi_startproc
  // Test-only hook: record that this function ran by setting byte 5 of
  // BORINGSSL_function_hit.
  557. #ifdef BORINGSSL_DISPATCH_TEST
  558. .extern BORINGSSL_function_hit
  559. .hidden BORINGSSL_function_hit
  560. movb $1,BORINGSSL_function_hit+5(%rip)
  561. #endif
  // eax = bits / 32 + 5 (9, 11 or 13 for 128-, 192- or 256-bit keys).
  562. movl %esi,%eax
  563. shrl $5,%eax
  564. addl $5,%eax
  565. movl %eax,240(%rdx)
  // rcx = 0 selects the encryption schedule; r8 = 0x30 is the starting
  // .Lk_sr row offset.
  566. movl $0,%ecx
  567. movl $0x30,%r8d
  568. call _vpaes_schedule_core
  // Always report success.
  569. xorl %eax,%eax
  // rep ret
  570. .byte 0xf3,0xc3
  571. .cfi_endproc
  572. .size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
  // vpaes_set_decrypt_key: build a decryption key schedule.
  //
  // In:   %rdi = user key bytes, %esi = key size in bits, %rdx = key
  //       schedule to fill.
  // Out:  %eax = 0. The value (bits / 32) + 5 is stored as a 32-bit integer
  //       at 240(%rdx).
  // Clobbers: whatever _vpaes_schedule_core clobbers, plus %rcx and %r8.
  573. .globl vpaes_set_decrypt_key
  574. .hidden vpaes_set_decrypt_key
  575. .type vpaes_set_decrypt_key,@function
  576. .align 16
  577. vpaes_set_decrypt_key:
  578. .cfi_startproc
  // eax = bits / 32 + 5, stored as the loop count.
  579. movl %esi,%eax
  580. shrl $5,%eax
  581. addl $5,%eax
  582. movl %eax,240(%rdx)
  // Point rdx at entry (count + 1) of the schedule: the decryption schedule
  // is written from there downwards (see _vpaes_schedule_mangle).
  583. shll $4,%eax
  584. leaq 16(%rdx,%rax,1),%rdx
  // rcx = 1 selects the decryption schedule.
  585. movl $1,%ecx
  // r8 = ((bits / 2) and 32) xor 32: 32 for 128- and 256-bit keys, 0 for
  // 192-bit keys. This is the starting .Lk_sr row offset.
  586. movl %esi,%r8d
  587. shrl $1,%r8d
  588. andl $32,%r8d
  589. xorl $32,%r8d
  590. call _vpaes_schedule_core
  // Always report success.
  591. xorl %eax,%eax
  // rep ret
  592. .byte 0xf3,0xc3
  593. .cfi_endproc
  594. .size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
  // vpaes_encrypt: encrypt one 16-byte block.
  //
  // In:   %rdi = 16-byte input (may be unaligned), %rsi = 16-byte output
  //       (may be unaligned), %rdx = key schedule as consumed by
  //       _vpaes_encrypt_core.
  // Clobbers: %rax, %r9-%r11, %xmm0-%xmm5, %xmm9-%xmm15, flags.
  595. .globl vpaes_encrypt
  596. .hidden vpaes_encrypt
  597. .type vpaes_encrypt,@function
  598. .align 16
  599. vpaes_encrypt:
  600. .cfi_startproc
  // Test-only hook: record that this function ran by setting byte 4 of
  // BORINGSSL_function_hit.
  601. #ifdef BORINGSSL_DISPATCH_TEST
  602. .extern BORINGSSL_function_hit
  603. .hidden BORINGSSL_function_hit
  604. movb $1,BORINGSSL_function_hit+4(%rip)
  605. #endif
  // Load the block, load the constant registers, run the core, store the
  // result.
  606. movdqu (%rdi),%xmm0
  607. call _vpaes_preheat
  608. call _vpaes_encrypt_core
  609. movdqu %xmm0,(%rsi)
  // rep ret
  610. .byte 0xf3,0xc3
  611. .cfi_endproc
  612. .size vpaes_encrypt,.-vpaes_encrypt
  // vpaes_decrypt: decrypt one 16-byte block.
  //
  // In:   %rdi = 16-byte input (may be unaligned), %rsi = 16-byte output
  //       (may be unaligned), %rdx = key schedule as consumed by
  //       _vpaes_decrypt_core.
  // Clobbers: %rax, %r9-%r11, %xmm0-%xmm5, %xmm9-%xmm15, flags.
  613. .globl vpaes_decrypt
  614. .hidden vpaes_decrypt
  615. .type vpaes_decrypt,@function
  616. .align 16
  617. vpaes_decrypt:
  618. .cfi_startproc
  // Load the block, load the constant registers, run the core, store the
  // result.
  619. movdqu (%rdi),%xmm0
  620. call _vpaes_preheat
  621. call _vpaes_decrypt_core
  622. movdqu %xmm0,(%rsi)
  // rep ret
  623. .byte 0xf3,0xc3
  624. .cfi_endproc
  625. .size vpaes_decrypt,.-vpaes_decrypt
  // vpaes_cbc_encrypt: CBC-mode encryption or decryption of whole blocks.
  //
  // In:   %rdi = input, %rsi = output, %rdx = length in bytes,
  //       %rcx = key schedule, %r8 = 16-byte chaining value (read and
  //       updated in place), %r9d = non-zero to encrypt, zero to decrypt.
  // Returns immediately, leaving (%r8) untouched, when the length is below
  // 16. Only whole 16-byte blocks are processed; a trailing partial block is
  // ignored.
  // Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r9-%r11, %xmm0-%xmm7,
  //           %xmm9-%xmm15, flags.
  626. .globl vpaes_cbc_encrypt
  627. .hidden vpaes_cbc_encrypt
  628. .type vpaes_cbc_encrypt,@function
  629. .align 16
  630. vpaes_cbc_encrypt:
  631. .cfi_startproc
  // The cores expect the key schedule in rdx, so swap: rdx = key, rcx =
  // length. rcx is kept biased by -16 so the carry flag signals "no full
  // block left".
  632. xchgq %rcx,%rdx
  633. subq $16,%rcx
  634. jc .Lcbc_abort
  // xmm6 = chaining value. rsi becomes (output - input) so that
  // (%rsi,%rdi,1) addresses the output while only rdi is advanced.
  635. movdqu (%r8),%xmm6
  636. subq %rdi,%rsi
  637. call _vpaes_preheat
  638. cmpl $0,%r9d
  639. je .Lcbc_dec_loop
  640. jmp .Lcbc_enc_loop
  641. .align 16
  642. .Lcbc_enc_loop:
  // Encrypt: xor the input block with the chaining value, encrypt, and use
  // the result as the next chaining value.
  643. movdqu (%rdi),%xmm0
  644. pxor %xmm6,%xmm0
  645. call _vpaes_encrypt_core
  646. movdqa %xmm0,%xmm6
  647. movdqu %xmm0,(%rsi,%rdi,1)
  648. leaq 16(%rdi),%rdi
  649. subq $16,%rcx
  650. jnc .Lcbc_enc_loop
  651. jmp .Lcbc_done
  652. .align 16
  .Lcbc_dec_loop_doc_placeholder_removed:
  653. .Lcbc_dec_loop:
  // Decrypt: keep the input block in xmm7 (the core does not touch xmm7),
  // decrypt, xor with the chaining value, then the saved input block
  // becomes the next chaining value.
  654. movdqu (%rdi),%xmm0
  655. movdqa %xmm0,%xmm7
  656. call _vpaes_decrypt_core
  657. pxor %xmm6,%xmm0
  658. movdqa %xmm7,%xmm6
  659. movdqu %xmm0,(%rsi,%rdi,1)
  660. leaq 16(%rdi),%rdi
  661. subq $16,%rcx
  662. jnc .Lcbc_dec_loop
  663. .Lcbc_done:
  // Write the final chaining value back for the caller.
  664. movdqu %xmm6,(%r8)
  665. .Lcbc_abort:
  // rep ret
  666. .byte 0xf3,0xc3
  667. .cfi_endproc
  668. .size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
  // vpaes_ctr32_encrypt_blocks: CTR-mode encryption with a 32-bit counter
  // held big-endian in bytes 12..15 of the counter block.
  //
  // In:   %rdi = input, %rsi = output, %rdx = number of 16-byte blocks,
  //       %rcx = key schedule, %r8 = 16-byte initial counter block (read
  //       only; the updated counter is not written back).
  // Returns immediately when the block count is zero.
  // Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r9-%r11, %xmm0-%xmm15, flags.
  //
  // An odd block count is handled by encrypting one block with
  // _vpaes_encrypt_core first; the remainder is processed two blocks at a
  // time with _vpaes_encrypt_core_2x. The single-block call must come first
  // because the pair loop reuses xmm14/xmm15, which _vpaes_encrypt_core
  // expects to hold preheated constants.
  669. .globl vpaes_ctr32_encrypt_blocks
  670. .hidden vpaes_ctr32_encrypt_blocks
  671. .type vpaes_ctr32_encrypt_blocks,@function
  672. .align 16
  673. vpaes_ctr32_encrypt_blocks:
  674. .cfi_startproc
  // The cores expect the key schedule in rdx: rdx = key, rcx = block count.
  675. xchgq %rcx,%rdx
  676. testq %rcx,%rcx
  677. jz .Lctr32_abort
  // xmm0 = counter block as given; xmm8 = increment of 1 in the top dword.
  // rsi becomes (output - input) so (%rsi,%rdi,1) addresses the output.
  678. movdqu (%r8),%xmm0
  679. movdqa .Lctr_add_one(%rip),%xmm8
  680. subq %rdi,%rsi
  681. call _vpaes_preheat
  // xmm6 = counter block with bytes 12..15 reversed, so the counter can be
  // advanced with paddd.
  682. movdqa %xmm0,%xmm6
  683. pshufb .Lrev_ctr(%rip),%xmm6
  // Odd count: handle a single block first.
  684. testq $1,%rcx
  685. jz .Lctr32_prep_loop
  // Encrypt the unmodified counter block in xmm0, xor with the input block,
  // and advance the byte-swapped counter in xmm6 by one.
  686. movdqu (%rdi),%xmm7
  687. call _vpaes_encrypt_core
  688. pxor %xmm7,%xmm0
  689. paddd %xmm8,%xmm6
  690. movdqu %xmm0,(%rsi,%rdi,1)
  // leaq does not modify flags, so the jz tests the subq result.
  691. subq $1,%rcx
  692. leaq 16(%rdi),%rdi
  693. jz .Lctr32_done
  694. .Lctr32_prep_loop:
  // xmm14 = counter for the first block of each pair, xmm15 = counter + 1
  // for the second; both are kept in byte-swapped form.
  695. movdqa %xmm6,%xmm14
  696. movdqa %xmm6,%xmm15
  697. paddd %xmm8,%xmm15
  698. .Lctr32_loop:
  // Swap the two counters back to their big-endian form in xmm0 and xmm6.
  699. movdqa .Lrev_ctr(%rip),%xmm1
  700. movdqa %xmm14,%xmm0
  701. movdqa %xmm15,%xmm6
  // pshufb %xmm1,%xmm0
  702. .byte 102,15,56,0,193
  // pshufb %xmm1,%xmm6
  703. .byte 102,15,56,0,241
  704. call _vpaes_encrypt_core_2x
  // Xor the two encrypted counter blocks with the input, advance both
  // counters by two, and store the output.
  705. movdqu (%rdi),%xmm1
  706. movdqu 16(%rdi),%xmm2
  707. movdqa .Lctr_add_two(%rip),%xmm3
  708. pxor %xmm1,%xmm0
  709. pxor %xmm2,%xmm6
  710. paddd %xmm3,%xmm14
  711. paddd %xmm3,%xmm15
  712. movdqu %xmm0,(%rsi,%rdi,1)
  713. movdqu %xmm6,16(%rsi,%rdi,1)
  // leaq does not modify flags, so the jnz tests the subq result.
  714. subq $2,%rcx
  715. leaq 32(%rdi),%rdi
  716. jnz .Lctr32_loop
  717. .Lctr32_done:
  718. .Lctr32_abort:
  // rep ret
  719. .byte 0xf3,0xc3
  720. .cfi_endproc
  721. .size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
  // _vpaes_preheat: load the constant tables shared by the cipher cores and
  // the key schedule into fixed registers.
  //
  // Out:  %xmm10 = .Lk_inv,    %xmm11 = .Lk_inv+16,
  //       %xmm9  = .Lk_s0F,
  //       %xmm13 = .Lk_sb1,    %xmm12 = .Lk_sb1+16,
  //       %xmm15 = .Lk_sb2,    %xmm14 = .Lk_sb2+16.
  // Clobbers: %r10.
  //
  // All tables are addressed by fixed displacement from .Lk_s0F, so the
  // order and sizes of .Lk_inv, .Lk_s0F, .Lk_ipt, .Lk_sb1 and .Lk_sb2 in
  // _vpaes_consts must not change.
  722. .type _vpaes_preheat,@function
  723. .align 16
  724. _vpaes_preheat:
  725. .cfi_startproc
  726. leaq .Lk_s0F(%rip),%r10
  // .Lk_inv occupies the 32 bytes before .Lk_s0F.
  727. movdqa -32(%r10),%xmm10
  728. movdqa -16(%r10),%xmm11
  729. movdqa 0(%r10),%xmm9
  // +48 skips .Lk_s0F (16 bytes) and .Lk_ipt (32 bytes) to reach .Lk_sb1;
  // .Lk_sb2 follows at +80.
  730. movdqa 48(%r10),%xmm13
  731. movdqa 64(%r10),%xmm12
  732. movdqa 80(%r10),%xmm15
  733. movdqa 96(%r10),%xmm14
  // rep ret
  734. .byte 0xf3,0xc3
  735. .cfi_endproc
  736. .size _vpaes_preheat,.-_vpaes_preheat
  // _vpaes_consts: constant tables used by every routine in this file.
  //
  // The block is 64-byte aligned and every table is a multiple of 16 bytes,
  // so each 16-byte row can be read with movdqa. Several tables are reached
  // by fixed displacement from a neighbouring label (see the notes below), so
  // the order and sizes of the tables must not change.
  737. .type _vpaes_consts,@object
  738. .align 64
  739. _vpaes_consts:
  // Loaded into xmm10 (row 0) and xmm11 (row 1) by _vpaes_preheat; row 1 is
  // also read directly by _vpaes_encrypt_core_2x.
  740. .Lk_inv:
  741. .quad 0x0E05060F0D080180, 0x040703090A0B0C02
  742. .quad 0x01040A060F0B0780, 0x030D0E0C02050809
  // Per-byte 0x0F mask used to split bytes into nibbles; loaded into xmm9.
  // _vpaes_preheat addresses its other tables relative to this label.
  743. .Lk_s0F:
  744. .quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
  // Input-transform tables for the encryption cores and the key schedule.
  745. .Lk_ipt:
  746. .quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
  747. .quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
  // Loaded into xmm13/xmm12 by _vpaes_preheat (at .Lk_s0F+48).
  748. .Lk_sb1:
  749. .quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
  750. .quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
  // Loaded into xmm15/xmm14 by _vpaes_preheat (at .Lk_s0F+80).
  751. .Lk_sb2:
  752. .quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
  753. .quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
  // Last-round tables of the encryption cores, read at .Lk_mc_backward-96.
  754. .Lk_sbo:
  755. .quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
  756. .quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
  // Four byte-permutation rows, read at .Lk_mc_backward-64 by the encryption
  // cores. Row 3 seeds _vpaes_decrypt_core; row 0 is used by
  // _vpaes_schedule_mangle.
  757. .Lk_mc_forward:
  758. .quad 0x0407060500030201, 0x0C0F0E0D080B0A09
  759. .quad 0x080B0A0904070605, 0x000302010C0F0E0D
  760. .quad 0x0C0F0E0D080B0A09, 0x0407060500030201
  761. .quad 0x000302010C0F0E0D, 0x080B0A0904070605
  // Four byte-permutation rows; the encryption cores use this label as their
  // table base register.
  762. .Lk_mc_backward:
  763. .quad 0x0605040702010003, 0x0E0D0C0F0A09080B
  764. .quad 0x020100030E0D0C0F, 0x0A09080B06050407
  765. .quad 0x0E0D0C0F0A09080B, 0x0605040702010003
  766. .quad 0x0A09080B06050407, 0x020100030E0D0C0F
  // Four byte-permutation rows (row 0 is the identity). Read at
  // .Lk_mc_backward+64 by the encryption cores, at .Lk_dsbd-352 by
  // _vpaes_decrypt_core, and through r10 by the key schedule.
  767. .Lk_sr:
  768. .quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
  769. .quad 0x030E09040F0A0500, 0x0B06010C07020D08
  770. .quad 0x0F060D040B020900, 0x070E050C030A0108
  771. .quad 0x0B0E0104070A0D00, 0x0306090C0F020508
  // Round-constant vector loaded into xmm8 by _vpaes_schedule_core and
  // consumed one byte per call by _vpaes_schedule_round.
  772. .Lk_rcon:
  773. .quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
  // Constant xored in by the key-schedule routines.
  774. .Lk_s63:
  775. .quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
  // Transform tables for the last entry of an encryption schedule.
  776. .Lk_opt:
  777. .quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
  778. .quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
  // Transform tables for the last entry of a decryption schedule.
  779. .Lk_deskew:
  780. .quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
  781. .quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
  // .Lk_dksd, .Lk_dksb, .Lk_dkse and .Lk_dks9 are the four table stages of
  // the decryption path of _vpaes_schedule_mangle, read at offsets 0, 32, 64
  // and 96 from .Lk_dksd.
  782. .Lk_dksd:
  783. .quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
  784. .quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
  785. .Lk_dksb:
  786. .quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
  787. .quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
  788. .Lk_dkse:
  789. .quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
  790. .quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
  791. .Lk_dks9:
  792. .quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
  793. .quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
  // Input-transform tables for _vpaes_decrypt_core.
  794. .Lk_dipt:
  795. .quad 0x0F505B040B545F00, 0x154A411E114E451A
  796. .quad 0x86E383E660056500, 0x12771772F491F194
  // .Lk_dsb9, .Lk_dsbd, .Lk_dsbb, .Lk_dsbe and .Lk_dsbo are read by
  // _vpaes_decrypt_core at offsets -32, 0, 32, 64 and 96 from .Lk_dsbd.
  797. .Lk_dsb9:
  798. .quad 0x851C03539A86D600, 0xCAD51F504F994CC9
  799. .quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
  800. .Lk_dsbd:
  801. .quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
  802. .quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
  803. .Lk_dsbb:
  804. .quad 0xD022649296B44200, 0x602646F6B0F2D404
  805. .quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
  806. .Lk_dsbe:
  807. .quad 0x46F2929626D4D000, 0x2242600464B4F6B0
  808. .quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
  809. .Lk_dsbo:
  810. .quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
  811. .quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
  // pshufb mask that reverses bytes 12..15 and leaves bytes 0..11 in place;
  // used by vpaes_ctr32_encrypt_blocks.
  812. .Lrev_ctr:
  813. .quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
  // paddd operands that add 1 or 2 to the top dword only; used by
  // vpaes_ctr32_encrypt_blocks.
  814. .Lctr_add_one:
  815. .quad 0x0000000000000000, 0x0000000100000000
  816. .Lctr_add_two:
  817. .quad 0x0000000000000000, 0x0000000200000000
  // NUL-terminated ASCII string: "Vector Permutation AES for x86_64/SSSE3,
  // Mike Hamburg (Stanford University)".
  818. .byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
  819. .align 64
  820. .size _vpaes_consts,.-_vpaes_consts
  821. #endif
  822. .section .note.GNU-stack,"",@progbits