vpaes-x86_64.S 17 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130
  1. // This file is generated from a similarly-named Perl script in the BoringSSL
  2. // source tree. Do not edit by hand.
  // Build guards. When the compiler reports MemorySanitizer via __has_feature,
  // OPENSSL_NO_ASM is defined (unless it already is), which makes the
  // __x86_64__ guard below exclude this file's contents.
  3. #if defined(__has_feature)
  4. #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
  5. #define OPENSSL_NO_ASM
  6. #endif
  7. #endif
  // Everything that follows is assembled only for x86-64 with assembly enabled.
  // The matching #endif is the last line of the file.
  8. #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
  // Optional symbol-prefixing header, pulled in only when BORINGSSL_PREFIX is set.
  9. #if defined(BORINGSSL_PREFIX)
  10. #include <boringssl_prefix_symbols_asm.h>
  11. #endif
  12. .text
  // _vpaes_encrypt_core: encrypt the single 16-byte block held in %xmm0.
  //
  // In:    %xmm0        = input block
  //        %rdx         = key schedule; the 32-bit value at 240(%rdx) is the
  //                       loop count, round keys are read 16 bytes at a time
  //                       starting at (%rdx)
  //        %xmm9-%xmm15 = constants as loaded by _vpaes_preheat
  // Out:   %xmm0        = result block
  // Clobb: %rax, %r9, %r10, %r11, %xmm1-%xmm5, flags
  //
  // Encoding notes: every `.byte 102,15,56,0,<modrm>` is a hand-encoded SSSE3
  // pshufb (66 0F 38 00 /r) between two xmm registers, and `.byte 0xf3,0xc3`
  // is `rep ret`.
  13. .p2align 4
  14. _vpaes_encrypt_core:
  15. movq %rdx,%r9
  16. movq $16,%r11
  17. movl 240(%rdx),%eax
  // Split the input into high nibbles (%xmm1, shifted down) and low nibbles
  // (%xmm0), look each half up in the L$k_ipt table pair, and XOR in the first
  // 16 bytes of the key schedule.
  18. movdqa %xmm9,%xmm1
  19. movdqa L$k_ipt(%rip),%xmm2
  20. pandn %xmm0,%xmm1
  21. movdqu (%r9),%xmm5
  22. psrld $4,%xmm1
  23. pand %xmm9,%xmm0
  24. .byte 102,15,56,0,208
  25. movdqa L$k_ipt+16(%rip),%xmm0
  26. .byte 102,15,56,0,193
  27. pxor %xmm5,%xmm2
  // This addq is the last flag-writing instruction before the first pass
  // through L$enc_entry, so the jnz there sees ZF clear (the advanced key
  // pointer is non-zero) and enters the loop.
  28. addq $16,%r9
  29. pxor %xmm2,%xmm0
  // %r10 is the base for the L$k_mc_forward (-64), L$k_mc_backward (0),
  // L$k_sr (+64) and L$k_sbo (-96/-80) accesses below.
  30. leaq L$k_mc_backward(%rip),%r10
  31. jmp L$enc_entry
  32. .p2align 4
  // Middle rounds: combine lookups from the %xmm12-%xmm15 tables with the
  // round key in %xmm5, then mix using the shuffle masks at
  // L$k_mc_forward+%r11 and L$k_mc_backward+%r11.
  33. L$enc_loop:
  34. movdqa %xmm13,%xmm4
  35. movdqa %xmm12,%xmm0
  36. .byte 102,15,56,0,226
  37. .byte 102,15,56,0,195
  38. pxor %xmm5,%xmm4
  39. movdqa %xmm15,%xmm5
  40. pxor %xmm4,%xmm0
  41. movdqa -64(%r11,%r10,1),%xmm1
  42. .byte 102,15,56,0,234
  43. movdqa (%r11,%r10,1),%xmm4
  44. movdqa %xmm14,%xmm2
  45. .byte 102,15,56,0,211
  46. movdqa %xmm0,%xmm3
  47. pxor %xmm5,%xmm2
  48. .byte 102,15,56,0,193
  49. addq $16,%r9
  50. pxor %xmm2,%xmm0
  51. .byte 102,15,56,0,220
  // %r11 steps through 0,16,32,48 (mod 64) to select the per-round mask.
  52. addq $16,%r11
  53. pxor %xmm0,%xmm3
  54. .byte 102,15,56,0,193
  55. andq $0x30,%r11
  // The ZF produced here is consumed by the jnz at the end of L$enc_entry;
  // only SSE moves/logic/shuffles (which leave EFLAGS alone) sit in between.
  56. subq $1,%rax
  57. pxor %xmm3,%xmm0
  // Shared top-of-round step: nibble split followed by lookups in the
  // %xmm10/%xmm11 tables, leaving indices in %xmm2/%xmm3 and the next round
  // key in %xmm5.
  58. L$enc_entry:
  59. movdqa %xmm9,%xmm1
  60. movdqa %xmm11,%xmm5
  61. pandn %xmm0,%xmm1
  62. psrld $4,%xmm1
  63. pand %xmm9,%xmm0
  64. .byte 102,15,56,0,232
  65. movdqa %xmm10,%xmm3
  66. pxor %xmm1,%xmm0
  67. .byte 102,15,56,0,217
  68. movdqa %xmm10,%xmm4
  69. pxor %xmm5,%xmm3
  70. .byte 102,15,56,0,224
  71. movdqa %xmm10,%xmm2
  72. pxor %xmm5,%xmm4
  73. .byte 102,15,56,0,211
  74. movdqa %xmm10,%xmm3
  75. pxor %xmm0,%xmm2
  76. .byte 102,15,56,0,220
  77. movdqu (%r9),%xmm5
  78. pxor %xmm1,%xmm3
  79. jnz L$enc_loop
  // Final round: with the table layout in _vpaes_consts, -96(%r10)/-80(%r10)
  // are L$k_sbo/L$k_sbo+16 and 64(%r11,%r10,1) is the L$k_sr entry selected
  // by %r11.
  80. movdqa -96(%r10),%xmm4
  81. movdqa -80(%r10),%xmm0
  82. .byte 102,15,56,0,226
  83. pxor %xmm5,%xmm4
  84. .byte 102,15,56,0,195
  85. movdqa 64(%r11,%r10,1),%xmm1
  86. pxor %xmm4,%xmm0
  87. .byte 102,15,56,0,193
  88. .byte 0xf3,0xc3
  // _vpaes_encrypt_core_2x: two-block variant of _vpaes_encrypt_core. Each
  // step of the single-block routine is issued twice, once for the block in
  // %xmm0 and once for the block in %xmm6.
  //
  // In:    %xmm0, %xmm6 = the two input blocks
  //        %rdx         = key schedule; loop count read from 240(%rdx)
  //        %xmm9, %xmm10 = constants as loaded by _vpaes_preheat
  // Out:   %xmm0, %xmm6 = the two result blocks
  // Clobb: %rax, %r9, %r10, %r11, %xmm1-%xmm5, %xmm7, %xmm8, %xmm11-%xmm13,
  //        flags
  //
  // Unlike the single-block routine, the L$k_sb1, L$k_sb2 and L$k_inv+16
  // tables are reloaded from memory on every round instead of being taken from
  // %xmm11-%xmm15, so those preheated registers need not be valid on entry
  // (%xmm11-%xmm13 are used as scratch here).
  //
  // Encoding notes: `.byte 102,[68|65|69,]15,56,0,<modrm>` is a hand-encoded
  // pshufb; the optional second byte is a REX prefix selecting %xmm8-%xmm15.
  // `.byte 0xf3,0xc3` is `rep ret`.
  89. .p2align 4
  90. _vpaes_encrypt_core_2x:
  91. movq %rdx,%r9
  92. movq $16,%r11
  93. movl 240(%rdx),%eax
  // Input transform for both blocks: nibble split, L$k_ipt lookups, XOR with
  // the first 16 bytes of the key schedule.
  94. movdqa %xmm9,%xmm1
  95. movdqa %xmm9,%xmm7
  96. movdqa L$k_ipt(%rip),%xmm2
  97. movdqa %xmm2,%xmm8
  98. pandn %xmm0,%xmm1
  99. pandn %xmm6,%xmm7
  100. movdqu (%r9),%xmm5
  101. psrld $4,%xmm1
  102. psrld $4,%xmm7
  103. pand %xmm9,%xmm0
  104. pand %xmm9,%xmm6
  105. .byte 102,15,56,0,208
  106. .byte 102,68,15,56,0,198
  107. movdqa L$k_ipt+16(%rip),%xmm0
  108. movdqa %xmm0,%xmm6
  109. .byte 102,15,56,0,193
  110. .byte 102,15,56,0,247
  111. pxor %xmm5,%xmm2
  112. pxor %xmm5,%xmm8
  // Last flag-writing instruction before the first pass through
  // L$enc2x_entry; it leaves ZF clear for a non-zero pointer, so the jnz
  // there enters the loop.
  113. addq $16,%r9
  114. pxor %xmm2,%xmm0
  115. pxor %xmm8,%xmm6
  116. leaq L$k_mc_backward(%rip),%r10
  117. jmp L$enc2x_entry
  118. .p2align 4
  // Middle rounds for both blocks.
  119. L$enc2x_loop:
  120. movdqa L$k_sb1(%rip),%xmm4
  121. movdqa L$k_sb1+16(%rip),%xmm0
  122. movdqa %xmm4,%xmm12
  123. movdqa %xmm0,%xmm6
  124. .byte 102,15,56,0,226
  125. .byte 102,69,15,56,0,224
  126. .byte 102,15,56,0,195
  127. .byte 102,65,15,56,0,243
  128. pxor %xmm5,%xmm4
  129. pxor %xmm5,%xmm12
  130. movdqa L$k_sb2(%rip),%xmm5
  131. movdqa %xmm5,%xmm13
  132. pxor %xmm4,%xmm0
  133. pxor %xmm12,%xmm6
  134. movdqa -64(%r11,%r10,1),%xmm1
  135. .byte 102,15,56,0,234
  136. .byte 102,69,15,56,0,232
  137. movdqa (%r11,%r10,1),%xmm4
  138. movdqa L$k_sb2+16(%rip),%xmm2
  139. movdqa %xmm2,%xmm8
  140. .byte 102,15,56,0,211
  141. .byte 102,69,15,56,0,195
  142. movdqa %xmm0,%xmm3
  143. movdqa %xmm6,%xmm11
  144. pxor %xmm5,%xmm2
  145. pxor %xmm13,%xmm8
  146. .byte 102,15,56,0,193
  147. .byte 102,15,56,0,241
  148. addq $16,%r9
  149. pxor %xmm2,%xmm0
  150. pxor %xmm8,%xmm6
  151. .byte 102,15,56,0,220
  152. .byte 102,68,15,56,0,220
  // %r11 steps through 0,16,32,48 (mod 64) to select the per-round mask.
  153. addq $16,%r11
  154. pxor %xmm0,%xmm3
  155. pxor %xmm6,%xmm11
  156. .byte 102,15,56,0,193
  157. .byte 102,15,56,0,241
  158. andq $0x30,%r11
  // ZF from this subq is what the jnz at the end of L$enc2x_entry tests; the
  // SSE instructions in between do not write EFLAGS.
  159. subq $1,%rax
  160. pxor %xmm3,%xmm0
  161. pxor %xmm11,%xmm6
  // Shared top-of-round step for both blocks.
  162. L$enc2x_entry:
  163. movdqa %xmm9,%xmm1
  164. movdqa %xmm9,%xmm7
  165. movdqa L$k_inv+16(%rip),%xmm5
  166. movdqa %xmm5,%xmm13
  167. pandn %xmm0,%xmm1
  168. pandn %xmm6,%xmm7
  169. psrld $4,%xmm1
  170. psrld $4,%xmm7
  171. pand %xmm9,%xmm0
  172. pand %xmm9,%xmm6
  173. .byte 102,15,56,0,232
  174. .byte 102,68,15,56,0,238
  175. movdqa %xmm10,%xmm3
  176. movdqa %xmm10,%xmm11
  177. pxor %xmm1,%xmm0
  178. pxor %xmm7,%xmm6
  179. .byte 102,15,56,0,217
  180. .byte 102,68,15,56,0,223
  181. movdqa %xmm10,%xmm4
  182. movdqa %xmm10,%xmm12
  183. pxor %xmm5,%xmm3
  184. pxor %xmm13,%xmm11
  185. .byte 102,15,56,0,224
  186. .byte 102,68,15,56,0,230
  187. movdqa %xmm10,%xmm2
  188. movdqa %xmm10,%xmm8
  189. pxor %xmm5,%xmm4
  190. pxor %xmm13,%xmm12
  191. .byte 102,15,56,0,211
  192. .byte 102,69,15,56,0,195
  193. movdqa %xmm10,%xmm3
  194. movdqa %xmm10,%xmm11
  195. pxor %xmm0,%xmm2
  196. pxor %xmm6,%xmm8
  197. .byte 102,15,56,0,220
  198. .byte 102,69,15,56,0,220
  199. movdqu (%r9),%xmm5
  200. pxor %xmm1,%xmm3
  201. pxor %xmm7,%xmm11
  202. jnz L$enc2x_loop
  // Final round for both blocks: -96(%r10)/-80(%r10) are L$k_sbo/L$k_sbo+16
  // and 64(%r11,%r10,1) is the L$k_sr entry selected by %r11.
  203. movdqa -96(%r10),%xmm4
  204. movdqa -80(%r10),%xmm0
  205. movdqa %xmm4,%xmm12
  206. movdqa %xmm0,%xmm6
  207. .byte 102,15,56,0,226
  208. .byte 102,69,15,56,0,224
  209. pxor %xmm5,%xmm4
  210. pxor %xmm5,%xmm12
  211. .byte 102,15,56,0,195
  212. .byte 102,65,15,56,0,243
  213. movdqa 64(%r11,%r10,1),%xmm1
  214. pxor %xmm4,%xmm0
  215. pxor %xmm12,%xmm6
  216. .byte 102,15,56,0,193
  217. .byte 102,15,56,0,241
  218. .byte 0xf3,0xc3
  // _vpaes_decrypt_core: decrypt the single 16-byte block held in %xmm0.
  //
  // In:    %xmm0        = input block
  //        %rdx         = key schedule; loop count read from 240(%rdx), round
  //                       keys read 16 bytes at a time starting at (%rdx)
  //        %xmm9-%xmm11 = constants as loaded by _vpaes_preheat
  // Out:   %xmm0        = result block
  // Clobb: %rax, %r9, %r10, %r11, %xmm1-%xmm5, flags
  //
  // Encoding notes: `.byte 102,15,56,0,<modrm>` is a hand-encoded pshufb,
  // `.byte 102,15,58,15,<modrm>,<imm>` is a hand-encoded palignr, and
  // `.byte 0xf3,0xc3` is `rep ret`.
  219. .p2align 4
  220. _vpaes_decrypt_core:
  221. movq %rdx,%r9
  222. movl 240(%rdx),%eax
  // Input transform through the L$k_dipt table pair, interleaved with the
  // computation of %r11 = &L$k_dsbd + (((count << 4) ^ 0x30) & 0x30).
  223. movdqa %xmm9,%xmm1
  224. movdqa L$k_dipt(%rip),%xmm2
  225. pandn %xmm0,%xmm1
  226. movq %rax,%r11
  227. psrld $4,%xmm1
  228. movdqu (%r9),%xmm5
  229. shlq $4,%r11
  230. pand %xmm9,%xmm0
  231. .byte 102,15,56,0,208
  232. movdqa L$k_dipt+16(%rip),%xmm0
  233. xorq $0x30,%r11
  // %r10 is the base for the L$k_dsb9 (-32), L$k_dsbd (0), L$k_dsbb (+32),
  // L$k_dsbe (+64) and L$k_dsbo (+96) table pairs used below.
  234. leaq L$k_dsbd(%rip),%r10
  235. .byte 102,15,56,0,193
  236. andq $0x30,%r11
  237. pxor %xmm5,%xmm2
  // %xmm5 holds a shuffle mask from the L$k_mc_forward table; it is rotated by
  // the palignr at the bottom of each loop iteration.
  238. movdqa L$k_mc_forward+48(%rip),%xmm5
  239. pxor %xmm2,%xmm0
  240. addq $16,%r9
  // Last flag-writing instruction before the first pass through L$dec_entry;
  // the sum is a non-zero address, so the jnz there enters the loop.
  241. addq %r10,%r11
  242. jmp L$dec_entry
  243. .p2align 4
  // Middle rounds: four table-pair lookups (dsb9, dsbd, dsbb, dsbe), with the
  // accumulator shuffled by the %xmm5 mask between them.
  244. L$dec_loop:
  245. movdqa -32(%r10),%xmm4
  246. movdqa -16(%r10),%xmm1
  247. .byte 102,15,56,0,226
  248. .byte 102,15,56,0,203
  249. pxor %xmm4,%xmm0
  250. movdqa 0(%r10),%xmm4
  251. pxor %xmm1,%xmm0
  252. movdqa 16(%r10),%xmm1
  253. .byte 102,15,56,0,226
  254. .byte 102,15,56,0,197
  255. .byte 102,15,56,0,203
  256. pxor %xmm4,%xmm0
  257. movdqa 32(%r10),%xmm4
  258. pxor %xmm1,%xmm0
  259. movdqa 48(%r10),%xmm1
  260. .byte 102,15,56,0,226
  261. .byte 102,15,56,0,197
  262. .byte 102,15,56,0,203
  263. pxor %xmm4,%xmm0
  264. movdqa 64(%r10),%xmm4
  265. pxor %xmm1,%xmm0
  266. movdqa 80(%r10),%xmm1
  267. .byte 102,15,56,0,226
  268. .byte 102,15,56,0,197
  269. .byte 102,15,56,0,203
  270. pxor %xmm4,%xmm0
  271. addq $16,%r9
  // palignr $12,%xmm5,%xmm5: rotate the shuffle mask for the next round.
  272. .byte 102,15,58,15,237,12
  273. pxor %xmm1,%xmm0
  // ZF from this subq is what the jnz at the end of L$dec_entry tests; the
  // SSE instructions in between do not write EFLAGS.
  274. subq $1,%rax
  // Shared top-of-round step: nibble split and lookups in the %xmm10/%xmm11
  // tables; the next round key is loaded into %xmm0.
  275. L$dec_entry:
  276. movdqa %xmm9,%xmm1
  277. pandn %xmm0,%xmm1
  278. movdqa %xmm11,%xmm2
  279. psrld $4,%xmm1
  280. pand %xmm9,%xmm0
  281. .byte 102,15,56,0,208
  282. movdqa %xmm10,%xmm3
  283. pxor %xmm1,%xmm0
  284. .byte 102,15,56,0,217
  285. movdqa %xmm10,%xmm4
  286. pxor %xmm2,%xmm3
  287. .byte 102,15,56,0,224
  288. pxor %xmm2,%xmm4
  289. movdqa %xmm10,%xmm2
  290. .byte 102,15,56,0,211
  291. movdqa %xmm10,%xmm3
  292. pxor %xmm0,%xmm2
  293. .byte 102,15,56,0,220
  294. movdqu (%r9),%xmm0
  295. pxor %xmm1,%xmm3
  296. jnz L$dec_loop
  // Final round using L$k_dsbo (96/112 from %r10). With the table layout in
  // _vpaes_consts, L$k_dsbd-352 is L$k_sr, so -352(%r11) is the L$k_sr entry
  // selected by the offset computed at function entry.
  297. movdqa 96(%r10),%xmm4
  298. .byte 102,15,56,0,226
  299. pxor %xmm0,%xmm4
  300. movdqa 112(%r10),%xmm0
  301. movdqa -352(%r11),%xmm2
  302. .byte 102,15,56,0,195
  303. pxor %xmm4,%xmm0
  304. .byte 102,15,56,0,194
  305. .byte 0xf3,0xc3
  // _vpaes_schedule_core: expand a user key into a round-key schedule.
  //
  // In:    %rdi = user key bytes
  //        %esi = key size in bits; > 192 takes the 256-bit path, == 192 the
  //               192-bit path, anything else the 128-bit path
  //        %rdx = where the first round key is written; advanced or
  //               retreated by 16 per key by _vpaes_schedule_mangle
  //        %rcx = zero for an encryption schedule, non-zero for decryption
  //        %r8  = byte offset (0/16/32/48) of the initial L$k_sr shuffle mask
  // Out:   round keys stored through %rdx; %xmm0-%xmm7 are zeroed on return
  // Clobb: %rsi, %rdx, %r8, %r10, %r11, %xmm0-%xmm15 (via _vpaes_preheat),
  //        flags
  //
  // Encoding notes: `.byte 102,15,56,0,<modrm>` is a hand-encoded pshufb,
  // `.byte 102,15,58,15,<modrm>,<imm>` a hand-encoded palignr, and
  // `.byte 0xf3,0xc3` is `rep ret`.
  306. .p2align 4
  307. _vpaes_schedule_core:
  308. call _vpaes_preheat
  // %xmm8 carries the round-constant state consumed by _vpaes_schedule_round.
  309. movdqa L$k_rcon(%rip),%xmm8
  // Load the first 16 key bytes; keep a raw copy in %xmm3 and the
  // L$k_ipt-transformed copy in %xmm0/%xmm7.
  310. movdqu (%rdi),%xmm0
  311. movdqa %xmm0,%xmm3
  312. leaq L$k_ipt(%rip),%r11
  313. call _vpaes_schedule_transform
  314. movdqa %xmm0,%xmm7
  315. leaq L$k_sr(%rip),%r10
  316. testq %rcx,%rcx
  317. jnz L$schedule_am_decrypting
  // Encrypting: the first stored key is the transformed key.
  318. movdqu %xmm0,(%rdx)
  319. jmp L$schedule_go
  // Decrypting: the first stored key is the raw key shuffled by the L$k_sr
  // entry at %r8; %r8 is then toggled with 0x30.
  320. L$schedule_am_decrypting:
  321. movdqa (%r8,%r10,1),%xmm1
  322. .byte 102,15,56,0,217
  323. movdqu %xmm3,(%rdx)
  324. xorq $0x30,%r8
  325. L$schedule_go:
  326. cmpl $192,%esi
  327. ja L$schedule_256
  328. je L$schedule_192
  // 128-bit path: ten calls to _vpaes_schedule_round; every result except the
  // last is written by _vpaes_schedule_mangle, the last by
  // L$schedule_mangle_last.
  329. L$schedule_128:
  330. movl $10,%esi
  331. L$oop_schedule_128:
  332. call _vpaes_schedule_round
  333. decq %rsi
  334. jz L$schedule_mangle_last
  335. call _vpaes_schedule_mangle
  336. jmp L$oop_schedule_128
  337. .p2align 4
  // 192-bit path: transform key bytes 8..23, keep them in %xmm6 with the low
  // 64 bits cleared (movhlps from the zeroed %xmm4), then run four iterations
  // that each emit up to three round keys.
  338. L$schedule_192:
  339. movdqu 8(%rdi),%xmm0
  340. call _vpaes_schedule_transform
  341. movdqa %xmm0,%xmm6
  342. pxor %xmm4,%xmm4
  343. movhlps %xmm4,%xmm6
  344. movl $4,%esi
  345. L$oop_schedule_192:
  346. call _vpaes_schedule_round
  // palignr $8,%xmm6,%xmm0
  347. .byte 102,15,58,15,198,8
  348. call _vpaes_schedule_mangle
  349. call _vpaes_schedule_192_smear
  350. call _vpaes_schedule_mangle
  351. call _vpaes_schedule_round
  352. decq %rsi
  353. jz L$schedule_mangle_last
  354. call _vpaes_schedule_mangle
  355. call _vpaes_schedule_192_smear
  356. jmp L$oop_schedule_192
  357. .p2align 4
  // 256-bit path: transform key bytes 16..31, then seven iterations that
  // alternate a full round with a "low" round (no round constant or rotation)
  // run on the other key half, which is swapped in via %xmm6/%xmm7 and
  // restored from %xmm5 afterwards.
  358. L$schedule_256:
  359. movdqu 16(%rdi),%xmm0
  360. call _vpaes_schedule_transform
  361. movl $7,%esi
  362. L$oop_schedule_256:
  363. call _vpaes_schedule_mangle
  364. movdqa %xmm0,%xmm6
  365. call _vpaes_schedule_round
  366. decq %rsi
  367. jz L$schedule_mangle_last
  368. call _vpaes_schedule_mangle
  369. pshufd $0xFF,%xmm0,%xmm0
  370. movdqa %xmm7,%xmm5
  371. movdqa %xmm6,%xmm7
  372. call _vpaes_schedule_low_round
  373. movdqa %xmm5,%xmm7
  374. jmp L$oop_schedule_256
  375. .p2align 4
  // Write the final round key. Encrypting: shuffle by the current L$k_sr
  // entry, use the L$k_opt transform and step %rdx forward by 16 net
  // (+32 then -16). Decrypting: use the L$k_deskew transform and step %rdx
  // back by 16.
  376. L$schedule_mangle_last:
  377. leaq L$k_deskew(%rip),%r11
  378. testq %rcx,%rcx
  379. jnz L$schedule_mangle_last_dec
  380. movdqa (%r8,%r10,1),%xmm1
  381. .byte 102,15,56,0,193
  382. leaq L$k_opt(%rip),%r11
  383. addq $32,%rdx
  384. L$schedule_mangle_last_dec:
  385. addq $-16,%rdx
  386. pxor L$k_s63(%rip),%xmm0
  387. call _vpaes_schedule_transform
  388. movdqu %xmm0,(%rdx)
  // Clear the registers that held key-derived values before returning.
  389. pxor %xmm0,%xmm0
  390. pxor %xmm1,%xmm1
  391. pxor %xmm2,%xmm2
  392. pxor %xmm3,%xmm3
  393. pxor %xmm4,%xmm4
  394. pxor %xmm5,%xmm5
  395. pxor %xmm6,%xmm6
  396. pxor %xmm7,%xmm7
  397. .byte 0xf3,0xc3
  // _vpaes_schedule_192_smear: helper for the 192-bit key schedule.
  //
  // In:    %xmm6, %xmm7
  // Out:   %xmm0 = %xmm6 ^ pshufd($0x80, %xmm6) ^ pshufd($0xFE, %xmm7)
  //        %xmm6 = the same value with its low 64 bits cleared
  // Clobb: %xmm1 (left zero)
  398. .p2align 4
  399. _vpaes_schedule_192_smear:
  400. pshufd $0x80,%xmm6,%xmm1
  401. pshufd $0xFE,%xmm7,%xmm0
  402. pxor %xmm1,%xmm6
  403. pxor %xmm1,%xmm1
  404. pxor %xmm0,%xmm6
  405. movdqa %xmm6,%xmm0
  // movhlps copies the (zero) high half of %xmm1 into the low half of %xmm6.
  406. movhlps %xmm1,%xmm6
  // rep ret
  407. .byte 0xf3,0xc3
  // _vpaes_schedule_round: one key-schedule round.
  // _vpaes_schedule_low_round: second entry point that skips the
  // round-constant and rotation steps.
  //
  // In:    %xmm0        = input word source
  //        %xmm7        = previous round key state
  //        %xmm8        = round-constant state (full round only; rotated by
  //                       one byte per call)
  //        %xmm9-%xmm13 = constants as loaded by _vpaes_preheat
  // Out:   %xmm0 = %xmm7 = new round key state
  // Clobb: %xmm1-%xmm4
  //
  // Encoding notes: `.byte 102,[65|69,]15,58,15,<modrm>,<imm>` is a
  // hand-encoded palignr (optional REX prefix for %xmm8), `.byte
  // 102,15,56,0,<modrm>` a hand-encoded pshufb, `.byte 0xf3,0xc3` is `rep ret`.
  408. .p2align 4
  409. _vpaes_schedule_round:
  410. pxor %xmm1,%xmm1
  // palignr $15,%xmm8,%xmm1: byte 15 of %xmm8 lands in byte 0 of the zeroed
  // %xmm1.
  411. .byte 102,65,15,58,15,200,15
  // palignr $15,%xmm8,%xmm8: rotate %xmm8 by one byte for the next call.
  412. .byte 102,69,15,58,15,192,15
  413. pxor %xmm1,%xmm7
  // Broadcast the top dword of %xmm0, then rotate it by one byte
  // (palignr $1,%xmm0,%xmm0).
  414. pshufd $0xFF,%xmm0,%xmm0
  415. .byte 102,15,58,15,192,1
  416. _vpaes_schedule_low_round:
  // Smear %xmm7: x ^= x << 32 bits, then x ^= x << 64 bits, then XOR with
  // L$k_s63.
  417. movdqa %xmm7,%xmm1
  418. pslldq $4,%xmm7
  419. pxor %xmm1,%xmm7
  420. movdqa %xmm7,%xmm1
  421. pslldq $8,%xmm7
  422. pxor %xmm1,%xmm7
  423. pxor L$k_s63(%rip),%xmm7
  // Substitute the bytes of %xmm0: nibble split, lookups in the
  // %xmm10/%xmm11 tables, then output lookups in the %xmm13/%xmm12 tables.
  424. movdqa %xmm9,%xmm1
  425. pandn %xmm0,%xmm1
  426. psrld $4,%xmm1
  427. pand %xmm9,%xmm0
  428. movdqa %xmm11,%xmm2
  429. .byte 102,15,56,0,208
  430. pxor %xmm1,%xmm0
  431. movdqa %xmm10,%xmm3
  432. .byte 102,15,56,0,217
  433. pxor %xmm2,%xmm3
  434. movdqa %xmm10,%xmm4
  435. .byte 102,15,56,0,224
  436. pxor %xmm2,%xmm4
  437. movdqa %xmm10,%xmm2
  438. .byte 102,15,56,0,211
  439. pxor %xmm0,%xmm2
  440. movdqa %xmm10,%xmm3
  441. .byte 102,15,56,0,220
  442. pxor %xmm1,%xmm3
  443. movdqa %xmm13,%xmm4
  444. .byte 102,15,56,0,226
  445. movdqa %xmm12,%xmm0
  446. .byte 102,15,56,0,195
  447. pxor %xmm4,%xmm0
  // Fold in the smeared previous key; the result is both returned in %xmm0
  // and kept in %xmm7 for the next round.
  448. pxor %xmm7,%xmm0
  449. movdqa %xmm0,%xmm7
  450. .byte 0xf3,0xc3
  // _vpaes_schedule_transform: apply a nibble-indexed table pair to %xmm0.
  //
  // In:    %xmm0 = value to transform
  //        %r11  = address of a 32-byte table pair: (%r11) is indexed by the
  //                low nibble of each byte, 16(%r11) by the high nibble
  //        %xmm9 = low-nibble mask as loaded by _vpaes_preheat
  // Out:   %xmm0 = lookup(lo table, low nibbles) ^ lookup(hi table, high
  //                nibbles)
  // Clobb: %xmm1, %xmm2
  451. .p2align 4
  452. _vpaes_schedule_transform:
  // %xmm1 = high nibbles shifted down, %xmm0 = low nibbles.
  453. movdqa %xmm9,%xmm1
  454. pandn %xmm0,%xmm1
  455. psrld $4,%xmm1
  456. pand %xmm9,%xmm0
  457. movdqa (%r11),%xmm2
  // pshufb %xmm0,%xmm2
  458. .byte 102,15,56,0,208
  459. movdqa 16(%r11),%xmm0
  // pshufb %xmm1,%xmm0
  460. .byte 102,15,56,0,193
  461. pxor %xmm2,%xmm0
  // rep ret
  462. .byte 0xf3,0xc3
  // _vpaes_schedule_mangle: convert the round key in %xmm0 to its stored form
  // and write it to the key schedule.
  //
  // In:    %xmm0 = round key (left unmodified)
  //        %rcx  = zero when building an encryption schedule, non-zero for
  //                decryption
  //        %rdx  = current schedule pointer
  //        %r8   = current offset into L$k_sr; %r10 = &L$k_sr
  //        %xmm9 = low-nibble mask as loaded by _vpaes_preheat
  // Out:   16 bytes stored at the updated %rdx (+16 when encrypting, -16 when
  //        decrypting); %r8 = (%r8 - 16) & 0x30
  // Clobb: %xmm1-%xmm5, %r11 (decrypt path only), flags
  //
  // Encoding notes: `.byte 102,15,56,0,<modrm>` is a hand-encoded pshufb and
  // `.byte 0xf3,0xc3` is `rep ret`.
  463. .p2align 4
  464. _vpaes_schedule_mangle:
  465. movdqa %xmm0,%xmm4
  466. movdqa L$k_mc_forward(%rip),%xmm5
  467. testq %rcx,%rcx
  468. jnz L$schedule_mangle_dec
  // Encrypt path: XOR with L$k_s63, then accumulate three successive
  // L$k_mc_forward shuffles of the key into %xmm3.
  469. addq $16,%rdx
  470. pxor L$k_s63(%rip),%xmm4
  471. .byte 102,15,56,0,229
  472. movdqa %xmm4,%xmm3
  473. .byte 102,15,56,0,229
  474. pxor %xmm4,%xmm3
  475. .byte 102,15,56,0,229
  476. pxor %xmm4,%xmm3
  477. jmp L$schedule_mangle_both
  478. .p2align 4
  // Decrypt path: nibble split, then four table-pair lookups at offsets
  // 0/32/64/96 from L$k_dksd (L$k_dksd, L$k_dksb, L$k_dkse, L$k_dks9), with
  // the accumulator shuffled by %xmm5 between stages.
  479. L$schedule_mangle_dec:
  480. leaq L$k_dksd(%rip),%r11
  481. movdqa %xmm9,%xmm1
  482. pandn %xmm4,%xmm1
  483. psrld $4,%xmm1
  484. pand %xmm9,%xmm4
  485. movdqa 0(%r11),%xmm2
  486. .byte 102,15,56,0,212
  487. movdqa 16(%r11),%xmm3
  488. .byte 102,15,56,0,217
  489. pxor %xmm2,%xmm3
  490. .byte 102,15,56,0,221
  491. movdqa 32(%r11),%xmm2
  492. .byte 102,15,56,0,212
  493. pxor %xmm3,%xmm2
  494. movdqa 48(%r11),%xmm3
  495. .byte 102,15,56,0,217
  496. pxor %xmm2,%xmm3
  497. .byte 102,15,56,0,221
  498. movdqa 64(%r11),%xmm2
  499. .byte 102,15,56,0,212
  500. pxor %xmm3,%xmm2
  501. movdqa 80(%r11),%xmm3
  502. .byte 102,15,56,0,217
  503. pxor %xmm2,%xmm3
  504. .byte 102,15,56,0,221
  505. movdqa 96(%r11),%xmm2
  506. .byte 102,15,56,0,212
  507. pxor %xmm3,%xmm2
  508. movdqa 112(%r11),%xmm3
  509. .byte 102,15,56,0,217
  510. pxor %xmm2,%xmm3
  511. addq $-16,%rdx
  // Common tail: shuffle by the current L$k_sr entry, step %r8 to the
  // previous entry (mod 64) and store the result.
  512. L$schedule_mangle_both:
  513. movdqa (%r8,%r10,1),%xmm1
  514. .byte 102,15,56,0,217
  515. addq $-16,%r8
  516. andq $0x30,%r8
  517. movdqu %xmm3,(%rdx)
  518. .byte 0xf3,0xc3
  // _vpaes_set_encrypt_key: build an encryption key schedule.
  // Presumably called from C under the System V AMD64 convention (arguments
  // in %rdi, %rsi, %rdx) -- confirm against the C prototype.
  //
  // In:    %rdi = user key bytes (passed through to _vpaes_schedule_core)
  //        %esi = key size in bits
  //        %rdx = key structure; receives the round keys and, at offset 240,
  //               the value (bits >> 5) + 5
  // Out:   %eax = 0
  519. .globl _vpaes_set_encrypt_key
  520. .private_extern _vpaes_set_encrypt_key
  521. .p2align 4
  522. _vpaes_set_encrypt_key:
  // Dispatch-test builds record that this routine ran by setting byte 5 of
  // _BORINGSSL_function_hit.
  523. #ifdef BORINGSSL_DISPATCH_TEST
  524. movb $1,_BORINGSSL_function_hit+5(%rip)
  525. #endif
  // 240(%rdx) = bits/32 + 5 (9, 11 or 13 for 128/192/256-bit keys); this is
  // the loop count the encrypt/decrypt cores read back.
  526. movl %esi,%eax
  527. shrl $5,%eax
  528. addl $5,%eax
  529. movl %eax,240(%rdx)
  // %rcx = 0 selects the encryption schedule; %r8 = 0x30 is the starting
  // L$k_sr offset.
  530. movl $0,%ecx
  531. movl $0x30,%r8d
  532. call _vpaes_schedule_core
  533. xorl %eax,%eax
  // rep ret
  534. .byte 0xf3,0xc3
  // _vpaes_set_decrypt_key: build a decryption key schedule.
  // Presumably called from C under the System V AMD64 convention (arguments
  // in %rdi, %rsi, %rdx) -- confirm against the C prototype.
  //
  // In:    %rdi = user key bytes (passed through to _vpaes_schedule_core)
  //        %esi = key size in bits
  //        %rdx = key structure; receives the round keys and, at offset 240,
  //               the value (bits >> 5) + 5
  // Out:   %eax = 0
  535. .globl _vpaes_set_decrypt_key
  536. .private_extern _vpaes_set_decrypt_key
  537. .p2align 4
  538. _vpaes_set_decrypt_key:
  539. movl %esi,%eax
  540. shrl $5,%eax
  541. addl $5,%eax
  542. movl %eax,240(%rdx)
  // Point %rdx at key + 16 + 16*count: the schedule is written from the far
  // end backwards (_vpaes_schedule_mangle steps %rdx by -16 when %rcx != 0).
  543. shll $4,%eax
  544. leaq 16(%rdx,%rax,1),%rdx
  // %rcx = 1 selects the decryption schedule.
  545. movl $1,%ecx
  // Starting L$k_sr offset: ((bits >> 1) & 32) ^ 32, i.e. 32 for 128- and
  // 256-bit keys and 0 for 192-bit keys.
  546. movl %esi,%r8d
  547. shrl $1,%r8d
  548. andl $32,%r8d
  549. xorl $32,%r8d
  550. call _vpaes_schedule_core
  551. xorl %eax,%eax
  // rep ret
  552. .byte 0xf3,0xc3
  // _vpaes_encrypt: encrypt one 16-byte block.
  //
  // In:    %rdi = input block (read with an unaligned load)
  //        %rsi = output block (written with an unaligned store)
  //        %rdx = key schedule, consumed by _vpaes_encrypt_core
  // Clobb: everything _vpaes_preheat and _vpaes_encrypt_core clobber
  553. .globl _vpaes_encrypt
  554. .private_extern _vpaes_encrypt
  555. .p2align 4
  556. _vpaes_encrypt:
  // Dispatch-test builds record that this routine ran by setting byte 4 of
  // _BORINGSSL_function_hit.
  557. #ifdef BORINGSSL_DISPATCH_TEST
  558. movb $1,_BORINGSSL_function_hit+4(%rip)
  559. #endif
  560. movdqu (%rdi),%xmm0
  // Load the constant registers the core expects, then run it on %xmm0.
  561. call _vpaes_preheat
  562. call _vpaes_encrypt_core
  563. movdqu %xmm0,(%rsi)
  // rep ret
  564. .byte 0xf3,0xc3
  // _vpaes_decrypt: decrypt one 16-byte block.
  //
  // In:    %rdi = input block (read with an unaligned load)
  //        %rsi = output block (written with an unaligned store)
  //        %rdx = key schedule, consumed by _vpaes_decrypt_core
  // Clobb: everything _vpaes_preheat and _vpaes_decrypt_core clobber
  565. .globl _vpaes_decrypt
  566. .private_extern _vpaes_decrypt
  567. .p2align 4
  568. _vpaes_decrypt:
  569. movdqu (%rdi),%xmm0
  // Load the constant registers the core expects, then run it on %xmm0.
  570. call _vpaes_preheat
  571. call _vpaes_decrypt_core
  572. movdqu %xmm0,(%rsi)
  // rep ret
  573. .byte 0xf3,0xc3
  // _vpaes_cbc_encrypt: CBC-mode encryption or decryption of whole 16-byte
  // blocks.
  //
  // In (register roles as used by the code; presumably the System V AMD64
  // argument order -- confirm against the C prototype):
  //        %rdi = input buffer
  //        %rsi = output buffer
  //        %rdx = length in bytes
  //        %rcx = key schedule
  //        %r8  = 16-byte IV, read on entry and updated on exit
  //        %r9d = zero to decrypt, non-zero to encrypt
  //
  // Returns immediately, without touching the IV, when the length is below
  // 16. Any trailing partial block (length not a multiple of 16) is left
  // unprocessed.
  // Clobb: %rcx, %rdx, %rsi, %rdi, %xmm6, %xmm7 and everything the core
  //        routines clobber
  574. .globl _vpaes_cbc_encrypt
  575. .private_extern _vpaes_cbc_encrypt
  576. .p2align 4
  577. _vpaes_cbc_encrypt:
  // Swap so that %rdx = key (where the core routines expect it) and
  // %rcx = length.
  578. xchgq %rcx,%rdx
  // %rcx = length - 16; a borrow means there is not even one full block.
  579. subq $16,%rcx
  580. jc L$cbc_abort
  // %xmm6 holds the chaining value, starting with the IV.
  581. movdqu (%r8),%xmm6
  // %rsi becomes (out - in) so the output is addressed as (%rsi,%rdi,1) and
  // only %rdi needs advancing.
  582. subq %rdi,%rsi
  583. call _vpaes_preheat
  584. cmpl $0,%r9d
  585. je L$cbc_dec_loop
  586. jmp L$cbc_enc_loop
  587. .p2align 4
  // Encrypt: C[i] = E(P[i] ^ C[i-1]); the ciphertext becomes the next
  // chaining value.
  588. L$cbc_enc_loop:
  589. movdqu (%rdi),%xmm0
  590. pxor %xmm6,%xmm0
  591. call _vpaes_encrypt_core
  592. movdqa %xmm0,%xmm6
  593. movdqu %xmm0,(%rsi,%rdi,1)
  594. leaq 16(%rdi),%rdi
  // Continue while at least one more full block remains (no borrow).
  595. subq $16,%rcx
  596. jnc L$cbc_enc_loop
  597. jmp L$cbc_done
  598. .p2align 4
  // Decrypt: P[i] = D(C[i]) ^ C[i-1]; the ciphertext is saved in %xmm7 before
  // decryption so it can become the next chaining value (this also keeps
  // in-place operation correct).
  599. L$cbc_dec_loop:
  600. movdqu (%rdi),%xmm0
  601. movdqa %xmm0,%xmm7
  602. call _vpaes_decrypt_core
  603. pxor %xmm6,%xmm0
  604. movdqa %xmm7,%xmm6
  605. movdqu %xmm0,(%rsi,%rdi,1)
  606. leaq 16(%rdi),%rdi
  607. subq $16,%rcx
  608. jnc L$cbc_dec_loop
  // Write the final chaining value back to the caller's IV buffer.
  609. L$cbc_done:
  610. movdqu %xmm6,(%r8)
  611. L$cbc_abort:
  // rep ret
  612. .byte 0xf3,0xc3
  // _vpaes_ctr32_encrypt_blocks: CTR-mode processing with a 32-bit counter
  // held in the last four bytes of the counter block.
  //
  // In (register roles as used by the code; presumably the System V AMD64
  // argument order -- confirm against the C prototype):
  //        %rdi = input buffer
  //        %rsi = output buffer
  //        %rdx = number of 16-byte blocks
  //        %rcx = key schedule
  //        %r8  = 16-byte initial counter block (read only; the updated
  //               counter is not written back)
  //
  // Returns immediately when the block count is zero. An odd count is handled
  // by encrypting one block with _vpaes_encrypt_core first; the remainder is
  // processed two blocks at a time with _vpaes_encrypt_core_2x.
  // Clobb: %rcx, %rdx, %rsi, %rdi, %xmm0-%xmm15, and the general-purpose
  //        registers the core routines clobber
  613. .globl _vpaes_ctr32_encrypt_blocks
  614. .private_extern _vpaes_ctr32_encrypt_blocks
  615. .p2align 4
  616. _vpaes_ctr32_encrypt_blocks:
  // Swap so that %rdx = key (where the core routines expect it) and
  // %rcx = block count.
  617. xchgq %rcx,%rdx
  618. testq %rcx,%rcx
  619. jz L$ctr32_abort
  620. movdqu (%r8),%xmm0
  621. movdqa L$ctr_add_one(%rip),%xmm8
  // %rsi becomes (out - in) so the output is addressed as (%rsi,%rdi,1).
  622. subq %rdi,%rsi
  623. call _vpaes_preheat
  // %xmm6 = counter block with bytes 12..15 reversed by L$rev_ctr, so paddd
  // on the top dword lane increments the counter.
  624. movdqa %xmm0,%xmm6
  625. pshufb L$rev_ctr(%rip),%xmm6
  626. testq $1,%rcx
  627. jz L$ctr32_prep_loop
  // Odd count: handle one block on its own. %xmm0 still holds the counter
  // block in its original byte order.
  628. movdqu (%rdi),%xmm7
  629. call _vpaes_encrypt_core
  630. pxor %xmm7,%xmm0
  631. paddd %xmm8,%xmm6
  632. movdqu %xmm0,(%rsi,%rdi,1)
  633. subq $1,%rcx
  // leaq does not modify flags, so the jz below still tests the subq result.
  634. leaq 16(%rdi),%rdi
  635. jz L$ctr32_done
  // %xmm14 = counter n, %xmm15 = counter n+1 (both in byte-reversed form).
  // This overwrites the preheated constants in %xmm14/%xmm15, which the 2x
  // core does not rely on.
  636. L$ctr32_prep_loop:
  637. movdqa %xmm6,%xmm14
  638. movdqa %xmm6,%xmm15
  639. paddd %xmm8,%xmm15
  640. L$ctr32_loop:
  // Undo the byte reversal on copies of both counters
  // (pshufb %xmm1,%xmm0 and pshufb %xmm1,%xmm6) and encrypt them.
  641. movdqa L$rev_ctr(%rip),%xmm1
  642. movdqa %xmm14,%xmm0
  643. movdqa %xmm15,%xmm6
  644. .byte 102,15,56,0,193
  645. .byte 102,15,56,0,241
  646. call _vpaes_encrypt_core_2x
  // XOR the two keystream blocks with the input, advance both counters by
  // two and store the results.
  647. movdqu (%rdi),%xmm1
  648. movdqu 16(%rdi),%xmm2
  649. movdqa L$ctr_add_two(%rip),%xmm3
  650. pxor %xmm1,%xmm0
  651. pxor %xmm2,%xmm6
  652. paddd %xmm3,%xmm14
  653. paddd %xmm3,%xmm15
  654. movdqu %xmm0,(%rsi,%rdi,1)
  655. movdqu %xmm6,16(%rsi,%rdi,1)
  656. subq $2,%rcx
  // leaq does not modify flags, so the jnz below still tests the subq result.
  657. leaq 32(%rdi),%rdi
  658. jnz L$ctr32_loop
  659. L$ctr32_done:
  660. L$ctr32_abort:
  // rep ret
  661. .byte 0xf3,0xc3
  // _vpaes_preheat: load the constant tables the core routines keep in
  // registers. Offsets are relative to L$k_s0F and rely on the table order in
  // _vpaes_consts.
  //
  // Out:   %xmm9  = L$k_s0F       %xmm10 = L$k_inv      %xmm11 = L$k_inv+16
  //        %xmm13 = L$k_sb1       %xmm12 = L$k_sb1+16
  //        %xmm15 = L$k_sb2       %xmm14 = L$k_sb2+16
  // Clobb: %r10
  662. .p2align 4
  663. _vpaes_preheat:
  664. leaq L$k_s0F(%rip),%r10
  665. movdqa -32(%r10),%xmm10
  666. movdqa -16(%r10),%xmm11
  667. movdqa 0(%r10),%xmm9
  668. movdqa 48(%r10),%xmm13
  669. movdqa 64(%r10),%xmm12
  670. movdqa 80(%r10),%xmm15
  671. movdqa 96(%r10),%xmm14
  // rep ret
  672. .byte 0xf3,0xc3
  // _vpaes_consts: 64-byte-aligned constant tables used by every routine in
  // this file. Each table is 16-byte entries read with movdqa.
  //
  // The ORDER AND SIZE of these tables is load-bearing: the code reaches
  // several of them by fixed offsets from a neighbouring label rather than by
  // name (_vpaes_preheat uses -32..+96 from L$k_s0F; the encrypt cores use
  // -96..+64 from L$k_mc_backward; _vpaes_decrypt_core uses -32..+112 from
  // L$k_dsbd and -352 from it to reach L$k_sr; _vpaes_schedule_mangle uses
  // 0..112 from L$k_dksd). Do not insert, remove or reorder entries.
  673. .p2align 6
  674. _vpaes_consts:
  675. L$k_inv:
  676. .quad 0x0E05060F0D080180, 0x040703090A0B0C02
  677. .quad 0x01040A060F0B0780, 0x030D0E0C02050809
  // Mask selecting the low nibble of every byte.
  678. L$k_s0F:
  679. .quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
  // Table pair applied to the input block by the encrypt cores and to the
  // user key by _vpaes_schedule_core.
  680. L$k_ipt:
  681. .quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
  682. .quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
  683. L$k_sb1:
  684. .quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
  685. .quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
  686. L$k_sb2:
  687. .quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
  688. .quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
  // Table pair used in the final encryption round.
  689. L$k_sbo:
  690. .quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
  691. .quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
  // Four byte-shuffle masks, selected per round by a 0/16/32/48 offset.
  692. L$k_mc_forward:
  693. .quad 0x0407060500030201, 0x0C0F0E0D080B0A09
  694. .quad 0x080B0A0904070605, 0x000302010C0F0E0D
  695. .quad 0x0C0F0E0D080B0A09, 0x0407060500030201
  696. .quad 0x000302010C0F0E0D, 0x080B0A0904070605
  // Four byte-shuffle masks, selected per round by a 0/16/32/48 offset.
  697. L$k_mc_backward:
  698. .quad 0x0605040702010003, 0x0E0D0C0F0A09080B
  699. .quad 0x020100030E0D0C0F, 0x0A09080B06050407
  700. .quad 0x0E0D0C0F0A09080B, 0x0605040702010003
  701. .quad 0x0A09080B06050407, 0x020100030E0D0C0F
  // Four byte-shuffle masks (the first is the identity permutation), selected
  // by a 0/16/32/48 offset.
  702. L$k_sr:
  703. .quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
  704. .quad 0x030E09040F0A0500, 0x0B06010C07020D08
  705. .quad 0x0F060D040B020900, 0x070E050C030A0108
  706. .quad 0x0B0E0104070A0D00, 0x0306090C0F020508
  // Initial round-constant state loaded into %xmm8 by _vpaes_schedule_core.
  707. L$k_rcon:
  708. .quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
  // Constant 0x5B in every byte, XORed into key-schedule values.
  709. L$k_s63:
  710. .quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
  // Table pair for the last round key of an encryption schedule.
  711. L$k_opt:
  712. .quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
  713. .quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
  // Table pair for the last round key of a decryption schedule.
  714. L$k_deskew:
  715. .quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
  716. .quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
  // L$k_dksd .. L$k_dks9: four table pairs used by the decrypt path of
  // _vpaes_schedule_mangle, addressed as 0/32/64/96 from L$k_dksd.
  717. L$k_dksd:
  718. .quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
  719. .quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
  720. L$k_dksb:
  721. .quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
  722. .quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
  723. L$k_dkse:
  724. .quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
  725. .quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
  726. L$k_dks9:
  727. .quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
  728. .quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
  // Table pair applied to the input block by _vpaes_decrypt_core.
  729. L$k_dipt:
  730. .quad 0x0F505B040B545F00, 0x154A411E114E451A
  731. .quad 0x86E383E660056500, 0x12771772F491F194
  // L$k_dsb9 .. L$k_dsbo: table pairs used by _vpaes_decrypt_core, addressed
  // as -32/0/32/64/96 from L$k_dsbd.
  732. L$k_dsb9:
  733. .quad 0x851C03539A86D600, 0xCAD51F504F994CC9
  734. .quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
  735. L$k_dsbd:
  736. .quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
  737. .quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
  738. L$k_dsbb:
  739. .quad 0xD022649296B44200, 0x602646F6B0F2D404
  740. .quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
  741. L$k_dsbe:
  742. .quad 0x46F2929626D4D000, 0x2242600464B4F6B0
  743. .quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
  744. L$k_dsbo:
  745. .quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
  746. .quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
  // pshufb mask that reverses bytes 12..15 and leaves bytes 0..11 in place;
  // used by _vpaes_ctr32_encrypt_blocks to move the counter in and out of a
  // form paddd can increment.
  747. L$rev_ctr:
  748. .quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
  // paddd operands adding 1 and 2 to the top dword lane only.
  749. L$ctr_add_one:
  750. .quad 0x0000000000000000, 0x0000000100000000
  751. L$ctr_add_two:
  752. .quad 0x0000000000000000, 0x0000000200000000
  // NUL-terminated ASCII credit string:
  // "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
  753. .byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
  754. .p2align 6
  755. #endif