// aesni-x86_64.S
// NOTE(review): this span contained the file-size header and long runs of
// concatenated integers — the line-number gutter of the viewer this file was
// copied from, not source content. Collapsed to this note.
  1. // This file is generated from a similarly-named Perl script in the BoringSSL
  2. // source tree. Do not edit by hand.
  3. #if defined(__has_feature)
  4. #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
  5. #define OPENSSL_NO_ASM
  6. #endif
  7. #endif
  8. #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
  9. #if defined(BORINGSSL_PREFIX)
  10. #include <boringssl_prefix_symbols_asm.h>
  11. #endif
  12. .text
  13. .extern OPENSSL_ia32cap_P
  14. .hidden OPENSSL_ia32cap_P
  15. .globl aes_hw_encrypt
  16. .hidden aes_hw_encrypt
  17. .type aes_hw_encrypt,@function
  18. .align 16
  19. aes_hw_encrypt:
  20. .cfi_startproc
  21. #ifdef BORINGSSL_DISPATCH_TEST
  22. .extern BORINGSSL_function_hit
  23. .hidden BORINGSSL_function_hit
  24. movb $1,BORINGSSL_function_hit+1(%rip)
  25. #endif
  26. movups (%rdi),%xmm2
  27. movl 240(%rdx),%eax
  28. movups (%rdx),%xmm0
  29. movups 16(%rdx),%xmm1
  30. leaq 32(%rdx),%rdx
  31. xorps %xmm0,%xmm2
  32. .Loop_enc1_1:
  33. .byte 102,15,56,220,209
  34. decl %eax
  35. movups (%rdx),%xmm1
  36. leaq 16(%rdx),%rdx
  37. jnz .Loop_enc1_1
  38. .byte 102,15,56,221,209
  39. pxor %xmm0,%xmm0
  40. pxor %xmm1,%xmm1
  41. movups %xmm2,(%rsi)
  42. pxor %xmm2,%xmm2
  43. .byte 0xf3,0xc3
  44. .cfi_endproc
  45. .size aes_hw_encrypt,.-aes_hw_encrypt
  46. .globl aes_hw_decrypt
  47. .hidden aes_hw_decrypt
  48. .type aes_hw_decrypt,@function
  49. .align 16
  50. aes_hw_decrypt:
  51. .cfi_startproc
  52. movups (%rdi),%xmm2
  53. movl 240(%rdx),%eax
  54. movups (%rdx),%xmm0
  55. movups 16(%rdx),%xmm1
  56. leaq 32(%rdx),%rdx
  57. xorps %xmm0,%xmm2
  58. .Loop_dec1_2:
  59. .byte 102,15,56,222,209
  60. decl %eax
  61. movups (%rdx),%xmm1
  62. leaq 16(%rdx),%rdx
  63. jnz .Loop_dec1_2
  64. .byte 102,15,56,223,209
  65. pxor %xmm0,%xmm0
  66. pxor %xmm1,%xmm1
  67. movups %xmm2,(%rsi)
  68. pxor %xmm2,%xmm2
  69. .byte 0xf3,0xc3
  70. .cfi_endproc
  71. .size aes_hw_decrypt, .-aes_hw_decrypt
  72. .type _aesni_encrypt2,@function
  73. .align 16
  74. _aesni_encrypt2:
  75. .cfi_startproc
  76. movups (%rcx),%xmm0
  77. shll $4,%eax
  78. movups 16(%rcx),%xmm1
  79. xorps %xmm0,%xmm2
  80. xorps %xmm0,%xmm3
  81. movups 32(%rcx),%xmm0
  82. leaq 32(%rcx,%rax,1),%rcx
  83. negq %rax
  84. addq $16,%rax
  85. .Lenc_loop2:
  86. .byte 102,15,56,220,209
  87. .byte 102,15,56,220,217
  88. movups (%rcx,%rax,1),%xmm1
  89. addq $32,%rax
  90. .byte 102,15,56,220,208
  91. .byte 102,15,56,220,216
  92. movups -16(%rcx,%rax,1),%xmm0
  93. jnz .Lenc_loop2
  94. .byte 102,15,56,220,209
  95. .byte 102,15,56,220,217
  96. .byte 102,15,56,221,208
  97. .byte 102,15,56,221,216
  98. .byte 0xf3,0xc3
  99. .cfi_endproc
  100. .size _aesni_encrypt2,.-_aesni_encrypt2
  101. .type _aesni_decrypt2,@function
  102. .align 16
  103. _aesni_decrypt2:
  104. .cfi_startproc
  105. movups (%rcx),%xmm0
  106. shll $4,%eax
  107. movups 16(%rcx),%xmm1
  108. xorps %xmm0,%xmm2
  109. xorps %xmm0,%xmm3
  110. movups 32(%rcx),%xmm0
  111. leaq 32(%rcx,%rax,1),%rcx
  112. negq %rax
  113. addq $16,%rax
  114. .Ldec_loop2:
  115. .byte 102,15,56,222,209
  116. .byte 102,15,56,222,217
  117. movups (%rcx,%rax,1),%xmm1
  118. addq $32,%rax
  119. .byte 102,15,56,222,208
  120. .byte 102,15,56,222,216
  121. movups -16(%rcx,%rax,1),%xmm0
  122. jnz .Ldec_loop2
  123. .byte 102,15,56,222,209
  124. .byte 102,15,56,222,217
  125. .byte 102,15,56,223,208
  126. .byte 102,15,56,223,216
  127. .byte 0xf3,0xc3
  128. .cfi_endproc
  129. .size _aesni_decrypt2,.-_aesni_decrypt2
  130. .type _aesni_encrypt3,@function
  131. .align 16
  132. _aesni_encrypt3:
  133. .cfi_startproc
  134. movups (%rcx),%xmm0
  135. shll $4,%eax
  136. movups 16(%rcx),%xmm1
  137. xorps %xmm0,%xmm2
  138. xorps %xmm0,%xmm3
  139. xorps %xmm0,%xmm4
  140. movups 32(%rcx),%xmm0
  141. leaq 32(%rcx,%rax,1),%rcx
  142. negq %rax
  143. addq $16,%rax
  144. .Lenc_loop3:
  145. .byte 102,15,56,220,209
  146. .byte 102,15,56,220,217
  147. .byte 102,15,56,220,225
  148. movups (%rcx,%rax,1),%xmm1
  149. addq $32,%rax
  150. .byte 102,15,56,220,208
  151. .byte 102,15,56,220,216
  152. .byte 102,15,56,220,224
  153. movups -16(%rcx,%rax,1),%xmm0
  154. jnz .Lenc_loop3
  155. .byte 102,15,56,220,209
  156. .byte 102,15,56,220,217
  157. .byte 102,15,56,220,225
  158. .byte 102,15,56,221,208
  159. .byte 102,15,56,221,216
  160. .byte 102,15,56,221,224
  161. .byte 0xf3,0xc3
  162. .cfi_endproc
  163. .size _aesni_encrypt3,.-_aesni_encrypt3
  164. .type _aesni_decrypt3,@function
  165. .align 16
  166. _aesni_decrypt3:
  167. .cfi_startproc
  168. movups (%rcx),%xmm0
  169. shll $4,%eax
  170. movups 16(%rcx),%xmm1
  171. xorps %xmm0,%xmm2
  172. xorps %xmm0,%xmm3
  173. xorps %xmm0,%xmm4
  174. movups 32(%rcx),%xmm0
  175. leaq 32(%rcx,%rax,1),%rcx
  176. negq %rax
  177. addq $16,%rax
  178. .Ldec_loop3:
  179. .byte 102,15,56,222,209
  180. .byte 102,15,56,222,217
  181. .byte 102,15,56,222,225
  182. movups (%rcx,%rax,1),%xmm1
  183. addq $32,%rax
  184. .byte 102,15,56,222,208
  185. .byte 102,15,56,222,216
  186. .byte 102,15,56,222,224
  187. movups -16(%rcx,%rax,1),%xmm0
  188. jnz .Ldec_loop3
  189. .byte 102,15,56,222,209
  190. .byte 102,15,56,222,217
  191. .byte 102,15,56,222,225
  192. .byte 102,15,56,223,208
  193. .byte 102,15,56,223,216
  194. .byte 102,15,56,223,224
  195. .byte 0xf3,0xc3
  196. .cfi_endproc
  197. .size _aesni_decrypt3,.-_aesni_decrypt3
  198. .type _aesni_encrypt4,@function
  199. .align 16
  200. _aesni_encrypt4:
  201. .cfi_startproc
  202. movups (%rcx),%xmm0
  203. shll $4,%eax
  204. movups 16(%rcx),%xmm1
  205. xorps %xmm0,%xmm2
  206. xorps %xmm0,%xmm3
  207. xorps %xmm0,%xmm4
  208. xorps %xmm0,%xmm5
  209. movups 32(%rcx),%xmm0
  210. leaq 32(%rcx,%rax,1),%rcx
  211. negq %rax
  212. .byte 0x0f,0x1f,0x00
  213. addq $16,%rax
  214. .Lenc_loop4:
  215. .byte 102,15,56,220,209
  216. .byte 102,15,56,220,217
  217. .byte 102,15,56,220,225
  218. .byte 102,15,56,220,233
  219. movups (%rcx,%rax,1),%xmm1
  220. addq $32,%rax
  221. .byte 102,15,56,220,208
  222. .byte 102,15,56,220,216
  223. .byte 102,15,56,220,224
  224. .byte 102,15,56,220,232
  225. movups -16(%rcx,%rax,1),%xmm0
  226. jnz .Lenc_loop4
  227. .byte 102,15,56,220,209
  228. .byte 102,15,56,220,217
  229. .byte 102,15,56,220,225
  230. .byte 102,15,56,220,233
  231. .byte 102,15,56,221,208
  232. .byte 102,15,56,221,216
  233. .byte 102,15,56,221,224
  234. .byte 102,15,56,221,232
  235. .byte 0xf3,0xc3
  236. .cfi_endproc
  237. .size _aesni_encrypt4,.-_aesni_encrypt4
  238. .type _aesni_decrypt4,@function
  239. .align 16
  240. _aesni_decrypt4:
  241. .cfi_startproc
  242. movups (%rcx),%xmm0
  243. shll $4,%eax
  244. movups 16(%rcx),%xmm1
  245. xorps %xmm0,%xmm2
  246. xorps %xmm0,%xmm3
  247. xorps %xmm0,%xmm4
  248. xorps %xmm0,%xmm5
  249. movups 32(%rcx),%xmm0
  250. leaq 32(%rcx,%rax,1),%rcx
  251. negq %rax
  252. .byte 0x0f,0x1f,0x00
  253. addq $16,%rax
  254. .Ldec_loop4:
  255. .byte 102,15,56,222,209
  256. .byte 102,15,56,222,217
  257. .byte 102,15,56,222,225
  258. .byte 102,15,56,222,233
  259. movups (%rcx,%rax,1),%xmm1
  260. addq $32,%rax
  261. .byte 102,15,56,222,208
  262. .byte 102,15,56,222,216
  263. .byte 102,15,56,222,224
  264. .byte 102,15,56,222,232
  265. movups -16(%rcx,%rax,1),%xmm0
  266. jnz .Ldec_loop4
  267. .byte 102,15,56,222,209
  268. .byte 102,15,56,222,217
  269. .byte 102,15,56,222,225
  270. .byte 102,15,56,222,233
  271. .byte 102,15,56,223,208
  272. .byte 102,15,56,223,216
  273. .byte 102,15,56,223,224
  274. .byte 102,15,56,223,232
  275. .byte 0xf3,0xc3
  276. .cfi_endproc
  277. .size _aesni_decrypt4,.-_aesni_decrypt4
  278. .type _aesni_encrypt6,@function
  279. .align 16
  280. _aesni_encrypt6:
  281. .cfi_startproc
  282. movups (%rcx),%xmm0
  283. shll $4,%eax
  284. movups 16(%rcx),%xmm1
  285. xorps %xmm0,%xmm2
  286. pxor %xmm0,%xmm3
  287. pxor %xmm0,%xmm4
  288. .byte 102,15,56,220,209
  289. leaq 32(%rcx,%rax,1),%rcx
  290. negq %rax
  291. .byte 102,15,56,220,217
  292. pxor %xmm0,%xmm5
  293. pxor %xmm0,%xmm6
  294. .byte 102,15,56,220,225
  295. pxor %xmm0,%xmm7
  296. movups (%rcx,%rax,1),%xmm0
  297. addq $16,%rax
  298. jmp .Lenc_loop6_enter
  299. .align 16
  300. .Lenc_loop6:
  301. .byte 102,15,56,220,209
  302. .byte 102,15,56,220,217
  303. .byte 102,15,56,220,225
  304. .Lenc_loop6_enter:
  305. .byte 102,15,56,220,233
  306. .byte 102,15,56,220,241
  307. .byte 102,15,56,220,249
  308. movups (%rcx,%rax,1),%xmm1
  309. addq $32,%rax
  310. .byte 102,15,56,220,208
  311. .byte 102,15,56,220,216
  312. .byte 102,15,56,220,224
  313. .byte 102,15,56,220,232
  314. .byte 102,15,56,220,240
  315. .byte 102,15,56,220,248
  316. movups -16(%rcx,%rax,1),%xmm0
  317. jnz .Lenc_loop6
  318. .byte 102,15,56,220,209
  319. .byte 102,15,56,220,217
  320. .byte 102,15,56,220,225
  321. .byte 102,15,56,220,233
  322. .byte 102,15,56,220,241
  323. .byte 102,15,56,220,249
  324. .byte 102,15,56,221,208
  325. .byte 102,15,56,221,216
  326. .byte 102,15,56,221,224
  327. .byte 102,15,56,221,232
  328. .byte 102,15,56,221,240
  329. .byte 102,15,56,221,248
  330. .byte 0xf3,0xc3
  331. .cfi_endproc
  332. .size _aesni_encrypt6,.-_aesni_encrypt6
  333. .type _aesni_decrypt6,@function
  334. .align 16
  335. _aesni_decrypt6:
  336. .cfi_startproc
  337. movups (%rcx),%xmm0
  338. shll $4,%eax
  339. movups 16(%rcx),%xmm1
  340. xorps %xmm0,%xmm2
  341. pxor %xmm0,%xmm3
  342. pxor %xmm0,%xmm4
  343. .byte 102,15,56,222,209
  344. leaq 32(%rcx,%rax,1),%rcx
  345. negq %rax
  346. .byte 102,15,56,222,217
  347. pxor %xmm0,%xmm5
  348. pxor %xmm0,%xmm6
  349. .byte 102,15,56,222,225
  350. pxor %xmm0,%xmm7
  351. movups (%rcx,%rax,1),%xmm0
  352. addq $16,%rax
  353. jmp .Ldec_loop6_enter
  354. .align 16
  355. .Ldec_loop6:
  356. .byte 102,15,56,222,209
  357. .byte 102,15,56,222,217
  358. .byte 102,15,56,222,225
  359. .Ldec_loop6_enter:
  360. .byte 102,15,56,222,233
  361. .byte 102,15,56,222,241
  362. .byte 102,15,56,222,249
  363. movups (%rcx,%rax,1),%xmm1
  364. addq $32,%rax
  365. .byte 102,15,56,222,208
  366. .byte 102,15,56,222,216
  367. .byte 102,15,56,222,224
  368. .byte 102,15,56,222,232
  369. .byte 102,15,56,222,240
  370. .byte 102,15,56,222,248
  371. movups -16(%rcx,%rax,1),%xmm0
  372. jnz .Ldec_loop6
  373. .byte 102,15,56,222,209
  374. .byte 102,15,56,222,217
  375. .byte 102,15,56,222,225
  376. .byte 102,15,56,222,233
  377. .byte 102,15,56,222,241
  378. .byte 102,15,56,222,249
  379. .byte 102,15,56,223,208
  380. .byte 102,15,56,223,216
  381. .byte 102,15,56,223,224
  382. .byte 102,15,56,223,232
  383. .byte 102,15,56,223,240
  384. .byte 102,15,56,223,248
  385. .byte 0xf3,0xc3
  386. .cfi_endproc
  387. .size _aesni_decrypt6,.-_aesni_decrypt6
  388. .type _aesni_encrypt8,@function
  389. .align 16
  390. _aesni_encrypt8:
  391. .cfi_startproc
  392. movups (%rcx),%xmm0
  393. shll $4,%eax
  394. movups 16(%rcx),%xmm1
  395. xorps %xmm0,%xmm2
  396. xorps %xmm0,%xmm3
  397. pxor %xmm0,%xmm4
  398. pxor %xmm0,%xmm5
  399. pxor %xmm0,%xmm6
  400. leaq 32(%rcx,%rax,1),%rcx
  401. negq %rax
  402. .byte 102,15,56,220,209
  403. pxor %xmm0,%xmm7
  404. pxor %xmm0,%xmm8
  405. .byte 102,15,56,220,217
  406. pxor %xmm0,%xmm9
  407. movups (%rcx,%rax,1),%xmm0
  408. addq $16,%rax
  409. jmp .Lenc_loop8_inner
  410. .align 16
  411. .Lenc_loop8:
  412. .byte 102,15,56,220,209
  413. .byte 102,15,56,220,217
  414. .Lenc_loop8_inner:
  415. .byte 102,15,56,220,225
  416. .byte 102,15,56,220,233
  417. .byte 102,15,56,220,241
  418. .byte 102,15,56,220,249
  419. .byte 102,68,15,56,220,193
  420. .byte 102,68,15,56,220,201
  421. .Lenc_loop8_enter:
  422. movups (%rcx,%rax,1),%xmm1
  423. addq $32,%rax
  424. .byte 102,15,56,220,208
  425. .byte 102,15,56,220,216
  426. .byte 102,15,56,220,224
  427. .byte 102,15,56,220,232
  428. .byte 102,15,56,220,240
  429. .byte 102,15,56,220,248
  430. .byte 102,68,15,56,220,192
  431. .byte 102,68,15,56,220,200
  432. movups -16(%rcx,%rax,1),%xmm0
  433. jnz .Lenc_loop8
  434. .byte 102,15,56,220,209
  435. .byte 102,15,56,220,217
  436. .byte 102,15,56,220,225
  437. .byte 102,15,56,220,233
  438. .byte 102,15,56,220,241
  439. .byte 102,15,56,220,249
  440. .byte 102,68,15,56,220,193
  441. .byte 102,68,15,56,220,201
  442. .byte 102,15,56,221,208
  443. .byte 102,15,56,221,216
  444. .byte 102,15,56,221,224
  445. .byte 102,15,56,221,232
  446. .byte 102,15,56,221,240
  447. .byte 102,15,56,221,248
  448. .byte 102,68,15,56,221,192
  449. .byte 102,68,15,56,221,200
  450. .byte 0xf3,0xc3
  451. .cfi_endproc
  452. .size _aesni_encrypt8,.-_aesni_encrypt8
  453. .type _aesni_decrypt8,@function
  454. .align 16
  455. _aesni_decrypt8:
  456. .cfi_startproc
  457. movups (%rcx),%xmm0
  458. shll $4,%eax
  459. movups 16(%rcx),%xmm1
  460. xorps %xmm0,%xmm2
  461. xorps %xmm0,%xmm3
  462. pxor %xmm0,%xmm4
  463. pxor %xmm0,%xmm5
  464. pxor %xmm0,%xmm6
  465. leaq 32(%rcx,%rax,1),%rcx
  466. negq %rax
  467. .byte 102,15,56,222,209
  468. pxor %xmm0,%xmm7
  469. pxor %xmm0,%xmm8
  470. .byte 102,15,56,222,217
  471. pxor %xmm0,%xmm9
  472. movups (%rcx,%rax,1),%xmm0
  473. addq $16,%rax
  474. jmp .Ldec_loop8_inner
  475. .align 16
  476. .Ldec_loop8:
  477. .byte 102,15,56,222,209
  478. .byte 102,15,56,222,217
  479. .Ldec_loop8_inner:
  480. .byte 102,15,56,222,225
  481. .byte 102,15,56,222,233
  482. .byte 102,15,56,222,241
  483. .byte 102,15,56,222,249
  484. .byte 102,68,15,56,222,193
  485. .byte 102,68,15,56,222,201
  486. .Ldec_loop8_enter:
  487. movups (%rcx,%rax,1),%xmm1
  488. addq $32,%rax
  489. .byte 102,15,56,222,208
  490. .byte 102,15,56,222,216
  491. .byte 102,15,56,222,224
  492. .byte 102,15,56,222,232
  493. .byte 102,15,56,222,240
  494. .byte 102,15,56,222,248
  495. .byte 102,68,15,56,222,192
  496. .byte 102,68,15,56,222,200
  497. movups -16(%rcx,%rax,1),%xmm0
  498. jnz .Ldec_loop8
  499. .byte 102,15,56,222,209
  500. .byte 102,15,56,222,217
  501. .byte 102,15,56,222,225
  502. .byte 102,15,56,222,233
  503. .byte 102,15,56,222,241
  504. .byte 102,15,56,222,249
  505. .byte 102,68,15,56,222,193
  506. .byte 102,68,15,56,222,201
  507. .byte 102,15,56,223,208
  508. .byte 102,15,56,223,216
  509. .byte 102,15,56,223,224
  510. .byte 102,15,56,223,232
  511. .byte 102,15,56,223,240
  512. .byte 102,15,56,223,248
  513. .byte 102,68,15,56,223,192
  514. .byte 102,68,15,56,223,200
  515. .byte 0xf3,0xc3
  516. .cfi_endproc
  517. .size _aesni_decrypt8,.-_aesni_decrypt8
  518. .globl aes_hw_ecb_encrypt
  519. .hidden aes_hw_ecb_encrypt
  520. .type aes_hw_ecb_encrypt,@function
  521. .align 16
  522. aes_hw_ecb_encrypt:
  523. .cfi_startproc
  524. andq $-16,%rdx
  525. jz .Lecb_ret
  526. movl 240(%rcx),%eax
  527. movups (%rcx),%xmm0
  528. movq %rcx,%r11
  529. movl %eax,%r10d
  530. testl %r8d,%r8d
  531. jz .Lecb_decrypt
  532. cmpq $0x80,%rdx
  533. jb .Lecb_enc_tail
  534. movdqu (%rdi),%xmm2
  535. movdqu 16(%rdi),%xmm3
  536. movdqu 32(%rdi),%xmm4
  537. movdqu 48(%rdi),%xmm5
  538. movdqu 64(%rdi),%xmm6
  539. movdqu 80(%rdi),%xmm7
  540. movdqu 96(%rdi),%xmm8
  541. movdqu 112(%rdi),%xmm9
  542. leaq 128(%rdi),%rdi
  543. subq $0x80,%rdx
  544. jmp .Lecb_enc_loop8_enter
  545. .align 16
  546. .Lecb_enc_loop8:
  547. movups %xmm2,(%rsi)
  548. movq %r11,%rcx
  549. movdqu (%rdi),%xmm2
  550. movl %r10d,%eax
  551. movups %xmm3,16(%rsi)
  552. movdqu 16(%rdi),%xmm3
  553. movups %xmm4,32(%rsi)
  554. movdqu 32(%rdi),%xmm4
  555. movups %xmm5,48(%rsi)
  556. movdqu 48(%rdi),%xmm5
  557. movups %xmm6,64(%rsi)
  558. movdqu 64(%rdi),%xmm6
  559. movups %xmm7,80(%rsi)
  560. movdqu 80(%rdi),%xmm7
  561. movups %xmm8,96(%rsi)
  562. movdqu 96(%rdi),%xmm8
  563. movups %xmm9,112(%rsi)
  564. leaq 128(%rsi),%rsi
  565. movdqu 112(%rdi),%xmm9
  566. leaq 128(%rdi),%rdi
  567. .Lecb_enc_loop8_enter:
  568. call _aesni_encrypt8
  569. subq $0x80,%rdx
  570. jnc .Lecb_enc_loop8
  571. movups %xmm2,(%rsi)
  572. movq %r11,%rcx
  573. movups %xmm3,16(%rsi)
  574. movl %r10d,%eax
  575. movups %xmm4,32(%rsi)
  576. movups %xmm5,48(%rsi)
  577. movups %xmm6,64(%rsi)
  578. movups %xmm7,80(%rsi)
  579. movups %xmm8,96(%rsi)
  580. movups %xmm9,112(%rsi)
  581. leaq 128(%rsi),%rsi
  582. addq $0x80,%rdx
  583. jz .Lecb_ret
  584. .Lecb_enc_tail:
  585. movups (%rdi),%xmm2
  586. cmpq $0x20,%rdx
  587. jb .Lecb_enc_one
  588. movups 16(%rdi),%xmm3
  589. je .Lecb_enc_two
  590. movups 32(%rdi),%xmm4
  591. cmpq $0x40,%rdx
  592. jb .Lecb_enc_three
  593. movups 48(%rdi),%xmm5
  594. je .Lecb_enc_four
  595. movups 64(%rdi),%xmm6
  596. cmpq $0x60,%rdx
  597. jb .Lecb_enc_five
  598. movups 80(%rdi),%xmm7
  599. je .Lecb_enc_six
  600. movdqu 96(%rdi),%xmm8
  601. xorps %xmm9,%xmm9
  602. call _aesni_encrypt8
  603. movups %xmm2,(%rsi)
  604. movups %xmm3,16(%rsi)
  605. movups %xmm4,32(%rsi)
  606. movups %xmm5,48(%rsi)
  607. movups %xmm6,64(%rsi)
  608. movups %xmm7,80(%rsi)
  609. movups %xmm8,96(%rsi)
  610. jmp .Lecb_ret
  611. .align 16
  612. .Lecb_enc_one:
  613. movups (%rcx),%xmm0
  614. movups 16(%rcx),%xmm1
  615. leaq 32(%rcx),%rcx
  616. xorps %xmm0,%xmm2
  617. .Loop_enc1_3:
  618. .byte 102,15,56,220,209
  619. decl %eax
  620. movups (%rcx),%xmm1
  621. leaq 16(%rcx),%rcx
  622. jnz .Loop_enc1_3
  623. .byte 102,15,56,221,209
  624. movups %xmm2,(%rsi)
  625. jmp .Lecb_ret
  626. .align 16
  627. .Lecb_enc_two:
  628. call _aesni_encrypt2
  629. movups %xmm2,(%rsi)
  630. movups %xmm3,16(%rsi)
  631. jmp .Lecb_ret
  632. .align 16
  633. .Lecb_enc_three:
  634. call _aesni_encrypt3
  635. movups %xmm2,(%rsi)
  636. movups %xmm3,16(%rsi)
  637. movups %xmm4,32(%rsi)
  638. jmp .Lecb_ret
  639. .align 16
  640. .Lecb_enc_four:
  641. call _aesni_encrypt4
  642. movups %xmm2,(%rsi)
  643. movups %xmm3,16(%rsi)
  644. movups %xmm4,32(%rsi)
  645. movups %xmm5,48(%rsi)
  646. jmp .Lecb_ret
  647. .align 16
  648. .Lecb_enc_five:
  649. xorps %xmm7,%xmm7
  650. call _aesni_encrypt6
  651. movups %xmm2,(%rsi)
  652. movups %xmm3,16(%rsi)
  653. movups %xmm4,32(%rsi)
  654. movups %xmm5,48(%rsi)
  655. movups %xmm6,64(%rsi)
  656. jmp .Lecb_ret
  657. .align 16
  658. .Lecb_enc_six:
  659. call _aesni_encrypt6
  660. movups %xmm2,(%rsi)
  661. movups %xmm3,16(%rsi)
  662. movups %xmm4,32(%rsi)
  663. movups %xmm5,48(%rsi)
  664. movups %xmm6,64(%rsi)
  665. movups %xmm7,80(%rsi)
  666. jmp .Lecb_ret
  667. .align 16
  668. .Lecb_decrypt:
  669. cmpq $0x80,%rdx
  670. jb .Lecb_dec_tail
  671. movdqu (%rdi),%xmm2
  672. movdqu 16(%rdi),%xmm3
  673. movdqu 32(%rdi),%xmm4
  674. movdqu 48(%rdi),%xmm5
  675. movdqu 64(%rdi),%xmm6
  676. movdqu 80(%rdi),%xmm7
  677. movdqu 96(%rdi),%xmm8
  678. movdqu 112(%rdi),%xmm9
  679. leaq 128(%rdi),%rdi
  680. subq $0x80,%rdx
  681. jmp .Lecb_dec_loop8_enter
  682. .align 16
  683. .Lecb_dec_loop8:
  684. movups %xmm2,(%rsi)
  685. movq %r11,%rcx
  686. movdqu (%rdi),%xmm2
  687. movl %r10d,%eax
  688. movups %xmm3,16(%rsi)
  689. movdqu 16(%rdi),%xmm3
  690. movups %xmm4,32(%rsi)
  691. movdqu 32(%rdi),%xmm4
  692. movups %xmm5,48(%rsi)
  693. movdqu 48(%rdi),%xmm5
  694. movups %xmm6,64(%rsi)
  695. movdqu 64(%rdi),%xmm6
  696. movups %xmm7,80(%rsi)
  697. movdqu 80(%rdi),%xmm7
  698. movups %xmm8,96(%rsi)
  699. movdqu 96(%rdi),%xmm8
  700. movups %xmm9,112(%rsi)
  701. leaq 128(%rsi),%rsi
  702. movdqu 112(%rdi),%xmm9
  703. leaq 128(%rdi),%rdi
  704. .Lecb_dec_loop8_enter:
  705. call _aesni_decrypt8
  706. movups (%r11),%xmm0
  707. subq $0x80,%rdx
  708. jnc .Lecb_dec_loop8
  709. movups %xmm2,(%rsi)
  710. pxor %xmm2,%xmm2
  711. movq %r11,%rcx
  712. movups %xmm3,16(%rsi)
  713. pxor %xmm3,%xmm3
  714. movl %r10d,%eax
  715. movups %xmm4,32(%rsi)
  716. pxor %xmm4,%xmm4
  717. movups %xmm5,48(%rsi)
  718. pxor %xmm5,%xmm5
  719. movups %xmm6,64(%rsi)
  720. pxor %xmm6,%xmm6
  721. movups %xmm7,80(%rsi)
  722. pxor %xmm7,%xmm7
  723. movups %xmm8,96(%rsi)
  724. pxor %xmm8,%xmm8
  725. movups %xmm9,112(%rsi)
  726. pxor %xmm9,%xmm9
  727. leaq 128(%rsi),%rsi
  728. addq $0x80,%rdx
  729. jz .Lecb_ret
  730. .Lecb_dec_tail:
  731. movups (%rdi),%xmm2
  732. cmpq $0x20,%rdx
  733. jb .Lecb_dec_one
  734. movups 16(%rdi),%xmm3
  735. je .Lecb_dec_two
  736. movups 32(%rdi),%xmm4
  737. cmpq $0x40,%rdx
  738. jb .Lecb_dec_three
  739. movups 48(%rdi),%xmm5
  740. je .Lecb_dec_four
  741. movups 64(%rdi),%xmm6
  742. cmpq $0x60,%rdx
  743. jb .Lecb_dec_five
  744. movups 80(%rdi),%xmm7
  745. je .Lecb_dec_six
  746. movups 96(%rdi),%xmm8
  747. movups (%rcx),%xmm0
  748. xorps %xmm9,%xmm9
  749. call _aesni_decrypt8
  750. movups %xmm2,(%rsi)
  751. pxor %xmm2,%xmm2
  752. movups %xmm3,16(%rsi)
  753. pxor %xmm3,%xmm3
  754. movups %xmm4,32(%rsi)
  755. pxor %xmm4,%xmm4
  756. movups %xmm5,48(%rsi)
  757. pxor %xmm5,%xmm5
  758. movups %xmm6,64(%rsi)
  759. pxor %xmm6,%xmm6
  760. movups %xmm7,80(%rsi)
  761. pxor %xmm7,%xmm7
  762. movups %xmm8,96(%rsi)
  763. pxor %xmm8,%xmm8
  764. pxor %xmm9,%xmm9
  765. jmp .Lecb_ret
  766. .align 16
  767. .Lecb_dec_one:
  768. movups (%rcx),%xmm0
  769. movups 16(%rcx),%xmm1
  770. leaq 32(%rcx),%rcx
  771. xorps %xmm0,%xmm2
  772. .Loop_dec1_4:
  773. .byte 102,15,56,222,209
  774. decl %eax
  775. movups (%rcx),%xmm1
  776. leaq 16(%rcx),%rcx
  777. jnz .Loop_dec1_4
  778. .byte 102,15,56,223,209
  779. movups %xmm2,(%rsi)
  780. pxor %xmm2,%xmm2
  781. jmp .Lecb_ret
  782. .align 16
  783. .Lecb_dec_two:
  784. call _aesni_decrypt2
  785. movups %xmm2,(%rsi)
  786. pxor %xmm2,%xmm2
  787. movups %xmm3,16(%rsi)
  788. pxor %xmm3,%xmm3
  789. jmp .Lecb_ret
  790. .align 16
  791. .Lecb_dec_three:
  792. call _aesni_decrypt3
  793. movups %xmm2,(%rsi)
  794. pxor %xmm2,%xmm2
  795. movups %xmm3,16(%rsi)
  796. pxor %xmm3,%xmm3
  797. movups %xmm4,32(%rsi)
  798. pxor %xmm4,%xmm4
  799. jmp .Lecb_ret
  800. .align 16
  801. .Lecb_dec_four:
  802. call _aesni_decrypt4
  803. movups %xmm2,(%rsi)
  804. pxor %xmm2,%xmm2
  805. movups %xmm3,16(%rsi)
  806. pxor %xmm3,%xmm3
  807. movups %xmm4,32(%rsi)
  808. pxor %xmm4,%xmm4
  809. movups %xmm5,48(%rsi)
  810. pxor %xmm5,%xmm5
  811. jmp .Lecb_ret
  812. .align 16
  813. .Lecb_dec_five:
  814. xorps %xmm7,%xmm7
  815. call _aesni_decrypt6
  816. movups %xmm2,(%rsi)
  817. pxor %xmm2,%xmm2
  818. movups %xmm3,16(%rsi)
  819. pxor %xmm3,%xmm3
  820. movups %xmm4,32(%rsi)
  821. pxor %xmm4,%xmm4
  822. movups %xmm5,48(%rsi)
  823. pxor %xmm5,%xmm5
  824. movups %xmm6,64(%rsi)
  825. pxor %xmm6,%xmm6
  826. pxor %xmm7,%xmm7
  827. jmp .Lecb_ret
  828. .align 16
  829. .Lecb_dec_six:
  830. call _aesni_decrypt6
  831. movups %xmm2,(%rsi)
  832. pxor %xmm2,%xmm2
  833. movups %xmm3,16(%rsi)
  834. pxor %xmm3,%xmm3
  835. movups %xmm4,32(%rsi)
  836. pxor %xmm4,%xmm4
  837. movups %xmm5,48(%rsi)
  838. pxor %xmm5,%xmm5
  839. movups %xmm6,64(%rsi)
  840. pxor %xmm6,%xmm6
  841. movups %xmm7,80(%rsi)
  842. pxor %xmm7,%xmm7
  843. .Lecb_ret:
  844. xorps %xmm0,%xmm0
  845. pxor %xmm1,%xmm1
  846. .byte 0xf3,0xc3
  847. .cfi_endproc
  848. .size aes_hw_ecb_encrypt,.-aes_hw_ecb_encrypt
//----------------------------------------------------------------------
// void aes_hw_ctr32_encrypt_blocks(const uint8_t *in,     // %rdi
//                                  uint8_t *out,          // %rsi
//                                  size_t blocks,         // %rdx
//                                  const AES_KEY *key,    // %rcx
//                                  const uint8_t ivec[16]); // %r8
//
// AES in CTR mode with a 32-bit big-endian counter in the last dword of
// ivec. NOTE(review): register/argument mapping inferred from the SysV
// AMD64 ABI and the loads below (240(%rcx) is read as the round count,
// the usual AES_KEY layout) -- confirm against the C prototype.
//
// Strategy: a dedicated single-block fast path; otherwise 8 blocks per
// iteration (or 6 on CPUs selected by the OPENSSL_ia32cap_P feature test
// below), keeping round-key-0-whitened counter blocks in a 128-byte
// aligned stack scratch area. All xmm registers and the scratch area are
// wiped before returning.
//----------------------------------------------------------------------
.globl aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type aes_hw_ctr32_encrypt_blocks,@function
.align 16
aes_hw_ctr32_encrypt_blocks:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,BORINGSSL_function_hit(%rip)
#endif
// Fast path: exactly one block -- encrypt ivec, XOR with the input.
cmpq $1,%rdx
jne .Lctr32_bulk
movups (%r8),%xmm2            // xmm2 = counter block (ivec)
movups (%rdi),%xmm3           // xmm3 = input block
movl 240(%rcx),%edx           // edx = round count
movups (%rcx),%xmm0           // round key 0
movups 16(%rcx),%xmm1         // round key 1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2             // whitening
.Loop_enc1_5:
.byte 102,15,56,220,209       // aesenc %xmm1,%xmm2
decl %edx
movups (%rcx),%xmm1           // next round key
leaq 16(%rcx),%rcx
jnz .Loop_enc1_5
.byte 102,15,56,221,209       // aesenclast %xmm1,%xmm2
pxor %xmm0,%xmm0              // scrub key material from registers
pxor %xmm1,%xmm1
xorps %xmm3,%xmm2             // ciphertext = keystream ^ input
pxor %xmm3,%xmm3
movups %xmm2,(%rsi)
xorps %xmm2,%xmm2
jmp .Lctr32_epilogue
.align 16
.Lctr32_bulk:
leaq (%rsp),%r11              // r11 = frame anchor (CFA stays here)
.cfi_def_cfa_register %r11
pushq %rbp
.cfi_offset %rbp,-16
subq $128,%rsp                // 8 x 16-byte counter-block scratch
andq $-16,%rsp                // movdqa-safe alignment
// Load the IV, pre-XOR it with round key 0, and replicate it into the
// eight scratch slots; only dword 3 (the big-endian counter) will differ
// per block.
movdqu (%r8),%xmm2
movdqu (%rcx),%xmm0           // round key 0
movl 12(%r8),%r8d             // r8d = counter word (big-endian)
pxor %xmm0,%xmm2              // pre-whitened block (ivec ^ rk0)
movl 12(%rcx),%ebp            // ebp = rk0 dword 3, whitens raw counters
movdqa %xmm2,0(%rsp)
bswapl %r8d                   // counter to host order for arithmetic
movdqa %xmm2,%xmm3
movdqa %xmm2,%xmm4
movdqa %xmm2,%xmm5
movdqa %xmm2,64(%rsp)
movdqa %xmm2,80(%rsp)
movdqa %xmm2,96(%rsp)
movq %rdx,%r10                // stash block count
movdqa %xmm2,112(%rsp)
// Counters +1..+7: bswap back to big-endian, XOR with the rk0 dword,
// then insert into dword 3 of each scratch block (pinsrd or plain store).
leaq 1(%r8),%rax
leaq 2(%r8),%rdx
bswapl %eax
bswapl %edx
xorl %ebp,%eax
xorl %ebp,%edx
.byte 102,15,58,34,216,3      // pinsrd $3,%eax,%xmm3
leaq 3(%r8),%rax
movdqa %xmm3,16(%rsp)
.byte 102,15,58,34,226,3      // pinsrd $3,%edx,%xmm4
bswapl %eax
movq %r10,%rdx                // rdx = block count again
leaq 4(%r8),%r10
movdqa %xmm4,32(%rsp)
xorl %ebp,%eax
bswapl %r10d
.byte 102,15,58,34,232,3      // pinsrd $3,%eax,%xmm5
xorl %ebp,%r10d
movdqa %xmm5,48(%rsp)
leaq 5(%r8),%r9
movl %r10d,64+12(%rsp)
bswapl %r9d
leaq 6(%r8),%r10
movl 240(%rcx),%eax           // eax = round count
xorl %ebp,%r9d
bswapl %r10d
movl %r9d,80+12(%rsp)
xorl %ebp,%r10d
leaq 7(%r8),%r9
movl %r10d,96+12(%rsp)
bswapl %r9d
leaq OPENSSL_ia32cap_P(%rip),%r10
movl 4(%r10),%r10d            // CPUID feature word (second dword)
xorl %ebp,%r9d
andl $71303168,%r10d          // 0x4400000: ECX bits 22 (MOVBE) and 26
                              // (XSAVE), presumably -- TODO confirm
movl %r9d,112+12(%rsp)
movups 16(%rcx),%xmm1         // round key 1
movdqa 64(%rsp),%xmm6
movdqa 80(%rsp),%xmm7
cmpq $8,%rdx
jb .Lctr32_tail               // fewer than 8 blocks total
subq $6,%rdx
cmpl $4194304,%r10d           // MOVBE without XSAVE: take the 6x path
je .Lctr32_6x
leaq 128(%rcx),%rcx           // +128 key-pointer bias for NN-128 loads
subq $2,%rdx
jmp .Lctr32_loop8
.align 16
.Lctr32_6x:
shll $4,%eax                  // eax = rounds*16
movl $48,%r10d
bswapl %ebp                   // whitening word into big-endian domain,
                              // to pair with the byte-swapping movbe
leaq 32(%rcx,%rax,1),%rcx     // point past the key schedule
subq %rax,%r10                // r10 = 48 - rounds*16 (negative bias)
jmp .Lctr32_loop6
.align 16
.Lctr32_loop6:
// Six blocks in flight; the early AES rounds are interleaved with
// storing the next counters straight into the scratch blocks with MOVBE
// (byte-swapping store): 0x0f,0x38,0xf1 = movbe %eax,disp8(%rsp).
addl $6,%r8d
movups -48(%rcx,%r10,1),%xmm0
.byte 102,15,56,220,209       // aesenc %xmm1,%xmm2
movl %r8d,%eax
xorl %ebp,%eax
.byte 102,15,56,220,217       // aesenc %xmm1,%xmm3
.byte 0x0f,0x38,0xf1,0x44,0x24,12 // movbe %eax,12(%rsp)
leal 1(%r8),%eax
.byte 102,15,56,220,225       // aesenc %xmm1,%xmm4
xorl %ebp,%eax
.byte 0x0f,0x38,0xf1,0x44,0x24,28 // movbe %eax,28(%rsp)
.byte 102,15,56,220,233       // aesenc %xmm1,%xmm5
leal 2(%r8),%eax
xorl %ebp,%eax
.byte 102,15,56,220,241       // aesenc %xmm1,%xmm6
.byte 0x0f,0x38,0xf1,0x44,0x24,44 // movbe %eax,44(%rsp)
leal 3(%r8),%eax
.byte 102,15,56,220,249       // aesenc %xmm1,%xmm7
movups -32(%rcx,%r10,1),%xmm1
xorl %ebp,%eax
.byte 102,15,56,220,208       // aesenc %xmm0,%xmm2
.byte 0x0f,0x38,0xf1,0x44,0x24,60 // movbe %eax,60(%rsp)
leal 4(%r8),%eax
.byte 102,15,56,220,216       // aesenc %xmm0,%xmm3
xorl %ebp,%eax
.byte 0x0f,0x38,0xf1,0x44,0x24,76 // movbe %eax,76(%rsp)
.byte 102,15,56,220,224       // aesenc %xmm0,%xmm4
leal 5(%r8),%eax
xorl %ebp,%eax
.byte 102,15,56,220,232       // aesenc %xmm0,%xmm5
.byte 0x0f,0x38,0xf1,0x44,0x24,92 // movbe %eax,92(%rsp)
movq %r10,%rax                // rax = key-length bias for .Lenc_loop6
.byte 102,15,56,220,240       // aesenc %xmm0,%xmm6
.byte 102,15,56,220,248       // aesenc %xmm0,%xmm7
movups -16(%rcx,%r10,1),%xmm0
call .Lenc_loop6              // remaining rounds (defined elsewhere in file)
// XOR the six keystream blocks with the input; reload next counters.
movdqu (%rdi),%xmm8
movdqu 16(%rdi),%xmm9
movdqu 32(%rdi),%xmm10
movdqu 48(%rdi),%xmm11
movdqu 64(%rdi),%xmm12
movdqu 80(%rdi),%xmm13
leaq 96(%rdi),%rdi
movups -64(%rcx,%r10,1),%xmm1 // round key 1 for the next iteration
pxor %xmm2,%xmm8
movaps 0(%rsp),%xmm2
pxor %xmm3,%xmm9
movaps 16(%rsp),%xmm3
pxor %xmm4,%xmm10
movaps 32(%rsp),%xmm4
pxor %xmm5,%xmm11
movaps 48(%rsp),%xmm5
pxor %xmm6,%xmm12
movaps 64(%rsp),%xmm6
pxor %xmm7,%xmm13
movaps 80(%rsp),%xmm7
movdqu %xmm8,(%rsi)
movdqu %xmm9,16(%rsi)
movdqu %xmm10,32(%rsi)
movdqu %xmm11,48(%rsi)
movdqu %xmm12,64(%rsi)
movdqu %xmm13,80(%rsi)
leaq 96(%rsi),%rsi
subq $6,%rdx
jnc .Lctr32_loop6
addq $6,%rdx
jz .Lctr32_done
// Undo the 6x key-pointer scheme, recompute eax = rounds, share the tail.
leal -48(%r10),%eax
leaq -80(%rcx,%r10,1),%rcx
negl %eax
shrl $4,%eax                  // eax = round count again
jmp .Lctr32_tail
.align 32
.Lctr32_loop8:
// Eight blocks in flight (xmm2..xmm9). Rounds are interleaved with
// rebuilding the next eight counter dwords in the scratch slots. The key
// pointer carries a +128 bias, hence the NN-128(%rcx) loads.
addl $8,%r8d
movdqa 96(%rsp),%xmm8
.byte 102,15,56,220,209       // aesenc %xmm1,%xmm2
movl %r8d,%r9d
movdqa 112(%rsp),%xmm9
.byte 102,15,56,220,217       // aesenc %xmm1,%xmm3
bswapl %r9d
movups 32-128(%rcx),%xmm0
.byte 102,15,56,220,225       // aesenc %xmm1,%xmm4
xorl %ebp,%r9d
nop
.byte 102,15,56,220,233       // aesenc %xmm1,%xmm5
movl %r9d,0+12(%rsp)          // next counter for block 0
leaq 1(%r8),%r9
.byte 102,15,56,220,241       // aesenc %xmm1,%xmm6
.byte 102,15,56,220,249       // aesenc %xmm1,%xmm7
.byte 102,68,15,56,220,193    // aesenc %xmm1,%xmm8
.byte 102,68,15,56,220,201    // aesenc %xmm1,%xmm9
movups 48-128(%rcx),%xmm1
bswapl %r9d
.byte 102,15,56,220,208       // aesenc %xmm0,%xmm2 (next round group)
.byte 102,15,56,220,216
xorl %ebp,%r9d
.byte 0x66,0x90               // 2-byte nop (scheduling/alignment filler)
.byte 102,15,56,220,224
.byte 102,15,56,220,232
movl %r9d,16+12(%rsp)         // next counter for block 1
leaq 2(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192    // aesenc %xmm0,%xmm8
.byte 102,68,15,56,220,200    // aesenc %xmm0,%xmm9
movups 64-128(%rcx),%xmm0
bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movl %r9d,32+12(%rsp)         // next counter for block 2
leaq 3(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 80-128(%rcx),%xmm1
bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
movl %r9d,48+12(%rsp)         // next counter for block 3
leaq 4(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 96-128(%rcx),%xmm0
bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movl %r9d,64+12(%rsp)         // next counter for block 4
leaq 5(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 112-128(%rcx),%xmm1
bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
movl %r9d,80+12(%rsp)         // next counter for block 5
leaq 6(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 128-128(%rcx),%xmm0
bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movl %r9d,96+12(%rsp)         // next counter for block 6
leaq 7(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 144-128(%rcx),%xmm1
bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
xorl %ebp,%r9d
movdqu 0(%rdi),%xmm10         // start loading input block 0
.byte 102,15,56,220,232
movl %r9d,112+12(%rsp)        // next counter for block 7
cmpl $11,%eax                 // 10/12/14 rounds (AES-128/192/256)?
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 160-128(%rcx),%xmm0
jb .Lctr32_enc_done           // 10 rounds: schedule exhausted
// Two extra rounds for AES-192, four for AES-256.
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 176-128(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 192-128(%rcx),%xmm0
je .Lctr32_enc_done           // 12 rounds
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 208-128(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 224-128(%rcx),%xmm0
jmp .Lctr32_enc_done
.align 16
.Lctr32_enc_done:
// Two rounds left: xmm1 = second-to-last key, xmm0 = last round key.
// Each input block is pre-XORed with the last round key so that
// aesenclast yields ciphertext = input ^ E(counter) directly.
movdqu 16(%rdi),%xmm11
pxor %xmm0,%xmm10
movdqu 32(%rdi),%xmm12
pxor %xmm0,%xmm11
movdqu 48(%rdi),%xmm13
pxor %xmm0,%xmm12
movdqu 64(%rdi),%xmm14
pxor %xmm0,%xmm13
movdqu 80(%rdi),%xmm15
pxor %xmm0,%xmm14
pxor %xmm0,%xmm15
.byte 102,15,56,220,209       // aesenc %xmm1,%xmm2 (second-to-last round)
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movdqu 96(%rdi),%xmm1         // input block 6
leaq 128(%rdi),%rdi
.byte 102,65,15,56,221,210    // aesenclast %xmm10,%xmm2
pxor %xmm0,%xmm1
movdqu 112-128(%rdi),%xmm10   // input block 7 (rdi already advanced)
.byte 102,65,15,56,221,219    // aesenclast %xmm11,%xmm3
pxor %xmm0,%xmm10
movdqa 0(%rsp),%xmm11         // start reloading next counter blocks
.byte 102,65,15,56,221,228    // aesenclast %xmm12,%xmm4
.byte 102,65,15,56,221,237    // aesenclast %xmm13,%xmm5
movdqa 16(%rsp),%xmm12
movdqa 32(%rsp),%xmm13
.byte 102,65,15,56,221,246    // aesenclast %xmm14,%xmm6
.byte 102,65,15,56,221,255    // aesenclast %xmm15,%xmm7
movdqa 48(%rsp),%xmm14
movdqa 64(%rsp),%xmm15
.byte 102,68,15,56,221,193    // aesenclast %xmm1,%xmm8
movdqa 80(%rsp),%xmm0
movups 16-128(%rcx),%xmm1     // round key 1 for the next iteration
.byte 102,69,15,56,221,202    // aesenclast %xmm10,%xmm9
// Store 8 ciphertext blocks while rotating the refreshed counters in.
movups %xmm2,(%rsi)
movdqa %xmm11,%xmm2
movups %xmm3,16(%rsi)
movdqa %xmm12,%xmm3
movups %xmm4,32(%rsi)
movdqa %xmm13,%xmm4
movups %xmm5,48(%rsi)
movdqa %xmm14,%xmm5
movups %xmm6,64(%rsi)
movdqa %xmm15,%xmm6
movups %xmm7,80(%rsi)
movdqa %xmm0,%xmm7
movups %xmm8,96(%rsi)
movups %xmm9,112(%rsi)
leaq 128(%rsi),%rsi
subq $8,%rdx
jnc .Lctr32_loop8
addq $8,%rdx
jz .Lctr32_done
leaq -128(%rcx),%rcx          // drop the +128 key-pointer bias
.Lctr32_tail:
// 1..7 blocks remain in rdx; xmm2..xmm9 hold whitened counter blocks
// and eax holds the round count.
leaq 16(%rcx),%rcx
cmpq $4,%rdx
jb .Lctr32_loop3
je .Lctr32_loop4
// 5..7 blocks: run the shared 8-wide round loop (.Lenc_loop8_enter,
// defined elsewhere in this file), then XOR in only the blocks needed.
shll $4,%eax
movdqa 96(%rsp),%xmm8
pxor %xmm9,%xmm9
movups 16(%rcx),%xmm0
.byte 102,15,56,220,209       // aesenc %xmm1,%xmm2
.byte 102,15,56,220,217
leaq 32-16(%rcx,%rax,1),%rcx  // rcx = end of key schedule
negq %rax
.byte 102,15,56,220,225
addq $16,%rax                 // negative round-loop bias
movups (%rdi),%xmm10
.byte 102,15,56,220,233
.byte 102,15,56,220,241
movups 16(%rdi),%xmm11
movups 32(%rdi),%xmm12
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193    // aesenc %xmm1,%xmm8
call .Lenc_loop8_enter
movdqu 48(%rdi),%xmm13
pxor %xmm10,%xmm2
movdqu 64(%rdi),%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm10,%xmm6
movdqu %xmm5,48(%rsi)
movdqu %xmm6,64(%rsi)
cmpq $6,%rdx
jb .Lctr32_done               // exactly 5 blocks
movups 80(%rdi),%xmm11
xorps %xmm11,%xmm7
movups %xmm7,80(%rsi)
je .Lctr32_done               // exactly 6 blocks
movups 96(%rdi),%xmm12        // 7th block
xorps %xmm12,%xmm8
movups %xmm8,96(%rsi)
jmp .Lctr32_done
.align 32
.Lctr32_loop4:
// Exactly 4 blocks: straightforward round loop over xmm2..xmm5.
.byte 102,15,56,220,209       // aesenc %xmm1,%xmm2
leaq 16(%rcx),%rcx
decl %eax
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movups (%rcx),%xmm1
jnz .Lctr32_loop4
.byte 102,15,56,221,209       // aesenclast %xmm1,%xmm2
.byte 102,15,56,221,217
movups (%rdi),%xmm10
movups 16(%rdi),%xmm11
.byte 102,15,56,221,225
.byte 102,15,56,221,233
movups 32(%rdi),%xmm12
movups 48(%rdi),%xmm13
xorps %xmm10,%xmm2
movups %xmm2,(%rsi)
xorps %xmm11,%xmm3
movups %xmm3,16(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm4,32(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm5,48(%rsi)
jmp .Lctr32_done
.align 32
.Lctr32_loop3:
// 1..3 blocks: encrypt three counters, store only what is needed.
.byte 102,15,56,220,209       // aesenc %xmm1,%xmm2
leaq 16(%rcx),%rcx
decl %eax
.byte 102,15,56,220,217
.byte 102,15,56,220,225
movups (%rcx),%xmm1
jnz .Lctr32_loop3
.byte 102,15,56,221,209       // aesenclast %xmm1,%xmm2
.byte 102,15,56,221,217
.byte 102,15,56,221,225
movups (%rdi),%xmm10
xorps %xmm10,%xmm2
movups %xmm2,(%rsi)
cmpq $2,%rdx
jb .Lctr32_done               // 1 block
movups 16(%rdi),%xmm11
xorps %xmm11,%xmm3
movups %xmm3,16(%rsi)
je .Lctr32_done               // 2 blocks
movups 32(%rdi),%xmm12
xorps %xmm12,%xmm4
movups %xmm4,32(%rsi)
.Lctr32_done:
// Scrub key/counter material: zero xmm0..xmm15 and the stack scratch.
xorps %xmm0,%xmm0
xorl %ebp,%ebp
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
movaps %xmm0,0(%rsp)
pxor %xmm8,%xmm8
movaps %xmm0,16(%rsp)
pxor %xmm9,%xmm9
movaps %xmm0,32(%rsp)
pxor %xmm10,%xmm10
movaps %xmm0,48(%rsp)
pxor %xmm11,%xmm11
movaps %xmm0,64(%rsp)
pxor %xmm12,%xmm12
movaps %xmm0,80(%rsp)
pxor %xmm13,%xmm13
movaps %xmm0,96(%rsp)
pxor %xmm14,%xmm14
movaps %xmm0,112(%rsp)
pxor %xmm15,%xmm15
movq -8(%r11),%rbp            // restore rbp pushed at .Lctr32_bulk
.cfi_restore %rbp
leaq (%r11),%rsp              // restore the caller's stack pointer
.cfi_def_cfa_register %rsp
.Lctr32_epilogue:
.byte 0xf3,0xc3               // rep ret
.cfi_endproc
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
//----------------------------------------------------------------------
// void aes_hw_cbc_encrypt(const uint8_t *in,    // %rdi
//                         uint8_t *out,         // %rsi
//                         size_t length,        // %rdx (bytes)
//                         const AES_KEY *key,   // %rcx
//                         uint8_t ivec[16],     // %r8, updated on return
//                         int enc);             // %r9d, nonzero = encrypt
//
// NOTE(review): mapping inferred from the SysV AMD64 ABI and the code
// below (240(%rcx) read as round count; the final chaining value is
// written back through %r8) -- confirm against the C prototype.
//
// Encryption is inherently serial (one block at a time). Decryption is
// parallel: 8 blocks per pass (or 6 on CPUs selected by the ia32cap
// test), with scalar tails for the remainder.
//----------------------------------------------------------------------
.globl aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type aes_hw_cbc_encrypt,@function
.align 16
aes_hw_cbc_encrypt:
.cfi_startproc
testq %rdx,%rdx
jz .Lcbc_ret                  // zero length: nothing to do
movl 240(%rcx),%r10d          // r10d = round count
movq %rcx,%r11                // r11 = key (rcx cycles through rounds)
testl %r9d,%r9d
jz .Lcbc_decrypt
// ---------------- CBC encrypt ----------------
movups (%r8),%xmm2            // xmm2 = IV / running chaining value
movl %r10d,%eax
cmpq $16,%rdx
jb .Lcbc_enc_tail             // less than one full block
subq $16,%rdx
jmp .Lcbc_enc_loop
.align 16
.Lcbc_enc_loop:
movups (%rdi),%xmm3           // plaintext block
leaq 16(%rdi),%rdi
movups (%rcx),%xmm0           // round key 0
movups 16(%rcx),%xmm1         // round key 1
xorps %xmm0,%xmm3             // fold rk0 into the plaintext...
leaq 32(%rcx),%rcx
xorps %xmm3,%xmm2             // ...then chain: pt ^ rk0 ^ prev_ct
.Loop_enc1_6:
.byte 102,15,56,220,209       // aesenc %xmm1,%xmm2
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_enc1_6
.byte 102,15,56,221,209       // aesenclast %xmm1,%xmm2
movl %r10d,%eax               // reset round counter
movq %r11,%rcx                // reset key pointer
movups %xmm2,0(%rsi)          // ciphertext = new chaining value
leaq 16(%rsi),%rsi
subq $16,%rdx
jnc .Lcbc_enc_loop
addq $16,%rdx
jnz .Lcbc_enc_tail            // partial final block
pxor %xmm0,%xmm0              // scrub
pxor %xmm1,%xmm1
movups %xmm2,(%r8)            // write back the final IV
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
jmp .Lcbc_ret
.Lcbc_enc_tail:
// Partial block: copy the rdx remaining input bytes to out, zero-pad to
// 16 bytes in place, then run the loop above once more on that block.
movq %rdx,%rcx
xchgq %rdi,%rsi               // movsb copies (%rsi) -> (%rdi)
.long 0x9066A4F3              // rep movsb; nop
movl $16,%ecx
subq %rdx,%rcx
xorl %eax,%eax
.long 0x9066AAF3              // rep stosb; nop (zero padding, al = 0)
leaq -16(%rdi),%rdi           // rewind to the padded block
movl %r10d,%eax
movq %rdi,%rsi                // encrypt it in place
movq %r11,%rcx
xorq %rdx,%rdx                // exactly one more iteration
jmp .Lcbc_enc_loop
.align 16
.Lcbc_decrypt:
cmpq $16,%rdx
jne .Lcbc_decrypt_bulk
// ---------------- CBC decrypt, single block ----------------
movdqu (%rdi),%xmm2           // ciphertext
movdqu (%r8),%xmm3            // IV
movdqa %xmm2,%xmm4            // keep ct: it becomes the next IV
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_dec1_7:
.byte 102,15,56,222,209       // aesdec %xmm1,%xmm2
decl %r10d
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_dec1_7
.byte 102,15,56,223,209       // aesdeclast %xmm1,%xmm2
pxor %xmm0,%xmm0              // scrub
pxor %xmm1,%xmm1
movdqu %xmm4,(%r8)            // new IV = this ciphertext
xorps %xmm3,%xmm2             // unchain with the old IV
pxor %xmm3,%xmm3
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
jmp .Lcbc_ret
.align 16
.Lcbc_decrypt_bulk:
leaq (%rsp),%r11              // r11 = frame anchor
.cfi_def_cfa_register %r11
pushq %rbp
.cfi_offset %rbp,-16
subq $16,%rsp                 // one block of scratch for the partial tail
andq $-16,%rsp
movq %rcx,%rbp                // rbp = key (repurposed in the 8x loop)
movups (%r8),%xmm10           // xmm10 = IV / chaining value
movl %r10d,%eax
cmpq $0x50,%rdx
jbe .Lcbc_dec_tail            // <= 5 blocks
// Load 6 ciphertext blocks into xmm2..xmm7 and keep copies in
// xmm11..xmm15 -- each block's ciphertext is the next one's chain value.
movups (%rcx),%xmm0
movdqu 0(%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqa %xmm2,%xmm11
movdqu 32(%rdi),%xmm4
movdqa %xmm3,%xmm12
movdqu 48(%rdi),%xmm5
movdqa %xmm4,%xmm13
movdqu 64(%rdi),%xmm6
movdqa %xmm5,%xmm14
movdqu 80(%rdi),%xmm7
movdqa %xmm6,%xmm15
leaq OPENSSL_ia32cap_P(%rip),%r9
movl 4(%r9),%r9d              // CPUID feature word (second dword)
cmpq $0x70,%rdx
jbe .Lcbc_dec_six_or_seven
andl $71303168,%r9d           // 0x4400000: same feature mask as CTR path
subq $0x50,%rdx
cmpl $4194304,%r9d
je .Lcbc_dec_loop6_enter      // MOVBE-class CPUs: 6-wide loop
subq $0x20,%rdx
leaq 112(%rcx),%rcx           // +112 key-pointer bias for NN-112 loads
jmp .Lcbc_dec_loop8_enter
.align 16
.Lcbc_dec_loop8:
movups %xmm9,(%rsi)           // store block 7 of the previous pass
leaq 16(%rsi),%rsi
.Lcbc_dec_loop8_enter:
// 8 blocks in flight (xmm2..xmm9), all pre-XORed with round key 0.
movdqu 96(%rdi),%xmm8
pxor %xmm0,%xmm2
movdqu 112(%rdi),%xmm9
pxor %xmm0,%xmm3
movups 16-112(%rcx),%xmm1
pxor %xmm0,%xmm4
movq $-1,%rbp
cmpq $0x70,%rdx               // CF=1 iff no full 8-block pass follows
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
pxor %xmm0,%xmm7
pxor %xmm0,%xmm8
.byte 102,15,56,222,209       // aesdec %xmm1,%xmm2
pxor %xmm0,%xmm9
movups 32-112(%rcx),%xmm0
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193    // aesdec %xmm1,%xmm8
adcq $0,%rbp                  // rbp = 0 on the last pass, else -1
andq $128,%rbp                // -> 0 or 128
.byte 102,68,15,56,222,201    // aesdec %xmm1,%xmm9
addq %rdi,%rbp                // rbp = where the next pass's chain/data
                              // blocks are reloaded from (aliases the
                              // current input on the last pass, so we
                              // never read past the end)
movups 48-112(%rcx),%xmm1
.byte 102,15,56,222,208       // aesdec %xmm0,%xmm2 (next round group)
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 64-112(%rcx),%xmm0
nop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 80-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 96-112(%rcx),%xmm0
nop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 112-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 128-112(%rcx),%xmm0
nop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 144-112(%rcx),%xmm1
cmpl $11,%eax                 // 10/12/14 rounds (AES-128/192/256)?
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 160-112(%rcx),%xmm0
jb .Lcbc_dec_done             // 10 rounds: schedule exhausted
// Two extra rounds for AES-192, four for AES-256.
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 176-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 192-112(%rcx),%xmm0
je .Lcbc_dec_done             // 12 rounds
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 208-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 224-112(%rcx),%xmm0
jmp .Lcbc_dec_done
.align 16
.Lcbc_dec_done:
// Last two rounds. The chain values (xmm10 = previous last ct,
// xmm11..xmm15 and the reloads below = preceding ciphertexts) are
// pre-XORed with the last round key so aesdeclast unchains for free.
.byte 102,15,56,222,209
.byte 102,15,56,222,217
pxor %xmm0,%xmm10
pxor %xmm0,%xmm11
.byte 102,15,56,222,225
.byte 102,15,56,222,233
pxor %xmm0,%xmm12
pxor %xmm0,%xmm13
.byte 102,15,56,222,241
.byte 102,15,56,222,249
pxor %xmm0,%xmm14
pxor %xmm0,%xmm15
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movdqu 80(%rdi),%xmm1         // chain value for block 6
.byte 102,65,15,56,223,210    // aesdeclast %xmm10,%xmm2
movdqu 96(%rdi),%xmm10        // chain value for block 7
pxor %xmm0,%xmm1
.byte 102,65,15,56,223,219    // aesdeclast %xmm11,%xmm3
pxor %xmm0,%xmm10
movdqu 112(%rdi),%xmm0        // last ct = next pass's chaining value
.byte 102,65,15,56,223,228    // aesdeclast %xmm12,%xmm4
leaq 128(%rdi),%rdi
movdqu 0(%rbp),%xmm11         // reload blocks for the next pass (rbp
.byte 102,65,15,56,223,237    // aesdeclast %xmm13,%xmm5
.byte 102,65,15,56,223,246    // aesdeclast %xmm14,%xmm6
movdqu 16(%rbp),%xmm12
movdqu 32(%rbp),%xmm13        // aliases current input on the last pass)
.byte 102,65,15,56,223,255    // aesdeclast %xmm15,%xmm7
.byte 102,68,15,56,223,193    // aesdeclast %xmm1,%xmm8
movdqu 48(%rbp),%xmm14
movdqu 64(%rbp),%xmm15
.byte 102,69,15,56,223,202    // aesdeclast %xmm10,%xmm9
movdqa %xmm0,%xmm10           // carry chaining value forward
movdqu 80(%rbp),%xmm1
movups -112(%rcx),%xmm0       // round key 0 for the next pass
// Store 7 plaintext blocks (block 7 is stored at the loop top) while
// rotating the reloaded ciphertext blocks into xmm2..xmm7.
movups %xmm2,(%rsi)
movdqa %xmm11,%xmm2
movups %xmm3,16(%rsi)
movdqa %xmm12,%xmm3
movups %xmm4,32(%rsi)
movdqa %xmm13,%xmm4
movups %xmm5,48(%rsi)
movdqa %xmm14,%xmm5
movups %xmm6,64(%rsi)
movdqa %xmm15,%xmm6
movups %xmm7,80(%rsi)
movdqa %xmm1,%xmm7
movups %xmm8,96(%rsi)
leaq 112(%rsi),%rsi
subq $0x80,%rdx
ja .Lcbc_dec_loop8
movaps %xmm9,%xmm2
leaq -112(%rcx),%rcx          // drop the key-pointer bias
addq $0x70,%rdx
jle .Lcbc_dec_clear_tail_collected
movups %xmm9,(%rsi)           // flush block 7 before handling the tail
leaq 16(%rsi),%rsi
cmpq $0x50,%rdx
jbe .Lcbc_dec_tail
movaps %xmm11,%xmm2
.Lcbc_dec_six_or_seven:
cmpq $0x60,%rdx
ja .Lcbc_dec_seven
// Exactly 6 blocks.
movaps %xmm7,%xmm8            // save ct of block 5: next chaining value
call _aesni_decrypt6
pxor %xmm10,%xmm2             // unchain with IV / preceding ciphertexts
movaps %xmm8,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3              // scrub as we go
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
pxor %xmm14,%xmm6
movdqu %xmm5,48(%rsi)
pxor %xmm5,%xmm5
pxor %xmm15,%xmm7
movdqu %xmm6,64(%rsi)
pxor %xmm6,%xmm6
leaq 80(%rsi),%rsi
movdqa %xmm7,%xmm2            // last plaintext, stored by the tail code
pxor %xmm7,%xmm7
jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_seven:
// Exactly 7 blocks: use the 8-wide primitive with a zero 8th block.
movups 96(%rdi),%xmm8
xorps %xmm9,%xmm9
call _aesni_decrypt8
movups 80(%rdi),%xmm9         // chain value for block 6
pxor %xmm10,%xmm2
movups 96(%rdi),%xmm10        // next chaining value (last ct)
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
pxor %xmm14,%xmm6
movdqu %xmm5,48(%rsi)
pxor %xmm5,%xmm5
pxor %xmm15,%xmm7
movdqu %xmm6,64(%rsi)
pxor %xmm6,%xmm6
pxor %xmm9,%xmm8
movdqu %xmm7,80(%rsi)
pxor %xmm7,%xmm7
leaq 96(%rsi),%rsi
movdqa %xmm8,%xmm2            // last plaintext, stored by the tail code
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_loop6:
movups %xmm7,(%rsi)           // store block 5 of the previous pass
leaq 16(%rsi),%rsi
movdqu 0(%rdi),%xmm2          // load 6 blocks + chain copies, as above
movdqu 16(%rdi),%xmm3
movdqa %xmm2,%xmm11
movdqu 32(%rdi),%xmm4
movdqa %xmm3,%xmm12
movdqu 48(%rdi),%xmm5
movdqa %xmm4,%xmm13
movdqu 64(%rdi),%xmm6
movdqa %xmm5,%xmm14
movdqu 80(%rdi),%xmm7
movdqa %xmm6,%xmm15
.Lcbc_dec_loop6_enter:
leaq 96(%rdi),%rdi
movdqa %xmm7,%xmm8            // save ct of block 5: next chaining value
call _aesni_decrypt6
pxor %xmm10,%xmm2
movdqa %xmm8,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm14,%xmm6
movq %rbp,%rcx                // restore key pointer for the helper
movdqu %xmm5,48(%rsi)
pxor %xmm15,%xmm7
movl %r10d,%eax               // restore round count
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
subq $0x60,%rdx
ja .Lcbc_dec_loop6
movdqa %xmm7,%xmm2
addq $0x50,%rdx
jle .Lcbc_dec_clear_tail_collected
movups %xmm7,(%rsi)           // flush block 5 before the tail
leaq 16(%rsi),%rsi
.Lcbc_dec_tail:
// 1..5 blocks remain. Load incrementally, branching out as soon as the
// count is known; xmm11..xmm15 keep ciphertext copies for unchaining.
movups (%rdi),%xmm2
subq $0x10,%rdx
jbe .Lcbc_dec_one
movups 16(%rdi),%xmm3
movaps %xmm2,%xmm11
subq $0x10,%rdx
jbe .Lcbc_dec_two
movups 32(%rdi),%xmm4
movaps %xmm3,%xmm12
subq $0x10,%rdx
jbe .Lcbc_dec_three
movups 48(%rdi),%xmm5
movaps %xmm4,%xmm13
subq $0x10,%rdx
jbe .Lcbc_dec_four
// Five blocks: decrypt with the 6-wide helper, 6th input zeroed.
movups 64(%rdi),%xmm6
movaps %xmm5,%xmm14
movaps %xmm6,%xmm15
xorps %xmm7,%xmm7
call _aesni_decrypt6
pxor %xmm10,%xmm2
movaps %xmm15,%xmm10          // next chaining value = last ct
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3              // scrub as we go
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
pxor %xmm14,%xmm6
movdqu %xmm5,48(%rsi)
pxor %xmm5,%xmm5
leaq 64(%rsi),%rsi
movdqa %xmm6,%xmm2
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
subq $0x10,%rdx
jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_one:
movaps %xmm2,%xmm11           // this ct becomes the next IV
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_dec1_8:
.byte 102,15,56,222,209       // aesdec %xmm1,%xmm2
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_dec1_8
.byte 102,15,56,223,209       // aesdeclast %xmm1,%xmm2
xorps %xmm10,%xmm2            // unchain
movaps %xmm11,%xmm10
jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_two:
movaps %xmm3,%xmm12
call _aesni_decrypt2
pxor %xmm10,%xmm2
movaps %xmm12,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
movdqa %xmm3,%xmm2            // last plaintext, stored by the tail code
pxor %xmm3,%xmm3
leaq 16(%rsi),%rsi
jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_three:
movaps %xmm4,%xmm13
call _aesni_decrypt3
pxor %xmm10,%xmm2
movaps %xmm13,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
movdqa %xmm4,%xmm2
pxor %xmm4,%xmm4
leaq 32(%rsi),%rsi
jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_four:
movaps %xmm5,%xmm14
call _aesni_decrypt4
pxor %xmm10,%xmm2
movaps %xmm14,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
movdqa %xmm5,%xmm2
pxor %xmm5,%xmm5
leaq 48(%rsi),%rsi
jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_clear_tail_collected:
// Length was an exact multiple of the pass size: scrub the extra regs.
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
.Lcbc_dec_tail_collected:
// xmm2 = final plaintext block, xmm10 = final chaining value.
movups %xmm10,(%r8)           // write back the IV
andq $15,%rdx
jnz .Lcbc_dec_tail_partial    // partial last block
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
jmp .Lcbc_dec_ret
.align 16
.Lcbc_dec_tail_partial:
// Spill the block to the stack scratch, copy only 16-rdx... rather, only
// the bytes the caller asked for, then re-zero the scratch.
movaps %xmm2,(%rsp)
pxor %xmm2,%xmm2
movq $16,%rcx
movq %rsi,%rdi
subq %rdx,%rcx                // rcx = number of bytes to emit
leaq (%rsp),%rsi
.long 0x9066A4F3              // rep movsb; nop
movdqa %xmm2,(%rsp)           // scrub the stack copy (xmm2 is zero here)
.Lcbc_dec_ret:
xorps %xmm0,%xmm0             // scrub remaining key material
pxor %xmm1,%xmm1
movq -8(%r11),%rbp            // restore rbp pushed at .Lcbc_decrypt_bulk
.cfi_restore %rbp
leaq (%r11),%rsp              // restore the caller's stack pointer
.cfi_def_cfa_register %rsp
.Lcbc_ret:
.byte 0xf3,0xc3               // rep ret
.cfi_endproc
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
//----------------------------------------------------------------------
// int aes_hw_set_decrypt_key(const uint8_t *userKey, // %rdi
//                            int bits,               // %esi
//                            AES_KEY *key);          // %rdx
//
// Expands the encryption schedule via __aesni_set_encrypt_key, then
// converts it in place to the decryption schedule (Equivalent Inverse
// Cipher): round keys reversed, with AESIMC (InvMixColumns) applied to
// every key except the first and last. NOTE(review): relies on
// __aesni_set_encrypt_key leaving %esi = rounds-1 (it sets $9 for
// AES-128; see .L10rounds) and %eax = its status -- confirm the other
// key sizes, which fall outside this view.
//----------------------------------------------------------------------
.globl aes_hw_set_decrypt_key
.hidden aes_hw_set_decrypt_key
.type aes_hw_set_decrypt_key,@function
.align 16
aes_hw_set_decrypt_key:
.cfi_startproc
.byte 0x48,0x83,0xEC,0x08     // subq $8,%rsp (emitted as raw bytes)
.cfi_adjust_cfa_offset 8
call __aesni_set_encrypt_key
shll $4,%esi                  // esi = (rounds-1)*16
testl %eax,%eax
jnz .Ldec_key_ret             // propagate expansion failure
leaq 16(%rdx,%rsi,1),%rdi     // rdi = last round key; rdx = first
// Swap the outer pair; first/last round keys take no InvMixColumns.
movups (%rdx),%xmm0
movups (%rdi),%xmm1
movups %xmm0,(%rdi)
movups %xmm1,(%rdx)
leaq 16(%rdx),%rdx
leaq -16(%rdi),%rdi
.Ldec_key_inverse:
// Walk inward from both ends, swapping and applying InvMixColumns.
movups (%rdx),%xmm0
movups (%rdi),%xmm1
.byte 102,15,56,219,192       // aesimc %xmm0,%xmm0
.byte 102,15,56,219,201       // aesimc %xmm1,%xmm1
leaq 16(%rdx),%rdx
leaq -16(%rdi),%rdi
movups %xmm0,16(%rdi)
movups %xmm1,-16(%rdx)
cmpq %rdx,%rdi
ja .Ldec_key_inverse
// Middle round key (pointers have met): transform it in place.
movups (%rdx),%xmm0
.byte 102,15,56,219,192       // aesimc %xmm0,%xmm0
pxor %xmm1,%xmm1              // scrub key material from registers
movups %xmm0,(%rdi)
pxor %xmm0,%xmm0
.Ldec_key_ret:
addq $8,%rsp
.cfi_adjust_cfa_offset -8
.byte 0xf3,0xc3               // rep ret
.cfi_endproc
.LSEH_end_set_decrypt_key:
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
// int aes_hw_set_encrypt_key(const uint8_t *userKey, int bits, AES_KEY *key)
//
// SysV AMD64: %rdi = userKey, %esi = bits (128/192/256), %rdx = schedule out.
// Expands userKey into the AES round-key schedule at %rdx and stores the
// round-count value at the end of the schedule (9/11/13 for 128/192/256-bit
// keys respectively, matching the convention the encrypt/decrypt loops in
// this file expect).
// Returns %eax: 0 on success, -1 if userKey or key is NULL, -2 on bad bits.
// Each key size has two code paths: the classic AESKEYGENASSIST-based one,
// and an "_alt" path built from PSHUFB + AESENCLAST for CPUs where the
// capability check below selects it.
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,@function
.align 16
aes_hw_set_encrypt_key:
__aesni_set_encrypt_key: // internal entry used by aes_hw_set_decrypt_key
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,BORINGSSL_function_hit+3(%rip)
#endif
.byte 0x48,0x83,0xEC,0x08 // subq $8,%rsp
.cfi_adjust_cfa_offset 8
movq $-1,%rax // default return: -1 (NULL argument)
testq %rdi,%rdi
jz .Lenc_key_ret
testq %rdx,%rdx
jz .Lenc_key_ret
movups (%rdi),%xmm0 // first 16 key bytes
xorps %xmm4,%xmm4 // zero scratch reg used by the expansion helpers
// Capability probe: second dword of OPENSSL_ia32cap_P, masked to
// 0x10000800 (bit 28 = AVX, bit 11 — per the OPENSSL_ia32cap layout;
// AVX set with bit 11 clear routes to the *_alt expansion paths).
leaq OPENSSL_ia32cap_P(%rip),%r10
movl 4(%r10),%r10d
andl $268437504,%r10d // 0x10000800
leaq 16(%rdx),%rax // %rax = &schedule[1], output cursor
cmpl $256,%esi
je .L14rounds
cmpl $192,%esi
je .L12rounds
cmpl $128,%esi
jne .Lbad_keybits
.L10rounds: // ---- AES-128: 11 round keys ----
movl $9,%esi // round-count value stored in the schedule
cmpl $268435456,%r10d // 0x10000000: AVX-only -> alt path
je .L10rounds_alt
movups %xmm0,(%rdx) // round key 0 = raw user key
// Ten AESKEYGENASSIST steps with rcon = 1,2,4,...,0x80,0x1b,0x36;
// each helper call folds the assist result into the running key.
.byte 102,15,58,223,200,1 // aeskeygenassist $1,%xmm0,%xmm1
call .Lkey_expansion_128_cold
.byte 102,15,58,223,200,2 // aeskeygenassist $2,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,4 // aeskeygenassist $4,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,8 // aeskeygenassist $8,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,16 // aeskeygenassist $16,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,32 // aeskeygenassist $32,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,64 // aeskeygenassist $64,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,128 // aeskeygenassist $128,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,27 // aeskeygenassist $0x1b,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,54 // aeskeygenassist $0x36,%xmm0,%xmm1
call .Lkey_expansion_128
movups %xmm0,(%rax) // final round key
movl %esi,80(%rax) // store round count at end of AES_KEY
xorl %eax,%eax // return 0
jmp .Lenc_key_ret
.align 16
.L10rounds_alt: // AES-128, keygenassist-free variant
movdqa .Lkey_rotate(%rip),%xmm5 // pshufb mask: RotWord of top word
movl $8,%r10d
movdqa .Lkey_rcon1(%rip),%xmm4 // running round constant
movdqa %xmm0,%xmm2
movdqu %xmm0,(%rdx) // round key 0 = raw user key
jmp .Loop_key128
.align 16
.Loop_key128:
// SubWord(RotWord(w)) ^ rcon via pshufb + aesenclast against rcon.
.byte 102,15,56,0,197 // pshufb %xmm5,%xmm0
.byte 102,15,56,221,196 // aesenclast %xmm4,%xmm0
pslld $1,%xmm4 // rcon <<= 1 for the next round
leaq 16(%rax),%rax
// Fold the previous key into itself: K ^= K<<32 ^ K<<64 ^ K<<96.
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0 // new round key
movdqu %xmm0,-16(%rax)
movdqa %xmm0,%xmm2
decl %r10d
jnz .Loop_key128
// Round 9 uses rcon 0x1b (the doubling sequence wraps in GF(2^8)).
movdqa .Lkey_rcon1b(%rip),%xmm4
.byte 102,15,56,0,197 // pshufb %xmm5,%xmm0
.byte 102,15,56,221,196 // aesenclast %xmm4,%xmm0
pslld $1,%xmm4 // rcon 0x1b -> 0x36 for the last round
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0
movdqu %xmm0,(%rax)
movdqa %xmm0,%xmm2
// Final round (rcon 0x36).
.byte 102,15,56,0,197 // pshufb %xmm5,%xmm0
.byte 102,15,56,221,196 // aesenclast %xmm4,%xmm0
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0
movdqu %xmm0,16(%rax)
movl %esi,96(%rax) // store round count
xorl %eax,%eax // return 0
jmp .Lenc_key_ret
.align 16
.L12rounds: // ---- AES-192: 13 round keys ----
movq 16(%rdi),%xmm2 // remaining 8 key bytes (24 total)
movl $11,%esi // round-count value stored in the schedule
cmpl $268435456,%r10d // AVX-only -> alt path
je .L12rounds_alt
movups %xmm0,(%rdx) // round key 0
// Eight AESKEYGENASSIST steps, alternating 192a/192b helpers to
// handle the 24-byte (1.5-register) schedule stride.
.byte 102,15,58,223,202,1 // aeskeygenassist $1,%xmm2,%xmm1
call .Lkey_expansion_192a_cold
.byte 102,15,58,223,202,2 // aeskeygenassist $2,%xmm2,%xmm1
call .Lkey_expansion_192b
.byte 102,15,58,223,202,4 // aeskeygenassist $4,%xmm2,%xmm1
call .Lkey_expansion_192a
.byte 102,15,58,223,202,8 // aeskeygenassist $8,%xmm2,%xmm1
call .Lkey_expansion_192b
.byte 102,15,58,223,202,16 // aeskeygenassist $16,%xmm2,%xmm1
call .Lkey_expansion_192a
.byte 102,15,58,223,202,32 // aeskeygenassist $32,%xmm2,%xmm1
call .Lkey_expansion_192b
.byte 102,15,58,223,202,64 // aeskeygenassist $64,%xmm2,%xmm1
call .Lkey_expansion_192a
.byte 102,15,58,223,202,128 // aeskeygenassist $128,%xmm2,%xmm1
call .Lkey_expansion_192b
movups %xmm0,(%rax) // final round key
movl %esi,48(%rax) // store round count
xorq %rax,%rax // return 0
jmp .Lenc_key_ret
.align 16
.L12rounds_alt: // AES-192, keygenassist-free variant
movdqa .Lkey_rotate192(%rip),%xmm5 // pshufb mask for the 192-bit stride
movdqa .Lkey_rcon1(%rip),%xmm4 // running round constant
movl $8,%r10d
movdqu %xmm0,(%rdx)
jmp .Loop_key192
.align 16
.Loop_key192:
movq %xmm2,0(%rax) // emit the 8-byte half of the previous key
movdqa %xmm2,%xmm1
.byte 102,15,56,0,213 // pshufb %xmm5,%xmm2
.byte 102,15,56,221,212 // aesenclast %xmm4,%xmm2
pslld $1,%xmm4 // rcon <<= 1
leaq 24(%rax),%rax // schedule advances 24 bytes per iteration
// Fold: xmm0 ^= xmm0<<32 ^ xmm0<<64 ^ xmm0<<96.
movdqa %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pshufd $0xff,%xmm0,%xmm3 // broadcast new last word
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pxor %xmm2,%xmm0
pxor %xmm3,%xmm2
movdqu %xmm0,-16(%rax)
decl %r10d
jnz .Loop_key192
movl %esi,32(%rax) // store round count
xorl %eax,%eax // return 0
jmp .Lenc_key_ret
.align 16
.L14rounds: // ---- AES-256: 15 round keys ----
movups 16(%rdi),%xmm2 // second 16 key bytes (32 total)
movl $13,%esi // round-count value stored in the schedule
leaq 16(%rax),%rax // cursor past the two raw-key slots
cmpl $268435456,%r10d // AVX-only -> alt path
je .L14rounds_alt
movups %xmm0,(%rdx) // round key 0 = key[0..15]
movups %xmm2,16(%rdx) // round key 1 = key[16..31]
// Alternating 256a (RotWord+rcon) / 256b (SubWord-only) steps.
.byte 102,15,58,223,202,1 // aeskeygenassist $1,%xmm2,%xmm1
call .Lkey_expansion_256a_cold
.byte 102,15,58,223,200,1 // aeskeygenassist $1,%xmm0,%xmm1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,2 // aeskeygenassist $2,%xmm2,%xmm1
call .Lkey_expansion_256a
.byte 102,15,58,223,200,2 // aeskeygenassist $2,%xmm0,%xmm1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,4 // aeskeygenassist $4,%xmm2,%xmm1
call .Lkey_expansion_256a
.byte 102,15,58,223,200,4 // aeskeygenassist $4,%xmm0,%xmm1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,8 // aeskeygenassist $8,%xmm2,%xmm1
call .Lkey_expansion_256a
.byte 102,15,58,223,200,8 // aeskeygenassist $8,%xmm0,%xmm1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,16 // aeskeygenassist $16,%xmm2,%xmm1
call .Lkey_expansion_256a
.byte 102,15,58,223,200,16 // aeskeygenassist $16,%xmm0,%xmm1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,32 // aeskeygenassist $32,%xmm2,%xmm1
call .Lkey_expansion_256a
.byte 102,15,58,223,200,32 // aeskeygenassist $32,%xmm0,%xmm1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,64 // aeskeygenassist $64,%xmm2,%xmm1
call .Lkey_expansion_256a
movups %xmm0,(%rax) // final round key
movl %esi,16(%rax) // store round count
xorq %rax,%rax // return 0
jmp .Lenc_key_ret
.align 16
.L14rounds_alt: // AES-256, keygenassist-free variant
movdqa .Lkey_rotate(%rip),%xmm5
movdqa .Lkey_rcon1(%rip),%xmm4
movl $7,%r10d
movdqu %xmm0,0(%rdx) // round key 0
movdqa %xmm2,%xmm1
movdqu %xmm2,16(%rdx) // round key 1
jmp .Loop_key256
.align 16
.Loop_key256:
// Even step: SubWord(RotWord(w)) ^ rcon.
.byte 102,15,56,0,213 // pshufb %xmm5,%xmm2
.byte 102,15,56,221,212 // aesenclast %xmm4,%xmm2
// Fold: xmm0 ^= xmm0<<32 ^ xmm0<<64 ^ xmm0<<96.
movdqa %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pslld $1,%xmm4 // rcon <<= 1
pxor %xmm2,%xmm0
movdqu %xmm0,(%rax)
decl %r10d
jz .Ldone_key256
// Odd step: SubWord only (aesenclast against zeroed xmm3 = no rcon).
pshufd $0xff,%xmm0,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,221,211 // aesenclast %xmm3,%xmm2
movdqa %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm3,%xmm1
pxor %xmm1,%xmm2
movdqu %xmm2,16(%rax)
leaq 32(%rax),%rax
movdqa %xmm2,%xmm1
jmp .Loop_key256
.Ldone_key256:
movl %esi,16(%rax) // store round count
xorl %eax,%eax // return 0
jmp .Lenc_key_ret
.align 16
.Lbad_keybits:
movq $-2,%rax // return -2: unsupported key length
.Lenc_key_ret:
// Scrub key material from all xmm registers used above.
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
addq $8,%rsp
.cfi_adjust_cfa_offset -8
.byte 0xf3,0xc3 // repz ret
.cfi_endproc
.LSEH_end_set_encrypt_key:
// ---------------------------------------------------------------------
// Key-expansion helper subroutines for the AESKEYGENASSIST paths above.
// Internal (non-ABI) calling convention:
//   %rax  = schedule output cursor (advanced by the helpers)
//   %xmm0 = current round key (low half of schedule state)
//   %xmm1 = AESKEYGENASSIST result for this step
//   %xmm2 = second key register (192/256-bit variants)
//   %xmm4 = scratch; must be zero on the first ("cold") entry so the
//           SHUFPS folds below start from a clean slate
// Each helper implements the standard schedule fold
//   K ^= K<<32; K ^= K<<64  (via the two-SHUFPS trick)
// then mixes in the broadcast SubWord/RotWord word from %xmm1.
// ---------------------------------------------------------------------
.align 16
.Lkey_expansion_128:
movups %xmm0,(%rax) // store previous round key, advance cursor
leaq 16(%rax),%rax
.Lkey_expansion_128_cold: // first step: nothing to store yet
shufps $16,%xmm0,%xmm4 // fold shifted copies of the key into itself
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1 // broadcast SubWord(RotWord(w))^rcon word
xorps %xmm1,%xmm0 // new round key in %xmm0
.byte 0xf3,0xc3 // repz ret
.align 16
.Lkey_expansion_192a:
movups %xmm0,(%rax) // store previous round key, advance cursor
leaq 16(%rax),%rax
.Lkey_expansion_192a_cold:
movaps %xmm2,%xmm5 // stash high half for the 192b interleave
.Lkey_expansion_192b_warm:
// Fold %xmm0 and update the 64-bit tail in %xmm2 in lockstep.
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
pslldq $4,%xmm3
xorps %xmm4,%xmm0
pshufd $85,%xmm1,%xmm1 // broadcast keygenassist word (lane 1)
pxor %xmm3,%xmm2
pxor %xmm1,%xmm0
pshufd $255,%xmm0,%xmm3 // broadcast new last word into the tail
pxor %xmm3,%xmm2
.byte 0xf3,0xc3 // repz ret
.align 16
.Lkey_expansion_192b:
// Repack the 24-byte stride: emit 1.5 registers of schedule, then
// fall through to the common fold.
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5
movups %xmm5,(%rax)
shufps $78,%xmm2,%xmm3
movups %xmm3,16(%rax)
leaq 32(%rax),%rax
jmp .Lkey_expansion_192b_warm
.align 16
.Lkey_expansion_256a:
movups %xmm2,(%rax) // store previous odd round key, advance cursor
leaq 16(%rax),%rax
.Lkey_expansion_256a_cold:
shufps $16,%xmm0,%xmm4 // fold even half (%xmm0)
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1 // broadcast SubWord(RotWord(w))^rcon word
xorps %xmm1,%xmm0
.byte 0xf3,0xc3 // repz ret
.align 16
.Lkey_expansion_256b:
movups %xmm0,(%rax) // store previous even round key, advance cursor
leaq 16(%rax),%rax
shufps $16,%xmm2,%xmm4 // fold odd half (%xmm2)
xorps %xmm4,%xmm2
shufps $140,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $170,%xmm1,%xmm1 // broadcast SubWord word (lane 2: no RotWord/rcon)
xorps %xmm1,%xmm2
.byte 0xf3,0xc3 // repz ret
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
// ---------------------------------------------------------------------
// Read-only constant pool shared by the routines in this file.
// ---------------------------------------------------------------------
.align 64
.Lbswap_mask: // pshufb mask: reverse the byte order of a 128-bit lane
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lincrement32: // dword counter bump by 6 — presumably for a 6-way
               // interleaved CTR loop earlier in this file (not
               // visible in this chunk)
.long 6,6,6,0
.Lincrement64: // low-qword +1 for little-endian 64-bit counters
.long 1,0,0,0
.Lxts_magic: // GF(2^128) reduction constant 0x87 for XTS tweak update
.long 0x87,0,1,0
.Lincrement1: // +1 on a big-endian 128-bit counter (last byte = 1)
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Lkey_rotate: // pshufb mask used by the *_alt key schedules:
              // broadcast RotWord() of the last key word
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
.Lkey_rotate192: // same idea for the 192-bit stride (word 1)
.long 0x04070605,0x04070605,0x04070605,0x04070605
.Lkey_rcon1: // initial round constant, broadcast to all dwords
.long 1,1,1,1
.Lkey_rcon1b: // round constant 0x1b (rcon sequence wrap in GF(2^8))
.long 0x1b,0x1b,0x1b,0x1b
// ASCII banner: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif  /* closes a preprocessor guard opened before this chunk */
.section .note.GNU-stack,"",@progbits