/* aesni-x86_64.S */
  1. // This file is generated from a similarly-named Perl script in the BoringSSL
  2. // source tree. Do not edit by hand.
  3. #if defined(__has_feature)
  4. #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
  5. #define OPENSSL_NO_ASM
  6. #endif
  7. #endif
  8. #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
  9. #if defined(BORINGSSL_PREFIX)
  10. #include <boringssl_prefix_symbols_asm.h>
  11. #endif
  12. .text
  13. .globl _aes_hw_encrypt
  14. .private_extern _aes_hw_encrypt
  15. .p2align 4
  16. _aes_hw_encrypt:
  17. #ifdef BORINGSSL_DISPATCH_TEST
  18. movb $1,_BORINGSSL_function_hit+1(%rip)
  19. #endif
  20. movups (%rdi),%xmm2
  21. movl 240(%rdx),%eax
  22. movups (%rdx),%xmm0
  23. movups 16(%rdx),%xmm1
  24. leaq 32(%rdx),%rdx
  25. xorps %xmm0,%xmm2
  26. L$oop_enc1_1:
  27. .byte 102,15,56,220,209
  28. decl %eax
  29. movups (%rdx),%xmm1
  30. leaq 16(%rdx),%rdx
  31. jnz L$oop_enc1_1
  32. .byte 102,15,56,221,209
  33. pxor %xmm0,%xmm0
  34. pxor %xmm1,%xmm1
  35. movups %xmm2,(%rsi)
  36. pxor %xmm2,%xmm2
  37. .byte 0xf3,0xc3
  38. .globl _aes_hw_decrypt
  39. .private_extern _aes_hw_decrypt
  40. .p2align 4
  41. _aes_hw_decrypt:
  42. movups (%rdi),%xmm2
  43. movl 240(%rdx),%eax
  44. movups (%rdx),%xmm0
  45. movups 16(%rdx),%xmm1
  46. leaq 32(%rdx),%rdx
  47. xorps %xmm0,%xmm2
  48. L$oop_dec1_2:
  49. .byte 102,15,56,222,209
  50. decl %eax
  51. movups (%rdx),%xmm1
  52. leaq 16(%rdx),%rdx
  53. jnz L$oop_dec1_2
  54. .byte 102,15,56,223,209
  55. pxor %xmm0,%xmm0
  56. pxor %xmm1,%xmm1
  57. movups %xmm2,(%rsi)
  58. pxor %xmm2,%xmm2
  59. .byte 0xf3,0xc3
  60. .p2align 4
  61. _aesni_encrypt2:
  62. movups (%rcx),%xmm0
  63. shll $4,%eax
  64. movups 16(%rcx),%xmm1
  65. xorps %xmm0,%xmm2
  66. xorps %xmm0,%xmm3
  67. movups 32(%rcx),%xmm0
  68. leaq 32(%rcx,%rax,1),%rcx
  69. negq %rax
  70. addq $16,%rax
  71. L$enc_loop2:
  72. .byte 102,15,56,220,209
  73. .byte 102,15,56,220,217
  74. movups (%rcx,%rax,1),%xmm1
  75. addq $32,%rax
  76. .byte 102,15,56,220,208
  77. .byte 102,15,56,220,216
  78. movups -16(%rcx,%rax,1),%xmm0
  79. jnz L$enc_loop2
  80. .byte 102,15,56,220,209
  81. .byte 102,15,56,220,217
  82. .byte 102,15,56,221,208
  83. .byte 102,15,56,221,216
  84. .byte 0xf3,0xc3
  85. .p2align 4
  86. _aesni_decrypt2:
  87. movups (%rcx),%xmm0
  88. shll $4,%eax
  89. movups 16(%rcx),%xmm1
  90. xorps %xmm0,%xmm2
  91. xorps %xmm0,%xmm3
  92. movups 32(%rcx),%xmm0
  93. leaq 32(%rcx,%rax,1),%rcx
  94. negq %rax
  95. addq $16,%rax
  96. L$dec_loop2:
  97. .byte 102,15,56,222,209
  98. .byte 102,15,56,222,217
  99. movups (%rcx,%rax,1),%xmm1
  100. addq $32,%rax
  101. .byte 102,15,56,222,208
  102. .byte 102,15,56,222,216
  103. movups -16(%rcx,%rax,1),%xmm0
  104. jnz L$dec_loop2
  105. .byte 102,15,56,222,209
  106. .byte 102,15,56,222,217
  107. .byte 102,15,56,223,208
  108. .byte 102,15,56,223,216
  109. .byte 0xf3,0xc3
  110. .p2align 4
  111. _aesni_encrypt3:
  112. movups (%rcx),%xmm0
  113. shll $4,%eax
  114. movups 16(%rcx),%xmm1
  115. xorps %xmm0,%xmm2
  116. xorps %xmm0,%xmm3
  117. xorps %xmm0,%xmm4
  118. movups 32(%rcx),%xmm0
  119. leaq 32(%rcx,%rax,1),%rcx
  120. negq %rax
  121. addq $16,%rax
  122. L$enc_loop3:
  123. .byte 102,15,56,220,209
  124. .byte 102,15,56,220,217
  125. .byte 102,15,56,220,225
  126. movups (%rcx,%rax,1),%xmm1
  127. addq $32,%rax
  128. .byte 102,15,56,220,208
  129. .byte 102,15,56,220,216
  130. .byte 102,15,56,220,224
  131. movups -16(%rcx,%rax,1),%xmm0
  132. jnz L$enc_loop3
  133. .byte 102,15,56,220,209
  134. .byte 102,15,56,220,217
  135. .byte 102,15,56,220,225
  136. .byte 102,15,56,221,208
  137. .byte 102,15,56,221,216
  138. .byte 102,15,56,221,224
  139. .byte 0xf3,0xc3
  140. .p2align 4
  141. _aesni_decrypt3:
  142. movups (%rcx),%xmm0
  143. shll $4,%eax
  144. movups 16(%rcx),%xmm1
  145. xorps %xmm0,%xmm2
  146. xorps %xmm0,%xmm3
  147. xorps %xmm0,%xmm4
  148. movups 32(%rcx),%xmm0
  149. leaq 32(%rcx,%rax,1),%rcx
  150. negq %rax
  151. addq $16,%rax
  152. L$dec_loop3:
  153. .byte 102,15,56,222,209
  154. .byte 102,15,56,222,217
  155. .byte 102,15,56,222,225
  156. movups (%rcx,%rax,1),%xmm1
  157. addq $32,%rax
  158. .byte 102,15,56,222,208
  159. .byte 102,15,56,222,216
  160. .byte 102,15,56,222,224
  161. movups -16(%rcx,%rax,1),%xmm0
  162. jnz L$dec_loop3
  163. .byte 102,15,56,222,209
  164. .byte 102,15,56,222,217
  165. .byte 102,15,56,222,225
  166. .byte 102,15,56,223,208
  167. .byte 102,15,56,223,216
  168. .byte 102,15,56,223,224
  169. .byte 0xf3,0xc3
  170. .p2align 4
  171. _aesni_encrypt4:
  172. movups (%rcx),%xmm0
  173. shll $4,%eax
  174. movups 16(%rcx),%xmm1
  175. xorps %xmm0,%xmm2
  176. xorps %xmm0,%xmm3
  177. xorps %xmm0,%xmm4
  178. xorps %xmm0,%xmm5
  179. movups 32(%rcx),%xmm0
  180. leaq 32(%rcx,%rax,1),%rcx
  181. negq %rax
  182. .byte 0x0f,0x1f,0x00
  183. addq $16,%rax
  184. L$enc_loop4:
  185. .byte 102,15,56,220,209
  186. .byte 102,15,56,220,217
  187. .byte 102,15,56,220,225
  188. .byte 102,15,56,220,233
  189. movups (%rcx,%rax,1),%xmm1
  190. addq $32,%rax
  191. .byte 102,15,56,220,208
  192. .byte 102,15,56,220,216
  193. .byte 102,15,56,220,224
  194. .byte 102,15,56,220,232
  195. movups -16(%rcx,%rax,1),%xmm0
  196. jnz L$enc_loop4
  197. .byte 102,15,56,220,209
  198. .byte 102,15,56,220,217
  199. .byte 102,15,56,220,225
  200. .byte 102,15,56,220,233
  201. .byte 102,15,56,221,208
  202. .byte 102,15,56,221,216
  203. .byte 102,15,56,221,224
  204. .byte 102,15,56,221,232
  205. .byte 0xf3,0xc3
  206. .p2align 4
  207. _aesni_decrypt4:
  208. movups (%rcx),%xmm0
  209. shll $4,%eax
  210. movups 16(%rcx),%xmm1
  211. xorps %xmm0,%xmm2
  212. xorps %xmm0,%xmm3
  213. xorps %xmm0,%xmm4
  214. xorps %xmm0,%xmm5
  215. movups 32(%rcx),%xmm0
  216. leaq 32(%rcx,%rax,1),%rcx
  217. negq %rax
  218. .byte 0x0f,0x1f,0x00
  219. addq $16,%rax
  220. L$dec_loop4:
  221. .byte 102,15,56,222,209
  222. .byte 102,15,56,222,217
  223. .byte 102,15,56,222,225
  224. .byte 102,15,56,222,233
  225. movups (%rcx,%rax,1),%xmm1
  226. addq $32,%rax
  227. .byte 102,15,56,222,208
  228. .byte 102,15,56,222,216
  229. .byte 102,15,56,222,224
  230. .byte 102,15,56,222,232
  231. movups -16(%rcx,%rax,1),%xmm0
  232. jnz L$dec_loop4
  233. .byte 102,15,56,222,209
  234. .byte 102,15,56,222,217
  235. .byte 102,15,56,222,225
  236. .byte 102,15,56,222,233
  237. .byte 102,15,56,223,208
  238. .byte 102,15,56,223,216
  239. .byte 102,15,56,223,224
  240. .byte 102,15,56,223,232
  241. .byte 0xf3,0xc3
  242. .p2align 4
  243. _aesni_encrypt6:
  244. movups (%rcx),%xmm0
  245. shll $4,%eax
  246. movups 16(%rcx),%xmm1
  247. xorps %xmm0,%xmm2
  248. pxor %xmm0,%xmm3
  249. pxor %xmm0,%xmm4
  250. .byte 102,15,56,220,209
  251. leaq 32(%rcx,%rax,1),%rcx
  252. negq %rax
  253. .byte 102,15,56,220,217
  254. pxor %xmm0,%xmm5
  255. pxor %xmm0,%xmm6
  256. .byte 102,15,56,220,225
  257. pxor %xmm0,%xmm7
  258. movups (%rcx,%rax,1),%xmm0
  259. addq $16,%rax
  260. jmp L$enc_loop6_enter
  261. .p2align 4
  262. L$enc_loop6:
  263. .byte 102,15,56,220,209
  264. .byte 102,15,56,220,217
  265. .byte 102,15,56,220,225
  266. L$enc_loop6_enter:
  267. .byte 102,15,56,220,233
  268. .byte 102,15,56,220,241
  269. .byte 102,15,56,220,249
  270. movups (%rcx,%rax,1),%xmm1
  271. addq $32,%rax
  272. .byte 102,15,56,220,208
  273. .byte 102,15,56,220,216
  274. .byte 102,15,56,220,224
  275. .byte 102,15,56,220,232
  276. .byte 102,15,56,220,240
  277. .byte 102,15,56,220,248
  278. movups -16(%rcx,%rax,1),%xmm0
  279. jnz L$enc_loop6
  280. .byte 102,15,56,220,209
  281. .byte 102,15,56,220,217
  282. .byte 102,15,56,220,225
  283. .byte 102,15,56,220,233
  284. .byte 102,15,56,220,241
  285. .byte 102,15,56,220,249
  286. .byte 102,15,56,221,208
  287. .byte 102,15,56,221,216
  288. .byte 102,15,56,221,224
  289. .byte 102,15,56,221,232
  290. .byte 102,15,56,221,240
  291. .byte 102,15,56,221,248
  292. .byte 0xf3,0xc3
  293. .p2align 4
  294. _aesni_decrypt6:
  295. movups (%rcx),%xmm0
  296. shll $4,%eax
  297. movups 16(%rcx),%xmm1
  298. xorps %xmm0,%xmm2
  299. pxor %xmm0,%xmm3
  300. pxor %xmm0,%xmm4
  301. .byte 102,15,56,222,209
  302. leaq 32(%rcx,%rax,1),%rcx
  303. negq %rax
  304. .byte 102,15,56,222,217
  305. pxor %xmm0,%xmm5
  306. pxor %xmm0,%xmm6
  307. .byte 102,15,56,222,225
  308. pxor %xmm0,%xmm7
  309. movups (%rcx,%rax,1),%xmm0
  310. addq $16,%rax
  311. jmp L$dec_loop6_enter
  312. .p2align 4
  313. L$dec_loop6:
  314. .byte 102,15,56,222,209
  315. .byte 102,15,56,222,217
  316. .byte 102,15,56,222,225
  317. L$dec_loop6_enter:
  318. .byte 102,15,56,222,233
  319. .byte 102,15,56,222,241
  320. .byte 102,15,56,222,249
  321. movups (%rcx,%rax,1),%xmm1
  322. addq $32,%rax
  323. .byte 102,15,56,222,208
  324. .byte 102,15,56,222,216
  325. .byte 102,15,56,222,224
  326. .byte 102,15,56,222,232
  327. .byte 102,15,56,222,240
  328. .byte 102,15,56,222,248
  329. movups -16(%rcx,%rax,1),%xmm0
  330. jnz L$dec_loop6
  331. .byte 102,15,56,222,209
  332. .byte 102,15,56,222,217
  333. .byte 102,15,56,222,225
  334. .byte 102,15,56,222,233
  335. .byte 102,15,56,222,241
  336. .byte 102,15,56,222,249
  337. .byte 102,15,56,223,208
  338. .byte 102,15,56,223,216
  339. .byte 102,15,56,223,224
  340. .byte 102,15,56,223,232
  341. .byte 102,15,56,223,240
  342. .byte 102,15,56,223,248
  343. .byte 0xf3,0xc3
  344. .p2align 4
  345. _aesni_encrypt8:
  346. movups (%rcx),%xmm0
  347. shll $4,%eax
  348. movups 16(%rcx),%xmm1
  349. xorps %xmm0,%xmm2
  350. xorps %xmm0,%xmm3
  351. pxor %xmm0,%xmm4
  352. pxor %xmm0,%xmm5
  353. pxor %xmm0,%xmm6
  354. leaq 32(%rcx,%rax,1),%rcx
  355. negq %rax
  356. .byte 102,15,56,220,209
  357. pxor %xmm0,%xmm7
  358. pxor %xmm0,%xmm8
  359. .byte 102,15,56,220,217
  360. pxor %xmm0,%xmm9
  361. movups (%rcx,%rax,1),%xmm0
  362. addq $16,%rax
  363. jmp L$enc_loop8_inner
  364. .p2align 4
  365. L$enc_loop8:
  366. .byte 102,15,56,220,209
  367. .byte 102,15,56,220,217
  368. L$enc_loop8_inner:
  369. .byte 102,15,56,220,225
  370. .byte 102,15,56,220,233
  371. .byte 102,15,56,220,241
  372. .byte 102,15,56,220,249
  373. .byte 102,68,15,56,220,193
  374. .byte 102,68,15,56,220,201
  375. L$enc_loop8_enter:
  376. movups (%rcx,%rax,1),%xmm1
  377. addq $32,%rax
  378. .byte 102,15,56,220,208
  379. .byte 102,15,56,220,216
  380. .byte 102,15,56,220,224
  381. .byte 102,15,56,220,232
  382. .byte 102,15,56,220,240
  383. .byte 102,15,56,220,248
  384. .byte 102,68,15,56,220,192
  385. .byte 102,68,15,56,220,200
  386. movups -16(%rcx,%rax,1),%xmm0
  387. jnz L$enc_loop8
  388. .byte 102,15,56,220,209
  389. .byte 102,15,56,220,217
  390. .byte 102,15,56,220,225
  391. .byte 102,15,56,220,233
  392. .byte 102,15,56,220,241
  393. .byte 102,15,56,220,249
  394. .byte 102,68,15,56,220,193
  395. .byte 102,68,15,56,220,201
  396. .byte 102,15,56,221,208
  397. .byte 102,15,56,221,216
  398. .byte 102,15,56,221,224
  399. .byte 102,15,56,221,232
  400. .byte 102,15,56,221,240
  401. .byte 102,15,56,221,248
  402. .byte 102,68,15,56,221,192
  403. .byte 102,68,15,56,221,200
  404. .byte 0xf3,0xc3
  405. .p2align 4
  406. _aesni_decrypt8:
  407. movups (%rcx),%xmm0
  408. shll $4,%eax
  409. movups 16(%rcx),%xmm1
  410. xorps %xmm0,%xmm2
  411. xorps %xmm0,%xmm3
  412. pxor %xmm0,%xmm4
  413. pxor %xmm0,%xmm5
  414. pxor %xmm0,%xmm6
  415. leaq 32(%rcx,%rax,1),%rcx
  416. negq %rax
  417. .byte 102,15,56,222,209
  418. pxor %xmm0,%xmm7
  419. pxor %xmm0,%xmm8
  420. .byte 102,15,56,222,217
  421. pxor %xmm0,%xmm9
  422. movups (%rcx,%rax,1),%xmm0
  423. addq $16,%rax
  424. jmp L$dec_loop8_inner
  425. .p2align 4
  426. L$dec_loop8:
  427. .byte 102,15,56,222,209
  428. .byte 102,15,56,222,217
  429. L$dec_loop8_inner:
  430. .byte 102,15,56,222,225
  431. .byte 102,15,56,222,233
  432. .byte 102,15,56,222,241
  433. .byte 102,15,56,222,249
  434. .byte 102,68,15,56,222,193
  435. .byte 102,68,15,56,222,201
  436. L$dec_loop8_enter:
  437. movups (%rcx,%rax,1),%xmm1
  438. addq $32,%rax
  439. .byte 102,15,56,222,208
  440. .byte 102,15,56,222,216
  441. .byte 102,15,56,222,224
  442. .byte 102,15,56,222,232
  443. .byte 102,15,56,222,240
  444. .byte 102,15,56,222,248
  445. .byte 102,68,15,56,222,192
  446. .byte 102,68,15,56,222,200
  447. movups -16(%rcx,%rax,1),%xmm0
  448. jnz L$dec_loop8
  449. .byte 102,15,56,222,209
  450. .byte 102,15,56,222,217
  451. .byte 102,15,56,222,225
  452. .byte 102,15,56,222,233
  453. .byte 102,15,56,222,241
  454. .byte 102,15,56,222,249
  455. .byte 102,68,15,56,222,193
  456. .byte 102,68,15,56,222,201
  457. .byte 102,15,56,223,208
  458. .byte 102,15,56,223,216
  459. .byte 102,15,56,223,224
  460. .byte 102,15,56,223,232
  461. .byte 102,15,56,223,240
  462. .byte 102,15,56,223,248
  463. .byte 102,68,15,56,223,192
  464. .byte 102,68,15,56,223,200
  465. .byte 0xf3,0xc3
  466. .globl _aes_hw_ecb_encrypt
  467. .private_extern _aes_hw_ecb_encrypt
  468. .p2align 4
  469. _aes_hw_ecb_encrypt:
  470. andq $-16,%rdx
  471. jz L$ecb_ret
  472. movl 240(%rcx),%eax
  473. movups (%rcx),%xmm0
  474. movq %rcx,%r11
  475. movl %eax,%r10d
  476. testl %r8d,%r8d
  477. jz L$ecb_decrypt
  478. cmpq $0x80,%rdx
  479. jb L$ecb_enc_tail
  480. movdqu (%rdi),%xmm2
  481. movdqu 16(%rdi),%xmm3
  482. movdqu 32(%rdi),%xmm4
  483. movdqu 48(%rdi),%xmm5
  484. movdqu 64(%rdi),%xmm6
  485. movdqu 80(%rdi),%xmm7
  486. movdqu 96(%rdi),%xmm8
  487. movdqu 112(%rdi),%xmm9
  488. leaq 128(%rdi),%rdi
  489. subq $0x80,%rdx
  490. jmp L$ecb_enc_loop8_enter
  491. .p2align 4
  492. L$ecb_enc_loop8:
  493. movups %xmm2,(%rsi)
  494. movq %r11,%rcx
  495. movdqu (%rdi),%xmm2
  496. movl %r10d,%eax
  497. movups %xmm3,16(%rsi)
  498. movdqu 16(%rdi),%xmm3
  499. movups %xmm4,32(%rsi)
  500. movdqu 32(%rdi),%xmm4
  501. movups %xmm5,48(%rsi)
  502. movdqu 48(%rdi),%xmm5
  503. movups %xmm6,64(%rsi)
  504. movdqu 64(%rdi),%xmm6
  505. movups %xmm7,80(%rsi)
  506. movdqu 80(%rdi),%xmm7
  507. movups %xmm8,96(%rsi)
  508. movdqu 96(%rdi),%xmm8
  509. movups %xmm9,112(%rsi)
  510. leaq 128(%rsi),%rsi
  511. movdqu 112(%rdi),%xmm9
  512. leaq 128(%rdi),%rdi
  513. L$ecb_enc_loop8_enter:
  514. call _aesni_encrypt8
  515. subq $0x80,%rdx
  516. jnc L$ecb_enc_loop8
  517. movups %xmm2,(%rsi)
  518. movq %r11,%rcx
  519. movups %xmm3,16(%rsi)
  520. movl %r10d,%eax
  521. movups %xmm4,32(%rsi)
  522. movups %xmm5,48(%rsi)
  523. movups %xmm6,64(%rsi)
  524. movups %xmm7,80(%rsi)
  525. movups %xmm8,96(%rsi)
  526. movups %xmm9,112(%rsi)
  527. leaq 128(%rsi),%rsi
  528. addq $0x80,%rdx
  529. jz L$ecb_ret
  530. L$ecb_enc_tail:
  531. movups (%rdi),%xmm2
  532. cmpq $0x20,%rdx
  533. jb L$ecb_enc_one
  534. movups 16(%rdi),%xmm3
  535. je L$ecb_enc_two
  536. movups 32(%rdi),%xmm4
  537. cmpq $0x40,%rdx
  538. jb L$ecb_enc_three
  539. movups 48(%rdi),%xmm5
  540. je L$ecb_enc_four
  541. movups 64(%rdi),%xmm6
  542. cmpq $0x60,%rdx
  543. jb L$ecb_enc_five
  544. movups 80(%rdi),%xmm7
  545. je L$ecb_enc_six
  546. movdqu 96(%rdi),%xmm8
  547. xorps %xmm9,%xmm9
  548. call _aesni_encrypt8
  549. movups %xmm2,(%rsi)
  550. movups %xmm3,16(%rsi)
  551. movups %xmm4,32(%rsi)
  552. movups %xmm5,48(%rsi)
  553. movups %xmm6,64(%rsi)
  554. movups %xmm7,80(%rsi)
  555. movups %xmm8,96(%rsi)
  556. jmp L$ecb_ret
  557. .p2align 4
  558. L$ecb_enc_one:
  559. movups (%rcx),%xmm0
  560. movups 16(%rcx),%xmm1
  561. leaq 32(%rcx),%rcx
  562. xorps %xmm0,%xmm2
  563. L$oop_enc1_3:
  564. .byte 102,15,56,220,209
  565. decl %eax
  566. movups (%rcx),%xmm1
  567. leaq 16(%rcx),%rcx
  568. jnz L$oop_enc1_3
  569. .byte 102,15,56,221,209
  570. movups %xmm2,(%rsi)
  571. jmp L$ecb_ret
  572. .p2align 4
  573. L$ecb_enc_two:
  574. call _aesni_encrypt2
  575. movups %xmm2,(%rsi)
  576. movups %xmm3,16(%rsi)
  577. jmp L$ecb_ret
  578. .p2align 4
  579. L$ecb_enc_three:
  580. call _aesni_encrypt3
  581. movups %xmm2,(%rsi)
  582. movups %xmm3,16(%rsi)
  583. movups %xmm4,32(%rsi)
  584. jmp L$ecb_ret
  585. .p2align 4
  586. L$ecb_enc_four:
  587. call _aesni_encrypt4
  588. movups %xmm2,(%rsi)
  589. movups %xmm3,16(%rsi)
  590. movups %xmm4,32(%rsi)
  591. movups %xmm5,48(%rsi)
  592. jmp L$ecb_ret
  593. .p2align 4
  594. L$ecb_enc_five:
  595. xorps %xmm7,%xmm7
  596. call _aesni_encrypt6
  597. movups %xmm2,(%rsi)
  598. movups %xmm3,16(%rsi)
  599. movups %xmm4,32(%rsi)
  600. movups %xmm5,48(%rsi)
  601. movups %xmm6,64(%rsi)
  602. jmp L$ecb_ret
  603. .p2align 4
  604. L$ecb_enc_six:
  605. call _aesni_encrypt6
  606. movups %xmm2,(%rsi)
  607. movups %xmm3,16(%rsi)
  608. movups %xmm4,32(%rsi)
  609. movups %xmm5,48(%rsi)
  610. movups %xmm6,64(%rsi)
  611. movups %xmm7,80(%rsi)
  612. jmp L$ecb_ret
  613. .p2align 4
  614. L$ecb_decrypt:
  615. cmpq $0x80,%rdx
  616. jb L$ecb_dec_tail
  617. movdqu (%rdi),%xmm2
  618. movdqu 16(%rdi),%xmm3
  619. movdqu 32(%rdi),%xmm4
  620. movdqu 48(%rdi),%xmm5
  621. movdqu 64(%rdi),%xmm6
  622. movdqu 80(%rdi),%xmm7
  623. movdqu 96(%rdi),%xmm8
  624. movdqu 112(%rdi),%xmm9
  625. leaq 128(%rdi),%rdi
  626. subq $0x80,%rdx
  627. jmp L$ecb_dec_loop8_enter
  628. .p2align 4
  629. L$ecb_dec_loop8:
  630. movups %xmm2,(%rsi)
  631. movq %r11,%rcx
  632. movdqu (%rdi),%xmm2
  633. movl %r10d,%eax
  634. movups %xmm3,16(%rsi)
  635. movdqu 16(%rdi),%xmm3
  636. movups %xmm4,32(%rsi)
  637. movdqu 32(%rdi),%xmm4
  638. movups %xmm5,48(%rsi)
  639. movdqu 48(%rdi),%xmm5
  640. movups %xmm6,64(%rsi)
  641. movdqu 64(%rdi),%xmm6
  642. movups %xmm7,80(%rsi)
  643. movdqu 80(%rdi),%xmm7
  644. movups %xmm8,96(%rsi)
  645. movdqu 96(%rdi),%xmm8
  646. movups %xmm9,112(%rsi)
  647. leaq 128(%rsi),%rsi
  648. movdqu 112(%rdi),%xmm9
  649. leaq 128(%rdi),%rdi
  650. L$ecb_dec_loop8_enter:
  651. call _aesni_decrypt8
  652. movups (%r11),%xmm0
  653. subq $0x80,%rdx
  654. jnc L$ecb_dec_loop8
  655. movups %xmm2,(%rsi)
  656. pxor %xmm2,%xmm2
  657. movq %r11,%rcx
  658. movups %xmm3,16(%rsi)
  659. pxor %xmm3,%xmm3
  660. movl %r10d,%eax
  661. movups %xmm4,32(%rsi)
  662. pxor %xmm4,%xmm4
  663. movups %xmm5,48(%rsi)
  664. pxor %xmm5,%xmm5
  665. movups %xmm6,64(%rsi)
  666. pxor %xmm6,%xmm6
  667. movups %xmm7,80(%rsi)
  668. pxor %xmm7,%xmm7
  669. movups %xmm8,96(%rsi)
  670. pxor %xmm8,%xmm8
  671. movups %xmm9,112(%rsi)
  672. pxor %xmm9,%xmm9
  673. leaq 128(%rsi),%rsi
  674. addq $0x80,%rdx
  675. jz L$ecb_ret
  676. L$ecb_dec_tail:
  677. movups (%rdi),%xmm2
  678. cmpq $0x20,%rdx
  679. jb L$ecb_dec_one
  680. movups 16(%rdi),%xmm3
  681. je L$ecb_dec_two
  682. movups 32(%rdi),%xmm4
  683. cmpq $0x40,%rdx
  684. jb L$ecb_dec_three
  685. movups 48(%rdi),%xmm5
  686. je L$ecb_dec_four
  687. movups 64(%rdi),%xmm6
  688. cmpq $0x60,%rdx
  689. jb L$ecb_dec_five
  690. movups 80(%rdi),%xmm7
  691. je L$ecb_dec_six
  692. movups 96(%rdi),%xmm8
  693. movups (%rcx),%xmm0
  694. xorps %xmm9,%xmm9
  695. call _aesni_decrypt8
  696. movups %xmm2,(%rsi)
  697. pxor %xmm2,%xmm2
  698. movups %xmm3,16(%rsi)
  699. pxor %xmm3,%xmm3
  700. movups %xmm4,32(%rsi)
  701. pxor %xmm4,%xmm4
  702. movups %xmm5,48(%rsi)
  703. pxor %xmm5,%xmm5
  704. movups %xmm6,64(%rsi)
  705. pxor %xmm6,%xmm6
  706. movups %xmm7,80(%rsi)
  707. pxor %xmm7,%xmm7
  708. movups %xmm8,96(%rsi)
  709. pxor %xmm8,%xmm8
  710. pxor %xmm9,%xmm9
  711. jmp L$ecb_ret
  712. .p2align 4
  713. L$ecb_dec_one:
  714. movups (%rcx),%xmm0
  715. movups 16(%rcx),%xmm1
  716. leaq 32(%rcx),%rcx
  717. xorps %xmm0,%xmm2
  718. L$oop_dec1_4:
  719. .byte 102,15,56,222,209
  720. decl %eax
  721. movups (%rcx),%xmm1
  722. leaq 16(%rcx),%rcx
  723. jnz L$oop_dec1_4
  724. .byte 102,15,56,223,209
  725. movups %xmm2,(%rsi)
  726. pxor %xmm2,%xmm2
  727. jmp L$ecb_ret
  728. .p2align 4
  729. L$ecb_dec_two:
  730. call _aesni_decrypt2
  731. movups %xmm2,(%rsi)
  732. pxor %xmm2,%xmm2
  733. movups %xmm3,16(%rsi)
  734. pxor %xmm3,%xmm3
  735. jmp L$ecb_ret
  736. .p2align 4
  737. L$ecb_dec_three:
  738. call _aesni_decrypt3
  739. movups %xmm2,(%rsi)
  740. pxor %xmm2,%xmm2
  741. movups %xmm3,16(%rsi)
  742. pxor %xmm3,%xmm3
  743. movups %xmm4,32(%rsi)
  744. pxor %xmm4,%xmm4
  745. jmp L$ecb_ret
  746. .p2align 4
  747. L$ecb_dec_four:
  748. call _aesni_decrypt4
  749. movups %xmm2,(%rsi)
  750. pxor %xmm2,%xmm2
  751. movups %xmm3,16(%rsi)
  752. pxor %xmm3,%xmm3
  753. movups %xmm4,32(%rsi)
  754. pxor %xmm4,%xmm4
  755. movups %xmm5,48(%rsi)
  756. pxor %xmm5,%xmm5
  757. jmp L$ecb_ret
  758. .p2align 4
  759. L$ecb_dec_five:
  760. xorps %xmm7,%xmm7
  761. call _aesni_decrypt6
  762. movups %xmm2,(%rsi)
  763. pxor %xmm2,%xmm2
  764. movups %xmm3,16(%rsi)
  765. pxor %xmm3,%xmm3
  766. movups %xmm4,32(%rsi)
  767. pxor %xmm4,%xmm4
  768. movups %xmm5,48(%rsi)
  769. pxor %xmm5,%xmm5
  770. movups %xmm6,64(%rsi)
  771. pxor %xmm6,%xmm6
  772. pxor %xmm7,%xmm7
  773. jmp L$ecb_ret
  774. .p2align 4
  775. L$ecb_dec_six:
  776. call _aesni_decrypt6
  777. movups %xmm2,(%rsi)
  778. pxor %xmm2,%xmm2
  779. movups %xmm3,16(%rsi)
  780. pxor %xmm3,%xmm3
  781. movups %xmm4,32(%rsi)
  782. pxor %xmm4,%xmm4
  783. movups %xmm5,48(%rsi)
  784. pxor %xmm5,%xmm5
  785. movups %xmm6,64(%rsi)
  786. pxor %xmm6,%xmm6
  787. movups %xmm7,80(%rsi)
  788. pxor %xmm7,%xmm7
  789. L$ecb_ret:
  790. xorps %xmm0,%xmm0
  791. pxor %xmm1,%xmm1
  792. .byte 0xf3,0xc3
//------------------------------------------------------------------------------
// void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
//                                  size_t blocks, const AES_KEY *key,
//                                  const uint8_t ivec[16])
//
// AES-CTR encryption of `blocks` full 16-byte blocks using AES-NI.
// SysV AMD64 register roles (as used below):
//   rdi = in, rsi = out, rdx = blocks, rcx = key schedule, r8 = ivec.
//   240(%rcx) holds the round count; 12(%r8) is the big-endian 32-bit
//   counter word that gets incremented per block.
//
// AES-NI and friends are emitted as raw bytes for old assemblers:
//   .byte 102,15,56,220,NN    = aesenc  %xmmA,%xmmB
//   .byte 102,15,56,221,NN    = aesenclast
//   .byte 102,68,... / 102,65,... add REX bits for xmm8-xmm15 operands
//   .byte 102,15,58,34,NN,3   = pinsrd $3,%r32,%xmmN  (patch counter word)
//   .byte 0x0f,0x38,0xf1,...  = movbe %eax,disp(%rsp) (byte-swapping store)
//   .byte 0x66,0x90           = 2-byte nop (scheduling padding)
//   .byte 0xf3,0xc3           = rep ret
//
// Stack frame (bulk path): 8 counter blocks pre-XORed with round key 0 are
// kept at 0..112(%rsp); r11 saves the caller rsp, rbp is saved below it.
// All xmm registers and the stack scratch are zeroed before returning.
//------------------------------------------------------------------------------
.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks
.p2align 4
_aes_hw_ctr32_encrypt_blocks:
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit(%rip)
#endif
// Fast path: exactly one block — plain single-block AES of the counter.
cmpq $1,%rdx
jne L$ctr32_bulk
movups (%r8),%xmm2
movups (%rdi),%xmm3
movl 240(%rcx),%edx
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_enc1_5:
.byte 102,15,56,220,209
decl %edx
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_enc1_5
.byte 102,15,56,221,209
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
xorps %xmm3,%xmm2
pxor %xmm3,%xmm3
movups %xmm2,(%rsi)
xorps %xmm2,%xmm2
jmp L$ctr32_epilogue
.p2align 4
L$ctr32_bulk:
// Bulk path: build 8 counter blocks (already XORed with round key 0) on a
// 16-byte-aligned 128-byte stack frame.  r8d becomes the host-order counter;
// ebp caches key[0].word[3] so stored counters can be pre-whitened by XOR.
leaq (%rsp),%r11
pushq %rbp
subq $128,%rsp
andq $-16,%rsp
movdqu (%r8),%xmm2
movdqu (%rcx),%xmm0
movl 12(%r8),%r8d
pxor %xmm0,%xmm2
movl 12(%rcx),%ebp
movdqa %xmm2,0(%rsp)
bswapl %r8d
movdqa %xmm2,%xmm3
movdqa %xmm2,%xmm4
movdqa %xmm2,%xmm5
movdqa %xmm2,64(%rsp)
movdqa %xmm2,80(%rsp)
movdqa %xmm2,96(%rsp)
movq %rdx,%r10
movdqa %xmm2,112(%rsp)
// Counters +1..+7: bswap to big-endian, XOR with the cached key word, and
// patch into lane 3 of the stacked blocks (pinsrd or direct dword store).
leaq 1(%r8),%rax
leaq 2(%r8),%rdx
bswapl %eax
bswapl %edx
xorl %ebp,%eax
xorl %ebp,%edx
.byte 102,15,58,34,216,3
leaq 3(%r8),%rax
movdqa %xmm3,16(%rsp)
.byte 102,15,58,34,226,3
bswapl %eax
movq %r10,%rdx
leaq 4(%r8),%r10
movdqa %xmm4,32(%rsp)
xorl %ebp,%eax
bswapl %r10d
.byte 102,15,58,34,232,3
xorl %ebp,%r10d
movdqa %xmm5,48(%rsp)
leaq 5(%r8),%r9
movl %r10d,64+12(%rsp)
bswapl %r9d
leaq 6(%r8),%r10
movl 240(%rcx),%eax
xorl %ebp,%r9d
bswapl %r10d
movl %r9d,80+12(%rsp)
xorl %ebp,%r10d
leaq 7(%r8),%r9
movl %r10d,96+12(%rsp)
bswapl %r9d
// CPU capability word selects the 6x (MOVBE-based) or 8x pipeline.
leaq _OPENSSL_ia32cap_P(%rip),%r10
movl 4(%r10),%r10d
xorl %ebp,%r9d
andl $71303168,%r10d
movl %r9d,112+12(%rsp)
movups 16(%rcx),%xmm1
movdqa 64(%rsp),%xmm6
movdqa 80(%rsp),%xmm7
cmpq $8,%rdx
jb L$ctr32_tail
subq $6,%rdx
cmpl $4194304,%r10d
je L$ctr32_6x
leaq 128(%rcx),%rcx
subq $2,%rdx
jmp L$ctr32_loop8
.p2align 4
L$ctr32_6x:
// 6-blocks-at-a-time variant: key pointer biased to the schedule end so a
// single counter (r10) can index rounds; ebp is byte-swapped once so MOVBE
// can store counters big-endian directly.
shll $4,%eax
movl $48,%r10d
bswapl %ebp
leaq 32(%rcx,%rax,1),%rcx
subq %rax,%r10
jmp L$ctr32_loop6
.p2align 4
L$ctr32_loop6:
// First two AES rounds interleaved with the next six counter updates
// (MOVBE stores straight into the stacked blocks), then tail via L$enc_loop6.
addl $6,%r8d
movups -48(%rcx,%r10,1),%xmm0
.byte 102,15,56,220,209
movl %r8d,%eax
xorl %ebp,%eax
.byte 102,15,56,220,217
.byte 0x0f,0x38,0xf1,0x44,0x24,12
leal 1(%r8),%eax
.byte 102,15,56,220,225
xorl %ebp,%eax
.byte 0x0f,0x38,0xf1,0x44,0x24,28
.byte 102,15,56,220,233
leal 2(%r8),%eax
xorl %ebp,%eax
.byte 102,15,56,220,241
.byte 0x0f,0x38,0xf1,0x44,0x24,44
leal 3(%r8),%eax
.byte 102,15,56,220,249
movups -32(%rcx,%r10,1),%xmm1
xorl %ebp,%eax
.byte 102,15,56,220,208
.byte 0x0f,0x38,0xf1,0x44,0x24,60
leal 4(%r8),%eax
.byte 102,15,56,220,216
xorl %ebp,%eax
.byte 0x0f,0x38,0xf1,0x44,0x24,76
.byte 102,15,56,220,224
leal 5(%r8),%eax
xorl %ebp,%eax
.byte 102,15,56,220,232
.byte 0x0f,0x38,0xf1,0x44,0x24,92
movq %r10,%rax
.byte 102,15,56,220,240
.byte 102,15,56,220,248
movups -16(%rcx,%r10,1),%xmm0
call L$enc_loop6
// XOR keystream (xmm2-7) with plaintext, reload next counters from stack.
movdqu (%rdi),%xmm8
movdqu 16(%rdi),%xmm9
movdqu 32(%rdi),%xmm10
movdqu 48(%rdi),%xmm11
movdqu 64(%rdi),%xmm12
movdqu 80(%rdi),%xmm13
leaq 96(%rdi),%rdi
movups -64(%rcx,%r10,1),%xmm1
pxor %xmm2,%xmm8
movaps 0(%rsp),%xmm2
pxor %xmm3,%xmm9
movaps 16(%rsp),%xmm3
pxor %xmm4,%xmm10
movaps 32(%rsp),%xmm4
pxor %xmm5,%xmm11
movaps 48(%rsp),%xmm5
pxor %xmm6,%xmm12
movaps 64(%rsp),%xmm6
pxor %xmm7,%xmm13
movaps 80(%rsp),%xmm7
movdqu %xmm8,(%rsi)
movdqu %xmm9,16(%rsi)
movdqu %xmm10,32(%rsi)
movdqu %xmm11,48(%rsi)
movdqu %xmm12,64(%rsi)
movdqu %xmm13,80(%rsi)
leaq 96(%rsi),%rsi
subq $6,%rdx
jnc L$ctr32_loop6
// 0 < remaining < 6: undo the key-pointer bias and fall into the tail.
addq $6,%rdx
jz L$ctr32_done
leal -48(%r10),%eax
leaq -80(%rcx,%r10,1),%rcx
negl %eax
shrl $4,%eax
jmp L$ctr32_tail
.p2align 5
L$ctr32_loop8:
// 8-blocks-at-a-time pipeline: each group of aesenc ops advances all eight
// state registers (xmm2-xmm9) one round while the next 8 counter values are
// computed (lea/bswap/xor) and stored into the stack blocks for the
// following iteration.  Key pointer is biased by +128 (hence the
// "NN-128(%rcx)" round-key loads).
addl $8,%r8d
movdqa 96(%rsp),%xmm8
.byte 102,15,56,220,209
movl %r8d,%r9d
movdqa 112(%rsp),%xmm9
.byte 102,15,56,220,217
bswapl %r9d
movups 32-128(%rcx),%xmm0
.byte 102,15,56,220,225
xorl %ebp,%r9d
nop
.byte 102,15,56,220,233
movl %r9d,0+12(%rsp)
leaq 1(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 48-128(%rcx),%xmm1
bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
movl %r9d,16+12(%rsp)
leaq 2(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 64-128(%rcx),%xmm0
bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movl %r9d,32+12(%rsp)
leaq 3(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 80-128(%rcx),%xmm1
bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
movl %r9d,48+12(%rsp)
leaq 4(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 96-128(%rcx),%xmm0
bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movl %r9d,64+12(%rsp)
leaq 5(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 112-128(%rcx),%xmm1
bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
movl %r9d,80+12(%rsp)
leaq 6(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 128-128(%rcx),%xmm0
bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movl %r9d,96+12(%rsp)
leaq 7(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 144-128(%rcx),%xmm1
bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
xorl %ebp,%r9d
movdqu 0(%rdi),%xmm10
.byte 102,15,56,220,232
movl %r9d,112+12(%rsp)
cmpl $11,%eax
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 160-128(%rcx),%xmm0
// Round-count dispatch: <11 rounds (AES-128) done; ==11 (AES-192) two more
// rounds; else (AES-256) four more rounds.
jb L$ctr32_enc_done
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 176-128(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 192-128(%rcx),%xmm0
je L$ctr32_enc_done
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 208-128(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 224-128(%rcx),%xmm0
jmp L$ctr32_enc_done
.p2align 4
L$ctr32_enc_done:
// Last round: load plaintext, pre-XOR it with the final round key (xmm0),
// then aesenclast folds key+plaintext into the keystream in one step.
movdqu 16(%rdi),%xmm11
pxor %xmm0,%xmm10
movdqu 32(%rdi),%xmm12
pxor %xmm0,%xmm11
movdqu 48(%rdi),%xmm13
pxor %xmm0,%xmm12
movdqu 64(%rdi),%xmm14
pxor %xmm0,%xmm13
movdqu 80(%rdi),%xmm15
pxor %xmm0,%xmm14
pxor %xmm0,%xmm15
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movdqu 96(%rdi),%xmm1
leaq 128(%rdi),%rdi
.byte 102,65,15,56,221,210
pxor %xmm0,%xmm1
movdqu 112-128(%rdi),%xmm10
.byte 102,65,15,56,221,219
pxor %xmm0,%xmm10
movdqa 0(%rsp),%xmm11
.byte 102,65,15,56,221,228
.byte 102,65,15,56,221,237
movdqa 16(%rsp),%xmm12
movdqa 32(%rsp),%xmm13
.byte 102,65,15,56,221,246
.byte 102,65,15,56,221,255
movdqa 48(%rsp),%xmm14
movdqa 64(%rsp),%xmm15
.byte 102,68,15,56,221,193
movdqa 80(%rsp),%xmm0
movups 16-128(%rcx),%xmm1
.byte 102,69,15,56,221,202
// Store 8 ciphertext blocks and rotate the pre-built counters back into
// xmm2-xmm7 for the next iteration.
movups %xmm2,(%rsi)
movdqa %xmm11,%xmm2
movups %xmm3,16(%rsi)
movdqa %xmm12,%xmm3
movups %xmm4,32(%rsi)
movdqa %xmm13,%xmm4
movups %xmm5,48(%rsi)
movdqa %xmm14,%xmm5
movups %xmm6,64(%rsi)
movdqa %xmm15,%xmm6
movups %xmm7,80(%rsi)
movdqa %xmm0,%xmm7
movups %xmm8,96(%rsi)
movups %xmm9,112(%rsi)
leaq 128(%rsi),%rsi
subq $8,%rdx
jnc L$ctr32_loop8
addq $8,%rdx
jz L$ctr32_done
leaq -128(%rcx),%rcx
L$ctr32_tail:
// Tail: 1-7 remaining blocks.  <4 -> 3-wide loop, ==4 -> 4-wide loop,
// 5-7 -> run the full 8-wide round function via L$enc_loop8_enter and
// store only the blocks that exist.
leaq 16(%rcx),%rcx
cmpq $4,%rdx
jb L$ctr32_loop3
je L$ctr32_loop4
shll $4,%eax
movdqa 96(%rsp),%xmm8
pxor %xmm9,%xmm9
movups 16(%rcx),%xmm0
.byte 102,15,56,220,209
.byte 102,15,56,220,217
leaq 32-16(%rcx,%rax,1),%rcx
negq %rax
.byte 102,15,56,220,225
addq $16,%rax
movups (%rdi),%xmm10
.byte 102,15,56,220,233
.byte 102,15,56,220,241
movups 16(%rdi),%xmm11
movups 32(%rdi),%xmm12
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
call L$enc_loop8_enter
movdqu 48(%rdi),%xmm13
pxor %xmm10,%xmm2
movdqu 64(%rdi),%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm10,%xmm6
movdqu %xmm5,48(%rsi)
movdqu %xmm6,64(%rsi)
cmpq $6,%rdx
jb L$ctr32_done
movups 80(%rdi),%xmm11
xorps %xmm11,%xmm7
movups %xmm7,80(%rsi)
je L$ctr32_done
movups 96(%rdi),%xmm12
xorps %xmm12,%xmm8
movups %xmm8,96(%rsi)
jmp L$ctr32_done
.p2align 5
L$ctr32_loop4:
// Exactly 4 blocks: simple round loop over xmm2-xmm5 (eax = round counter).
.byte 102,15,56,220,209
leaq 16(%rcx),%rcx
decl %eax
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movups (%rcx),%xmm1
jnz L$ctr32_loop4
.byte 102,15,56,221,209
.byte 102,15,56,221,217
movups (%rdi),%xmm10
movups 16(%rdi),%xmm11
.byte 102,15,56,221,225
.byte 102,15,56,221,233
movups 32(%rdi),%xmm12
movups 48(%rdi),%xmm13
xorps %xmm10,%xmm2
movups %xmm2,(%rsi)
xorps %xmm11,%xmm3
movups %xmm3,16(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm4,32(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm5,48(%rsi)
jmp L$ctr32_done
.p2align 5
L$ctr32_loop3:
// 1-3 blocks: encrypt three counters, store only as many as requested.
.byte 102,15,56,220,209
leaq 16(%rcx),%rcx
decl %eax
.byte 102,15,56,220,217
.byte 102,15,56,220,225
movups (%rcx),%xmm1
jnz L$ctr32_loop3
.byte 102,15,56,221,209
.byte 102,15,56,221,217
.byte 102,15,56,221,225
movups (%rdi),%xmm10
xorps %xmm10,%xmm2
movups %xmm2,(%rsi)
cmpq $2,%rdx
jb L$ctr32_done
movups 16(%rdi),%xmm11
xorps %xmm11,%xmm3
movups %xmm3,16(%rsi)
je L$ctr32_done
movups 32(%rdi),%xmm12
xorps %xmm12,%xmm4
movups %xmm4,32(%rsi)
L$ctr32_done:
// Scrub all xmm registers and the stack counter blocks (key material and
// keystream must not leak), then restore rbp/rsp from r11.
xorps %xmm0,%xmm0
xorl %ebp,%ebp
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
movaps %xmm0,0(%rsp)
pxor %xmm8,%xmm8
movaps %xmm0,16(%rsp)
pxor %xmm9,%xmm9
movaps %xmm0,32(%rsp)
pxor %xmm10,%xmm10
movaps %xmm0,48(%rsp)
pxor %xmm11,%xmm11
movaps %xmm0,64(%rsp)
pxor %xmm12,%xmm12
movaps %xmm0,80(%rsp)
pxor %xmm13,%xmm13
movaps %xmm0,96(%rsp)
pxor %xmm14,%xmm14
movaps %xmm0,112(%rsp)
pxor %xmm15,%xmm15
movq -8(%r11),%rbp
leaq (%r11),%rsp
L$ctr32_epilogue:
.byte 0xf3,0xc3
//------------------------------------------------------------------------------
// void aes_hw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
//                         const AES_KEY *key, uint8_t *ivec, int enc)
//
// AES-CBC encrypt/decrypt using AES-NI.  SysV AMD64 register roles (as used
// below): rdi = in, rsi = out, rdx = byte length, rcx = key schedule,
// r8 = ivec (updated in place), r9d = enc flag (nonzero -> encrypt).
// 240(%rcx) holds the round count.
//
// Raw-byte encodings used below:
//   .byte 102,15,56,220/221,NN = aesenc / aesenclast
//   .byte 102,15,56,222/223,NN = aesdec / aesdeclast
//   .byte 102,65/68/69,...     = same with REX bits for xmm8-xmm15
//   .long 0x9066A4F3           = rep movsb (+ nop padding), partial-block copy
//   .byte 0xf3,0xc3            = rep ret
//------------------------------------------------------------------------------
.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt
.p2align 4
_aes_hw_cbc_encrypt:
testq %rdx,%rdx
jz L$cbc_ret
movl 240(%rcx),%r10d
movq %rcx,%r11
testl %r9d,%r9d
jz L$cbc_decrypt
// ---- Encrypt path: inherently serial (each block chains into the next) ----
movups (%r8),%xmm2
movl %r10d,%eax
cmpq $16,%rdx
jb L$cbc_enc_tail
subq $16,%rdx
jmp L$cbc_enc_loop
.p2align 4
L$cbc_enc_loop:
// xmm2 = previous ciphertext (CBC chain); XOR in plaintext, encrypt, store.
movups (%rdi),%xmm3
leaq 16(%rdi),%rdi
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
xorps %xmm0,%xmm3
leaq 32(%rcx),%rcx
xorps %xmm3,%xmm2
L$oop_enc1_6:
.byte 102,15,56,220,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_enc1_6
.byte 102,15,56,221,209
movl %r10d,%eax
movq %r11,%rcx
movups %xmm2,0(%rsi)
leaq 16(%rsi),%rsi
subq $16,%rdx
jnc L$cbc_enc_loop
addq $16,%rdx
jnz L$cbc_enc_tail
// Write back the final IV and scrub.
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
movups %xmm2,(%r8)
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
jmp L$cbc_ret
L$cbc_enc_tail:
// Partial final block: copy the rdx remaining bytes into the output buffer
// (rep movsb), zero-pad to 16 bytes (rep stosb), then loop once more
// encrypting in place from the output buffer.
movq %rdx,%rcx
xchgq %rdi,%rsi
.long 0x9066A4F3
movl $16,%ecx
subq %rdx,%rcx
xorl %eax,%eax
.long 0x9066AAF3
leaq -16(%rdi),%rdi
movl %r10d,%eax
movq %rdi,%rsi
movq %r11,%rcx
xorq %rdx,%rdx
jmp L$cbc_enc_loop
.p2align 4
L$cbc_decrypt:
// ---- Decrypt path: parallelizable; special-case a single block first ----
cmpq $16,%rdx
jne L$cbc_decrypt_bulk
movdqu (%rdi),%xmm2
movdqu (%r8),%xmm3
movdqa %xmm2,%xmm4
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_dec1_7:
.byte 102,15,56,222,209
decl %r10d
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_dec1_7
.byte 102,15,56,223,209
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
movdqu %xmm4,(%r8)
xorps %xmm3,%xmm2
pxor %xmm3,%xmm3
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
jmp L$cbc_ret
.p2align 4
L$cbc_decrypt_bulk:
// Multi-block decrypt.  16-byte aligned scratch slot on the stack holds a
// partial output block; r11 saves caller rsp; xmm10 carries the running IV.
leaq (%rsp),%r11
pushq %rbp
subq $16,%rsp
andq $-16,%rsp
movq %rcx,%rbp
movups (%r8),%xmm10
movl %r10d,%eax
cmpq $0x50,%rdx
jbe L$cbc_dec_tail
// Preload 6 ciphertext blocks; keep copies (xmm11-15) for CBC chaining.
movups (%rcx),%xmm0
movdqu 0(%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqa %xmm2,%xmm11
movdqu 32(%rdi),%xmm4
movdqa %xmm3,%xmm12
movdqu 48(%rdi),%xmm5
movdqa %xmm4,%xmm13
movdqu 64(%rdi),%xmm6
movdqa %xmm5,%xmm14
movdqu 80(%rdi),%xmm7
movdqa %xmm6,%xmm15
// CPU capability word selects the 6x or 8x decrypt pipeline.
leaq _OPENSSL_ia32cap_P(%rip),%r9
movl 4(%r9),%r9d
cmpq $0x70,%rdx
jbe L$cbc_dec_six_or_seven
andl $71303168,%r9d
subq $0x50,%rdx
cmpl $4194304,%r9d
je L$cbc_dec_loop6_enter
subq $0x20,%rdx
leaq 112(%rcx),%rcx
jmp L$cbc_dec_loop8_enter
.p2align 4
L$cbc_dec_loop8:
movups %xmm9,(%rsi)
leaq 16(%rsi),%rsi
L$cbc_dec_loop8_enter:
// 8-wide decrypt.  Branchless trick below: rbp = -1; cmp sets CF when
// rdx < 0x70; adc/and turn that into rbp = in + (rdx >= 0x70 ? 128 : 0),
// the base from which chaining blocks are reloaded after the rounds.
movdqu 96(%rdi),%xmm8
pxor %xmm0,%xmm2
movdqu 112(%rdi),%xmm9
pxor %xmm0,%xmm3
movups 16-112(%rcx),%xmm1
pxor %xmm0,%xmm4
movq $-1,%rbp
cmpq $0x70,%rdx
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
pxor %xmm0,%xmm7
pxor %xmm0,%xmm8
.byte 102,15,56,222,209
pxor %xmm0,%xmm9
movups 32-112(%rcx),%xmm0
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
adcq $0,%rbp
andq $128,%rbp
.byte 102,68,15,56,222,201
addq %rdi,%rbp
movups 48-112(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 64-112(%rcx),%xmm0
nop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 80-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 96-112(%rcx),%xmm0
nop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 112-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 128-112(%rcx),%xmm0
nop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 144-112(%rcx),%xmm1
// Round-count dispatch, as in CTR: AES-128 / AES-192 / AES-256.
cmpl $11,%eax
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 160-112(%rcx),%xmm0
jb L$cbc_dec_done
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 176-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 192-112(%rcx),%xmm0
je L$cbc_dec_done
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 208-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 224-112(%rcx),%xmm0
jmp L$cbc_dec_done
.p2align 4
L$cbc_dec_done:
// Final round: pre-XOR the chaining blocks (IV + ciphertexts) with the last
// round key so aesdeclast yields plaintext directly; reload next chain
// blocks from rbp (computed branchlessly above).
.byte 102,15,56,222,209
.byte 102,15,56,222,217
pxor %xmm0,%xmm10
pxor %xmm0,%xmm11
.byte 102,15,56,222,225
.byte 102,15,56,222,233
pxor %xmm0,%xmm12
pxor %xmm0,%xmm13
.byte 102,15,56,222,241
.byte 102,15,56,222,249
pxor %xmm0,%xmm14
pxor %xmm0,%xmm15
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movdqu 80(%rdi),%xmm1
.byte 102,65,15,56,223,210
movdqu 96(%rdi),%xmm10
pxor %xmm0,%xmm1
.byte 102,65,15,56,223,219
pxor %xmm0,%xmm10
movdqu 112(%rdi),%xmm0
.byte 102,65,15,56,223,228
leaq 128(%rdi),%rdi
movdqu 0(%rbp),%xmm11
.byte 102,65,15,56,223,237
.byte 102,65,15,56,223,246
movdqu 16(%rbp),%xmm12
movdqu 32(%rbp),%xmm13
.byte 102,65,15,56,223,255
.byte 102,68,15,56,223,193
movdqu 48(%rbp),%xmm14
movdqu 64(%rbp),%xmm15
.byte 102,69,15,56,223,202
movdqa %xmm0,%xmm10
movdqu 80(%rbp),%xmm1
movups -112(%rcx),%xmm0
// Store 7 plaintext blocks (the 8th, xmm9, is stored at the loop top so the
// final partial iteration can divert it to the tail handling).
movups %xmm2,(%rsi)
movdqa %xmm11,%xmm2
movups %xmm3,16(%rsi)
movdqa %xmm12,%xmm3
movups %xmm4,32(%rsi)
movdqa %xmm13,%xmm4
movups %xmm5,48(%rsi)
movdqa %xmm14,%xmm5
movups %xmm6,64(%rsi)
movdqa %xmm15,%xmm6
movups %xmm7,80(%rsi)
movdqa %xmm1,%xmm7
movups %xmm8,96(%rsi)
leaq 112(%rsi),%rsi
subq $0x80,%rdx
ja L$cbc_dec_loop8
movaps %xmm9,%xmm2
leaq -112(%rcx),%rcx
addq $0x70,%rdx
jle L$cbc_dec_clear_tail_collected
movups %xmm9,(%rsi)
leaq 16(%rsi),%rsi
cmpq $0x50,%rdx
jbe L$cbc_dec_tail
movaps %xmm11,%xmm2
L$cbc_dec_six_or_seven:
cmpq $0x60,%rdx
ja L$cbc_dec_seven
// Exactly 6 blocks: decrypt, XOR chain, keep last ciphertext as next IV.
movaps %xmm7,%xmm8
call _aesni_decrypt6
pxor %xmm10,%xmm2
movaps %xmm8,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
pxor %xmm14,%xmm6
movdqu %xmm5,48(%rsi)
pxor %xmm5,%xmm5
pxor %xmm15,%xmm7
movdqu %xmm6,64(%rsi)
pxor %xmm6,%xmm6
leaq 80(%rsi),%rsi
movdqa %xmm7,%xmm2
pxor %xmm7,%xmm7
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_seven:
// Exactly 7 blocks: 8-wide decrypt with a zeroed 8th lane.
movups 96(%rdi),%xmm8
xorps %xmm9,%xmm9
call _aesni_decrypt8
movups 80(%rdi),%xmm9
pxor %xmm10,%xmm2
movups 96(%rdi),%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
pxor %xmm14,%xmm6
movdqu %xmm5,48(%rsi)
pxor %xmm5,%xmm5
pxor %xmm15,%xmm7
movdqu %xmm6,64(%rsi)
pxor %xmm6,%xmm6
pxor %xmm9,%xmm8
movdqu %xmm7,80(%rsi)
pxor %xmm7,%xmm7
leaq 96(%rsi),%rsi
movdqa %xmm8,%xmm2
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_loop6:
// 6-wide main loop (non-AVX CPUs).  rbp still holds the key pointer here.
movups %xmm7,(%rsi)
leaq 16(%rsi),%rsi
movdqu 0(%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqa %xmm2,%xmm11
movdqu 32(%rdi),%xmm4
movdqa %xmm3,%xmm12
movdqu 48(%rdi),%xmm5
movdqa %xmm4,%xmm13
movdqu 64(%rdi),%xmm6
movdqa %xmm5,%xmm14
movdqu 80(%rdi),%xmm7
movdqa %xmm6,%xmm15
L$cbc_dec_loop6_enter:
leaq 96(%rdi),%rdi
movdqa %xmm7,%xmm8
call _aesni_decrypt6
pxor %xmm10,%xmm2
movdqa %xmm8,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm14,%xmm6
movq %rbp,%rcx
movdqu %xmm5,48(%rsi)
pxor %xmm15,%xmm7
movl %r10d,%eax
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
subq $0x60,%rdx
ja L$cbc_dec_loop6
movdqa %xmm7,%xmm2
addq $0x50,%rdx
jle L$cbc_dec_clear_tail_collected
movups %xmm7,(%rsi)
leaq 16(%rsi),%rsi
L$cbc_dec_tail:
// 1-5 remaining blocks: load what exists, dispatch on count.
movups (%rdi),%xmm2
subq $0x10,%rdx
jbe L$cbc_dec_one
movups 16(%rdi),%xmm3
movaps %xmm2,%xmm11
subq $0x10,%rdx
jbe L$cbc_dec_two
movups 32(%rdi),%xmm4
movaps %xmm3,%xmm12
subq $0x10,%rdx
jbe L$cbc_dec_three
movups 48(%rdi),%xmm5
movaps %xmm4,%xmm13
subq $0x10,%rdx
jbe L$cbc_dec_four
// 5 blocks: 6-wide decrypt with a zeroed 6th lane.
movups 64(%rdi),%xmm6
movaps %xmm5,%xmm14
movaps %xmm6,%xmm15
xorps %xmm7,%xmm7
call _aesni_decrypt6
pxor %xmm10,%xmm2
movaps %xmm15,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
pxor %xmm14,%xmm6
movdqu %xmm5,48(%rsi)
pxor %xmm5,%xmm5
leaq 64(%rsi),%rsi
movdqa %xmm6,%xmm2
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
subq $0x10,%rdx
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_one:
movaps %xmm2,%xmm11
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_dec1_8:
.byte 102,15,56,222,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_dec1_8
.byte 102,15,56,223,209
xorps %xmm10,%xmm2
movaps %xmm11,%xmm10
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_two:
movaps %xmm3,%xmm12
call _aesni_decrypt2
pxor %xmm10,%xmm2
movaps %xmm12,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
movdqa %xmm3,%xmm2
pxor %xmm3,%xmm3
leaq 16(%rsi),%rsi
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_three:
movaps %xmm4,%xmm13
call _aesni_decrypt3
pxor %xmm10,%xmm2
movaps %xmm13,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
movdqa %xmm4,%xmm2
pxor %xmm4,%xmm4
leaq 32(%rsi),%rsi
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_four:
movaps %xmm5,%xmm14
call _aesni_decrypt4
pxor %xmm10,%xmm2
movaps %xmm14,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
movdqa %xmm5,%xmm2
pxor %xmm5,%xmm5
leaq 48(%rsi),%rsi
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_clear_tail_collected:
// Scrub lanes not holding the pending output block (xmm2) or IV (xmm10).
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
L$cbc_dec_tail_collected:
// Write back the new IV; emit the last block in full or copy a partial
// block out via the aligned stack slot.
movups %xmm10,(%r8)
andq $15,%rdx
jnz L$cbc_dec_tail_partial
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
jmp L$cbc_dec_ret
.p2align 4
L$cbc_dec_tail_partial:
// Spill the block to (%rsp) and rep-movsb only the rdx tail bytes.
movaps %xmm2,(%rsp)
pxor %xmm2,%xmm2
movq $16,%rcx
movq %rsi,%rdi
subq %rdx,%rcx
leaq (%rsp),%rsi
.long 0x9066A4F3
movdqa %xmm2,(%rsp)
L$cbc_dec_ret:
xorps %xmm0,%xmm0
pxor %xmm1,%xmm1
movq -8(%r11),%rbp
leaq (%r11),%rsp
L$cbc_ret:
.byte 0xf3,0xc3
//------------------------------------------------------------------------------
// int aes_hw_set_decrypt_key(const uint8_t *userKey, int bits, AES_KEY *key)
//
// Builds a decryption key schedule: expands the encryption schedule via
// __aesni_set_encrypt_key, then inverts it in place for the Equivalent
// Inverse Cipher — swap the first and last round keys and run AESIMC
// (.byte 102,15,56,219 = aesimc) over the middle ones from both ends inward.
// rdi = userKey, esi = bits, rdx = key; returns eax (0 on success, as set by
// __aesni_set_encrypt_key).  On return from that call esi holds the round
// count (see the movl $9/$11,%esi stores in the expansion code below).
//
// .byte 0x48,0x83,0xEC,0x08 = subq $8,%rsp, hand-encoded so the prologue has
// a fixed 4-byte size (unwind tables elsewhere rely on the exact encoding).
//------------------------------------------------------------------------------
.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key
.p2align 4
_aes_hw_set_decrypt_key:
.byte 0x48,0x83,0xEC,0x08
call __aesni_set_encrypt_key
shll $4,%esi
testl %eax,%eax
jnz L$dec_key_ret
// rdi -> last round key (rdx + 16*rounds); swap it with the first.
leaq 16(%rdx,%rsi,1),%rdi
movups (%rdx),%xmm0
movups (%rdi),%xmm1
movups %xmm0,(%rdi)
movups %xmm1,(%rdx)
leaq 16(%rdx),%rdx
leaq -16(%rdi),%rdi
L$dec_key_inverse:
// Walk rdx up and rdi down, applying aesimc and swapping, until they meet.
movups (%rdx),%xmm0
movups (%rdi),%xmm1
.byte 102,15,56,219,192
.byte 102,15,56,219,201
leaq 16(%rdx),%rdx
leaq -16(%rdi),%rdi
movups %xmm0,16(%rdi)
movups %xmm1,-16(%rdx)
cmpq %rdx,%rdi
ja L$dec_key_inverse
// Middle round key: aesimc in place; scrub key material from xmm regs.
movups (%rdx),%xmm0
.byte 102,15,56,219,192
pxor %xmm1,%xmm1
movups %xmm0,(%rdi)
pxor %xmm0,%xmm0
L$dec_key_ret:
addq $8,%rsp
.byte 0xf3,0xc3
L$SEH_end_set_decrypt_key:
  1909. .globl _aes_hw_set_encrypt_key
  1910. .private_extern _aes_hw_set_encrypt_key
  1911. .p2align 4
  1912. _aes_hw_set_encrypt_key:
/*
 * __aesni_set_encrypt_key — AES-NI encryption key schedule.
 *
 * C equivalent: int aes_hw_set_encrypt_key(const uint8_t *userKey,
 *                                          int bits, AES_KEY *key);
 * ABI:   SysV AMD64 (Mach-O: L$-prefixed local labels, leading underscores)
 * In:    rdi = userKey, esi = bits (128, 192 or 256), rdx = key
 * Out:   eax = 0 on success, -1 if userKey or key is NULL, -2 on bad bits
 * Clobbers: rax, r10, xmm0-xmm5 (all xmm scratch is wiped before return
 *           so no key material leaks), flags.
 *
 * perlasm-generated code: the .byte runs are hand-encoded AES-NI/SSSE3
 * instructions (for pre-AES-NI assemblers); each is decoded in a comment.
 * key->rounds at offset 240 receives 9/11/13 — the number of aesenc
 * iterations the companion cipher loops execute before the final
 * aesenclast (i.e. AES's 10/12/14 rounds counting the last one).
 */
__aesni_set_encrypt_key:
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit+3(%rip)   /* record dispatch for tests */
#endif
.byte 0x48,0x83,0xEC,0x08    /* subq $8,%rsp — realign stack to 16 */
movq $-1,%rax                /* default return: -1 (bad argument) */
testq %rdi,%rdi              /* NULL userKey? */
jz L$enc_key_ret
testq %rdx,%rdx              /* NULL key struct? */
jz L$enc_key_ret
movups (%rdi),%xmm0          /* xmm0 = first 16 bytes of user key */
xorps %xmm4,%xmm4            /* xmm4 = 0 — scratch the helpers rely on */
leaq _OPENSSL_ia32cap_P(%rip),%r10
movl 4(%r10),%r10d           /* CPU capability word 1 */
andl $268437504,%r10d        /* mask 0x10000800 — feature bits that pick
                                the pshufb/aesenclast "_alt" schedules
                                below (NOTE(review): exact bit semantics
                                defined by OPENSSL_ia32cap_P elsewhere) */
leaq 16(%rdx),%rax           /* rax = output cursor: &key->rd_key[4] */
cmpl $256,%esi
je L$14rounds
cmpl $192,%esi
je L$12rounds
cmpl $128,%esi
jne L$bad_keybits            /* any other bit count is rejected (-2) */

/* ---------------- AES-128: 11 round keys ---------------- */
L$10rounds:
movl $9,%esi                 /* value stored into key->rounds below */
cmpl $268435456,%r10d        /* == 0x10000000 → use pshufb-based path */
je L$10rounds_alt
movups %xmm0,(%rdx)          /* round key 0 = raw user key */
/* Each step: aeskeygenassist with the next rcon, then the helper folds
   the result into xmm0 and stores a round key (see L$key_expansion_128). */
.byte 102,15,58,223,200,1    /* aeskeygenassist $1,%xmm0,%xmm1 */
call L$key_expansion_128_cold
.byte 102,15,58,223,200,2    /* aeskeygenassist $2,%xmm0,%xmm1 */
call L$key_expansion_128
.byte 102,15,58,223,200,4    /* aeskeygenassist $4,%xmm0,%xmm1 */
call L$key_expansion_128
.byte 102,15,58,223,200,8    /* aeskeygenassist $8,%xmm0,%xmm1 */
call L$key_expansion_128
.byte 102,15,58,223,200,16   /* aeskeygenassist $16,%xmm0,%xmm1 */
call L$key_expansion_128
.byte 102,15,58,223,200,32   /* aeskeygenassist $32,%xmm0,%xmm1 */
call L$key_expansion_128
.byte 102,15,58,223,200,64   /* aeskeygenassist $64,%xmm0,%xmm1 */
call L$key_expansion_128
.byte 102,15,58,223,200,128  /* aeskeygenassist $128,%xmm0,%xmm1 */
call L$key_expansion_128
.byte 102,15,58,223,200,27   /* aeskeygenassist $0x1b,%xmm0,%xmm1 */
call L$key_expansion_128
.byte 102,15,58,223,200,54   /* aeskeygenassist $0x36,%xmm0,%xmm1 */
call L$key_expansion_128
movups %xmm0,(%rax)          /* store 11th round key (key+160) */
movl %esi,80(%rax)           /* key->rounds at key+240 = 9 */
xorl %eax,%eax               /* return 0 */
jmp L$enc_key_ret

.p2align 4
/* AES-128 alternate schedule: replaces aeskeygenassist with
   pshufb (RotWord broadcast) + aesenclast (SubBytes) + explicit rcon. */
L$10rounds_alt:
movdqa L$key_rotate(%rip),%xmm5   /* shuffle: broadcast RotWord(w3) */
movl $8,%r10d                     /* 8 loop rounds + 2 unrolled below */
movdqa L$key_rcon1(%rip),%xmm4    /* rcon = 1 in every lane */
movdqa %xmm0,%xmm2                /* xmm2 = previous round key */
movdqu %xmm0,(%rdx)               /* round key 0 = raw user key */
jmp L$oop_key128

.p2align 4
L$oop_key128:
.byte 102,15,56,0,197        /* pshufb %xmm5,%xmm0 — RotWord(w3) in all lanes */
.byte 102,15,56,221,196      /* aesenclast %xmm4,%xmm0 — SubWord ^ rcon
                                (ShiftRows is a no-op on equal columns) */
pslld $1,%xmm4               /* rcon <<= 1 for the next round */
leaq 16(%rax),%rax           /* advance output cursor */
/* xmm3 = prefix-xor of the previous round key's words */
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0             /* new round key = core ^ prefix-xor */
movdqu %xmm0,-16(%rax)
movdqa %xmm0,%xmm2
decl %r10d
jnz L$oop_key128

movdqa L$key_rcon1b(%rip),%xmm4   /* rcon continues at 0x1b (then 0x36) */
.byte 102,15,56,0,197        /* pshufb %xmm5,%xmm0 */
.byte 102,15,56,221,196      /* aesenclast %xmm4,%xmm0 */
pslld $1,%xmm4
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0
movdqu %xmm0,(%rax)          /* 10th round key */
movdqa %xmm0,%xmm2
.byte 102,15,56,0,197        /* pshufb %xmm5,%xmm0 */
.byte 102,15,56,221,196      /* aesenclast %xmm4,%xmm0 */
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0
movdqu %xmm0,16(%rax)        /* 11th round key */
movl %esi,96(%rax)           /* key->rounds (key+240) = 9 */
xorl %eax,%eax
jmp L$enc_key_ret

.p2align 4
/* ---------------- AES-192: 13 round keys ---------------- */
L$12rounds:
movq 16(%rdi),%xmm2          /* remaining 8 key bytes */
movl $11,%esi                /* key->rounds value */
cmpl $268435456,%r10d
je L$12rounds_alt
movups %xmm0,(%rdx)          /* round key 0 */
/* aeskeygenassist works on xmm2 (the high key half); the 192a/192b
   helpers interleave 24-byte schedule chunks into 16-byte round keys. */
.byte 102,15,58,223,202,1    /* aeskeygenassist $1,%xmm2,%xmm1 */
call L$key_expansion_192a_cold
.byte 102,15,58,223,202,2    /* aeskeygenassist $2,%xmm2,%xmm1 */
call L$key_expansion_192b
.byte 102,15,58,223,202,4    /* aeskeygenassist $4,%xmm2,%xmm1 */
call L$key_expansion_192a
.byte 102,15,58,223,202,8    /* aeskeygenassist $8,%xmm2,%xmm1 */
call L$key_expansion_192b
.byte 102,15,58,223,202,16   /* aeskeygenassist $16,%xmm2,%xmm1 */
call L$key_expansion_192a
.byte 102,15,58,223,202,32   /* aeskeygenassist $32,%xmm2,%xmm1 */
call L$key_expansion_192b
.byte 102,15,58,223,202,64   /* aeskeygenassist $64,%xmm2,%xmm1 */
call L$key_expansion_192a
.byte 102,15,58,223,202,128  /* aeskeygenassist $128,%xmm2,%xmm1 */
call L$key_expansion_192b
movups %xmm0,(%rax)          /* final round key */
movl %esi,48(%rax)           /* key->rounds (key+240) = 11 */
xorq %rax,%rax
jmp L$enc_key_ret

.p2align 4
/* AES-192 alternate schedule (pshufb/aesenclast based). */
L$12rounds_alt:
movdqa L$key_rotate192(%rip),%xmm5
movdqa L$key_rcon1(%rip),%xmm4
movl $8,%r10d                /* 8 iterations x 24 output bytes */
movdqu %xmm0,(%rdx)          /* round key 0 */
jmp L$oop_key192

.p2align 4
L$oop_key192:
movq %xmm2,0(%rax)           /* emit high 8 bytes of previous chunk */
movdqa %xmm2,%xmm1
.byte 102,15,56,0,213        /* pshufb %xmm5,%xmm2 — rotate key word */
.byte 102,15,56,221,212      /* aesenclast %xmm4,%xmm2 — SubWord ^ rcon */
pslld $1,%xmm4               /* next rcon */
leaq 24(%rax),%rax           /* 192-bit schedule: 24 bytes per chunk */
/* xmm0 = prefix-xor of the previous low 16 bytes */
movdqa %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pshufd $0xff,%xmm0,%xmm3     /* broadcast newest word */
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pxor %xmm2,%xmm0             /* new low 16 bytes */
pxor %xmm3,%xmm2             /* new high 8 bytes */
movdqu %xmm0,-16(%rax)
decl %r10d
jnz L$oop_key192
movl %esi,32(%rax)           /* key->rounds (key+240) = 11 */
xorl %eax,%eax
jmp L$enc_key_ret

.p2align 4
/* ---------------- AES-256: 15 round keys ---------------- */
L$14rounds:
movups 16(%rdi),%xmm2        /* second 16 key bytes */
movl $13,%esi                /* key->rounds value */
leaq 16(%rax),%rax           /* cursor past both initial round keys */
cmpl $268435456,%r10d
je L$14rounds_alt
movups %xmm0,(%rdx)          /* round keys 0 and 1 = raw user key */
movups %xmm2,16(%rdx)
/* 256a advances xmm0 (uses RotWord+SubWord of xmm2 via imm rcon);
   256b advances xmm2 (uses SubWord only — shufps $170 in the helper). */
.byte 102,15,58,223,202,1    /* aeskeygenassist $1,%xmm2,%xmm1 */
call L$key_expansion_256a_cold
.byte 102,15,58,223,200,1    /* aeskeygenassist $1,%xmm0,%xmm1 */
call L$key_expansion_256b
.byte 102,15,58,223,202,2    /* aeskeygenassist $2,%xmm2,%xmm1 */
call L$key_expansion_256a
.byte 102,15,58,223,200,2    /* aeskeygenassist $2,%xmm0,%xmm1 */
call L$key_expansion_256b
.byte 102,15,58,223,202,4    /* aeskeygenassist $4,%xmm2,%xmm1 */
call L$key_expansion_256a
.byte 102,15,58,223,200,4    /* aeskeygenassist $4,%xmm0,%xmm1 */
call L$key_expansion_256b
.byte 102,15,58,223,202,8    /* aeskeygenassist $8,%xmm2,%xmm1 */
call L$key_expansion_256a
.byte 102,15,58,223,200,8    /* aeskeygenassist $8,%xmm0,%xmm1 */
call L$key_expansion_256b
.byte 102,15,58,223,202,16   /* aeskeygenassist $16,%xmm2,%xmm1 */
call L$key_expansion_256a
.byte 102,15,58,223,200,16   /* aeskeygenassist $16,%xmm0,%xmm1 */
call L$key_expansion_256b
.byte 102,15,58,223,202,32   /* aeskeygenassist $32,%xmm2,%xmm1 */
call L$key_expansion_256a
.byte 102,15,58,223,200,32   /* aeskeygenassist $32,%xmm0,%xmm1 */
call L$key_expansion_256b
.byte 102,15,58,223,202,64   /* aeskeygenassist $64,%xmm2,%xmm1 */
call L$key_expansion_256a
movups %xmm0,(%rax)          /* 15th round key */
movl %esi,16(%rax)           /* key->rounds (key+240) = 13 */
xorq %rax,%rax
jmp L$enc_key_ret

.p2align 4
/* AES-256 alternate schedule (pshufb/aesenclast based). */
L$14rounds_alt:
movdqa L$key_rotate(%rip),%xmm5
movdqa L$key_rcon1(%rip),%xmm4
movl $7,%r10d                /* 7 double-steps produce keys 2..14 */
movdqu %xmm0,0(%rdx)         /* round key 0 */
movdqa %xmm2,%xmm1
movdqu %xmm2,16(%rdx)        /* round key 1 */
jmp L$oop_key256

.p2align 4
L$oop_key256:
/* Even round key: RotWord+SubWord(prev w7) ^ rcon, folded into xmm0. */
.byte 102,15,56,0,213        /* pshufb %xmm5,%xmm2 */
.byte 102,15,56,221,212      /* aesenclast %xmm4,%xmm2 */
movdqa %xmm0,%xmm3           /* prefix-xor of previous even key */
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pslld $1,%xmm4               /* next rcon */
pxor %xmm2,%xmm0
movdqu %xmm0,(%rax)
decl %r10d
jz L$done_key256             /* last iteration emits only the even key */
/* Odd round key: SubWord only (no rotate, no rcon — xmm3 is zeroed). */
pshufd $0xff,%xmm0,%xmm2     /* broadcast newest word */
pxor %xmm3,%xmm3
.byte 102,15,56,221,211      /* aesenclast %xmm3,%xmm2 — pure SubBytes */
movdqa %xmm1,%xmm3           /* prefix-xor of previous odd key */
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm3,%xmm1
pxor %xmm1,%xmm2
movdqu %xmm2,16(%rax)
leaq 32(%rax),%rax
movdqa %xmm2,%xmm1
jmp L$oop_key256

L$done_key256:
movl %esi,16(%rax)           /* key->rounds (key+240) = 13 */
xorl %eax,%eax
jmp L$enc_key_ret

.p2align 4
L$bad_keybits:
movq $-2,%rax                /* unsupported key length */

/* Common exit: scrub key material from xmm scratch, restore stack. */
L$enc_key_ret:
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
addq $8,%rsp                 /* undo the entry subq */
.byte 0xf3,0xc3              /* repz ret */
L$SEH_end_set_encrypt_key:
.p2align 4
/*
 * L$key_expansion_128 — fold one aeskeygenassist result into the key.
 * In:  xmm0 = previous round key, xmm1 = aeskeygenassist output,
 *      xmm4 = scratch (contents irrelevant; fully overwritten),
 *      rax  = output cursor.
 * Out: xmm0 = next round key; the *previous* key is stored at (%rax)
 *      and rax advances 16. _cold entry skips the store (round key 0
 *      was already written by the caller).
 */
L$key_expansion_128:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
L$key_expansion_128_cold:
/* Two shufps/xorps pairs compute the prefix-xor of xmm0's four words
   (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3) using xmm4 as a shifting mixer. */
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1      /* broadcast SubWord(RotWord(w3)) ^ rcon */
xorps %xmm1,%xmm0            /* xmm0 = next round key */
.byte 0xf3,0xc3              /* repz ret */
.p2align 4
/*
 * L$key_expansion_192a — one AES-192 expansion step.
 * In:  xmm0/xmm2 = low 16 / high 8 bytes of the current 24-byte schedule
 *      chunk, xmm1 = aeskeygenassist(%xmm2) output, xmm4 = scratch,
 *      rax = output cursor. _cold skips the store; _warm is the shared
 *      tail also entered from L$key_expansion_192b.
 * Out: xmm0/xmm2 advanced to the next chunk; xmm5 snapshots the old
 *      xmm2 for 192b's interleaved stores.
 */
L$key_expansion_192a:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
L$key_expansion_192a_cold:
movaps %xmm2,%xmm5           /* save high half for 192b interleave */
L$key_expansion_192b_warm:
/* Prefix-xor of xmm0's words via the shufps mixer (as in the 128 path),
   interleaved with advancing the 8-byte high half in xmm2. */
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
pslldq $4,%xmm3
xorps %xmm4,%xmm0
pshufd $85,%xmm1,%xmm1       /* broadcast keygenassist word 1 */
pxor %xmm3,%xmm2
pxor %xmm1,%xmm0             /* new low 16 bytes */
pshufd $255,%xmm0,%xmm3      /* broadcast newest word */
pxor %xmm3,%xmm2             /* new high 8 bytes */
.byte 0xf3,0xc3              /* repz ret */
.p2align 4
/*
 * L$key_expansion_192b — store two interleaved AES-192 round keys,
 * then continue the expansion via the shared _warm tail.
 * In:  xmm0/xmm2 = current chunk, xmm5 = previous chunk's high half
 *      (saved by 192a), rax = output cursor (advances 32).
 */
L$key_expansion_192b:
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5       /* round key = prev-high : new-low words */
movups %xmm5,(%rax)
shufps $78,%xmm2,%xmm3       /* round key = new-low-high : new-high */
movups %xmm3,16(%rax)
leaq 32(%rax),%rax
jmp L$key_expansion_192b_warm
.p2align 4
/*
 * L$key_expansion_256a — advance the even (xmm0) half of the AES-256
 * schedule. In: xmm1 = aeskeygenassist(%xmm2) output, xmm4 = scratch,
 * rax = cursor. Stores the previous odd key (xmm2) and advances rax 16;
 * _cold skips the store. Same prefix-xor mixer as the 128-bit path.
 */
L$key_expansion_256a:
movups %xmm2,(%rax)
leaq 16(%rax),%rax
L$key_expansion_256a_cold:
shufps $16,%xmm0,%xmm4       /* prefix-xor of xmm0's words ... */
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0            /* ... done */
shufps $255,%xmm1,%xmm1      /* broadcast SubWord(RotWord(w7)) ^ rcon */
xorps %xmm1,%xmm0            /* xmm0 = next even round key */
.byte 0xf3,0xc3              /* repz ret */
.p2align 4
/*
 * L$key_expansion_256b — advance the odd (xmm2) half of the AES-256
 * schedule. In: xmm1 = aeskeygenassist(%xmm0) output, xmm4 = scratch,
 * rax = cursor. Stores the previous even key (xmm0), advances rax 16.
 * Note shufps $170 picks keygenassist word 2: SubWord WITHOUT rotation
 * and without rcon, as AES-256's mid-schedule step requires.
 */
L$key_expansion_256b:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
shufps $16,%xmm2,%xmm4       /* prefix-xor of xmm2's words ... */
xorps %xmm4,%xmm2
shufps $140,%xmm2,%xmm4
xorps %xmm4,%xmm2            /* ... done */
shufps $170,%xmm1,%xmm1      /* broadcast SubWord(w3) — no RotWord/rcon */
xorps %xmm1,%xmm2            /* xmm2 = next odd round key */
.byte 0xf3,0xc3              /* repz ret */
.p2align 6
/* Constant pool (64-byte aligned). Several of these serve the CTR/XTS
   routines earlier in this file, outside this excerpt — noted where the
   use is not visible here. */
L$bswap_mask:                /* pshufb mask: reverse all 16 bytes */
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
L$increment32:               /* NOTE(review): +6 per-lane counter bump,
                                presumably for the 6-block CTR loop —
                                use is outside this excerpt */
.long 6,6,6,0
L$increment64:               /* low 64-bit lane += 1 */
.long 1,0,0,0
L$xts_magic:                 /* 0x87 = GF(2^128) feedback constant for
                                XTS tweak doubling (used elsewhere) */
.long 0x87,0,1,0
L$increment1:                /* big-endian 128-bit +1 (used elsewhere) */
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
L$key_rotate:                /* pshufb mask: broadcast RotWord(word 3);
                                AES-128/256 alt key schedule */
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
L$key_rotate192:             /* pshufb mask for the AES-192 alt path */
.long 0x04070605,0x04070605,0x04070605,0x04070605
L$key_rcon1:                 /* initial round constant, all lanes */
.long 1,1,1,1
L$key_rcon1b:                /* rcon continuation after 0x80: 0x1b */
.long 0x1b,0x1b,0x1b,0x1b
/* ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>\0" */
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
  2259. #endif