aesni-x86.S 49 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511
661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542
0552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476
  1. // This file is generated from a similarly-named Perl script in the BoringSSL
  2. // source tree. Do not edit by hand.
  3. #if defined(__i386__)
  4. #if defined(BORINGSSL_PREFIX)
  5. #include <boringssl_prefix_symbols_asm.h>
  6. #endif
  7. .text
  8. #ifdef BORINGSSL_DISPATCH_TEST
  9. #endif
  10. .globl _aes_hw_encrypt
  11. .private_extern _aes_hw_encrypt
  12. .align 4
  13. _aes_hw_encrypt:
  14. L_aes_hw_encrypt_begin:
  15. #ifdef BORINGSSL_DISPATCH_TEST
  16. pushl %ebx
  17. pushl %edx
  18. call L000pic
  19. L000pic:
  20. popl %ebx
  21. leal _BORINGSSL_function_hit+1-L000pic(%ebx),%ebx
  22. movl $1,%edx
  23. movb %dl,(%ebx)
  24. popl %edx
  25. popl %ebx
  26. #endif
  27. movl 4(%esp),%eax
  28. movl 12(%esp),%edx
  29. movups (%eax),%xmm2
  30. movl 240(%edx),%ecx
  31. movl 8(%esp),%eax
  32. movups (%edx),%xmm0
  33. movups 16(%edx),%xmm1
  34. leal 32(%edx),%edx
  35. xorps %xmm0,%xmm2
  36. L001enc1_loop_1:
  37. .byte 102,15,56,220,209
  38. decl %ecx
  39. movups (%edx),%xmm1
  40. leal 16(%edx),%edx
  41. jnz L001enc1_loop_1
  42. .byte 102,15,56,221,209
  43. pxor %xmm0,%xmm0
  44. pxor %xmm1,%xmm1
  45. movups %xmm2,(%eax)
  46. pxor %xmm2,%xmm2
  47. ret
  48. .globl _aes_hw_decrypt
  49. .private_extern _aes_hw_decrypt
  50. .align 4
  51. _aes_hw_decrypt:
  52. L_aes_hw_decrypt_begin:
  53. movl 4(%esp),%eax
  54. movl 12(%esp),%edx
  55. movups (%eax),%xmm2
  56. movl 240(%edx),%ecx
  57. movl 8(%esp),%eax
  58. movups (%edx),%xmm0
  59. movups 16(%edx),%xmm1
  60. leal 32(%edx),%edx
  61. xorps %xmm0,%xmm2
  62. L002dec1_loop_2:
  63. .byte 102,15,56,222,209
  64. decl %ecx
  65. movups (%edx),%xmm1
  66. leal 16(%edx),%edx
  67. jnz L002dec1_loop_2
  68. .byte 102,15,56,223,209
  69. pxor %xmm0,%xmm0
  70. pxor %xmm1,%xmm1
  71. movups %xmm2,(%eax)
  72. pxor %xmm2,%xmm2
  73. ret
  74. .private_extern __aesni_encrypt2
  75. .align 4
  76. __aesni_encrypt2:
  77. movups (%edx),%xmm0
  78. shll $4,%ecx
  79. movups 16(%edx),%xmm1
  80. xorps %xmm0,%xmm2
  81. pxor %xmm0,%xmm3
  82. movups 32(%edx),%xmm0
  83. leal 32(%edx,%ecx,1),%edx
  84. negl %ecx
  85. addl $16,%ecx
  86. L003enc2_loop:
  87. .byte 102,15,56,220,209
  88. .byte 102,15,56,220,217
  89. movups (%edx,%ecx,1),%xmm1
  90. addl $32,%ecx
  91. .byte 102,15,56,220,208
  92. .byte 102,15,56,220,216
  93. movups -16(%edx,%ecx,1),%xmm0
  94. jnz L003enc2_loop
  95. .byte 102,15,56,220,209
  96. .byte 102,15,56,220,217
  97. .byte 102,15,56,221,208
  98. .byte 102,15,56,221,216
  99. ret
  100. .private_extern __aesni_decrypt2
  101. .align 4
  102. __aesni_decrypt2:
  103. movups (%edx),%xmm0
  104. shll $4,%ecx
  105. movups 16(%edx),%xmm1
  106. xorps %xmm0,%xmm2
  107. pxor %xmm0,%xmm3
  108. movups 32(%edx),%xmm0
  109. leal 32(%edx,%ecx,1),%edx
  110. negl %ecx
  111. addl $16,%ecx
  112. L004dec2_loop:
  113. .byte 102,15,56,222,209
  114. .byte 102,15,56,222,217
  115. movups (%edx,%ecx,1),%xmm1
  116. addl $32,%ecx
  117. .byte 102,15,56,222,208
  118. .byte 102,15,56,222,216
  119. movups -16(%edx,%ecx,1),%xmm0
  120. jnz L004dec2_loop
  121. .byte 102,15,56,222,209
  122. .byte 102,15,56,222,217
  123. .byte 102,15,56,223,208
  124. .byte 102,15,56,223,216
  125. ret
  126. .private_extern __aesni_encrypt3
  127. .align 4
  128. __aesni_encrypt3:
  129. movups (%edx),%xmm0
  130. shll $4,%ecx
  131. movups 16(%edx),%xmm1
  132. xorps %xmm0,%xmm2
  133. pxor %xmm0,%xmm3
  134. pxor %xmm0,%xmm4
  135. movups 32(%edx),%xmm0
  136. leal 32(%edx,%ecx,1),%edx
  137. negl %ecx
  138. addl $16,%ecx
  139. L005enc3_loop:
  140. .byte 102,15,56,220,209
  141. .byte 102,15,56,220,217
  142. .byte 102,15,56,220,225
  143. movups (%edx,%ecx,1),%xmm1
  144. addl $32,%ecx
  145. .byte 102,15,56,220,208
  146. .byte 102,15,56,220,216
  147. .byte 102,15,56,220,224
  148. movups -16(%edx,%ecx,1),%xmm0
  149. jnz L005enc3_loop
  150. .byte 102,15,56,220,209
  151. .byte 102,15,56,220,217
  152. .byte 102,15,56,220,225
  153. .byte 102,15,56,221,208
  154. .byte 102,15,56,221,216
  155. .byte 102,15,56,221,224
  156. ret
  157. .private_extern __aesni_decrypt3
  158. .align 4
  159. __aesni_decrypt3:
  160. movups (%edx),%xmm0
  161. shll $4,%ecx
  162. movups 16(%edx),%xmm1
  163. xorps %xmm0,%xmm2
  164. pxor %xmm0,%xmm3
  165. pxor %xmm0,%xmm4
  166. movups 32(%edx),%xmm0
  167. leal 32(%edx,%ecx,1),%edx
  168. negl %ecx
  169. addl $16,%ecx
  170. L006dec3_loop:
  171. .byte 102,15,56,222,209
  172. .byte 102,15,56,222,217
  173. .byte 102,15,56,222,225
  174. movups (%edx,%ecx,1),%xmm1
  175. addl $32,%ecx
  176. .byte 102,15,56,222,208
  177. .byte 102,15,56,222,216
  178. .byte 102,15,56,222,224
  179. movups -16(%edx,%ecx,1),%xmm0
  180. jnz L006dec3_loop
  181. .byte 102,15,56,222,209
  182. .byte 102,15,56,222,217
  183. .byte 102,15,56,222,225
  184. .byte 102,15,56,223,208
  185. .byte 102,15,56,223,216
  186. .byte 102,15,56,223,224
  187. ret
  188. .private_extern __aesni_encrypt4
  189. .align 4
  190. __aesni_encrypt4:
  191. movups (%edx),%xmm0
  192. movups 16(%edx),%xmm1
  193. shll $4,%ecx
  194. xorps %xmm0,%xmm2
  195. pxor %xmm0,%xmm3
  196. pxor %xmm0,%xmm4
  197. pxor %xmm0,%xmm5
  198. movups 32(%edx),%xmm0
  199. leal 32(%edx,%ecx,1),%edx
  200. negl %ecx
  201. .byte 15,31,64,0
  202. addl $16,%ecx
  203. L007enc4_loop:
  204. .byte 102,15,56,220,209
  205. .byte 102,15,56,220,217
  206. .byte 102,15,56,220,225
  207. .byte 102,15,56,220,233
  208. movups (%edx,%ecx,1),%xmm1
  209. addl $32,%ecx
  210. .byte 102,15,56,220,208
  211. .byte 102,15,56,220,216
  212. .byte 102,15,56,220,224
  213. .byte 102,15,56,220,232
  214. movups -16(%edx,%ecx,1),%xmm0
  215. jnz L007enc4_loop
  216. .byte 102,15,56,220,209
  217. .byte 102,15,56,220,217
  218. .byte 102,15,56,220,225
  219. .byte 102,15,56,220,233
  220. .byte 102,15,56,221,208
  221. .byte 102,15,56,221,216
  222. .byte 102,15,56,221,224
  223. .byte 102,15,56,221,232
  224. ret
  225. .private_extern __aesni_decrypt4
  226. .align 4
  227. __aesni_decrypt4:
  228. movups (%edx),%xmm0
  229. movups 16(%edx),%xmm1
  230. shll $4,%ecx
  231. xorps %xmm0,%xmm2
  232. pxor %xmm0,%xmm3
  233. pxor %xmm0,%xmm4
  234. pxor %xmm0,%xmm5
  235. movups 32(%edx),%xmm0
  236. leal 32(%edx,%ecx,1),%edx
  237. negl %ecx
  238. .byte 15,31,64,0
  239. addl $16,%ecx
  240. L008dec4_loop:
  241. .byte 102,15,56,222,209
  242. .byte 102,15,56,222,217
  243. .byte 102,15,56,222,225
  244. .byte 102,15,56,222,233
  245. movups (%edx,%ecx,1),%xmm1
  246. addl $32,%ecx
  247. .byte 102,15,56,222,208
  248. .byte 102,15,56,222,216
  249. .byte 102,15,56,222,224
  250. .byte 102,15,56,222,232
  251. movups -16(%edx,%ecx,1),%xmm0
  252. jnz L008dec4_loop
  253. .byte 102,15,56,222,209
  254. .byte 102,15,56,222,217
  255. .byte 102,15,56,222,225
  256. .byte 102,15,56,222,233
  257. .byte 102,15,56,223,208
  258. .byte 102,15,56,223,216
  259. .byte 102,15,56,223,224
  260. .byte 102,15,56,223,232
  261. ret
  262. .private_extern __aesni_encrypt6
  263. .align 4
  264. __aesni_encrypt6:
  265. movups (%edx),%xmm0
  266. shll $4,%ecx
  267. movups 16(%edx),%xmm1
  268. xorps %xmm0,%xmm2
  269. pxor %xmm0,%xmm3
  270. pxor %xmm0,%xmm4
  271. .byte 102,15,56,220,209
  272. pxor %xmm0,%xmm5
  273. pxor %xmm0,%xmm6
  274. .byte 102,15,56,220,217
  275. leal 32(%edx,%ecx,1),%edx
  276. negl %ecx
  277. .byte 102,15,56,220,225
  278. pxor %xmm0,%xmm7
  279. movups (%edx,%ecx,1),%xmm0
  280. addl $16,%ecx
  281. jmp L009_aesni_encrypt6_inner
  282. .align 4,0x90
  283. L010enc6_loop:
  284. .byte 102,15,56,220,209
  285. .byte 102,15,56,220,217
  286. .byte 102,15,56,220,225
  287. L009_aesni_encrypt6_inner:
  288. .byte 102,15,56,220,233
  289. .byte 102,15,56,220,241
  290. .byte 102,15,56,220,249
  291. L_aesni_encrypt6_enter:
  292. movups (%edx,%ecx,1),%xmm1
  293. addl $32,%ecx
  294. .byte 102,15,56,220,208
  295. .byte 102,15,56,220,216
  296. .byte 102,15,56,220,224
  297. .byte 102,15,56,220,232
  298. .byte 102,15,56,220,240
  299. .byte 102,15,56,220,248
  300. movups -16(%edx,%ecx,1),%xmm0
  301. jnz L010enc6_loop
  302. .byte 102,15,56,220,209
  303. .byte 102,15,56,220,217
  304. .byte 102,15,56,220,225
  305. .byte 102,15,56,220,233
  306. .byte 102,15,56,220,241
  307. .byte 102,15,56,220,249
  308. .byte 102,15,56,221,208
  309. .byte 102,15,56,221,216
  310. .byte 102,15,56,221,224
  311. .byte 102,15,56,221,232
  312. .byte 102,15,56,221,240
  313. .byte 102,15,56,221,248
  314. ret
  315. .private_extern __aesni_decrypt6
  316. .align 4
  317. __aesni_decrypt6:
  318. movups (%edx),%xmm0
  319. shll $4,%ecx
  320. movups 16(%edx),%xmm1
  321. xorps %xmm0,%xmm2
  322. pxor %xmm0,%xmm3
  323. pxor %xmm0,%xmm4
  324. .byte 102,15,56,222,209
  325. pxor %xmm0,%xmm5
  326. pxor %xmm0,%xmm6
  327. .byte 102,15,56,222,217
  328. leal 32(%edx,%ecx,1),%edx
  329. negl %ecx
  330. .byte 102,15,56,222,225
  331. pxor %xmm0,%xmm7
  332. movups (%edx,%ecx,1),%xmm0
  333. addl $16,%ecx
  334. jmp L011_aesni_decrypt6_inner
  335. .align 4,0x90
  336. L012dec6_loop:
  337. .byte 102,15,56,222,209
  338. .byte 102,15,56,222,217
  339. .byte 102,15,56,222,225
  340. L011_aesni_decrypt6_inner:
  341. .byte 102,15,56,222,233
  342. .byte 102,15,56,222,241
  343. .byte 102,15,56,222,249
  344. L_aesni_decrypt6_enter:
  345. movups (%edx,%ecx,1),%xmm1
  346. addl $32,%ecx
  347. .byte 102,15,56,222,208
  348. .byte 102,15,56,222,216
  349. .byte 102,15,56,222,224
  350. .byte 102,15,56,222,232
  351. .byte 102,15,56,222,240
  352. .byte 102,15,56,222,248
  353. movups -16(%edx,%ecx,1),%xmm0
  354. jnz L012dec6_loop
  355. .byte 102,15,56,222,209
  356. .byte 102,15,56,222,217
  357. .byte 102,15,56,222,225
  358. .byte 102,15,56,222,233
  359. .byte 102,15,56,222,241
  360. .byte 102,15,56,222,249
  361. .byte 102,15,56,223,208
  362. .byte 102,15,56,223,216
  363. .byte 102,15,56,223,224
  364. .byte 102,15,56,223,232
  365. .byte 102,15,56,223,240
  366. .byte 102,15,56,223,248
  367. ret
  368. .globl _aes_hw_ecb_encrypt
  369. .private_extern _aes_hw_ecb_encrypt
  370. .align 4
  371. _aes_hw_ecb_encrypt:
  372. L_aes_hw_ecb_encrypt_begin:
  373. pushl %ebp
  374. pushl %ebx
  375. pushl %esi
  376. pushl %edi
  377. movl 20(%esp),%esi
  378. movl 24(%esp),%edi
  379. movl 28(%esp),%eax
  380. movl 32(%esp),%edx
  381. movl 36(%esp),%ebx
  382. andl $-16,%eax
  383. jz L013ecb_ret
  384. movl 240(%edx),%ecx
  385. testl %ebx,%ebx
  386. jz L014ecb_decrypt
  387. movl %edx,%ebp
  388. movl %ecx,%ebx
  389. cmpl $96,%eax
  390. jb L015ecb_enc_tail
  391. movdqu (%esi),%xmm2
  392. movdqu 16(%esi),%xmm3
  393. movdqu 32(%esi),%xmm4
  394. movdqu 48(%esi),%xmm5
  395. movdqu 64(%esi),%xmm6
  396. movdqu 80(%esi),%xmm7
  397. leal 96(%esi),%esi
  398. subl $96,%eax
  399. jmp L016ecb_enc_loop6_enter
  400. .align 4,0x90
  401. L017ecb_enc_loop6:
  402. movups %xmm2,(%edi)
  403. movdqu (%esi),%xmm2
  404. movups %xmm3,16(%edi)
  405. movdqu 16(%esi),%xmm3
  406. movups %xmm4,32(%edi)
  407. movdqu 32(%esi),%xmm4
  408. movups %xmm5,48(%edi)
  409. movdqu 48(%esi),%xmm5
  410. movups %xmm6,64(%edi)
  411. movdqu 64(%esi),%xmm6
  412. movups %xmm7,80(%edi)
  413. leal 96(%edi),%edi
  414. movdqu 80(%esi),%xmm7
  415. leal 96(%esi),%esi
  416. L016ecb_enc_loop6_enter:
  417. call __aesni_encrypt6
  418. movl %ebp,%edx
  419. movl %ebx,%ecx
  420. subl $96,%eax
  421. jnc L017ecb_enc_loop6
  422. movups %xmm2,(%edi)
  423. movups %xmm3,16(%edi)
  424. movups %xmm4,32(%edi)
  425. movups %xmm5,48(%edi)
  426. movups %xmm6,64(%edi)
  427. movups %xmm7,80(%edi)
  428. leal 96(%edi),%edi
  429. addl $96,%eax
  430. jz L013ecb_ret
  431. L015ecb_enc_tail:
  432. movups (%esi),%xmm2
  433. cmpl $32,%eax
  434. jb L018ecb_enc_one
  435. movups 16(%esi),%xmm3
  436. je L019ecb_enc_two
  437. movups 32(%esi),%xmm4
  438. cmpl $64,%eax
  439. jb L020ecb_enc_three
  440. movups 48(%esi),%xmm5
  441. je L021ecb_enc_four
  442. movups 64(%esi),%xmm6
  443. xorps %xmm7,%xmm7
  444. call __aesni_encrypt6
  445. movups %xmm2,(%edi)
  446. movups %xmm3,16(%edi)
  447. movups %xmm4,32(%edi)
  448. movups %xmm5,48(%edi)
  449. movups %xmm6,64(%edi)
  450. jmp L013ecb_ret
  451. .align 4,0x90
  452. L018ecb_enc_one:
  453. movups (%edx),%xmm0
  454. movups 16(%edx),%xmm1
  455. leal 32(%edx),%edx
  456. xorps %xmm0,%xmm2
  457. L022enc1_loop_3:
  458. .byte 102,15,56,220,209
  459. decl %ecx
  460. movups (%edx),%xmm1
  461. leal 16(%edx),%edx
  462. jnz L022enc1_loop_3
  463. .byte 102,15,56,221,209
  464. movups %xmm2,(%edi)
  465. jmp L013ecb_ret
  466. .align 4,0x90
  467. L019ecb_enc_two:
  468. call __aesni_encrypt2
  469. movups %xmm2,(%edi)
  470. movups %xmm3,16(%edi)
  471. jmp L013ecb_ret
  472. .align 4,0x90
  473. L020ecb_enc_three:
  474. call __aesni_encrypt3
  475. movups %xmm2,(%edi)
  476. movups %xmm3,16(%edi)
  477. movups %xmm4,32(%edi)
  478. jmp L013ecb_ret
  479. .align 4,0x90
  480. L021ecb_enc_four:
  481. call __aesni_encrypt4
  482. movups %xmm2,(%edi)
  483. movups %xmm3,16(%edi)
  484. movups %xmm4,32(%edi)
  485. movups %xmm5,48(%edi)
  486. jmp L013ecb_ret
  487. .align 4,0x90
  488. L014ecb_decrypt:
  489. movl %edx,%ebp
  490. movl %ecx,%ebx
  491. cmpl $96,%eax
  492. jb L023ecb_dec_tail
  493. movdqu (%esi),%xmm2
  494. movdqu 16(%esi),%xmm3
  495. movdqu 32(%esi),%xmm4
  496. movdqu 48(%esi),%xmm5
  497. movdqu 64(%esi),%xmm6
  498. movdqu 80(%esi),%xmm7
  499. leal 96(%esi),%esi
  500. subl $96,%eax
  501. jmp L024ecb_dec_loop6_enter
  502. .align 4,0x90
  503. L025ecb_dec_loop6:
  504. movups %xmm2,(%edi)
  505. movdqu (%esi),%xmm2
  506. movups %xmm3,16(%edi)
  507. movdqu 16(%esi),%xmm3
  508. movups %xmm4,32(%edi)
  509. movdqu 32(%esi),%xmm4
  510. movups %xmm5,48(%edi)
  511. movdqu 48(%esi),%xmm5
  512. movups %xmm6,64(%edi)
  513. movdqu 64(%esi),%xmm6
  514. movups %xmm7,80(%edi)
  515. leal 96(%edi),%edi
  516. movdqu 80(%esi),%xmm7
  517. leal 96(%esi),%esi
  518. L024ecb_dec_loop6_enter:
  519. call __aesni_decrypt6
  520. movl %ebp,%edx
  521. movl %ebx,%ecx
  522. subl $96,%eax
  523. jnc L025ecb_dec_loop6
  524. movups %xmm2,(%edi)
  525. movups %xmm3,16(%edi)
  526. movups %xmm4,32(%edi)
  527. movups %xmm5,48(%edi)
  528. movups %xmm6,64(%edi)
  529. movups %xmm7,80(%edi)
  530. leal 96(%edi),%edi
  531. addl $96,%eax
  532. jz L013ecb_ret
  533. L023ecb_dec_tail:
  534. movups (%esi),%xmm2
  535. cmpl $32,%eax
  536. jb L026ecb_dec_one
  537. movups 16(%esi),%xmm3
  538. je L027ecb_dec_two
  539. movups 32(%esi),%xmm4
  540. cmpl $64,%eax
  541. jb L028ecb_dec_three
  542. movups 48(%esi),%xmm5
  543. je L029ecb_dec_four
  544. movups 64(%esi),%xmm6
  545. xorps %xmm7,%xmm7
  546. call __aesni_decrypt6
  547. movups %xmm2,(%edi)
  548. movups %xmm3,16(%edi)
  549. movups %xmm4,32(%edi)
  550. movups %xmm5,48(%edi)
  551. movups %xmm6,64(%edi)
  552. jmp L013ecb_ret
  553. .align 4,0x90
  554. L026ecb_dec_one:
  555. movups (%edx),%xmm0
  556. movups 16(%edx),%xmm1
  557. leal 32(%edx),%edx
  558. xorps %xmm0,%xmm2
  559. L030dec1_loop_4:
  560. .byte 102,15,56,222,209
  561. decl %ecx
  562. movups (%edx),%xmm1
  563. leal 16(%edx),%edx
  564. jnz L030dec1_loop_4
  565. .byte 102,15,56,223,209
  566. movups %xmm2,(%edi)
  567. jmp L013ecb_ret
  568. .align 4,0x90
  569. L027ecb_dec_two:
  570. call __aesni_decrypt2
  571. movups %xmm2,(%edi)
  572. movups %xmm3,16(%edi)
  573. jmp L013ecb_ret
  574. .align 4,0x90
  575. L028ecb_dec_three:
  576. call __aesni_decrypt3
  577. movups %xmm2,(%edi)
  578. movups %xmm3,16(%edi)
  579. movups %xmm4,32(%edi)
  580. jmp L013ecb_ret
  581. .align 4,0x90
  582. L029ecb_dec_four:
  583. call __aesni_decrypt4
  584. movups %xmm2,(%edi)
  585. movups %xmm3,16(%edi)
  586. movups %xmm4,32(%edi)
  587. movups %xmm5,48(%edi)
  588. L013ecb_ret:
  589. pxor %xmm0,%xmm0
  590. pxor %xmm1,%xmm1
  591. pxor %xmm2,%xmm2
  592. pxor %xmm3,%xmm3
  593. pxor %xmm4,%xmm4
  594. pxor %xmm5,%xmm5
  595. pxor %xmm6,%xmm6
  596. pxor %xmm7,%xmm7
  597. popl %edi
  598. popl %esi
  599. popl %ebx
  600. popl %ebp
  601. ret
  602. .globl _aes_hw_ccm64_encrypt_blocks
  603. .private_extern _aes_hw_ccm64_encrypt_blocks
  604. .align 4
  605. _aes_hw_ccm64_encrypt_blocks:
  606. L_aes_hw_ccm64_encrypt_blocks_begin:
  607. pushl %ebp
  608. pushl %ebx
  609. pushl %esi
  610. pushl %edi
  611. movl 20(%esp),%esi
  612. movl 24(%esp),%edi
  613. movl 28(%esp),%eax
  614. movl 32(%esp),%edx
  615. movl 36(%esp),%ebx
  616. movl 40(%esp),%ecx
  617. movl %esp,%ebp
  618. subl $60,%esp
  619. andl $-16,%esp
  620. movl %ebp,48(%esp)
  621. movdqu (%ebx),%xmm7
  622. movdqu (%ecx),%xmm3
  623. movl 240(%edx),%ecx
  624. movl $202182159,(%esp)
  625. movl $134810123,4(%esp)
  626. movl $67438087,8(%esp)
  627. movl $66051,12(%esp)
  628. movl $1,%ebx
  629. xorl %ebp,%ebp
  630. movl %ebx,16(%esp)
  631. movl %ebp,20(%esp)
  632. movl %ebp,24(%esp)
  633. movl %ebp,28(%esp)
  634. shll $4,%ecx
  635. movl $16,%ebx
  636. leal (%edx),%ebp
  637. movdqa (%esp),%xmm5
  638. movdqa %xmm7,%xmm2
  639. leal 32(%edx,%ecx,1),%edx
  640. subl %ecx,%ebx
  641. .byte 102,15,56,0,253
  642. L031ccm64_enc_outer:
  643. movups (%ebp),%xmm0
  644. movl %ebx,%ecx
  645. movups (%esi),%xmm6
  646. xorps %xmm0,%xmm2
  647. movups 16(%ebp),%xmm1
  648. xorps %xmm6,%xmm0
  649. xorps %xmm0,%xmm3
  650. movups 32(%ebp),%xmm0
  651. L032ccm64_enc2_loop:
  652. .byte 102,15,56,220,209
  653. .byte 102,15,56,220,217
  654. movups (%edx,%ecx,1),%xmm1
  655. addl $32,%ecx
  656. .byte 102,15,56,220,208
  657. .byte 102,15,56,220,216
  658. movups -16(%edx,%ecx,1),%xmm0
  659. jnz L032ccm64_enc2_loop
  660. .byte 102,15,56,220,209
  661. .byte 102,15,56,220,217
  662. paddq 16(%esp),%xmm7
  663. decl %eax
  664. .byte 102,15,56,221,208
  665. .byte 102,15,56,221,216
  666. leal 16(%esi),%esi
  667. xorps %xmm2,%xmm6
  668. movdqa %xmm7,%xmm2
  669. movups %xmm6,(%edi)
  670. .byte 102,15,56,0,213
  671. leal 16(%edi),%edi
  672. jnz L031ccm64_enc_outer
  673. movl 48(%esp),%esp
  674. movl 40(%esp),%edi
  675. movups %xmm3,(%edi)
  676. pxor %xmm0,%xmm0
  677. pxor %xmm1,%xmm1
  678. pxor %xmm2,%xmm2
  679. pxor %xmm3,%xmm3
  680. pxor %xmm4,%xmm4
  681. pxor %xmm5,%xmm5
  682. pxor %xmm6,%xmm6
  683. pxor %xmm7,%xmm7
  684. popl %edi
  685. popl %esi
  686. popl %ebx
  687. popl %ebp
  688. ret
  689. .globl _aes_hw_ccm64_decrypt_blocks
  690. .private_extern _aes_hw_ccm64_decrypt_blocks
  691. .align 4
  692. _aes_hw_ccm64_decrypt_blocks:
  693. L_aes_hw_ccm64_decrypt_blocks_begin:
  694. pushl %ebp
  695. pushl %ebx
  696. pushl %esi
  697. pushl %edi
  698. movl 20(%esp),%esi
  699. movl 24(%esp),%edi
  700. movl 28(%esp),%eax
  701. movl 32(%esp),%edx
  702. movl 36(%esp),%ebx
  703. movl 40(%esp),%ecx
  704. movl %esp,%ebp
  705. subl $60,%esp
  706. andl $-16,%esp
  707. movl %ebp,48(%esp)
  708. movdqu (%ebx),%xmm7
  709. movdqu (%ecx),%xmm3
  710. movl 240(%edx),%ecx
  711. movl $202182159,(%esp)
  712. movl $134810123,4(%esp)
  713. movl $67438087,8(%esp)
  714. movl $66051,12(%esp)
  715. movl $1,%ebx
  716. xorl %ebp,%ebp
  717. movl %ebx,16(%esp)
  718. movl %ebp,20(%esp)
  719. movl %ebp,24(%esp)
  720. movl %ebp,28(%esp)
  721. movdqa (%esp),%xmm5
  722. movdqa %xmm7,%xmm2
  723. movl %edx,%ebp
  724. movl %ecx,%ebx
  725. .byte 102,15,56,0,253
  726. movups (%edx),%xmm0
  727. movups 16(%edx),%xmm1
  728. leal 32(%edx),%edx
  729. xorps %xmm0,%xmm2
  730. L033enc1_loop_5:
  731. .byte 102,15,56,220,209
  732. decl %ecx
  733. movups (%edx),%xmm1
  734. leal 16(%edx),%edx
  735. jnz L033enc1_loop_5
  736. .byte 102,15,56,221,209
  737. shll $4,%ebx
  738. movl $16,%ecx
  739. movups (%esi),%xmm6
  740. paddq 16(%esp),%xmm7
  741. leal 16(%esi),%esi
  742. subl %ebx,%ecx
  743. leal 32(%ebp,%ebx,1),%edx
  744. movl %ecx,%ebx
  745. jmp L034ccm64_dec_outer
  746. .align 4,0x90
  747. L034ccm64_dec_outer:
  748. xorps %xmm2,%xmm6
  749. movdqa %xmm7,%xmm2
  750. movups %xmm6,(%edi)
  751. leal 16(%edi),%edi
  752. .byte 102,15,56,0,213
  753. subl $1,%eax
  754. jz L035ccm64_dec_break
  755. movups (%ebp),%xmm0
  756. movl %ebx,%ecx
  757. movups 16(%ebp),%xmm1
  758. xorps %xmm0,%xmm6
  759. xorps %xmm0,%xmm2
  760. xorps %xmm6,%xmm3
  761. movups 32(%ebp),%xmm0
  762. L036ccm64_dec2_loop:
  763. .byte 102,15,56,220,209
  764. .byte 102,15,56,220,217
  765. movups (%edx,%ecx,1),%xmm1
  766. addl $32,%ecx
  767. .byte 102,15,56,220,208
  768. .byte 102,15,56,220,216
  769. movups -16(%edx,%ecx,1),%xmm0
  770. jnz L036ccm64_dec2_loop
  771. movups (%esi),%xmm6
  772. paddq 16(%esp),%xmm7
  773. .byte 102,15,56,220,209
  774. .byte 102,15,56,220,217
  775. .byte 102,15,56,221,208
  776. .byte 102,15,56,221,216
  777. leal 16(%esi),%esi
  778. jmp L034ccm64_dec_outer
  779. .align 4,0x90
  780. L035ccm64_dec_break:
  781. movl 240(%ebp),%ecx
  782. movl %ebp,%edx
  783. movups (%edx),%xmm0
  784. movups 16(%edx),%xmm1
  785. xorps %xmm0,%xmm6
  786. leal 32(%edx),%edx
  787. xorps %xmm6,%xmm3
  788. L037enc1_loop_6:
  789. .byte 102,15,56,220,217
  790. decl %ecx
  791. movups (%edx),%xmm1
  792. leal 16(%edx),%edx
  793. jnz L037enc1_loop_6
  794. .byte 102,15,56,221,217
  795. movl 48(%esp),%esp
  796. movl 40(%esp),%edi
  797. movups %xmm3,(%edi)
  798. pxor %xmm0,%xmm0
  799. pxor %xmm1,%xmm1
  800. pxor %xmm2,%xmm2
  801. pxor %xmm3,%xmm3
  802. pxor %xmm4,%xmm4
  803. pxor %xmm5,%xmm5
  804. pxor %xmm6,%xmm6
  805. pxor %xmm7,%xmm7
  806. popl %edi
  807. popl %esi
  808. popl %ebx
  809. popl %ebp
  810. ret
  811. .globl _aes_hw_ctr32_encrypt_blocks
  812. .private_extern _aes_hw_ctr32_encrypt_blocks
  813. .align 4
  814. _aes_hw_ctr32_encrypt_blocks:
  815. L_aes_hw_ctr32_encrypt_blocks_begin:
  816. pushl %ebp
  817. pushl %ebx
  818. pushl %esi
  819. pushl %edi
  820. #ifdef BORINGSSL_DISPATCH_TEST
  821. pushl %ebx
  822. pushl %edx
  823. call L038pic
  824. L038pic:
  825. popl %ebx
  826. leal _BORINGSSL_function_hit+0-L038pic(%ebx),%ebx
  827. movl $1,%edx
  828. movb %dl,(%ebx)
  829. popl %edx
  830. popl %ebx
  831. #endif
  832. movl 20(%esp),%esi
  833. movl 24(%esp),%edi
  834. movl 28(%esp),%eax
  835. movl 32(%esp),%edx
  836. movl 36(%esp),%ebx
  837. movl %esp,%ebp
  838. subl $88,%esp
  839. andl $-16,%esp
  840. movl %ebp,80(%esp)
  841. cmpl $1,%eax
  842. je L039ctr32_one_shortcut
  843. movdqu (%ebx),%xmm7
  844. movl $202182159,(%esp)
  845. movl $134810123,4(%esp)
  846. movl $67438087,8(%esp)
  847. movl $66051,12(%esp)
  848. movl $6,%ecx
  849. xorl %ebp,%ebp
  850. movl %ecx,16(%esp)
  851. movl %ecx,20(%esp)
  852. movl %ecx,24(%esp)
  853. movl %ebp,28(%esp)
  854. .byte 102,15,58,22,251,3
  855. .byte 102,15,58,34,253,3
  856. movl 240(%edx),%ecx
  857. bswap %ebx
  858. pxor %xmm0,%xmm0
  859. pxor %xmm1,%xmm1
  860. movdqa (%esp),%xmm2
  861. .byte 102,15,58,34,195,0
  862. leal 3(%ebx),%ebp
  863. .byte 102,15,58,34,205,0
  864. incl %ebx
  865. .byte 102,15,58,34,195,1
  866. incl %ebp
  867. .byte 102,15,58,34,205,1
  868. incl %ebx
  869. .byte 102,15,58,34,195,2
  870. incl %ebp
  871. .byte 102,15,58,34,205,2
  872. movdqa %xmm0,48(%esp)
  873. .byte 102,15,56,0,194
  874. movdqu (%edx),%xmm6
  875. movdqa %xmm1,64(%esp)
  876. .byte 102,15,56,0,202
  877. pshufd $192,%xmm0,%xmm2
  878. pshufd $128,%xmm0,%xmm3
  879. cmpl $6,%eax
  880. jb L040ctr32_tail
  881. pxor %xmm6,%xmm7
  882. shll $4,%ecx
  883. movl $16,%ebx
  884. movdqa %xmm7,32(%esp)
  885. movl %edx,%ebp
  886. subl %ecx,%ebx
  887. leal 32(%edx,%ecx,1),%edx
  888. subl $6,%eax
  889. jmp L041ctr32_loop6
  890. .align 4,0x90
  891. L041ctr32_loop6:
  892. pshufd $64,%xmm0,%xmm4
  893. movdqa 32(%esp),%xmm0
  894. pshufd $192,%xmm1,%xmm5
  895. pxor %xmm0,%xmm2
  896. pshufd $128,%xmm1,%xmm6
  897. pxor %xmm0,%xmm3
  898. pshufd $64,%xmm1,%xmm7
  899. movups 16(%ebp),%xmm1
  900. pxor %xmm0,%xmm4
  901. pxor %xmm0,%xmm5
  902. .byte 102,15,56,220,209
  903. pxor %xmm0,%xmm6
  904. pxor %xmm0,%xmm7
  905. .byte 102,15,56,220,217
  906. movups 32(%ebp),%xmm0
  907. movl %ebx,%ecx
  908. .byte 102,15,56,220,225
  909. .byte 102,15,56,220,233
  910. .byte 102,15,56,220,241
  911. .byte 102,15,56,220,249
  912. call L_aesni_encrypt6_enter
  913. movups (%esi),%xmm1
  914. movups 16(%esi),%xmm0
  915. xorps %xmm1,%xmm2
  916. movups 32(%esi),%xmm1
  917. xorps %xmm0,%xmm3
  918. movups %xmm2,(%edi)
  919. movdqa 16(%esp),%xmm0
  920. xorps %xmm1,%xmm4
  921. movdqa 64(%esp),%xmm1
  922. movups %xmm3,16(%edi)
  923. movups %xmm4,32(%edi)
  924. paddd %xmm0,%xmm1
  925. paddd 48(%esp),%xmm0
  926. movdqa (%esp),%xmm2
  927. movups 48(%esi),%xmm3
  928. movups 64(%esi),%xmm4
  929. xorps %xmm3,%xmm5
  930. movups 80(%esi),%xmm3
  931. leal 96(%esi),%esi
  932. movdqa %xmm0,48(%esp)
  933. .byte 102,15,56,0,194
  934. xorps %xmm4,%xmm6
  935. movups %xmm5,48(%edi)
  936. xorps %xmm3,%xmm7
  937. movdqa %xmm1,64(%esp)
  938. .byte 102,15,56,0,202
  939. movups %xmm6,64(%edi)
  940. pshufd $192,%xmm0,%xmm2
  941. movups %xmm7,80(%edi)
  942. leal 96(%edi),%edi
  943. pshufd $128,%xmm0,%xmm3
  944. subl $6,%eax
  945. jnc L041ctr32_loop6
  946. addl $6,%eax
  947. jz L042ctr32_ret
  948. movdqu (%ebp),%xmm7
  949. movl %ebp,%edx
  950. pxor 32(%esp),%xmm7
  951. movl 240(%ebp),%ecx
  952. L040ctr32_tail:
  953. por %xmm7,%xmm2
  954. cmpl $2,%eax
  955. jb L043ctr32_one
  956. pshufd $64,%xmm0,%xmm4
  957. por %xmm7,%xmm3
  958. je L044ctr32_two
  959. pshufd $192,%xmm1,%xmm5
  960. por %xmm7,%xmm4
  961. cmpl $4,%eax
  962. jb L045ctr32_three
  963. pshufd $128,%xmm1,%xmm6
  964. por %xmm7,%xmm5
  965. je L046ctr32_four
  966. por %xmm7,%xmm6
  967. call __aesni_encrypt6
  968. movups (%esi),%xmm1
  969. movups 16(%esi),%xmm0
  970. xorps %xmm1,%xmm2
  971. movups 32(%esi),%xmm1
  972. xorps %xmm0,%xmm3
  973. movups 48(%esi),%xmm0
  974. xorps %xmm1,%xmm4
  975. movups 64(%esi),%xmm1
  976. xorps %xmm0,%xmm5
  977. movups %xmm2,(%edi)
  978. xorps %xmm1,%xmm6
  979. movups %xmm3,16(%edi)
  980. movups %xmm4,32(%edi)
  981. movups %xmm5,48(%edi)
  982. movups %xmm6,64(%edi)
  983. jmp L042ctr32_ret
  984. .align 4,0x90
  985. L039ctr32_one_shortcut:
  986. movups (%ebx),%xmm2
  987. movl 240(%edx),%ecx
  988. L043ctr32_one:
  989. movups (%edx),%xmm0
  990. movups 16(%edx),%xmm1
  991. leal 32(%edx),%edx
  992. xorps %xmm0,%xmm2
  993. L047enc1_loop_7:
  994. .byte 102,15,56,220,209
  995. decl %ecx
  996. movups (%edx),%xmm1
  997. leal 16(%edx),%edx
  998. jnz L047enc1_loop_7
  999. .byte 102,15,56,221,209
  1000. movups (%esi),%xmm6
  1001. xorps %xmm2,%xmm6
  1002. movups %xmm6,(%edi)
  1003. jmp L042ctr32_ret
  1004. .align 4,0x90
  1005. L044ctr32_two:
  1006. call __aesni_encrypt2
  1007. movups (%esi),%xmm5
  1008. movups 16(%esi),%xmm6
  1009. xorps %xmm5,%xmm2
  1010. xorps %xmm6,%xmm3
  1011. movups %xmm2,(%edi)
  1012. movups %xmm3,16(%edi)
  1013. jmp L042ctr32_ret
  1014. .align 4,0x90
  1015. L045ctr32_three:
  1016. call __aesni_encrypt3
  1017. movups (%esi),%xmm5
  1018. movups 16(%esi),%xmm6
  1019. xorps %xmm5,%xmm2
  1020. movups 32(%esi),%xmm7
  1021. xorps %xmm6,%xmm3
  1022. movups %xmm2,(%edi)
  1023. xorps %xmm7,%xmm4
  1024. movups %xmm3,16(%edi)
  1025. movups %xmm4,32(%edi)
  1026. jmp L042ctr32_ret
  1027. .align 4,0x90
  1028. L046ctr32_four:
  1029. call __aesni_encrypt4
  1030. movups (%esi),%xmm6
  1031. movups 16(%esi),%xmm7
  1032. movups 32(%esi),%xmm1
  1033. xorps %xmm6,%xmm2
  1034. movups 48(%esi),%xmm0
  1035. xorps %xmm7,%xmm3
  1036. movups %xmm2,(%edi)
  1037. xorps %xmm1,%xmm4
  1038. movups %xmm3,16(%edi)
  1039. xorps %xmm0,%xmm5
  1040. movups %xmm4,32(%edi)
  1041. movups %xmm5,48(%edi)
  1042. L042ctr32_ret:
  1043. pxor %xmm0,%xmm0
  1044. pxor %xmm1,%xmm1
  1045. pxor %xmm2,%xmm2
  1046. pxor %xmm3,%xmm3
  1047. pxor %xmm4,%xmm4
  1048. movdqa %xmm0,32(%esp)
  1049. pxor %xmm5,%xmm5
  1050. movdqa %xmm0,48(%esp)
  1051. pxor %xmm6,%xmm6
  1052. movdqa %xmm0,64(%esp)
  1053. pxor %xmm7,%xmm7
  1054. movl 80(%esp),%esp
  1055. popl %edi
  1056. popl %esi
  1057. popl %ebx
  1058. popl %ebp
  1059. ret
  1060. .globl _aes_hw_xts_encrypt
  1061. .private_extern _aes_hw_xts_encrypt
  1062. .align 4
  1063. _aes_hw_xts_encrypt:
  1064. L_aes_hw_xts_encrypt_begin:
  1065. pushl %ebp
  1066. pushl %ebx
  1067. pushl %esi
  1068. pushl %edi
  1069. movl 36(%esp),%edx
  1070. movl 40(%esp),%esi
  1071. movl 240(%edx),%ecx
  1072. movups (%esi),%xmm2
  1073. movups (%edx),%xmm0
  1074. movups 16(%edx),%xmm1
  1075. leal 32(%edx),%edx
  1076. xorps %xmm0,%xmm2
  1077. L048enc1_loop_8:
  1078. .byte 102,15,56,220,209
  1079. decl %ecx
  1080. movups (%edx),%xmm1
  1081. leal 16(%edx),%edx
  1082. jnz L048enc1_loop_8
  1083. .byte 102,15,56,221,209
  1084. movl 20(%esp),%esi
  1085. movl 24(%esp),%edi
  1086. movl 28(%esp),%eax
  1087. movl 32(%esp),%edx
  1088. movl %esp,%ebp
  1089. subl $120,%esp
  1090. movl 240(%edx),%ecx
  1091. andl $-16,%esp
  1092. movl $135,96(%esp)
  1093. movl $0,100(%esp)
  1094. movl $1,104(%esp)
  1095. movl $0,108(%esp)
  1096. movl %eax,112(%esp)
  1097. movl %ebp,116(%esp)
  1098. movdqa %xmm2,%xmm1
  1099. pxor %xmm0,%xmm0
  1100. movdqa 96(%esp),%xmm3
  1101. pcmpgtd %xmm1,%xmm0
  1102. andl $-16,%eax
  1103. movl %edx,%ebp
  1104. movl %ecx,%ebx
  1105. subl $96,%eax
  1106. jc L049xts_enc_short
  1107. shll $4,%ecx
  1108. movl $16,%ebx
  1109. subl %ecx,%ebx
  1110. leal 32(%edx,%ecx,1),%edx
  1111. jmp L050xts_enc_loop6
  1112. .align 4,0x90
  1113. L050xts_enc_loop6:
  1114. pshufd $19,%xmm0,%xmm2
  1115. pxor %xmm0,%xmm0
  1116. movdqa %xmm1,(%esp)
  1117. paddq %xmm1,%xmm1
  1118. pand %xmm3,%xmm2
  1119. pcmpgtd %xmm1,%xmm0
  1120. pxor %xmm2,%xmm1
  1121. pshufd $19,%xmm0,%xmm2
  1122. pxor %xmm0,%xmm0
  1123. movdqa %xmm1,16(%esp)
  1124. paddq %xmm1,%xmm1
  1125. pand %xmm3,%xmm2
  1126. pcmpgtd %xmm1,%xmm0
  1127. pxor %xmm2,%xmm1
  1128. pshufd $19,%xmm0,%xmm2
  1129. pxor %xmm0,%xmm0
  1130. movdqa %xmm1,32(%esp)
  1131. paddq %xmm1,%xmm1
  1132. pand %xmm3,%xmm2
  1133. pcmpgtd %xmm1,%xmm0
  1134. pxor %xmm2,%xmm1
  1135. pshufd $19,%xmm0,%xmm2
  1136. pxor %xmm0,%xmm0
  1137. movdqa %xmm1,48(%esp)
  1138. paddq %xmm1,%xmm1
  1139. pand %xmm3,%xmm2
  1140. pcmpgtd %xmm1,%xmm0
  1141. pxor %xmm2,%xmm1
  1142. pshufd $19,%xmm0,%xmm7
  1143. movdqa %xmm1,64(%esp)
  1144. paddq %xmm1,%xmm1
  1145. movups (%ebp),%xmm0
  1146. pand %xmm3,%xmm7
  1147. movups (%esi),%xmm2
  1148. pxor %xmm1,%xmm7
  1149. movl %ebx,%ecx
  1150. movdqu 16(%esi),%xmm3
  1151. xorps %xmm0,%xmm2
  1152. movdqu 32(%esi),%xmm4
  1153. pxor %xmm0,%xmm3
  1154. movdqu 48(%esi),%xmm5
  1155. pxor %xmm0,%xmm4
  1156. movdqu 64(%esi),%xmm6
  1157. pxor %xmm0,%xmm5
  1158. movdqu 80(%esi),%xmm1
  1159. pxor %xmm0,%xmm6
  1160. leal 96(%esi),%esi
  1161. pxor (%esp),%xmm2
  1162. movdqa %xmm7,80(%esp)
  1163. pxor %xmm1,%xmm7
  1164. movups 16(%ebp),%xmm1
  1165. pxor 16(%esp),%xmm3
  1166. pxor 32(%esp),%xmm4
  1167. .byte 102,15,56,220,209
  1168. pxor 48(%esp),%xmm5
  1169. pxor 64(%esp),%xmm6
  1170. .byte 102,15,56,220,217
  1171. pxor %xmm0,%xmm7
  1172. movups 32(%ebp),%xmm0
  1173. .byte 102,15,56,220,225
  1174. .byte 102,15,56,220,233
  1175. .byte 102,15,56,220,241
  1176. .byte 102,15,56,220,249
  1177. call L_aesni_encrypt6_enter
  1178. movdqa 80(%esp),%xmm1
  1179. pxor %xmm0,%xmm0
  1180. xorps (%esp),%xmm2
  1181. pcmpgtd %xmm1,%xmm0
  1182. xorps 16(%esp),%xmm3
  1183. movups %xmm2,(%edi)
  1184. xorps 32(%esp),%xmm4
  1185. movups %xmm3,16(%edi)
  1186. xorps 48(%esp),%xmm5
  1187. movups %xmm4,32(%edi)
  1188. xorps 64(%esp),%xmm6
  1189. movups %xmm5,48(%edi)
  1190. xorps %xmm1,%xmm7
  1191. movups %xmm6,64(%edi)
  1192. pshufd $19,%xmm0,%xmm2
  1193. movups %xmm7,80(%edi)
  1194. leal 96(%edi),%edi
  1195. movdqa 96(%esp),%xmm3
  1196. pxor %xmm0,%xmm0
  1197. paddq %xmm1,%xmm1
  1198. pand %xmm3,%xmm2
  1199. pcmpgtd %xmm1,%xmm0
  1200. pxor %xmm2,%xmm1
  1201. subl $96,%eax
  1202. jnc L050xts_enc_loop6
  1203. movl 240(%ebp),%ecx
  1204. movl %ebp,%edx
  1205. movl %ecx,%ebx
  1206. L049xts_enc_short:
  1207. addl $96,%eax
  1208. jz L051xts_enc_done6x
  1209. movdqa %xmm1,%xmm5
  1210. cmpl $32,%eax
  1211. jb L052xts_enc_one
  1212. pshufd $19,%xmm0,%xmm2
  1213. pxor %xmm0,%xmm0
  1214. paddq %xmm1,%xmm1
  1215. pand %xmm3,%xmm2
  1216. pcmpgtd %xmm1,%xmm0
  1217. pxor %xmm2,%xmm1
  1218. je L053xts_enc_two
  1219. pshufd $19,%xmm0,%xmm2
  1220. pxor %xmm0,%xmm0
  1221. movdqa %xmm1,%xmm6
  1222. paddq %xmm1,%xmm1
  1223. pand %xmm3,%xmm2
  1224. pcmpgtd %xmm1,%xmm0
  1225. pxor %xmm2,%xmm1
  1226. cmpl $64,%eax
  1227. jb L054xts_enc_three
  1228. pshufd $19,%xmm0,%xmm2
  1229. pxor %xmm0,%xmm0
  1230. movdqa %xmm1,%xmm7
  1231. paddq %xmm1,%xmm1
  1232. pand %xmm3,%xmm2
  1233. pcmpgtd %xmm1,%xmm0
  1234. pxor %xmm2,%xmm1
  1235. movdqa %xmm5,(%esp)
  1236. movdqa %xmm6,16(%esp)
  1237. je L055xts_enc_four
  1238. movdqa %xmm7,32(%esp)
  1239. pshufd $19,%xmm0,%xmm7
  1240. movdqa %xmm1,48(%esp)
  1241. paddq %xmm1,%xmm1
  1242. pand %xmm3,%xmm7
  1243. pxor %xmm1,%xmm7
  1244. movdqu (%esi),%xmm2
  1245. movdqu 16(%esi),%xmm3
  1246. movdqu 32(%esi),%xmm4
  1247. pxor (%esp),%xmm2
  1248. movdqu 48(%esi),%xmm5
  1249. pxor 16(%esp),%xmm3
  1250. movdqu 64(%esi),%xmm6
  1251. pxor 32(%esp),%xmm4
  1252. leal 80(%esi),%esi
  1253. pxor 48(%esp),%xmm5
  1254. movdqa %xmm7,64(%esp)
  1255. pxor %xmm7,%xmm6
  1256. call __aesni_encrypt6
  1257. movaps 64(%esp),%xmm1
  1258. xorps (%esp),%xmm2
  1259. xorps 16(%esp),%xmm3
  1260. xorps 32(%esp),%xmm4
  1261. movups %xmm2,(%edi)
  1262. xorps 48(%esp),%xmm5
  1263. movups %xmm3,16(%edi)
  1264. xorps %xmm1,%xmm6
  1265. movups %xmm4,32(%edi)
  1266. movups %xmm5,48(%edi)
  1267. movups %xmm6,64(%edi)
  1268. leal 80(%edi),%edi
  1269. jmp L056xts_enc_done
  1270. .align 4,0x90
  1271. L052xts_enc_one:
  1272. movups (%esi),%xmm2
  1273. leal 16(%esi),%esi
  1274. xorps %xmm5,%xmm2
  1275. movups (%edx),%xmm0
  1276. movups 16(%edx),%xmm1
  1277. leal 32(%edx),%edx
  1278. xorps %xmm0,%xmm2
  1279. L057enc1_loop_9:
  1280. .byte 102,15,56,220,209
  1281. decl %ecx
  1282. movups (%edx),%xmm1
  1283. leal 16(%edx),%edx
  1284. jnz L057enc1_loop_9
  1285. .byte 102,15,56,221,209
  1286. xorps %xmm5,%xmm2
  1287. movups %xmm2,(%edi)
  1288. leal 16(%edi),%edi
  1289. movdqa %xmm5,%xmm1
  1290. jmp L056xts_enc_done
  1291. .align 4,0x90
  1292. L053xts_enc_two:
  1293. movaps %xmm1,%xmm6
  1294. movups (%esi),%xmm2
  1295. movups 16(%esi),%xmm3
  1296. leal 32(%esi),%esi
  1297. xorps %xmm5,%xmm2
  1298. xorps %xmm6,%xmm3
  1299. call __aesni_encrypt2
  1300. xorps %xmm5,%xmm2
  1301. xorps %xmm6,%xmm3
  1302. movups %xmm2,(%edi)
  1303. movups %xmm3,16(%edi)
  1304. leal 32(%edi),%edi
  1305. movdqa %xmm6,%xmm1
  1306. jmp L056xts_enc_done
  1307. .align 4,0x90
  1308. L054xts_enc_three:
  1309. movaps %xmm1,%xmm7
  1310. movups (%esi),%xmm2
  1311. movups 16(%esi),%xmm3
  1312. movups 32(%esi),%xmm4
  1313. leal 48(%esi),%esi
  1314. xorps %xmm5,%xmm2
  1315. xorps %xmm6,%xmm3
  1316. xorps %xmm7,%xmm4
  1317. call __aesni_encrypt3
  1318. xorps %xmm5,%xmm2
  1319. xorps %xmm6,%xmm3
  1320. xorps %xmm7,%xmm4
  1321. movups %xmm2,(%edi)
  1322. movups %xmm3,16(%edi)
  1323. movups %xmm4,32(%edi)
  1324. leal 48(%edi),%edi
  1325. movdqa %xmm7,%xmm1
  1326. jmp L056xts_enc_done
  1327. .align 4,0x90
  1328. L055xts_enc_four:
  1329. movaps %xmm1,%xmm6
  1330. movups (%esi),%xmm2
  1331. movups 16(%esi),%xmm3
  1332. movups 32(%esi),%xmm4
  1333. xorps (%esp),%xmm2
  1334. movups 48(%esi),%xmm5
  1335. leal 64(%esi),%esi
  1336. xorps 16(%esp),%xmm3
  1337. xorps %xmm7,%xmm4
  1338. xorps %xmm6,%xmm5
  1339. call __aesni_encrypt4
  1340. xorps (%esp),%xmm2
  1341. xorps 16(%esp),%xmm3
  1342. xorps %xmm7,%xmm4
  1343. movups %xmm2,(%edi)
  1344. xorps %xmm6,%xmm5
  1345. movups %xmm3,16(%edi)
  1346. movups %xmm4,32(%edi)
  1347. movups %xmm5,48(%edi)
  1348. leal 64(%edi),%edi
  1349. movdqa %xmm6,%xmm1
  1350. jmp L056xts_enc_done
  1351. .align 4,0x90
  1352. L051xts_enc_done6x:
  1353. movl 112(%esp),%eax
  1354. andl $15,%eax
  1355. jz L058xts_enc_ret
  1356. movdqa %xmm1,%xmm5
  1357. movl %eax,112(%esp)
  1358. jmp L059xts_enc_steal
  1359. .align 4,0x90
  1360. L056xts_enc_done:
  1361. movl 112(%esp),%eax
  1362. pxor %xmm0,%xmm0
  1363. andl $15,%eax
  1364. jz L058xts_enc_ret
  1365. pcmpgtd %xmm1,%xmm0
  1366. movl %eax,112(%esp)
  1367. pshufd $19,%xmm0,%xmm5
  1368. paddq %xmm1,%xmm1
  1369. pand 96(%esp),%xmm5
  1370. pxor %xmm1,%xmm5
  1371. L059xts_enc_steal:
  1372. movzbl (%esi),%ecx
  1373. movzbl -16(%edi),%edx
  1374. leal 1(%esi),%esi
  1375. movb %cl,-16(%edi)
  1376. movb %dl,(%edi)
  1377. leal 1(%edi),%edi
  1378. subl $1,%eax
  1379. jnz L059xts_enc_steal
  1380. subl 112(%esp),%edi
  1381. movl %ebp,%edx
  1382. movl %ebx,%ecx
  1383. movups -16(%edi),%xmm2
  1384. xorps %xmm5,%xmm2
  1385. movups (%edx),%xmm0
  1386. movups 16(%edx),%xmm1
  1387. leal 32(%edx),%edx
  1388. xorps %xmm0,%xmm2
  1389. L060enc1_loop_10:
  1390. .byte 102,15,56,220,209
  1391. decl %ecx
  1392. movups (%edx),%xmm1
  1393. leal 16(%edx),%edx
  1394. jnz L060enc1_loop_10
  1395. .byte 102,15,56,221,209
  1396. xorps %xmm5,%xmm2
  1397. movups %xmm2,-16(%edi)
  1398. L058xts_enc_ret:
  1399. pxor %xmm0,%xmm0
  1400. pxor %xmm1,%xmm1
  1401. pxor %xmm2,%xmm2
  1402. movdqa %xmm0,(%esp)
  1403. pxor %xmm3,%xmm3
  1404. movdqa %xmm0,16(%esp)
  1405. pxor %xmm4,%xmm4
  1406. movdqa %xmm0,32(%esp)
  1407. pxor %xmm5,%xmm5
  1408. movdqa %xmm0,48(%esp)
  1409. pxor %xmm6,%xmm6
  1410. movdqa %xmm0,64(%esp)
  1411. pxor %xmm7,%xmm7
  1412. movdqa %xmm0,80(%esp)
  1413. movl 116(%esp),%esp
  1414. popl %edi
  1415. popl %esi
  1416. popl %ebx
  1417. popl %ebp
  1418. ret
  1419. .globl _aes_hw_xts_decrypt
  1420. .private_extern _aes_hw_xts_decrypt
  1421. .align 4
  1422. _aes_hw_xts_decrypt:
  1423. L_aes_hw_xts_decrypt_begin:
  1424. pushl %ebp
  1425. pushl %ebx
  1426. pushl %esi
  1427. pushl %edi
  1428. movl 36(%esp),%edx
  1429. movl 40(%esp),%esi
  1430. movl 240(%edx),%ecx
  1431. movups (%esi),%xmm2
  1432. movups (%edx),%xmm0
  1433. movups 16(%edx),%xmm1
  1434. leal 32(%edx),%edx
  1435. xorps %xmm0,%xmm2
  1436. L061enc1_loop_11:
  1437. .byte 102,15,56,220,209
  1438. decl %ecx
  1439. movups (%edx),%xmm1
  1440. leal 16(%edx),%edx
  1441. jnz L061enc1_loop_11
  1442. .byte 102,15,56,221,209
  1443. movl 20(%esp),%esi
  1444. movl 24(%esp),%edi
  1445. movl 28(%esp),%eax
  1446. movl 32(%esp),%edx
  1447. movl %esp,%ebp
  1448. subl $120,%esp
  1449. andl $-16,%esp
  1450. xorl %ebx,%ebx
  1451. testl $15,%eax
  1452. setnz %bl
  1453. shll $4,%ebx
  1454. subl %ebx,%eax
  1455. movl $135,96(%esp)
  1456. movl $0,100(%esp)
  1457. movl $1,104(%esp)
  1458. movl $0,108(%esp)
  1459. movl %eax,112(%esp)
  1460. movl %ebp,116(%esp)
  1461. movl 240(%edx),%ecx
  1462. movl %edx,%ebp
  1463. movl %ecx,%ebx
  1464. movdqa %xmm2,%xmm1
  1465. pxor %xmm0,%xmm0
  1466. movdqa 96(%esp),%xmm3
  1467. pcmpgtd %xmm1,%xmm0
  1468. andl $-16,%eax
  1469. subl $96,%eax
  1470. jc L062xts_dec_short
  1471. shll $4,%ecx
  1472. movl $16,%ebx
  1473. subl %ecx,%ebx
  1474. leal 32(%edx,%ecx,1),%edx
  1475. jmp L063xts_dec_loop6
  1476. .align 4,0x90
  1477. L063xts_dec_loop6:
  1478. pshufd $19,%xmm0,%xmm2
  1479. pxor %xmm0,%xmm0
  1480. movdqa %xmm1,(%esp)
  1481. paddq %xmm1,%xmm1
  1482. pand %xmm3,%xmm2
  1483. pcmpgtd %xmm1,%xmm0
  1484. pxor %xmm2,%xmm1
  1485. pshufd $19,%xmm0,%xmm2
  1486. pxor %xmm0,%xmm0
  1487. movdqa %xmm1,16(%esp)
  1488. paddq %xmm1,%xmm1
  1489. pand %xmm3,%xmm2
  1490. pcmpgtd %xmm1,%xmm0
  1491. pxor %xmm2,%xmm1
  1492. pshufd $19,%xmm0,%xmm2
  1493. pxor %xmm0,%xmm0
  1494. movdqa %xmm1,32(%esp)
  1495. paddq %xmm1,%xmm1
  1496. pand %xmm3,%xmm2
  1497. pcmpgtd %xmm1,%xmm0
  1498. pxor %xmm2,%xmm1
  1499. pshufd $19,%xmm0,%xmm2
  1500. pxor %xmm0,%xmm0
  1501. movdqa %xmm1,48(%esp)
  1502. paddq %xmm1,%xmm1
  1503. pand %xmm3,%xmm2
  1504. pcmpgtd %xmm1,%xmm0
  1505. pxor %xmm2,%xmm1
  1506. pshufd $19,%xmm0,%xmm7
  1507. movdqa %xmm1,64(%esp)
  1508. paddq %xmm1,%xmm1
  1509. movups (%ebp),%xmm0
  1510. pand %xmm3,%xmm7
  1511. movups (%esi),%xmm2
  1512. pxor %xmm1,%xmm7
  1513. movl %ebx,%ecx
  1514. movdqu 16(%esi),%xmm3
  1515. xorps %xmm0,%xmm2
  1516. movdqu 32(%esi),%xmm4
  1517. pxor %xmm0,%xmm3
  1518. movdqu 48(%esi),%xmm5
  1519. pxor %xmm0,%xmm4
  1520. movdqu 64(%esi),%xmm6
  1521. pxor %xmm0,%xmm5
  1522. movdqu 80(%esi),%xmm1
  1523. pxor %xmm0,%xmm6
  1524. leal 96(%esi),%esi
  1525. pxor (%esp),%xmm2
  1526. movdqa %xmm7,80(%esp)
  1527. pxor %xmm1,%xmm7
  1528. movups 16(%ebp),%xmm1
  1529. pxor 16(%esp),%xmm3
  1530. pxor 32(%esp),%xmm4
  1531. .byte 102,15,56,222,209
  1532. pxor 48(%esp),%xmm5
  1533. pxor 64(%esp),%xmm6
  1534. .byte 102,15,56,222,217
  1535. pxor %xmm0,%xmm7
  1536. movups 32(%ebp),%xmm0
  1537. .byte 102,15,56,222,225
  1538. .byte 102,15,56,222,233
  1539. .byte 102,15,56,222,241
  1540. .byte 102,15,56,222,249
  1541. call L_aesni_decrypt6_enter
  1542. movdqa 80(%esp),%xmm1
  1543. pxor %xmm0,%xmm0
  1544. xorps (%esp),%xmm2
  1545. pcmpgtd %xmm1,%xmm0
  1546. xorps 16(%esp),%xmm3
  1547. movups %xmm2,(%edi)
  1548. xorps 32(%esp),%xmm4
  1549. movups %xmm3,16(%edi)
  1550. xorps 48(%esp),%xmm5
  1551. movups %xmm4,32(%edi)
  1552. xorps 64(%esp),%xmm6
  1553. movups %xmm5,48(%edi)
  1554. xorps %xmm1,%xmm7
  1555. movups %xmm6,64(%edi)
  1556. pshufd $19,%xmm0,%xmm2
  1557. movups %xmm7,80(%edi)
  1558. leal 96(%edi),%edi
  1559. movdqa 96(%esp),%xmm3
  1560. pxor %xmm0,%xmm0
  1561. paddq %xmm1,%xmm1
  1562. pand %xmm3,%xmm2
  1563. pcmpgtd %xmm1,%xmm0
  1564. pxor %xmm2,%xmm1
  1565. subl $96,%eax
  1566. jnc L063xts_dec_loop6
  1567. movl 240(%ebp),%ecx
  1568. movl %ebp,%edx
  1569. movl %ecx,%ebx
  1570. L062xts_dec_short:
  1571. addl $96,%eax
  1572. jz L064xts_dec_done6x
  1573. movdqa %xmm1,%xmm5
  1574. cmpl $32,%eax
  1575. jb L065xts_dec_one
  1576. pshufd $19,%xmm0,%xmm2
  1577. pxor %xmm0,%xmm0
  1578. paddq %xmm1,%xmm1
  1579. pand %xmm3,%xmm2
  1580. pcmpgtd %xmm1,%xmm0
  1581. pxor %xmm2,%xmm1
  1582. je L066xts_dec_two
  1583. pshufd $19,%xmm0,%xmm2
  1584. pxor %xmm0,%xmm0
  1585. movdqa %xmm1,%xmm6
  1586. paddq %xmm1,%xmm1
  1587. pand %xmm3,%xmm2
  1588. pcmpgtd %xmm1,%xmm0
  1589. pxor %xmm2,%xmm1
  1590. cmpl $64,%eax
  1591. jb L067xts_dec_three
  1592. pshufd $19,%xmm0,%xmm2
  1593. pxor %xmm0,%xmm0
  1594. movdqa %xmm1,%xmm7
  1595. paddq %xmm1,%xmm1
  1596. pand %xmm3,%xmm2
  1597. pcmpgtd %xmm1,%xmm0
  1598. pxor %xmm2,%xmm1
  1599. movdqa %xmm5,(%esp)
  1600. movdqa %xmm6,16(%esp)
  1601. je L068xts_dec_four
  1602. movdqa %xmm7,32(%esp)
  1603. pshufd $19,%xmm0,%xmm7
  1604. movdqa %xmm1,48(%esp)
  1605. paddq %xmm1,%xmm1
  1606. pand %xmm3,%xmm7
  1607. pxor %xmm1,%xmm7
  1608. movdqu (%esi),%xmm2
  1609. movdqu 16(%esi),%xmm3
  1610. movdqu 32(%esi),%xmm4
  1611. pxor (%esp),%xmm2
  1612. movdqu 48(%esi),%xmm5
  1613. pxor 16(%esp),%xmm3
  1614. movdqu 64(%esi),%xmm6
  1615. pxor 32(%esp),%xmm4
  1616. leal 80(%esi),%esi
  1617. pxor 48(%esp),%xmm5
  1618. movdqa %xmm7,64(%esp)
  1619. pxor %xmm7,%xmm6
  1620. call __aesni_decrypt6
  1621. movaps 64(%esp),%xmm1
  1622. xorps (%esp),%xmm2
  1623. xorps 16(%esp),%xmm3
  1624. xorps 32(%esp),%xmm4
  1625. movups %xmm2,(%edi)
  1626. xorps 48(%esp),%xmm5
  1627. movups %xmm3,16(%edi)
  1628. xorps %xmm1,%xmm6
  1629. movups %xmm4,32(%edi)
  1630. movups %xmm5,48(%edi)
  1631. movups %xmm6,64(%edi)
  1632. leal 80(%edi),%edi
  1633. jmp L069xts_dec_done
  1634. .align 4,0x90
  1635. L065xts_dec_one:
  1636. movups (%esi),%xmm2
  1637. leal 16(%esi),%esi
  1638. xorps %xmm5,%xmm2
  1639. movups (%edx),%xmm0
  1640. movups 16(%edx),%xmm1
  1641. leal 32(%edx),%edx
  1642. xorps %xmm0,%xmm2
  1643. L070dec1_loop_12:
  1644. .byte 102,15,56,222,209
  1645. decl %ecx
  1646. movups (%edx),%xmm1
  1647. leal 16(%edx),%edx
  1648. jnz L070dec1_loop_12
  1649. .byte 102,15,56,223,209
  1650. xorps %xmm5,%xmm2
  1651. movups %xmm2,(%edi)
  1652. leal 16(%edi),%edi
  1653. movdqa %xmm5,%xmm1
  1654. jmp L069xts_dec_done
  1655. .align 4,0x90
  1656. L066xts_dec_two:
  1657. movaps %xmm1,%xmm6
  1658. movups (%esi),%xmm2
  1659. movups 16(%esi),%xmm3
  1660. leal 32(%esi),%esi
  1661. xorps %xmm5,%xmm2
  1662. xorps %xmm6,%xmm3
  1663. call __aesni_decrypt2
  1664. xorps %xmm5,%xmm2
  1665. xorps %xmm6,%xmm3
  1666. movups %xmm2,(%edi)
  1667. movups %xmm3,16(%edi)
  1668. leal 32(%edi),%edi
  1669. movdqa %xmm6,%xmm1
  1670. jmp L069xts_dec_done
  1671. .align 4,0x90
  1672. L067xts_dec_three:
  1673. movaps %xmm1,%xmm7
  1674. movups (%esi),%xmm2
  1675. movups 16(%esi),%xmm3
  1676. movups 32(%esi),%xmm4
  1677. leal 48(%esi),%esi
  1678. xorps %xmm5,%xmm2
  1679. xorps %xmm6,%xmm3
  1680. xorps %xmm7,%xmm4
  1681. call __aesni_decrypt3
  1682. xorps %xmm5,%xmm2
  1683. xorps %xmm6,%xmm3
  1684. xorps %xmm7,%xmm4
  1685. movups %xmm2,(%edi)
  1686. movups %xmm3,16(%edi)
  1687. movups %xmm4,32(%edi)
  1688. leal 48(%edi),%edi
  1689. movdqa %xmm7,%xmm1
  1690. jmp L069xts_dec_done
  1691. .align 4,0x90
  1692. L068xts_dec_four:
  1693. movaps %xmm1,%xmm6
  1694. movups (%esi),%xmm2
  1695. movups 16(%esi),%xmm3
  1696. movups 32(%esi),%xmm4
  1697. xorps (%esp),%xmm2
  1698. movups 48(%esi),%xmm5
  1699. leal 64(%esi),%esi
  1700. xorps 16(%esp),%xmm3
  1701. xorps %xmm7,%xmm4
  1702. xorps %xmm6,%xmm5
  1703. call __aesni_decrypt4
  1704. xorps (%esp),%xmm2
  1705. xorps 16(%esp),%xmm3
  1706. xorps %xmm7,%xmm4
  1707. movups %xmm2,(%edi)
  1708. xorps %xmm6,%xmm5
  1709. movups %xmm3,16(%edi)
  1710. movups %xmm4,32(%edi)
  1711. movups %xmm5,48(%edi)
  1712. leal 64(%edi),%edi
  1713. movdqa %xmm6,%xmm1
  1714. jmp L069xts_dec_done
  1715. .align 4,0x90
  1716. L064xts_dec_done6x:
  1717. movl 112(%esp),%eax
  1718. andl $15,%eax
  1719. jz L071xts_dec_ret
  1720. movl %eax,112(%esp)
  1721. jmp L072xts_dec_only_one_more
  1722. .align 4,0x90
  1723. L069xts_dec_done:
  1724. movl 112(%esp),%eax
  1725. pxor %xmm0,%xmm0
  1726. andl $15,%eax
  1727. jz L071xts_dec_ret
  1728. pcmpgtd %xmm1,%xmm0
  1729. movl %eax,112(%esp)
  1730. pshufd $19,%xmm0,%xmm2
  1731. pxor %xmm0,%xmm0
  1732. movdqa 96(%esp),%xmm3
  1733. paddq %xmm1,%xmm1
  1734. pand %xmm3,%xmm2
  1735. pcmpgtd %xmm1,%xmm0
  1736. pxor %xmm2,%xmm1
  1737. L072xts_dec_only_one_more:
  1738. pshufd $19,%xmm0,%xmm5
  1739. movdqa %xmm1,%xmm6
  1740. paddq %xmm1,%xmm1
  1741. pand %xmm3,%xmm5
  1742. pxor %xmm1,%xmm5
  1743. movl %ebp,%edx
  1744. movl %ebx,%ecx
  1745. movups (%esi),%xmm2
  1746. xorps %xmm5,%xmm2
  1747. movups (%edx),%xmm0
  1748. movups 16(%edx),%xmm1
  1749. leal 32(%edx),%edx
  1750. xorps %xmm0,%xmm2
  1751. L073dec1_loop_13:
  1752. .byte 102,15,56,222,209
  1753. decl %ecx
  1754. movups (%edx),%xmm1
  1755. leal 16(%edx),%edx
  1756. jnz L073dec1_loop_13
  1757. .byte 102,15,56,223,209
  1758. xorps %xmm5,%xmm2
  1759. movups %xmm2,(%edi)
L074xts_dec_steal:
movzbl 16(%esi),%ecx
movzbl (%edi),%edx
leal 1(%esi),%esi
movb %cl,(%edi)
movb %dl,16(%edi)
leal 1(%edi),%edi
subl $1,%eax
jnz L074xts_dec_steal
subl 112(%esp),%edi
movl %ebp,%edx
movl %ebx,%ecx
movups (%edi),%xmm2
xorps %xmm6,%xmm2
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L075dec1_loop_14:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L075dec1_loop_14
.byte 102,15,56,223,209
xorps %xmm6,%xmm2
movups %xmm2,(%edi)
L071xts_dec_ret:
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
movdqa %xmm0,(%esp)
pxor %xmm3,%xmm3
movdqa %xmm0,16(%esp)
pxor %xmm4,%xmm4
movdqa %xmm0,32(%esp)
pxor %xmm5,%xmm5
movdqa %xmm0,48(%esp)
pxor %xmm6,%xmm6
movdqa %xmm0,64(%esp)
pxor %xmm7,%xmm7
movdqa %xmm0,80(%esp)
movl 116(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
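/* _aes_hw_cbc_encrypt: AES-NI CBC-mode encryption/decryption.
   Stack arguments (cdecl): in, out, length, key, ivec, enc; a non-zero
   enc flag selects the encrypt loop, zero selects the bulk decrypt path.
   The final chaining value is written back through ivec on return. */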
.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt
.align 4
_aes_hw_cbc_encrypt:
L_aes_hw_cbc_encrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
movl %esp,%ebx
movl 24(%esp),%edi
subl $24,%ebx
movl 28(%esp),%eax
andl $-16,%ebx
movl 32(%esp),%edx
movl 36(%esp),%ebp
testl %eax,%eax
jz L076cbc_abort
cmpl $0,40(%esp)
xchgl %esp,%ebx
movups (%ebp),%xmm7
movl 240(%edx),%ecx
movl %edx,%ebp
movl %ebx,16(%esp)
movl %ecx,%ebx
je L077cbc_decrypt
movaps %xmm7,%xmm2
cmpl $16,%eax
jb L078cbc_enc_tail
subl $16,%eax
jmp L079cbc_enc_loop
.align 4,0x90
L079cbc_enc_loop:
movups (%esi),%xmm7
leal 16(%esi),%esi
movups (%edx),%xmm0
movups 16(%edx),%xmm1
xorps %xmm0,%xmm7
leal 32(%edx),%edx
xorps %xmm7,%xmm2
L080enc1_loop_15:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L080enc1_loop_15
.byte 102,15,56,221,209
movl %ebx,%ecx
movl %ebp,%edx
movups %xmm2,(%edi)
leal 16(%edi),%edi
subl $16,%eax
jnc L079cbc_enc_loop
addl $16,%eax
jnz L078cbc_enc_tail
movaps %xmm2,%xmm7
pxor %xmm2,%xmm2
jmp L081cbc_ret
L078cbc_enc_tail:
movl %eax,%ecx
.long 2767451785
movl $16,%ecx
subl %eax,%ecx
xorl %eax,%eax
.long 2868115081
leal -16(%edi),%edi
movl %ebx,%ecx
movl %edi,%esi
movl %ebp,%edx
jmp L079cbc_enc_loop
.align 4,0x90
L077cbc_decrypt:
cmpl $80,%eax
jbe L082cbc_dec_tail
movaps %xmm7,(%esp)
subl $80,%eax
jmp L083cbc_dec_loop6_enter
.align 4,0x90
L084cbc_dec_loop6:
movaps %xmm0,(%esp)
movups %xmm7,(%edi)
leal 16(%edi),%edi
L083cbc_dec_loop6_enter:
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
movdqu 48(%esi),%xmm5
movdqu 64(%esi),%xmm6
movdqu 80(%esi),%xmm7
call __aesni_decrypt6
movups (%esi),%xmm1
movups 16(%esi),%xmm0
xorps (%esp),%xmm2
xorps %xmm1,%xmm3
movups 32(%esi),%xmm1
xorps %xmm0,%xmm4
movups 48(%esi),%xmm0
xorps %xmm1,%xmm5
movups 64(%esi),%xmm1
xorps %xmm0,%xmm6
movups 80(%esi),%xmm0
xorps %xmm1,%xmm7
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
leal 96(%esi),%esi
movups %xmm4,32(%edi)
movl %ebx,%ecx
movups %xmm5,48(%edi)
movl %ebp,%edx
movups %xmm6,64(%edi)
leal 80(%edi),%edi
subl $96,%eax
ja L084cbc_dec_loop6
movaps %xmm7,%xmm2
movaps %xmm0,%xmm7
addl $80,%eax
jle L085cbc_dec_clear_tail_collected
movups %xmm2,(%edi)
leal 16(%edi),%edi
L082cbc_dec_tail:
movups (%esi),%xmm2
movaps %xmm2,%xmm6
cmpl $16,%eax
jbe L086cbc_dec_one
movups 16(%esi),%xmm3
movaps %xmm3,%xmm5
cmpl $32,%eax
jbe L087cbc_dec_two
movups 32(%esi),%xmm4
cmpl $48,%eax
jbe L088cbc_dec_three
movups 48(%esi),%xmm5
cmpl $64,%eax
jbe L089cbc_dec_four
movups 64(%esi),%xmm6
movaps %xmm7,(%esp)
movups (%esi),%xmm2
xorps %xmm7,%xmm7
call __aesni_decrypt6
movups (%esi),%xmm1
movups 16(%esi),%xmm0
xorps (%esp),%xmm2
xorps %xmm1,%xmm3
movups 32(%esi),%xmm1
xorps %xmm0,%xmm4
movups 48(%esi),%xmm0
xorps %xmm1,%xmm5
movups 64(%esi),%xmm7
xorps %xmm0,%xmm6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
pxor %xmm3,%xmm3
movups %xmm4,32(%edi)
pxor %xmm4,%xmm4
movups %xmm5,48(%edi)
pxor %xmm5,%xmm5
leal 64(%edi),%edi
movaps %xmm6,%xmm2
pxor %xmm6,%xmm6
subl $80,%eax
jmp L090cbc_dec_tail_collected
.align 4,0x90
L086cbc_dec_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L091dec1_loop_16:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L091dec1_loop_16
.byte 102,15,56,223,209
xorps %xmm7,%xmm2
movaps %xmm6,%xmm7
subl $16,%eax
jmp L090cbc_dec_tail_collected
.align 4,0x90
L087cbc_dec_two:
call __aesni_decrypt2
xorps %xmm7,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movaps %xmm3,%xmm2
pxor %xmm3,%xmm3
leal 16(%edi),%edi
movaps %xmm5,%xmm7
subl $32,%eax
jmp L090cbc_dec_tail_collected
.align 4,0x90
L088cbc_dec_three:
call __aesni_decrypt3
xorps %xmm7,%xmm2
xorps %xmm6,%xmm3
xorps %xmm5,%xmm4
movups %xmm2,(%edi)
movaps %xmm4,%xmm2
pxor %xmm4,%xmm4
movups %xmm3,16(%edi)
pxor %xmm3,%xmm3
leal 32(%edi),%edi
movups 32(%esi),%xmm7
subl $48,%eax
jmp L090cbc_dec_tail_collected
.align 4,0x90
L089cbc_dec_four:
call __aesni_decrypt4
movups 16(%esi),%xmm1
movups 32(%esi),%xmm0
xorps %xmm7,%xmm2
movups 48(%esi),%xmm7
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
xorps %xmm1,%xmm4
movups %xmm3,16(%edi)
pxor %xmm3,%xmm3
xorps %xmm0,%xmm5
movups %xmm4,32(%edi)
pxor %xmm4,%xmm4
leal 48(%edi),%edi
movaps %xmm5,%xmm2
pxor %xmm5,%xmm5
subl $64,%eax
jmp L090cbc_dec_tail_collected
.align 4,0x90
L085cbc_dec_clear_tail_collected:
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
L090cbc_dec_tail_collected:
andl $15,%eax
jnz L092cbc_dec_tail_partial
movups %xmm2,(%edi)
pxor %xmm0,%xmm0
jmp L081cbc_ret
.align 4,0x90
L092cbc_dec_tail_partial:
movaps %xmm2,(%esp)
pxor %xmm0,%xmm0
movl $16,%ecx
movl %esp,%esi
subl %eax,%ecx
.long 2767451785
movdqa %xmm2,(%esp)
L081cbc_ret:
movl 16(%esp),%esp
movl 36(%esp),%ebp
pxor %xmm2,%xmm2
pxor %xmm1,%xmm1
movups %xmm7,(%ebp)
pxor %xmm7,%xmm7
L076cbc_abort:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
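/* __aesni_set_encrypt_key: expand the user key at (%eax), %ecx bits
   (128/192/256), into the round-key schedule at (%edx). Chooses between
   the AESKEYGENASSIST-based path and the *_alt path according to the
   OPENSSL_ia32cap_P bits loaded into %ebp. Returns 0 in %eax on success,
   -1 for a NULL pointer, -2 for an unsupported key size. */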
.private_extern __aesni_set_encrypt_key
.align 4
__aesni_set_encrypt_key:
pushl %ebp
pushl %ebx
testl %eax,%eax
jz L093bad_pointer
testl %edx,%edx
jz L093bad_pointer
call L094pic
L094pic:
popl %ebx
leal Lkey_const-L094pic(%ebx),%ebx
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp
movups (%eax),%xmm0
xorps %xmm4,%xmm4
movl 4(%ebp),%ebp
leal 16(%edx),%edx
andl $268437504,%ebp
cmpl $256,%ecx
je L09514rounds
cmpl $192,%ecx
je L09612rounds
cmpl $128,%ecx
jne L097bad_keybits
.align 4,0x90
L09810rounds:
cmpl $268435456,%ebp
je L09910rounds_alt
movl $9,%ecx
movups %xmm0,-16(%edx)
.byte 102,15,58,223,200,1
call L100key_128_cold
.byte 102,15,58,223,200,2
call L101key_128
.byte 102,15,58,223,200,4
call L101key_128
.byte 102,15,58,223,200,8
call L101key_128
.byte 102,15,58,223,200,16
call L101key_128
.byte 102,15,58,223,200,32
call L101key_128
.byte 102,15,58,223,200,64
call L101key_128
.byte 102,15,58,223,200,128
call L101key_128
.byte 102,15,58,223,200,27
call L101key_128
.byte 102,15,58,223,200,54
call L101key_128
movups %xmm0,(%edx)
movl %ecx,80(%edx)
jmp L102good_key
.align 4,0x90
L101key_128:
movups %xmm0,(%edx)
leal 16(%edx),%edx
L100key_128_cold:
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
ret
.align 4,0x90
L09910rounds_alt:
movdqa (%ebx),%xmm5
movl $8,%ecx
movdqa 32(%ebx),%xmm4
movdqa %xmm0,%xmm2
movdqu %xmm0,-16(%edx)
L103loop_key128:
.byte 102,15,56,0,197
.byte 102,15,56,221,196
pslld $1,%xmm4
leal 16(%edx),%edx
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0
movdqu %xmm0,-16(%edx)
movdqa %xmm0,%xmm2
decl %ecx
jnz L103loop_key128
movdqa 48(%ebx),%xmm4
.byte 102,15,56,0,197
.byte 102,15,56,221,196
pslld $1,%xmm4
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0
movdqu %xmm0,(%edx)
movdqa %xmm0,%xmm2
.byte 102,15,56,0,197
.byte 102,15,56,221,196
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0
movdqu %xmm0,16(%edx)
movl $9,%ecx
movl %ecx,96(%edx)
jmp L102good_key
.align 4,0x90
L09612rounds:
movq 16(%eax),%xmm2
cmpl $268435456,%ebp
je L10412rounds_alt
movl $11,%ecx
movups %xmm0,-16(%edx)
.byte 102,15,58,223,202,1
call L105key_192a_cold
.byte 102,15,58,223,202,2
call L106key_192b
.byte 102,15,58,223,202,4
call L107key_192a
.byte 102,15,58,223,202,8
call L106key_192b
.byte 102,15,58,223,202,16
call L107key_192a
.byte 102,15,58,223,202,32
call L106key_192b
.byte 102,15,58,223,202,64
call L107key_192a
.byte 102,15,58,223,202,128
call L106key_192b
movups %xmm0,(%edx)
movl %ecx,48(%edx)
jmp L102good_key
.align 4,0x90
L107key_192a:
movups %xmm0,(%edx)
leal 16(%edx),%edx
.align 4,0x90
L105key_192a_cold:
movaps %xmm2,%xmm5
L108key_192b_warm:
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
pslldq $4,%xmm3
xorps %xmm4,%xmm0
pshufd $85,%xmm1,%xmm1
pxor %xmm3,%xmm2
pxor %xmm1,%xmm0
pshufd $255,%xmm0,%xmm3
pxor %xmm3,%xmm2
ret
.align 4,0x90
L106key_192b:
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5
movups %xmm5,(%edx)
shufps $78,%xmm2,%xmm3
movups %xmm3,16(%edx)
leal 32(%edx),%edx
jmp L108key_192b_warm
.align 4,0x90
L10412rounds_alt:
movdqa 16(%ebx),%xmm5
movdqa 32(%ebx),%xmm4
movl $8,%ecx
movdqu %xmm0,-16(%edx)
L109loop_key192:
movq %xmm2,(%edx)
movdqa %xmm2,%xmm1
.byte 102,15,56,0,213
.byte 102,15,56,221,212
pslld $1,%xmm4
leal 24(%edx),%edx
movdqa %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pshufd $255,%xmm0,%xmm3
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pxor %xmm2,%xmm0
pxor %xmm3,%xmm2
movdqu %xmm0,-16(%edx)
decl %ecx
jnz L109loop_key192
movl $11,%ecx
movl %ecx,32(%edx)
jmp L102good_key
.align 4,0x90
L09514rounds:
movups 16(%eax),%xmm2
leal 16(%edx),%edx
cmpl $268435456,%ebp
je L11014rounds_alt
movl $13,%ecx
movups %xmm0,-32(%edx)
movups %xmm2,-16(%edx)
.byte 102,15,58,223,202,1
call L111key_256a_cold
.byte 102,15,58,223,200,1
call L112key_256b
.byte 102,15,58,223,202,2
call L113key_256a
.byte 102,15,58,223,200,2
call L112key_256b
.byte 102,15,58,223,202,4
call L113key_256a
.byte 102,15,58,223,200,4
call L112key_256b
.byte 102,15,58,223,202,8
call L113key_256a
.byte 102,15,58,223,200,8
call L112key_256b
.byte 102,15,58,223,202,16
call L113key_256a
.byte 102,15,58,223,200,16
call L112key_256b
.byte 102,15,58,223,202,32
call L113key_256a
.byte 102,15,58,223,200,32
call L112key_256b
.byte 102,15,58,223,202,64
call L113key_256a
movups %xmm0,(%edx)
movl %ecx,16(%edx)
xorl %eax,%eax
jmp L102good_key
.align 4,0x90
L113key_256a:
movups %xmm2,(%edx)
leal 16(%edx),%edx
L111key_256a_cold:
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
ret
.align 4,0x90
L112key_256b:
movups %xmm0,(%edx)
leal 16(%edx),%edx
shufps $16,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $140,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $170,%xmm1,%xmm1
xorps %xmm1,%xmm2
ret
.align 4,0x90
L11014rounds_alt:
movdqa (%ebx),%xmm5
movdqa 32(%ebx),%xmm4
movl $7,%ecx
movdqu %xmm0,-32(%edx)
movdqa %xmm2,%xmm1
movdqu %xmm2,-16(%edx)
L114loop_key256:
.byte 102,15,56,0,213
.byte 102,15,56,221,212
movdqa %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pslld $1,%xmm4
pxor %xmm2,%xmm0
movdqu %xmm0,(%edx)
decl %ecx
jz L115done_key256
pshufd $255,%xmm0,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,221,211
movdqa %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm3,%xmm1
pxor %xmm1,%xmm2
movdqu %xmm2,16(%edx)
leal 32(%edx),%edx
movdqa %xmm2,%xmm1
jmp L114loop_key256
L115done_key256:
movl $13,%ecx
movl %ecx,16(%edx)
L102good_key:
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
xorl %eax,%eax
popl %ebx
popl %ebp
ret
.align 2,0x90
L093bad_pointer:
movl $-1,%eax
popl %ebx
popl %ebp
ret
.align 2,0x90
L097bad_keybits:
pxor %xmm0,%xmm0
movl $-2,%eax
popl %ebx
popl %ebp
ret
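/* _aes_hw_set_encrypt_key: public entry point; loads its three stack
   arguments (user key, bits, key-schedule pointer) into %eax/%ecx/%edx
   and forwards them to __aesni_set_encrypt_key. Under
   BORINGSSL_DISPATCH_TEST it first records that this path was taken. */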
.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key
.align 4
_aes_hw_set_encrypt_key:
L_aes_hw_set_encrypt_key_begin:
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call L116pic
L116pic:
popl %ebx
leal _BORINGSSL_function_hit+3-L116pic(%ebx),%ebx
movl $1,%edx
movb %dl,(%ebx)
popl %edx
popl %ebx
#endif
movl 4(%esp),%eax
movl 8(%esp),%ecx
movl 12(%esp),%edx
call __aesni_set_encrypt_key
ret
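/* _aes_hw_set_decrypt_key: build the encryption schedule, then convert it
   in place for decryption by reversing the order of the round keys and
   applying AESIMC (the .byte 102,15,56,219 opcodes) to the inner rounds. */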
.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key
.align 4
_aes_hw_set_decrypt_key:
L_aes_hw_set_decrypt_key_begin:
movl 4(%esp),%eax
movl 8(%esp),%ecx
movl 12(%esp),%edx
call __aesni_set_encrypt_key
movl 12(%esp),%edx
shll $4,%ecx
testl %eax,%eax
jnz L117dec_key_ret
leal 16(%edx,%ecx,1),%eax
movups (%edx),%xmm0
movups (%eax),%xmm1
movups %xmm0,(%eax)
movups %xmm1,(%edx)
leal 16(%edx),%edx
leal -16(%eax),%eax
L118dec_key_inverse:
movups (%edx),%xmm0
movups (%eax),%xmm1
.byte 102,15,56,219,192
.byte 102,15,56,219,201
leal 16(%edx),%edx
leal -16(%eax),%eax
movups %xmm0,16(%eax)
movups %xmm1,-16(%edx)
cmpl %edx,%eax
ja L118dec_key_inverse
movups (%edx),%xmm0
.byte 102,15,56,219,192
movups %xmm0,(%edx)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
xorl %eax,%eax
L117dec_key_ret:
ret
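/* Lkey_const: pshufb rotation masks and round-constant seeds used by the
   *_alt key-schedule paths above. */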
.align 6,0x90
Lkey_const:
.long 202313229,202313229,202313229,202313229
.long 67569157,67569157,67569157,67569157
.long 1,1,1,1
.long 27,27,27,27
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol _OPENSSL_ia32cap_P
.long 0
#endif