// aesv8-armx64.S — AArch64 AES routines using the ARMv8 Cryptography
// Extensions (AESE/AESD/AESMC/AESIMC). Generated output; see the header
// comment below.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
// MSan cannot instrument assembly; fall back to the C implementation.
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.arch armv8-a+crypto
// Key-expansion constants, loaded via adrp/:lo12: (PC-relative, PIC-safe).
// Row 0: initial AES round constant 0x01 (doubled each round with shl).
// Row 1: TBL byte-shuffle mask that rotates-and-splats the last key word.
// Row 2: round constant 0x1b, used once the doubled constant wraps past 0x80.
.section .rodata
.align 5
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function
.align 5
//-----------------------------------------------------------------------
// int aes_hw_set_encrypt_key(const uint8_t *user_key, int bits, AES_KEY *key)
// In:   x0 = user_key bytes, w1 = key length in bits (128/192/256),
//       x2 = output key schedule (presumably an AES_KEY; the round count is
//       stored 240 bytes in — see the `sub x2,x2,#240` in set_decrypt_key).
// Out:  x0 = 0 on success, -1 if a pointer is NULL, -2 on a bad bit length.
// Note: tail-entered via `bl .Lenc_key` from aes_hw_set_decrypt_key.
//-----------------------------------------------------------------------
aes_hw_set_encrypt_key:
.Lenc_key:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
// Parameter validation: NULL key/output -> -1, bad length -> -2.
mov x3,#-1
cmp x0,#0
b.eq .Lenc_key_abort
cmp x2,#0
b.eq .Lenc_key_abort
mov x3,#-2
cmp w1,#128
b.lt .Lenc_key_abort
cmp w1,#256
b.gt .Lenc_key_abort
tst w1,#0x3f // must be a multiple of 64 bits
b.ne .Lenc_key_abort
// Load round constant (v1) and rotate-splat mask (v2) from .Lrcon.
adrp x3,.Lrcon
add x3,x3,:lo12:.Lrcon
cmp w1,#192
eor v0.16b,v0.16b,v0.16b // v0 = 0, used by EXT to shift in zero bytes
ld1 {v3.16b},[x0],#16 // first 128 bits of the user key
mov w1,#8 // reuse w1 as expansion loop counter
ld1 {v1.4s,v2.4s},[x3],#32
b.lt .Loop128
b.eq .L192
b .L256
.align 4
// 128-bit key: 8 full rounds below, then two more unrolled -> 10 round keys.
// Each round: SubWord(RotWord(w[3])) via TBL+AESE, xor with rcon (v1),
// and the classic w[i] ^= w[i-1] cascade built from three EXT/EOR pairs.
.Loop128:
tbl v6.16b,{v3.16b},v2.16b // rotate-n-splat the last word
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16 // emit current round key
aese v6.16b,v0.16b // SubBytes (AddRoundKey with 0 is a no-op)
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b // xor round constant
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1 // double the round constant
eor v3.16b,v3.16b,v6.16b
b.ne .Loop128
ld1 {v1.4s},[x3] // switch to rcon 0x1b (0x80<<1 wraps)
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2]
add x2,x2,#0x50 // advance past remaining schedule slots
mov w12,#10 // 10 rounds for AES-128
b .Ldone
.align 4
// 192-bit key: 64 extra key bits in v4; the mask is shifted by 8 so the
// rotate-splat picks the correct word of the 8-byte tail.
.L192:
ld1 {v4.8b},[x0],#8
movi v6.16b,#8 // borrow v6.16b
st1 {v3.4s},[x2],#16
sub v2.16b,v2.16b,v6.16b // adjust the mask
.Loop192:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.8b},[x2],#8
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
dup v5.4s,v3.s[3]
eor v5.16b,v5.16b,v4.16b
eor v6.16b,v6.16b,v1.16b
ext v4.16b,v0.16b,v4.16b,#12
shl v1.16b,v1.16b,#1
eor v4.16b,v4.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
eor v4.16b,v4.16b,v6.16b
st1 {v3.4s},[x2],#16
b.ne .Loop192
mov w12,#12 // 12 rounds for AES-192
add x2,x2,#0x20
b .Ldone
.align 4
// 256-bit key: alternate rounds expand v3 (with rcon) and v4 (SubWord of
// a plain splat, no rotation, no rcon), 7 iterations -> 14 round keys.
.L256:
ld1 {v4.16b},[x0]
mov w1,#7
mov w12,#14 // 14 rounds for AES-256
st1 {v3.4s},[x2],#16
.Loop256:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2],#16
b.eq .Ldone
dup v6.4s,v3.s[3] // just splat
ext v5.16b,v0.16b,v4.16b,#12
aese v6.16b,v0.16b
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
eor v4.16b,v4.16b,v6.16b
b .Loop256
.Ldone:
str w12,[x2] // store the round count after the last round key
mov x3,#0 // success
.Lenc_key_abort:
mov x0,x3 // return value
ldr x29,[sp],#16 // x30 intentionally not restored (see PAuth note)
ret
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.globl aes_hw_set_decrypt_key
.hidden aes_hw_set_decrypt_key
.type aes_hw_set_decrypt_key,%function
.align 5
//-----------------------------------------------------------------------
// int aes_hw_set_decrypt_key(const uint8_t *user_key, int bits, AES_KEY *key)
// Builds the encryption schedule via .Lenc_key, then converts it for
// equivalent-inverse-cipher decryption: the round keys are reversed in
// place, and every interior key gets InvMixColumns applied (AESIMC).
// Out: x0 = 0 on success, or the error code from .Lenc_key.
//-----------------------------------------------------------------------
aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl .Lenc_key
cmp x0,#0
b.ne .Ldec_key_abort // propagate the key-setup error
sub x2,x2,#240 // restore original x2
mov x4,#-16 // x0 walks backwards in 16-byte steps
add x0,x2,x12,lsl#4 // end of key schedule (w12 = round count)
// Swap first/last round keys; these two are NOT run through AESIMC.
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
// Swap the remaining pairs, applying InvMixColumns to each.
.Loop_imc:
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
aesimc v0.16b,v0.16b
aesimc v1.16b,v1.16b
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
cmp x0,x2
b.hi .Loop_imc
// Middle key (odd count of interior keys) still needs AESIMC.
ld1 {v0.4s},[x2]
aesimc v0.16b,v0.16b
st1 {v0.4s},[x0]
eor x0,x0,x0 // return value
.Ldec_key_abort:
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_hw_encrypt
.hidden aes_hw_encrypt
.type aes_hw_encrypt,%function
.align 5
//-----------------------------------------------------------------------
// void aes_hw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key)
// Encrypts one 16-byte block: x0 = in, x1 = out, x2 = key schedule
// (round count at [x2,#240]). Leaf function: no stack frame needed.
// The loop handles two rounds per iteration; the final AESE is not
// followed by AESMC, and the last round key is applied with a plain EOR.
//-----------------------------------------------------------------------
aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240] // w3 = number of rounds
ld1 {v0.4s},[x2],#16 // rndkey[0]
ld1 {v2.16b},[x0] // plaintext block
sub w3,w3,#2 // all but the final two rounds in the loop
ld1 {v1.4s},[x2],#16 // rndkey[1]
.Loop_enc:
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aese v2.16b,v1.16b
aesmc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt .Loop_enc
// Final two rounds: no MixColumns after the last SubBytes/ShiftRows.
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2] // last round key
aese v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b // final AddRoundKey
st1 {v2.16b},[x1]
ret
.size aes_hw_encrypt,.-aes_hw_encrypt
.globl aes_hw_decrypt
.hidden aes_hw_decrypt
.type aes_hw_decrypt,%function
.align 5
//-----------------------------------------------------------------------
// void aes_hw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key)
// Decrypts one 16-byte block: x0 = in, x1 = out, x2 = inverse key
// schedule as produced by aes_hw_set_decrypt_key (round count at
// [x2,#240]). Mirror of aes_hw_encrypt using AESD/AESIMC.
//-----------------------------------------------------------------------
aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240] // w3 = number of rounds
ld1 {v0.4s},[x2],#16 // rndkey[0]
ld1 {v2.16b},[x0] // ciphertext block
sub w3,w3,#2 // all but the final two rounds in the loop
ld1 {v1.4s},[x2],#16 // rndkey[1]
.Loop_dec:
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aesd v2.16b,v1.16b
aesimc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt .Loop_dec
// Final two rounds: no InvMixColumns after the last AESD.
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2] // last round key
aesd v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b // final AddRoundKey
st1 {v2.16b},[x1]
ret
.size aes_hw_decrypt,.-aes_hw_decrypt
.globl aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type aes_hw_cbc_encrypt,%function
.align 5
//-----------------------------------------------------------------------
// void aes_hw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
//                         const AES_KEY *key, uint8_t ivec[16], int enc)
// x0 = in, x1 = out, x2 = byte length (rounded down to a multiple of 16),
// x3 = key schedule, x4 = IV (updated on exit), w5 != 0 -> encrypt.
// Encryption is inherently serial (each block chains into the next);
// decryption is interleaved three blocks at a time for throughput.
// Instruction order is latency-scheduled — do not reorder.
//-----------------------------------------------------------------------
aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
mov x8,#16 // input stride; zeroed on the last block
b.lo .Lcbc_abort // less than one block: nothing to do
csel x8,xzr,x8,eq
cmp w5,#0 // en- or decrypting?
ldr w5,[x3,#240] // w5 = round count
and x2,x2,#-16 // truncate length to whole blocks
ld1 {v6.16b},[x4] // v6 = IV
ld1 {v0.16b},[x0],x8
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#6
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
sub w5,w5,#2
ld1 {v18.4s,v19.4s},[x7],#32
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7] // v7 = final round key
add x7,x3,#32
mov w6,w5
b.eq .Lcbc_dec
// ---- encrypt path ----
cmp w5,#2
eor v0.16b,v0.16b,v6.16b // xor first block with IV
eor v5.16b,v16.16b,v7.16b // v5 = rndkey[0]^last, folded into input later
b.eq .Lcbc_enc128
// Generic (192/256-bit) encrypt: stage per-round key pointers so the
// loop can reload v16/v17 without recomputing addresses.
ld1 {v2.4s,v3.4s},[x7]
add x7,x3,#16
add x6,x3,#16*4
add x12,x3,#16*5
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
add x14,x3,#16*6
add x3,x3,#16*7
b .Lenter_cbc_enc
.align 4
.Loop_cbc_enc:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16 // store previous ciphertext block
.Lenter_cbc_enc:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x6]
cmp w5,#4
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x12]
b.eq .Lcbc_enc192
// Two extra rounds for 256-bit keys.
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x14]
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x3]
nop
.Lcbc_enc192:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq // don't advance past the last block
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8 // load next plaintext block
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b // pre-fold rndkey[0]^last into next input
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b // v6 = ciphertext = new chaining value
b.hs .Loop_cbc_enc
st1 {v6.16b},[x1],#16
b .Lcbc_done
.align 5
// 128-bit-key encrypt: all round keys fit in registers; no reload staging.
.Lcbc_enc128:
ld1 {v2.4s,v3.4s},[x7]
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
.Lenter_cbc_enc128:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs .Loop_cbc_enc128
st1 {v6.16b},[x1],#16
b .Lcbc_done
.align 5
// ---- decrypt path: three blocks in flight (v0, v1, v18) ----
.Lcbc_dec:
ld1 {v18.16b},[x0],#16
subs x2,x2,#32 // bias
add w6,w5,#2
orr v3.16b,v0.16b,v0.16b // keep ciphertext copies for chaining
orr v1.16b,v0.16b,v0.16b
orr v19.16b,v18.16b,v18.16b
b.lo .Lcbc_dec_tail // fewer than 3 blocks remain
orr v1.16b,v18.16b,v18.16b
ld1 {v18.16b},[x0],#16
orr v2.16b,v0.16b,v0.16b
orr v3.16b,v1.16b,v1.16b
orr v19.16b,v18.16b,v18.16b
.Loop3x_cbc_dec:
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Loop3x_cbc_dec
// Remaining rounds unrolled; chaining values xored with the last round
// key up front (v4/v5/v17) so the final AESD output can be EORed directly.
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
eor v4.16b,v6.16b,v7.16b
subs x2,x2,#0x30
eor v5.16b,v2.16b,v7.16b
csel x6,x2,x6,lo // x6, w6, is zero at this point
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
add x0,x0,x6 // x0 is adjusted in such way that
// at exit from the loop v1.16b-v18.16b
// are loaded with last "words"
orr v6.16b,v19.16b,v19.16b // next chaining value
mov x7,x3
aesd v0.16b,v20.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
ld1 {v2.16b},[x0],#16
aesd v0.16b,v21.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
aesd v0.16b,v22.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
ld1 {v19.16b},[x0],#16
aesd v0.16b,v23.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
add w6,w5,#2
eor v4.16b,v4.16b,v0.16b // plaintext = AESD output ^ prev ciphertext ^ last key
eor v5.16b,v5.16b,v1.16b
eor v18.16b,v18.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v4.16b},[x1],#16
orr v0.16b,v2.16b,v2.16b
st1 {v5.16b},[x1],#16
orr v1.16b,v3.16b,v3.16b
st1 {v18.16b},[x1],#16
orr v18.16b,v19.16b,v19.16b
b.hs .Loop3x_cbc_dec
cmn x2,#0x30 // x2 == -0x30 means nothing left
b.eq .Lcbc_done
nop
// Tail: one or two blocks, processed in v1 and v18 (v18 is the last block).
.Lcbc_dec_tail:
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Lcbc_dec_tail
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
cmn x2,#0x20 // x2 == -0x20 -> exactly one block in the tail
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
eor v5.16b,v6.16b,v7.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
b.eq .Lcbc_dec_one
eor v5.16b,v5.16b,v1.16b
eor v17.16b,v17.16b,v18.16b
orr v6.16b,v19.16b,v19.16b // new IV = last ciphertext block
st1 {v5.16b},[x1],#16
st1 {v17.16b},[x1],#16
b .Lcbc_done
.Lcbc_dec_one:
eor v5.16b,v5.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
.Lcbc_done:
st1 {v6.16b},[x4] // write back updated IV
.Lcbc_abort:
ldr x29,[sp],#16 // x30 intentionally not restored (see PAuth note)
ret
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
.globl aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
//-----------------------------------------------------------------------
// void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
//                                  size_t blocks, const AES_KEY *key,
//                                  const uint8_t ivec[16])
// x0 = in, x1 = out, x2 = number of 16-byte blocks, x3 = key schedule,
// x4 = counter block (w8 = big-endian 32-bit counter from bytes 12..15).
// Keystream blocks are computed three at a time (v0/v1/v18) in the main
// loop; counter wrap beyond 32 bits is the caller's concern.
//-----------------------------------------------------------------------
aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240] // w5 = round count
ldr w8, [x4, #12] // low 32 bits of the counter (big-endian)
ld1 {v0.4s},[x4]
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#4
mov x12,#16
cmp x2,#2
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
sub w5,w5,#2
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7] // v7 = final round key
add x7,x3,#32
mov w6,w5
csel x12,xzr,x12,lo // single block: don't advance input pointer
// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
// affected by silicon errata #1742098 [0] and #1655431 [1],
// respectively, where the second instruction of an aese/aesmc
// instruction pair may execute twice if an interrupt is taken right
// after the first instruction consumes an input register of which a
// single 32-bit lane has been updated the last time it was modified.
//
// This function uses a counter in one 32-bit lane. The vmov lines
// could write to v1.16b and v18.16b directly, but that trips this bug.
// We write to v6.16b and copy to the final register as a workaround.
//
// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
rev w8, w8 // counter to host byte order
#endif
// Materialize counter+1 (v1) and, for the 3x path, counter+2 (v18).
add w10, w8, #1
orr v6.16b,v0.16b,v0.16b
rev w10, w10
mov v6.s[3],w10
add w8, w8, #2
orr v1.16b,v6.16b,v6.16b
b.ls .Lctr32_tail // at most 2 blocks: skip the 3x loop
rev w12, w8
mov v6.s[3],w12
sub x2,x2,#3 // bias
orr v18.16b,v6.16b,v6.16b
b .Loop3x_ctr32
.align 4
.Loop3x_ctr32:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
aese v18.16b,v17.16b
aesmc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Loop3x_ctr32
// Final rounds: results move into v4/v5/v17 so v0/v1/v18 can be
// refilled with the next three counter blocks in parallel.
aese v0.16b,v16.16b
aesmc v4.16b,v0.16b
aese v1.16b,v16.16b
aesmc v5.16b,v1.16b
ld1 {v2.16b},[x0],#16
add w9,w8,#1
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
rev w9,w9
aese v4.16b,v17.16b
aesmc v4.16b,v4.16b
aese v5.16b,v17.16b
aesmc v5.16b,v5.16b
ld1 {v19.16b},[x0],#16
mov x7,x3
aese v18.16b,v17.16b
aesmc v17.16b,v18.16b
aese v4.16b,v20.16b
aesmc v4.16b,v4.16b
aese v5.16b,v20.16b
aesmc v5.16b,v5.16b
eor v2.16b,v2.16b,v7.16b // pre-fold last round key into the input
add w10,w8,#2
aese v17.16b,v20.16b
aesmc v17.16b,v17.16b
eor v3.16b,v3.16b,v7.16b
add w8,w8,#3
aese v4.16b,v21.16b
aesmc v4.16b,v4.16b
aese v5.16b,v21.16b
aesmc v5.16b,v5.16b
// Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
// 32-bit mode. See the comment above.
eor v19.16b,v19.16b,v7.16b
mov v6.s[3], w9
aese v17.16b,v21.16b
aesmc v17.16b,v17.16b
orr v0.16b,v6.16b,v6.16b
rev w10,w10
aese v4.16b,v22.16b
aesmc v4.16b,v4.16b
mov v6.s[3], w10
rev w12,w8
aese v5.16b,v22.16b
aesmc v5.16b,v5.16b
orr v1.16b,v6.16b,v6.16b
mov v6.s[3], w12
aese v17.16b,v22.16b
aesmc v17.16b,v17.16b
orr v18.16b,v6.16b,v6.16b
subs x2,x2,#3
aese v4.16b,v23.16b
aese v5.16b,v23.16b
aese v17.16b,v23.16b
eor v2.16b,v2.16b,v4.16b // ciphertext = input ^ keystream
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
st1 {v2.16b},[x1],#16
eor v3.16b,v3.16b,v5.16b
mov w6,w5
st1 {v3.16b},[x1],#16
eor v19.16b,v19.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v19.16b},[x1],#16
b.hs .Loop3x_ctr32
// Undo the bias; 0-2 blocks remain.
adds x2,x2,#3
b.eq .Lctr32_done
cmp x2,#1
mov x12,#16
csel x12,xzr,x12,eq
// Tail: encrypt counter blocks v0 (and v1); v3's store is skipped when
// only one block remains.
.Lctr32_tail:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v17.4s},[x7],#16
b.gt .Lctr32_tail
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v2.16b},[x0],x12
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v1.16b,v20.16b
aesmc v1.16b,v1.16b
ld1 {v3.16b},[x0]
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v1.16b,v21.16b
aesmc v1.16b,v1.16b
eor v2.16b,v2.16b,v7.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v1.16b,v22.16b
aesmc v1.16b,v1.16b
eor v3.16b,v3.16b,v7.16b
aese v0.16b,v23.16b
aese v1.16b,v23.16b
cmp x2,#1
eor v2.16b,v2.16b,v0.16b
eor v3.16b,v3.16b,v1.16b
st1 {v2.16b},[x1],#16
b.eq .Lctr32_done
st1 {v3.16b},[x1]
.Lctr32_done:
ldr x29,[sp],#16 // x30 intentionally not restored (see PAuth note)
ret
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif // __ARM_MAX_ARCH__>=7
#endif // __aarch64__
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits