@ aesv8-armx32.S — BoringSSL AES (ARMv8 Crypto Extensions) routines, 32-bit ARM build.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.arch armv7-a @ don't confuse not-so-latest binutils with argv8 :-)
.fpu neon
.code 32
#undef __thumb2__
.align 5
@ Constants for the key-schedule code below, loaded as {q1,q2}:
@ q1 = initial round constant (rcon) words, q2 = byte-permutation mask
@ used with vtbl to implement RotWord; the trailing 0x1b row is the
@ replacement rcon loaded once the shifted rcon would overflow.
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function
.align 5
@-----------------------------------------------------------------------
@ int aes_hw_set_encrypt_key(const uint8_t *user_key,  @ r0
@                            int bits,                 @ r1
@                            AES_KEY *key)             @ r2
@ Expands user_key into the encryption round-key schedule at key,
@ using the ARMv8 AESE instruction (with an all-zero round key) as a
@ hardware SubBytes, and vtbl with the .Lrcon mask as RotWord.
@ Returns in r0: 0 on success, -1 if user_key or key is NULL,
@ -2 if bits is not one of 128/192/256.  The round count (10/12/14)
@ is stored after the last round key at .Ldone.
@ Note: r2 is left pointing at the stored round count on success.
aes_hw_set_encrypt_key:
.Lenc_key:
mov r3,#-1 @ provisional return value: bad pointer
cmp r0,#0
beq .Lenc_key_abort @ NULL user_key
cmp r2,#0
beq .Lenc_key_abort @ NULL key
mov r3,#-2 @ provisional return value: bad bit count
cmp r1,#128
blt .Lenc_key_abort
cmp r1,#256
bgt .Lenc_key_abort
tst r1,#0x3f @ must be a multiple of 64, i.e. 128/192/256
bne .Lenc_key_abort
adr r3,.Lrcon
cmp r1,#192
veor q0,q0,q0 @ q0 = 0: aese with zero key == SubBytes(+ShiftRows) only
vld1.8 {q3},[r0]! @ q3 = first 128 bits of user key
mov r1,#8 @ reuse r1 as schedule loop counter
vld1.32 {q1,q2},[r3]! @ q1 = rcon, q2 = rotate-n-splat vtbl mask
blt .Loop128
beq .L192
b .L256
.align 4
@ AES-128: 10 rounds, one new round key (q3) per iteration.
.Loop128:
vtbl.8 d20,{q3},d4 @ q10 = RotWord-splat of last word of q3
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]! @ emit current round key
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0   (SubWord via S-box)
subs r1,r1,#1
veor q3,q3,q9 @ xor in the three shifted copies of q3
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1 @ SubWord(RotWord(w)) ^ rcon
veor q3,q3,q9
vshl.u8 q1,q1,#1 @ next rcon = rcon << 1
veor q3,q3,q10
bne .Loop128
vld1.32 {q1},[r3] @ switch to the 0x1b rcon row for the last two rounds
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
veor q3,q3,q10
vst1.32 {q3},[r2] @ final (11th) round key
add r2,r2,#0x50
mov r12,#10 @ 10 rounds for AES-128
b .Ldone
.align 4
@ AES-192: key material spans q3 + d16 (64 bits); 8 iterations.
.L192:
vld1.8 {d16},[r0]! @ remaining 64 bits of the 192-bit key
vmov.i8 q10,#8 @ borrow q10
vst1.32 {q3},[r2]!
vsub.i8 q2,q2,q10 @ adjust the mask
.Loop192:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {d16},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vdup.32 q9,d7[1] @ splat last word of q3
veor q9,q9,q8
veor q10,q10,q1
vext.8 q8,q0,q8,#12
vshl.u8 q1,q1,#1
veor q8,q8,q9
veor q3,q3,q10
veor q8,q8,q10
vst1.32 {q3},[r2]!
bne .Loop192
mov r12,#12 @ 12 rounds for AES-192
add r2,r2,#0x20
b .Ldone
.align 4
@ AES-256: two 128-bit halves (q3, q8); 7 iterations, alternating
@ the rcon step (on q3) with the plain SubWord step (on q8).
.L256:
vld1.8 {q8},[r0] @ second 128 bits of the 256-bit key
mov r1,#7
mov r12,#14 @ 14 rounds for AES-256
vst1.32 {q3},[r2]!
.Loop256:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {q8},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vst1.32 {q3},[r2]!
beq .Ldone
@ second half: no rcon, no RotWord — just SubWord of the splatted word
vdup.32 q10,d7[1]
vext.8 q9,q0,q8,#12
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
veor q8,q8,q10
b .Loop256
.Ldone:
str r12,[r2] @ store round count after the schedule
mov r3,#0 @ success
.Lenc_key_abort:
mov r0,r3 @ return value
bx lr
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.globl aes_hw_set_decrypt_key
.hidden aes_hw_set_decrypt_key
.type aes_hw_set_decrypt_key,%function
.align 5
@-----------------------------------------------------------------------
@ int aes_hw_set_decrypt_key(const uint8_t *user_key,  @ r0
@                            int bits,                 @ r1
@                            AES_KEY *key)             @ r2
@ Builds the decryption schedule: first expands the encryption schedule
@ via .Lenc_key, then reverses the order of the round keys, applying
@ InvMixColumns (aesimc) to every key except the two outermost ones,
@ as required by the equivalent-inverse-cipher aesd/aesimc sequence.
@ Returns 0 on success or the .Lenc_key error code (-1/-2) in r0.
aes_hw_set_decrypt_key:
stmdb sp!,{r4,lr}
bl .Lenc_key @ r0 = status, r12 = round count on success
cmp r0,#0
bne .Ldec_key_abort
sub r2,r2,#240 @ restore original r2
mov r4,#-16 @ backwards post-increment for the high pointer
add r0,r2,r12,lsl#4 @ end of key schedule
@ Swap the outermost round keys untouched (no aesimc on these).
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
@ Walk both pointers toward the middle, swapping and InvMixColumns-ing.
.Loop_imc:
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
cmp r0,r2
bhi .Loop_imc
@ Middle round key: InvMixColumns in place.
vld1.32 {q0},[r2]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
vst1.32 {q0},[r0]
eor r0,r0,r0 @ return value
.Ldec_key_abort:
ldmia sp!,{r4,pc}
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_hw_encrypt
.hidden aes_hw_encrypt
.type aes_hw_encrypt,%function
.align 5
@-----------------------------------------------------------------------
@ void aes_hw_encrypt(const uint8_t *in,   @ r0, one 16-byte block
@                     uint8_t *out,        @ r1
@                     const AES_KEY *key)  @ r2
@ Single-block AES encryption.  [r2,#240] holds the round count;
@ rounds are unrolled two at a time around .Loop_enc.
@ Clobbers r3, q0-q2; no stack use.
aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET @ NOTE(review): AArch64 BTI macro in 32-bit code — presumably empty via arm_arch.h; confirm
ldr r3,[r2,#240] @ r3 = number of rounds
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0] @ q2 = plaintext block
sub r3,r3,#2
vld1.32 {q1},[r2]!
.Loop_enc: @ two rounds per iteration
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q1},[r2]!
bgt .Loop_enc
@ final two rounds: last aese has no MixColumns, then AddRoundKey
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2] @ last round key
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1] @ write ciphertext
bx lr
.size aes_hw_encrypt,.-aes_hw_encrypt
.globl aes_hw_decrypt
.hidden aes_hw_decrypt
.type aes_hw_decrypt,%function
.align 5
@-----------------------------------------------------------------------
@ void aes_hw_decrypt(const uint8_t *in,   @ r0, one 16-byte block
@                     uint8_t *out,        @ r1
@                     const AES_KEY *key)  @ r2 (decryption schedule)
@ Single-block AES decryption; mirror of aes_hw_encrypt using
@ aesd/aesimc.  [r2,#240] holds the round count.
@ Clobbers r3, q0-q2; no stack use.
aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET @ NOTE(review): AArch64 BTI macro in 32-bit code — presumably empty via arm_arch.h; confirm
ldr r3,[r2,#240] @ r3 = number of rounds
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0] @ q2 = ciphertext block
sub r3,r3,#2
vld1.32 {q1},[r2]!
.Loop_dec: @ two rounds per iteration
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q1},[r2]!
bgt .Loop_dec
@ final two rounds: last aesd has no InvMixColumns, then AddRoundKey
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2] @ last round key
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1] @ write plaintext
bx lr
.size aes_hw_decrypt,.-aes_hw_decrypt
.globl aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type aes_hw_cbc_encrypt,%function
.align 5
@-----------------------------------------------------------------------
@ void aes_hw_cbc_encrypt(const uint8_t *in,   @ r0
@                         uint8_t *out,        @ r1
@                         size_t len,          @ r2 (bytes; rounded down to 16)
@                         const AES_KEY *key,  @ r3
@                         uint8_t ivec[16],    @ [sp]
@                         int enc)             @ [sp+4], 0 = decrypt
@ CBC mode.  Encryption is serial (one block at a time, with the
@ first aese of the next block overlapped with the store); decryption
@ is interleaved three blocks wide in .Loop3x_cbc_dec.  The updated
@ IV (last ciphertext block, kept in q6) is written back to ivec.
@ Register roles: q6 = IV/chaining value, q7 = last round key,
@ q5 = rk[0]^rk[last] (pre-xor trick for the next input block),
@ r7 = rolling round-key pointer, r8 = input load step (0 on the
@ final block so we don't read past the input).
aes_hw_cbc_encrypt:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load remaining args: r4 = ivec, r5 = enc flag
subs r2,r2,#16
mov r8,#16
blo .Lcbc_abort @ fewer than 16 bytes: nothing to do
moveq r8,#0 @ exactly one block: don't advance past input
cmp r5,#0 @ en- or decrypting?
ldr r5,[r3,#240] @ r5 = round count
and r2,r2,#-16
vld1.8 {q6},[r4] @ q6 = IV
vld1.8 {q0},[r0],r8 @ first input block
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#6
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
sub r5,r5,#2
vld1.32 {q10,q11},[r7]!
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7] @ q7 = last round key
add r7,r3,#32
mov r6,r5
beq .Lcbc_dec
@ ---- CBC encryption ----
cmp r5,#2 @ AES-128?
veor q0,q0,q6 @ input ^ IV
veor q5,q8,q7 @ q5 = rk[0] ^ rk[last], folded into next input
beq .Lcbc_enc128
@ generic (192/256) path: keep rounds 2..7 pointers in r6/r12/r14/r3
vld1.32 {q2,q3},[r7]
add r7,r3,#16
add r6,r3,#16*4
add r12,r3,#16*5
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
add r14,r3,#16*6
add r3,r3,#16*7
b .Lenter_cbc_enc
.align 4
.Loop_cbc_enc:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]! @ store previous ciphertext block
.Lenter_cbc_enc:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r6]
cmp r5,#4 @ AES-192?
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r12]
beq .Lcbc_enc192
@ AES-256 only: two extra rounds
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r14]
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r3]
nop
.Lcbc_enc192:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0 @ last block: stop advancing input
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8 @ prefetch next input block
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5 @ next input ^ rk[0] ^ rk[last]
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7 @ ciphertext = state ^ rk[last]; also next IV
bhs .Loop_cbc_enc
vst1.8 {q6},[r1]! @ flush final ciphertext block
b .Lcbc_done
.align 5
@ AES-128 encryption: fully unrolled 10-round variant.
.Lcbc_enc128:
vld1.32 {q2,q3},[r7]
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]! @ store previous ciphertext block
.Lenter_cbc_enc128:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0 @ last block: stop advancing input
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8 @ prefetch next input block
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5 @ next input ^ rk[0] ^ rk[last]
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7 @ ciphertext = state ^ rk[last]; also next IV
bhs .Loop_cbc_enc128
vst1.8 {q6},[r1]!
b .Lcbc_done
.align 5
@ ---- CBC decryption: 3 blocks interleaved (q0, q1, q10) ----
.Lcbc_dec:
vld1.8 {q10},[r0]!
subs r2,r2,#32 @ bias
add r6,r5,#2
vorr q3,q0,q0 @ keep ciphertext copies for the chaining xor
vorr q1,q0,q0
vorr q11,q10,q10
blo .Lcbc_dec_tail @ fewer than 3 blocks remain
vorr q1,q10,q10
vld1.8 {q10},[r0]!
vorr q2,q0,q0
vorr q3,q1,q1
vorr q11,q10,q10
.Loop3x_cbc_dec: @ inner rounds, two at a time, three lanes
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt .Loop3x_cbc_dec
@ tail rounds interleaved with chaining-value setup and next loads
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q4,q6,q7 @ (prev IV) ^ rk[last], for block 0
subs r2,r2,#0x30
veor q5,q2,q7 @ ciphertext[0] ^ rk[last], for block 1
movlo r6,r2 @ r6 (loop counter) is zero at this point; reuse for residue
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7 @ ciphertext[1] ^ rk[last], for block 2
add r0,r0,r6 @ r0 is adjusted in such way that
@ at exit from the loop q1-q10
@ are loaded with last "words"
vorr q6,q11,q11 @ next chaining value = last ciphertext
mov r7,r3
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q2},[r0]!
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q3},[r0]!
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q11},[r0]!
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
add r6,r5,#2
veor q4,q4,q0 @ plaintext = state ^ prev-ciphertext ^ rk[last]
veor q5,q5,q1
veor q10,q10,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q4},[r1]!
vorr q0,q2,q2
vst1.8 {q5},[r1]!
vorr q1,q3,q3
vst1.8 {q10},[r1]!
vorr q10,q11,q11
bhs .Loop3x_cbc_dec
cmn r2,#0x30 @ exactly consumed everything?
beq .Lcbc_done
nop
@ tail: 1 or 2 remaining blocks, processed in q1/q10 only
.Lcbc_dec_tail:
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt .Lcbc_dec_tail
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
cmn r2,#0x20 @ one block left (r2 == -0x20)?
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q5,q6,q7 @ chaining value ^ rk[last]
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7 @ ciphertext[n-2] ^ rk[last]
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
beq .Lcbc_dec_one
@ two blocks
veor q5,q5,q1
veor q9,q9,q10
vorr q6,q11,q11 @ final IV = last ciphertext
vst1.8 {q5},[r1]!
vst1.8 {q9},[r1]!
b .Lcbc_done
.Lcbc_dec_one:
veor q5,q5,q10
vorr q6,q11,q11 @ final IV = last ciphertext
vst1.8 {q5},[r1]!
.Lcbc_done:
vst1.8 {q6},[r4] @ write updated IV back to ivec
.Lcbc_abort:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,pc}
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
.globl aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
@-----------------------------------------------------------------------
@ void aes_hw_ctr32_encrypt_blocks(const uint8_t *in,   @ r0
@                                  uint8_t *out,        @ r1
@                                  size_t blocks,       @ r2 (16-byte blocks)
@                                  const AES_KEY *key,  @ r3
@                                  const uint8_t ivec[16]) @ [sp]
@ CTR mode with a 32-bit big-endian counter in the last word of ivec.
@ The main loop processes 3 blocks at a time (q0/q1/q10 keystream
@ lanes); leftovers (1-2 blocks) go through .Lctr32_tail.
@ Register roles: q6 = counter-block template, q7 = last round key,
@ r8 = host-endian counter value, r7 = rolling round-key pointer.
aes_hw_ctr32_encrypt_blocks:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldr r4, [ip] @ load remaining arg: r4 = ivec
ldr r5,[r3,#240] @ r5 = round count
ldr r8, [r4, #12] @ big-endian counter word
vld1.32 {q0},[r4]
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#4
mov r12,#16
cmp r2,#2
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
sub r5,r5,#2
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7] @ q7 = last round key
add r7,r3,#32
mov r6,r5
movlo r12,#0 @ fewer than 2 blocks: don't advance input in tail
@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
@ affected by silicon errata #1742098 [0] and #1655431 [1],
@ respectively, where the second instruction of an aese/aesmc
@ instruction pair may execute twice if an interrupt is taken right
@ after the first instruction consumes an input register of which a
@ single 32-bit lane has been updated the last time it was modified.
@
@ This function uses a counter in one 32-bit lane. The code
@ could write to q1 and q10 directly, but that trips these bugs.
@ We write to q6 and copy to the final register as a workaround.
@
@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
rev r8, r8 @ counter to host endianness
#endif
add r10, r8, #1
vorr q6,q0,q0 @ q6 = counter-block template (see erratum note above)
rev r10, r10
vmov.32 d13[1],r10 @ counter+1 into template lane
add r8, r8, #2
vorr q1,q6,q6 @ q1 = block with counter+1
bls .Lctr32_tail @ <= 2 blocks total
rev r12, r8
vmov.32 d13[1],r12 @ counter+2 into template lane
sub r2,r2,#3 @ bias
vorr q10,q6,q6 @ q10 = block with counter+2
b .Loop3x_ctr32
.align 4
.Loop3x_ctr32: @ inner rounds, two at a time, three keystream lanes
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q9},[r7]!
bgt .Loop3x_ctr32
@ final rounds, interleaved with input loads, counter updates and
@ preparation of the next iteration's counter blocks
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
vld1.8 {q2},[r0]! @ input block 0
add r9,r8,#1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.8 {q3},[r0]! @ input block 1
rev r9,r9
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vld1.8 {q11},[r0]! @ input block 2
mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q2,q2,q7 @ fold last round key into the input
add r10,r8,#2
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
veor q3,q3,q7
add r8,r8,#3
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
@ Note the logic to update q0, q1, and q10 is written to work
@ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
@ 32-bit mode. See the comment above.
veor q11,q11,q7
vmov.32 d13[1], r9 @ counter for next block 0 into template
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vorr q0,q6,q6 @ copy template -> q0 (erratum workaround)
rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
vmov.32 d13[1], r10 @ counter for next block 1
rev r12,r8
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vorr q1,q6,q6 @ copy template -> q1
vmov.32 d13[1], r12 @ counter for next block 2
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vorr q10,q6,q6 @ copy template -> q10
subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
veor q2,q2,q4 @ output = input ^ keystream
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
vst1.8 {q2},[r1]!
veor q3,q3,q5
mov r6,r5
vst1.8 {q3},[r1]!
veor q11,q11,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q11},[r1]!
bhs .Loop3x_ctr32
@ undo bias; 0, 1 or 2 blocks remain
adds r2,r2,#3
beq .Lctr32_done
cmp r2,#1
mov r12,#16
moveq r12,#0 @ single block: don't advance past input
.Lctr32_tail: @ 1-2 blocks in q0/q1, two rounds per iteration
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q9},[r7]!
bgt .Lctr32_tail
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q2},[r0],r12 @ input block 0 (step 0 if it's the only one)
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q3},[r0] @ input block 1 (may alias block 0; unused then)
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q2,q2,q7 @ fold last round key into the input
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q3,q3,q7
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
cmp r2,#1
veor q2,q2,q0 @ output = input ^ keystream
veor q3,q3,q1
vst1.8 {q2},[r1]!
beq .Lctr32_done @ only one block was requested
vst1.8 {q3},[r1]
.Lctr32_done:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif  // __ARM_MAX_ARCH__>=7
#endif  // __arm__
#endif  // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits