// aesni-gcm-x86_64.S
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
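//
// AES-GCM for x86-64 using the AES-NI and PCLMULQDQ (carry-less multiply)
// instructions. The bulk loops below interleave CTR-mode encryption of six
// 16-byte blocks with the GHASH update for six previously processed blocks,
// consuming 96 bytes of data per iteration.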
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
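// _aesni_ctr32_ghash_6x is the stitched inner loop shared by both entry
// points below. Each pass through .Loop6x runs the AES rounds for six
// counter blocks (%xmm9-%xmm14) while folding six previously buffered
// blocks (staged on the stack) into the GHASH accumulator with vpclmulqdq,
// then XORs the resulting keystream with 96 bytes of input in .Lenc_tail.
// .Lhandle_ctr32 switches to a full 32-bit counter increment when adding
// six would carry out of the counter's lowest byte.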
.type _aesni_ctr32_ghash_6x,@function
.align 32
_aesni_ctr32_ghash_6x:
.cfi_startproc
vmovdqu 32(%r11),%xmm2
subq $6,%rdx
vpxor %xmm4,%xmm4,%xmm4
vmovdqu 0-128(%rcx),%xmm15
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpaddb %xmm2,%xmm11,%xmm12
vpaddb %xmm2,%xmm12,%xmm13
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm15,%xmm1,%xmm9
vmovdqu %xmm4,16+8(%rsp)
jmp .Loop6x
.align 32
.Loop6x:
addl $100663296,%ebx
jc .Lhandle_ctr32
vmovdqu 0-32(%r9),%xmm3
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm15,%xmm10,%xmm10
vpxor %xmm15,%xmm11,%xmm11
.Lresume_ctr32:
vmovdqu %xmm1,(%r8)
vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
vpxor %xmm15,%xmm12,%xmm12
vmovups 16-128(%rcx),%xmm2
vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
xorq %r12,%r12
cmpq %r14,%r15
vaesenc %xmm2,%xmm9,%xmm9
vmovdqu 48+8(%rsp),%xmm0
vpxor %xmm15,%xmm13,%xmm13
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
vaesenc %xmm2,%xmm10,%xmm10
vpxor %xmm15,%xmm14,%xmm14
setnc %r12b
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vaesenc %xmm2,%xmm11,%xmm11
vmovdqu 16-32(%r9),%xmm3
negq %r12
vaesenc %xmm2,%xmm12,%xmm12
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
vpxor %xmm4,%xmm8,%xmm8
vaesenc %xmm2,%xmm13,%xmm13
vpxor %xmm5,%xmm1,%xmm4
andq $0x60,%r12
vmovups 32-128(%rcx),%xmm15
vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
vaesenc %xmm2,%xmm14,%xmm14
vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
leaq (%r14,%r12,1),%r14
vaesenc %xmm15,%xmm9,%xmm9
vpxor 16+8(%rsp),%xmm8,%xmm8
vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
vmovdqu 64+8(%rsp),%xmm0
vaesenc %xmm15,%xmm10,%xmm10
movbeq 88(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 80(%r14),%r12
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,32+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,40+8(%rsp)
vmovdqu 48-32(%r9),%xmm5
vaesenc %xmm15,%xmm14,%xmm14
vmovups 48-128(%rcx),%xmm15
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm3,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
vaesenc %xmm15,%xmm11,%xmm11
vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
vmovdqu 80+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vpxor %xmm1,%xmm4,%xmm4
vmovdqu 64-32(%r9),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vmovups 64-128(%rcx),%xmm15
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
vaesenc %xmm15,%xmm10,%xmm10
movbeq 72(%r14),%r13
vpxor %xmm5,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
movbeq 64(%r14),%r12
vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
vmovdqu 96+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,48+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,56+8(%rsp)
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 96-32(%r9),%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vmovups 80-128(%rcx),%xmm15
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
vaesenc %xmm15,%xmm10,%xmm10
movbeq 56(%r14),%r13
vpxor %xmm1,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
vpxor 112+8(%rsp),%xmm8,%xmm8
vaesenc %xmm15,%xmm11,%xmm11
movbeq 48(%r14),%r12
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,64+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,72+8(%rsp)
vpxor %xmm3,%xmm4,%xmm4
vmovdqu 112-32(%r9),%xmm3
vaesenc %xmm15,%xmm14,%xmm14
vmovups 96-128(%rcx),%xmm15
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
vaesenc %xmm15,%xmm10,%xmm10
movbeq 40(%r14),%r13
vpxor %xmm2,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
movbeq 32(%r14),%r12
vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,80+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,88+8(%rsp)
vpxor %xmm5,%xmm6,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor %xmm1,%xmm6,%xmm6
vmovups 112-128(%rcx),%xmm15
vpslldq $8,%xmm6,%xmm5
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 16(%r11),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm8,%xmm7,%xmm7
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm5,%xmm4,%xmm4
movbeq 24(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 16(%r14),%r12
vpalignr $8,%xmm4,%xmm4,%xmm0
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
movq %r13,96+8(%rsp)
vaesenc %xmm15,%xmm12,%xmm12
movq %r12,104+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
vmovups 128-128(%rcx),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vmovups 144-128(%rcx),%xmm15
vaesenc %xmm1,%xmm10,%xmm10
vpsrldq $8,%xmm6,%xmm6
vaesenc %xmm1,%xmm11,%xmm11
vpxor %xmm6,%xmm7,%xmm7
vaesenc %xmm1,%xmm12,%xmm12
vpxor %xmm0,%xmm4,%xmm4
movbeq 8(%r14),%r13
vaesenc %xmm1,%xmm13,%xmm13
movbeq 0(%r14),%r12
vaesenc %xmm1,%xmm14,%xmm14
vmovups 160-128(%rcx),%xmm1
cmpl $11,%ebp
jb .Lenc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 176-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 192-128(%rcx),%xmm1
je .Lenc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 208-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 224-128(%rcx),%xmm1
jmp .Lenc_tail
.align 32
.Lhandle_ctr32:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vmovdqu 0-32(%r9),%xmm3
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm15,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm15,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpshufb %xmm0,%xmm14,%xmm14
vpshufb %xmm0,%xmm1,%xmm1
jmp .Lresume_ctr32
.align 32
.Lenc_tail:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
vpalignr $8,%xmm4,%xmm4,%xmm8
vaesenc %xmm15,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
vpxor 0(%rdi),%xmm1,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
vpxor 16(%rdi),%xmm1,%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vpxor 32(%rdi),%xmm1,%xmm5
vaesenc %xmm15,%xmm13,%xmm13
vpxor 48(%rdi),%xmm1,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor 64(%rdi),%xmm1,%xmm7
vpxor 80(%rdi),%xmm1,%xmm3
vmovdqu (%r8),%xmm1
vaesenclast %xmm2,%xmm9,%xmm9
vmovdqu 32(%r11),%xmm2
vaesenclast %xmm0,%xmm10,%xmm10
vpaddb %xmm2,%xmm1,%xmm0
movq %r13,112+8(%rsp)
leaq 96(%rdi),%rdi
vaesenclast %xmm5,%xmm11,%xmm11
vpaddb %xmm2,%xmm0,%xmm5
movq %r12,120+8(%rsp)
leaq 96(%rsi),%rsi
vmovdqu 0-128(%rcx),%xmm15
vaesenclast %xmm6,%xmm12,%xmm12
vpaddb %xmm2,%xmm5,%xmm6
vaesenclast %xmm7,%xmm13,%xmm13
vpaddb %xmm2,%xmm6,%xmm7
vaesenclast %xmm3,%xmm14,%xmm14
vpaddb %xmm2,%xmm7,%xmm3
addq $0x60,%r10
subq $0x6,%rdx
jc .L6x_done
vmovups %xmm9,-96(%rsi)
vpxor %xmm15,%xmm1,%xmm9
vmovups %xmm10,-80(%rsi)
vmovdqa %xmm0,%xmm10
vmovups %xmm11,-64(%rsi)
vmovdqa %xmm5,%xmm11
vmovups %xmm12,-48(%rsi)
vmovdqa %xmm6,%xmm12
vmovups %xmm13,-32(%rsi)
vmovdqa %xmm7,%xmm13
vmovups %xmm14,-16(%rsi)
vmovdqa %xmm3,%xmm14
vmovdqu 32+8(%rsp),%xmm7
jmp .Loop6x
.L6x_done:
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
.byte 0xf3,0xc3
.cfi_endproc
.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
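// aesni_gcm_decrypt: bulk decryption of at least 0x60 (96) bytes, updating
// the GHASH state as it goes. Arguments arrive in the standard SysV
// registers: %rdi = input, %rsi = output, %rdx = length in bytes,
// %rcx = AES key schedule (the round count is read from offset 240),
// %r8 = 16-byte counter block, %r9 = GHASH state (Xi first, with the
// precomputed hash-key powers following it). The number of bytes actually
// processed is returned in %rax; shorter inputs return 0.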
.globl aesni_gcm_decrypt
.hidden aesni_gcm_decrypt
.type aesni_gcm_decrypt,@function
.align 32
aesni_gcm_decrypt:
.cfi_startproc
xorq %r10,%r10
cmpq $0x60,%rdx
jb .Lgcm_dec_abort
leaq (%rsp),%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq .Lbswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
vmovdqu (%r9),%xmm8
andq $-128,%rsp
vmovdqu (%r11),%xmm0
leaq 128(%rcx),%rcx
leaq 32+32(%r9),%r9
movl 240-128(%rcx),%ebp
vpshufb %xmm0,%xmm8,%xmm8
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc .Ldec_no_key_aliasing
cmpq $768,%r15
jnc .Ldec_no_key_aliasing
subq %r15,%rsp
.Ldec_no_key_aliasing:
vmovdqu 80(%rdi),%xmm7
leaq (%rdi),%r14
vmovdqu 64(%rdi),%xmm4
leaq -192(%rdi,%rdx,1),%r15
vmovdqu 48(%rdi),%xmm5
shrq $4,%rdx
xorq %r10,%r10
vmovdqu 32(%rdi),%xmm6
vpshufb %xmm0,%xmm7,%xmm7
vmovdqu 16(%rdi),%xmm2
vpshufb %xmm0,%xmm4,%xmm4
vmovdqu (%rdi),%xmm3
vpshufb %xmm0,%xmm5,%xmm5
vmovdqu %xmm4,48(%rsp)
vpshufb %xmm0,%xmm6,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm2,%xmm2
vmovdqu %xmm6,80(%rsp)
vpshufb %xmm0,%xmm3,%xmm3
vmovdqu %xmm2,96(%rsp)
vmovdqu %xmm3,112(%rsp)
call _aesni_ctr32_ghash_6x
vmovups %xmm9,-96(%rsi)
vmovups %xmm10,-80(%rsi)
vmovups %xmm11,-64(%rsi)
vmovups %xmm12,-48(%rsi)
vmovups %xmm13,-32(%rsi)
vmovups %xmm14,-16(%rsi)
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lgcm_dec_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
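// _aesni_ctr32_6x encrypts six counter blocks and XORs them with 96 bytes
// of input, without touching the GHASH state. aesni_gcm_encrypt calls it
// twice to produce the first twelve ciphertext blocks, which then seed the
// stitched loop above as GHASH input.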
.type _aesni_ctr32_6x,@function
.align 32
_aesni_ctr32_6x:
.cfi_startproc
vmovdqu 0-128(%rcx),%xmm4
vmovdqu 32(%r11),%xmm2
leaq -1(%rbp),%r13
vmovups 16-128(%rcx),%xmm15
leaq 32-128(%rcx),%r12
vpxor %xmm4,%xmm1,%xmm9
addl $100663296,%ebx
jc .Lhandle_ctr32_2
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddb %xmm2,%xmm11,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddb %xmm2,%xmm12,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.align 16
.Loop_ctr32:
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vmovups (%r12),%xmm15
leaq 16(%r12),%r12
decl %r13d
jnz .Loop_ctr32
vmovdqu (%r12),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor 0(%rdi),%xmm3,%xmm4
vaesenc %xmm15,%xmm10,%xmm10
vpxor 16(%rdi),%xmm3,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
vpxor 32(%rdi),%xmm3,%xmm6
vaesenc %xmm15,%xmm12,%xmm12
vpxor 48(%rdi),%xmm3,%xmm8
vaesenc %xmm15,%xmm13,%xmm13
vpxor 64(%rdi),%xmm3,%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vpxor 80(%rdi),%xmm3,%xmm3
leaq 96(%rdi),%rdi
vaesenclast %xmm4,%xmm9,%xmm9
vaesenclast %xmm5,%xmm10,%xmm10
vaesenclast %xmm6,%xmm11,%xmm11
vaesenclast %xmm8,%xmm12,%xmm12
vaesenclast %xmm2,%xmm13,%xmm13
vaesenclast %xmm3,%xmm14,%xmm14
vmovups %xmm9,0(%rsi)
vmovups %xmm10,16(%rsi)
vmovups %xmm11,32(%rsi)
vmovups %xmm12,48(%rsi)
vmovups %xmm13,64(%rsi)
vmovups %xmm14,80(%rsi)
leaq 96(%rsi),%rsi
.byte 0xf3,0xc3
.align 32
.Lhandle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpshufb %xmm0,%xmm14,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpshufb %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.cfi_endproc
.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
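// aesni_gcm_encrypt: same register interface as aesni_gcm_decrypt above,
// but it requires at least 288 bytes (18 blocks) of input. The first
// twelve blocks are produced with _aesni_ctr32_6x, the stitched loop then
// encrypts the rest while hashing six blocks behind, and the remaining
// buffered ciphertext blocks are folded into Xi in the long vpclmulqdq
// sequence before the updated hash is written back through %r9.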
.globl aesni_gcm_encrypt
.hidden aesni_gcm_encrypt
.type aesni_gcm_encrypt,@function
.align 32
aesni_gcm_encrypt:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+2(%rip)
#endif
xorq %r10,%r10
cmpq $288,%rdx
jb .Lgcm_enc_abort
leaq (%rsp),%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq .Lbswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
leaq 128(%rcx),%rcx
vmovdqu (%r11),%xmm0
andq $-128,%rsp
movl 240-128(%rcx),%ebp
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc .Lenc_no_key_aliasing
cmpq $768,%r15
jnc .Lenc_no_key_aliasing
subq %r15,%rsp
.Lenc_no_key_aliasing:
leaq (%rsi),%r14
leaq -192(%rsi,%rdx,1),%r15
shrq $4,%rdx
call _aesni_ctr32_6x
vpshufb %xmm0,%xmm9,%xmm8
vpshufb %xmm0,%xmm10,%xmm2
vmovdqu %xmm8,112(%rsp)
vpshufb %xmm0,%xmm11,%xmm4
vmovdqu %xmm2,96(%rsp)
vpshufb %xmm0,%xmm12,%xmm5
vmovdqu %xmm4,80(%rsp)
vpshufb %xmm0,%xmm13,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm14,%xmm7
vmovdqu %xmm6,48(%rsp)
call _aesni_ctr32_6x
vmovdqu (%r9),%xmm8
leaq 32+32(%r9),%r9
subq $12,%rdx
movq $192,%r10
vpshufb %xmm0,%xmm8,%xmm8
call _aesni_ctr32_ghash_6x
vmovdqu 32(%rsp),%xmm7
vmovdqu (%r11),%xmm0
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm7,%xmm7,%xmm1
vmovdqu 32-32(%r9),%xmm15
vmovups %xmm9,-96(%rsi)
vpshufb %xmm0,%xmm9,%xmm9
vpxor %xmm7,%xmm1,%xmm1
vmovups %xmm10,-80(%rsi)
vpshufb %xmm0,%xmm10,%xmm10
vmovups %xmm11,-64(%rsi)
vpshufb %xmm0,%xmm11,%xmm11
vmovups %xmm12,-48(%rsi)
vpshufb %xmm0,%xmm12,%xmm12
vmovups %xmm13,-32(%rsi)
vpshufb %xmm0,%xmm13,%xmm13
vmovups %xmm14,-16(%rsi)
vpshufb %xmm0,%xmm14,%xmm14
vmovdqu %xmm9,16(%rsp)
vmovdqu 48(%rsp),%xmm6
vmovdqu 16-32(%r9),%xmm0
vpunpckhqdq %xmm6,%xmm6,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
vpxor %xmm6,%xmm2,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vmovdqu 64(%rsp),%xmm9
vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm9,%xmm9,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
vpxor %xmm9,%xmm5,%xmm5
vpxor %xmm7,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vmovdqu 80(%rsp),%xmm1
vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm4,%xmm7,%xmm7
vpunpckhqdq %xmm1,%xmm1,%xmm4
vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpxor %xmm6,%xmm9,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 96(%rsp),%xmm2
vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm7,%xmm6,%xmm6
vpunpckhqdq %xmm2,%xmm2,%xmm7
vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
vpxor %xmm2,%xmm7,%xmm7
vpxor %xmm9,%xmm1,%xmm1
vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm5,%xmm4,%xmm4
vpxor 112(%rsp),%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
vmovdqu 112-32(%r9),%xmm0
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpxor %xmm6,%xmm5,%xmm5
vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
vpxor %xmm4,%xmm7,%xmm4
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm1
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
vpxor %xmm14,%xmm1,%xmm1
vpxor %xmm5,%xmm6,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
vmovdqu 32-32(%r9),%xmm15
vpxor %xmm2,%xmm8,%xmm7
vpxor %xmm4,%xmm9,%xmm6
vmovdqu 16-32(%r9),%xmm0
vpxor %xmm5,%xmm7,%xmm9
vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
vpxor %xmm9,%xmm6,%xmm6
vpunpckhqdq %xmm13,%xmm13,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
vpxor %xmm13,%xmm2,%xmm2
vpslldq $8,%xmm6,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vpxor %xmm9,%xmm5,%xmm8
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm6,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm12,%xmm12,%xmm9
vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
vpxor %xmm12,%xmm9,%xmm9
vpxor %xmm14,%xmm13,%xmm13
vpalignr $8,%xmm8,%xmm8,%xmm14
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm11,%xmm11,%xmm1
vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
vpxor %xmm11,%xmm1,%xmm1
vpxor %xmm13,%xmm12,%xmm12
vxorps 16(%rsp),%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm9,%xmm9
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm10,%xmm10,%xmm2
vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
vpxor %xmm10,%xmm2,%xmm2
vpalignr $8,%xmm8,%xmm8,%xmm14
vpxor %xmm12,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm9,%xmm1,%xmm1
vxorps %xmm7,%xmm14,%xmm14
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
vmovdqu 112-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm11,%xmm10,%xmm10
vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
vpxor %xmm4,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
vpxor %xmm10,%xmm7,%xmm7
vpxor %xmm2,%xmm6,%xmm6
vpxor %xmm5,%xmm7,%xmm4
vpxor %xmm4,%xmm6,%xmm6
vpslldq $8,%xmm6,%xmm1
vmovdqu 16(%r11),%xmm3
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm1,%xmm5,%xmm8
vpxor %xmm6,%xmm7,%xmm7
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm2,%xmm8,%xmm8
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm7,%xmm2,%xmm2
vpxor %xmm2,%xmm8,%xmm8
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lgcm_enc_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
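// Constants: .Lbswap_mask converts blocks between memory (big-endian) and
// register order, .Lpoly holds the GHASH reduction constant (0xc2 in the
// top byte), .Lone_msb adds one to the last byte of the counter block via
// vpaddb on the fast path, and .Ltwo_lsb/.Lone_lsb are used with vpaddd on
// the byte-swapped counter in the wrap-around paths.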
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.Lone_msb:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Ltwo_lsb:
.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.Lone_lsb:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits