forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-11 1f93a7dfd1f8d5ff7a5c53246c7534fe2332d6f4
kernel/arch/arm64/crypto/aes-neonbs-core.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Bit sliced AES using NEON instructions
  *
  * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */

 /*
@@ -383,7 +380,7 @@
 /*
  * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
  */
-ENTRY(aesbs_convert_key)
+SYM_FUNC_START(aesbs_convert_key)
 	ld1 {v7.4s}, [x1], #16		// load round 0 key
 	ld1 {v17.4s}, [x1], #16		// load round 1 key

@@ -428,10 +425,10 @@
 	eor v17.16b, v17.16b, v7.16b
 	str q17, [x0]
 	ret
-ENDPROC(aesbs_convert_key)
+SYM_FUNC_END(aesbs_convert_key)

 	.align 4
-aesbs_encrypt8:
+SYM_FUNC_START_LOCAL(aesbs_encrypt8)
 	ldr q9, [bskey], #16		// round 0 key
 	ldr q8, M0SR
 	ldr q24, SR
@@ -491,10 +488,10 @@
 	eor v2.16b, v2.16b, v12.16b
 	eor v5.16b, v5.16b, v12.16b
 	ret
-ENDPROC(aesbs_encrypt8)
+SYM_FUNC_END(aesbs_encrypt8)

 	.align 4
-aesbs_decrypt8:
+SYM_FUNC_START_LOCAL(aesbs_decrypt8)
 	lsl x9, rounds, #7
 	add bskey, bskey, x9

@@ -556,7 +553,7 @@
 	eor v3.16b, v3.16b, v12.16b
 	eor v5.16b, v5.16b, v12.16b
 	ret
-ENDPROC(aesbs_decrypt8)
+SYM_FUNC_END(aesbs_decrypt8)

 /*
  * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
@@ -616,7 +613,6 @@
 	st1 {\o7\().16b}, [x19], #16

 	cbz x23, 1f
-	cond_yield_neon
 	b 99b

 1:	frame_pop
@@ -624,21 +620,21 @@
 	.endm

 	.align 4
-ENTRY(aesbs_ecb_encrypt)
+SYM_FUNC_START(aesbs_ecb_encrypt)
 	__ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
-ENDPROC(aesbs_ecb_encrypt)
+SYM_FUNC_END(aesbs_ecb_encrypt)

 	.align 4
-ENTRY(aesbs_ecb_decrypt)
+SYM_FUNC_START(aesbs_ecb_decrypt)
 	__ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
-ENDPROC(aesbs_ecb_decrypt)
+SYM_FUNC_END(aesbs_ecb_decrypt)

 /*
  * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
  *                   int blocks, u8 iv[])
  */
 	.align 4
-ENTRY(aesbs_cbc_decrypt)
+SYM_FUNC_START(aesbs_cbc_decrypt)
 	frame_push 6

 	mov x19, x0
@@ -718,12 +714,11 @@
 1:	st1 {v24.16b}, [x24]		// store IV

 	cbz x23, 2f
-	cond_yield_neon
 	b 99b

 2:	frame_pop
 	ret
-ENDPROC(aesbs_cbc_decrypt)
+SYM_FUNC_END(aesbs_cbc_decrypt)

 	.macro next_tweak, out, in, const, tmp
 	sshr \tmp\().2d, \in\().2d, #63
@@ -733,18 +728,13 @@
 	eor \out\().16b, \out\().16b, \tmp\().16b
 	.endm

-	.align 4
-.Lxts_mul_x:
-CPU_LE( .quad 1, 0x87 )
-CPU_BE( .quad 0x87, 1 )
-
 /*
  * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
  *                   int blocks, u8 iv[])
  * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
  *                   int blocks, u8 iv[])
  */
-__xts_crypt8:
+SYM_FUNC_START_LOCAL(__xts_crypt8)
 	mov x6, #1
 	lsl x6, x6, x23
 	subs w23, w23, #8
@@ -796,8 +786,8 @@

 0:	mov bskey, x21
 	mov rounds, x22
-	br x7
-ENDPROC(__xts_crypt8)
+	br x16
+SYM_FUNC_END(__xts_crypt8)

 	.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
 	frame_push 6, 64
@@ -809,10 +799,12 @@
 	mov x23, x4
 	mov x24, x5

-0:	ldr q30, .Lxts_mul_x
+	movi v30.2s, #0x1
+	movi v25.2s, #0x87
+	uzp1 v30.4s, v30.4s, v25.4s
 	ld1 {v25.16b}, [x24]

-99:	adr x7, \do8
+99:	adr x16, \do8
 	bl __xts_crypt8

 	ldp q16, q17, [sp, #.Lframe_local_offset]
@@ -852,7 +844,6 @@
 	cbz x23, 1f
 	st1 {v25.16b}, [x24]

-	cond_yield_neon 0b
 	b 99b

 1:	st1 {v25.16b}, [x24]
@@ -860,13 +851,13 @@
 	ret
 	.endm

-ENTRY(aesbs_xts_encrypt)
+SYM_FUNC_START(aesbs_xts_encrypt)
 	__xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
-ENDPROC(aesbs_xts_encrypt)
+SYM_FUNC_END(aesbs_xts_encrypt)

-ENTRY(aesbs_xts_decrypt)
+SYM_FUNC_START(aesbs_xts_decrypt)
 	__xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
-ENDPROC(aesbs_xts_decrypt)
+SYM_FUNC_END(aesbs_xts_decrypt)

 	.macro next_ctr, v
 	mov \v\().d[1], x8
@@ -880,7 +871,7 @@
  * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
  *                   int rounds, int blocks, u8 iv[], u8 final[])
  */
-ENTRY(aesbs_ctr_encrypt)
+SYM_FUNC_START(aesbs_ctr_encrypt)
 	frame_push 8

 	mov x19, x0
@@ -895,7 +886,7 @@
 	cset x26, ne
 	add x23, x23, x26		// do one extra block if final

-98:	ldp x7, x8, [x24]
+	ldp x7, x8, [x24]
 	ld1 {v0.16b}, [x24]
 CPU_LE( rev x7, x7 )
 CPU_LE( rev x8, x8 )
@@ -973,7 +964,6 @@
 	st1 {v0.16b}, [x24]
 	cbz x23, .Lctr_done

-	cond_yield_neon 98b
 	b 99b

 .Lctr_done:
@@ -1008,4 +998,4 @@
 7:	cbz x25, 8b
 	st1 {v5.16b}, [x25]
 	b 8b
-ENDPROC(aesbs_ctr_encrypt)
+SYM_FUNC_END(aesbs_ctr_encrypt)
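
For reference only, not part of the patch above: the new movi/uzp1 sequence composes in v30 the same pair of 64-bit lanes { 0x1, 0x87 } that the deleted .Lxts_mul_x literal used to supply, and the next_tweak macro uses that mask to advance the XTS tweak by one multiplication by x in GF(2^128) (v25 holds the live tweak, loaded from and stored back to the iv[] buffer at x24). Below is a minimal C model of that tweak update, assuming the tweak is kept as two little-endian 64-bit words; the function name is illustrative, not from the kernel.

#include <stdint.h>

/* Illustrative sketch only; mirrors the next_tweak macro, not kernel code. */
static void xts_next_tweak(uint64_t t[2])
{
	/* sshr #63 + and with { 0x1, 0x87 }: per-lane masks from each lane's top bit */
	uint64_t c_lo = (uint64_t)((int64_t)t[0] >> 63) & 0x1;
	uint64_t c_hi = (uint64_t)((int64_t)t[1] >> 63) & 0x87;

	/* add v,v,v doubles each 64-bit lane; ext #8 swaps the masks across lanes before the eor */
	uint64_t lo = (t[0] << 1) ^ c_hi;	/* low lane picks up the 0x87 reduction term */
	uint64_t hi = (t[1] << 1) ^ c_lo;	/* high lane picks up the carry out of the low lane */

	t[0] = lo;
	t[1] = hi;
}

Building the constant with two movi instructions and a uzp1 keeps it out of the literal pool, so no PC-relative load is needed and the 0: label that the old ldr reload path used can disappear along with it.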