hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/arch/arm/crypto/aes-neonbs-core.S
....@@ -1,12 +1,9 @@
1
+/* SPDX-License-Identifier: GPL-2.0-only */
12 /*
23 * Bit sliced AES using NEON instructions
34 *
45 * Copyright (C) 2017 Linaro Ltd.
56 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
6
- *
7
- * This program is free software; you can redistribute it and/or modify
8
- * it under the terms of the GNU General Public License version 2 as
9
- * published by the Free Software Foundation.
107 */
118
129 /*
....@@ -78,11 +75,6 @@
7875 .macro __ldr, out, sym
7976 vldr \out\()l, \sym
8077 vldr \out\()h, \sym + 8
81
- .endm
82
-
83
- .macro __adr, reg, lbl
84
- adr \reg, \lbl
85
-THUMB( orr \reg, \reg, #1 )
8678 .endm
8779
8880 .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
....@@ -632,11 +624,11 @@
632624 push {r4-r6, lr}
633625 ldr r5, [sp, #16] // number of blocks
634626
635
-99: __adr ip, 0f
627
+99: adr ip, 0f
636628 and lr, r5, #7
637629 cmp r5, #8
638630 sub ip, ip, lr, lsl #2
639
- bxlt ip // computed goto if blocks < 8
631
+ movlt pc, ip // computed goto if blocks < 8
640632
641633 vld1.8 {q0}, [r1]!
642634 vld1.8 {q1}, [r1]!
....@@ -651,11 +643,11 @@
651643 mov rounds, r3
652644 bl \do8
653645
654
- __adr ip, 1f
646
+ adr ip, 1f
655647 and lr, r5, #7
656648 cmp r5, #8
657649 sub ip, ip, lr, lsl #2
658
- bxlt ip // computed goto if blocks < 8
650
+ movlt pc, ip // computed goto if blocks < 8
659651
660652 vst1.8 {\o0}, [r0]!
661653 vst1.8 {\o1}, [r0]!
....@@ -692,12 +684,12 @@
692684 push {r4-r6, lr}
693685 ldm ip, {r5-r6} // load args 4-5
694686
695
-99: __adr ip, 0f
687
+99: adr ip, 0f
696688 and lr, r5, #7
697689 cmp r5, #8
698690 sub ip, ip, lr, lsl #2
699691 mov lr, r1
700
- bxlt ip // computed goto if blocks < 8
692
+ movlt pc, ip // computed goto if blocks < 8
701693
702694 vld1.8 {q0}, [lr]!
703695 vld1.8 {q1}, [lr]!
....@@ -721,11 +713,11 @@
721713 vmov q14, q8
722714 vmov q15, q8
723715
724
- __adr ip, 1f
716
+ adr ip, 1f
725717 and lr, r5, #7
726718 cmp r5, #8
727719 sub ip, ip, lr, lsl #2
728
- bxlt ip // computed goto if blocks < 8
720
+ movlt pc, ip // computed goto if blocks < 8
729721
730722 vld1.8 {q9}, [r1]!
731723 vld1.8 {q10}, [r1]!
....@@ -736,9 +728,9 @@
736728 vld1.8 {q15}, [r1]!
737729 W(nop)
738730
739
-1: __adr ip, 2f
731
+1: adr ip, 2f
740732 sub ip, ip, lr, lsl #3
741
- bxlt ip // computed goto if blocks < 8
733
+ movlt pc, ip // computed goto if blocks < 8
742734
743735 veor q0, q0, q8
744736 vst1.8 {q0}, [r0]!
....@@ -807,13 +799,13 @@
807799 vmov q6, q0
808800 vmov q7, q0
809801
810
- __adr ip, 0f
802
+ adr ip, 0f
811803 sub lr, r5, #1
812804 and lr, lr, #7
813805 cmp r5, #8
814806 sub ip, ip, lr, lsl #5
815807 sub ip, ip, lr, lsl #2
816
- bxlt ip // computed goto if blocks < 8
808
+ movlt pc, ip // computed goto if blocks < 8
817809
818810 next_ctr q1
819811 next_ctr q2
....@@ -827,13 +819,13 @@
827819 mov rounds, r3
828820 bl aesbs_encrypt8
829821
830
- __adr ip, 1f
822
+ adr ip, 1f
831823 and lr, r5, #7
832824 cmp r5, #8
833825 movgt r4, #0
834826 ldrle r4, [sp, #40] // load final in the last round
835827 sub ip, ip, lr, lsl #2
836
- bxlt ip // computed goto if blocks < 8
828
+ movlt pc, ip // computed goto if blocks < 8
837829
838830 vld1.8 {q8}, [r1]!
839831 vld1.8 {q9}, [r1]!
....@@ -846,10 +838,10 @@
846838 1: bne 2f
847839 vld1.8 {q15}, [r1]!
848840
849
-2: __adr ip, 3f
841
+2: adr ip, 3f
850842 cmp r5, #8
851843 sub ip, ip, lr, lsl #3
852
- bxlt ip // computed goto if blocks < 8
844
+ movlt pc, ip // computed goto if blocks < 8
853845
854846 veor q0, q0, q8
855847 vst1.8 {q0}, [r0]!
....@@ -890,27 +882,25 @@
890882 veor \out, \out, \tmp
891883 .endm
892884
893
- .align 4
894
-.Lxts_mul_x:
895
- .quad 1, 0x87
896
-
897885 /*
898886 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
899
- * int blocks, u8 iv[])
887
+ * int blocks, u8 iv[], int reorder_last_tweak)
900888 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
901
- * int blocks, u8 iv[])
889
+ * int blocks, u8 iv[], int reorder_last_tweak)
902890 */
903891 __xts_prepare8:
904892 vld1.8 {q14}, [r7] // load iv
905
- __ldr q15, .Lxts_mul_x // load tweak mask
893
+ vmov.i32 d30, #0x87 // compose tweak mask vector
894
+ vmovl.u32 q15, d30
895
+ vshr.u64 d30, d31, #7
906896 vmov q12, q14
907897
908
- __adr ip, 0f
898
+ adr ip, 0f
909899 and r4, r6, #7
910900 cmp r6, #8
911901 sub ip, ip, r4, lsl #5
912902 mov r4, sp
913
- bxlt ip // computed goto if blocks < 8
903
+ movlt pc, ip // computed goto if blocks < 8
914904
915905 vld1.8 {q0}, [r1]!
916906 next_tweak q12, q14, q15, q13
....@@ -949,17 +939,24 @@
949939
950940 vld1.8 {q7}, [r1]!
951941 next_tweak q14, q12, q15, q13
952
- veor q7, q7, q12
942
+THUMB( itt le )
943
+ W(cmple) r8, #0
944
+ ble 1f
945
+0: veor q7, q7, q12
953946 vst1.8 {q12}, [r4, :128]
954947
955
-0: vst1.8 {q14}, [r7] // store next iv
948
+ vst1.8 {q14}, [r7] // store next iv
956949 bx lr
950
+
951
+1: vswp q12, q14
952
+ b 0b
957953 ENDPROC(__xts_prepare8)
958954
959955 .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
960956 push {r4-r8, lr}
961957 mov r5, sp // preserve sp
962958 ldrd r6, r7, [sp, #24] // get blocks and iv args
959
+ rsb r8, ip, #1
963960 sub ip, sp, #128 // make room for 8x tweak
964961 bic ip, ip, #0xf // align sp to 16 bytes
965962 mov sp, ip
....@@ -970,12 +967,12 @@
970967 mov rounds, r3
971968 bl \do8
972969
973
- __adr ip, 0f
970
+ adr ip, 0f
974971 and lr, r6, #7
975972 cmp r6, #8
976973 sub ip, ip, lr, lsl #2
977974 mov r4, sp
978
- bxlt ip // computed goto if blocks < 8
975
+ movlt pc, ip // computed goto if blocks < 8
979976
980977 vld1.8 {q8}, [r4, :128]!
981978 vld1.8 {q9}, [r4, :128]!
....@@ -986,9 +983,9 @@
986983 vld1.8 {q14}, [r4, :128]!
987984 vld1.8 {q15}, [r4, :128]
988985
989
-0: __adr ip, 1f
986
+0: adr ip, 1f
990987 sub ip, ip, lr, lsl #3
991
- bxlt ip // computed goto if blocks < 8
988
+ movlt pc, ip // computed goto if blocks < 8
992989
993990 veor \o0, \o0, q8
994991 vst1.8 {\o0}, [r0]!
....@@ -1015,9 +1012,11 @@
10151012 .endm
10161013
10171014 ENTRY(aesbs_xts_encrypt)
1015
+ mov ip, #0 // never reorder final tweak
10181016 __xts_crypt aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5
10191017 ENDPROC(aesbs_xts_encrypt)
10201018
10211019 ENTRY(aesbs_xts_decrypt)
1020
+ ldr ip, [sp, #8] // reorder final tweak?
10221021 __xts_crypt aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5
10231022 ENDPROC(aesbs_xts_decrypt)