hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/arch/arm64/crypto/sha3-ce-core.S
....@@ -37,20 +37,13 @@
3737 .endm
3838
3939 /*
40
- * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
40
+ * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
4141 */
4242 .text
43
-ENTRY(sha3_ce_transform)
44
- frame_push 4
45
-
46
- mov x19, x0
47
- mov x20, x1
48
- mov x21, x2
49
- mov x22, x3
50
-
51
-0: /* load state */
52
- add x8, x19, #32
53
- ld1 { v0.1d- v3.1d}, [x19]
43
+SYM_FUNC_START(sha3_ce_transform)
44
+ /* load state */
45
+ add x8, x0, #32
46
+ ld1 { v0.1d- v3.1d}, [x0]
5447 ld1 { v4.1d- v7.1d}, [x8], #32
5548 ld1 { v8.1d-v11.1d}, [x8], #32
5649 ld1 {v12.1d-v15.1d}, [x8], #32
....@@ -58,13 +51,13 @@
5851 ld1 {v20.1d-v23.1d}, [x8], #32
5952 ld1 {v24.1d}, [x8]
6053
61
-1: sub w21, w21, #1
54
+0: sub w2, w2, #1
6255 mov w8, #24
6356 adr_l x9, .Lsha3_rcon
6457
6558 /* load input */
66
- ld1 {v25.8b-v28.8b}, [x20], #32
67
- ld1 {v29.8b-v31.8b}, [x20], #24
59
+ ld1 {v25.8b-v28.8b}, [x1], #32
60
+ ld1 {v29.8b-v31.8b}, [x1], #24
6861 eor v0.8b, v0.8b, v25.8b
6962 eor v1.8b, v1.8b, v26.8b
7063 eor v2.8b, v2.8b, v27.8b
....@@ -73,10 +66,10 @@
7366 eor v5.8b, v5.8b, v30.8b
7467 eor v6.8b, v6.8b, v31.8b
7568
76
- tbnz x22, #6, 3f // SHA3-512
69
+ tbnz x3, #6, 2f // SHA3-512
7770
78
- ld1 {v25.8b-v28.8b}, [x20], #32
79
- ld1 {v29.8b-v30.8b}, [x20], #16
71
+ ld1 {v25.8b-v28.8b}, [x1], #32
72
+ ld1 {v29.8b-v30.8b}, [x1], #16
8073 eor v7.8b, v7.8b, v25.8b
8174 eor v8.8b, v8.8b, v26.8b
8275 eor v9.8b, v9.8b, v27.8b
....@@ -84,34 +77,34 @@
8477 eor v11.8b, v11.8b, v29.8b
8578 eor v12.8b, v12.8b, v30.8b
8679
87
- tbnz x22, #4, 2f // SHA3-384 or SHA3-224
80
+ tbnz x3, #4, 1f // SHA3-384 or SHA3-224
8881
8982 // SHA3-256
90
- ld1 {v25.8b-v28.8b}, [x20], #32
83
+ ld1 {v25.8b-v28.8b}, [x1], #32
9184 eor v13.8b, v13.8b, v25.8b
9285 eor v14.8b, v14.8b, v26.8b
9386 eor v15.8b, v15.8b, v27.8b
9487 eor v16.8b, v16.8b, v28.8b
95
- b 4f
88
+ b 3f
9689
97
-2: tbz x22, #2, 4f // bit 2 cleared? SHA-384
90
+1: tbz x3, #2, 3f // bit 2 cleared? SHA-384
9891
9992 // SHA3-224
100
- ld1 {v25.8b-v28.8b}, [x20], #32
101
- ld1 {v29.8b}, [x20], #8
93
+ ld1 {v25.8b-v28.8b}, [x1], #32
94
+ ld1 {v29.8b}, [x1], #8
10295 eor v13.8b, v13.8b, v25.8b
10396 eor v14.8b, v14.8b, v26.8b
10497 eor v15.8b, v15.8b, v27.8b
10598 eor v16.8b, v16.8b, v28.8b
10699 eor v17.8b, v17.8b, v29.8b
107
- b 4f
100
+ b 3f
108101
109102 // SHA3-512
110
-3: ld1 {v25.8b-v26.8b}, [x20], #16
103
+2: ld1 {v25.8b-v26.8b}, [x1], #16
111104 eor v7.8b, v7.8b, v25.8b
112105 eor v8.8b, v8.8b, v26.8b
113106
114
-4: sub w8, w8, #1
107
+3: sub w8, w8, #1
115108
116109 eor3 v29.16b, v4.16b, v9.16b, v14.16b
117110 eor3 v26.16b, v1.16b, v6.16b, v11.16b
....@@ -190,35 +183,21 @@
190183
191184 eor v0.16b, v0.16b, v31.16b
192185
193
- cbnz w8, 4b
194
- cbz w21, 5f
195
-
196
- if_will_cond_yield_neon
197
- add x8, x19, #32
198
- st1 { v0.1d- v3.1d}, [x19]
199
- st1 { v4.1d- v7.1d}, [x8], #32
200
- st1 { v8.1d-v11.1d}, [x8], #32
201
- st1 {v12.1d-v15.1d}, [x8], #32
202
- st1 {v16.1d-v19.1d}, [x8], #32
203
- st1 {v20.1d-v23.1d}, [x8], #32
204
- st1 {v24.1d}, [x8]
205
- do_cond_yield_neon
206
- b 0b
207
- endif_yield_neon
208
-
209
- b 1b
186
+ cbnz w8, 3b
187
+ cond_yield 4f, x8, x9
188
+ cbnz w2, 0b
210189
211190 /* save state */
212
-5: st1 { v0.1d- v3.1d}, [x19], #32
213
- st1 { v4.1d- v7.1d}, [x19], #32
214
- st1 { v8.1d-v11.1d}, [x19], #32
215
- st1 {v12.1d-v15.1d}, [x19], #32
216
- st1 {v16.1d-v19.1d}, [x19], #32
217
- st1 {v20.1d-v23.1d}, [x19], #32
218
- st1 {v24.1d}, [x19]
219
- frame_pop
191
+4: st1 { v0.1d- v3.1d}, [x0], #32
192
+ st1 { v4.1d- v7.1d}, [x0], #32
193
+ st1 { v8.1d-v11.1d}, [x0], #32
194
+ st1 {v12.1d-v15.1d}, [x0], #32
195
+ st1 {v16.1d-v19.1d}, [x0], #32
196
+ st1 {v20.1d-v23.1d}, [x0], #32
197
+ st1 {v24.1d}, [x0]
198
+ mov w0, w2
220199 ret
221
-ENDPROC(sha3_ce_transform)
200
+SYM_FUNC_END(sha3_ce_transform)
222201
223202 .section ".rodata", "a"
224203 .align 8