.. | .. |
---|
37 | 37 | .endm |
---|
38 | 38 | |
---|
39 | 39 | /* |
---|
40 | | - * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size) |
---|
| 40 | + * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size) |
---|
41 | 41 | */ |
---|
42 | 42 | .text |
---|
43 | | -ENTRY(sha3_ce_transform) |
---|
44 | | - frame_push 4 |
---|
45 | | - |
---|
46 | | - mov x19, x0 |
---|
47 | | - mov x20, x1 |
---|
48 | | - mov x21, x2 |
---|
49 | | - mov x22, x3 |
---|
50 | | - |
---|
51 | | -0: /* load state */ |
---|
52 | | - add x8, x19, #32 |
---|
53 | | - ld1 { v0.1d- v3.1d}, [x19] |
---|
| 43 | +SYM_FUNC_START(sha3_ce_transform) |
---|
| 44 | + /* load state */ |
---|
| 45 | + add x8, x0, #32 |
---|
| 46 | + ld1 { v0.1d- v3.1d}, [x0] |
---|
54 | 47 | ld1 { v4.1d- v7.1d}, [x8], #32 |
---|
55 | 48 | ld1 { v8.1d-v11.1d}, [x8], #32 |
---|
56 | 49 | ld1 {v12.1d-v15.1d}, [x8], #32 |
---|
.. | .. |
---|
58 | 51 | ld1 {v20.1d-v23.1d}, [x8], #32 |
---|
59 | 52 | ld1 {v24.1d}, [x8] |
---|
60 | 53 | |
---|
61 | | -1: sub w21, w21, #1 |
---|
| 54 | +0: sub w2, w2, #1 |
---|
62 | 55 | mov w8, #24 |
---|
63 | 56 | adr_l x9, .Lsha3_rcon |
---|
64 | 57 | |
---|
65 | 58 | /* load input */ |
---|
66 | | - ld1 {v25.8b-v28.8b}, [x20], #32 |
---|
67 | | - ld1 {v29.8b-v31.8b}, [x20], #24 |
---|
| 59 | + ld1 {v25.8b-v28.8b}, [x1], #32 |
---|
| 60 | + ld1 {v29.8b-v31.8b}, [x1], #24 |
---|
68 | 61 | eor v0.8b, v0.8b, v25.8b |
---|
69 | 62 | eor v1.8b, v1.8b, v26.8b |
---|
70 | 63 | eor v2.8b, v2.8b, v27.8b |
---|
.. | .. |
---|
73 | 66 | eor v5.8b, v5.8b, v30.8b |
---|
74 | 67 | eor v6.8b, v6.8b, v31.8b |
---|
75 | 68 | |
---|
76 | | - tbnz x22, #6, 3f // SHA3-512 |
---|
| 69 | + tbnz x3, #6, 2f // SHA3-512 |
---|
77 | 70 | |
---|
78 | | - ld1 {v25.8b-v28.8b}, [x20], #32 |
---|
79 | | - ld1 {v29.8b-v30.8b}, [x20], #16 |
---|
| 71 | + ld1 {v25.8b-v28.8b}, [x1], #32 |
---|
| 72 | + ld1 {v29.8b-v30.8b}, [x1], #16 |
---|
80 | 73 | eor v7.8b, v7.8b, v25.8b |
---|
81 | 74 | eor v8.8b, v8.8b, v26.8b |
---|
82 | 75 | eor v9.8b, v9.8b, v27.8b |
---|
.. | .. |
---|
84 | 77 | eor v11.8b, v11.8b, v29.8b |
---|
85 | 78 | eor v12.8b, v12.8b, v30.8b |
---|
86 | 79 | |
---|
87 | | - tbnz x22, #4, 2f // SHA3-384 or SHA3-224 |
---|
| 80 | + tbnz x3, #4, 1f // SHA3-384 or SHA3-224 |
---|
88 | 81 | |
---|
89 | 82 | // SHA3-256 |
---|
90 | | - ld1 {v25.8b-v28.8b}, [x20], #32 |
---|
| 83 | + ld1 {v25.8b-v28.8b}, [x1], #32 |
---|
91 | 84 | eor v13.8b, v13.8b, v25.8b |
---|
92 | 85 | eor v14.8b, v14.8b, v26.8b |
---|
93 | 86 | eor v15.8b, v15.8b, v27.8b |
---|
94 | 87 | eor v16.8b, v16.8b, v28.8b |
---|
95 | | - b 4f |
---|
| 88 | + b 3f |
---|
96 | 89 | |
---|
97 | | -2: tbz x22, #2, 4f // bit 2 cleared? SHA3-384 |
---|
| 90 | +1: tbz x3, #2, 3f // bit 2 cleared? SHA3-384 |
---|
98 | 91 | |
---|
99 | 92 | // SHA3-224 |
---|
100 | | - ld1 {v25.8b-v28.8b}, [x20], #32 |
---|
101 | | - ld1 {v29.8b}, [x20], #8 |
---|
| 93 | + ld1 {v25.8b-v28.8b}, [x1], #32 |
---|
| 94 | + ld1 {v29.8b}, [x1], #8 |
---|
102 | 95 | eor v13.8b, v13.8b, v25.8b |
---|
103 | 96 | eor v14.8b, v14.8b, v26.8b |
---|
104 | 97 | eor v15.8b, v15.8b, v27.8b |
---|
105 | 98 | eor v16.8b, v16.8b, v28.8b |
---|
106 | 99 | eor v17.8b, v17.8b, v29.8b |
---|
107 | | - b 4f |
---|
| 100 | + b 3f |
---|
108 | 101 | |
---|
109 | 102 | // SHA3-512 |
---|
110 | | -3: ld1 {v25.8b-v26.8b}, [x20], #16 |
---|
| 103 | +2: ld1 {v25.8b-v26.8b}, [x1], #16 |
---|
111 | 104 | eor v7.8b, v7.8b, v25.8b |
---|
112 | 105 | eor v8.8b, v8.8b, v26.8b |
---|
113 | 106 | |
---|
114 | | -4: sub w8, w8, #1 |
---|
| 107 | +3: sub w8, w8, #1 |
---|
115 | 108 | |
---|
116 | 109 | eor3 v29.16b, v4.16b, v9.16b, v14.16b |
---|
117 | 110 | eor3 v26.16b, v1.16b, v6.16b, v11.16b |
---|
.. | .. |
---|
190 | 183 | |
---|
191 | 184 | eor v0.16b, v0.16b, v31.16b |
---|
192 | 185 | |
---|
193 | | - cbnz w8, 4b |
---|
194 | | - cbz w21, 5f |
---|
195 | | - |
---|
196 | | - if_will_cond_yield_neon |
---|
197 | | - add x8, x19, #32 |
---|
198 | | - st1 { v0.1d- v3.1d}, [x19] |
---|
199 | | - st1 { v4.1d- v7.1d}, [x8], #32 |
---|
200 | | - st1 { v8.1d-v11.1d}, [x8], #32 |
---|
201 | | - st1 {v12.1d-v15.1d}, [x8], #32 |
---|
202 | | - st1 {v16.1d-v19.1d}, [x8], #32 |
---|
203 | | - st1 {v20.1d-v23.1d}, [x8], #32 |
---|
204 | | - st1 {v24.1d}, [x8] |
---|
205 | | - do_cond_yield_neon |
---|
206 | | - b 0b |
---|
207 | | - endif_yield_neon |
---|
208 | | - |
---|
209 | | - b 1b |
---|
| 186 | + cbnz w8, 3b |
---|
| 187 | + cond_yield 4f, x8, x9 |
---|
| 188 | + cbnz w2, 0b |
---|
210 | 189 | |
---|
211 | 190 | /* save state */ |
---|
212 | | -5: st1 { v0.1d- v3.1d}, [x19], #32 |
---|
213 | | - st1 { v4.1d- v7.1d}, [x19], #32 |
---|
214 | | - st1 { v8.1d-v11.1d}, [x19], #32 |
---|
215 | | - st1 {v12.1d-v15.1d}, [x19], #32 |
---|
216 | | - st1 {v16.1d-v19.1d}, [x19], #32 |
---|
217 | | - st1 {v20.1d-v23.1d}, [x19], #32 |
---|
218 | | - st1 {v24.1d}, [x19] |
---|
219 | | - frame_pop |
---|
| 191 | +4: st1 { v0.1d- v3.1d}, [x0], #32 |
---|
| 192 | + st1 { v4.1d- v7.1d}, [x0], #32 |
---|
| 193 | + st1 { v8.1d-v11.1d}, [x0], #32 |
---|
| 194 | + st1 {v12.1d-v15.1d}, [x0], #32 |
---|
| 195 | + st1 {v16.1d-v19.1d}, [x0], #32 |
---|
| 196 | + st1 {v20.1d-v23.1d}, [x0], #32 |
---|
| 197 | + st1 {v24.1d}, [x0] |
---|
| 198 | + mov w0, w2 |
---|
220 | 199 | ret |
---|
221 | | -ENDPROC(sha3_ce_transform) |
---|
| 200 | +SYM_FUNC_END(sha3_ce_transform) |
---|
222 | 201 | |
---|
223 | 202 | .section ".rodata", "a" |
---|
224 | 203 | .align 8 |
---|