.. | .. |
---|
| 1 | +/* SPDX-License-Identifier: GPL-2.0-only */ |
---|
1 | 2 | /* |
---|
2 | 3 | * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions |
---|
3 | 4 | * |
---|
4 | 5 | * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> |
---|
5 | | - * |
---|
6 | | - * This program is free software; you can redistribute it and/or modify |
---|
7 | | - * it under the terms of the GNU General Public License version 2 as |
---|
8 | | - * published by the Free Software Foundation. |
---|
9 | 6 | */ |
---|
10 | 7 | |
---|
11 | 8 | #include <linux/linkage.h> |
---|
.. | .. |
---|
78 | 75 | * int blocks) |
---|
79 | 76 | */ |
---|
80 | 77 | .text |
---|
81 | | -ENTRY(sha2_ce_transform) |
---|
82 | | - frame_push 3 |
---|
83 | | - |
---|
84 | | - mov x19, x0 |
---|
85 | | - mov x20, x1 |
---|
86 | | - mov x21, x2 |
---|
87 | | - |
---|
| 78 | +SYM_FUNC_START(sha2_ce_transform) /* x0 = state, x1 = input, w2 = block count; w0 returns the blocks left over */ |
---|
88 | 79 | /* load round constants: 4 x 64 bytes from .Lsha2_rcon into v0-v15 */ |
---|
89 | | -0: adr_l x8, .Lsha2_rcon |
---|
| 80 | + adr_l x8, .Lsha2_rcon |
---|
90 | 81 | ld1 { v0.4s- v3.4s}, [x8], #64 |
---|
91 | 82 | ld1 { v4.4s- v7.4s}, [x8], #64 |
---|
92 | 83 | ld1 { v8.4s-v11.4s}, [x8], #64 |
---|
93 | 84 | ld1 {v12.4s-v15.4s}, [x8] |
---|
94 | 85 | |
---|
95 | 86 | /* load state: two 4 x 32-bit vectors from [x0] into dgav and dgbv */ |
---|
96 | | - ld1 {dgav.4s, dgbv.4s}, [x19] |
---|
| 87 | + ld1 {dgav.4s, dgbv.4s}, [x0] |
---|
97 | 88 | |
---|
98 | 89 | /* load sha256_ce_state::finalize into w4, at the offset read from sha256_ce_offsetof_finalize */ |
---|
99 | 90 | ldr_l w4, sha256_ce_offsetof_finalize, x4 |
---|
100 | | - ldr w4, [x19, x4] |
---|
| 91 | + ldr w4, [x0, x4] |
---|
101 | 92 | |
---|
102 | 93 | /* load input: label 0 is the per-block loop head; reads 64 bytes from [x1] and advances x1 */ |
---|
103 | | -1: ld1 {v16.4s-v19.4s}, [x20], #64 |
---|
104 | | - sub w21, w21, #1 |
---|
| 94 | +0: ld1 {v16.4s-v19.4s}, [x1], #64 |
---|
| 95 | + sub w2, w2, #1 /* w2 = input blocks not yet loaded */ |
---|
105 | 96 | |
---|
106 | 97 | CPU_LE( rev32 v16.16b, v16.16b ) /* NOTE(review): CPU_LE is defined elsewhere; presumably emits the byte swap only on little-endian builds - confirm */ |
---|
107 | 98 | CPU_LE( rev32 v17.16b, v17.16b ) |
---|
108 | 99 | CPU_LE( rev32 v18.16b, v18.16b ) |
---|
109 | 100 | CPU_LE( rev32 v19.16b, v19.16b ) |
---|
110 | 101 | |
---|
111 | | -2: add t0.4s, v16.4s, v0.4s |
---|
| 102 | +1: add t0.4s, v16.4s, v0.4s /* label 1 is also re-entered by the padding path (b 1b below) */ |
---|
112 | 103 | mov dg0v.16b, dgav.16b |
---|
113 | 104 | mov dg1v.16b, dgbv.16b |
---|
114 | 105 | |
---|
.. | .. |
---|
137 | 128 | add dgbv.4s, dgbv.4s, dg1v.4s |
---|
138 | 129 | |
---|
139 | 130 | /* handled all input blocks? if so, go to the final-block check at label 2 */ |
---|
140 | | - cbz w21, 3f |
---|
141 | | - |
---|
142 | | - if_will_cond_yield_neon |
---|
143 | | - st1 {dgav.4s, dgbv.4s}, [x19] |
---|
144 | | - do_cond_yield_neon |
---|
| 131 | + cbz w2, 2f |
---|
| 132 | + cond_yield 3f, x5, x6 /* NOTE(review): macro defined elsewhere; presumably branches to 3f when a yield is due, using x5/x6 as scratch - confirm */ |
---|
145 | 133 | b 0b /* otherwise process the next input block */ |
---|
146 | | - endif_yield_neon |
---|
147 | | - |
---|
148 | | - b 1b |
---|
149 | 134 | |
---|
150 | 135 | /* |
---|
151 | 136 | * Final block: add padding and total bit count. |
---|
152 | 137 | * Skip if the input size was not a round multiple of the block size; |
---|
153 | 138 | * the padding is handled by the C code in that case. |
---|
154 | 139 | */ |
---|
155 | | -3: cbz x4, 4f |
---|
| 140 | +2: cbz x4, 3f /* x4 == 0: finalize flag clear, or the padding block was already processed */ |
---|
156 | 141 | ldr_l w4, sha256_ce_offsetof_count, x4 |
---|
157 | | - ldr x4, [x19, x4] |
---|
| 142 | + ldr x4, [x0, x4] /* x4 = 64-bit value at the sha256_ce_offsetof_count offset within the state */ |
---|
158 | 143 | movi v17.2d, #0 |
---|
159 | 144 | mov x8, #0x80000000 |
---|
160 | 145 | movi v18.2d, #0 |
---|
---|
.. | .. |
---|
163 | 148 | mov x4, #0 /* cleared so the cbz x4 at label 2 exits after the padding pass */ |
---|
164 | 149 | mov v19.d[0], xzr |
---|
165 | 150 | mov v19.d[1], x7 |
---|
166 | | - b 2b |
---|
| 151 | + b 1b /* run the rounds on the padding block built in v16-v19, skipping the load at label 0 */ |
---|
167 | 152 | |
---|
168 | 153 | /* store new state to [x0] and hand back the remaining block count */ |
---|
169 | | -4: st1 {dgav.4s, dgbv.4s}, [x19] |
---|
170 | | - frame_pop |
---|
| 154 | +3: st1 {dgav.4s, dgbv.4s}, [x0] |
---|
| 155 | + mov w0, w2 /* w0 = blocks left; always 0 when label 3 is reached from label 2 */ |
---|
171 | 156 | ret |
---|
172 | | -ENDPROC(sha2_ce_transform) |
---|
| 157 | +SYM_FUNC_END(sha2_ce_transform) |
---|