| .. | .. |
|---|
| 1 | +/* SPDX-License-Identifier: GPL-2.0-only */ |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions |
|---|
| 3 | 4 | * |
|---|
| 4 | 5 | * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> |
|---|
| 5 | | - * |
|---|
| 6 | | - * This program is free software; you can redistribute it and/or modify |
|---|
| 7 | | - * it under the terms of the GNU General Public License version 2 as |
|---|
| 8 | | - * published by the Free Software Foundation. |
|---|
| 9 | 6 | */ |
|---|
| 10 | 7 | |
|---|
| 11 | 8 | #include <linux/linkage.h> |
|---|
| .. | .. |
|---|
| 78 | 75 | * int blocks) |
|---|
| 79 | 76 | */ |
|---|
| 80 | 77 | .text |
|---|
| 81 | | -ENTRY(sha2_ce_transform) |
|---|
| 82 | | - frame_push 3 |
|---|
| 83 | | - |
|---|
| 84 | | - mov x19, x0 |
|---|
| 85 | | - mov x20, x1 |
|---|
| 86 | | - mov x21, x2 |
|---|
| 87 | | - |
|---|
| 78 | +SYM_FUNC_START(sha2_ce_transform) |
|---|
| 88 | 79 | /* load round constants */ |
|---|
| 89 | | -0: adr_l x8, .Lsha2_rcon |
|---|
| 80 | + adr_l x8, .Lsha2_rcon |
|---|
| 90 | 81 | ld1 { v0.4s- v3.4s}, [x8], #64 |
|---|
| 91 | 82 | ld1 { v4.4s- v7.4s}, [x8], #64 |
|---|
| 92 | 83 | ld1 { v8.4s-v11.4s}, [x8], #64 |
|---|
| 93 | 84 | ld1 {v12.4s-v15.4s}, [x8] |
|---|
| 94 | 85 | |
|---|
| 95 | 86 | /* load state */ |
|---|
| 96 | | - ld1 {dgav.4s, dgbv.4s}, [x19] |
|---|
| 87 | + ld1 {dgav.4s, dgbv.4s}, [x0] |
|---|
| 97 | 88 | |
|---|
| 98 | 89 | /* load sha256_ce_state::finalize */ |
|---|
| 99 | 90 | ldr_l w4, sha256_ce_offsetof_finalize, x4 |
|---|
| 100 | | - ldr w4, [x19, x4] |
|---|
| 91 | + ldr w4, [x0, x4] |
|---|
| 101 | 92 | |
|---|
| 102 | 93 | /* load input */ |
|---|
| 103 | | -1: ld1 {v16.4s-v19.4s}, [x20], #64 |
|---|
| 104 | | - sub w21, w21, #1 |
|---|
| 94 | +0: ld1 {v16.4s-v19.4s}, [x1], #64 |
|---|
| 95 | + sub w2, w2, #1 |
|---|
| 105 | 96 | |
|---|
| 106 | 97 | CPU_LE( rev32 v16.16b, v16.16b ) |
|---|
| 107 | 98 | CPU_LE( rev32 v17.16b, v17.16b ) |
|---|
| 108 | 99 | CPU_LE( rev32 v18.16b, v18.16b ) |
|---|
| 109 | 100 | CPU_LE( rev32 v19.16b, v19.16b ) |
|---|
| 110 | 101 | |
|---|
| 111 | | -2: add t0.4s, v16.4s, v0.4s |
|---|
| 102 | +1: add t0.4s, v16.4s, v0.4s |
|---|
| 112 | 103 | mov dg0v.16b, dgav.16b |
|---|
| 113 | 104 | mov dg1v.16b, dgbv.16b |
|---|
| 114 | 105 | |
|---|
| .. | .. |
|---|
| 137 | 128 | add dgbv.4s, dgbv.4s, dg1v.4s |
|---|
| 138 | 129 | |
|---|
| 139 | 130 | /* handled all input blocks? */ |
|---|
| 140 | | - cbz w21, 3f |
|---|
| 141 | | - |
|---|
| 142 | | - if_will_cond_yield_neon |
|---|
| 143 | | - st1 {dgav.4s, dgbv.4s}, [x19] |
|---|
| 144 | | - do_cond_yield_neon |
|---|
| 131 | + cbz w2, 2f |
|---|
| 132 | + cond_yield 3f, x5, x6 |
|---|
| 145 | 133 | b 0b |
|---|
| 146 | | - endif_yield_neon |
|---|
| 147 | | - |
|---|
| 148 | | - b 1b |
|---|
| 149 | 134 | |
|---|
| 150 | 135 | /* |
|---|
| 151 | 136 | * Final block: add padding and total bit count. |
|---|
| 152 | 137 | * Skip if the input size was not a round multiple of the block size, |
|---|
| 153 | 138 | * the padding is handled by the C code in that case. |
|---|
| 154 | 139 | */ |
|---|
| 155 | | -3: cbz x4, 4f |
|---|
| 140 | +2: cbz x4, 3f |
|---|
| 156 | 141 | ldr_l w4, sha256_ce_offsetof_count, x4 |
|---|
| 157 | | - ldr x4, [x19, x4] |
|---|
| 142 | + ldr x4, [x0, x4] |
|---|
| 158 | 143 | movi v17.2d, #0 |
|---|
| 159 | 144 | mov x8, #0x80000000 |
|---|
| 160 | 145 | movi v18.2d, #0 |
|---|
| .. | .. |
|---|
| 163 | 148 | mov x4, #0 |
|---|
| 164 | 149 | mov v19.d[0], xzr |
|---|
| 165 | 150 | mov v19.d[1], x7 |
|---|
| 166 | | - b 2b |
|---|
| 151 | + b 1b |
|---|
| 167 | 152 | |
|---|
| 168 | 153 | /* store new state */ |
|---|
| 169 | | -4: st1 {dgav.4s, dgbv.4s}, [x19] |
|---|
| 170 | | - frame_pop |
|---|
| 154 | +3: st1 {dgav.4s, dgbv.4s}, [x0] |
|---|
| 155 | + mov w0, w2 |
|---|
| 171 | 156 | ret |
|---|
| 172 | | -ENDPROC(sha2_ce_transform) |
|---|
| 157 | +SYM_FUNC_END(sha2_ce_transform) |
|---|