| .. | .. |
|---|
| 37 | 37 | .endm |
|---|
| 38 | 38 | |
|---|
| 39 | 39 | /* |
|---|
| 40 | | - * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size) |
|---|
| 40 | + * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size) |
|---|
| 41 | 41 | */ |
|---|
| 42 | 42 | .text |
|---|
| 43 | | -ENTRY(sha3_ce_transform) |
|---|
| 44 | | - frame_push 4 |
|---|
| 45 | | - |
|---|
| 46 | | - mov x19, x0 |
|---|
| 47 | | - mov x20, x1 |
|---|
| 48 | | - mov x21, x2 |
|---|
| 49 | | - mov x22, x3 |
|---|
| 50 | | - |
|---|
| 51 | | -0: /* load state */ |
|---|
| 52 | | - add x8, x19, #32 |
|---|
| 53 | | - ld1 { v0.1d- v3.1d}, [x19] |
|---|
| 43 | +SYM_FUNC_START(sha3_ce_transform) |
|---|
| 44 | + /* load state */ |
|---|
| 45 | + add x8, x0, #32 |
|---|
| 46 | + ld1 { v0.1d- v3.1d}, [x0] |
|---|
| 54 | 47 | ld1 { v4.1d- v7.1d}, [x8], #32 |
|---|
| 55 | 48 | ld1 { v8.1d-v11.1d}, [x8], #32 |
|---|
| 56 | 49 | ld1 {v12.1d-v15.1d}, [x8], #32 |
|---|
| .. | .. |
|---|
| 58 | 51 | ld1 {v20.1d-v23.1d}, [x8], #32 |
|---|
| 59 | 52 | ld1 {v24.1d}, [x8] |
|---|
| 60 | 53 | |
|---|
| 61 | | -1: sub w21, w21, #1 |
|---|
| 54 | +0: sub w2, w2, #1 |
|---|
| 62 | 55 | mov w8, #24 |
|---|
| 63 | 56 | adr_l x9, .Lsha3_rcon |
|---|
| 64 | 57 | |
|---|
| 65 | 58 | /* load input */ |
|---|
| 66 | | - ld1 {v25.8b-v28.8b}, [x20], #32 |
|---|
| 67 | | - ld1 {v29.8b-v31.8b}, [x20], #24 |
|---|
| 59 | + ld1 {v25.8b-v28.8b}, [x1], #32 |
|---|
| 60 | + ld1 {v29.8b-v31.8b}, [x1], #24 |
|---|
| 68 | 61 | eor v0.8b, v0.8b, v25.8b |
|---|
| 69 | 62 | eor v1.8b, v1.8b, v26.8b |
|---|
| 70 | 63 | eor v2.8b, v2.8b, v27.8b |
|---|
| .. | .. |
|---|
| 73 | 66 | eor v5.8b, v5.8b, v30.8b |
|---|
| 74 | 67 | eor v6.8b, v6.8b, v31.8b |
|---|
| 75 | 68 | |
|---|
| 76 | | - tbnz x22, #6, 3f // SHA3-512 |
|---|
| 69 | + tbnz x3, #6, 2f // SHA3-512 |
|---|
| 77 | 70 | |
|---|
| 78 | | - ld1 {v25.8b-v28.8b}, [x20], #32 |
|---|
| 79 | | - ld1 {v29.8b-v30.8b}, [x20], #16 |
|---|
| 71 | + ld1 {v25.8b-v28.8b}, [x1], #32 |
|---|
| 72 | + ld1 {v29.8b-v30.8b}, [x1], #16 |
|---|
| 80 | 73 | eor v7.8b, v7.8b, v25.8b |
|---|
| 81 | 74 | eor v8.8b, v8.8b, v26.8b |
|---|
| 82 | 75 | eor v9.8b, v9.8b, v27.8b |
|---|
| .. | .. |
|---|
| 84 | 77 | eor v11.8b, v11.8b, v29.8b |
|---|
| 85 | 78 | eor v12.8b, v12.8b, v30.8b |
|---|
| 86 | 79 | |
|---|
| 87 | | - tbnz x22, #4, 2f // SHA3-384 or SHA3-224 |
|---|
| 80 | + tbnz x3, #4, 1f // SHA3-384 or SHA3-224 |
|---|
| 88 | 81 | |
|---|
| 89 | 82 | // SHA3-256 |
|---|
| 90 | | - ld1 {v25.8b-v28.8b}, [x20], #32 |
|---|
| 83 | + ld1 {v25.8b-v28.8b}, [x1], #32 |
|---|
| 91 | 84 | eor v13.8b, v13.8b, v25.8b |
|---|
| 92 | 85 | eor v14.8b, v14.8b, v26.8b |
|---|
| 93 | 86 | eor v15.8b, v15.8b, v27.8b |
|---|
| 94 | 87 | eor v16.8b, v16.8b, v28.8b |
|---|
| 95 | | - b 4f |
|---|
| 88 | + b 3f |
|---|
| 96 | 89 | |
|---|
| 97 | | -2: tbz x22, #2, 4f // bit 2 cleared? SHA3-384 |
|---|
| 90 | +1: tbz x3, #2, 3f // bit 2 cleared? SHA3-384 |
|---|
| 98 | 91 | |
|---|
| 99 | 92 | // SHA3-224 |
|---|
| 100 | | - ld1 {v25.8b-v28.8b}, [x20], #32 |
|---|
| 101 | | - ld1 {v29.8b}, [x20], #8 |
|---|
| 93 | + ld1 {v25.8b-v28.8b}, [x1], #32 |
|---|
| 94 | + ld1 {v29.8b}, [x1], #8 |
|---|
| 102 | 95 | eor v13.8b, v13.8b, v25.8b |
|---|
| 103 | 96 | eor v14.8b, v14.8b, v26.8b |
|---|
| 104 | 97 | eor v15.8b, v15.8b, v27.8b |
|---|
| 105 | 98 | eor v16.8b, v16.8b, v28.8b |
|---|
| 106 | 99 | eor v17.8b, v17.8b, v29.8b |
|---|
| 107 | | - b 4f |
|---|
| 100 | + b 3f |
|---|
| 108 | 101 | |
|---|
| 109 | 102 | // SHA3-512 |
|---|
| 110 | | -3: ld1 {v25.8b-v26.8b}, [x20], #16 |
|---|
| 103 | +2: ld1 {v25.8b-v26.8b}, [x1], #16 |
|---|
| 111 | 104 | eor v7.8b, v7.8b, v25.8b |
|---|
| 112 | 105 | eor v8.8b, v8.8b, v26.8b |
|---|
| 113 | 106 | |
|---|
| 114 | | -4: sub w8, w8, #1 |
|---|
| 107 | +3: sub w8, w8, #1 |
|---|
| 115 | 108 | |
|---|
| 116 | 109 | eor3 v29.16b, v4.16b, v9.16b, v14.16b |
|---|
| 117 | 110 | eor3 v26.16b, v1.16b, v6.16b, v11.16b |
|---|
| .. | .. |
|---|
| 190 | 183 | |
|---|
| 191 | 184 | eor v0.16b, v0.16b, v31.16b |
|---|
| 192 | 185 | |
|---|
| 193 | | - cbnz w8, 4b |
|---|
| 194 | | - cbz w21, 5f |
|---|
| 195 | | - |
|---|
| 196 | | - if_will_cond_yield_neon |
|---|
| 197 | | - add x8, x19, #32 |
|---|
| 198 | | - st1 { v0.1d- v3.1d}, [x19] |
|---|
| 199 | | - st1 { v4.1d- v7.1d}, [x8], #32 |
|---|
| 200 | | - st1 { v8.1d-v11.1d}, [x8], #32 |
|---|
| 201 | | - st1 {v12.1d-v15.1d}, [x8], #32 |
|---|
| 202 | | - st1 {v16.1d-v19.1d}, [x8], #32 |
|---|
| 203 | | - st1 {v20.1d-v23.1d}, [x8], #32 |
|---|
| 204 | | - st1 {v24.1d}, [x8] |
|---|
| 205 | | - do_cond_yield_neon |
|---|
| 206 | | - b 0b |
|---|
| 207 | | - endif_yield_neon |
|---|
| 208 | | - |
|---|
| 209 | | - b 1b |
|---|
| 186 | + cbnz w8, 3b |
|---|
| 187 | + cond_yield 4f, x8, x9 |
|---|
| 188 | + cbnz w2, 0b |
|---|
| 210 | 189 | |
|---|
| 211 | 190 | /* save state */ |
|---|
| 212 | | -5: st1 { v0.1d- v3.1d}, [x19], #32 |
|---|
| 213 | | - st1 { v4.1d- v7.1d}, [x19], #32 |
|---|
| 214 | | - st1 { v8.1d-v11.1d}, [x19], #32 |
|---|
| 215 | | - st1 {v12.1d-v15.1d}, [x19], #32 |
|---|
| 216 | | - st1 {v16.1d-v19.1d}, [x19], #32 |
|---|
| 217 | | - st1 {v20.1d-v23.1d}, [x19], #32 |
|---|
| 218 | | - st1 {v24.1d}, [x19] |
|---|
| 219 | | - frame_pop |
|---|
| 191 | +4: st1 { v0.1d- v3.1d}, [x0], #32 |
|---|
| 192 | + st1 { v4.1d- v7.1d}, [x0], #32 |
|---|
| 193 | + st1 { v8.1d-v11.1d}, [x0], #32 |
|---|
| 194 | + st1 {v12.1d-v15.1d}, [x0], #32 |
|---|
| 195 | + st1 {v16.1d-v19.1d}, [x0], #32 |
|---|
| 196 | + st1 {v20.1d-v23.1d}, [x0], #32 |
|---|
| 197 | + st1 {v24.1d}, [x0] |
|---|
| 198 | + mov w0, w2 |
|---|
| 220 | 199 | ret |
|---|
| 221 | | -ENDPROC(sha3_ce_transform) |
|---|
| 200 | +SYM_FUNC_END(sha3_ce_transform) |
|---|
| 222 | 201 | |
|---|
| 223 | 202 | .section ".rodata", "a" |
|---|
| 224 | 203 | .align 8 |
|---|