@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2012 Intel Corporation
  * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
  */
 
 #include <arm_neon.h>
@@ -56,14 +52,14 @@
 		px = veorq_u8(vld1q_u8(p), vld1q_u8(dp));
 		vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq));
 
-		vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4);
+		vy = vshrq_n_u8(vx, 4);
 		vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f));
-		vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f));
+		vy = vqtbl1q_u8(qm1, vy);
 		qx = veorq_u8(vx, vy);
 
-		vy = (uint8x16_t)vshrq_n_s16((int16x8_t)px, 4);
+		vy = vshrq_n_u8(px, 4);
 		vx = vqtbl1q_u8(pm0, vandq_u8(px, x0f));
-		vy = vqtbl1q_u8(pm1, vandq_u8(vy, x0f));
+		vy = vqtbl1q_u8(pm1, vy);
 		vx = veorq_u8(vx, vy);
 		db = veorq_u8(vx, qx);
 
@@ -97,9 +93,9 @@
 
 		vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq));
 
-		vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4);
+		vy = vshrq_n_u8(vx, 4);
 		vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f));
-		vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f));
+		vy = vqtbl1q_u8(qm1, vy);
 		vx = veorq_u8(vx, vy);
 		vy = veorq_u8(vx, vld1q_u8(p));
 
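The recurring change in both code hunks swaps the signed 16-bit shift-and-mask sequence for a single per-byte unsigned shift: `vshrq_n_u8(vx, 4)` zero-fills the upper bits, so every lane already lies in 0..15 and can feed `vqtbl1q_u8` directly, whereas the old `vshrq_n_s16` form lets bits from the neighbouring byte (and the sign extension) leak into the high nibble, which is why it needed the `vandq_u8(..., x0f)` mask. Below is a minimal standalone sketch, not part of the patch, assuming an AArch64 toolchain with `arm_neon.h` and using an arbitrary test byte array; it checks that the two forms agree once the old one is masked.

```c
#include <arm_neon.h>
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Arbitrary test bytes, including values with the top bit set. */
	static const uint8_t in[16] = {
		0x00, 0x1f, 0x80, 0xff, 0x42, 0xa5, 0x5a, 0x7e,
		0x01, 0x10, 0x9c, 0xc3, 0x3c, 0x66, 0x99, 0xf0,
	};
	uint8x16_t vx  = vld1q_u8(in);
	uint8x16_t x0f = vdupq_n_u8(0x0f);

	/*
	 * Old form: 16-bit arithmetic shift, then mask away the bits that
	 * leaked in from the neighbouring byte / the sign extension.
	 */
	uint8x16_t old_hi = vandq_u8(
		vreinterpretq_u8_s16(vshrq_n_s16(vreinterpretq_s16_u8(vx), 4)),
		x0f);

	/* New form: per-byte unsigned shift, already a valid 0..15 index. */
	uint8x16_t new_hi = vshrq_n_u8(vx, 4);

	uint8_t a[16], b[16];
	vst1q_u8(a, old_hi);
	vst1q_u8(b, new_hi);
	for (int i = 0; i < 16; i++)
		assert(a[i] == b[i] && b[i] == (in[i] >> 4));
	puts("high nibbles match");
	return 0;
}
```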