From ea08eeccae9297f7aabd2ef7f0c2517ac4549acc Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:18:26 +0000
Subject: [PATCH] write in 30M
---
kernel/arch/arm/crypto/aes-neonbs-core.S | 79 +++++++++++++++++++--------------------
1 files changed, 39 insertions(+), 40 deletions(-)
diff --git a/kernel/arch/arm/crypto/aes-neonbs-core.S b/kernel/arch/arm/crypto/aes-neonbs-core.S
index 2b625c6..7d0cc7f 100644
--- a/kernel/arch/arm/crypto/aes-neonbs-core.S
+++ b/kernel/arch/arm/crypto/aes-neonbs-core.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Bit sliced AES using NEON instructions
*
* Copyright (C) 2017 Linaro Ltd.
* Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
/*
@@ -78,11 +75,6 @@
.macro __ldr, out, sym
vldr \out\()l, \sym
vldr \out\()h, \sym + 8
- .endm
-
- .macro __adr, reg, lbl
- adr \reg, \lbl
-THUMB( orr \reg, \reg, #1 )
.endm
.macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
@@ -632,11 +624,11 @@
push {r4-r6, lr}
ldr r5, [sp, #16] // number of blocks
-99: __adr ip, 0f
+99: adr ip, 0f
and lr, r5, #7
cmp r5, #8
sub ip, ip, lr, lsl #2
- bxlt ip // computed goto if blocks < 8
+ movlt pc, ip // computed goto if blocks < 8
vld1.8 {q0}, [r1]!
vld1.8 {q1}, [r1]!
@@ -651,11 +643,11 @@
mov rounds, r3
bl \do8
- __adr ip, 1f
+ adr ip, 1f
and lr, r5, #7
cmp r5, #8
sub ip, ip, lr, lsl #2
- bxlt ip // computed goto if blocks < 8
+ movlt pc, ip // computed goto if blocks < 8
vst1.8 {\o0}, [r0]!
vst1.8 {\o1}, [r0]!
@@ -692,12 +684,12 @@
push {r4-r6, lr}
ldm ip, {r5-r6} // load args 4-5
-99: __adr ip, 0f
+99: adr ip, 0f
and lr, r5, #7
cmp r5, #8
sub ip, ip, lr, lsl #2
mov lr, r1
- bxlt ip // computed goto if blocks < 8
+ movlt pc, ip // computed goto if blocks < 8
vld1.8 {q0}, [lr]!
vld1.8 {q1}, [lr]!
@@ -721,11 +713,11 @@
vmov q14, q8
vmov q15, q8
- __adr ip, 1f
+ adr ip, 1f
and lr, r5, #7
cmp r5, #8
sub ip, ip, lr, lsl #2
- bxlt ip // computed goto if blocks < 8
+ movlt pc, ip // computed goto if blocks < 8
vld1.8 {q9}, [r1]!
vld1.8 {q10}, [r1]!
@@ -736,9 +728,9 @@
vld1.8 {q15}, [r1]!
W(nop)
-1: __adr ip, 2f
+1: adr ip, 2f
sub ip, ip, lr, lsl #3
- bxlt ip // computed goto if blocks < 8
+ movlt pc, ip // computed goto if blocks < 8
veor q0, q0, q8
vst1.8 {q0}, [r0]!
@@ -807,13 +799,13 @@
vmov q6, q0
vmov q7, q0
- __adr ip, 0f
+ adr ip, 0f
sub lr, r5, #1
and lr, lr, #7
cmp r5, #8
sub ip, ip, lr, lsl #5
sub ip, ip, lr, lsl #2
- bxlt ip // computed goto if blocks < 8
+ movlt pc, ip // computed goto if blocks < 8
next_ctr q1
next_ctr q2
@@ -827,13 +819,13 @@
mov rounds, r3
bl aesbs_encrypt8
- __adr ip, 1f
+ adr ip, 1f
and lr, r5, #7
cmp r5, #8
movgt r4, #0
ldrle r4, [sp, #40] // load final in the last round
sub ip, ip, lr, lsl #2
- bxlt ip // computed goto if blocks < 8
+ movlt pc, ip // computed goto if blocks < 8
vld1.8 {q8}, [r1]!
vld1.8 {q9}, [r1]!
@@ -846,10 +838,10 @@
1: bne 2f
vld1.8 {q15}, [r1]!
-2: __adr ip, 3f
+2: adr ip, 3f
cmp r5, #8
sub ip, ip, lr, lsl #3
- bxlt ip // computed goto if blocks < 8
+ movlt pc, ip // computed goto if blocks < 8
veor q0, q0, q8
vst1.8 {q0}, [r0]!
@@ -890,27 +882,25 @@
veor \out, \out, \tmp
.endm
- .align 4
-.Lxts_mul_x:
- .quad 1, 0x87
-
/*
* aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
- * int blocks, u8 iv[])
+ * int blocks, u8 iv[], int reorder_last_tweak)
* aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
- * int blocks, u8 iv[])
+ * int blocks, u8 iv[], int reorder_last_tweak)
*/
__xts_prepare8:
vld1.8 {q14}, [r7] // load iv
- __ldr q15, .Lxts_mul_x // load tweak mask
+ vmov.i32 d30, #0x87 // compose tweak mask vector
+ vmovl.u32 q15, d30
+ vshr.u64 d30, d31, #7
vmov q12, q14
- __adr ip, 0f
+ adr ip, 0f
and r4, r6, #7
cmp r6, #8
sub ip, ip, r4, lsl #5
mov r4, sp
- bxlt ip // computed goto if blocks < 8
+ movlt pc, ip // computed goto if blocks < 8
vld1.8 {q0}, [r1]!
next_tweak q12, q14, q15, q13
@@ -949,17 +939,24 @@
vld1.8 {q7}, [r1]!
next_tweak q14, q12, q15, q13
- veor q7, q7, q12
+THUMB( itt le )
+ W(cmple) r8, #0
+ ble 1f
+0: veor q7, q7, q12
vst1.8 {q12}, [r4, :128]
-0: vst1.8 {q14}, [r7] // store next iv
+ vst1.8 {q14}, [r7] // store next iv
bx lr
+
+1: vswp q12, q14
+ b 0b
ENDPROC(__xts_prepare8)
.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
push {r4-r8, lr}
mov r5, sp // preserve sp
ldrd r6, r7, [sp, #24] // get blocks and iv args
+ rsb r8, ip, #1
sub ip, sp, #128 // make room for 8x tweak
bic ip, ip, #0xf // align sp to 16 bytes
mov sp, ip
@@ -970,12 +967,12 @@
mov rounds, r3
bl \do8
- __adr ip, 0f
+ adr ip, 0f
and lr, r6, #7
cmp r6, #8
sub ip, ip, lr, lsl #2
mov r4, sp
- bxlt ip // computed goto if blocks < 8
+ movlt pc, ip // computed goto if blocks < 8
vld1.8 {q8}, [r4, :128]!
vld1.8 {q9}, [r4, :128]!
@@ -986,9 +983,9 @@
vld1.8 {q14}, [r4, :128]!
vld1.8 {q15}, [r4, :128]
-0: __adr ip, 1f
+0: adr ip, 1f
sub ip, ip, lr, lsl #3
- bxlt ip // computed goto if blocks < 8
+ movlt pc, ip // computed goto if blocks < 8
veor \o0, \o0, q8
vst1.8 {\o0}, [r0]!
@@ -1015,9 +1012,11 @@
.endm
ENTRY(aesbs_xts_encrypt)
+ mov ip, #0 // never reorder final tweak
__xts_crypt aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5
ENDPROC(aesbs_xts_encrypt)
ENTRY(aesbs_xts_decrypt)
+ ldr ip, [sp, #8] // reorder final tweak?
__xts_crypt aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5
ENDPROC(aesbs_xts_decrypt)
--
Gitblit v1.6.2