/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text

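/*
 * Register interface (see the .req aliases below):
 *   x0 - pointer to the expanded round key schedule
 *   x1 - 16-byte output block
 *   x2 - 16-byte input block
 *   x3 - number of rounds (10, 12 or 14)
 */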
	rk		.req	x0
	out		.req	x1
	in		.req	x2
	rounds		.req	x3
	/* tt reuses x2 (in): the input block is fully loaded before tt is set */
	tt		.req	x2

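/*
 * __pair1 - load a pair of round transform table entries for the
 * encryption path (\enc == 1). \shift selects which byte of the source
 * columns is used as the index; \op/\sz switch between 32-bit loads
 * from the round table and byte loads from the final round table.
 */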
	.macro		__pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
	.ifc		\op\shift, b0
	ubfiz		\reg0, \in0, #2, #8
	ubfiz		\reg1, \in1e, #2, #8
	.else
	ubfx		\reg0, \in0, #\shift, #8
	ubfx		\reg1, \in1e, #\shift, #8
	.endif

	/*
	 * AArch64 cannot do byte size indexed loads from a table containing
	 * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
	 * valid instruction. So perform the shift explicitly first for the
	 * high bytes (the low byte is shifted implicitly by using ubfiz rather
	 * than ubfx above)
	 */
	.ifnc		\op, b
	ldr		\reg0, [tt, \reg0, uxtw #2]
	ldr		\reg1, [tt, \reg1, uxtw #2]
	.else
	.if		\shift > 0
	lsl		\reg0, \reg0, #2
	lsl		\reg1, \reg1, #2
	.endif
	ldrb		\reg0, [tt, \reg0, uxtw]
	ldrb		\reg1, [tt, \reg1, uxtw]
	.endif
	.endm

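/*
 * __pair0 - decryption path variant (\enc == 0): the second lookup is
 * indexed by \in1d, i.e. the InvShiftRows neighbour rather than the
 * ShiftRows one. The final round table (the inverse S-box) has byte
 * sized entries, so no explicit index scaling is needed here.
 */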
	.macro		__pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
	ubfx		\reg0, \in0, #\shift, #8
	ubfx		\reg1, \in1d, #\shift, #8
	ldr\op		\reg0, [tt, \reg0, uxtw #\sz]
	ldr\op		\reg1, [tt, \reg1, uxtw #\sz]
	.endm

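/*
 * __hround - compute two output columns of one AES round: fetch the
 * next two round key words from rk, then xor four table lookups into
 * each column. A single lookup table is used; the ror #24/#16/#8
 * operands take the place of the rotated tables of a classic 4-table
 * implementation.
 */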
	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
	ldp		\out0, \out1, [rk], #8

	__pair\enc	\sz, \op, w12, w13, \in0, \in1, \in3, 0
	__pair\enc	\sz, \op, w14, w15, \in1, \in2, \in0, 8
	__pair\enc	\sz, \op, w16, w17, \in2, \in3, \in1, 16
	__pair\enc	\sz, \op, \t0, \t1, \in3, \in0, \in2, 24

	eor		\out0, \out0, w12
	eor		\out1, \out1, w13
	eor		\out0, \out0, w14, ror #24
	eor		\out1, \out1, w15, ror #24
	eor		\out0, \out0, w16, ror #16
	eor		\out1, \out1, w17, ror #16
	eor		\out0, \out0, \t0, ror #8
	eor		\out1, \out1, \t1, ror #8
	.endm

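/*
 * fround/iround - one full encryption/decryption round over all four
 * state columns, built from two __hround invocations. The optional
 * \sz, \op arguments switch the last round over to byte-wide S-box
 * lookups.
 */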
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
	.endm

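/*
 * do_crypt - shared body of the encryption and decryption entry points:
 * load the input block, xor in the first round key, iterate \round
 * against \ttab, and finish with a final round that uses the byte
 * table \ltab.
 */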
	.macro		do_crypt, round, ttab, ltab, bsz
	ldp		w4, w5, [in]
	ldp		w6, w7, [in, #8]
	ldp		w8, w9, [rk], #16
	ldp		w10, w11, [rk, #-8]

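	/*
	 * On big-endian kernels, byte swap the input words so the
	 * in-register byte order matches the little-endian convention
	 * assumed by the lookup tables; the result is swapped back
	 * before it is stored.
	 */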
CPU_BE(	rev		w4, w4		)
CPU_BE(	rev		w5, w5		)
CPU_BE(	rev		w6, w6		)
CPU_BE(	rev		w7, w7		)

	eor		w4, w4, w8
	eor		w5, w5, w9
	eor		w6, w6, w10
	eor		w7, w7, w11

	adr_l		tt, \ttab

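	/*
	 * The round loop below is unrolled: the state alternates between
	 * w4-w7 and w8-w11. Bit 1 of 'rounds' (set for 10 and 14, clear
	 * for 12) selects the entry point so that, in all cases, the
	 * loop falls through to 3: with the state in w8-w11 for the
	 * final byte-table round.
	 */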
	tbnz		rounds, #1, 1f

0:	\round		w8, w9, w10, w11, w4, w5, w6, w7
	\round		w4, w5, w6, w7, w8, w9, w10, w11

1:	subs		rounds, rounds, #4
	\round		w8, w9, w10, w11, w4, w5, w6, w7
	b.ls		3f
2:	\round		w4, w5, w6, w7, w8, w9, w10, w11
	b		0b
3:	adr_l		tt, \ltab
	\round		w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b

CPU_BE(	rev		w4, w4		)
CPU_BE(	rev		w5, w5		)
CPU_BE(	rev		w6, w6		)
CPU_BE(	rev		w7, w7		)

	stp		w4, w5, [out]
	stp		w6, w7, [out, #8]
	ret
	.endm

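/*
 * For the final encryption round no separate S-box table is needed:
 * byte 1 of each 32-bit crypto_ft_tab entry holds the plain S-box
 * value (in either endianness), so indexing crypto_ft_tab + 1 with
 * byte loads yields S-box outputs directly. The inverse table has no
 * such byte, so decryption uses a dedicated 256-byte inverse S-box.
 */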
ENTRY(__aes_arm64_encrypt)
	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
ENDPROC(__aes_arm64_encrypt)

	.align		5
ENTRY(__aes_arm64_decrypt)
	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
ENDPROC(__aes_arm64_decrypt)

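/*
 * The AES inverse S-box, used only for the final decryption round.
 * Aligning it to a cache line boundary keeps the 256-byte table within
 * the minimum number of cache lines.
 */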
	.section	".rodata", "a"
	.align		L1_CACHE_SHIFT
	.type		__aes_arm64_inverse_sbox, %object
__aes_arm64_inverse_sbox:
	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
	.size		__aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox