2024-02-19 1c055e55a242a33e574e48be530e06770a210dcd
--- a/kernel/arch/x86/lib/csum-copy_64.S
+++ b/kernel/arch/x86/lib/csum-copy_64.S
@@ -18,9 +18,6 @@
  * rdi source
  * rsi destination
  * edx len (32bit)
- * ecx sum (32bit)
- * r8 src_err_ptr (int)
- * r9 dst_err_ptr (int)
  *
  * Output
  * eax 64bit sum. undefined in case of exception.
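With the incoming sum and the r8/r9 error pointers gone, only source, destination and length remain, which under the SysV AMD64 calling convention (rdi, rsi, edx for the first three arguments, result in eax) corresponds to a three-argument C prototype. A minimal sketch of that assumed interface, with the fault behaviour taken from the .Lfault path added at the end of this diff; the type name is a stand-in, since the real prototype and its wrappers are not part of this patch:

	/*
	 * Assumed C-level view of the trimmed register interface:
	 *   rdi = src, rsi = dst, edx = len, checksum returned in eax.
	 * On a faulting access the routine now returns 0 (see .Lfault below)
	 * instead of writing -EFAULT through the removed r8/r9 pointers.
	 */
	typedef unsigned int wsum_t;	/* stand-in for the kernel's __wsum */

	wsum_t csum_partial_copy_generic(const void *src, void *dst, int len);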
@@ -31,40 +28,32 @@
 
 	.macro source
 10:
-	_ASM_EXTABLE(10b, .Lbad_source)
+	_ASM_EXTABLE_UA(10b, .Lfault)
 	.endm
 
 	.macro dest
 20:
-	_ASM_EXTABLE(20b, .Lbad_dest)
+	_ASM_EXTABLE_UA(20b, .Lfault)
 	.endm
 
-	.macro ignore L=.Lignore
-30:
-	_ASM_EXTABLE(30b, \L)
-	.endm
+SYM_FUNC_START(csum_partial_copy_generic)
+	subq $5*8, %rsp
+	movq %rbx, 0*8(%rsp)
+	movq %r12, 1*8(%rsp)
+	movq %r14, 2*8(%rsp)
+	movq %r13, 3*8(%rsp)
+	movq %r15, 4*8(%rsp)
 
-
-ENTRY(csum_partial_copy_generic)
-	cmpl $3*64, %edx
-	jle .Lignore
-
-.Lignore:
-	subq $7*8, %rsp
-	movq %rbx, 2*8(%rsp)
-	movq %r12, 3*8(%rsp)
-	movq %r14, 4*8(%rsp)
-	movq %r13, 5*8(%rsp)
-	movq %r15, 6*8(%rsp)
-
-	movq %r8, (%rsp)
-	movq %r9, 1*8(%rsp)
-
-	movl %ecx, %eax
-	movl %edx, %ecx
-
+	movl $-1, %eax
 	xorl %r9d, %r9d
-	movq %rcx, %r12
+	movl %edx, %ecx
+	cmpl $8, %ecx
+	jb .Lshort
+
+	testb $7, %sil
+	jne .Lunaligned
+.Laligned:
+	movl %ecx, %r12d
 
 	shrq $6, %r12
 	jz .Lhandle_tail	/* < 64 */
@@ -95,7 +84,12 @@
 	source
 	movq 56(%rdi), %r13
 
-	ignore 2f
+30:
+	/*
+	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
+	 * potentially unmapped kernel address.
+	 */
+	_ASM_EXTABLE(30b, 2f)
 	prefetcht0 5*64(%rdi)
 2:
 	adcq %rbx, %rax
@@ -127,8 +121,6 @@
 	dest
 	movq %r13, 56(%rsi)
 
-3:
-
 	leaq 64(%rdi), %rdi
 	leaq 64(%rsi), %rsi
 
@@ -138,8 +130,8 @@
 
 	/* do last up to 56 bytes */
 .Lhandle_tail:
-	/* ecx: count */
-	movl %ecx, %r10d
+	/* ecx: count, rcx.63: the end result needs to be rol8 */
+	movq %rcx, %r10
 	andl $63, %ecx
 	shrl $3, %ecx
 	jz .Lfold
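The new comment packs two pieces of state into one register: the low bits of %rcx still hold the remaining byte count, while bit 63 serves as an "odd source alignment" flag that .Lende later tests (testq/js .Lwas_odd) and repairs with roll $8. The flag is set in the .Lodd path added later in this diff by the leaq -1(%rcx,%rcx), %rcx / rorq $1, %rcx pair; a small C sketch of what that pair computes (the helper name is illustrative, not kernel code):

	#include <stdint.h>

	/*
	 * Equivalent of "leaq -1(%rcx,%rcx), %rcx; rorq $1, %rcx":
	 * 2*len - 1 is odd, so rotating it right by one bit moves the low
	 * 1 into bit 63 and leaves len - 1 in the remaining bits.
	 */
	static uint64_t mark_odd_and_decrement(uint64_t len)	/* illustrative */
	{
		uint64_t t = 2 * len - 1;

		return (t >> 1) | (t << 63);	/* == (len - 1) | (1ULL << 63) */
	}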
@@ -168,6 +160,7 @@
 .Lhandle_7:
 	movl %r10d, %ecx
 	andl $7, %ecx
+.L1:	/* .Lshort rejoins the common path here */
 	shrl $1, %ecx
 	jz .Lhandle_1
 	movl $2, %edx
@@ -199,26 +192,65 @@
 	adcl %r9d, %eax	/* carry */
 
 .Lende:
-	movq 2*8(%rsp), %rbx
-	movq 3*8(%rsp), %r12
-	movq 4*8(%rsp), %r14
-	movq 5*8(%rsp), %r13
-	movq 6*8(%rsp), %r15
-	addq $7*8, %rsp
-	ret
+	testq %r10, %r10
+	js .Lwas_odd
+.Lout:
+	movq 0*8(%rsp), %rbx
+	movq 1*8(%rsp), %r12
+	movq 2*8(%rsp), %r14
+	movq 3*8(%rsp), %r13
+	movq 4*8(%rsp), %r15
+	addq $5*8, %rsp
+	RET
+.Lshort:
+	movl %ecx, %r10d
+	jmp .L1
+.Lunaligned:
+	xorl %ebx, %ebx
+	testb $1, %sil
+	jne .Lodd
+1:	testb $2, %sil
+	je 2f
+	source
+	movw (%rdi), %bx
+	dest
+	movw %bx, (%rsi)
+	leaq 2(%rdi), %rdi
+	subq $2, %rcx
+	leaq 2(%rsi), %rsi
+	addq %rbx, %rax
+2:	testb $4, %sil
+	je .Laligned
+	source
+	movl (%rdi), %ebx
+	dest
+	movl %ebx, (%rsi)
+	leaq 4(%rdi), %rdi
+	subq $4, %rcx
+	leaq 4(%rsi), %rsi
+	addq %rbx, %rax
+	jmp .Laligned
 
-	/* Exception handlers. Very simple, zeroing is done in the wrappers */
-.Lbad_source:
-	movq (%rsp), %rax
-	testq %rax, %rax
-	jz .Lende
-	movl $-EFAULT, (%rax)
-	jmp .Lende
+.Lodd:
+	source
+	movb (%rdi), %bl
+	dest
+	movb %bl, (%rsi)
+	leaq 1(%rdi), %rdi
+	leaq 1(%rsi), %rsi
+	/* decrement, set MSB */
+	leaq -1(%rcx, %rcx), %rcx
+	rorq $1, %rcx
+	shll $8, %ebx
+	addq %rbx, %rax
+	jmp 1b
 
-.Lbad_dest:
-	movq 8(%rsp), %rax
-	testq %rax, %rax
-	jz .Lende
-	movl $-EFAULT, (%rax)
-	jmp .Lende
-ENDPROC(csum_partial_copy_generic)
+.Lwas_odd:
+	roll $8, %eax
+	jmp .Lout
+
+	/* Exception: just return 0 */
+.Lfault:
+	xorl %eax, %eax
+	jmp .Lout
+SYM_FUNC_END(csum_partial_copy_generic)
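The .Lwas_odd fixup relies on a standard property of the ones'-complement (internet) checksum, RFC 1071's byte-order independence: rotating the 32-bit accumulator by 8 bits is equivalent to byte-swapping the folded 16-bit checksum, which undoes the off-by-one byte pairing caused by an odd source address (the leading byte is added via shll $8 and the rest is summed from the shifted position). A self-contained user-space check of that identity; it is a sketch, not kernel code, and the helper names are illustrative:

	#include <stdint.h>
	#include <stdio.h>

	/* Fold a 32-bit ones'-complement accumulator down to 16 bits. */
	static uint16_t fold16(uint32_t sum)
	{
		while (sum >> 16)
			sum = (sum & 0xffff) + (sum >> 16);
		return (uint16_t)sum;
	}

	static uint32_t rol32(uint32_t x, unsigned int n)
	{
		return x << n | x >> (32 - n);
	}

	static uint16_t swab16(uint16_t x)
	{
		return (uint16_t)(x << 8 | x >> 8);
	}

	int main(void)
	{
		/* deterministic xorshift64 so the test is self-contained */
		uint64_t state = 0x123456789abcdef0ULL;

		for (int i = 0; i < 1000000; i++) {
			state ^= state << 13;
			state ^= state >> 7;
			state ^= state << 17;
			uint32_t acc = (uint32_t)state;

			/* "roll $8, %eax" on the accumulator == byte-swapping the
			 * folded checksum, i.e. undoing an off-by-one byte pairing. */
			if (fold16(rol32(acc, 8)) != swab16(fold16(acc))) {
				printf("mismatch at acc=0x%08x\n", (unsigned)acc);
				return 1;
			}
		}
		printf("identity holds for all tested accumulators\n");
		return 0;
	}

Given that identity, flagging odd alignment in bit 63 of %rcx and applying a single roll $8 at .Lwas_odd yields the same result the aligned path would have produced, which is presumably why the fixup can be deferred to the very end of the function.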