1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
| /* SPDX-License-Identifier: GPL-2.0-or-later */
| /*
| * arch/ia64/lib/xor.S
| *
| * Optimized RAID-5 checksumming functions for IA-64.
| */
|
| #include <asm/asmmacro.h>
| #include <asm/export.h>
|
| GLOBAL_ENTRY(xor_ia64_2) /*
| * xor_ia64_2: XOR two buffers together 8 bytes at a time with a
| * software-pipelined loop and store the result over the first buffer.
| *
| *   in0 - length. It is shifted right by 3 below, so it is treated as a
| *         byte count and converted to a count of 8-byte words.
| *   in1 - first source and also the destination (copied to r16 and r8)
| *   in2 - second source (copied to r17)
| *
| * Scratch registers written: r8, r16, r17, r29, r30, r31.
| *
| * NOTE(review): no check for in0 below 8 is visible here. In that case the
| * loop count computed below would be -1. Callers presumably always pass a
| * positive multiple of 8 - confirm.
| */
| .prologue
| .fframe 0 /* unwind info: fixed memory stack frame of size 0 */
| .save ar.pfs, r31 /* unwind info: the previous ar.pfs is kept in r31 */
| alloc r31 = ar.pfs, 3, 0, 13, 16 /* 3 inputs, 0 locals, 13 outputs, 16 rotating: the whole 16-register frame rotates, in0-in2 included */
| .save ar.lc, r30 /* unwind info: the previous ar.lc is kept in r30 */
| mov r30 = ar.lc /* saved so that ar.lc can be restored before returning */
| .save pr, r29 /* unwind info: the previous predicates are kept in r29 */
| mov r29 = pr /* saved so that pr can be restored before returning */
| ;;
| .body
| mov r8 = in1 /* r8 = store pointer, so results go back into the in1 buffer */
| mov ar.ec = 6 + 2 /* epilogue count = 8, one per pipeline stage named by .rotp p[6+2] */
| shr in0 = in0, 3 /* in0 = length divided by 8 */
| ;;
| adds in0 = -1, in0 /* the loop counter is loaded with the word count minus one */
| mov r16 = in1 /* r16 = load pointer for source 1 */
| mov r17 = in2 /* r17 = load pointer for source 2 */
| ;;
| mov ar.lc = in0
| mov pr.rot = 1 << 16 /* rotating predicates: only bit 16, the first one, starts set */
| ;;
| .rotr s1[6+1], s2[6+1], d[2] /* names for the rotating registers: 7 + 7 + 2 = 16 */
| .rotp p[6+2] /* names for the 8 rotating stage predicates */
| 0:
| (p[0]) ld8.nta s1[0] = [r16], 8 /* stage 0: load one word from source 1, pointer += 8 */
| (p[0]) ld8.nta s2[0] = [r17], 8 /* stage 0: load one word from source 2, pointer += 8 */
| (p[6]) xor d[0] = s1[6], s2[6] /* stage 6: XOR the words loaded six rotations earlier */
| (p[6+1])st8.nta [r8] = d[1], 8 /* stage 7: store the result of the previous rotation, pointer += 8 */
| nop.f 0
| br.ctop.dptk.few 0b /* pipelined-loop branch: rotates registers and predicates, counts down ar.lc then ar.ec */
| ;;
| mov ar.lc = r30 /* restore the saved loop count */
| mov pr = r29, -1 /* restore the saved predicates, mask -1 selects all of them */
| br.ret.sptk.few rp
| END(xor_ia64_2)
| EXPORT_SYMBOL(xor_ia64_2)
|
| GLOBAL_ENTRY(xor_ia64_3) /*
| * xor_ia64_3: XOR three buffers together 8 bytes at a time with a
| * software-pipelined loop and store the result over the first buffer.
| *
| *   in0 - length. It is shifted right by 3 below, so it is treated as a
| *         byte count and converted to a count of 8-byte words.
| *   in1 - first source and also the destination (copied to r16 and r8)
| *   in2 - second source (copied to r17)
| *   in3 - third source (copied to r18)
| *
| * Scratch registers written: r8, r16-r18, r29, r30, r31.
| *
| * NOTE(review): no check for in0 below 8 is visible here. In that case the
| * loop count computed below would be -1. Callers presumably always pass a
| * positive multiple of 8 - confirm.
| */
| .prologue
| .fframe 0 /* unwind info: fixed memory stack frame of size 0 */
| .save ar.pfs, r31 /* unwind info: the previous ar.pfs is kept in r31 */
| alloc r31 = ar.pfs, 4, 0, 20, 24 /* 4 inputs, 0 locals, 20 outputs, 24 rotating: the whole 24-register frame rotates, in0-in3 included */
| .save ar.lc, r30 /* unwind info: the previous ar.lc is kept in r30 */
| mov r30 = ar.lc /* saved so that ar.lc can be restored before returning */
| .save pr, r29 /* unwind info: the previous predicates are kept in r29 */
| mov r29 = pr /* saved so that pr can be restored before returning */
| ;;
| .body
| mov r8 = in1 /* r8 = store pointer, so results go back into the in1 buffer */
| mov ar.ec = 6 + 2 /* epilogue count = 8, one per pipeline stage named by .rotp p[6+2] */
| shr in0 = in0, 3 /* in0 = length divided by 8 */
| ;;
| adds in0 = -1, in0 /* the loop counter is loaded with the word count minus one */
| mov r16 = in1 /* r16 = load pointer for source 1 */
| mov r17 = in2 /* r17 = load pointer for source 2 */
| ;;
| mov r18 = in3 /* r18 = load pointer for source 3 */
| mov ar.lc = in0
| mov pr.rot = 1 << 16 /* rotating predicates: only bit 16, the first one, starts set */
| ;;
| .rotr s1[6+1], s2[6+1], s3[6+1], d[2] /* names for the rotating registers: 3 * 7 + 2 = 23 of the 24 */
| .rotp p[6+2] /* names for the 8 rotating stage predicates */
| 0:
| (p[0]) ld8.nta s1[0] = [r16], 8 /* stage 0: load one word from source 1, pointer += 8 */
| (p[0]) ld8.nta s2[0] = [r17], 8 /* stage 0: load one word from source 2, pointer += 8 */
| (p[6]) xor d[0] = s1[6], s2[6] /* stage 6: d[0] = s1 ^ s2 for the words loaded six rotations earlier */
| ;;
| (p[0]) ld8.nta s3[0] = [r18], 8 /* stage 0: load one word from source 3, pointer += 8 */
| (p[6+1])st8.nta [r8] = d[1], 8 /* stage 7: store the finished result of the previous rotation, pointer += 8 */
| (p[6]) xor d[0] = d[0], s3[6] /* stage 6: fold source 3 into the partial result from the group above */
| br.ctop.dptk.few 0b /* pipelined-loop branch: rotates registers and predicates, counts down ar.lc then ar.ec */
| ;;
| mov ar.lc = r30 /* restore the saved loop count */
| mov pr = r29, -1 /* restore the saved predicates, mask -1 selects all of them */
| br.ret.sptk.few rp
| END(xor_ia64_3)
| EXPORT_SYMBOL(xor_ia64_3)
|
| GLOBAL_ENTRY(xor_ia64_4) /*
| * xor_ia64_4: XOR four buffers together 8 bytes at a time with a
| * software-pipelined loop and store the result over the first buffer.
| *
| *   in0 - length. It is shifted right by 3 below, so it is treated as a
| *         byte count and converted to a count of 8-byte words.
| *   in1 - first source and also the destination (copied to r16 and r8)
| *   in2 - second source (copied to r17)
| *   in3 - third source (copied to r18)
| *   in4 - fourth source (copied to r19)
| *
| * Scratch registers written: r8, r16-r20, r29, r30, r31.
| *
| * NOTE(review): no check for in0 below 8 is visible here. In that case the
| * loop count computed below would be -1. Callers presumably always pass a
| * positive multiple of 8 - confirm.
| */
| .prologue
| .fframe 0 /* unwind info: fixed memory stack frame of size 0 */
| .save ar.pfs, r31 /* unwind info: the previous ar.pfs is kept in r31 */
| alloc r31 = ar.pfs, 5, 0, 27, 32 /* 5 inputs, 0 locals, 27 outputs, 32 rotating: the whole 32-register frame rotates, in0-in4 included */
| .save ar.lc, r30 /* unwind info: the previous ar.lc is kept in r30 */
| mov r30 = ar.lc /* saved so that ar.lc can be restored before returning */
| .save pr, r29 /* unwind info: the previous predicates are kept in r29 */
| mov r29 = pr /* saved so that pr can be restored before returning */
| ;;
| .body
| mov r8 = in1 /* r8 = store pointer, so results go back into the in1 buffer */
| mov ar.ec = 6 + 2 /* epilogue count = 8, one per pipeline stage named by .rotp p[6+2] */
| shr in0 = in0, 3 /* in0 = length divided by 8 */
| ;;
| adds in0 = -1, in0 /* the loop counter is loaded with the word count minus one */
| mov r16 = in1 /* r16 = load pointer for source 1 */
| mov r17 = in2 /* r17 = load pointer for source 2 */
| ;;
| mov r18 = in3 /* r18 = load pointer for source 3 */
| mov ar.lc = in0
| mov pr.rot = 1 << 16 /* rotating predicates: only bit 16, the first one, starts set */
| mov r19 = in4 /* r19 = load pointer for source 4 */
| ;;
| .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2] /* names for the rotating registers: 4 * 7 + 2 = 30 of the 32 */
| .rotp p[6+2] /* names for the 8 rotating stage predicates */
| 0:
| (p[0]) ld8.nta s1[0] = [r16], 8 /* stage 0: load one word from source 1, pointer += 8 */
| (p[0]) ld8.nta s2[0] = [r17], 8 /* stage 0: load one word from source 2, pointer += 8 */
| (p[6]) xor d[0] = s1[6], s2[6] /* stage 6: d[0] = s1 ^ s2 for the words loaded six rotations earlier */
| (p[0]) ld8.nta s3[0] = [r18], 8 /* stage 0: load one word from source 3, pointer += 8 */
| (p[0]) ld8.nta s4[0] = [r19], 8 /* stage 0: load one word from source 4, pointer += 8 */
| (p[6]) xor r20 = s3[6], s4[6] /* stage 6: r20 = s3 ^ s4, a temporary consumed in the next group */
| ;;
| (p[6+1])st8.nta [r8] = d[1], 8 /* stage 7: store the finished result of the previous rotation, pointer += 8 */
| (p[6]) xor d[0] = d[0], r20 /* stage 6: combine the two partial results */
| br.ctop.dptk.few 0b /* pipelined-loop branch: rotates registers and predicates, counts down ar.lc then ar.ec */
| ;;
| mov ar.lc = r30 /* restore the saved loop count */
| mov pr = r29, -1 /* restore the saved predicates, mask -1 selects all of them */
| br.ret.sptk.few rp
| END(xor_ia64_4)
| EXPORT_SYMBOL(xor_ia64_4)
|
| GLOBAL_ENTRY(xor_ia64_5) /*
| * xor_ia64_5: XOR five buffers together 8 bytes at a time with a
| * software-pipelined loop and store the result over the first buffer.
| *
| *   in0 - length. It is shifted right by 3 below, so it is treated as a
| *         byte count and converted to a count of 8-byte words.
| *   in1 - first source and also the destination (copied to r16 and r8)
| *   in2 - second source (copied to r17)
| *   in3 - third source (copied to r18)
| *   in4 - fourth source (copied to r19)
| *   in5 - fifth source (copied to r20)
| *
| * Scratch registers written: r8, r16-r21, r29, r30, r31.
| *
| * NOTE(review): no check for in0 below 8 is visible here. In that case the
| * loop count computed below would be -1. Callers presumably always pass a
| * positive multiple of 8 - confirm.
| */
| .prologue
| .fframe 0 /* unwind info: fixed memory stack frame of size 0 */
| .save ar.pfs, r31 /* unwind info: the previous ar.pfs is kept in r31 */
| alloc r31 = ar.pfs, 6, 0, 34, 40 /* 6 inputs, 0 locals, 34 outputs, 40 rotating: the whole 40-register frame rotates, in0-in5 included */
| .save ar.lc, r30 /* unwind info: the previous ar.lc is kept in r30 */
| mov r30 = ar.lc /* saved so that ar.lc can be restored before returning */
| .save pr, r29 /* unwind info: the previous predicates are kept in r29 */
| mov r29 = pr /* saved so that pr can be restored before returning */
| ;;
| .body
| mov r8 = in1 /* r8 = store pointer, so results go back into the in1 buffer */
| mov ar.ec = 6 + 2 /* epilogue count = 8, one per pipeline stage named by .rotp p[6+2] */
| shr in0 = in0, 3 /* in0 = length divided by 8 */
| ;;
| adds in0 = -1, in0 /* the loop counter is loaded with the word count minus one */
| mov r16 = in1 /* r16 = load pointer for source 1 */
| mov r17 = in2 /* r17 = load pointer for source 2 */
| ;;
| mov r18 = in3 /* r18 = load pointer for source 3 */
| mov ar.lc = in0
| mov pr.rot = 1 << 16 /* rotating predicates: only bit 16, the first one, starts set */
| mov r19 = in4 /* r19 = load pointer for source 4 */
| mov r20 = in5 /* r20 = load pointer for source 5 */
| ;;
| .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2] /* names for the rotating registers: 5 * 7 + 2 = 37 of the 40 */
| .rotp p[6+2] /* names for the 8 rotating stage predicates */
| 0:
| (p[0]) ld8.nta s1[0] = [r16], 8 /* stage 0: load one word from source 1, pointer += 8 */
| (p[0]) ld8.nta s2[0] = [r17], 8 /* stage 0: load one word from source 2, pointer += 8 */
| (p[6]) xor d[0] = s1[6], s2[6] /* stage 6: d[0] = s1 ^ s2 for the words loaded six rotations earlier */
| (p[0]) ld8.nta s3[0] = [r18], 8 /* stage 0: load one word from source 3, pointer += 8 */
| (p[0]) ld8.nta s4[0] = [r19], 8 /* stage 0: load one word from source 4, pointer += 8 */
| (p[6]) xor r21 = s3[6], s4[6] /* stage 6: r21 = s3 ^ s4, a temporary consumed in the next group */
| ;;
| (p[0]) ld8.nta s5[0] = [r20], 8 /* stage 0: load one word from source 5, pointer += 8 */
| (p[6+1])st8.nta [r8] = d[1], 8 /* stage 7: store the finished result of the previous rotation, pointer += 8 */
| (p[6]) xor d[0] = d[0], r21 /* stage 6: combine the two partial results */
| ;;
| (p[6]) xor d[0] = d[0], s5[6] /* stage 6: fold source 5 in, which completes this word */
| nop.f 0
| br.ctop.dptk.few 0b /* pipelined-loop branch: rotates registers and predicates, counts down ar.lc then ar.ec */
| ;;
| mov ar.lc = r30 /* restore the saved loop count */
| mov pr = r29, -1 /* restore the saved predicates, mask -1 selects all of them */
| br.ret.sptk.few rp
| END(xor_ia64_5)
| EXPORT_SYMBOL(xor_ia64_5)
|
|