.. | ..
19 | 19 | extern __wsum csum_partial(const void *, int, __wsum);
20 | 20 |
21 | 21 | /*
22 | | - * The same as csum_partial, but copies from src while it checksums.
23 | | - *
24 | | - * Here even more important to align src and dst on a 32-bit (or even
25 | | - * better 64-bit) boundary
26 | | - */
27 | | -extern __wsum csum_partial_copy_nocheck(const void *, void *, int, __wsum);
28 | | -
29 | | -/*
30 | | - * this is a new version of the above that records errors it finds in *errp,
31 | | - * but continues and zeros the rest of the buffer.
32 | | - */
33 | | -extern __wsum csum_partial_copy_from_user(const void __user *src,
34 | | - void *dst, int len, __wsum sum, int *errp);
35 | | -
36 | | -/*
37 | 22 | * Optimized for IP headers, which always checksum on 4 octet boundaries.
38 | 23 | *
39 | 24 | * Written by Randolph Chung <tausq@debian.org>, and then mucked with by
.. | ..
42 | 27 | static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
43 | 28 | {
44 | 29 | unsigned int sum;
| 30 | + unsigned long t0, t1, t2;
45 | 31 |
46 | 32 | __asm__ __volatile__ (
47 | 33 | " ldws,ma 4(%1), %0\n"
48 | 34 | " addib,<= -4, %2, 2f\n"
49 | 35 | "\n"
50 | | -" ldws 4(%1), %%r20\n"
51 | | -" ldws 8(%1), %%r21\n"
52 | | -" add %0, %%r20, %0\n"
53 | | -" ldws,ma 12(%1), %%r19\n"
54 | | -" addc %0, %%r21, %0\n"
55 | | -" addc %0, %%r19, %0\n"
56 | | -"1: ldws,ma 4(%1), %%r19\n"
| 36 | +" ldws 4(%1), %4\n"
| 37 | +" ldws 8(%1), %5\n"
| 38 | +" add %0, %4, %0\n"
| 39 | +" ldws,ma 12(%1), %3\n"
| 40 | +" addc %0, %5, %0\n"
| 41 | +" addc %0, %3, %0\n"
| 42 | +"1: ldws,ma 4(%1), %3\n"
57 | 43 | " addib,< 0, %2, 1b\n"
58 | | -" addc %0, %%r19, %0\n"
| 44 | +" addc %0, %3, %0\n"
59 | 45 | "\n"
60 | | -" extru %0, 31, 16, %%r20\n"
61 | | -" extru %0, 15, 16, %%r21\n"
62 | | -" addc %%r20, %%r21, %0\n"
63 | | -" extru %0, 15, 16, %%r21\n"
64 | | -" add %0, %%r21, %0\n"
| 46 | +" extru %0, 31, 16, %4\n"
| 47 | +" extru %0, 15, 16, %5\n"
| 48 | +" addc %4, %5, %0\n"
| 49 | +" extru %0, 15, 16, %5\n"
| 50 | +" add %0, %5, %0\n"
65 | 51 | " subi -1, %0, %0\n"
66 | 52 | "2:\n"
67 | | - : "=r" (sum), "=r" (iph), "=r" (ihl)
| 53 | + : "=r" (sum), "=r" (iph), "=r" (ihl), "=r" (t0), "=r" (t1), "=r" (t2)
68 | 54 | : "1" (iph), "2" (ihl)
69 | | - : "r19", "r20", "r21", "memory");
| 55 | + : "memory");
70 | 56 |
71 | 57 | return (__force __sum16)sum;
72 | 58 | }
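This hunk does two things: it drops the csum_partial_copy_nocheck() and csum_partial_copy_from_user() declarations, and it rewrites ip_fast_csum() so the scratch values live in the compiler-allocated temporaries t0-t2 (the new "=r" outputs %3-%5) rather than the hard-coded r19-r21, which is why the clobber list shrinks to just "memory". For reference, here is a plain-C sketch of what the asm computes; it is not part of the patch, and ip_fast_csum_ref is an illustrative name:

    #include <stdint.h>

    /* Portable model of ip_fast_csum(): one's-complement sum of ihl
     * 32-bit words, folded to 16 bits and inverted (the asm's final
     * "subi -1, %0, %0" is the bitwise NOT). */
    static uint16_t ip_fast_csum_ref(const void *iph, unsigned int ihl)
    {
            const uint32_t *p = iph;
            uint64_t sum = 0;

            while (ihl--)           /* ihl counts 32-bit words */
                    sum += *p++;

            while (sum >> 16)       /* fold carries back in */
                    sum = (sum & 0xffff) + (sum >> 16);

            return (uint16_t)~sum;
    }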
.. | ..
126 | 112 | __u32 len, __u8 proto,
127 | 113 | __wsum sum)
128 | 114 | {
| 115 | + unsigned long t0, t1, t2, t3;
| 116 | +
| 117 | + len += proto; /* add 16-bit proto + len */
| 118 | +
129 | 119 | __asm__ __volatile__ (
130 | 120 |
131 | 121 | #if BITS_PER_LONG > 32
.. | ..
136 | 126 | ** Try to keep 4 registers with "live" values ahead of the ALU.
137 | 127 | */
138 | 128 |
139 | | -" ldd,ma 8(%1), %%r19\n" /* get 1st saddr word */
140 | | -" ldd,ma 8(%2), %%r20\n" /* get 1st daddr word */
141 | | -" add %8, %3, %3\n"/* add 16-bit proto + len */
142 | | -" add %%r19, %0, %0\n"
143 | | -" ldd,ma 8(%1), %%r21\n" /* 2cd saddr */
144 | | -" ldd,ma 8(%2), %%r22\n" /* 2cd daddr */
145 | | -" add,dc %%r20, %0, %0\n"
146 | | -" add,dc %%r21, %0, %0\n"
147 | | -" add,dc %%r22, %0, %0\n"
| 129 | +" ldd,ma 8(%1), %4\n" /* get 1st saddr word */
| 130 | +" ldd,ma 8(%2), %5\n" /* get 1st daddr word */
| 131 | +" add %4, %0, %0\n"
| 132 | +" ldd,ma 8(%1), %6\n" /* 2nd saddr */
| 133 | +" ldd,ma 8(%2), %7\n" /* 2nd daddr */
| 134 | +" add,dc %5, %0, %0\n"
| 135 | +" add,dc %6, %0, %0\n"
| 136 | +" add,dc %7, %0, %0\n"
148 | 137 | " add,dc %3, %0, %0\n" /* fold in proto+len | carry bit */
149 | | -" extrd,u %0, 31, 32, %%r19\n" /* copy upper half down */
150 | | -" depdi 0, 31, 32, %0\n" /* clear upper half */
151 | | -" add %%r19, %0, %0\n" /* fold into 32-bits */
152 | | -" addc 0, %0, %0\n" /* add carry */
| 138 | +" extrd,u %0, 31, 32, %4\n"/* copy upper half down */
| 139 | +" depdi 0, 31, 32, %0\n"/* clear upper half */
| 140 | +" add %4, %0, %0\n" /* fold into 32-bits */
| 141 | +" addc 0, %0, %0\n" /* add carry */
153 | 142 |
154 | 143 | #else
155 | 144 |
.. | ..
158 | 147 | ** Insn stream is serialized on the carry bit here too.
159 | 148 | ** result from the previous operation (eg r0 + x)
160 | 149 | */
161 | | -
162 | | -" ldw,ma 4(%1), %%r19\n" /* get 1st saddr word */
163 | | -" ldw,ma 4(%2), %%r20\n" /* get 1st daddr word */
164 | | -" add %8, %3, %3\n" /* add 16-bit proto + len */
165 | | -" add %%r19, %0, %0\n"
166 | | -" ldw,ma 4(%1), %%r21\n" /* 2cd saddr */
167 | | -" addc %%r20, %0, %0\n"
168 | | -" ldw,ma 4(%2), %%r22\n" /* 2cd daddr */
169 | | -" addc %%r21, %0, %0\n"
170 | | -" ldw,ma 4(%1), %%r19\n" /* 3rd saddr */
171 | | -" addc %%r22, %0, %0\n"
172 | | -" ldw,ma 4(%2), %%r20\n" /* 3rd daddr */
173 | | -" addc %%r19, %0, %0\n"
174 | | -" ldw,ma 4(%1), %%r21\n" /* 4th saddr */
175 | | -" addc %%r20, %0, %0\n"
176 | | -" ldw,ma 4(%2), %%r22\n" /* 4th daddr */
177 | | -" addc %%r21, %0, %0\n"
178 | | -" addc %%r22, %0, %0\n"
| 150 | +" ldw,ma 4(%1), %4\n" /* get 1st saddr word */
| 151 | +" ldw,ma 4(%2), %5\n" /* get 1st daddr word */
| 152 | +" add %4, %0, %0\n"
| 153 | +" ldw,ma 4(%1), %6\n" /* 2nd saddr */
| 154 | +" addc %5, %0, %0\n"
| 155 | +" ldw,ma 4(%2), %7\n" /* 2nd daddr */
| 156 | +" addc %6, %0, %0\n"
| 157 | +" ldw,ma 4(%1), %4\n" /* 3rd saddr */
| 158 | +" addc %7, %0, %0\n"
| 159 | +" ldw,ma 4(%2), %5\n" /* 3rd daddr */
| 160 | +" addc %4, %0, %0\n"
| 161 | +" ldw,ma 4(%1), %6\n" /* 4th saddr */
| 162 | +" addc %5, %0, %0\n"
| 163 | +" ldw,ma 4(%2), %7\n" /* 4th daddr */
| 164 | +" addc %6, %0, %0\n"
| 165 | +" addc %7, %0, %0\n"
179 | 166 | " addc %3, %0, %0\n" /* fold in proto+len, catch carry */
180 | 167 |
181 | 168 | #endif
182 | | - : "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len)
183 | | - : "0" (sum), "1" (saddr), "2" (daddr), "3" (len), "r" (proto)
184 | | - : "r19", "r20", "r21", "r22", "memory");
| 169 | + : "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len),
| 170 | + "=r" (t0), "=r" (t1), "=r" (t2), "=r" (t3)
| 171 | + : "0" (sum), "1" (saddr), "2" (daddr), "3" (len)
| 172 | + : "memory");
185 | 173 | return csum_fold(sum);
186 | | -}
187 | | -
188 | | -/*
189 | | - * Copy and checksum to user
190 | | - */
191 | | -#define HAVE_CSUM_COPY_USER
192 | | -static __inline__ __wsum csum_and_copy_to_user(const void *src,
193 | | - void __user *dst,
194 | | - int len, __wsum sum,
195 | | - int *err_ptr)
196 | | -{
197 | | - /* code stolen from include/asm-mips64 */
198 | | - sum = csum_partial(src, len, sum);
199 | | -
200 | | - if (copy_to_user(dst, src, len)) {
201 | | - *err_ptr = -EFAULT;
202 | | - return (__force __wsum)-1;
203 | | - }
204 | | -
205 | | - return sum;
206 | 174 | }
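The same conversion applies in this hunk: the scratch registers r19-r22 become the dummy outputs t0-t3 (%4-%7), the "r" (proto) input (the old %8) disappears because len += proto is now computed in C before the asm, the hunk additionally deletes csum_and_copy_to_user() along with HAVE_CSUM_COPY_USER, and the clobber list again reduces to "memory". For reference, a plain-C model of the intended one's-complement arithmetic the addc chains perform (illustrative only; csum_ipv6_magic_ref is not a real kernel symbol):

    #include <stdint.h>

    /* One's-complement accumulation of the IPv6 pseudo header:
     * four saddr words, four daddr words, and len + proto, with
     * end-around carry.  csum_fold() then narrows it to 16 bits. */
    static uint32_t csum_ipv6_magic_ref(const uint32_t saddr[4],
                                        const uint32_t daddr[4],
                                        uint32_t lenproto, uint32_t sum)
    {
            uint64_t acc = sum;
            int i;

            for (i = 0; i < 4; i++)
                    acc += (uint64_t)saddr[i] + daddr[i];
            acc += lenproto;

            while (acc >> 32)       /* end-around carry, like addc */
                    acc = (acc & 0xffffffff) + (acc >> 32);

            return (uint32_t)acc;
    }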
207 | 175 |
208 | 176 | #endif
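Both hunks follow the same recipe, worth stating once: each fixed scratch register in an asm body becomes a dummy "=r" output, so GCC chooses the registers itself and nothing needs to be named in the clobber list. A minimal standalone sketch of that pattern, assuming the PA-RISC syntax used above (two_word_sum and t are illustrative names, not kernel code; the "=&r" earlyclobbers are extra caution for this self-contained example, since %0 is written before the last read of %2):

    /* Sum two adjacent words; the scratch value lives in a dummy
     * output operand instead of a clobbered named register. */
    static inline unsigned int two_word_sum(const unsigned int *p)
    {
            unsigned int sum;
            unsigned long t;

            __asm__ (
            "       ldw     0(%2), %0\n"
            "       ldw     4(%2), %1\n"
            "       add     %1, %0, %0\n"
                    : "=&r" (sum), "=&r" (t)
                    : "r" (p)
                    : "memory");
            return sum;
    }

The win over the clobber approach is that the compiler may pick whatever registers are cheapest at each call site instead of always giving up r19-r22.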