.. | .. |
---|
188 | 188 | { |
---|
189 | 189 | u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel; |
---|
190 | 190 | u32 ppw = 32/bpp, spitch = (image->width + 7)/8; |
---|
191 | | - u32 bit_mask, end_mask, eorx, shift; |
---|
192 | | - const char *s = image->data, *src; |
---|
| 191 | + u32 bit_mask, eorx, shift; |
---|
| 192 | + const u8 *s = image->data, *src; |
---|
193 | 193 | u32 *dst; |
---|
194 | | - const u32 *tab = NULL; |
---|
| 194 | + const u32 *tab; |
---|
| 195 | + size_t tablen; |
---|
| 196 | + u32 colortab[16]; |
---|
195 | 197 | int i, j, k; |
---|
196 | 198 | |
---|
197 | 199 | switch (bpp) { |
---|
198 | 200 | case 8: |
---|
199 | 201 | tab = fb_be_math(p) ? cfb_tab8_be : cfb_tab8_le; |
---|
| 202 | + tablen = 16; |
---|
200 | 203 | break; |
---|
201 | 204 | case 16: |
---|
202 | 205 | tab = fb_be_math(p) ? cfb_tab16_be : cfb_tab16_le; |
---|
| 206 | + tablen = 4; |
---|
203 | 207 | break; |
---|
204 | 208 | case 32: |
---|
205 | | - default: |
---|
206 | 209 | tab = cfb_tab32; |
---|
| 210 | + tablen = 2; |
---|
207 | 211 | break; |
---|
| 212 | + default: |
---|
| 213 | + return; |
---|
208 | 214 | } |
---|
209 | 215 | |
---|
210 | 216 | for (i = ppw-1; i--; ) { |
---|
.. | .. |
---|
218 | 224 | eorx = fgx ^ bgx; |
---|
219 | 225 | k = image->width/ppw; |
---|
220 | 226 | |
---|
| 227 | + for (i = 0; i < tablen; ++i) |
---|
| 228 | + colortab[i] = (tab[i] & eorx) ^ bgx; |
---|
| 229 | + |
---|
221 | 230 | for (i = image->height; i--; ) { |
---|
222 | 231 | dst = dst1; |
---|
223 | 232 | shift = 8; |
---|
224 | 233 | src = s; |
---|
225 | 234 | |
---|
226 | | - for (j = k; j--; ) { |
---|
| 235 | + /* |
---|
| 236 | + * Manually unroll the per-line copying loop for better |
---|
| 237 | + * performance. This works until we processed the last |
---|
| 238 | + * completely filled source byte (inclusive). |
---|
| 239 | + */ |
---|
| 240 | + switch (ppw) { |
---|
| 241 | + case 4: /* 8 bpp */ |
---|
| 242 | + for (j = k; j >= 2; j -= 2, ++src) { |
---|
| 243 | + *dst++ = colortab[(*src >> 4) & bit_mask]; |
---|
| 244 | + *dst++ = colortab[(*src >> 0) & bit_mask]; |
---|
| 245 | + } |
---|
| 246 | + break; |
---|
| 247 | + case 2: /* 16 bpp */ |
---|
| 248 | + for (j = k; j >= 4; j -= 4, ++src) { |
---|
| 249 | + *dst++ = colortab[(*src >> 6) & bit_mask]; |
---|
| 250 | + *dst++ = colortab[(*src >> 4) & bit_mask]; |
---|
| 251 | + *dst++ = colortab[(*src >> 2) & bit_mask]; |
---|
| 252 | + *dst++ = colortab[(*src >> 0) & bit_mask]; |
---|
| 253 | + } |
---|
| 254 | + break; |
---|
| 255 | + case 1: /* 32 bpp */ |
---|
| 256 | + for (j = k; j >= 8; j -= 8, ++src) { |
---|
| 257 | + *dst++ = colortab[(*src >> 7) & bit_mask]; |
---|
| 258 | + *dst++ = colortab[(*src >> 6) & bit_mask]; |
---|
| 259 | + *dst++ = colortab[(*src >> 5) & bit_mask]; |
---|
| 260 | + *dst++ = colortab[(*src >> 4) & bit_mask]; |
---|
| 261 | + *dst++ = colortab[(*src >> 3) & bit_mask]; |
---|
| 262 | + *dst++ = colortab[(*src >> 2) & bit_mask]; |
---|
| 263 | + *dst++ = colortab[(*src >> 1) & bit_mask]; |
---|
| 264 | + *dst++ = colortab[(*src >> 0) & bit_mask]; |
---|
| 265 | + } |
---|
| 266 | + break; |
---|
| 267 | + } |
---|
| 268 | + |
---|
| 269 | + /* |
---|
| 270 | + * For image widths that are not a multiple of 8, there |
---|
| 271 | + * are trailing pixels left on the current line. Print |
---|
| 272 | + * them as well. |
---|
| 273 | + */ |
---|
| 274 | + for (; j--; ) { |
---|
227 | 275 | shift -= ppw; |
---|
228 | | - end_mask = tab[(*src >> shift) & bit_mask]; |
---|
229 | | - *dst++ = (end_mask & eorx) ^ bgx; |
---|
| 276 | + *dst++ = colortab[(*src >> shift) & bit_mask]; |
---|
230 | 277 | if (!shift) { |
---|
231 | 278 | shift = 8; |
---|
232 | | - src++; |
---|
| 279 | + ++src; |
---|
233 | 280 | } |
---|
234 | 281 | } |
---|
| 282 | + |
---|
235 | 283 | dst1 += p->fix.line_length; |
---|
236 | 284 | s += spitch; |
---|
237 | 285 | } |
---|