hc
2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/drivers/video/fbdev/core/sysimgblt.c
....@@ -188,23 +188,29 @@
188188 {
189189 u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
190190 u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
191
- u32 bit_mask, end_mask, eorx, shift;
192
- const char *s = image->data, *src;
191
+ u32 bit_mask, eorx, shift;
192
+ const u8 *s = image->data, *src;
193193 u32 *dst;
194
- const u32 *tab = NULL;
194
+ const u32 *tab;
195
+ size_t tablen;
196
+ u32 colortab[16];
195197 int i, j, k;
196198
197199 switch (bpp) {
198200 case 8:
199201 tab = fb_be_math(p) ? cfb_tab8_be : cfb_tab8_le;
202
+ tablen = 16;
200203 break;
201204 case 16:
202205 tab = fb_be_math(p) ? cfb_tab16_be : cfb_tab16_le;
206
+ tablen = 4;
203207 break;
204208 case 32:
205
- default:
206209 tab = cfb_tab32;
210
+ tablen = 2;
207211 break;
212
+ default:
213
+ return;
208214 }
209215
210216 for (i = ppw-1; i--; ) {
....@@ -218,20 +224,62 @@
218224 eorx = fgx ^ bgx;
219225 k = image->width/ppw;
220226
227
+ for (i = 0; i < tablen; ++i)
228
+ colortab[i] = (tab[i] & eorx) ^ bgx;
229
+
221230 for (i = image->height; i--; ) {
222231 dst = dst1;
223232 shift = 8;
224233 src = s;
225234
226
- for (j = k; j--; ) {
235
+ /*
236
+ * Manually unroll the per-line copying loop for better
237
+ * performance. This works until we processed the last
238
+ * completely filled source byte (inclusive).
239
+ */
240
+ switch (ppw) {
241
+ case 4: /* 8 bpp */
242
+ for (j = k; j >= 2; j -= 2, ++src) {
243
+ *dst++ = colortab[(*src >> 4) & bit_mask];
244
+ *dst++ = colortab[(*src >> 0) & bit_mask];
245
+ }
246
+ break;
247
+ case 2: /* 16 bpp */
248
+ for (j = k; j >= 4; j -= 4, ++src) {
249
+ *dst++ = colortab[(*src >> 6) & bit_mask];
250
+ *dst++ = colortab[(*src >> 4) & bit_mask];
251
+ *dst++ = colortab[(*src >> 2) & bit_mask];
252
+ *dst++ = colortab[(*src >> 0) & bit_mask];
253
+ }
254
+ break;
255
+ case 1: /* 32 bpp */
256
+ for (j = k; j >= 8; j -= 8, ++src) {
257
+ *dst++ = colortab[(*src >> 7) & bit_mask];
258
+ *dst++ = colortab[(*src >> 6) & bit_mask];
259
+ *dst++ = colortab[(*src >> 5) & bit_mask];
260
+ *dst++ = colortab[(*src >> 4) & bit_mask];
261
+ *dst++ = colortab[(*src >> 3) & bit_mask];
262
+ *dst++ = colortab[(*src >> 2) & bit_mask];
263
+ *dst++ = colortab[(*src >> 1) & bit_mask];
264
+ *dst++ = colortab[(*src >> 0) & bit_mask];
265
+ }
266
+ break;
267
+ }
268
+
269
+ /*
270
+ * For image widths that are not a multiple of 8, there
271
+ * are trailing pixels left on the current line. Print
272
+ * them as well.
273
+ */
274
+ for (; j--; ) {
227275 shift -= ppw;
228
- end_mask = tab[(*src >> shift) & bit_mask];
229
- *dst++ = (end_mask & eorx) ^ bgx;
276
+ *dst++ = colortab[(*src >> shift) & bit_mask];
230277 if (!shift) {
231278 shift = 8;
232
- src++;
279
+ ++src;
233280 }
234281 }
282
+
235283 dst1 += p->fix.line_length;
236284 s += spitch;
237285 }