forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-09 958e46acc8e900e8569dd467c1af9b8d2d019394
kernel/drivers/gpu/drm/vc4/vc4_plane.c
....@@ -1,9 +1,6 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Copyright (C) 2015 Broadcom
3
- *
4
- * This program is free software; you can redistribute it and/or modify
5
- * it under the terms of the GNU General Public License version 2 as
6
- * published by the Free Software Foundation.
74 */
85
96 /**
....@@ -20,10 +17,14 @@
2017
2118 #include <drm/drm_atomic.h>
2219 #include <drm/drm_atomic_helper.h>
20
+#include <drm/drm_atomic_uapi.h>
2321 #include <drm/drm_fb_cma_helper.h>
22
+#include <drm/drm_fourcc.h>
23
+#include <drm/drm_gem_framebuffer_helper.h>
2424 #include <drm/drm_plane_helper.h>
2525
2626 #include "uapi/drm/vc4_drm.h"
27
+
2728 #include "vc4_drv.h"
2829 #include "vc4_regs.h"
2930
....@@ -31,45 +32,60 @@
3132 u32 drm; /* DRM_FORMAT_* */
3233 u32 hvs; /* HVS_FORMAT_* */
3334 u32 pixel_order;
35
+ u32 pixel_order_hvs5;
3436 } hvs_formats[] = {
3537 {
36
- .drm = DRM_FORMAT_XRGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
38
+ .drm = DRM_FORMAT_XRGB8888,
39
+ .hvs = HVS_PIXEL_FORMAT_RGBA8888,
3740 .pixel_order = HVS_PIXEL_ORDER_ABGR,
41
+ .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
3842 },
3943 {
40
- .drm = DRM_FORMAT_ARGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
44
+ .drm = DRM_FORMAT_ARGB8888,
45
+ .hvs = HVS_PIXEL_FORMAT_RGBA8888,
4146 .pixel_order = HVS_PIXEL_ORDER_ABGR,
47
+ .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
4248 },
4349 {
44
- .drm = DRM_FORMAT_ABGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
50
+ .drm = DRM_FORMAT_ABGR8888,
51
+ .hvs = HVS_PIXEL_FORMAT_RGBA8888,
4552 .pixel_order = HVS_PIXEL_ORDER_ARGB,
53
+ .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
4654 },
4755 {
48
- .drm = DRM_FORMAT_XBGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
56
+ .drm = DRM_FORMAT_XBGR8888,
57
+ .hvs = HVS_PIXEL_FORMAT_RGBA8888,
4958 .pixel_order = HVS_PIXEL_ORDER_ARGB,
59
+ .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
5060 },
5161 {
52
- .drm = DRM_FORMAT_RGB565, .hvs = HVS_PIXEL_FORMAT_RGB565,
62
+ .drm = DRM_FORMAT_RGB565,
63
+ .hvs = HVS_PIXEL_FORMAT_RGB565,
5364 .pixel_order = HVS_PIXEL_ORDER_XRGB,
5465 },
5566 {
56
- .drm = DRM_FORMAT_BGR565, .hvs = HVS_PIXEL_FORMAT_RGB565,
67
+ .drm = DRM_FORMAT_BGR565,
68
+ .hvs = HVS_PIXEL_FORMAT_RGB565,
5769 .pixel_order = HVS_PIXEL_ORDER_XBGR,
5870 },
5971 {
60
- .drm = DRM_FORMAT_ARGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551,
72
+ .drm = DRM_FORMAT_ARGB1555,
73
+ .hvs = HVS_PIXEL_FORMAT_RGBA5551,
6174 .pixel_order = HVS_PIXEL_ORDER_ABGR,
6275 },
6376 {
64
- .drm = DRM_FORMAT_XRGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551,
77
+ .drm = DRM_FORMAT_XRGB1555,
78
+ .hvs = HVS_PIXEL_FORMAT_RGBA5551,
6579 .pixel_order = HVS_PIXEL_ORDER_ABGR,
6680 },
6781 {
68
- .drm = DRM_FORMAT_RGB888, .hvs = HVS_PIXEL_FORMAT_RGB888,
82
+ .drm = DRM_FORMAT_RGB888,
83
+ .hvs = HVS_PIXEL_FORMAT_RGB888,
6984 .pixel_order = HVS_PIXEL_ORDER_XRGB,
7085 },
7186 {
72
- .drm = DRM_FORMAT_BGR888, .hvs = HVS_PIXEL_FORMAT_RGB888,
87
+ .drm = DRM_FORMAT_BGR888,
88
+ .hvs = HVS_PIXEL_FORMAT_RGB888,
7389 .pixel_order = HVS_PIXEL_ORDER_XBGR,
7490 },
7591 {
....@@ -128,17 +144,17 @@
128144
129145 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
130146 {
131
- if (dst > src)
132
- return VC4_SCALING_PPF;
133
- else if (dst < src)
134
- return VC4_SCALING_TPZ;
135
- else
147
+ if (dst == src)
136148 return VC4_SCALING_NONE;
149
+ if (3 * dst >= 2 * src)
150
+ return VC4_SCALING_PPF;
151
+ else
152
+ return VC4_SCALING_TPZ;
137153 }
138154
139155 static bool plane_enabled(struct drm_plane_state *state)
140156 {
141
- return state->fb && state->crtc;
157
+ return state->fb && !WARN_ON(!state->crtc);
142158 }
143159
144160 static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
....@@ -153,6 +169,7 @@
153169 return NULL;
154170
155171 memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
172
+ vc4_state->dlist_initialized = 0;
156173
157174 __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
158175
....@@ -176,7 +193,7 @@
176193 struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
177194 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
178195
179
- if (vc4_state->lbm.allocated) {
196
+ if (drm_mm_node_allocated(&vc4_state->lbm)) {
180197 unsigned long irqflags;
181198
182199 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
....@@ -200,12 +217,10 @@
200217 if (!vc4_state)
201218 return;
202219
203
- plane->state = &vc4_state->base;
204
- plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE;
205
- vc4_state->base.plane = plane;
220
+ __drm_atomic_helper_plane_reset(plane, &vc4_state->base);
206221 }
207222
208
-static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
223
+static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state)
209224 {
210225 if (vc4_state->dlist_count == vc4_state->dlist_size) {
211226 u32 new_size = max(4u, vc4_state->dlist_count * 2);
....@@ -220,7 +235,15 @@
220235 vc4_state->dlist_size = new_size;
221236 }
222237
223
- vc4_state->dlist[vc4_state->dlist_count++] = val;
238
+ vc4_state->dlist_count++;
239
+}
240
+
241
+static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
242
+{
243
+ unsigned int idx = vc4_state->dlist_count;
244
+
245
+ vc4_dlist_counter_increment(vc4_state);
246
+ vc4_state->dlist[idx] = val;
224247 }
225248
226249 /* Returns the scl0/scl1 field based on whether the dimensions need to
....@@ -258,39 +281,96 @@
258281 }
259282 }
260283
284
+static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
285
+{
286
+ struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
287
+ unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
288
+ struct drm_crtc_state *crtc_state;
289
+
290
+ crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
291
+ pstate->crtc);
292
+
293
+ vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
294
+ if (!left && !right && !top && !bottom)
295
+ return 0;
296
+
297
+ if (left + right >= crtc_state->mode.hdisplay ||
298
+ top + bottom >= crtc_state->mode.vdisplay)
299
+ return -EINVAL;
300
+
301
+ adjhdisplay = crtc_state->mode.hdisplay - (left + right);
302
+ vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
303
+ adjhdisplay,
304
+ crtc_state->mode.hdisplay);
305
+ vc4_pstate->crtc_x += left;
306
+ if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right)
307
+ vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right;
308
+
309
+ adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
310
+ vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
311
+ adjvdisplay,
312
+ crtc_state->mode.vdisplay);
313
+ vc4_pstate->crtc_y += top;
314
+ if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom)
315
+ vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom;
316
+
317
+ vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
318
+ adjhdisplay,
319
+ crtc_state->mode.hdisplay);
320
+ vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
321
+ adjvdisplay,
322
+ crtc_state->mode.vdisplay);
323
+
324
+ if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
325
+ return -EINVAL;
326
+
327
+ return 0;
328
+}
329
+
261330 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
262331 {
263
- struct drm_plane *plane = state->plane;
264332 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
265333 struct drm_framebuffer *fb = state->fb;
266334 struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
267
- u32 subpixel_src_mask = (1 << 16) - 1;
268
- u32 format = fb->format->format;
269335 int num_planes = fb->format->num_planes;
270
- u32 h_subsample = 1;
271
- u32 v_subsample = 1;
272
- int i;
336
+ struct drm_crtc_state *crtc_state;
337
+ u32 h_subsample = fb->format->hsub;
338
+ u32 v_subsample = fb->format->vsub;
339
+ int i, ret;
340
+
341
+ crtc_state = drm_atomic_get_existing_crtc_state(state->state,
342
+ state->crtc);
343
+ if (!crtc_state) {
344
+ DRM_DEBUG_KMS("Invalid crtc state\n");
345
+ return -EINVAL;
346
+ }
347
+
348
+ ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
349
+ INT_MAX, true, true);
350
+ if (ret)
351
+ return ret;
273352
274353 for (i = 0; i < num_planes; i++)
275354 vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
276355
277
- /* We don't support subpixel source positioning for scaling. */
278
- if ((state->src_x & subpixel_src_mask) ||
279
- (state->src_y & subpixel_src_mask) ||
280
- (state->src_w & subpixel_src_mask) ||
281
- (state->src_h & subpixel_src_mask)) {
282
- return -EINVAL;
283
- }
356
+ /*
357
+ * We don't support subpixel source positioning for scaling,
358
+ * but fractional coordinates can be generated by clipping,
359
+ * so just round for now.
360
+ */
361
+ vc4_state->src_x = DIV_ROUND_CLOSEST(state->src.x1, 1 << 16);
362
+ vc4_state->src_y = DIV_ROUND_CLOSEST(state->src.y1, 1 << 16);
363
+ vc4_state->src_w[0] = DIV_ROUND_CLOSEST(state->src.x2, 1 << 16) - vc4_state->src_x;
364
+ vc4_state->src_h[0] = DIV_ROUND_CLOSEST(state->src.y2, 1 << 16) - vc4_state->src_y;
284365
285
- vc4_state->src_x = state->src_x >> 16;
286
- vc4_state->src_y = state->src_y >> 16;
287
- vc4_state->src_w[0] = state->src_w >> 16;
288
- vc4_state->src_h[0] = state->src_h >> 16;
366
+ vc4_state->crtc_x = state->dst.x1;
367
+ vc4_state->crtc_y = state->dst.y1;
368
+ vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
369
+ vc4_state->crtc_h = state->dst.y2 - state->dst.y1;
289370
290
- vc4_state->crtc_x = state->crtc_x;
291
- vc4_state->crtc_y = state->crtc_y;
292
- vc4_state->crtc_w = state->crtc_w;
293
- vc4_state->crtc_h = state->crtc_h;
371
+ ret = vc4_plane_margins_adj(state);
372
+ if (ret)
373
+ return ret;
294374
295375 vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
296376 vc4_state->crtc_w);
....@@ -303,8 +383,6 @@
303383 if (num_planes > 1) {
304384 vc4_state->is_yuv = true;
305385
306
- h_subsample = drm_format_horz_chroma_subsampling(format);
307
- v_subsample = drm_format_vert_chroma_subsampling(format);
308386 vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
309387 vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;
310388
....@@ -327,41 +405,6 @@
327405 vc4_state->is_yuv = false;
328406 vc4_state->x_scaling[1] = VC4_SCALING_NONE;
329407 vc4_state->y_scaling[1] = VC4_SCALING_NONE;
330
- }
331
-
332
- /* No configuring scaling on the cursor plane, since it gets
333
- non-vblank-synced updates, and scaling requires requires
334
- LBM changes which have to be vblank-synced.
335
- */
336
- if (plane->type == DRM_PLANE_TYPE_CURSOR && !vc4_state->is_unity)
337
- return -EINVAL;
338
-
339
- /* Clamp the on-screen start x/y to 0. The hardware doesn't
340
- * support negative y, and negative x wastes bandwidth.
341
- */
342
- if (vc4_state->crtc_x < 0) {
343
- for (i = 0; i < num_planes; i++) {
344
- u32 cpp = fb->format->cpp[i];
345
- u32 subs = ((i == 0) ? 1 : h_subsample);
346
-
347
- vc4_state->offsets[i] += (cpp *
348
- (-vc4_state->crtc_x) / subs);
349
- }
350
- vc4_state->src_w[0] += vc4_state->crtc_x;
351
- vc4_state->src_w[1] += vc4_state->crtc_x / h_subsample;
352
- vc4_state->crtc_x = 0;
353
- }
354
-
355
- if (vc4_state->crtc_y < 0) {
356
- for (i = 0; i < num_planes; i++) {
357
- u32 subs = ((i == 0) ? 1 : v_subsample);
358
-
359
- vc4_state->offsets[i] += (fb->pitches[i] *
360
- (-vc4_state->crtc_y) / subs);
361
- }
362
- vc4_state->src_h[0] += vc4_state->crtc_y;
363
- vc4_state->src_h[1] += vc4_state->crtc_y / v_subsample;
364
- vc4_state->crtc_y = 0;
365408 }
366409
367410 return 0;
....@@ -398,16 +441,29 @@
398441 static u32 vc4_lbm_size(struct drm_plane_state *state)
399442 {
400443 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
401
- /* This is the worst case number. One of the two sizes will
402
- * be used depending on the scaling configuration.
403
- */
404
- u32 pix_per_line = max(vc4_state->src_w[0], (u32)vc4_state->crtc_w);
444
+ struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
445
+ u32 pix_per_line;
405446 u32 lbm;
406447
448
+ /* LBM is not needed when there's no vertical scaling. */
449
+ if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
450
+ vc4_state->y_scaling[1] == VC4_SCALING_NONE)
451
+ return 0;
452
+
453
+ /*
454
+ * This can be further optimized in the RGB/YUV444 case if the PPF
455
+ * decimation factor is between 0.5 and 1.0 by using crtc_w.
456
+ *
457
+ * It's not an issue though, since in that case src_w[0] is going
458
+ * to be greater than or equal to crtc_w.
459
+ */
460
+ if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
461
+ pix_per_line = vc4_state->crtc_w;
462
+ else
463
+ pix_per_line = vc4_state->src_w[0];
464
+
407465 if (!vc4_state->is_yuv) {
408
- if (vc4_state->is_unity)
409
- return 0;
410
- else if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
466
+ if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
411467 lbm = pix_per_line * 8;
412468 else {
413469 /* In special cases, this multiplier might be 12. */
....@@ -421,7 +477,11 @@
421477 lbm = pix_per_line * 16;
422478 }
423479
424
- lbm = roundup(lbm, 32);
480
+ /* Align it to 64 or 128 (hvs5) bytes */
481
+ lbm = roundup(lbm, vc4->hvs->hvs5 ? 128 : 64);
482
+
483
+ /* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
484
+ lbm /= vc4->hvs->hvs5 ? 4 : 2;
425485
426486 return lbm;
427487 }
....@@ -458,6 +518,105 @@
458518 }
459519 }
460520
521
+static void vc4_plane_calc_load(struct drm_plane_state *state)
522
+{
523
+ unsigned int hvs_load_shift, vrefresh, i;
524
+ struct drm_framebuffer *fb = state->fb;
525
+ struct vc4_plane_state *vc4_state;
526
+ struct drm_crtc_state *crtc_state;
527
+ unsigned int vscale_factor;
528
+ struct vc4_dev *vc4;
529
+
530
+ vc4 = to_vc4_dev(state->plane->dev);
531
+ if (!vc4->load_tracker_available)
532
+ return;
533
+
534
+ vc4_state = to_vc4_plane_state(state);
535
+ crtc_state = drm_atomic_get_existing_crtc_state(state->state,
536
+ state->crtc);
537
+ vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);
538
+
539
+ /* The HVS is able to process 2 pixels/cycle when scaling the source,
540
+ * 4 pixels/cycle otherwise.
541
+ * Alpha blending step seems to be pipelined and it's always operating
542
+ * at 4 pixels/cycle, so the limiting aspect here seems to be the
543
+ * scaler block.
544
+ * HVS load is expressed in clk-cycles/sec (AKA Hz).
545
+ */
546
+ if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
547
+ vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
548
+ vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
549
+ vc4_state->y_scaling[1] != VC4_SCALING_NONE)
550
+ hvs_load_shift = 1;
551
+ else
552
+ hvs_load_shift = 2;
553
+
554
+ vc4_state->membus_load = 0;
555
+ vc4_state->hvs_load = 0;
556
+ for (i = 0; i < fb->format->num_planes; i++) {
557
+ /* Even if the bandwidth/plane required for a single frame is
558
+ *
559
+ * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh
560
+ *
561
+ * when downscaling, we have to read more pixels per line in
562
+ * the time frame reserved for a single line, so the bandwidth
563
+ * demand can be momentarily higher. To account for that, we
564
+ * calculate the down-scaling factor and multiply the plane
565
+ * load by this number. We're likely over-estimating the read
566
+ * demand, but that's better than under-estimating it.
567
+ */
568
+ vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],
569
+ vc4_state->crtc_h);
570
+ vc4_state->membus_load += vc4_state->src_w[i] *
571
+ vc4_state->src_h[i] * vscale_factor *
572
+ fb->format->cpp[i];
573
+ vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
574
+ }
575
+
576
+ vc4_state->hvs_load *= vrefresh;
577
+ vc4_state->hvs_load >>= hvs_load_shift;
578
+ vc4_state->membus_load *= vrefresh;
579
+}
580
+
581
+static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
582
+{
583
+ struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
584
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
585
+ unsigned long irqflags;
586
+ u32 lbm_size;
587
+
588
+ lbm_size = vc4_lbm_size(state);
589
+ if (!lbm_size)
590
+ return 0;
591
+
592
+ if (WARN_ON(!vc4_state->lbm_offset))
593
+ return -EINVAL;
594
+
595
+ /* Allocate the LBM memory that the HVS will use for temporary
596
+ * storage due to our scaling/format conversion.
597
+ */
598
+ if (!drm_mm_node_allocated(&vc4_state->lbm)) {
599
+ int ret;
600
+
601
+ spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
602
+ ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
603
+ &vc4_state->lbm,
604
+ lbm_size,
605
+ vc4->hvs->hvs5 ? 64 : 32,
606
+ 0, 0);
607
+ spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
608
+
609
+ if (ret)
610
+ return ret;
611
+ } else {
612
+ WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
613
+ }
614
+
615
+ vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;
616
+
617
+ return 0;
618
+}
619
+
461620 /* Writes out a full display list for an active plane to the plane's
462621 * private dlist state.
463622 */
....@@ -470,35 +629,21 @@
470629 u32 ctl0_offset = vc4_state->dlist_count;
471630 const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
472631 u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
473
- int num_planes = drm_format_num_planes(format->drm);
632
+ int num_planes = fb->format->num_planes;
633
+ u32 h_subsample = fb->format->hsub;
634
+ u32 v_subsample = fb->format->vsub;
474635 bool mix_plane_alpha;
475636 bool covers_screen;
476637 u32 scl0, scl1, pitch0;
477
- u32 lbm_size, tiling;
478
- unsigned long irqflags;
638
+ u32 tiling, src_y;
479639 u32 hvs_format = format->hvs;
640
+ unsigned int rotation;
480641 int ret, i;
481642
643
+ if (vc4_state->dlist_initialized)
644
+ return 0;
645
+
482646 ret = vc4_plane_setup_clipping_and_scaling(state);
483
- if (ret)
484
- return ret;
485
-
486
- /* Allocate the LBM memory that the HVS will use for temporary
487
- * storage due to our scaling/format conversion.
488
- */
489
- lbm_size = vc4_lbm_size(state);
490
- if (lbm_size) {
491
- if (!vc4_state->lbm.allocated) {
492
- spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
493
- ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
494
- &vc4_state->lbm,
495
- lbm_size, 32, 0, 0);
496
- spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
497
- } else {
498
- WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
499
- }
500
- }
501
-
502647 if (ret)
503648 return ret;
504649
....@@ -516,26 +661,101 @@
516661 scl1 = vc4_get_scl_field(state, 0);
517662 }
518663
664
+ rotation = drm_rotation_simplify(state->rotation,
665
+ DRM_MODE_ROTATE_0 |
666
+ DRM_MODE_REFLECT_X |
667
+ DRM_MODE_REFLECT_Y);
668
+
669
+ /* We must point to the last line when Y reflection is enabled. */
670
+ src_y = vc4_state->src_y;
671
+ if (rotation & DRM_MODE_REFLECT_Y)
672
+ src_y += vc4_state->src_h[0] - 1;
673
+
519674 switch (base_format_mod) {
520675 case DRM_FORMAT_MOD_LINEAR:
521676 tiling = SCALER_CTL0_TILING_LINEAR;
522677 pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
678
+
679
+ /* Adjust the base pointer to the first pixel to be scanned
680
+ * out.
681
+ */
682
+ for (i = 0; i < num_planes; i++) {
683
+ vc4_state->offsets[i] += src_y /
684
+ (i ? v_subsample : 1) *
685
+ fb->pitches[i];
686
+
687
+ vc4_state->offsets[i] += vc4_state->src_x /
688
+ (i ? h_subsample : 1) *
689
+ fb->format->cpp[i];
690
+ }
691
+
523692 break;
524693
525694 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
526
- /* For T-tiled, the FB pitch is "how many bytes from
527
- * one row to the next, such that pitch * tile_h ==
528
- * tile_size * tiles_per_row."
529
- */
530695 u32 tile_size_shift = 12; /* T tiles are 4kb */
696
+ /* Whole-tile offsets, mostly for setting the pitch. */
697
+ u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
531698 u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
699
+ u32 tile_w_mask = (1 << tile_w_shift) - 1;
700
+ /* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
701
+ * the height (in pixels) of a 4k tile.
702
+ */
703
+ u32 tile_h_mask = (2 << tile_h_shift) - 1;
704
+ /* For T-tiled, the FB pitch is "how many bytes from one row to
705
+ * the next, such that
706
+ *
707
+ * pitch * tile_h == tile_size * tiles_per_row."
708
+ */
532709 u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
710
+ u32 tiles_l = vc4_state->src_x >> tile_w_shift;
711
+ u32 tiles_r = tiles_w - tiles_l;
712
+ u32 tiles_t = src_y >> tile_h_shift;
713
+ /* Intra-tile offsets, which modify the base address (the
714
+ * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
715
+ * base address).
716
+ */
717
+ u32 tile_y = (src_y >> 4) & 1;
718
+ u32 subtile_y = (src_y >> 2) & 3;
719
+ u32 utile_y = src_y & 3;
720
+ u32 x_off = vc4_state->src_x & tile_w_mask;
721
+ u32 y_off = src_y & tile_h_mask;
722
+
723
+ /* When Y reflection is requested we must set the
724
+ * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
725
+ * after the initial one should be fetched in descending order,
726
+ * which makes sense since we start from the last line and go
727
+ * backward.
728
+ * Don't know why we need y_off = max_y_off - y_off, but it's
729
+ * definitely required (I guess it's also related to the "going
730
+ * backward" situation).
731
+ */
732
+ if (rotation & DRM_MODE_REFLECT_Y) {
733
+ y_off = tile_h_mask - y_off;
734
+ pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
735
+ } else {
736
+ pitch0 = 0;
737
+ }
533738
534739 tiling = SCALER_CTL0_TILING_256B_OR_T;
740
+ pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
741
+ VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
742
+ VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
743
+ VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
744
+ vc4_state->offsets[0] += tiles_t * (tiles_w << tile_size_shift);
745
+ vc4_state->offsets[0] += subtile_y << 8;
746
+ vc4_state->offsets[0] += utile_y << 4;
535747
536
- pitch0 = (VC4_SET_FIELD(0, SCALER_PITCH0_TILE_Y_OFFSET) |
537
- VC4_SET_FIELD(0, SCALER_PITCH0_TILE_WIDTH_L) |
538
- VC4_SET_FIELD(tiles_w, SCALER_PITCH0_TILE_WIDTH_R));
748
+ /* Rows of tiles alternate left-to-right and right-to-left. */
749
+ if (tiles_t & 1) {
750
+ pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
751
+ vc4_state->offsets[0] += (tiles_w - tiles_l) <<
752
+ tile_size_shift;
753
+ vc4_state->offsets[0] -= (1 + !tile_y) << 10;
754
+ } else {
755
+ vc4_state->offsets[0] += tiles_l << tile_size_shift;
756
+ vc4_state->offsets[0] += tile_y << 10;
757
+ }
758
+
539759 break;
540760 }
541761
....@@ -543,31 +763,22 @@
543763 case DRM_FORMAT_MOD_BROADCOM_SAND128:
544764 case DRM_FORMAT_MOD_BROADCOM_SAND256: {
545765 uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
766
+ u32 tile_w, tile, x_off, pix_per_tile;
546767
547
- /* Column-based NV12 or RGBA.
548
- */
549
- if (fb->format->num_planes > 1) {
550
- if (hvs_format != HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE) {
551
- DRM_DEBUG_KMS("SAND format only valid for NV12/21");
552
- return -EINVAL;
553
- }
554
- hvs_format = HVS_PIXEL_FORMAT_H264;
555
- } else {
556
- if (base_format_mod == DRM_FORMAT_MOD_BROADCOM_SAND256) {
557
- DRM_DEBUG_KMS("SAND256 format only valid for H.264");
558
- return -EINVAL;
559
- }
560
- }
768
+ hvs_format = HVS_PIXEL_FORMAT_H264;
561769
562770 switch (base_format_mod) {
563771 case DRM_FORMAT_MOD_BROADCOM_SAND64:
564772 tiling = SCALER_CTL0_TILING_64B;
773
+ tile_w = 64;
565774 break;
566775 case DRM_FORMAT_MOD_BROADCOM_SAND128:
567776 tiling = SCALER_CTL0_TILING_128B;
777
+ tile_w = 128;
568778 break;
569779 case DRM_FORMAT_MOD_BROADCOM_SAND256:
570780 tiling = SCALER_CTL0_TILING_256B_OR_T;
781
+ tile_w = 256;
571782 break;
572783 default:
573784 break;
....@@ -576,6 +787,23 @@
576787 if (param > SCALER_TILE_HEIGHT_MASK) {
577788 DRM_DEBUG_KMS("SAND height too large (%d)\n", param);
578789 return -EINVAL;
790
+ }
791
+
792
+ pix_per_tile = tile_w / fb->format->cpp[0];
793
+ tile = vc4_state->src_x / pix_per_tile;
794
+ x_off = vc4_state->src_x % pix_per_tile;
795
+
796
+ /* Adjust the base pointer to the first pixel to be scanned
797
+ * out.
798
+ */
799
+ for (i = 0; i < num_planes; i++) {
800
+ vc4_state->offsets[i] += param * tile_w * tile;
801
+ vc4_state->offsets[i] += src_y /
802
+ (i ? v_subsample : 1) *
803
+ tile_w;
804
+ vc4_state->offsets[i] += x_off /
805
+ (i ? h_subsample : 1) *
806
+ fb->format->cpp[i];
579807 }
580808
581809 pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
....@@ -588,33 +816,6 @@
588816 return -EINVAL;
589817 }
590818
591
- /* Control word */
592
- vc4_dlist_write(vc4_state,
593
- SCALER_CTL0_VALID |
594
- VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
595
- (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
596
- (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
597
- VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
598
- (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
599
- VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
600
- VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
601
-
602
- /* Position Word 0: Image Positions and Alpha Value */
603
- vc4_state->pos0_offset = vc4_state->dlist_count;
604
- vc4_dlist_write(vc4_state,
605
- VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
606
- VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
607
- VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
608
-
609
- /* Position Word 1: Scaled Image Dimensions. */
610
- if (!vc4_state->is_unity) {
611
- vc4_dlist_write(vc4_state,
612
- VC4_SET_FIELD(vc4_state->crtc_w,
613
- SCALER_POS1_SCL_WIDTH) |
614
- VC4_SET_FIELD(vc4_state->crtc_h,
615
- SCALER_POS1_SCL_HEIGHT));
616
- }
617
-
618819 /* Don't waste cycles mixing with plane alpha if the set alpha
619820 * is opaque or there is no per-pixel alpha information.
620821 * In any case we use the alpha property value as the fixed alpha.
....@@ -622,20 +823,120 @@
622823 mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
623824 fb->format->has_alpha;
624825
625
- /* Position Word 2: Source Image Size, Alpha */
626
- vc4_state->pos2_offset = vc4_state->dlist_count;
627
- vc4_dlist_write(vc4_state,
628
- VC4_SET_FIELD(fb->format->has_alpha ?
629
- SCALER_POS2_ALPHA_MODE_PIPELINE :
630
- SCALER_POS2_ALPHA_MODE_FIXED,
631
- SCALER_POS2_ALPHA_MODE) |
632
- (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
633
- (fb->format->has_alpha ? SCALER_POS2_ALPHA_PREMULT : 0) |
634
- VC4_SET_FIELD(vc4_state->src_w[0], SCALER_POS2_WIDTH) |
635
- VC4_SET_FIELD(vc4_state->src_h[0], SCALER_POS2_HEIGHT));
826
+ if (!vc4->hvs->hvs5) {
827
+ /* Control word */
828
+ vc4_dlist_write(vc4_state,
829
+ SCALER_CTL0_VALID |
830
+ (rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
831
+ (rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
832
+ VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
833
+ (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
834
+ (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
835
+ VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
836
+ (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
837
+ VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
838
+ VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
636839
637
- /* Position Word 3: Context. Written by the HVS. */
638
- vc4_dlist_write(vc4_state, 0xc0c0c0c0);
840
+ /* Position Word 0: Image Positions and Alpha Value */
841
+ vc4_state->pos0_offset = vc4_state->dlist_count;
842
+ vc4_dlist_write(vc4_state,
843
+ VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
844
+ VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
845
+ VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
846
+
847
+ /* Position Word 1: Scaled Image Dimensions. */
848
+ if (!vc4_state->is_unity) {
849
+ vc4_dlist_write(vc4_state,
850
+ VC4_SET_FIELD(vc4_state->crtc_w,
851
+ SCALER_POS1_SCL_WIDTH) |
852
+ VC4_SET_FIELD(vc4_state->crtc_h,
853
+ SCALER_POS1_SCL_HEIGHT));
854
+ }
855
+
856
+ /* Position Word 2: Source Image Size, Alpha */
857
+ vc4_state->pos2_offset = vc4_state->dlist_count;
858
+ vc4_dlist_write(vc4_state,
859
+ VC4_SET_FIELD(fb->format->has_alpha ?
860
+ SCALER_POS2_ALPHA_MODE_PIPELINE :
861
+ SCALER_POS2_ALPHA_MODE_FIXED,
862
+ SCALER_POS2_ALPHA_MODE) |
863
+ (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
864
+ (fb->format->has_alpha ?
865
+ SCALER_POS2_ALPHA_PREMULT : 0) |
866
+ VC4_SET_FIELD(vc4_state->src_w[0],
867
+ SCALER_POS2_WIDTH) |
868
+ VC4_SET_FIELD(vc4_state->src_h[0],
869
+ SCALER_POS2_HEIGHT));
870
+
871
+ /* Position Word 3: Context. Written by the HVS. */
872
+ vc4_dlist_write(vc4_state, 0xc0c0c0c0);
873
+
874
+ } else {
875
+ u32 hvs_pixel_order = format->pixel_order;
876
+
877
+ if (format->pixel_order_hvs5)
878
+ hvs_pixel_order = format->pixel_order_hvs5;
879
+
880
+ /* Control word */
881
+ vc4_dlist_write(vc4_state,
882
+ SCALER_CTL0_VALID |
883
+ (hvs_pixel_order << SCALER_CTL0_ORDER_SHIFT) |
884
+ (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
885
+ VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
886
+ (vc4_state->is_unity ?
887
+ SCALER5_CTL0_UNITY : 0) |
888
+ VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
889
+ VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
890
+ SCALER5_CTL0_ALPHA_EXPAND |
891
+ SCALER5_CTL0_RGB_EXPAND);
892
+
893
+ /* Position Word 0: Image Positions and Alpha Value */
894
+ vc4_state->pos0_offset = vc4_state->dlist_count;
895
+ vc4_dlist_write(vc4_state,
896
+ (rotation & DRM_MODE_REFLECT_Y ?
897
+ SCALER5_POS0_VFLIP : 0) |
898
+ VC4_SET_FIELD(vc4_state->crtc_x,
899
+ SCALER_POS0_START_X) |
900
+ (rotation & DRM_MODE_REFLECT_X ?
901
+ SCALER5_POS0_HFLIP : 0) |
902
+ VC4_SET_FIELD(vc4_state->crtc_y,
903
+ SCALER5_POS0_START_Y)
904
+ );
905
+
906
+ /* Control Word 2 */
907
+ vc4_dlist_write(vc4_state,
908
+ VC4_SET_FIELD(state->alpha >> 4,
909
+ SCALER5_CTL2_ALPHA) |
910
+ (fb->format->has_alpha ?
911
+ SCALER5_CTL2_ALPHA_PREMULT : 0) |
912
+ (mix_plane_alpha ?
913
+ SCALER5_CTL2_ALPHA_MIX : 0) |
914
+ VC4_SET_FIELD(fb->format->has_alpha ?
915
+ SCALER5_CTL2_ALPHA_MODE_PIPELINE :
916
+ SCALER5_CTL2_ALPHA_MODE_FIXED,
917
+ SCALER5_CTL2_ALPHA_MODE)
918
+ );
919
+
920
+ /* Position Word 1: Scaled Image Dimensions. */
921
+ if (!vc4_state->is_unity) {
922
+ vc4_dlist_write(vc4_state,
923
+ VC4_SET_FIELD(vc4_state->crtc_w,
924
+ SCALER5_POS1_SCL_WIDTH) |
925
+ VC4_SET_FIELD(vc4_state->crtc_h,
926
+ SCALER5_POS1_SCL_HEIGHT));
927
+ }
928
+
929
+ /* Position Word 2: Source Image Size */
930
+ vc4_state->pos2_offset = vc4_state->dlist_count;
931
+ vc4_dlist_write(vc4_state,
932
+ VC4_SET_FIELD(vc4_state->src_w[0],
933
+ SCALER5_POS2_WIDTH) |
934
+ VC4_SET_FIELD(vc4_state->src_h[0],
935
+ SCALER5_POS2_HEIGHT));
936
+
937
+ /* Position Word 3: Context. Written by the HVS. */
938
+ vc4_dlist_write(vc4_state, 0xc0c0c0c0);
939
+ }
639940
640941
641942 /* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
....@@ -671,14 +972,19 @@
671972 vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5);
672973 }
673974
975
+ vc4_state->lbm_offset = 0;
976
+
674977 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
675978 vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
676979 vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
677980 vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
678
- /* LBM Base Address. */
981
+ /* Reserve a slot for the LBM Base Address. The real value will
982
+ * be set when calling vc4_plane_allocate_lbm().
983
+ */
679984 if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
680985 vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
681
- vc4_dlist_write(vc4_state, vc4_state->lbm.start);
986
+ vc4_state->lbm_offset = vc4_state->dlist_count;
987
+ vc4_dlist_counter_increment(vc4_state);
682988 }
683989
684990 if (num_planes > 1) {
....@@ -725,6 +1031,15 @@
7251031 vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
7261032 state->alpha != DRM_BLEND_ALPHA_OPAQUE;
7271033
1034
+ /* Flag the dlist as initialized to avoid checking it twice in case
1035
+ * the async update check already called vc4_plane_mode_set() and
1036
+ * decided to fall back to sync update because async update was not
1037
+ * possible.
1038
+ */
1039
+ vc4_state->dlist_initialized = 1;
1040
+
1041
+ vc4_plane_calc_load(state);
1042
+
7281043 return 0;
7291044 }
7301045
....@@ -739,13 +1054,18 @@
7391054 struct drm_plane_state *state)
7401055 {
7411056 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1057
+ int ret;
7421058
7431059 vc4_state->dlist_count = 0;
7441060
745
- if (plane_enabled(state))
746
- return vc4_plane_mode_set(plane, state);
747
- else
1061
+ if (!plane_enabled(state))
7481062 return 0;
1063
+
1064
+ ret = vc4_plane_mode_set(plane, state);
1065
+ if (ret)
1066
+ return ret;
1067
+
1068
+ return vc4_plane_allocate_lbm(state);
7491069 }
7501070
7511071 static void vc4_plane_atomic_update(struct drm_plane *plane,
....@@ -811,29 +1131,59 @@
8111131 static void vc4_plane_atomic_async_update(struct drm_plane *plane,
8121132 struct drm_plane_state *state)
8131133 {
814
- struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
815
-
816
- if (plane->state->fb != state->fb) {
817
- vc4_plane_async_set_fb(plane, state->fb);
818
- drm_atomic_set_fb_for_plane(plane->state, state->fb);
819
- }
1134
+ struct vc4_plane_state *vc4_state, *new_vc4_state;
8201135
8211136 swap(plane->state->fb, state->fb);
822
- /* Set the cursor's position on the screen. This is the
823
- * expected change from the drm_mode_cursor_universal()
824
- * helper.
825
- */
8261137 plane->state->crtc_x = state->crtc_x;
8271138 plane->state->crtc_y = state->crtc_y;
828
-
829
- /* Allow changing the start position within the cursor BO, if
830
- * that matters.
831
- */
1139
+ plane->state->crtc_w = state->crtc_w;
1140
+ plane->state->crtc_h = state->crtc_h;
8321141 plane->state->src_x = state->src_x;
8331142 plane->state->src_y = state->src_y;
1143
+ plane->state->src_w = state->src_w;
1144
+ plane->state->src_h = state->src_h;
1145
+ plane->state->src_h = state->src_h;
1146
+ plane->state->alpha = state->alpha;
1147
+ plane->state->pixel_blend_mode = state->pixel_blend_mode;
1148
+ plane->state->rotation = state->rotation;
1149
+ plane->state->zpos = state->zpos;
1150
+ plane->state->normalized_zpos = state->normalized_zpos;
1151
+ plane->state->color_encoding = state->color_encoding;
1152
+ plane->state->color_range = state->color_range;
1153
+ plane->state->src = state->src;
1154
+ plane->state->dst = state->dst;
1155
+ plane->state->visible = state->visible;
8341156
835
- /* Update the display list based on the new crtc_x/y. */
836
- vc4_plane_atomic_check(plane, plane->state);
1157
+ new_vc4_state = to_vc4_plane_state(state);
1158
+ vc4_state = to_vc4_plane_state(plane->state);
1159
+
1160
+ vc4_state->crtc_x = new_vc4_state->crtc_x;
1161
+ vc4_state->crtc_y = new_vc4_state->crtc_y;
1162
+ vc4_state->crtc_h = new_vc4_state->crtc_h;
1163
+ vc4_state->crtc_w = new_vc4_state->crtc_w;
1164
+ vc4_state->src_x = new_vc4_state->src_x;
1165
+ vc4_state->src_y = new_vc4_state->src_y;
1166
+ memcpy(vc4_state->src_w, new_vc4_state->src_w,
1167
+ sizeof(vc4_state->src_w));
1168
+ memcpy(vc4_state->src_h, new_vc4_state->src_h,
1169
+ sizeof(vc4_state->src_h));
1170
+ memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
1171
+ sizeof(vc4_state->x_scaling));
1172
+ memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
1173
+ sizeof(vc4_state->y_scaling));
1174
+ vc4_state->is_unity = new_vc4_state->is_unity;
1175
+ vc4_state->is_yuv = new_vc4_state->is_yuv;
1176
+ memcpy(vc4_state->offsets, new_vc4_state->offsets,
1177
+ sizeof(vc4_state->offsets));
1178
+ vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;
1179
+
1180
+ /* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
1181
+ vc4_state->dlist[vc4_state->pos0_offset] =
1182
+ new_vc4_state->dlist[vc4_state->pos0_offset];
1183
+ vc4_state->dlist[vc4_state->pos2_offset] =
1184
+ new_vc4_state->dlist[vc4_state->pos2_offset];
1185
+ vc4_state->dlist[vc4_state->ptr0_offset] =
1186
+ new_vc4_state->dlist[vc4_state->ptr0_offset];
8371187
8381188 /* Note that we can't just call vc4_plane_write_dlist()
8391189 * because that would smash the context data that the HVS is
....@@ -850,12 +1200,37 @@
/* Decide whether the new plane state may be applied as an async update.
 *
 * Builds the display list for the new state with vc4_plane_mode_set() and
 * returns its error code if that fails. Returns -EINVAL when the dlist
 * size, the pos0/pos2/ptr0 offsets or the LBM size differ from the current
 * plane->state, or when any dlist word other than pos0, pos2, ptr0 and the
 * reserved LBM slot differs between the two states. Returns 0 otherwise.
 */
8501200 static int vc4_plane_atomic_async_check(struct drm_plane *plane,
8511201 struct drm_plane_state *state)
8521202 {
853
- /* No configuring new scaling in the fast path. */
854
- if (plane->state->crtc_w != state->crtc_w ||
855
- plane->state->crtc_h != state->crtc_h ||
856
- plane->state->src_w != state->src_w ||
857
- plane->state->src_h != state->src_h)
1203
+ struct vc4_plane_state *old_vc4_state, *new_vc4_state;
1204
+ int ret;
1205
+ u32 i;
1206
+
1207
+ ret = vc4_plane_mode_set(plane, state);
1208
+ if (ret)
1209
+ return ret;
1210
+
1211
+ old_vc4_state = to_vc4_plane_state(plane->state);
1212
+ new_vc4_state = to_vc4_plane_state(state);
1213
+ if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
1214
+ old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
1215
+ old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
1216
+ old_vc4_state->ptr0_offset != new_vc4_state->ptr0_offset ||
1217
+ vc4_lbm_size(plane->state) != vc4_lbm_size(state))
8581218 return -EINVAL;
1219
+
1220
+ /* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update;
1221
+ * if anything else has changed, fall back to a sync update.
+ *
+ * The LBM slot is skipped as well. vc4_plane_mode_set() leaves
+ * lbm_offset at 0 when it did not reserve such a slot, hence the
+ * non-zero test before comparing the index against it.
1222
+ */
1223
+ for (i = 0; i < new_vc4_state->dlist_count; i++) {
1224
+ if (i == new_vc4_state->pos0_offset ||
1225
+ i == new_vc4_state->pos2_offset ||
1226
+ i == new_vc4_state->ptr0_offset ||
1227
+ (new_vc4_state->lbm_offset &&
1228
+ i == new_vc4_state->lbm_offset))
1229
+ continue;
1230
+
1231
+ if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
1232
+ return -EINVAL;
1233
+ }
8591234
8601235 return 0;
8611236 }
....@@ -864,7 +1239,6 @@
8641239 struct drm_plane_state *state)
8651240 {
8661241 struct vc4_bo *bo;
867
- struct dma_fence *fence;
8681242 int ret;
8691243
8701244 if (!state->fb)
....@@ -872,8 +1246,7 @@
8721246
8731247 bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
8741248
875
- fence = reservation_object_get_excl_rcu(bo->resv);
876
- drm_atomic_set_fence_for_plane(state, fence);
1249
+ drm_gem_fb_prepare_fb(plane, state);
8771250
8781251 if (plane->state->fb == state->fb)
8791252 return 0;
....@@ -908,7 +1281,6 @@
9081281
/* Tear down a plane by handing it to drm_plane_cleanup().
 *
 * The drm_plane_helper_disable() call shown on the '-' row is the one this
 * change removes; only drm_plane_cleanup() remains afterwards.
 */
9091282 static void vc4_plane_destroy(struct drm_plane *plane)
9101283 {
911
- drm_plane_helper_disable(plane, NULL);
9121284 drm_plane_cleanup(plane);
9131285 }
9141286
....@@ -929,8 +1301,6 @@
9291301 switch (fourcc_mod_broadcom_mod(modifier)) {
9301302 case DRM_FORMAT_MOD_LINEAR:
9311303 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
932
- case DRM_FORMAT_MOD_BROADCOM_SAND64:
933
- case DRM_FORMAT_MOD_BROADCOM_SAND128:
9341304 return true;
9351305 default:
9361306 return false;
....@@ -946,6 +1316,10 @@
9461316 default:
9471317 return false;
9481318 }
1319
+ case DRM_FORMAT_RGBX1010102:
1320
+ case DRM_FORMAT_BGRX1010102:
1321
+ case DRM_FORMAT_RGBA1010102:
1322
+ case DRM_FORMAT_BGRA1010102:
9491323 case DRM_FORMAT_YUV422:
9501324 case DRM_FORMAT_YVU422:
9511325 case DRM_FORMAT_YUV420:
....@@ -974,7 +1348,6 @@
9741348 struct drm_plane *plane = NULL;
9751349 struct vc4_plane *vc4_plane;
9761350 u32 formats[ARRAY_SIZE(hvs_formats)];
977
- u32 num_formats = 0;
9781351 int ret = 0;
9791352 unsigned i;
9801353 static const uint64_t modifiers[] = {
....@@ -991,25 +1364,66 @@
9911364 if (!vc4_plane)
9921365 return ERR_PTR(-ENOMEM);
9931366
994
- for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
995
- /* Don't allow YUV in cursor planes, since that means
996
- * tuning on the scaler, which we don't allow for the
997
- * cursor.
998
- */
999
- if (type != DRM_PLANE_TYPE_CURSOR ||
1000
- hvs_formats[i].hvs < HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE) {
1001
- formats[num_formats++] = hvs_formats[i].drm;
1002
- }
1003
- }
1367
+ for (i = 0; i < ARRAY_SIZE(hvs_formats); i++)
1368
+ formats[i] = hvs_formats[i].drm;
1369
+
10041370 plane = &vc4_plane->base;
10051371 ret = drm_universal_plane_init(dev, plane, 0,
10061372 &vc4_plane_funcs,
1007
- formats, num_formats,
1373
+ formats, ARRAY_SIZE(formats),
10081374 modifiers, type, NULL);
1375
+ if (ret)
1376
+ return ERR_PTR(ret);
10091377
10101378 drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
10111379
10121380 drm_plane_create_alpha_property(plane);
1381
+ drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
1382
+ DRM_MODE_ROTATE_0 |
1383
+ DRM_MODE_ROTATE_180 |
1384
+ DRM_MODE_REFLECT_X |
1385
+ DRM_MODE_REFLECT_Y);
10131386
10141387 return plane;
10151388 }
1389
+
1390
/* Create the overlay planes and one cursor plane per CRTC.
 *
 * Tries to initialize 16 overlay planes, each with possible_crtcs set to
 * GENMASK(num_crtc - 1, 0), then one cursor plane for every CRTC, which is
 * restricted to that CRTC and stored in crtc->cursor. A plane for which
 * vc4_plane_init() returns an error pointer is silently skipped, so this
 * function always returns 0.
 *
 * NOTE(review): the GENMASK() expression assumes at least one CRTC has
 * been registered (num_crtc >= 1) -- TODO confirm against the caller.
 */
+int vc4_plane_create_additional_planes(struct drm_device *drm)
1391
+{
1392
+ struct drm_plane *cursor_plane;
1393
+ struct drm_crtc *crtc;
1394
+ unsigned int i;
1395
+
1396
+ /* Set up some arbitrary number of planes. We're not limited
1397
+ * by a set number of physical registers, just the space in
1398
+ * the HVS (16k) and how small a plane can be (28 bytes).
1399
+ * However, each plane we set up takes up some memory, and
1400
+ * increases the cost of looping over planes, which atomic
1401
+ * modesetting does quite a bit. As a result, we pick a
1402
+ * modest number of planes to expose, that should hopefully
1403
+ * still cover any sane use case.
1404
+ */
1405
+ for (i = 0; i < 16; i++) {
1406
+ struct drm_plane *plane =
1407
+ vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY);
1408
+
1409
+ if (IS_ERR(plane))
1410
+ continue;
1411
+
1412
+ plane->possible_crtcs =
1413
+ GENMASK(drm->mode_config.num_crtc - 1, 0);
1414
+ }
1415
+
1416
+ drm_for_each_crtc(crtc, drm) {
1417
+ /* Set up the legacy cursor after overlay initialization,
1418
+ * since we overlay planes on the CRTC in the order they were
1419
+ * initialized.
1420
+ */
1421
+ cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
1422
+ if (!IS_ERR(cursor_plane)) {
1423
+ cursor_plane->possible_crtcs = drm_crtc_mask(crtc);
1424
+ crtc->cursor = cursor_plane;
1425
+ }
1426
+ }
1427
+
1428
+ return 0;
1429
+}