forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
....@@ -1,5 +1,6 @@
11 /*
22 * Copyright 2017 Advanced Micro Devices, Inc.
3
+ * Copyright 2019 Raptor Engineering, LLC
34 *
45 * Permission is hereby granted, free of charge, to any person obtaining a
56 * copy of this software and associated documentation files (the "Software"),
....@@ -24,19 +25,26 @@
2425 */
2526
2627 #include "dm_services.h"
28
+#include "dc.h"
2729 #include "dcn_calcs.h"
2830 #include "dcn_calc_auto.h"
29
-#include "dc.h"
3031 #include "dal_asic_id.h"
31
-
3232 #include "resource.h"
3333 #include "dcn10/dcn10_resource.h"
3434 #include "dcn10/dcn10_hubbub.h"
35
+#include "dml/dml1_display_rq_dlg_calc.h"
3536
3637 #include "dcn_calc_math.h"
3738
3839 #define DC_LOGGER \
3940 dc->ctx->logger
41
+
42
+#define WM_SET_COUNT 4
43
+#define WM_A 0
44
+#define WM_B 1
45
+#define WM_C 2
46
+#define WM_D 3
47
+
4048 /*
4149 * NOTE:
4250 * This file is gcc-parseable HW gospel, coming straight from HW engineers.
....@@ -46,7 +54,9 @@
4654 * remain as-is as it provides us with a guarantee from HW that it is correct.
4755 */
4856
49
-/* Defaults from spreadsheet rev#247 */
57
+/* Defaults from spreadsheet rev#247.
58
+ * RV2 delta: dram_clock_change_latency, max_num_dpp
59
+ */
5060 const struct dcn_soc_bounding_box dcn10_soc_defaults = {
5161 /* latencies */
5262 .sr_exit_time = 17, /*us*/
....@@ -240,15 +250,69 @@
240250 }
241251 }
242252
253
+enum source_macro_tile_size swizzle_mode_to_macro_tile_size(enum swizzle_mode_values sw_mode)
254
+{
255
+ switch (sw_mode) {
256
+ /* for 4/8/16 high tiles */
257
+ case DC_SW_LINEAR:
258
+ return dm_4k_tile;
259
+ case DC_SW_4KB_S:
260
+ case DC_SW_4KB_S_X:
261
+ return dm_4k_tile;
262
+ case DC_SW_64KB_S:
263
+ case DC_SW_64KB_S_X:
264
+ case DC_SW_64KB_S_T:
265
+ return dm_64k_tile;
266
+ case DC_SW_VAR_S:
267
+ case DC_SW_VAR_S_X:
268
+ return dm_256k_tile;
269
+
270
+ /* For 64bpp 2 high tiles */
271
+ case DC_SW_4KB_D:
272
+ case DC_SW_4KB_D_X:
273
+ return dm_4k_tile;
274
+ case DC_SW_64KB_D:
275
+ case DC_SW_64KB_D_X:
276
+ case DC_SW_64KB_D_T:
277
+ return dm_64k_tile;
278
+ case DC_SW_VAR_D:
279
+ case DC_SW_VAR_D_X:
280
+ return dm_256k_tile;
281
+
282
+ case DC_SW_4KB_R:
283
+ case DC_SW_4KB_R_X:
284
+ return dm_4k_tile;
285
+ case DC_SW_64KB_R:
286
+ case DC_SW_64KB_R_X:
287
+ return dm_64k_tile;
288
+ case DC_SW_VAR_R:
289
+ case DC_SW_VAR_R_X:
290
+ return dm_256k_tile;
291
+
292
+ /* Unsupported swizzle modes for dcn */
293
+ case DC_SW_256B_S:
294
+ default:
295
+ ASSERT(0); /* Not supported */
296
+ return 0;
297
+ }
298
+}
299
+
243300 static void pipe_ctx_to_e2e_pipe_params (
244301 const struct pipe_ctx *pipe,
245302 struct _vcs_dpi_display_pipe_params_st *input)
246303 {
247304 input->src.is_hsplit = false;
248
- if (pipe->top_pipe != NULL && pipe->top_pipe->plane_state == pipe->plane_state)
305
+
306
+ /* stereo can never be split */
307
+ if (pipe->plane_state->stereo_format == PLANE_STEREO_FORMAT_SIDE_BY_SIDE ||
308
+ pipe->plane_state->stereo_format == PLANE_STEREO_FORMAT_TOP_AND_BOTTOM) {
309
+ /* reset the split group if it was already considered split. */
310
+ input->src.hsplit_grp = pipe->pipe_idx;
311
+ } else if (pipe->top_pipe != NULL && pipe->top_pipe->plane_state == pipe->plane_state) {
249312 input->src.is_hsplit = true;
250
- else if (pipe->bottom_pipe != NULL && pipe->bottom_pipe->plane_state == pipe->plane_state)
313
+ } else if (pipe->bottom_pipe != NULL && pipe->bottom_pipe->plane_state == pipe->plane_state) {
251314 input->src.is_hsplit = true;
315
+ }
252316
253317 if (pipe->plane_res.dpp->ctx->dc->debug.optimized_watermark) {
254318 /*
....@@ -269,7 +333,7 @@
269333 dcc_support_pixel_format(pipe->plane_state->format, &bpe) ? 1 : 0;
270334 }
271335 input->src.dcc_rate = 1;
272
- input->src.meta_pitch = pipe->plane_state->dcc.grph.meta_pitch;
336
+ input->src.meta_pitch = pipe->plane_state->dcc.meta_pitch;
273337 input->src.source_scan = dm_horz;
274338 input->src.sw_mode = pipe->plane_state->tiling_info.gfx9.swizzle;
275339
....@@ -280,53 +344,7 @@
280344 input->src.cur0_src_width = 128; /* TODO: Cursor calcs, not currently stored */
281345 input->src.cur0_bpp = 32;
282346
283
- switch (pipe->plane_state->tiling_info.gfx9.swizzle) {
284
- /* for 4/8/16 high tiles */
285
- case DC_SW_LINEAR:
286
- input->src.is_display_sw = 1;
287
- input->src.macro_tile_size = dm_4k_tile;
288
- break;
289
- case DC_SW_4KB_S:
290
- case DC_SW_4KB_S_X:
291
- input->src.is_display_sw = 0;
292
- input->src.macro_tile_size = dm_4k_tile;
293
- break;
294
- case DC_SW_64KB_S:
295
- case DC_SW_64KB_S_X:
296
- case DC_SW_64KB_S_T:
297
- input->src.is_display_sw = 0;
298
- input->src.macro_tile_size = dm_64k_tile;
299
- break;
300
- case DC_SW_VAR_S:
301
- case DC_SW_VAR_S_X:
302
- input->src.is_display_sw = 0;
303
- input->src.macro_tile_size = dm_256k_tile;
304
- break;
305
-
306
- /* For 64bpp 2 high tiles */
307
- case DC_SW_4KB_D:
308
- case DC_SW_4KB_D_X:
309
- input->src.is_display_sw = 1;
310
- input->src.macro_tile_size = dm_4k_tile;
311
- break;
312
- case DC_SW_64KB_D:
313
- case DC_SW_64KB_D_X:
314
- case DC_SW_64KB_D_T:
315
- input->src.is_display_sw = 1;
316
- input->src.macro_tile_size = dm_64k_tile;
317
- break;
318
- case DC_SW_VAR_D:
319
- case DC_SW_VAR_D_X:
320
- input->src.is_display_sw = 1;
321
- input->src.macro_tile_size = dm_256k_tile;
322
- break;
323
-
324
- /* Unsupported swizzle modes for dcn */
325
- case DC_SW_256B_S:
326
- default:
327
- ASSERT(0); /* Not supported */
328
- break;
329
- }
347
+ input->src.macro_tile_size = swizzle_mode_to_macro_tile_size(pipe->plane_state->tiling_info.gfx9.swizzle);
330348
331349 switch (pipe->plane_state->rotation) {
332350 case ROTATION_ANGLE_0:
....@@ -363,6 +381,13 @@
363381 input->src.viewport_width_c = input->src.viewport_width;
364382 input->src.viewport_height_c = input->src.viewport_height;
365383 break;
384
+#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
385
+ case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA:
386
+ input->src.source_format = dm_rgbe_alpha;
387
+ input->src.viewport_width_c = input->src.viewport_width;
388
+ input->src.viewport_height_c = input->src.viewport_height;
389
+ break;
390
+#endif
366391 default:
367392 input->src.source_format = dm_444_32;
368393 input->src.viewport_width_c = input->src.viewport_width;
....@@ -416,7 +441,7 @@
416441 - pipe->stream->timing.v_addressable
417442 - pipe->stream->timing.v_border_bottom
418443 - pipe->stream->timing.v_border_top;
419
- input->dest.pixel_rate_mhz = pipe->stream->timing.pix_clk_khz/1000.0;
444
+ input->dest.pixel_rate_mhz = pipe->stream->timing.pix_clk_100hz/10000.0;
420445 input->dest.vstartup_start = pipe->pipe_dlg_param.vstartup_start;
421446 input->dest.vupdate_offset = pipe->pipe_dlg_param.vupdate_offset;
422447 input->dest.vupdate_offset = pipe->pipe_dlg_param.vupdate_offset;
....@@ -466,7 +491,7 @@
466491 input.clks_cfg.dcfclk_mhz = v->dcfclk;
467492 input.clks_cfg.dispclk_mhz = v->dispclk;
468493 input.clks_cfg.dppclk_mhz = v->dppclk;
469
- input.clks_cfg.refclk_mhz = dc->res_pool->ref_clock_inKhz / 1000.0;
494
+ input.clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
470495 input.clks_cfg.socclk_mhz = v->socclk;
471496 input.clks_cfg.voltage = v->voltage_level;
472497 // dc->dml.logger = pool->base.logger;
....@@ -536,28 +561,28 @@
536561 v->fabric_and_dram_bandwidth = v->fabric_and_dram_bandwidth_vnom0p8;
537562 dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(v);
538563
539
- context->bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns =
564
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns =
540565 v->stutter_exit_watermark * 1000;
541
- context->bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns =
566
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns =
542567 v->stutter_enter_plus_exit_watermark * 1000;
543
- context->bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns =
568
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns =
544569 v->dram_clock_change_watermark * 1000;
545
- context->bw.dcn.watermarks.b.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000;
546
- context->bw.dcn.watermarks.b.urgent_ns = v->urgent_watermark * 1000;
570
+ context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000;
571
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = v->urgent_watermark * 1000;
547572
548573 v->dcfclk_per_state[1] = v->dcfclkv_nom0p8;
549574 v->dcfclk_per_state[0] = v->dcfclkv_nom0p8;
550575 v->dcfclk = v->dcfclkv_nom0p8;
551576 dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(v);
552577
553
- context->bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns =
578
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns =
554579 v->stutter_exit_watermark * 1000;
555
- context->bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns =
580
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns =
556581 v->stutter_enter_plus_exit_watermark * 1000;
557
- context->bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns =
582
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns =
558583 v->dram_clock_change_watermark * 1000;
559
- context->bw.dcn.watermarks.c.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000;
560
- context->bw.dcn.watermarks.c.urgent_ns = v->urgent_watermark * 1000;
584
+ context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000;
585
+ context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = v->urgent_watermark * 1000;
561586 }
562587
563588 if (v->voltage_level < 3) {
....@@ -571,14 +596,14 @@
571596 v->dcfclk = v->dcfclkv_max0p9;
572597 dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(v);
573598
574
- context->bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns =
599
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns =
575600 v->stutter_exit_watermark * 1000;
576
- context->bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns =
601
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns =
577602 v->stutter_enter_plus_exit_watermark * 1000;
578
- context->bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns =
603
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns =
579604 v->dram_clock_change_watermark * 1000;
580
- context->bw.dcn.watermarks.d.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000;
581
- context->bw.dcn.watermarks.d.urgent_ns = v->urgent_watermark * 1000;
605
+ context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000;
606
+ context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = v->urgent_watermark * 1000;
582607 }
583608
584609 v->fabric_and_dram_bandwidth_per_state[2] = v->fabric_and_dram_bandwidth_vnom0p8;
....@@ -591,20 +616,20 @@
591616 v->dcfclk = v->dcfclk_per_state[v->voltage_level];
592617 dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(v);
593618
594
- context->bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns =
619
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns =
595620 v->stutter_exit_watermark * 1000;
596
- context->bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns =
621
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns =
597622 v->stutter_enter_plus_exit_watermark * 1000;
598
- context->bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns =
623
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns =
599624 v->dram_clock_change_watermark * 1000;
600
- context->bw.dcn.watermarks.a.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000;
601
- context->bw.dcn.watermarks.a.urgent_ns = v->urgent_watermark * 1000;
625
+ context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000;
626
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = v->urgent_watermark * 1000;
602627 if (v->voltage_level >= 2) {
603
- context->bw.dcn.watermarks.b = context->bw.dcn.watermarks.a;
604
- context->bw.dcn.watermarks.c = context->bw.dcn.watermarks.a;
628
+ context->bw_ctx.bw.dcn.watermarks.b = context->bw_ctx.bw.dcn.watermarks.a;
629
+ context->bw_ctx.bw.dcn.watermarks.c = context->bw_ctx.bw.dcn.watermarks.a;
605630 }
606631 if (v->voltage_level >= 3)
607
- context->bw.dcn.watermarks.d = context->bw.dcn.watermarks.a;
632
+ context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
608633 }
609634 #endif
610635
....@@ -612,7 +637,7 @@
612637 {
613638 bool updated = false;
614639
615
- kernel_fpu_begin();
640
+ DC_FP_START();
616641 if ((int)(dc->dcn_soc->sr_exit_time * 1000) != dc->debug.sr_exit_time_ns
617642 && dc->debug.sr_exit_time_ns) {
618643 updated = true;
....@@ -648,7 +673,7 @@
648673 dc->dcn_soc->dram_clock_change_latency =
649674 dc->debug.dram_clock_change_latency_ns / 1000.0;
650675 }
651
- kernel_fpu_end();
676
+ DC_FP_END();
652677
653678 return updated;
654679 }
....@@ -663,9 +688,9 @@
663688 }
664689
665690 static void hack_force_pipe_split(struct dcn_bw_internal_vars *v,
666
- unsigned int pixel_rate_khz)
691
+ unsigned int pixel_rate_100hz)
667692 {
668
- float pixel_rate_mhz = pixel_rate_khz / 1000;
693
+ float pixel_rate_mhz = pixel_rate_100hz / 10000;
669694
670695 /*
671696 * force enabling pipe split by lower dpp clock for DPM0 to just
....@@ -679,6 +704,26 @@
679704 struct dc_debug_options *dbg,
680705 struct dc_state *context)
681706 {
707
+ int i;
708
+
709
+ for (i = 0; i < MAX_PIPES; i++) {
710
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
711
+
712
+ /**
713
+ * Workaround for avoiding pipe-split in cases where we'd split
714
+ * planes that are too small, resulting in splits that aren't
715
+ * valid for the scaler.
716
+ */
717
+ if (pipe->plane_state &&
718
+ (pipe->plane_state->dst_rect.width <= 16 ||
719
+ pipe->plane_state->dst_rect.height <= 16 ||
720
+ pipe->plane_state->src_rect.width <= 16 ||
721
+ pipe->plane_state->src_rect.height <= 16)) {
722
+ hack_disable_optional_pipe_split(v);
723
+ return;
724
+ }
725
+ }
726
+
682727 if (dbg->pipe_split_policy == MPC_SPLIT_AVOID)
683728 hack_disable_optional_pipe_split(v);
684729
....@@ -688,26 +733,60 @@
688733
689734 if (context->stream_count == 1 &&
690735 dbg->force_single_disp_pipe_split)
691
- hack_force_pipe_split(v, context->streams[0]->timing.pix_clk_khz);
736
+ hack_force_pipe_split(v, context->streams[0]->timing.pix_clk_100hz);
737
+}
738
+
739
+unsigned int get_highest_allowed_voltage_level(uint32_t hw_internal_rev, uint32_t pci_revision_id)
740
+{
741
+ /* for low power RV2 variants, the highest voltage level we want is 0 */
742
+ if (ASICREV_IS_RAVEN2(hw_internal_rev))
743
+ switch (pci_revision_id) {
744
+ case PRID_DALI_DE:
745
+ case PRID_DALI_DF:
746
+ case PRID_DALI_E3:
747
+ case PRID_DALI_E4:
748
+ case PRID_POLLOCK_94:
749
+ case PRID_POLLOCK_95:
750
+ case PRID_POLLOCK_E9:
751
+ case PRID_POLLOCK_EA:
752
+ case PRID_POLLOCK_EB:
753
+ return 0;
754
+ default:
755
+ break;
756
+ }
757
+
758
+ /* we are ok with all levels */
759
+ return 4;
692760 }
693761
694762 bool dcn_validate_bandwidth(
695763 struct dc *dc,
696
- struct dc_state *context)
764
+ struct dc_state *context,
765
+ bool fast_validate)
697766 {
767
+ /*
768
+ * we want a breakdown of the various stages of validation, which the
769
+ * perf_trace macro doesn't support
770
+ */
771
+ BW_VAL_TRACE_SETUP();
772
+
698773 const struct resource_pool *pool = dc->res_pool;
699774 struct dcn_bw_internal_vars *v = &context->dcn_bw_vars;
700
- int i, input_idx;
775
+ int i, input_idx, k;
701776 int vesa_sync_start, asic_blank_end, asic_blank_start;
702777 bool bw_limit_pass;
703778 float bw_limit;
704779
705780 PERFORMANCE_TRACE_START();
781
+
782
+ BW_VAL_TRACE_COUNT();
783
+
706784 if (dcn_bw_apply_registry_override(dc))
707785 dcn_bw_sync_calcs_and_dml(dc);
708786
709787 memset(v, 0, sizeof(*v));
710
- kernel_fpu_begin();
788
+ DC_FP_START();
789
+
711790 v->sr_exit_time = dc->dcn_soc->sr_exit_time;
712791 v->sr_enter_plus_exit_time = dc->dcn_soc->sr_enter_plus_exit_time;
713792 v->urgent_latency = dc->dcn_soc->urgent_latency;
....@@ -845,8 +924,9 @@
845924 v->v_sync_plus_back_porch[input_idx] = pipe->stream->timing.v_total
846925 - v->vactive[input_idx]
847926 - pipe->stream->timing.v_front_porch;
848
- v->pixel_clock[input_idx] = pipe->stream->timing.pix_clk_khz / 1000.0f;
849
-
927
+ v->pixel_clock[input_idx] = pipe->stream->timing.pix_clk_100hz/10000.0;
928
+ if (pipe->stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
929
+ v->pixel_clock[input_idx] *= 2;
850930 if (!pipe->plane_state) {
851931 v->dcc_enable[input_idx] = dcn_bw_yes;
852932 v->source_pixel_format[input_idx] = dcn_bw_rgb_sub_32;
....@@ -854,8 +934,19 @@
854934 v->lb_bit_per_pixel[input_idx] = 30;
855935 v->viewport_width[input_idx] = pipe->stream->timing.h_addressable;
856936 v->viewport_height[input_idx] = pipe->stream->timing.v_addressable;
857
- v->scaler_rec_out_width[input_idx] = pipe->stream->timing.h_addressable;
858
- v->scaler_recout_height[input_idx] = pipe->stream->timing.v_addressable;
937
+ /*
938
+ * for cases where we have no plane, we want to validate up to 1080p
939
+ * source size because here we are only interested in whether the output
940
+ * timing is supported or not. if we cannot support native resolution
941
+ * of the high res display, we still want to support lower res up scale
942
+ * to native
943
+ */
944
+ if (v->viewport_width[input_idx] > 1920)
945
+ v->viewport_width[input_idx] = 1920;
946
+ if (v->viewport_height[input_idx] > 1080)
947
+ v->viewport_height[input_idx] = 1080;
948
+ v->scaler_rec_out_width[input_idx] = v->viewport_width[input_idx];
949
+ v->scaler_recout_height[input_idx] = v->viewport_height[input_idx];
859950 v->override_hta_ps[input_idx] = 1;
860951 v->override_vta_ps[input_idx] = 1;
861952 v->override_hta_pschroma[input_idx] = 1;
....@@ -953,7 +1044,7 @@
9531044 v->dcc_rate[input_idx] = 1; /*TODO: Worst case? does this change?*/
9541045 v->output_format[input_idx] = pipe->stream->timing.pixel_encoding ==
9551046 PIXEL_ENCODING_YCBCR420 ? dcn_bw_420 : dcn_bw_444;
956
- v->output[input_idx] = pipe->stream->sink->sink_signal ==
1047
+ v->output[input_idx] = pipe->stream->signal ==
9571048 SIGNAL_TYPE_HDMI_TYPE_A ? dcn_bw_hdmi : dcn_bw_dp;
9581049 v->output_deep_color[input_idx] = dcn_bw_encoder_8bpc;
9591050 if (v->output[input_idx] == dcn_bw_hdmi) {
....@@ -999,13 +1090,53 @@
9991090 dc->debug.sr_enter_plus_exit_time_dpm0_ns / 1000.0f;
10001091 if (dc->debug.sr_exit_time_dpm0_ns)
10011092 v->sr_exit_time = dc->debug.sr_exit_time_dpm0_ns / 1000.0f;
1002
- dc->dml.soc.sr_enter_plus_exit_time_us = v->sr_enter_plus_exit_time;
1003
- dc->dml.soc.sr_exit_time_us = v->sr_exit_time;
1093
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = v->sr_enter_plus_exit_time;
1094
+ context->bw_ctx.dml.soc.sr_exit_time_us = v->sr_exit_time;
10041095 mode_support_and_system_configuration(v);
10051096 }
10061097
1007
- if (v->voltage_level != 5) {
1098
+ display_pipe_configuration(v);
1099
+
1100
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
1101
+ if (v->source_scan[k] == dcn_bw_hor)
1102
+ v->swath_width_y[k] = v->viewport_width[k] / v->dpp_per_plane[k];
1103
+ else
1104
+ v->swath_width_y[k] = v->viewport_height[k] / v->dpp_per_plane[k];
1105
+ }
1106
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
1107
+ if (v->source_pixel_format[k] == dcn_bw_rgb_sub_64) {
1108
+ v->byte_per_pixel_dety[k] = 8.0;
1109
+ v->byte_per_pixel_detc[k] = 0.0;
1110
+ } else if (v->source_pixel_format[k] == dcn_bw_rgb_sub_32) {
1111
+ v->byte_per_pixel_dety[k] = 4.0;
1112
+ v->byte_per_pixel_detc[k] = 0.0;
1113
+ } else if (v->source_pixel_format[k] == dcn_bw_rgb_sub_16) {
1114
+ v->byte_per_pixel_dety[k] = 2.0;
1115
+ v->byte_per_pixel_detc[k] = 0.0;
1116
+ } else if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_8) {
1117
+ v->byte_per_pixel_dety[k] = 1.0;
1118
+ v->byte_per_pixel_detc[k] = 2.0;
1119
+ } else {
1120
+ v->byte_per_pixel_dety[k] = 4.0f / 3.0f;
1121
+ v->byte_per_pixel_detc[k] = 8.0f / 3.0f;
1122
+ }
1123
+ }
1124
+
1125
+ v->total_data_read_bandwidth = 0.0;
1126
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
1127
+ v->read_bandwidth_plane_luma[k] = v->swath_width_y[k] * v->dpp_per_plane[k] *
1128
+ dcn_bw_ceil2(v->byte_per_pixel_dety[k], 1.0) / (v->htotal[k] / v->pixel_clock[k]) * v->v_ratio[k];
1129
+ v->read_bandwidth_plane_chroma[k] = v->swath_width_y[k] / 2.0 * v->dpp_per_plane[k] *
1130
+ dcn_bw_ceil2(v->byte_per_pixel_detc[k], 2.0) / (v->htotal[k] / v->pixel_clock[k]) * v->v_ratio[k] / 2.0;
1131
+ v->total_data_read_bandwidth = v->total_data_read_bandwidth +
1132
+ v->read_bandwidth_plane_luma[k] + v->read_bandwidth_plane_chroma[k];
1133
+ }
1134
+
1135
+ BW_VAL_TRACE_END_VOLTAGE_LEVEL();
1136
+
1137
+ if (v->voltage_level != number_of_states_plus_one && !fast_validate) {
10081138 float bw_consumed = v->total_bandwidth_consumed_gbyte_per_second;
1139
+
10091140 if (bw_consumed < v->fabric_and_dram_bandwidth_vmin0p65)
10101141 bw_consumed = v->fabric_and_dram_bandwidth_vmin0p65;
10111142 else if (bw_consumed < v->fabric_and_dram_bandwidth_vmid0p72)
....@@ -1026,57 +1157,59 @@
10261157 */
10271158 dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(v);
10281159
1029
- context->bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns =
1160
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns =
10301161 v->stutter_exit_watermark * 1000;
1031
- context->bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns =
1162
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns =
10321163 v->stutter_enter_plus_exit_watermark * 1000;
1033
- context->bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns =
1164
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns =
10341165 v->dram_clock_change_watermark * 1000;
1035
- context->bw.dcn.watermarks.a.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000;
1036
- context->bw.dcn.watermarks.a.urgent_ns = v->urgent_watermark * 1000;
1037
- context->bw.dcn.watermarks.b = context->bw.dcn.watermarks.a;
1038
- context->bw.dcn.watermarks.c = context->bw.dcn.watermarks.a;
1039
- context->bw.dcn.watermarks.d = context->bw.dcn.watermarks.a;
1166
+ context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000;
1167
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = v->urgent_watermark * 1000;
1168
+ context->bw_ctx.bw.dcn.watermarks.b = context->bw_ctx.bw.dcn.watermarks.a;
1169
+ context->bw_ctx.bw.dcn.watermarks.c = context->bw_ctx.bw.dcn.watermarks.a;
1170
+ context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
10401171
1041
- context->bw.dcn.clk.fclk_khz = (int)(bw_consumed * 1000000 /
1172
+ context->bw_ctx.bw.dcn.clk.fclk_khz = (int)(bw_consumed * 1000000 /
10421173 (ddr4_dram_factor_single_Channel * v->number_of_channels));
1043
- if (bw_consumed == v->fabric_and_dram_bandwidth_vmin0p65) {
1044
- context->bw.dcn.clk.fclk_khz = (int)(bw_consumed * 1000000 / 32);
1045
- }
1174
+ if (bw_consumed == v->fabric_and_dram_bandwidth_vmin0p65)
1175
+ context->bw_ctx.bw.dcn.clk.fclk_khz = (int)(bw_consumed * 1000000 / 32);
10461176
1047
- context->bw.dcn.clk.dcfclk_deep_sleep_khz = (int)(v->dcf_clk_deep_sleep * 1000);
1048
- context->bw.dcn.clk.dcfclk_khz = (int)(v->dcfclk * 1000);
1177
+ context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = (int)(v->dcf_clk_deep_sleep * 1000);
1178
+ context->bw_ctx.bw.dcn.clk.dcfclk_khz = (int)(v->dcfclk * 1000);
10491179
1050
- context->bw.dcn.clk.dispclk_khz = (int)(v->dispclk * 1000);
1180
+ context->bw_ctx.bw.dcn.clk.dispclk_khz = (int)(v->dispclk * 1000);
10511181 if (dc->debug.max_disp_clk == true)
1052
- context->bw.dcn.clk.dispclk_khz = (int)(dc->dcn_soc->max_dispclk_vmax0p9 * 1000);
1182
+ context->bw_ctx.bw.dcn.clk.dispclk_khz = (int)(dc->dcn_soc->max_dispclk_vmax0p9 * 1000);
10531183
1054
- if (context->bw.dcn.clk.dispclk_khz <
1184
+ if (context->bw_ctx.bw.dcn.clk.dispclk_khz <
10551185 dc->debug.min_disp_clk_khz) {
1056
- context->bw.dcn.clk.dispclk_khz =
1186
+ context->bw_ctx.bw.dcn.clk.dispclk_khz =
10571187 dc->debug.min_disp_clk_khz;
10581188 }
10591189
1060
- context->bw.dcn.clk.dppclk_khz = context->bw.dcn.clk.dispclk_khz / v->dispclk_dppclk_ratio;
1061
- context->bw.dcn.clk.phyclk_khz = v->phyclk_per_state[v->voltage_level];
1190
+ context->bw_ctx.bw.dcn.clk.dppclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz /
1191
+ v->dispclk_dppclk_ratio;
1192
+ context->bw_ctx.bw.dcn.clk.phyclk_khz = v->phyclk_per_state[v->voltage_level];
10621193 switch (v->voltage_level) {
10631194 case 0:
1064
- context->bw.dcn.clk.max_supported_dppclk_khz =
1195
+ context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz =
10651196 (int)(dc->dcn_soc->max_dppclk_vmin0p65 * 1000);
10661197 break;
10671198 case 1:
1068
- context->bw.dcn.clk.max_supported_dppclk_khz =
1199
+ context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz =
10691200 (int)(dc->dcn_soc->max_dppclk_vmid0p72 * 1000);
10701201 break;
10711202 case 2:
1072
- context->bw.dcn.clk.max_supported_dppclk_khz =
1203
+ context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz =
10731204 (int)(dc->dcn_soc->max_dppclk_vnom0p8 * 1000);
10741205 break;
10751206 default:
1076
- context->bw.dcn.clk.max_supported_dppclk_khz =
1207
+ context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz =
10771208 (int)(dc->dcn_soc->max_dppclk_vmax0p9 * 1000);
10781209 break;
10791210 }
1211
+
1212
+ BW_VAL_TRACE_END_WATERMARKS();
10801213
10811214 for (i = 0, input_idx = 0; i < pool->pipe_count; i++) {
10821215 struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
....@@ -1088,9 +1221,9 @@
10881221 if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state)
10891222 continue;
10901223
1091
- pipe->pipe_dlg_param.vupdate_width = v->v_update_width[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0];
1092
- pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0];
1093
- pipe->pipe_dlg_param.vready_offset = v->v_ready_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0];
1224
+ pipe->pipe_dlg_param.vupdate_width = v->v_update_width_pix[input_idx];
1225
+ pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset_pix[input_idx];
1226
+ pipe->pipe_dlg_param.vready_offset = v->v_ready_offset_pix[input_idx];
10941227 pipe->pipe_dlg_param.vstartup_start = v->v_startup[input_idx];
10951228
10961229 pipe->pipe_dlg_param.htotal = pipe->stream->timing.h_total;
....@@ -1129,9 +1262,9 @@
11291262 TIMING_3D_FORMAT_SIDE_BY_SIDE))) {
11301263 if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) {
11311264 /* update previously split pipe */
1132
- hsplit_pipe->pipe_dlg_param.vupdate_width = v->v_update_width[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0];
1133
- hsplit_pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0];
1134
- hsplit_pipe->pipe_dlg_param.vready_offset = v->v_ready_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0];
1265
+ hsplit_pipe->pipe_dlg_param.vupdate_width = v->v_update_width_pix[input_idx];
1266
+ hsplit_pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset_pix[input_idx];
1267
+ hsplit_pipe->pipe_dlg_param.vready_offset = v->v_ready_offset_pix[input_idx];
11351268 hsplit_pipe->pipe_dlg_param.vstartup_start = v->v_startup[input_idx];
11361269
11371270 hsplit_pipe->pipe_dlg_param.htotal = pipe->stream->timing.h_total;
....@@ -1140,11 +1273,9 @@
11401273 hsplit_pipe->pipe_dlg_param.vblank_end = pipe->pipe_dlg_param.vblank_end;
11411274 } else {
11421275 /* pipe not split previously needs split */
1143
- hsplit_pipe = find_idle_secondary_pipe(&context->res_ctx, pool);
1276
+ hsplit_pipe = find_idle_secondary_pipe(&context->res_ctx, pool, pipe);
11441277 ASSERT(hsplit_pipe);
1145
- split_stream_across_pipes(
1146
- &context->res_ctx, pool,
1147
- pipe, hsplit_pipe);
1278
+ split_stream_across_pipes(&context->res_ctx, pool, pipe, hsplit_pipe);
11481279 }
11491280
11501281 dcn_bw_calc_rq_dlg_ttu(dc, v, hsplit_pipe, input_idx);
....@@ -1168,13 +1299,16 @@
11681299
11691300 input_idx++;
11701301 }
1302
+ } else if (v->voltage_level == number_of_states_plus_one) {
1303
+ BW_VAL_TRACE_SKIP(fail);
1304
+ } else if (fast_validate) {
1305
+ BW_VAL_TRACE_SKIP(fast);
11711306 }
11721307
11731308 if (v->voltage_level == 0) {
1174
-
1175
- dc->dml.soc.sr_enter_plus_exit_time_us =
1309
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us =
11761310 dc->dcn_soc->sr_enter_plus_exit_time;
1177
- dc->dml.soc.sr_exit_time_us = dc->dcn_soc->sr_exit_time;
1311
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->dcn_soc->sr_exit_time;
11781312 }
11791313
11801314 /*
....@@ -1184,11 +1318,14 @@
11841318 bw_limit = dc->dcn_soc->percent_disp_bw_limit * v->fabric_and_dram_bandwidth_vmax0p9;
11851319 bw_limit_pass = (v->total_data_read_bandwidth / 1000.0) < bw_limit;
11861320
1187
- kernel_fpu_end();
1321
+ DC_FP_END();
11881322
11891323 PERFORMANCE_TRACE_END();
1324
+ BW_VAL_TRACE_FINISH();
11901325
1191
- if (bw_limit_pass && v->voltage_level != 5)
1326
+ if (bw_limit_pass && v->voltage_level <= get_highest_allowed_voltage_level(
1327
+ dc->ctx->asic_id.hw_internal_rev,
1328
+ dc->ctx->asic_id.pci_revision_id))
11921329 return true;
11931330 else
11941331 return false;
....@@ -1352,7 +1489,7 @@
13521489 res = dm_pp_get_clock_levels_by_type_with_voltage(
13531490 ctx, DM_PP_CLOCK_TYPE_FCLK, &fclks);
13541491
1355
- kernel_fpu_begin();
1492
+ DC_FP_START();
13561493
13571494 if (res)
13581495 res = verify_clock_values(&fclks);
....@@ -1383,12 +1520,12 @@
13831520 } else
13841521 BREAK_TO_DEBUGGER();
13851522
1386
- kernel_fpu_end();
1523
+ DC_FP_END();
13871524
13881525 res = dm_pp_get_clock_levels_by_type_with_voltage(
13891526 ctx, DM_PP_CLOCK_TYPE_DCFCLK, &dcfclks);
13901527
1391
- kernel_fpu_begin();
1528
+ DC_FP_START();
13921529
13931530 if (res)
13941531 res = verify_clock_values(&dcfclks);
....@@ -1401,24 +1538,26 @@
14011538 } else
14021539 BREAK_TO_DEBUGGER();
14031540
1404
- kernel_fpu_end();
1541
+ DC_FP_END();
14051542 }
14061543
14071544 void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc)
14081545 {
1409
- struct pp_smu_funcs_rv *pp = dc->res_pool->pp_smu;
1546
+ struct pp_smu_funcs_rv *pp = NULL;
14101547 struct pp_smu_wm_range_sets ranges = {0};
14111548 int min_fclk_khz, min_dcfclk_khz, socclk_khz;
14121549 const int overdrive = 5000000; /* 5 GHz to cover Overdrive */
14131550
1414
- if (!pp->set_wm_ranges)
1551
+ if (dc->res_pool->pp_smu)
1552
+ pp = &dc->res_pool->pp_smu->rv_funcs;
1553
+ if (!pp || !pp->set_wm_ranges)
14151554 return;
14161555
1417
- kernel_fpu_begin();
1556
+ DC_FP_START();
14181557 min_fclk_khz = dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 * 1000000 / 32;
14191558 min_dcfclk_khz = dc->dcn_soc->dcfclkv_min0p65 * 1000;
14201559 socclk_khz = dc->dcn_soc->socclk * 1000;
1421
- kernel_fpu_end();
1560
+ DC_FP_END();
14221561
14231562 /* Now notify PPLib/SMU about which Watermarks sets they should select
14241563 * depending on DPM state they are in. And update BW MGR GFX Engine and
....@@ -1431,27 +1570,27 @@
14311570 ranges.num_reader_wm_sets = WM_SET_COUNT;
14321571 ranges.num_writer_wm_sets = WM_SET_COUNT;
14331572 ranges.reader_wm_sets[0].wm_inst = WM_A;
1434
- ranges.reader_wm_sets[0].min_drain_clk_khz = min_dcfclk_khz;
1435
- ranges.reader_wm_sets[0].max_drain_clk_khz = overdrive;
1436
- ranges.reader_wm_sets[0].min_fill_clk_khz = min_fclk_khz;
1437
- ranges.reader_wm_sets[0].max_fill_clk_khz = overdrive;
1573
+ ranges.reader_wm_sets[0].min_drain_clk_mhz = min_dcfclk_khz / 1000;
1574
+ ranges.reader_wm_sets[0].max_drain_clk_mhz = overdrive / 1000;
1575
+ ranges.reader_wm_sets[0].min_fill_clk_mhz = min_fclk_khz / 1000;
1576
+ ranges.reader_wm_sets[0].max_fill_clk_mhz = overdrive / 1000;
14381577 ranges.writer_wm_sets[0].wm_inst = WM_A;
1439
- ranges.writer_wm_sets[0].min_fill_clk_khz = socclk_khz;
1440
- ranges.writer_wm_sets[0].max_fill_clk_khz = overdrive;
1441
- ranges.writer_wm_sets[0].min_drain_clk_khz = min_fclk_khz;
1442
- ranges.writer_wm_sets[0].max_drain_clk_khz = overdrive;
1578
+ ranges.writer_wm_sets[0].min_fill_clk_mhz = socclk_khz / 1000;
1579
+ ranges.writer_wm_sets[0].max_fill_clk_mhz = overdrive / 1000;
1580
+ ranges.writer_wm_sets[0].min_drain_clk_mhz = min_fclk_khz / 1000;
1581
+ ranges.writer_wm_sets[0].max_drain_clk_mhz = overdrive / 1000;
14431582
14441583 if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE) {
14451584 ranges.reader_wm_sets[0].wm_inst = WM_A;
1446
- ranges.reader_wm_sets[0].min_drain_clk_khz = 300000;
1447
- ranges.reader_wm_sets[0].max_drain_clk_khz = 5000000;
1448
- ranges.reader_wm_sets[0].min_fill_clk_khz = 800000;
1449
- ranges.reader_wm_sets[0].max_fill_clk_khz = 5000000;
1585
+ ranges.reader_wm_sets[0].min_drain_clk_mhz = 300;
1586
+ ranges.reader_wm_sets[0].max_drain_clk_mhz = 5000;
1587
+ ranges.reader_wm_sets[0].min_fill_clk_mhz = 800;
1588
+ ranges.reader_wm_sets[0].max_fill_clk_mhz = 5000;
14501589 ranges.writer_wm_sets[0].wm_inst = WM_A;
1451
- ranges.writer_wm_sets[0].min_fill_clk_khz = 200000;
1452
- ranges.writer_wm_sets[0].max_fill_clk_khz = 5000000;
1453
- ranges.writer_wm_sets[0].min_drain_clk_khz = 800000;
1454
- ranges.writer_wm_sets[0].max_drain_clk_khz = 5000000;
1590
+ ranges.writer_wm_sets[0].min_fill_clk_mhz = 200;
1591
+ ranges.writer_wm_sets[0].max_fill_clk_mhz = 5000;
1592
+ ranges.writer_wm_sets[0].min_drain_clk_mhz = 800;
1593
+ ranges.writer_wm_sets[0].max_drain_clk_mhz = 5000;
14551594 }
14561595
14571596 ranges.reader_wm_sets[1] = ranges.writer_wm_sets[0];
....@@ -1469,7 +1608,7 @@
14691608
14701609 void dcn_bw_sync_calcs_and_dml(struct dc *dc)
14711610 {
1472
- kernel_fpu_begin();
1611
+ DC_FP_START();
14731612 DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %f ns\n"
14741613 "sr_enter_plus_exit_time: %f ns\n"
14751614 "urgent_latency: %f ns\n"
....@@ -1658,5 +1797,5 @@
16581797 dc->dml.ip.bug_forcing_LC_req_same_size_fixed =
16591798 dc->dcn_ip->bug_forcing_luma_and_chroma_request_to_same_size_fixed == dcn_bw_yes;
16601799 dc->dml.ip.dcfclk_cstate_latency = dc->dcn_ip->dcfclk_cstate_latency;
1661
- kernel_fpu_end();
1800
+ DC_FP_END();
16621801 }