hc
2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
....@@ -22,9 +22,11 @@
2222 * OTHER DEALINGS IN THE SOFTWARE.
2323 */
2424
25
-#include <drm/drmP.h>
2625 #include <drm/amdgpu_drm.h>
26
+#include <drm/drm_drv.h>
2727 #include <drm/drm_gem.h>
28
+#include <drm/drm_vblank.h>
29
+#include <drm/drm_managed.h>
2830 #include "amdgpu_drv.h"
2931
3032 #include <drm/drm_pciids.h>
....@@ -32,12 +34,16 @@
3234 #include <linux/module.h>
3335 #include <linux/pm_runtime.h>
3436 #include <linux/vga_switcheroo.h>
35
-#include <drm/drm_crtc_helper.h>
37
+#include <drm/drm_probe_helper.h>
38
+#include <linux/mmu_notifier.h>
3639
3740 #include "amdgpu.h"
3841 #include "amdgpu_irq.h"
42
+#include "amdgpu_dma_buf.h"
3943
4044 #include "amdgpu_amdkfd.h"
45
+
46
+#include "amdgpu_ras.h"
4147
4248 /*
4349 * KMS wrapper.
....@@ -70,9 +76,22 @@
7076 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
7177 * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
7278 * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation.
79
+ * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
80
+ * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
81
+ * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE.
82
+ * - 3.31.0 - Add support for per-flip tiling attribute changes with DC
83
+ * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS.
84
+ * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS.
85
+ * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches
86
+ * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask
87
+ * - 3.36.0 - Allow reading more status registers on si/cik
88
+ * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
89
+ * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
90
+ * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
91
+ * - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ
7392 */
7493 #define KMS_DRIVER_MAJOR 3
75
-#define KMS_DRIVER_MINOR 27
94
+#define KMS_DRIVER_MINOR 40
7695 #define KMS_DRIVER_PATCHLEVEL 0
7796
7897 int amdgpu_vram_limit = 0;
....@@ -87,7 +106,7 @@
87106 int amdgpu_hw_i2c = 0;
88107 int amdgpu_pcie_gen2 = -1;
89108 int amdgpu_msi = -1;
90
-int amdgpu_lockup_timeout = 10000;
109
+char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
91110 int amdgpu_dpm = -1;
92111 int amdgpu_fw_load_type = -1;
93112 int amdgpu_aspm = -1;
....@@ -100,7 +119,6 @@
100119 int amdgpu_vm_block_size = -1;
101120 int amdgpu_vm_fault_stop = 0;
102121 int amdgpu_vm_debug = 0;
103
-int amdgpu_vram_page_split = 512;
104122 int amdgpu_vm_update_mode = -1;
105123 int amdgpu_exp_hw_support = 0;
106124 int amdgpu_dc = -1;
....@@ -113,19 +131,34 @@
113131 uint amdgpu_sdma_phase_quantum = 32;
114132 char *amdgpu_disable_cu = NULL;
115133 char *amdgpu_virtual_display = NULL;
116
-/* OverDrive(bit 14),gfxoff(bit 15),stutter mode(bit 17) disabled by default*/
117
-uint amdgpu_pp_feature_mask = 0xfffd3fff;
118
-int amdgpu_ngg = 0;
119
-int amdgpu_prim_buf_per_se = 0;
120
-int amdgpu_pos_buf_per_se = 0;
121
-int amdgpu_cntl_sb_buf_per_se = 0;
122
-int amdgpu_param_buf_per_se = 0;
134
+/* OverDrive(bit 14) disabled by default*/
135
+uint amdgpu_pp_feature_mask = 0xffffbfff;
136
+uint amdgpu_force_long_training = 0;
123137 int amdgpu_job_hang_limit = 0;
124138 int amdgpu_lbpw = -1;
125139 int amdgpu_compute_multipipe = -1;
126140 int amdgpu_gpu_recovery = -1; /* auto */
127141 int amdgpu_emu_mode = 0;
128142 uint amdgpu_smu_memory_pool_size = 0;
143
+/* FBC (bit 0) disabled by default*/
144
+uint amdgpu_dc_feature_mask = 0;
145
+uint amdgpu_dc_debug_mask = 0;
146
+int amdgpu_async_gfx_ring = 1;
147
+int amdgpu_mcbp = 0;
148
+int amdgpu_discovery = -1;
149
+int amdgpu_mes = 0;
150
+int amdgpu_noretry = -1;
151
+int amdgpu_force_asic_type = -1;
152
+int amdgpu_tmz = 0;
153
+int amdgpu_reset_method = -1; /* auto */
154
+int amdgpu_num_kcq = -1;
155
+
156
+struct amdgpu_mgpu_info mgpu_info = {
157
+ .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
158
+};
159
+int amdgpu_ras_enable = -1;
160
+uint amdgpu_ras_mask = 0xffffffff;
161
+int amdgpu_bad_page_threshold = -1;
129162
130163 /**
131164 * DOC: vramlimit (int)
....@@ -213,16 +246,33 @@
213246 module_param_named(msi, amdgpu_msi, int, 0444);
214247
215248 /**
216
- * DOC: lockup_timeout (int)
217
- * Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000.
218
- * Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000.
249
+ * DOC: lockup_timeout (string)
250
+ * Set GPU scheduler timeout value in ms.
251
+ *
252
+ * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
253
+ * multiple values specified. A value of 0 keeps the default timeout and a negative
254
+ * value means an infinite timeout.
255
+ *
256
+ * - With one value specified, the setting will apply to all non-compute jobs.
257
+ * - With multiple values specified, the first one will be for GFX.
258
+ * The second one is for Compute. The third and fourth ones are
259
+ * for SDMA and Video.
260
+ *
261
+ * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
262
+ * jobs is 10000. And there is no timeout enforced on compute jobs.
219263 */
220
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)");
221
-module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444);
264
+MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; "
265
+ "for passthrough or sriov, 10000 for all jobs."
266
+ " 0: keep default value. negative: infinity timeout), "
267
+ "format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
268
+ "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video].");
269
+module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
222270
223271 /**
224272 * DOC: dpm (int)
225
- * Override for dynamic power management setting (1 = enable, 0 = disable). The default is -1 (auto).
273
+ * Override for dynamic power management setting
274
+ * (0 = disable, 1 = enable, 2 = enable sw smu driver for vega20)
275
+ * The default is -1 (auto).
226276 */
227277 MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)");
228278 module_param_named(dpm, amdgpu_dpm, int, 0444);
....@@ -318,13 +368,6 @@
318368 module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
319369
320370 /**
321
- * DOC: vram_page_split (int)
322
- * Override the number of pages after we split VRAM allocations (default 512, -1 = disable). The default is 512.
323
- */
324
-MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)");
325
-module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444);
326
-
327
-/**
328371 * DOC: exp_hw_support (int)
329372 * Enable experimental hw support (1 = enable). The default is 0 (disabled).
330373 */
....@@ -353,12 +396,20 @@
353396 module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
354397
355398 /**
356
- * DOC: ppfeaturemask (uint)
399
+ * DOC: ppfeaturemask (hexint)
357400 * Override power features enabled. See enum PP_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
358401 * The default is the current set of stable power features.
359402 */
360403 MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
361
-module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444);
404
+module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, hexint, 0444);
405
+
406
+/**
407
+ * DOC: forcelongtraining (uint)
408
+ * Force long memory training in resume.
409
+ * The default is zero, indicates short training in resume.
410
+ */
411
+MODULE_PARM_DESC(forcelongtraining, "force memory long training");
412
+module_param_named(forcelongtraining, amdgpu_force_long_training, uint, 0444);
362413
363414 /**
364415 * DOC: pcie_gen_cap (uint)
....@@ -418,41 +469,6 @@
418469 module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
419470
420471 /**
421
- * DOC: ngg (int)
422
- * Set to enable Next Generation Graphics (1 = enable). The default is 0 (disabled).
423
- */
424
-MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))");
425
-module_param_named(ngg, amdgpu_ngg, int, 0444);
426
-
427
-/**
428
- * DOC: prim_buf_per_se (int)
429
- * Override the size of Primitive Buffer per Shader Engine in Byte. The default is 0 (depending on gfx).
430
- */
431
-MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)");
432
-module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444);
433
-
434
-/**
435
- * DOC: pos_buf_per_se (int)
436
- * Override the size of Position Buffer per Shader Engine in Byte. The default is 0 (depending on gfx).
437
- */
438
-MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)");
439
-module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444);
440
-
441
-/**
442
- * DOC: cntl_sb_buf_per_se (int)
443
- * Override the size of Control Sideband per Shader Engine in Byte. The default is 0 (depending on gfx).
444
- */
445
-MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)");
446
-module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
447
-
448
-/**
449
- * DOC: param_buf_per_se (int)
450
- * Override the size of Off-Chip Pramater Cache per Shader Engine in Byte. The default is 0 (depending on gfx).
451
- */
452
-MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)");
453
-module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
454
-
455
-/**
456472 * DOC: job_hang_limit (int)
457473 * Set how much time allow a job hang and not drop it. The default is 0.
458474 */
....@@ -482,6 +498,21 @@
482498 */
483499 MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");
484500 module_param_named(emu_mode, amdgpu_emu_mode, int, 0444);
501
+
502
+/**
503
+ * DOC: ras_enable (int)
504
+ * Enable RAS features on the GPU (0 = disable, 1 = enable, -1 = auto (default))
505
+ */
506
+MODULE_PARM_DESC(ras_enable, "Enable RAS features on the GPU (0 = disable, 1 = enable, -1 = auto (default))");
507
+module_param_named(ras_enable, amdgpu_ras_enable, int, 0444);
508
+
509
+/**
510
+ * DOC: ras_mask (uint)
511
+ * Mask of RAS features to enable (default 0xffffffff), only valid when ras_enable == 1
512
+ * See the flags in drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
513
+ */
514
+MODULE_PARM_DESC(ras_mask, "Mask of RAS features to enable (default 0xffffffff), only valid when ras_enable == 1");
515
+module_param_named(ras_mask, amdgpu_ras_mask, uint, 0444);
485516
486517 /**
487518 * DOC: si_support (int)
....@@ -530,6 +561,246 @@
530561 "reserve gtt for smu debug usage, 0 = disable,"
531562 "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
532563 module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
564
+
565
+/**
566
+ * DOC: async_gfx_ring (int)
567
+ * It is used to enable gfx rings that could be configured with different priorities or equal priorities
568
+ */
569
+MODULE_PARM_DESC(async_gfx_ring,
570
+ "Asynchronous GFX rings that could be configured with either different priorities (HP3D ring and LP3D ring), or equal priorities (0 = disabled, 1 = enabled (default))");
571
+module_param_named(async_gfx_ring, amdgpu_async_gfx_ring, int, 0444);
572
+
573
+/**
574
+ * DOC: mcbp (int)
575
+ * It is used to enable mid command buffer preemption. (0 = disabled (default), 1 = enabled)
576
+ */
577
+MODULE_PARM_DESC(mcbp,
578
+ "Enable Mid-command buffer preemption (0 = disabled (default), 1 = enabled)");
579
+module_param_named(mcbp, amdgpu_mcbp, int, 0444);
580
+
581
+/**
582
+ * DOC: discovery (int)
583
+ * Allow driver to discover hardware IP information from IP Discovery table at the top of VRAM.
584
+ * (-1 = auto (default), 0 = disabled, 1 = enabled)
585
+ */
586
+MODULE_PARM_DESC(discovery,
587
+ "Allow driver to discover hardware IPs from IP Discovery table at the top of VRAM");
588
+module_param_named(discovery, amdgpu_discovery, int, 0444);
589
+
590
+/**
591
+ * DOC: mes (int)
592
+ * Enable Micro Engine Scheduler. This is a new hw scheduling engine for gfx, sdma, and compute.
593
+ * (0 = disabled (default), 1 = enabled)
594
+ */
595
+MODULE_PARM_DESC(mes,
596
+ "Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)");
597
+module_param_named(mes, amdgpu_mes, int, 0444);
598
+
599
+/**
600
+ * DOC: noretry (int)
601
+ * Disable retry faults in the GPU memory controller.
602
+ * (0 = retry enabled, 1 = retry disabled, -1 auto (default))
603
+ */
604
+MODULE_PARM_DESC(noretry,
605
+ "Disable retry faults (0 = retry enabled, 1 = retry disabled, -1 auto (default))");
606
+module_param_named(noretry, amdgpu_noretry, int, 0644);
607
+
608
+/**
609
+ * DOC: force_asic_type (int)
610
+ * A non negative value used to specify the asic type for all supported GPUs.
611
+ */
612
+MODULE_PARM_DESC(force_asic_type,
613
+ "A non negative value used to specify the asic type for all supported GPUs");
614
+module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444);
615
+
616
+
617
+
618
+#ifdef CONFIG_HSA_AMD
619
+/**
620
+ * DOC: sched_policy (int)
621
+ * Set scheduling policy. Default is HWS(hardware scheduling) with over-subscription.
622
+ * Setting 1 disables over-subscription. Setting 2 disables HWS and statically
623
+ * assigns queues to HQDs.
624
+ */
625
+int sched_policy = KFD_SCHED_POLICY_HWS;
626
+module_param(sched_policy, int, 0444);
627
+MODULE_PARM_DESC(sched_policy,
628
+ "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
629
+
630
+/**
631
+ * DOC: hws_max_conc_proc (int)
632
+ * Maximum number of processes that HWS can schedule concurrently. The maximum is the
633
+ * number of VMIDs assigned to the HWS, which is also the default.
634
+ */
635
+int hws_max_conc_proc = -1;
636
+module_param(hws_max_conc_proc, int, 0444);
637
+MODULE_PARM_DESC(hws_max_conc_proc,
638
+ "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
639
+
640
+/**
641
+ * DOC: cwsr_enable (int)
642
+ * CWSR(compute wave store and resume) allows the GPU to preempt shader execution in
643
+ * the middle of a compute wave. Default is 1 to enable this feature. Setting 0
644
+ * disables it.
645
+ */
646
+int cwsr_enable = 1;
647
+module_param(cwsr_enable, int, 0444);
648
+MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
649
+
650
+/**
651
+ * DOC: max_num_of_queues_per_device (int)
652
+ * Maximum number of queues per device. Valid setting is between 1 and 4096. Default
653
+ * is 4096.
654
+ */
655
+int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
656
+module_param(max_num_of_queues_per_device, int, 0444);
657
+MODULE_PARM_DESC(max_num_of_queues_per_device,
658
+ "Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
659
+
660
+/**
661
+ * DOC: send_sigterm (int)
662
+ * Send sigterm to HSA process on unhandled exceptions. Default is not to send sigterm
663
+ * but just print errors on dmesg. Setting 1 enables sending sigterm.
664
+ */
665
+int send_sigterm;
666
+module_param(send_sigterm, int, 0444);
667
+MODULE_PARM_DESC(send_sigterm,
668
+ "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
669
+
670
+/**
671
+ * DOC: debug_largebar (int)
672
+ * Set debug_largebar as 1 to enable simulating large-bar capability on non-large bar
673
+ * system. This limits the VRAM size reported to ROCm applications to the visible
674
+ * size, usually 256MB.
675
+ * Default value is 0, disabled.
676
+ */
677
+int debug_largebar;
678
+module_param(debug_largebar, int, 0444);
679
+MODULE_PARM_DESC(debug_largebar,
680
+ "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
681
+
682
+/**
683
+ * DOC: ignore_crat (int)
684
+ * Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT
685
+ * table to get information about AMD APUs. This option can serve as a workaround on
686
+ * systems with a broken CRAT table.
687
+ *
688
+ * Default is auto (according to asic type, iommu_v2, and crat table, to decide
689
+ * whether to use CRAT)
690
+ */
691
+int ignore_crat;
692
+module_param(ignore_crat, int, 0444);
693
+MODULE_PARM_DESC(ignore_crat,
694
+ "Ignore CRAT table during KFD initialization (0 = auto (default), 1 = ignore CRAT)");
695
+
696
+/**
697
+ * DOC: halt_if_hws_hang (int)
698
+ * Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
699
+ * Setting 1 enables halt on hang.
700
+ */
701
+int halt_if_hws_hang;
702
+module_param(halt_if_hws_hang, int, 0644);
703
+MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
704
+
705
+/**
706
+ * DOC: hws_gws_support(bool)
707
+ * Assume that HWS supports GWS barriers regardless of what firmware version
708
+ * check says. Default value: false (rely on MEC2 firmware version check).
709
+ */
710
+bool hws_gws_support;
711
+module_param(hws_gws_support, bool, 0444);
712
+MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)");
713
+
714
+/**
715
+ * DOC: queue_preemption_timeout_ms (int)
716
+ * queue preemption timeout in ms (1 = Minimum, 9000 = default)
717
+ */
718
+int queue_preemption_timeout_ms = 9000;
719
+module_param(queue_preemption_timeout_ms, int, 0644);
720
+MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)");
721
+
722
+/**
723
+ * DOC: debug_evictions(bool)
724
+ * Enable extra debug messages to help determine the cause of evictions
725
+ */
726
+bool debug_evictions;
727
+module_param(debug_evictions, bool, 0644);
728
+MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = default)");
729
+
730
+/**
731
+ * DOC: no_system_mem_limit(bool)
732
+ * Disable system memory limit, to support multiple process shared memory
733
+ */
734
+bool no_system_mem_limit;
735
+module_param(no_system_mem_limit, bool, 0644);
736
+MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)");
737
+
738
+#endif
739
+
740
+/**
741
+ * DOC: dcfeaturemask (uint)
742
+ * Override display features enabled. See enum DC_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
743
+ * The default is the current set of stable display features.
744
+ */
745
+MODULE_PARM_DESC(dcfeaturemask, "all stable DC features enabled (default))");
746
+module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444);
747
+
748
+/**
749
+ * DOC: dcdebugmask (uint)
750
+ * Override display features enabled. See enum DC_DEBUG_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
751
+ */
752
+MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))");
753
+module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444);
754
+
755
+/**
756
+ * DOC: abmlevel (uint)
757
+ * Override the default ABM (Adaptive Backlight Management) level used for DC
758
+ * enabled hardware. Requires DMCU to be supported and loaded.
759
+ * Valid levels are 0-4. A value of 0 indicates that ABM should be disabled by
760
+ * default. Values 1-4 control the maximum allowable brightness reduction via
761
+ * the ABM algorithm, with 1 being the least reduction and 4 being the most
762
+ * reduction.
763
+ *
764
+ * Defaults to 0, or disabled. Userspace can still override this level later
765
+ * after boot.
766
+ */
767
+uint amdgpu_dm_abm_level = 0;
768
+MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) ");
769
+module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
770
+
771
+int amdgpu_backlight = -1;
772
+MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (default))");
773
+module_param_named(backlight, amdgpu_backlight, bint, 0444);
774
+
775
+/**
776
+ * DOC: tmz (int)
777
+ * Trusted Memory Zone (TMZ) is a method to protect data being written
778
+ * to or read from memory.
779
+ *
780
+ * The default value: 0 (off). TODO: change to auto till it is completed.
781
+ */
782
+MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)");
783
+module_param_named(tmz, amdgpu_tmz, int, 0444);
784
+
785
+/**
786
+ * DOC: reset_method (int)
787
+ * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)
788
+ */
789
+MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
790
+module_param_named(reset_method, amdgpu_reset_method, int, 0444);
791
+
792
+/**
793
+ * DOC: bad_page_threshold (int)
794
+ * Bad page threshold is to specify the threshold value of faulty pages
795
+ * detected by RAS ECC, that may result in GPU entering bad status if total
796
+ * faulty pages by ECC exceed threshold value and leave it for user's further
797
+ * check.
798
+ */
799
+MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = auto(default value), 0 = disable bad page retirement)");
800
+module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);
801
+
802
+MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
803
+module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
533804
534805 static const struct pci_device_id pciidlist[] = {
535806 #ifdef CONFIG_DRM_AMDGPU_SI
....@@ -777,14 +1048,53 @@
7771048 {0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
7781049 {0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
7791050 /* Vega 20 */
780
- {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
781
- {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
782
- {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
783
- {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
784
- {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
785
- {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
1051
+ {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
1052
+ {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
1053
+ {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
1054
+ {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
1055
+ {0x1002, 0x66A4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
1056
+ {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
1057
+ {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
7861058 /* Raven */
7871059 {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
1060
+ {0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
1061
+ /* Arcturus */
1062
+ {0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
1063
+ {0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
1064
+ {0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
1065
+ {0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
1066
+ /* Navi10 */
1067
+ {0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
1068
+ {0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
1069
+ {0x1002, 0x7318, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
1070
+ {0x1002, 0x7319, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
1071
+ {0x1002, 0x731A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
1072
+ {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
1073
+ {0x1002, 0x731E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
1074
+ {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
1075
+ /* Navi14 */
1076
+ {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
1077
+ {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
1078
+ {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
1079
+ {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
1080
+
1081
+ /* Renoir */
1082
+ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
1083
+ {0x1002, 0x1638, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
1084
+ {0x1002, 0x164C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
1085
+
1086
+ /* Navi12 */
1087
+ {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12},
1088
+ {0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12},
1089
+
1090
+ /* Sienna_Cichlid */
1091
+ {0x1002, 0x73A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
1092
+ {0x1002, 0x73A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
1093
+ {0x1002, 0x73A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
1094
+ {0x1002, 0x73AB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
1095
+ {0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
1096
+ {0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
1097
+ {0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
7881098
7891099 {0, 0, 0}
7901100 };
....@@ -793,32 +1103,11 @@
7931103
7941104 static struct drm_driver kms_driver;
7951105
796
-static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev)
797
-{
798
- struct apertures_struct *ap;
799
- bool primary = false;
800
-
801
- ap = alloc_apertures(1);
802
- if (!ap)
803
- return -ENOMEM;
804
-
805
- ap->ranges[0].base = pci_resource_start(pdev, 0);
806
- ap->ranges[0].size = pci_resource_len(pdev, 0);
807
-
808
-#ifdef CONFIG_X86
809
- primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
810
-#endif
811
- drm_fb_helper_remove_conflicting_framebuffers(ap, "amdgpudrmfb", primary);
812
- kfree(ap);
813
-
814
- return 0;
815
-}
816
-
817
-
8181106 static int amdgpu_pci_probe(struct pci_dev *pdev,
8191107 const struct pci_device_id *ent)
8201108 {
821
- struct drm_device *dev;
1109
+ struct drm_device *ddev;
1110
+ struct amdgpu_device *adev;
8221111 unsigned long flags = ent->driver_data;
8231112 int ret, retry = 0;
8241113 bool supports_atomic = false;
....@@ -832,14 +1121,25 @@
8321121 "See modparam exp_hw_support\n");
8331122 return -ENODEV;
8341123 }
1124
+ /* differentiate between P10 and P11 asics with the same DID */
1125
+ if (pdev->device == 0x67FF &&
1126
+ (pdev->revision == 0xE3 ||
1127
+ pdev->revision == 0xE7 ||
1128
+ pdev->revision == 0xF3 ||
1129
+ pdev->revision == 0xF7)) {
1130
+ flags &= ~AMD_ASIC_MASK;
1131
+ flags |= CHIP_POLARIS10;
1132
+ }
8351133
836
- /*
837
- * Initialize amdkfd before starting radeon. If it was not loaded yet,
838
- * defer radeon probing
1134
+ /* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping,
1135
+ * however, SME requires an indirect IOMMU mapping because the encryption
1136
+ * bit is beyond the DMA mask of the chip.
8391137 */
840
- ret = amdgpu_amdkfd_init();
841
- if (ret == -EPROBE_DEFER)
842
- return ret;
1138
+ if (mem_encrypt_active() && ((flags & AMD_ASIC_MASK) == CHIP_RAVEN)) {
1139
+ dev_info(&pdev->dev,
1140
+ "SME is not compatible with RAVEN\n");
1141
+ return -ENOTSUPP;
1142
+ }
8431143
8441144 #ifdef CONFIG_DRM_AMDGPU_SI
8451145 if (!amdgpu_si_support) {
....@@ -877,45 +1177,51 @@
8771177 #endif
8781178
8791179 /* Get rid of things like offb */
880
- ret = amdgpu_kick_out_firmware_fb(pdev);
1180
+ ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "amdgpudrmfb");
8811181 if (ret)
8821182 return ret;
8831183
884
- /* warn the user if they mix atomic and non-atomic capable GPUs */
885
- if ((kms_driver.driver_features & DRIVER_ATOMIC) && !supports_atomic)
886
- DRM_ERROR("Mixing atomic and non-atomic capable GPUs!\n");
887
- /* support atomic early so the atomic debugfs stuff gets created */
888
- if (supports_atomic)
889
- kms_driver.driver_features |= DRIVER_ATOMIC;
1184
+ adev = devm_drm_dev_alloc(&pdev->dev, &kms_driver, typeof(*adev), ddev);
1185
+ if (IS_ERR(adev))
1186
+ return PTR_ERR(adev);
8901187
891
- dev = drm_dev_alloc(&kms_driver, &pdev->dev);
892
- if (IS_ERR(dev))
893
- return PTR_ERR(dev);
1188
+ adev->dev = &pdev->dev;
1189
+ adev->pdev = pdev;
1190
+ ddev = adev_to_drm(adev);
1191
+
1192
+ if (!supports_atomic)
1193
+ ddev->driver_features &= ~DRIVER_ATOMIC;
8941194
8951195 ret = pci_enable_device(pdev);
8961196 if (ret)
897
- goto err_free;
1197
+ return ret;
8981198
899
- dev->pdev = pdev;
1199
+ ddev->pdev = pdev;
1200
+ pci_set_drvdata(pdev, ddev);
9001201
901
- pci_set_drvdata(pdev, dev);
1202
+ ret = amdgpu_driver_load_kms(adev, flags);
1203
+ if (ret)
1204
+ goto err_pci;
9021205
9031206 retry_init:
904
- ret = drm_dev_register(dev, ent->driver_data);
1207
+ ret = drm_dev_register(ddev, flags);
9051208 if (ret == -EAGAIN && ++retry <= 3) {
9061209 DRM_INFO("retry init %d\n", retry);
9071210 /* Don't request EX mode too frequently which is attacking */
9081211 msleep(5000);
9091212 goto retry_init;
910
- } else if (ret)
1213
+ } else if (ret) {
9111214 goto err_pci;
1215
+ }
1216
+
1217
+ ret = amdgpu_debugfs_init(adev);
1218
+ if (ret)
1219
+ DRM_ERROR("Creating debugfs files failed (%d).\n", ret);
9121220
9131221 return 0;
9141222
9151223 err_pci:
9161224 pci_disable_device(pdev);
917
-err_free:
918
- drm_dev_put(dev);
9191225 return ret;
9201226 }
9211227
....@@ -924,8 +1230,12 @@
9241230 {
9251231 struct drm_device *dev = pci_get_drvdata(pdev);
9261232
927
- drm_dev_unregister(dev);
928
- drm_dev_put(dev);
1233
+#ifdef MODULE
1234
+ if (THIS_MODULE->state != MODULE_STATE_GOING)
1235
+#endif
1236
+ DRM_ERROR("Hotplug removal is not supported\n");
1237
+ drm_dev_unplug(dev);
1238
+ amdgpu_driver_unload_kms(dev);
9291239 pci_disable_device(pdev);
9301240 pci_set_drvdata(pdev, NULL);
9311241 }
....@@ -934,94 +1244,120 @@
9341244 amdgpu_pci_shutdown(struct pci_dev *pdev)
9351245 {
9361246 struct drm_device *dev = pci_get_drvdata(pdev);
937
- struct amdgpu_device *adev = dev->dev_private;
1247
+ struct amdgpu_device *adev = drm_to_adev(dev);
1248
+
1249
+ if (amdgpu_ras_intr_triggered())
1250
+ return;
9381251
9391252 /* if we are running in a VM, make sure the device
9401253 * torn down properly on reboot/shutdown.
9411254 * unfortunately we can't detect certain
9421255 * hypervisors so just do this all the time.
9431256 */
1257
+ if (!amdgpu_passthrough(adev))
1258
+ adev->mp1_state = PP_MP1_STATE_UNLOAD;
9441259 amdgpu_device_ip_suspend(adev);
1260
+ adev->mp1_state = PP_MP1_STATE_NONE;
9451261 }
9461262
9471263 static int amdgpu_pmops_suspend(struct device *dev)
9481264 {
949
- struct pci_dev *pdev = to_pci_dev(dev);
1265
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
9501266
951
- struct drm_device *drm_dev = pci_get_drvdata(pdev);
952
- return amdgpu_device_suspend(drm_dev, true, true);
1267
+ return amdgpu_device_suspend(drm_dev, true);
9531268 }
9541269
9551270 static int amdgpu_pmops_resume(struct device *dev)
9561271 {
957
- struct pci_dev *pdev = to_pci_dev(dev);
958
- struct drm_device *drm_dev = pci_get_drvdata(pdev);
1272
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
9591273
960
- /* GPU comes up enabled by the bios on resume */
961
- if (amdgpu_device_is_px(drm_dev)) {
962
- pm_runtime_disable(dev);
963
- pm_runtime_set_active(dev);
964
- pm_runtime_enable(dev);
965
- }
966
-
967
- return amdgpu_device_resume(drm_dev, true, true);
1274
+ return amdgpu_device_resume(drm_dev, true);
9681275 }
9691276
9701277 static int amdgpu_pmops_freeze(struct device *dev)
9711278 {
972
- struct pci_dev *pdev = to_pci_dev(dev);
1279
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
1280
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
1281
+ int r;
9731282
974
- struct drm_device *drm_dev = pci_get_drvdata(pdev);
975
- return amdgpu_device_suspend(drm_dev, false, true);
1283
+ adev->in_hibernate = true;
1284
+ r = amdgpu_device_suspend(drm_dev, true);
1285
+ adev->in_hibernate = false;
1286
+ if (r)
1287
+ return r;
1288
+ return amdgpu_asic_reset(adev);
9761289 }
9771290
9781291 static int amdgpu_pmops_thaw(struct device *dev)
9791292 {
980
- struct pci_dev *pdev = to_pci_dev(dev);
1293
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
9811294
982
- struct drm_device *drm_dev = pci_get_drvdata(pdev);
983
- return amdgpu_device_resume(drm_dev, false, true);
1295
+ return amdgpu_device_resume(drm_dev, true);
9841296 }
9851297
9861298 static int amdgpu_pmops_poweroff(struct device *dev)
9871299 {
988
- struct pci_dev *pdev = to_pci_dev(dev);
1300
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
9891301
990
- struct drm_device *drm_dev = pci_get_drvdata(pdev);
991
- return amdgpu_device_suspend(drm_dev, true, true);
1302
+ return amdgpu_device_suspend(drm_dev, true);
9921303 }
9931304
9941305 static int amdgpu_pmops_restore(struct device *dev)
9951306 {
996
- struct pci_dev *pdev = to_pci_dev(dev);
1307
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
9971308
998
- struct drm_device *drm_dev = pci_get_drvdata(pdev);
999
- return amdgpu_device_resume(drm_dev, false, true);
1309
+ return amdgpu_device_resume(drm_dev, true);
10001310 }
10011311
10021312 static int amdgpu_pmops_runtime_suspend(struct device *dev)
10031313 {
10041314 struct pci_dev *pdev = to_pci_dev(dev);
10051315 struct drm_device *drm_dev = pci_get_drvdata(pdev);
1006
- int ret;
1316
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
1317
+ int ret, i;
10071318
1008
- if (!amdgpu_device_is_px(drm_dev)) {
1319
+ if (!adev->runpm) {
10091320 pm_runtime_forbid(dev);
10101321 return -EBUSY;
10111322 }
10121323
1013
- drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1324
+ /* wait for all rings to drain before suspending */
1325
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
1326
+ struct amdgpu_ring *ring = adev->rings[i];
1327
+ if (ring && ring->sched.ready) {
1328
+ ret = amdgpu_fence_wait_empty(ring);
1329
+ if (ret)
1330
+ return -EBUSY;
1331
+ }
1332
+ }
1333
+
1334
+ adev->in_runpm = true;
1335
+ if (amdgpu_device_supports_boco(drm_dev))
1336
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
10141337 drm_kms_helper_poll_disable(drm_dev);
10151338
1016
- ret = amdgpu_device_suspend(drm_dev, false, false);
1017
- pci_save_state(pdev);
1018
- pci_disable_device(pdev);
1019
- pci_ignore_hotplug(pdev);
1020
- if (amdgpu_is_atpx_hybrid())
1021
- pci_set_power_state(pdev, PCI_D3cold);
1022
- else if (!amdgpu_has_atpx_dgpu_power_cntl())
1023
- pci_set_power_state(pdev, PCI_D3hot);
1024
- drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
1339
+ ret = amdgpu_device_suspend(drm_dev, false);
1340
+ if (ret)
1341
+ return ret;
1342
+
1343
+ if (amdgpu_device_supports_boco(drm_dev)) {
1344
+ /* Only need to handle PCI state in the driver for ATPX
1345
+ * PCI core handles it for _PR3.
1346
+ */
1347
+ if (amdgpu_is_atpx_hybrid()) {
1348
+ pci_ignore_hotplug(pdev);
1349
+ } else {
1350
+ amdgpu_device_cache_pci_state(pdev);
1351
+ pci_disable_device(pdev);
1352
+ pci_ignore_hotplug(pdev);
1353
+ pci_set_power_state(pdev, PCI_D3cold);
1354
+ }
1355
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
1356
+ } else if (amdgpu_device_supports_boco(drm_dev)) {
1357
+ /* nothing to do */
1358
+ } else if (amdgpu_device_supports_baco(drm_dev)) {
1359
+ amdgpu_device_baco_enter(drm_dev);
1360
+ }
10251361
10261362 return 0;
10271363 }
....@@ -1030,50 +1366,92 @@
10301366 {
10311367 struct pci_dev *pdev = to_pci_dev(dev);
10321368 struct drm_device *drm_dev = pci_get_drvdata(pdev);
1369
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
10331370 int ret;
10341371
1035
- if (!amdgpu_device_is_px(drm_dev))
1372
+ if (!adev->runpm)
10361373 return -EINVAL;
10371374
1038
- drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1375
+ if (amdgpu_device_supports_boco(drm_dev)) {
1376
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
10391377
1040
- if (amdgpu_is_atpx_hybrid() ||
1041
- !amdgpu_has_atpx_dgpu_power_cntl())
1042
- pci_set_power_state(pdev, PCI_D0);
1043
- pci_restore_state(pdev);
1044
- ret = pci_enable_device(pdev);
1045
- if (ret)
1046
- return ret;
1047
- pci_set_master(pdev);
1048
-
1049
- ret = amdgpu_device_resume(drm_dev, false, false);
1378
+ /* Only need to handle PCI state in the driver for ATPX
1379
+ * PCI core handles it for _PR3.
1380
+ */
1381
+ if (amdgpu_is_atpx_hybrid()) {
1382
+ pci_set_master(pdev);
1383
+ } else {
1384
+ pci_set_power_state(pdev, PCI_D0);
1385
+ amdgpu_device_load_pci_state(pdev);
1386
+ ret = pci_enable_device(pdev);
1387
+ if (ret)
1388
+ return ret;
1389
+ pci_set_master(pdev);
1390
+ }
1391
+ } else if (amdgpu_device_supports_baco(drm_dev)) {
1392
+ amdgpu_device_baco_exit(drm_dev);
1393
+ }
1394
+ ret = amdgpu_device_resume(drm_dev, false);
10501395 drm_kms_helper_poll_enable(drm_dev);
1051
- drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
1396
+ if (amdgpu_device_supports_boco(drm_dev))
1397
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
1398
+ adev->in_runpm = false;
10521399 return 0;
10531400 }
10541401
10551402 static int amdgpu_pmops_runtime_idle(struct device *dev)
10561403 {
1057
- struct pci_dev *pdev = to_pci_dev(dev);
1058
- struct drm_device *drm_dev = pci_get_drvdata(pdev);
1059
- struct drm_crtc *crtc;
1404
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
1405
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
1406
+ /* we don't want the main rpm_idle to call suspend - we want to autosuspend */
1407
+ int ret = 1;
10601408
1061
- if (!amdgpu_device_is_px(drm_dev)) {
1409
+ if (!adev->runpm) {
10621410 pm_runtime_forbid(dev);
10631411 return -EBUSY;
10641412 }
10651413
1066
- list_for_each_entry(crtc, &drm_dev->mode_config.crtc_list, head) {
1067
- if (crtc->enabled) {
1068
- DRM_DEBUG_DRIVER("failing to power off - crtc active\n");
1069
- return -EBUSY;
1414
+ if (amdgpu_device_has_dc_support(adev)) {
1415
+ struct drm_crtc *crtc;
1416
+
1417
+ drm_modeset_lock_all(drm_dev);
1418
+
1419
+ drm_for_each_crtc(crtc, drm_dev) {
1420
+ if (crtc->state->active) {
1421
+ ret = -EBUSY;
1422
+ break;
1423
+ }
10701424 }
1425
+
1426
+ drm_modeset_unlock_all(drm_dev);
1427
+
1428
+ } else {
1429
+ struct drm_connector *list_connector;
1430
+ struct drm_connector_list_iter iter;
1431
+
1432
+ mutex_lock(&drm_dev->mode_config.mutex);
1433
+ drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL);
1434
+
1435
+ drm_connector_list_iter_begin(drm_dev, &iter);
1436
+ drm_for_each_connector_iter(list_connector, &iter) {
1437
+ if (list_connector->dpms == DRM_MODE_DPMS_ON) {
1438
+ ret = -EBUSY;
1439
+ break;
1440
+ }
1441
+ }
1442
+
1443
+ drm_connector_list_iter_end(&iter);
1444
+
1445
+ drm_modeset_unlock(&drm_dev->mode_config.connection_mutex);
1446
+ mutex_unlock(&drm_dev->mode_config.mutex);
10711447 }
1448
+
1449
+ if (ret == -EBUSY)
1450
+ DRM_DEBUG_DRIVER("failing to power off - crtc active\n");
10721451
10731452 pm_runtime_mark_last_busy(dev);
10741453 pm_runtime_autosuspend(dev);
1075
- /* we don't want the main rpm_idle to call suspend - we want to autosuspend */
1076
- return 1;
1454
+ return ret;
10771455 }
10781456
10791457 long amdgpu_drm_ioctl(struct file *filp,
....@@ -1111,12 +1489,13 @@
11111489 {
11121490 struct drm_file *file_priv = f->private_data;
11131491 struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
1492
+ long timeout = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;
11141493
1115
- amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr);
1494
+ timeout = amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr, timeout);
1495
+ timeout = amdgpu_vm_wait_idle(&fpriv->vm, timeout);
11161496
1117
- return 0;
1497
+ return timeout >= 0 ? 0 : timeout;
11181498 }
1119
-
11201499
11211500 static const struct file_operations amdgpu_driver_kms_fops = {
11221501 .owner = THIS_MODULE,
....@@ -1132,31 +1511,31 @@
11321511 #endif
11331512 };
11341513
1135
-static bool
1136
-amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
1137
- bool in_vblank_irq, int *vpos, int *hpos,
1138
- ktime_t *stime, ktime_t *etime,
1139
- const struct drm_display_mode *mode)
1514
+int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
11401515 {
1141
- return amdgpu_display_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos,
1142
- stime, etime, mode);
1516
+ struct drm_file *file;
1517
+
1518
+ if (!filp)
1519
+ return -EINVAL;
1520
+
1521
+ if (filp->f_op != &amdgpu_driver_kms_fops) {
1522
+ return -EINVAL;
1523
+ }
1524
+
1525
+ file = filp->private_data;
1526
+ *fpriv = file->driver_priv;
1527
+ return 0;
11431528 }
11441529
11451530 static struct drm_driver kms_driver = {
11461531 .driver_features =
1147
- DRIVER_USE_AGP |
1148
- DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
1149
- DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
1150
- .load = amdgpu_driver_load_kms,
1532
+ DRIVER_ATOMIC |
1533
+ DRIVER_GEM |
1534
+ DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ |
1535
+ DRIVER_SYNCOBJ_TIMELINE,
11511536 .open = amdgpu_driver_open_kms,
11521537 .postclose = amdgpu_driver_postclose_kms,
11531538 .lastclose = amdgpu_driver_lastclose_kms,
1154
- .unload = amdgpu_driver_unload_kms,
1155
- .get_vblank_counter = amdgpu_get_vblank_counter_kms,
1156
- .enable_vblank = amdgpu_enable_vblank_kms,
1157
- .disable_vblank = amdgpu_disable_vblank_kms,
1158
- .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos,
1159
- .get_scanout_position = amdgpu_get_crtc_scanout_position,
11601539 .irq_handler = amdgpu_irq_handler,
11611540 .ioctls = amdgpu_ioctls_kms,
11621541 .gem_free_object_unlocked = amdgpu_gem_object_free,
....@@ -1170,9 +1549,6 @@
11701549 .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
11711550 .gem_prime_export = amdgpu_gem_prime_export,
11721551 .gem_prime_import = amdgpu_gem_prime_import,
1173
- .gem_prime_res_obj = amdgpu_gem_prime_res_obj,
1174
- .gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,
1175
- .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
11761552 .gem_prime_vmap = amdgpu_gem_prime_vmap,
11771553 .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
11781554 .gem_prime_mmap = amdgpu_gem_prime_mmap,
....@@ -1185,8 +1561,12 @@
11851561 .patchlevel = KMS_DRIVER_PATCHLEVEL,
11861562 };
11871563
1188
-static struct drm_driver *driver;
1189
-static struct pci_driver *pdriver;
1564
+static struct pci_error_handlers amdgpu_pci_err_handler = {
1565
+ .error_detected = amdgpu_pci_error_detected,
1566
+ .mmio_enabled = amdgpu_pci_mmio_enabled,
1567
+ .slot_reset = amdgpu_pci_slot_reset,
1568
+ .resume = amdgpu_pci_resume,
1569
+};
11901570
11911571 static struct pci_driver amdgpu_kms_pci_driver = {
11921572 .name = DRIVER_NAME,
....@@ -1195,9 +1575,8 @@
11951575 .remove = amdgpu_pci_remove,
11961576 .shutdown = amdgpu_pci_shutdown,
11971577 .driver.pm = &amdgpu_pm_ops,
1578
+ .err_handler = &amdgpu_pci_err_handler,
11981579 };
1199
-
1200
-
12011580
12021581 static int __init amdgpu_init(void)
12031582 {
....@@ -1217,12 +1596,14 @@
12171596 goto error_fence;
12181597
12191598 DRM_INFO("amdgpu kernel modesetting enabled.\n");
1220
- driver = &kms_driver;
1221
- pdriver = &amdgpu_kms_pci_driver;
1222
- driver->num_ioctls = amdgpu_max_kms_ioctl;
1599
+ kms_driver.num_ioctls = amdgpu_max_kms_ioctl;
12231600 amdgpu_register_atpx_handler();
1601
+
1602
+ /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
1603
+ amdgpu_amdkfd_init();
1604
+
12241605 /* let modprobe override vga console setting */
1225
- return pci_register_driver(pdriver);
1606
+ return pci_register_driver(&amdgpu_kms_pci_driver);
12261607
12271608 error_fence:
12281609 amdgpu_sync_fini();
....@@ -1234,10 +1615,11 @@
12341615 static void __exit amdgpu_exit(void)
12351616 {
12361617 amdgpu_amdkfd_fini();
1237
- pci_unregister_driver(pdriver);
1618
+ pci_unregister_driver(&amdgpu_kms_pci_driver);
12381619 amdgpu_unregister_atpx_handler();
12391620 amdgpu_sync_fini();
12401621 amdgpu_fence_slab_fini();
1622
+ mmu_notifier_synchronize();
12411623 }
12421624
12431625 module_init(amdgpu_init);