From d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 02:45:28 +0000 Subject: [PATCH] add boot partition size --- kernel/drivers/gpu/drm/amd/amdkfd/kfd_device.c | 693 +++++++++++++++++++++++++++++++++++++++++--------------- 1 files changed, 503 insertions(+), 190 deletions(-) diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 28022d1..148e43d 100644 --- a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -28,6 +28,8 @@ #include "kfd_pm4_headers_vi.h" #include "cwsr_trap_handler.h" #include "kfd_iommu.h" +#include "amdgpu_amdkfd.h" +#include "kfd_smi_events.h" #define MQD_SIZE_ALIGNED 768 @@ -38,9 +40,48 @@ */ static atomic_t kfd_locked = ATOMIC_INIT(0); +#ifdef CONFIG_DRM_AMDGPU_CIK +extern const struct kfd2kgd_calls gfx_v7_kfd2kgd; +#endif +extern const struct kfd2kgd_calls gfx_v8_kfd2kgd; +extern const struct kfd2kgd_calls gfx_v9_kfd2kgd; +extern const struct kfd2kgd_calls arcturus_kfd2kgd; +extern const struct kfd2kgd_calls gfx_v10_kfd2kgd; +extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd; + +static const struct kfd2kgd_calls *kfd2kgd_funcs[] = { +#ifdef KFD_SUPPORT_IOMMU_V2 +#ifdef CONFIG_DRM_AMDGPU_CIK + [CHIP_KAVERI] = &gfx_v7_kfd2kgd, +#endif + [CHIP_CARRIZO] = &gfx_v8_kfd2kgd, + [CHIP_RAVEN] = &gfx_v9_kfd2kgd, +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + [CHIP_HAWAII] = &gfx_v7_kfd2kgd, +#endif + [CHIP_TONGA] = &gfx_v8_kfd2kgd, + [CHIP_FIJI] = &gfx_v8_kfd2kgd, + [CHIP_POLARIS10] = &gfx_v8_kfd2kgd, + [CHIP_POLARIS11] = &gfx_v8_kfd2kgd, + [CHIP_POLARIS12] = &gfx_v8_kfd2kgd, + [CHIP_VEGAM] = &gfx_v8_kfd2kgd, + [CHIP_VEGA10] = &gfx_v9_kfd2kgd, + [CHIP_VEGA12] = &gfx_v9_kfd2kgd, + [CHIP_VEGA20] = &gfx_v9_kfd2kgd, + [CHIP_RENOIR] = &gfx_v9_kfd2kgd, + [CHIP_ARCTURUS] = &arcturus_kfd2kgd, + [CHIP_NAVI10] = &gfx_v10_kfd2kgd, + [CHIP_NAVI12] = &gfx_v10_kfd2kgd, + [CHIP_NAVI14] = &gfx_v10_kfd2kgd, + [CHIP_SIENNA_CICHLID] = &gfx_v10_3_kfd2kgd, + [CHIP_NAVY_FLOUNDER] = &gfx_v10_3_kfd2kgd, +}; + #ifdef KFD_SUPPORT_IOMMU_V2 static const struct kfd_device_info kaveri_device_info = { .asic_family = CHIP_KAVERI, + .asic_name = "kaveri", .max_pasid_bits = 16, /* max num of queues for KV.TODO should be a dynamic value */ .max_no_of_hqd = 24, @@ -53,10 +94,13 @@ .needs_iommu_device = true, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info carrizo_device_info = { .asic_family = CHIP_CARRIZO, + .asic_name = "carrizo", .max_pasid_bits = 16, /* max num of queues for CZ.TODO should be a dynamic value */ .max_no_of_hqd = 24, @@ -69,10 +113,14 @@ .needs_iommu_device = true, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, }; +#endif static const struct kfd_device_info raven_device_info = { .asic_family = CHIP_RAVEN, + .asic_name = "raven", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -84,11 +132,13 @@ .needs_iommu_device = true, .needs_pci_atomics = true, .num_sdma_engines = 1, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, }; -#endif static const struct kfd_device_info hawaii_device_info = { .asic_family = CHIP_HAWAII, + .asic_name = "hawaii", .max_pasid_bits = 16, /* max num of queues for KV.TODO should be a dynamic value */ .max_no_of_hqd = 24, @@ -101,10 +151,13 @@ .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info tonga_device_info = { .asic_family = CHIP_TONGA, + .asic_name = "tonga", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -116,25 +169,13 @@ .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, -}; - -static const struct kfd_device_info tonga_vf_device_info = { - .asic_family = CHIP_TONGA, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = false, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info fiji_device_info = { .asic_family = CHIP_FIJI, + .asic_name = "fiji", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -146,10 +187,13 @@ .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info fiji_vf_device_info = { .asic_family = CHIP_FIJI, + .asic_name = "fiji", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -161,11 +205,14 @@ .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info polaris10_device_info = { .asic_family = CHIP_POLARIS10, + .asic_name = "polaris10", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -177,10 +224,13 @@ .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info polaris10_vf_device_info = { .asic_family = CHIP_POLARIS10, + .asic_name = "polaris10", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -192,10 +242,13 @@ .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info polaris11_device_info = { .asic_family = CHIP_POLARIS11, + .asic_name = "polaris11", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -207,10 +260,49 @@ .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, +}; + +static const struct kfd_device_info polaris12_device_info = { + .asic_family = CHIP_POLARIS12, + .asic_name = "polaris12", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 4, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = true, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, +}; + +static const struct kfd_device_info vegam_device_info = { + .asic_family = CHIP_VEGAM, + .asic_name = "vegam", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 4, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = true, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info vega10_device_info = { .asic_family = CHIP_VEGA10, + .asic_name = "vega10", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -222,10 +314,13 @@ .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info vega10_vf_device_info = { .asic_family = CHIP_VEGA10, + .asic_name = "vega10", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -237,104 +332,196 @@ .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, }; - -struct kfd_deviceid { - unsigned short did; - const struct kfd_device_info *device_info; +static const struct kfd_device_info vega12_device_info = { + .asic_family = CHIP_VEGA12, + .asic_name = "vega12", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, }; -static const struct kfd_deviceid supported_devices[] = { +static const struct kfd_device_info vega20_device_info = { + .asic_family = CHIP_VEGA20, + .asic_name = "vega20", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 8, +}; + +static const struct kfd_device_info arcturus_device_info = { + .asic_family = CHIP_ARCTURUS, + .asic_name = "arcturus", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 6, + .num_sdma_queues_per_engine = 8, +}; + +static const struct kfd_device_info renoir_device_info = { + .asic_family = CHIP_RENOIR, + .asic_name = "renoir", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 1, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, +}; + +static const struct kfd_device_info navi10_device_info = { + .asic_family = CHIP_NAVI10, + .asic_name = "navi10", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .needs_iommu_device = false, + .supports_cwsr = true, + .needs_pci_atomics = false, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 8, +}; + +static const struct kfd_device_info navi12_device_info = { + .asic_family = CHIP_NAVI12, + .asic_name = "navi12", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .needs_iommu_device = false, + .supports_cwsr = true, + .needs_pci_atomics = false, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 8, +}; + +static const struct kfd_device_info navi14_device_info = { + .asic_family = CHIP_NAVI14, + .asic_name = "navi14", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .needs_iommu_device = false, + .supports_cwsr = true, + .needs_pci_atomics = false, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 8, +}; + +static const struct kfd_device_info sienna_cichlid_device_info = { + .asic_family = CHIP_SIENNA_CICHLID, + .asic_name = "sienna_cichlid", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .needs_iommu_device = false, + .supports_cwsr = true, + .needs_pci_atomics = false, + .num_sdma_engines = 4, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 8, +}; + +static const struct kfd_device_info navy_flounder_device_info = { + .asic_family = CHIP_NAVY_FLOUNDER, + .asic_name = "navy_flounder", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .needs_iommu_device = false, + .supports_cwsr = true, + .needs_pci_atomics = false, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 8, +}; + +/* For each entry, [0] is regular and [1] is virtualisation device. */ +static const struct kfd_device_info *kfd_supported_devices[][2] = { #ifdef KFD_SUPPORT_IOMMU_V2 - { 0x1304, &kaveri_device_info }, /* Kaveri */ - { 0x1305, &kaveri_device_info }, /* Kaveri */ - { 0x1306, &kaveri_device_info }, /* Kaveri */ - { 0x1307, &kaveri_device_info }, /* Kaveri */ - { 0x1309, &kaveri_device_info }, /* Kaveri */ - { 0x130A, &kaveri_device_info }, /* Kaveri */ - { 0x130B, &kaveri_device_info }, /* Kaveri */ - { 0x130C, &kaveri_device_info }, /* Kaveri */ - { 0x130D, &kaveri_device_info }, /* Kaveri */ - { 0x130E, &kaveri_device_info }, /* Kaveri */ - { 0x130F, &kaveri_device_info }, /* Kaveri */ - { 0x1310, &kaveri_device_info }, /* Kaveri */ - { 0x1311, &kaveri_device_info }, /* Kaveri */ - { 0x1312, &kaveri_device_info }, /* Kaveri */ - { 0x1313, &kaveri_device_info }, /* Kaveri */ - { 0x1315, &kaveri_device_info }, /* Kaveri */ - { 0x1316, &kaveri_device_info }, /* Kaveri */ - { 0x1317, &kaveri_device_info }, /* Kaveri */ - { 0x1318, &kaveri_device_info }, /* Kaveri */ - { 0x131B, &kaveri_device_info }, /* Kaveri */ - { 0x131C, &kaveri_device_info }, /* Kaveri */ - { 0x131D, &kaveri_device_info }, /* Kaveri */ - { 0x9870, &carrizo_device_info }, /* Carrizo */ - { 0x9874, &carrizo_device_info }, /* Carrizo */ - { 0x9875, &carrizo_device_info }, /* Carrizo */ - { 0x9876, &carrizo_device_info }, /* Carrizo */ - { 0x9877, &carrizo_device_info }, /* Carrizo */ - { 0x15DD, &raven_device_info }, /* Raven */ + [CHIP_KAVERI] = {&kaveri_device_info, NULL}, + [CHIP_CARRIZO] = {&carrizo_device_info, NULL}, #endif - { 0x67A0, &hawaii_device_info }, /* Hawaii */ - { 0x67A1, &hawaii_device_info }, /* Hawaii */ - { 0x67A2, &hawaii_device_info }, /* Hawaii */ - { 0x67A8, &hawaii_device_info }, /* Hawaii */ - { 0x67A9, &hawaii_device_info }, /* Hawaii */ - { 0x67AA, &hawaii_device_info }, /* Hawaii */ - { 0x67B0, &hawaii_device_info }, /* Hawaii */ - { 0x67B1, &hawaii_device_info }, /* Hawaii */ - { 0x67B8, &hawaii_device_info }, /* Hawaii */ - { 0x67B9, &hawaii_device_info }, /* Hawaii */ - { 0x67BA, &hawaii_device_info }, /* Hawaii */ - { 0x67BE, &hawaii_device_info }, /* Hawaii */ - { 0x6920, &tonga_device_info }, /* Tonga */ - { 0x6921, &tonga_device_info }, /* Tonga */ - { 0x6928, &tonga_device_info }, /* Tonga */ - { 0x6929, &tonga_device_info }, /* Tonga */ - { 0x692B, &tonga_device_info }, /* Tonga */ - { 0x692F, &tonga_vf_device_info }, /* Tonga vf */ - { 0x6938, &tonga_device_info }, /* Tonga */ - { 0x6939, &tonga_device_info }, /* Tonga */ - { 0x7300, &fiji_device_info }, /* Fiji */ - { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/ - { 0x67C0, &polaris10_device_info }, /* Polaris10 */ - { 0x67C1, &polaris10_device_info }, /* Polaris10 */ - { 0x67C2, &polaris10_device_info }, /* Polaris10 */ - { 0x67C4, &polaris10_device_info }, /* Polaris10 */ - { 0x67C7, &polaris10_device_info }, /* Polaris10 */ - { 0x67C8, &polaris10_device_info }, /* Polaris10 */ - { 0x67C9, &polaris10_device_info }, /* Polaris10 */ - { 0x67CA, &polaris10_device_info }, /* Polaris10 */ - { 0x67CC, &polaris10_device_info }, /* Polaris10 */ - { 0x67CF, &polaris10_device_info }, /* Polaris10 */ - { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/ - { 0x67DF, &polaris10_device_info }, /* Polaris10 */ - { 0x6FDF, &polaris10_device_info }, /* Polaris10 */ - { 0x67E0, &polaris11_device_info }, /* Polaris11 */ - { 0x67E1, &polaris11_device_info }, /* Polaris11 */ - { 0x67E3, &polaris11_device_info }, /* Polaris11 */ - { 0x67E7, &polaris11_device_info }, /* Polaris11 */ - { 0x67E8, &polaris11_device_info }, /* Polaris11 */ - { 0x67E9, &polaris11_device_info }, /* Polaris11 */ - { 0x67EB, &polaris11_device_info }, /* Polaris11 */ - { 0x67EF, &polaris11_device_info }, /* Polaris11 */ - { 0x67FF, &polaris11_device_info }, /* Polaris11 */ - { 0x6860, &vega10_device_info }, /* Vega10 */ - { 0x6861, &vega10_device_info }, /* Vega10 */ - { 0x6862, &vega10_device_info }, /* Vega10 */ - { 0x6863, &vega10_device_info }, /* Vega10 */ - { 0x6864, &vega10_device_info }, /* Vega10 */ - { 0x6867, &vega10_device_info }, /* Vega10 */ - { 0x6868, &vega10_device_info }, /* Vega10 */ - { 0x6869, &vega10_device_info }, /* Vega10 */ - { 0x686A, &vega10_device_info }, /* Vega10 */ - { 0x686B, &vega10_device_info }, /* Vega10 */ - { 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/ - { 0x686D, &vega10_device_info }, /* Vega10 */ - { 0x686E, &vega10_device_info }, /* Vega10 */ - { 0x686F, &vega10_device_info }, /* Vega10 */ - { 0x687F, &vega10_device_info }, /* Vega10 */ + [CHIP_RAVEN] = {&raven_device_info, NULL}, + [CHIP_HAWAII] = {&hawaii_device_info, NULL}, + [CHIP_TONGA] = {&tonga_device_info, NULL}, + [CHIP_FIJI] = {&fiji_device_info, &fiji_vf_device_info}, + [CHIP_POLARIS10] = {&polaris10_device_info, &polaris10_vf_device_info}, + [CHIP_POLARIS11] = {&polaris11_device_info, NULL}, + [CHIP_POLARIS12] = {&polaris12_device_info, NULL}, + [CHIP_VEGAM] = {&vegam_device_info, NULL}, + [CHIP_VEGA10] = {&vega10_device_info, &vega10_vf_device_info}, + [CHIP_VEGA12] = {&vega12_device_info, NULL}, + [CHIP_VEGA20] = {&vega20_device_info, NULL}, + [CHIP_RENOIR] = {&renoir_device_info, NULL}, + [CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info}, + [CHIP_NAVI10] = {&navi10_device_info, NULL}, + [CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info}, + [CHIP_NAVI14] = {&navi14_device_info, NULL}, + [CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, &sienna_cichlid_device_info}, + [CHIP_NAVY_FLOUNDER] = {&navy_flounder_device_info, &navy_flounder_device_info}, }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, @@ -343,47 +530,25 @@ static int kfd_resume(struct kfd_dev *kfd); -static const struct kfd_device_info *lookup_device_info(unsigned short did) -{ - size_t i; - - for (i = 0; i < ARRAY_SIZE(supported_devices); i++) { - if (supported_devices[i].did == did) { - WARN_ON(!supported_devices[i].device_info); - return supported_devices[i].device_info; - } - } - - dev_warn(kfd_device, "DID %04x is missing in supported_devices\n", - did); - - return NULL; -} - struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, - struct pci_dev *pdev, const struct kfd2kgd_calls *f2g) + struct pci_dev *pdev, unsigned int asic_type, bool vf) { struct kfd_dev *kfd; - int ret; - const struct kfd_device_info *device_info = - lookup_device_info(pdev->device); + const struct kfd_device_info *device_info; + const struct kfd2kgd_calls *f2g; - if (!device_info) { - dev_err(kfd_device, "kgd2kfd_probe failed\n"); - return NULL; + if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void *) * 2) + || asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) { + dev_err(kfd_device, "asic_type %d out of range\n", asic_type); + return NULL; /* asic_type out of range */ } - /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. - * 32 and 64-bit requests are possible and must be - * supported. - */ - ret = pci_enable_atomic_ops_to_root(pdev, - PCI_EXP_DEVCAP2_ATOMIC_COMP32 | - PCI_EXP_DEVCAP2_ATOMIC_COMP64); - if (device_info->needs_pci_atomics && ret < 0) { - dev_info(kfd_device, - "skipped device %x:%x, PCI rejects atomics\n", - pdev->vendor, pdev->device); + device_info = kfd_supported_devices[asic_type][vf]; + f2g = kfd2kgd_funcs[asic_type]; + + if (!device_info || !f2g) { + dev_err(kfd_device, "%s %s not supported in kfd\n", + amdgpu_asic_name[asic_type], vf ? "VF" : ""); return NULL; } @@ -391,15 +556,34 @@ if (!kfd) return NULL; + /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. + * 32 and 64-bit requests are possible and must be + * supported. + */ + kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd); + if (device_info->needs_pci_atomics && + !kfd->pci_atomic_requested) { + dev_info(kfd_device, + "skipped device %x:%x, PCI rejects atomics\n", + pdev->vendor, pdev->device); + kfree(kfd); + return NULL; + } + kfd->kgd = kgd; kfd->device_info = device_info; kfd->pdev = pdev; kfd->init_complete = false; kfd->kfd2kgd = f2g; + atomic_set(&kfd->compute_profile, 0); mutex_init(&kfd->doorbell_mutex); memset(&kfd->doorbell_available_index, 0, sizeof(kfd->doorbell_available_index)); + + atomic_set(&kfd->sram_ecc_flag, 0); + + ida_init(&kfd->doorbell_ida); return kfd; } @@ -411,21 +595,67 @@ BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_gfx8_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); - } else { + } else if (kfd->device_info->asic_family == CHIP_ARCTURUS) { + BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_arcturus_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex); + } else if (kfd->device_info->asic_family < CHIP_NAVI10) { BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_gfx9_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); + } else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) { + BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_nv1x_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex); + } else { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_gfx10_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex); } kfd->cwsr_enabled = true; } } +static int kfd_gws_init(struct kfd_dev *kfd) +{ + int ret = 0; + + if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) + return 0; + + if (hws_gws_support + || (kfd->device_info->asic_family == CHIP_VEGA10 + && kfd->mec2_fw_version >= 0x81b3) + || (kfd->device_info->asic_family >= CHIP_VEGA12 + && kfd->device_info->asic_family <= CHIP_RAVEN + && kfd->mec2_fw_version >= 0x1b3) + || (kfd->device_info->asic_family == CHIP_ARCTURUS + && kfd->mec2_fw_version >= 0x30)) + ret = amdgpu_amdkfd_alloc_gws(kfd->kgd, + amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws); + + return ret; +} + +static void kfd_smi_init(struct kfd_dev *dev) { + INIT_LIST_HEAD(&dev->smi_clients); + spin_lock_init(&dev->smi_lock); +} + bool kgd2kfd_device_init(struct kfd_dev *kfd, + struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources) { unsigned int size; + kfd->ddev = ddev; + kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, + KGD_ENGINE_MEC1); + kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, + KGD_ENGINE_MEC2); + kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, + KGD_ENGINE_SDMA1); kfd->shared_resources = *gpu_resources; kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; @@ -434,15 +664,10 @@ - kfd->vm_info.first_vmid_kfd + 1; /* Verify module parameters regarding mapped process number*/ - if ((hws_max_conc_proc < 0) - || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { - dev_err(kfd_device, - "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n", - hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, - kfd->vm_info.vmid_num_kfd); + if (hws_max_conc_proc >= 0) + kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd); + else kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; - } else - kfd->max_proc_per_quantum = hws_max_conc_proc; /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * @@ -462,12 +687,12 @@ /* add another 512KB for all other allocations on gart (HPD, fences) */ size += 512 * 1024; - if (kfd->kfd2kgd->init_gtt_mem_allocation( + if (amdgpu_amdkfd_alloc_gtt_mem( kfd->kgd, size, &kfd->gtt_mem, &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, false)) { dev_err(kfd_device, "Could not allocate %d bytes\n", size); - goto out; + goto alloc_gtt_mem_failure; } dev_info(kfd_device, "Allocated %d bytes on gart\n", size); @@ -484,10 +709,11 @@ goto kfd_doorbell_error; } - if (kfd_topology_add_device(kfd)) { - dev_err(kfd_device, "Error adding device to topology\n"); - goto kfd_topology_add_device_error; - } + kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd); + + kfd->unique_id = amdgpu_amdkfd_get_unique_id(kfd->kgd); + + kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd); if (kfd_interrupt_init(kfd)) { dev_err(kfd_device, "Error initializing interrupts\n"); @@ -500,17 +726,40 @@ goto device_queue_manager_error; } + /* If supported on this device, allocate global GWS that is shared + * by all KFD processes + */ + if (kfd_gws_init(kfd)) { + dev_err(kfd_device, "Could not allocate %d gws\n", + amdgpu_amdkfd_get_num_gws(kfd->kgd)); + goto gws_error; + } + + /* If CRAT is broken, won't set iommu enabled */ + kfd_double_confirm_iommu_support(kfd); + if (kfd_iommu_device_init(kfd)) { + kfd->use_iommu_v2 = false; dev_err(kfd_device, "Error initializing iommuv2\n"); goto device_iommu_error; } kfd_cwsr_init(kfd); + if(kgd2kfd_resume_iommu(kfd)) + goto device_iommu_error; + if (kfd_resume(kfd)) goto kfd_resume_error; kfd->dbgmgr = NULL; + + if (kfd_topology_add_device(kfd)) { + dev_err(kfd_device, "Error adding device to topology\n"); + goto kfd_topology_add_device_error; + } + + kfd_smi_init(kfd); kfd->init_complete = true; dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor, @@ -521,19 +770,22 @@ goto out; +kfd_topology_add_device_error: kfd_resume_error: device_iommu_error: +gws_error: device_queue_manager_uninit(kfd->dqm); device_queue_manager_error: kfd_interrupt_exit(kfd); kfd_interrupt_error: - kfd_topology_remove_device(kfd); -kfd_topology_add_device_error: kfd_doorbell_fini(kfd); kfd_doorbell_error: kfd_gtt_sa_fini(kfd); kfd_gtt_sa_init_error: - kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); +alloc_gtt_mem_failure: + if (kfd->gws) + amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); dev_err(kfd_device, "device %x:%x NOT added due to errors\n", kfd->pdev->vendor, kfd->pdev->device); @@ -544,13 +796,16 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) { if (kfd->init_complete) { - kgd2kfd_suspend(kfd); + kgd2kfd_suspend(kfd, false); device_queue_manager_uninit(kfd->dqm); kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); kfd_doorbell_fini(kfd); + ida_destroy(&kfd->doorbell_ida); kfd_gtt_sa_fini(kfd); - kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); + if (kfd->gws) + amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); } kfree(kfd); @@ -560,10 +815,12 @@ { if (!kfd->init_complete) return 0; - kgd2kfd_suspend(kfd); - /* hold dqm->lock to prevent further execution*/ - dqm_lock(kfd->dqm); + kfd_smi_event_update_gpu_reset(kfd, false); + + kfd->dqm->ops.pre_reset(kfd->dqm); + + kgd2kfd_suspend(kfd, false); kfd_signal_reset_event(kfd); return 0; @@ -577,18 +834,20 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd) { - int ret, count; + int ret; if (!kfd->init_complete) return 0; - dqm_unlock(kfd->dqm); - ret = kfd_resume(kfd); if (ret) return ret; - count = atomic_dec_return(&kfd_locked); - WARN_ONCE(count != 0, "KFD reset ref. error"); + atomic_dec(&kfd_locked); + + atomic_set(&kfd->sram_ecc_flag, 0); + + kfd_smi_event_update_gpu_reset(kfd, true); + return 0; } @@ -597,21 +856,23 @@ return (atomic_read(&kfd_locked) > 0); } -void kgd2kfd_suspend(struct kfd_dev *kfd) +void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) { if (!kfd->init_complete) return; - /* For first KFD device suspend all the KFD processes */ - if (atomic_inc_return(&kfd_locked) == 1) - kfd_suspend_all_processes(); + /* for runtime suspend, skip locking kfd */ + if (!run_pm) { + /* For first KFD device suspend all the KFD processes */ + if (atomic_inc_return(&kfd_locked) == 1) + kfd_suspend_all_processes(); + } kfd->dqm->ops.stop(kfd->dqm); - kfd_iommu_suspend(kfd); } -int kgd2kfd_resume(struct kfd_dev *kfd) +int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) { int ret, count; @@ -622,25 +883,32 @@ if (ret) return ret; - count = atomic_dec_return(&kfd_locked); - WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); - if (count == 0) - ret = kfd_resume_all_processes(); + /* for runtime resume, skip unlocking kfd */ + if (!run_pm) { + count = atomic_dec_return(&kfd_locked); + WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); + if (count == 0) + ret = kfd_resume_all_processes(); + } return ret; +} + +int kgd2kfd_resume_iommu(struct kfd_dev *kfd) +{ + int err = 0; + + err = kfd_iommu_resume(kfd); + if (err) + dev_err(kfd_device, + "Failed to resume IOMMU for device %x:%x\n", + kfd->pdev->vendor, kfd->pdev->device); + return err; } static int kfd_resume(struct kfd_dev *kfd) { int err = 0; - - err = kfd_iommu_resume(kfd); - if (err) { - dev_err(kfd_device, - "Failed to resume IOMMU for device %x:%x\n", - kfd->pdev->vendor, kfd->pdev->device); - return err; - } err = kfd->dqm->ops.start(kfd->dqm); if (err) { @@ -655,6 +923,21 @@ dqm_start_error: kfd_iommu_suspend(kfd); return err; +} + +static inline void kfd_queue_work(struct workqueue_struct *wq, + struct work_struct *work) +{ + int cpu, new_cpu; + + cpu = new_cpu = smp_processor_id(); + do { + new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids; + if (cpu_to_node(new_cpu) == numa_node_id()) + break; + } while (cpu != new_cpu); + + queue_work_on(new_cpu, wq, work); } /* This is called directly from KGD at ISR. */ @@ -679,7 +962,7 @@ patched_ihre, &is_patched) && enqueue_ih_ring_entry(kfd, is_patched ? patched_ihre : ih_ring_entry)) - queue_work(kfd->ih_wq, &kfd->interrupt_work); + kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work); spin_unlock_irqrestore(&kfd->interrupt_lock, flags); } @@ -697,6 +980,7 @@ if (!p) return -ESRCH; + WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); r = kfd_process_evict_queues(p); kfd_unref_process(p); @@ -764,6 +1048,8 @@ /* During process initialization eviction_work.dwork is initialized * to kfd_evict_bo_worker */ + WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies", + p->lead_thread->pid, delay_jiffies); schedule_delayed_work(&p->eviction_work, delay_jiffies); out: kfd_unref_process(p); @@ -952,6 +1238,33 @@ return 0; } +void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) +{ + if (kfd) + atomic_inc(&kfd->sram_ecc_flag); +} + +void kfd_inc_compute_active(struct kfd_dev *kfd) +{ + if (atomic_inc_return(&kfd->compute_profile) == 1) + amdgpu_amdkfd_set_compute_idle(kfd->kgd, false); +} + +void kfd_dec_compute_active(struct kfd_dev *kfd) +{ + int count = atomic_dec_return(&kfd->compute_profile); + + if (count == 0) + amdgpu_amdkfd_set_compute_idle(kfd->kgd, true); + WARN_ONCE(count < 0, "Compute profile ref. count error"); +} + +void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) +{ + if (kfd) + kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS -- Gitblit v1.6.2