From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] disable FB
---
kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 454 +++++++++++++++++++++++++++++++++++++-------------------
1 files changed, 302 insertions(+), 152 deletions(-)
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 5cf499a..2b31c30 100644
--- a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -36,6 +36,8 @@
#include "kfd_topology.h"
#include "kfd_device_queue_manager.h"
#include "kfd_iommu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_ras.h"
/* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list;
@@ -100,7 +102,25 @@
down_read(&topology_lock);
list_for_each_entry(top_dev, &topology_device_list, list)
- if (top_dev->gpu->pdev == pdev) {
+ if (top_dev->gpu && top_dev->gpu->pdev == pdev) {
+ device = top_dev->gpu;
+ break;
+ }
+
+ up_read(&topology_lock);
+
+ return device;
+}
+
+struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
+{
+ struct kfd_topology_device *top_dev;
+ struct kfd_dev *device = NULL;
+
+ down_read(&topology_lock);
+
+ list_for_each_entry(top_dev, &topology_device_list, list)
+ if (top_dev->gpu && top_dev->gpu->kgd == kgd) {
device = top_dev->gpu;
break;
}
@@ -190,39 +210,41 @@
}
-#define sysfs_show_gen_prop(buffer, fmt, ...) \
- snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__)
-#define sysfs_show_32bit_prop(buffer, name, value) \
- sysfs_show_gen_prop(buffer, "%s %u\n", name, value)
-#define sysfs_show_64bit_prop(buffer, name, value) \
- sysfs_show_gen_prop(buffer, "%s %llu\n", name, value)
-#define sysfs_show_32bit_val(buffer, value) \
- sysfs_show_gen_prop(buffer, "%u\n", value)
-#define sysfs_show_str_val(buffer, value) \
- sysfs_show_gen_prop(buffer, "%s\n", value)
+/*
+ * Helpers that append one formatted property to a sysfs output page.
+ *
+ * @buffer: start of the PAGE_SIZE buffer handed to the show() callback
+ * @offs:   int lvalue holding the number of bytes already written; it is
+ *          advanced in place and is also the value of the whole expression
+ *
+ * scnprintf() is used instead of snprintf() because it returns the number
+ * of characters actually stored. With snprintf() a truncated write would
+ * push @offs past PAGE_SIZE, and the following "PAGE_SIZE - offs" would
+ * wrap to a huge size_t, letting the next call write beyond the page.
+ *
+ * Note: @buffer and @offs are evaluated more than once; pass plain
+ * variables, not expressions with side effects.
+ */
+#define sysfs_show_gen_prop(buffer, offs, fmt, ...)			\
+	((offs) += scnprintf((buffer) + (offs), PAGE_SIZE - (offs),	\
+			     fmt, __VA_ARGS__))
+#define sysfs_show_32bit_prop(buffer, offs, name, value) \
+	sysfs_show_gen_prop(buffer, offs, "%s %u\n", name, value)
+#define sysfs_show_64bit_prop(buffer, offs, name, value) \
+	sysfs_show_gen_prop(buffer, offs, "%s %llu\n", name, value)
+#define sysfs_show_32bit_val(buffer, offs, value) \
+	sysfs_show_gen_prop(buffer, offs, "%u\n", value)
+#define sysfs_show_str_val(buffer, offs, value) \
+	sysfs_show_gen_prop(buffer, offs, "%s\n", value)
static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
- ssize_t ret;
+ int offs = 0;
/* Making sure that the buffer is an empty string */
buffer[0] = 0;
if (attr == &sys_props.attr_genid) {
- ret = sysfs_show_32bit_val(buffer, sys_props.generation_count);
+ sysfs_show_32bit_val(buffer, offs,
+ sys_props.generation_count);
} else if (attr == &sys_props.attr_props) {
- sysfs_show_64bit_prop(buffer, "platform_oem",
- sys_props.platform_oem);
- sysfs_show_64bit_prop(buffer, "platform_id",
- sys_props.platform_id);
- ret = sysfs_show_64bit_prop(buffer, "platform_rev",
- sys_props.platform_rev);
+ sysfs_show_64bit_prop(buffer, offs, "platform_oem",
+ sys_props.platform_oem);
+ sysfs_show_64bit_prop(buffer, offs, "platform_id",
+ sys_props.platform_id);
+ sysfs_show_64bit_prop(buffer, offs, "platform_rev",
+ sys_props.platform_rev);
} else {
- ret = -EINVAL;
+ offs = -EINVAL;
}
- return ret;
+ return offs;
}
static void kfd_topology_kobj_release(struct kobject *kobj)
@@ -242,28 +264,32 @@
static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
- ssize_t ret;
+ int offs = 0;
struct kfd_iolink_properties *iolink;
/* Making sure that the buffer is an empty string */
buffer[0] = 0;
iolink = container_of(attr, struct kfd_iolink_properties, attr);
- sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type);
- sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj);
- sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min);
- sysfs_show_32bit_prop(buffer, "node_from", iolink->node_from);
- sysfs_show_32bit_prop(buffer, "node_to", iolink->node_to);
- sysfs_show_32bit_prop(buffer, "weight", iolink->weight);
- sysfs_show_32bit_prop(buffer, "min_latency", iolink->min_latency);
- sysfs_show_32bit_prop(buffer, "max_latency", iolink->max_latency);
- sysfs_show_32bit_prop(buffer, "min_bandwidth", iolink->min_bandwidth);
- sysfs_show_32bit_prop(buffer, "max_bandwidth", iolink->max_bandwidth);
- sysfs_show_32bit_prop(buffer, "recommended_transfer_size",
- iolink->rec_transfer_size);
- ret = sysfs_show_32bit_prop(buffer, "flags", iolink->flags);
+ if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu))
+ return -EPERM;
+ sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type);
+ sysfs_show_32bit_prop(buffer, offs, "version_major", iolink->ver_maj);
+ sysfs_show_32bit_prop(buffer, offs, "version_minor", iolink->ver_min);
+ sysfs_show_32bit_prop(buffer, offs, "node_from", iolink->node_from);
+ sysfs_show_32bit_prop(buffer, offs, "node_to", iolink->node_to);
+ sysfs_show_32bit_prop(buffer, offs, "weight", iolink->weight);
+ sysfs_show_32bit_prop(buffer, offs, "min_latency", iolink->min_latency);
+ sysfs_show_32bit_prop(buffer, offs, "max_latency", iolink->max_latency);
+ sysfs_show_32bit_prop(buffer, offs, "min_bandwidth",
+ iolink->min_bandwidth);
+ sysfs_show_32bit_prop(buffer, offs, "max_bandwidth",
+ iolink->max_bandwidth);
+ sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size",
+ iolink->rec_transfer_size);
+ sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags);
- return ret;
+ return offs;
}
static const struct sysfs_ops iolink_ops = {
@@ -278,20 +304,24 @@
static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
- ssize_t ret;
+ int offs = 0;
struct kfd_mem_properties *mem;
/* Making sure that the buffer is an empty string */
buffer[0] = 0;
mem = container_of(attr, struct kfd_mem_properties, attr);
- sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type);
- sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes);
- sysfs_show_32bit_prop(buffer, "flags", mem->flags);
- sysfs_show_32bit_prop(buffer, "width", mem->width);
- ret = sysfs_show_32bit_prop(buffer, "mem_clk_max", mem->mem_clk_max);
+ if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu))
+ return -EPERM;
+ sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type);
+ sysfs_show_64bit_prop(buffer, offs, "size_in_bytes",
+ mem->size_in_bytes);
+ sysfs_show_32bit_prop(buffer, offs, "flags", mem->flags);
+ sysfs_show_32bit_prop(buffer, offs, "width", mem->width);
+ sysfs_show_32bit_prop(buffer, offs, "mem_clk_max",
+ mem->mem_clk_max);
- return ret;
+ return offs;
}
static const struct sysfs_ops mem_ops = {
@@ -306,7 +336,7 @@
static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
- ssize_t ret;
+ int offs = 0;
uint32_t i, j;
struct kfd_cache_properties *cache;
@@ -314,30 +344,29 @@
buffer[0] = 0;
cache = container_of(attr, struct kfd_cache_properties, attr);
- sysfs_show_32bit_prop(buffer, "processor_id_low",
+ if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
+ return -EPERM;
+ sysfs_show_32bit_prop(buffer, offs, "processor_id_low",
cache->processor_id_low);
- sysfs_show_32bit_prop(buffer, "level", cache->cache_level);
- sysfs_show_32bit_prop(buffer, "size", cache->cache_size);
- sysfs_show_32bit_prop(buffer, "cache_line_size", cache->cacheline_size);
- sysfs_show_32bit_prop(buffer, "cache_lines_per_tag",
- cache->cachelines_per_tag);
- sysfs_show_32bit_prop(buffer, "association", cache->cache_assoc);
- sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency);
- sysfs_show_32bit_prop(buffer, "type", cache->cache_type);
- snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer);
+ sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level);
+ sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size);
+ sysfs_show_32bit_prop(buffer, offs, "cache_line_size",
+ cache->cacheline_size);
+ sysfs_show_32bit_prop(buffer, offs, "cache_lines_per_tag",
+ cache->cachelines_per_tag);
+ sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
+ sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
+ sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
+ offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
- for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) {
+ for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
/* Check each bit */
- if (cache->sibling_map[i] & (1 << j))
- ret = snprintf(buffer, PAGE_SIZE,
- "%s%d%s", buffer, 1, ",");
- else
- ret = snprintf(buffer, PAGE_SIZE,
- "%s%d%s", buffer, 0, ",");
- }
+ offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
+ (cache->sibling_map[i] >> j) & 1);
+
/* Replace the last "," with end of line */
- *(buffer + strlen(buffer) - 1) = 0xA;
- return ret;
+ buffer[offs-1] = '\n';
+ return offs;
}
static const struct sysfs_ops cache_ops = {
@@ -359,6 +388,7 @@
static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
char *buf)
{
+ int offs = 0;
struct kfd_perf_attr *attr;
buf[0] = 0;
@@ -366,7 +396,7 @@
if (!attr->data) /* invalid data for PMC */
return 0;
else
- return sysfs_show_32bit_val(buf, attr->data);
+ return sysfs_show_32bit_val(buf, offs, attr->data);
}
#define KFD_PERF_DESC(_name, _data) \
@@ -385,9 +415,8 @@
static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
+ int offs = 0;
struct kfd_topology_device *dev;
- char public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
- uint32_t i;
uint32_t log_max_watch_addr;
/* Making sure that the buffer is an empty string */
@@ -396,64 +425,80 @@
if (strcmp(attr->name, "gpu_id") == 0) {
dev = container_of(attr, struct kfd_topology_device,
attr_gpuid);
- return sysfs_show_32bit_val(buffer, dev->gpu_id);
+ if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
+ return -EPERM;
+ return sysfs_show_32bit_val(buffer, offs, dev->gpu_id);
}
if (strcmp(attr->name, "name") == 0) {
dev = container_of(attr, struct kfd_topology_device,
attr_name);
- for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) {
- public_name[i] =
- (char)dev->node_props.marketing_name[i];
- if (dev->node_props.marketing_name[i] == 0)
- break;
- }
- public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0;
- return sysfs_show_str_val(buffer, public_name);
+
+ if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
+ return -EPERM;
+ return sysfs_show_str_val(buffer, offs, dev->node_props.name);
}
dev = container_of(attr, struct kfd_topology_device,
attr_props);
- sysfs_show_32bit_prop(buffer, "cpu_cores_count",
- dev->node_props.cpu_cores_count);
- sysfs_show_32bit_prop(buffer, "simd_count",
- dev->node_props.simd_count);
- sysfs_show_32bit_prop(buffer, "mem_banks_count",
- dev->node_props.mem_banks_count);
- sysfs_show_32bit_prop(buffer, "caches_count",
- dev->node_props.caches_count);
- sysfs_show_32bit_prop(buffer, "io_links_count",
- dev->node_props.io_links_count);
- sysfs_show_32bit_prop(buffer, "cpu_core_id_base",
- dev->node_props.cpu_core_id_base);
- sysfs_show_32bit_prop(buffer, "simd_id_base",
- dev->node_props.simd_id_base);
- sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
- dev->node_props.max_waves_per_simd);
- sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
- dev->node_props.lds_size_in_kb);
- sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
- dev->node_props.gds_size_in_kb);
- sysfs_show_32bit_prop(buffer, "wave_front_size",
- dev->node_props.wave_front_size);
- sysfs_show_32bit_prop(buffer, "array_count",
- dev->node_props.array_count);
- sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine",
- dev->node_props.simd_arrays_per_engine);
- sysfs_show_32bit_prop(buffer, "cu_per_simd_array",
- dev->node_props.cu_per_simd_array);
- sysfs_show_32bit_prop(buffer, "simd_per_cu",
- dev->node_props.simd_per_cu);
- sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu",
- dev->node_props.max_slots_scratch_cu);
- sysfs_show_32bit_prop(buffer, "vendor_id",
- dev->node_props.vendor_id);
- sysfs_show_32bit_prop(buffer, "device_id",
- dev->node_props.device_id);
- sysfs_show_32bit_prop(buffer, "location_id",
- dev->node_props.location_id);
- sysfs_show_32bit_prop(buffer, "drm_render_minor",
- dev->node_props.drm_render_minor);
+ if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
+ return -EPERM;
+ sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
+ dev->node_props.cpu_cores_count);
+ sysfs_show_32bit_prop(buffer, offs, "simd_count",
+ dev->gpu ? dev->node_props.simd_count : 0);
+ sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
+ dev->node_props.mem_banks_count);
+ sysfs_show_32bit_prop(buffer, offs, "caches_count",
+ dev->node_props.caches_count);
+ sysfs_show_32bit_prop(buffer, offs, "io_links_count",
+ dev->node_props.io_links_count);
+ sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base",
+ dev->node_props.cpu_core_id_base);
+ sysfs_show_32bit_prop(buffer, offs, "simd_id_base",
+ dev->node_props.simd_id_base);
+ sysfs_show_32bit_prop(buffer, offs, "max_waves_per_simd",
+ dev->node_props.max_waves_per_simd);
+ sysfs_show_32bit_prop(buffer, offs, "lds_size_in_kb",
+ dev->node_props.lds_size_in_kb);
+ sysfs_show_32bit_prop(buffer, offs, "gds_size_in_kb",
+ dev->node_props.gds_size_in_kb);
+ sysfs_show_32bit_prop(buffer, offs, "num_gws",
+ dev->node_props.num_gws);
+ sysfs_show_32bit_prop(buffer, offs, "wave_front_size",
+ dev->node_props.wave_front_size);
+ sysfs_show_32bit_prop(buffer, offs, "array_count",
+ dev->node_props.array_count);
+ sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine",
+ dev->node_props.simd_arrays_per_engine);
+ sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array",
+ dev->node_props.cu_per_simd_array);
+ sysfs_show_32bit_prop(buffer, offs, "simd_per_cu",
+ dev->node_props.simd_per_cu);
+ sysfs_show_32bit_prop(buffer, offs, "max_slots_scratch_cu",
+ dev->node_props.max_slots_scratch_cu);
+ sysfs_show_32bit_prop(buffer, offs, "vendor_id",
+ dev->node_props.vendor_id);
+ sysfs_show_32bit_prop(buffer, offs, "device_id",
+ dev->node_props.device_id);
+ sysfs_show_32bit_prop(buffer, offs, "location_id",
+ dev->node_props.location_id);
+ sysfs_show_32bit_prop(buffer, offs, "domain",
+ dev->node_props.domain);
+ sysfs_show_32bit_prop(buffer, offs, "drm_render_minor",
+ dev->node_props.drm_render_minor);
+ sysfs_show_64bit_prop(buffer, offs, "hive_id",
+ dev->node_props.hive_id);
+ sysfs_show_32bit_prop(buffer, offs, "num_sdma_engines",
+ dev->node_props.num_sdma_engines);
+ sysfs_show_32bit_prop(buffer, offs, "num_sdma_xgmi_engines",
+ dev->node_props.num_sdma_xgmi_engines);
+ sysfs_show_32bit_prop(buffer, offs, "num_sdma_queues_per_engine",
+ dev->node_props.num_sdma_queues_per_engine);
+ sysfs_show_32bit_prop(buffer, offs, "num_cp_queues",
+ dev->node_props.num_cp_queues);
+ sysfs_show_64bit_prop(buffer, offs, "unique_id",
+ dev->node_props.unique_id);
if (dev->gpu) {
log_max_watch_addr =
@@ -473,22 +518,21 @@
dev->node_props.capability |=
HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
- sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
+ sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute",
dev->node_props.max_engine_clk_fcompute);
- sysfs_show_64bit_prop(buffer, "local_mem_size",
- (unsigned long long int) 0);
+ sysfs_show_64bit_prop(buffer, offs, "local_mem_size", 0ULL);
- sysfs_show_32bit_prop(buffer, "fw_version",
- dev->gpu->kfd2kgd->get_fw_version(
- dev->gpu->kgd,
- KGD_ENGINE_MEC1));
- sysfs_show_32bit_prop(buffer, "capability",
- dev->node_props.capability);
+ sysfs_show_32bit_prop(buffer, offs, "fw_version",
+ dev->gpu->mec_fw_version);
+ sysfs_show_32bit_prop(buffer, offs, "capability",
+ dev->node_props.capability);
+ sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
+ dev->gpu->sdma_fw_version);
}
- return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
- cpufreq_quick_get_max(0)/1000);
+ return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute",
+ cpufreq_quick_get_max(0)/1000);
}
static const struct sysfs_ops node_ops = {
@@ -757,7 +801,6 @@
{
int ret;
- pr_info("Creating topology SYSFS entries\n");
if (!sys_props.kobj_topology) {
sys_props.kobj_topology =
kfd_alloc_struct(sys_props.kobj_topology);
@@ -1020,7 +1063,6 @@
sys_props.generation_count++;
kfd_update_system_properties();
kfd_debug_print_topology();
- pr_info("Finished initializing topology\n");
} else
pr_err("Failed to update topology in sysfs ret=%d\n", ret);
@@ -1060,14 +1102,15 @@
if (!gpu)
return 0;
- gpu->kfd2kgd->get_local_mem_info(gpu->kgd, &local_mem_info);
+ amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info);
local_mem_size = local_mem_info.local_mem_size_private +
local_mem_info.local_mem_size_public;
buf[0] = gpu->pdev->devfn;
- buf[1] = gpu->pdev->subsystem_vendor;
- buf[2] = gpu->pdev->subsystem_device;
+ buf[1] = gpu->pdev->subsystem_vendor |
+ (gpu->pdev->subsystem_device << 16);
+ buf[2] = pci_domain_nr(gpu->pdev->bus);
buf[3] = gpu->pdev->device;
buf[4] = gpu->pdev->bus->number;
buf[5] = lower_32_bits(local_mem_size);
@@ -1087,19 +1130,29 @@
{
struct kfd_topology_device *dev;
struct kfd_topology_device *out_dev = NULL;
+ struct kfd_mem_properties *mem;
+ struct kfd_cache_properties *cache;
+ struct kfd_iolink_properties *iolink;
down_write(&topology_lock);
list_for_each_entry(dev, &topology_device_list, list) {
/* Discrete GPUs need their own topology device list
* entries. Don't assign them to CPU/APU nodes.
*/
- if (!gpu->device_info->needs_iommu_device &&
+ if (!gpu->use_iommu_v2 &&
dev->node_props.cpu_cores_count)
continue;
if (!dev->gpu && (dev->node_props.simd_count > 0)) {
dev->gpu = gpu;
out_dev = dev;
+
+ list_for_each_entry(mem, &dev->mem_props, list)
+ mem->gpu = dev->gpu;
+ list_for_each_entry(cache, &dev->cache_props, list)
+ cache->gpu = dev->gpu;
+ list_for_each_entry(iolink, &dev->io_link_props, list)
+ iolink->gpu = dev->gpu;
break;
}
}
@@ -1132,8 +1185,7 @@
* for APUs - If CRAT from ACPI reports more than one bank, then
* all the banks will report the same mem_clk_max information
*/
- dev->gpu->kfd2kgd->get_local_mem_info(dev->gpu->kgd,
- &local_mem_info);
+ amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info);
list_for_each_entry(mem, &dev->mem_props, list)
mem->mem_clk_max = local_mem_info.mem_clk_max;
@@ -1141,17 +1193,40 @@
static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
{
- struct kfd_iolink_properties *link;
+ struct kfd_iolink_properties *link, *cpu_link;
+ struct kfd_topology_device *cpu_dev;
+ uint32_t cap;
+ uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED;
+ uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED;
if (!dev || !dev->gpu)
return;
- /* GPU only creates direck links so apply flags setting to all */
- if (dev->gpu->device_info->asic_family == CHIP_HAWAII)
- list_for_each_entry(link, &dev->io_link_props, list)
- link->flags = CRAT_IOLINK_FLAGS_ENABLED |
- CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
- CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+ pcie_capability_read_dword(dev->gpu->pdev,
+ PCI_EXP_DEVCAP2, &cap);
+
+ if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
+ cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+ CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+
+ if (!dev->gpu->pci_atomic_requested ||
+ dev->gpu->device_info->asic_family == CHIP_HAWAII)
+ flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+ CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+
+ /* GPU only creates direct links so apply flags setting to all */
+ list_for_each_entry(link, &dev->io_link_props, list) {
+ link->flags = flag;
+ cpu_dev = kfd_topology_device_by_proximity_domain(
+ link->node_to);
+ if (cpu_dev) {
+ list_for_each_entry(cpu_link,
+ &cpu_dev->io_link_props, list)
+ if (cpu_link->node_to == link->node_from)
+ cpu_link->flags = cpu_flag;
+ }
+ }
}
int kfd_topology_add_device(struct kfd_dev *gpu)
@@ -1164,6 +1239,7 @@
void *crat_image = NULL;
size_t image_size = 0;
int proximity_domain;
+ struct amdgpu_device *adev;
INIT_LIST_HEAD(&temp_topology_device_list);
@@ -1231,20 +1307,40 @@
* needed for the topology
*/
- dev->gpu->kfd2kgd->get_cu_info(dev->gpu->kgd, &cu_info);
+ amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info);
+
+ strncpy(dev->node_props.name, gpu->device_info->asic_name,
+ KFD_TOPOLOGY_PUBLIC_NAME_SIZE);
+
dev->node_props.simd_arrays_per_engine =
cu_info.num_shader_arrays_per_engine;
dev->node_props.vendor_id = gpu->pdev->vendor;
dev->node_props.device_id = gpu->pdev->device;
- dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number,
- gpu->pdev->devfn);
+ dev->node_props.capability |=
+ ((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) <<
+ HSA_CAP_ASIC_REVISION_SHIFT) &
+ HSA_CAP_ASIC_REVISION_MASK);
+ dev->node_props.location_id = pci_dev_id(gpu->pdev);
+ dev->node_props.domain = pci_domain_nr(gpu->pdev->bus);
dev->node_props.max_engine_clk_fcompute =
- dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd);
+ amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd);
dev->node_props.max_engine_clk_ccompute =
cpufreq_quick_get_max(0) / 1000;
dev->node_props.drm_render_minor =
gpu->shared_resources.drm_render_minor;
+
+ dev->node_props.hive_id = gpu->hive_id;
+ dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
+ dev->node_props.num_sdma_xgmi_engines =
+ gpu->device_info->num_xgmi_sdma_engines;
+ dev->node_props.num_sdma_queues_per_engine =
+ gpu->device_info->num_sdma_queues_per_engine;
+ dev->node_props.num_gws = (dev->gpu->gws &&
+ dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
+ amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
+ dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);
+ dev->node_props.unique_id = gpu->unique_id;
kfd_fill_mem_clk_max_info(dev);
kfd_fill_iolink_non_crat_info(dev);
@@ -1261,13 +1357,24 @@
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
pr_debug("Adding doorbell packet type capability\n");
dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
break;
case CHIP_VEGA10:
+ case CHIP_VEGA12:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
+ case CHIP_RENOIR:
+ case CHIP_ARCTURUS:
+ case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_NAVI14:
+ case CHIP_SIENNA_CICHLID:
+ case CHIP_NAVY_FLOUNDER:
dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
@@ -1277,18 +1384,37 @@
dev->gpu->device_info->asic_family);
}
+ /*
+ * Overwrite the ATS capability according to use_iommu_v2, in case the
+ * CRAT provided by the BIOS is missing the corresponding bit.
+ */
+ if (dev->gpu->use_iommu_v2)
+ dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
+ else
+ dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;
+
/* Fix errors in CZ CRAT.
* simd_count: Carrizo CRAT reports wrong simd_count, probably
* because it doesn't consider masked out CUs
* max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
- * capability flag: Carrizo CRAT doesn't report IOMMU flags
*/
if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
dev->node_props.simd_count =
cu_info.simd_per_cu * cu_info.cu_active_number;
dev->node_props.max_waves_per_simd = 10;
- dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
}
+
+ adev = (struct amdgpu_device *)(dev->gpu->kgd);
+ /* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
+ dev->node_props.capability |=
+ ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
+ HSA_CAP_SRAM_EDCSUPPORTED : 0;
+ dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
+ HSA_CAP_MEM_EDCSUPPORTED : 0;
+
+ if (adev->asic_type != CHIP_VEGA10)
+ dev->node_props.capability |= (adev->ras_features != 0) ?
+ HSA_CAP_RASEVENTNOTIFY : 0;
kfd_debug_print_topology();
@@ -1360,7 +1486,6 @@
static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
{
- const struct cpuinfo_x86 *cpuinfo;
int first_cpu_of_numa_node;
if (!cpumask || cpumask == cpu_none_mask)
@@ -1368,9 +1493,11 @@
first_cpu_of_numa_node = cpumask_first(cpumask);
if (first_cpu_of_numa_node >= nr_cpu_ids)
return -1;
- cpuinfo = &cpu_data(first_cpu_of_numa_node);
-
- return cpuinfo->apicid;
+#ifdef CONFIG_X86_64
+ return cpu_data(first_cpu_of_numa_node).apicid;
+#else
+ return first_cpu_of_numa_node;
+#endif
}
/* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
@@ -1386,6 +1513,29 @@
return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
}
+/*
+ * kfd_double_confirm_iommu_support - decide whether @gpu will use IOMMUv2
+ * @gpu: device whose use_iommu_v2 flag is (re)computed
+ *
+ * The flag is cleared first. It is set only when the device info says the
+ * ASIC needs an IOMMU device and the topology list (walked under the read
+ * side of topology_lock) holds a node that has both CPU cores and SIMDs
+ * but no GPU attached yet.
+ */
+void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
+{
+	struct kfd_topology_device *node;
+
+	gpu->use_iommu_v2 = false;
+
+	if (gpu->device_info->needs_iommu_device) {
+		down_read(&topology_lock);
+
+		/* Only use IOMMUv2 if there is an APU topology node with no
+		 * GPU assigned yet. This GPU will be assigned to it.
+		 */
+		list_for_each_entry(node, &topology_device_list, list) {
+			if (!node->node_props.cpu_cores_count ||
+			    !node->node_props.simd_count)
+				continue;
+			if (node->gpu)
+				continue;
+
+			/* One match is enough; nothing below can clear it. */
+			gpu->use_iommu_v2 = true;
+			break;
+		}
+
+		up_read(&topology_lock);
+	}
+}
+
+
#if defined(CONFIG_DEBUG_FS)
int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
--
Gitblit v1.6.2