From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] disable FB

---
 kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.c |  454 +++++++++++++++++++++++++++++++++++++-------------------
 1 files changed, 302 insertions(+), 152 deletions(-)

diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 5cf499a..2b31c30 100644
--- a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -36,6 +36,8 @@
 #include "kfd_topology.h"
 #include "kfd_device_queue_manager.h"
 #include "kfd_iommu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_ras.h"
 
 /* topology_device_list - Master list of all topology devices */
 static struct list_head topology_device_list;
@@ -100,7 +102,25 @@
 	down_read(&topology_lock);
 
 	list_for_each_entry(top_dev, &topology_device_list, list)
-		if (top_dev->gpu->pdev == pdev) {
+		if (top_dev->gpu && top_dev->gpu->pdev == pdev) {
+			device = top_dev->gpu;
+			break;
+		}
+
+	up_read(&topology_lock);
+
+	return device;
+}
+
+struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
+{
+	struct kfd_topology_device *top_dev;
+	struct kfd_dev *device = NULL;
+
+	down_read(&topology_lock);
+
+	list_for_each_entry(top_dev, &topology_device_list, list)
+		if (top_dev->gpu && top_dev->gpu->kgd == kgd) {
 			device = top_dev->gpu;
 			break;
 		}
@@ -190,39 +210,41 @@
 }
 
 
-#define sysfs_show_gen_prop(buffer, fmt, ...) \
-		snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__)
-#define sysfs_show_32bit_prop(buffer, name, value) \
-		sysfs_show_gen_prop(buffer, "%s %u\n", name, value)
-#define sysfs_show_64bit_prop(buffer, name, value) \
-		sysfs_show_gen_prop(buffer, "%s %llu\n", name, value)
-#define sysfs_show_32bit_val(buffer, value) \
-		sysfs_show_gen_prop(buffer, "%u\n", value)
-#define sysfs_show_str_val(buffer, value) \
-		sysfs_show_gen_prop(buffer, "%s\n", value)
+#define sysfs_show_gen_prop(buffer, offs, fmt, ...)		\
+		(offs += snprintf(buffer+offs, PAGE_SIZE-offs,	\
+				  fmt, __VA_ARGS__)) /* append at offs, advance offs */
+#define sysfs_show_32bit_prop(buffer, offs, name, value) \
+		sysfs_show_gen_prop(buffer, offs, "%s %u\n", name, value)
+#define sysfs_show_64bit_prop(buffer, offs, name, value) \
+		sysfs_show_gen_prop(buffer, offs, "%s %llu\n", name, value)
+#define sysfs_show_32bit_val(buffer, offs, value) \
+		sysfs_show_gen_prop(buffer, offs, "%u\n", value)
+#define sysfs_show_str_val(buffer, offs, value) \
+		sysfs_show_gen_prop(buffer, offs, "%s\n", value)
 
 static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
 		char *buffer)
 {
-	ssize_t ret;
+	int offs = 0;	/* running output length; doubles as return value */
 
 	/* Making sure that the buffer is an empty string */
 	buffer[0] = 0;
 
 	if (attr == &sys_props.attr_genid) {
-		ret = sysfs_show_32bit_val(buffer, sys_props.generation_count);
+		sysfs_show_32bit_val(buffer, offs,
+				     sys_props.generation_count);
 	} else if (attr == &sys_props.attr_props) {
-		sysfs_show_64bit_prop(buffer, "platform_oem",
-				sys_props.platform_oem);
-		sysfs_show_64bit_prop(buffer, "platform_id",
-				sys_props.platform_id);
-		ret = sysfs_show_64bit_prop(buffer, "platform_rev",
-				sys_props.platform_rev);
+		sysfs_show_64bit_prop(buffer, offs, "platform_oem",
+				      sys_props.platform_oem);
+		sysfs_show_64bit_prop(buffer, offs, "platform_id",
+				      sys_props.platform_id);
+		sysfs_show_64bit_prop(buffer, offs, "platform_rev",
+				      sys_props.platform_rev);
 	} else {
-		ret = -EINVAL;
+		offs = -EINVAL;	/* neither attr_genid nor attr_props */
 	}
 
-	return ret;
+	return offs;
 }
 
 static void kfd_topology_kobj_release(struct kobject *kobj)
@@ -242,28 +264,32 @@
 static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
 		char *buffer)
 {
-	ssize_t ret;
+	int offs = 0;	/* running output length, advanced by sysfs_show_* */
 	struct kfd_iolink_properties *iolink;
 
 	/* Making sure that the buffer is an empty string */
 	buffer[0] = 0;
 
 	iolink = container_of(attr, struct kfd_iolink_properties, attr);
-	sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type);
-	sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj);
-	sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min);
-	sysfs_show_32bit_prop(buffer, "node_from", iolink->node_from);
-	sysfs_show_32bit_prop(buffer, "node_to", iolink->node_to);
-	sysfs_show_32bit_prop(buffer, "weight", iolink->weight);
-	sysfs_show_32bit_prop(buffer, "min_latency", iolink->min_latency);
-	sysfs_show_32bit_prop(buffer, "max_latency", iolink->max_latency);
-	sysfs_show_32bit_prop(buffer, "min_bandwidth", iolink->min_bandwidth);
-	sysfs_show_32bit_prop(buffer, "max_bandwidth", iolink->max_bandwidth);
-	sysfs_show_32bit_prop(buffer, "recommended_transfer_size",
-			iolink->rec_transfer_size);
-	ret = sysfs_show_32bit_prop(buffer, "flags", iolink->flags);
+	if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu))
+		return -EPERM;	/* check on the link's GPU returned nonzero */
+	sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type);
+	sysfs_show_32bit_prop(buffer, offs, "version_major", iolink->ver_maj);
+	sysfs_show_32bit_prop(buffer, offs, "version_minor", iolink->ver_min);
+	sysfs_show_32bit_prop(buffer, offs, "node_from", iolink->node_from);
+	sysfs_show_32bit_prop(buffer, offs, "node_to", iolink->node_to);
+	sysfs_show_32bit_prop(buffer, offs, "weight", iolink->weight);
+	sysfs_show_32bit_prop(buffer, offs, "min_latency", iolink->min_latency);
+	sysfs_show_32bit_prop(buffer, offs, "max_latency", iolink->max_latency);
+	sysfs_show_32bit_prop(buffer, offs, "min_bandwidth",
+			      iolink->min_bandwidth);
+	sysfs_show_32bit_prop(buffer, offs, "max_bandwidth",
+			      iolink->max_bandwidth);
+	sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size",
+			      iolink->rec_transfer_size);
+	sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags);
 
-	return ret;
+	return offs;
 }
 
 static const struct sysfs_ops iolink_ops = {
@@ -278,20 +304,24 @@
 static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
 		char *buffer)
 {
-	ssize_t ret;
+	int offs = 0;	/* running output length, advanced by sysfs_show_* */
 	struct kfd_mem_properties *mem;
 
 	/* Making sure that the buffer is an empty string */
 	buffer[0] = 0;
 
 	mem = container_of(attr, struct kfd_mem_properties, attr);
-	sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type);
-	sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes);
-	sysfs_show_32bit_prop(buffer, "flags", mem->flags);
-	sysfs_show_32bit_prop(buffer, "width", mem->width);
-	ret = sysfs_show_32bit_prop(buffer, "mem_clk_max", mem->mem_clk_max);
+	if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu))
+		return -EPERM;	/* check on the bank's GPU returned nonzero */
+	sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type);
+	sysfs_show_64bit_prop(buffer, offs, "size_in_bytes",
+			      mem->size_in_bytes);
+	sysfs_show_32bit_prop(buffer, offs, "flags", mem->flags);
+	sysfs_show_32bit_prop(buffer, offs, "width", mem->width);
+	sysfs_show_32bit_prop(buffer, offs, "mem_clk_max",
+			      mem->mem_clk_max);
 
-	return ret;
+	return offs;
 }
 
 static const struct sysfs_ops mem_ops = {
@@ -306,7 +336,7 @@
 static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
 		char *buffer)
 {
-	ssize_t ret;
+	int offs = 0;
 	uint32_t i, j;
 	struct kfd_cache_properties *cache;
 
@@ -314,30 +344,29 @@
 	buffer[0] = 0;
 
 	cache = container_of(attr, struct kfd_cache_properties, attr);
-	sysfs_show_32bit_prop(buffer, "processor_id_low",
+	if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
+		return -EPERM;
+	sysfs_show_32bit_prop(buffer, offs, "processor_id_low",
 			cache->processor_id_low);
-	sysfs_show_32bit_prop(buffer, "level", cache->cache_level);
-	sysfs_show_32bit_prop(buffer, "size", cache->cache_size);
-	sysfs_show_32bit_prop(buffer, "cache_line_size", cache->cacheline_size);
-	sysfs_show_32bit_prop(buffer, "cache_lines_per_tag",
-			cache->cachelines_per_tag);
-	sysfs_show_32bit_prop(buffer, "association", cache->cache_assoc);
-	sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency);
-	sysfs_show_32bit_prop(buffer, "type", cache->cache_type);
-	snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer);
+	sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level);
+	sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size);
+	sysfs_show_32bit_prop(buffer, offs, "cache_line_size",
+			      cache->cacheline_size);
+	sysfs_show_32bit_prop(buffer, offs, "cache_lines_per_tag",
+			      cache->cachelines_per_tag);
+	sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
+	sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
+	sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
+	offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
 	for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
-		for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) {
+		for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
 			/* Check each bit */
-			if (cache->sibling_map[i] & (1 << j))
-				ret = snprintf(buffer, PAGE_SIZE,
-					 "%s%d%s", buffer, 1, ",");
-			else
-				ret = snprintf(buffer, PAGE_SIZE,
-					 "%s%d%s", buffer, 0, ",");
-		}
+			offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
+					 (cache->sibling_map[i] >> j) & 1);
+
 	/* Replace the last "," with end of line */
-	*(buffer + strlen(buffer) - 1) = 0xA;
-	return ret;
+	buffer[offs-1] = '\n';
+	return offs;
 }
 
 static const struct sysfs_ops cache_ops = {
@@ -359,6 +388,7 @@
 static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
 			char *buf)
 {
+	int offs = 0;
 	struct kfd_perf_attr *attr;
 
 	buf[0] = 0;
@@ -366,7 +396,7 @@
 	if (!attr->data) /* invalid data for PMC */
 		return 0;
 	else
-		return sysfs_show_32bit_val(buf, attr->data);
+		return sysfs_show_32bit_val(buf, offs, attr->data);
 }
 
 #define KFD_PERF_DESC(_name, _data)			\
@@ -385,9 +415,8 @@
 static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 		char *buffer)
 {
+	int offs = 0;
 	struct kfd_topology_device *dev;
-	char public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
-	uint32_t i;
 	uint32_t log_max_watch_addr;
 
 	/* Making sure that the buffer is an empty string */
@@ -396,64 +425,80 @@
 	if (strcmp(attr->name, "gpu_id") == 0) {
 		dev = container_of(attr, struct kfd_topology_device,
 				attr_gpuid);
-		return sysfs_show_32bit_val(buffer, dev->gpu_id);
+		if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
+			return -EPERM;
+		return sysfs_show_32bit_val(buffer, offs, dev->gpu_id);
 	}
 
 	if (strcmp(attr->name, "name") == 0) {
 		dev = container_of(attr, struct kfd_topology_device,
 				attr_name);
-		for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) {
-			public_name[i] =
-					(char)dev->node_props.marketing_name[i];
-			if (dev->node_props.marketing_name[i] == 0)
-				break;
-		}
-		public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0;
-		return sysfs_show_str_val(buffer, public_name);
+
+		if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
+			return -EPERM;
+		return sysfs_show_str_val(buffer, offs, dev->node_props.name);
 	}
 
 	dev = container_of(attr, struct kfd_topology_device,
 			attr_props);
-	sysfs_show_32bit_prop(buffer, "cpu_cores_count",
-			dev->node_props.cpu_cores_count);
-	sysfs_show_32bit_prop(buffer, "simd_count",
-			dev->node_props.simd_count);
-	sysfs_show_32bit_prop(buffer, "mem_banks_count",
-			dev->node_props.mem_banks_count);
-	sysfs_show_32bit_prop(buffer, "caches_count",
-			dev->node_props.caches_count);
-	sysfs_show_32bit_prop(buffer, "io_links_count",
-			dev->node_props.io_links_count);
-	sysfs_show_32bit_prop(buffer, "cpu_core_id_base",
-			dev->node_props.cpu_core_id_base);
-	sysfs_show_32bit_prop(buffer, "simd_id_base",
-			dev->node_props.simd_id_base);
-	sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
-			dev->node_props.max_waves_per_simd);
-	sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
-			dev->node_props.lds_size_in_kb);
-	sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
-			dev->node_props.gds_size_in_kb);
-	sysfs_show_32bit_prop(buffer, "wave_front_size",
-			dev->node_props.wave_front_size);
-	sysfs_show_32bit_prop(buffer, "array_count",
-			dev->node_props.array_count);
-	sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine",
-			dev->node_props.simd_arrays_per_engine);
-	sysfs_show_32bit_prop(buffer, "cu_per_simd_array",
-			dev->node_props.cu_per_simd_array);
-	sysfs_show_32bit_prop(buffer, "simd_per_cu",
-			dev->node_props.simd_per_cu);
-	sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu",
-			dev->node_props.max_slots_scratch_cu);
-	sysfs_show_32bit_prop(buffer, "vendor_id",
-			dev->node_props.vendor_id);
-	sysfs_show_32bit_prop(buffer, "device_id",
-			dev->node_props.device_id);
-	sysfs_show_32bit_prop(buffer, "location_id",
-			dev->node_props.location_id);
-	sysfs_show_32bit_prop(buffer, "drm_render_minor",
-			dev->node_props.drm_render_minor);
+	if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
+		return -EPERM;
+	sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
+			      dev->node_props.cpu_cores_count);
+	sysfs_show_32bit_prop(buffer, offs, "simd_count",
+			      dev->gpu ? dev->node_props.simd_count : 0);
+	sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
+			      dev->node_props.mem_banks_count);
+	sysfs_show_32bit_prop(buffer, offs, "caches_count",
+			      dev->node_props.caches_count);
+	sysfs_show_32bit_prop(buffer, offs, "io_links_count",
+			      dev->node_props.io_links_count);
+	sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base",
+			      dev->node_props.cpu_core_id_base);
+	sysfs_show_32bit_prop(buffer, offs, "simd_id_base",
+			      dev->node_props.simd_id_base);
+	sysfs_show_32bit_prop(buffer, offs, "max_waves_per_simd",
+			      dev->node_props.max_waves_per_simd);
+	sysfs_show_32bit_prop(buffer, offs, "lds_size_in_kb",
+			      dev->node_props.lds_size_in_kb);
+	sysfs_show_32bit_prop(buffer, offs, "gds_size_in_kb",
+			      dev->node_props.gds_size_in_kb);
+	sysfs_show_32bit_prop(buffer, offs, "num_gws",
+			      dev->node_props.num_gws);
+	sysfs_show_32bit_prop(buffer, offs, "wave_front_size",
+			      dev->node_props.wave_front_size);
+	sysfs_show_32bit_prop(buffer, offs, "array_count",
+			      dev->node_props.array_count);
+	sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine",
+			      dev->node_props.simd_arrays_per_engine);
+	sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array",
+			      dev->node_props.cu_per_simd_array);
+	sysfs_show_32bit_prop(buffer, offs, "simd_per_cu",
+			      dev->node_props.simd_per_cu);
+	sysfs_show_32bit_prop(buffer, offs, "max_slots_scratch_cu",
+			      dev->node_props.max_slots_scratch_cu);
+	sysfs_show_32bit_prop(buffer, offs, "vendor_id",
+			      dev->node_props.vendor_id);
+	sysfs_show_32bit_prop(buffer, offs, "device_id",
+			      dev->node_props.device_id);
+	sysfs_show_32bit_prop(buffer, offs, "location_id",
+			      dev->node_props.location_id);
+	sysfs_show_32bit_prop(buffer, offs, "domain",
+			      dev->node_props.domain);
+	sysfs_show_32bit_prop(buffer, offs, "drm_render_minor",
+			      dev->node_props.drm_render_minor);
+	sysfs_show_64bit_prop(buffer, offs, "hive_id",
+			      dev->node_props.hive_id);
+	sysfs_show_32bit_prop(buffer, offs, "num_sdma_engines",
+			      dev->node_props.num_sdma_engines);
+	sysfs_show_32bit_prop(buffer, offs, "num_sdma_xgmi_engines",
+			      dev->node_props.num_sdma_xgmi_engines);
+	sysfs_show_32bit_prop(buffer, offs, "num_sdma_queues_per_engine",
+			      dev->node_props.num_sdma_queues_per_engine);
+	sysfs_show_32bit_prop(buffer, offs, "num_cp_queues",
+			      dev->node_props.num_cp_queues);
+	sysfs_show_64bit_prop(buffer, offs, "unique_id",
+			      dev->node_props.unique_id);
 
 	if (dev->gpu) {
 		log_max_watch_addr =
@@ -473,22 +518,21 @@
 			dev->node_props.capability |=
 					HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
 
-		sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
+		sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute",
 			dev->node_props.max_engine_clk_fcompute);
 
-		sysfs_show_64bit_prop(buffer, "local_mem_size",
-				(unsigned long long int) 0);
+		sysfs_show_64bit_prop(buffer, offs, "local_mem_size", 0ULL);
 
-		sysfs_show_32bit_prop(buffer, "fw_version",
-			dev->gpu->kfd2kgd->get_fw_version(
-						dev->gpu->kgd,
-						KGD_ENGINE_MEC1));
-		sysfs_show_32bit_prop(buffer, "capability",
-				dev->node_props.capability);
+		sysfs_show_32bit_prop(buffer, offs, "fw_version",
+				      dev->gpu->mec_fw_version);
+		sysfs_show_32bit_prop(buffer, offs, "capability",
+				      dev->node_props.capability);
+		sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
+				      dev->gpu->sdma_fw_version);
 	}
 
-	return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
-					cpufreq_quick_get_max(0)/1000);
+	return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute",
+				     cpufreq_quick_get_max(0)/1000);
 }
 
 static const struct sysfs_ops node_ops = {
@@ -757,7 +801,6 @@
 {
 	int ret;
 
-	pr_info("Creating topology SYSFS entries\n");
 	if (!sys_props.kobj_topology) {
 		sys_props.kobj_topology =
 				kfd_alloc_struct(sys_props.kobj_topology);
@@ -1020,7 +1063,6 @@
 		sys_props.generation_count++;
 		kfd_update_system_properties();
 		kfd_debug_print_topology();
-		pr_info("Finished initializing topology\n");
 	} else
 		pr_err("Failed to update topology in sysfs ret=%d\n", ret);
 
@@ -1060,14 +1102,15 @@
 	if (!gpu)
 		return 0;
 
-	gpu->kfd2kgd->get_local_mem_info(gpu->kgd, &local_mem_info);
+	amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info);
 
 	local_mem_size = local_mem_info.local_mem_size_private +
 			local_mem_info.local_mem_size_public;
 
 	buf[0] = gpu->pdev->devfn;
-	buf[1] = gpu->pdev->subsystem_vendor;
-	buf[2] = gpu->pdev->subsystem_device;
+	buf[1] = gpu->pdev->subsystem_vendor |
+		(gpu->pdev->subsystem_device << 16);
+	buf[2] = pci_domain_nr(gpu->pdev->bus);
 	buf[3] = gpu->pdev->device;
 	buf[4] = gpu->pdev->bus->number;
 	buf[5] = lower_32_bits(local_mem_size);
@@ -1087,19 +1130,29 @@
 {
 	struct kfd_topology_device *dev;
 	struct kfd_topology_device *out_dev = NULL;
+	struct kfd_mem_properties *mem;
+	struct kfd_cache_properties *cache;
+	struct kfd_iolink_properties *iolink;
 
 	down_write(&topology_lock);
 	list_for_each_entry(dev, &topology_device_list, list) {
 		/* Discrete GPUs need their own topology device list
 		 * entries. Don't assign them to CPU/APU nodes.
 		 */
-		if (!gpu->device_info->needs_iommu_device &&
+		if (!gpu->use_iommu_v2 &&
 		    dev->node_props.cpu_cores_count)
 			continue;
 
 		if (!dev->gpu && (dev->node_props.simd_count > 0)) {
 			dev->gpu = gpu;
 			out_dev = dev;
+
+			list_for_each_entry(mem, &dev->mem_props, list)
+				mem->gpu = dev->gpu;
+			list_for_each_entry(cache, &dev->cache_props, list)
+				cache->gpu = dev->gpu;
+			list_for_each_entry(iolink, &dev->io_link_props, list)
+				iolink->gpu = dev->gpu;
 			break;
 		}
 	}
@@ -1132,8 +1185,7 @@
 	 * for APUs - If CRAT from ACPI reports more than one bank, then
 	 *	all the banks will report the same mem_clk_max information
 	 */
-	dev->gpu->kfd2kgd->get_local_mem_info(dev->gpu->kgd,
-		&local_mem_info);
+	amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info);
 
 	list_for_each_entry(mem, &dev->mem_props, list)
 		mem->mem_clk_max = local_mem_info.mem_clk_max;
@@ -1141,17 +1193,40 @@
 
 static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
 {
-	struct kfd_iolink_properties *link;
+	struct kfd_iolink_properties *link, *cpu_link;
+	struct kfd_topology_device *cpu_dev;
+	uint32_t cap;	/* PCI_EXP_DEVCAP2 value read from the GPU */
+	uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED;	/* reverse links */
+	uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED;	/* dev's own links */
 
 	if (!dev || !dev->gpu)
 		return;
 
-	/* GPU only creates direck links so apply flags setting to all */
-	if (dev->gpu->device_info->asic_family == CHIP_HAWAII)
-		list_for_each_entry(link, &dev->io_link_props, list)
-			link->flags = CRAT_IOLINK_FLAGS_ENABLED |
-				CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
-				CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+	pcie_capability_read_dword(dev->gpu->pdev,
+			PCI_EXP_DEVCAP2, &cap);
+
+	if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+		     PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
+		cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+			CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+
+	if (!dev->gpu->pci_atomic_requested ||
+	    dev->gpu->device_info->asic_family == CHIP_HAWAII)
+		flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+			CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+
+	/* GPU creates only direct links: flag each one and its reverse link */
+	list_for_each_entry(link, &dev->io_link_props, list) {
+		link->flags = flag;
+		cpu_dev = kfd_topology_device_by_proximity_domain(
+				link->node_to);
+		if (cpu_dev) {
+			list_for_each_entry(cpu_link,
+					    &cpu_dev->io_link_props, list)
+				if (cpu_link->node_to == link->node_from)
+					cpu_link->flags = cpu_flag;
+		}
+	}
 }
 
 int kfd_topology_add_device(struct kfd_dev *gpu)
@@ -1164,6 +1239,7 @@
 	void *crat_image = NULL;
 	size_t image_size = 0;
 	int proximity_domain;
+	struct amdgpu_device *adev;
 
 	INIT_LIST_HEAD(&temp_topology_device_list);
 
@@ -1231,20 +1307,40 @@
 	 * needed for the topology
 	 */
 
-	dev->gpu->kfd2kgd->get_cu_info(dev->gpu->kgd, &cu_info);
+	amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info);
+
+	strncpy(dev->node_props.name, gpu->device_info->asic_name,
+			KFD_TOPOLOGY_PUBLIC_NAME_SIZE);
+
 	dev->node_props.simd_arrays_per_engine =
 		cu_info.num_shader_arrays_per_engine;
 
 	dev->node_props.vendor_id = gpu->pdev->vendor;
 	dev->node_props.device_id = gpu->pdev->device;
-	dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number,
-		gpu->pdev->devfn);
+	dev->node_props.capability |=
+		((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) <<
+			HSA_CAP_ASIC_REVISION_SHIFT) &
+			HSA_CAP_ASIC_REVISION_MASK);
+	dev->node_props.location_id = pci_dev_id(gpu->pdev);
+	dev->node_props.domain = pci_domain_nr(gpu->pdev->bus);
 	dev->node_props.max_engine_clk_fcompute =
-		dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd);
+		amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd);
 	dev->node_props.max_engine_clk_ccompute =
 		cpufreq_quick_get_max(0) / 1000;
 	dev->node_props.drm_render_minor =
 		gpu->shared_resources.drm_render_minor;
+
+	dev->node_props.hive_id = gpu->hive_id;
+	dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
+	dev->node_props.num_sdma_xgmi_engines =
+				gpu->device_info->num_xgmi_sdma_engines;
+	dev->node_props.num_sdma_queues_per_engine =
+				gpu->device_info->num_sdma_queues_per_engine;
+	dev->node_props.num_gws = (dev->gpu->gws &&
+		dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
+		amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
+	dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);
+	dev->node_props.unique_id = gpu->unique_id;
 
 	kfd_fill_mem_clk_max_info(dev);
 	kfd_fill_iolink_non_crat_info(dev);
@@ -1261,13 +1357,24 @@
 	case CHIP_FIJI:
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		pr_debug("Adding doorbell packet type capability\n");
 		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
 		break;
 	case CHIP_VEGA10:
+	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
+	case CHIP_RENOIR:
+	case CHIP_ARCTURUS:
+	case CHIP_NAVI10:
+	case CHIP_NAVI12:
+	case CHIP_NAVI14:
+	case CHIP_SIENNA_CICHLID:
+	case CHIP_NAVY_FLOUNDER:
 		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
@@ -1277,18 +1384,37 @@
 		     dev->gpu->device_info->asic_family);
 	}
 
+	/*
+	 * Overwrite the ATS capability according to use_iommu_v2 to fix a
+	 * potentially missing corresponding bit in the BIOS CRAT.
+	 */
+	if (dev->gpu->use_iommu_v2)
+		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
+	else
+		dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;
+
 	/* Fix errors in CZ CRAT.
 	 * simd_count: Carrizo CRAT reports wrong simd_count, probably
 	 *		because it doesn't consider masked out CUs
 	 * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
-	 * capability flag: Carrizo CRAT doesn't report IOMMU flags
 	 */
 	if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
 		dev->node_props.simd_count =
 			cu_info.simd_per_cu * cu_info.cu_active_number;
 		dev->node_props.max_waves_per_simd = 10;
-		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
 	}
+
+	adev = (struct amdgpu_device *)(dev->gpu->kgd);
+	/* KFD only cares about SRAM ECC on GFX and HBM ECC on UMC */
+	dev->node_props.capability |=
+		((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
+		HSA_CAP_SRAM_EDCSUPPORTED : 0;
+	dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
+		HSA_CAP_MEM_EDCSUPPORTED : 0;
+
+	if (adev->asic_type != CHIP_VEGA10)
+		dev->node_props.capability |= (adev->ras_features != 0) ?
+			HSA_CAP_RASEVENTNOTIFY : 0;
 
 	kfd_debug_print_topology();
 
@@ -1360,7 +1486,6 @@
 
 static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
 {
-	const struct cpuinfo_x86 *cpuinfo;
 	int first_cpu_of_numa_node;
 
 	if (!cpumask || cpumask == cpu_none_mask)
@@ -1368,9 +1493,11 @@
 	first_cpu_of_numa_node = cpumask_first(cpumask);
 	if (first_cpu_of_numa_node >= nr_cpu_ids)
 		return -1;
-	cpuinfo = &cpu_data(first_cpu_of_numa_node);
-
-	return cpuinfo->apicid;
+#ifdef CONFIG_X86_64
+	return cpu_data(first_cpu_of_numa_node).apicid;
+#else
+	return first_cpu_of_numa_node;
+#endif
 }
 
 /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
@@ -1386,6 +1513,29 @@
 	return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
 }
 
+void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
+{
+	struct kfd_topology_device *dev;
+
+	gpu->use_iommu_v2 = false;	/* default: no IOMMUv2 */
+
+	if (!gpu->device_info->needs_iommu_device)
+		return;	/* device_info does not ask for an IOMMU device */
+
+	down_read(&topology_lock);
+
+	/* Only use IOMMUv2 if there is an APU topology node (one reporting
+	 * both CPU cores and SIMDs) with no GPU assigned yet. This GPU will
+	 * be assigned to it. */
+	list_for_each_entry(dev, &topology_device_list, list)
+		if (dev->node_props.cpu_cores_count &&
+		    dev->node_props.simd_count &&
+		    !dev->gpu)
+			gpu->use_iommu_v2 = true;
+
+	up_read(&topology_lock);
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)

--
Gitblit v1.6.2