hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
....@@ -36,6 +36,8 @@
3636 #include "kfd_topology.h"
3737 #include "kfd_device_queue_manager.h"
3838 #include "kfd_iommu.h"
39
+#include "amdgpu_amdkfd.h"
40
+#include "amdgpu_ras.h"
3941
4042 /* topology_device_list - Master list of all topology devices */
4143 static struct list_head topology_device_list;
....@@ -100,7 +102,25 @@
100102 down_read(&topology_lock);
101103
102104 list_for_each_entry(top_dev, &topology_device_list, list)
103
- if (top_dev->gpu->pdev == pdev) {
105
+ if (top_dev->gpu && top_dev->gpu->pdev == pdev) {
106
+ device = top_dev->gpu;
107
+ break;
108
+ }
109
+
110
+ up_read(&topology_lock);
111
+
112
+ return device;
113
+}
114
+
115
+struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
116
+{
117
+ struct kfd_topology_device *top_dev;
118
+ struct kfd_dev *device = NULL;
119
+
120
+ down_read(&topology_lock);
121
+
122
+ list_for_each_entry(top_dev, &topology_device_list, list)
123
+ if (top_dev->gpu && top_dev->gpu->kgd == kgd) {
104124 device = top_dev->gpu;
105125 break;
106126 }
....@@ -190,39 +210,41 @@
190210 }
191211
192212
193
-#define sysfs_show_gen_prop(buffer, fmt, ...) \
194
- snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__)
195
-#define sysfs_show_32bit_prop(buffer, name, value) \
196
- sysfs_show_gen_prop(buffer, "%s %u\n", name, value)
197
-#define sysfs_show_64bit_prop(buffer, name, value) \
198
- sysfs_show_gen_prop(buffer, "%s %llu\n", name, value)
199
-#define sysfs_show_32bit_val(buffer, value) \
200
- sysfs_show_gen_prop(buffer, "%u\n", value)
201
-#define sysfs_show_str_val(buffer, value) \
202
- sysfs_show_gen_prop(buffer, "%s\n", value)
213
+#define sysfs_show_gen_prop(buffer, offs, fmt, ...) \
214
+ (offs += snprintf(buffer+offs, PAGE_SIZE-offs, \
215
+ fmt, __VA_ARGS__))
216
+#define sysfs_show_32bit_prop(buffer, offs, name, value) \
217
+ sysfs_show_gen_prop(buffer, offs, "%s %u\n", name, value)
218
+#define sysfs_show_64bit_prop(buffer, offs, name, value) \
219
+ sysfs_show_gen_prop(buffer, offs, "%s %llu\n", name, value)
220
+#define sysfs_show_32bit_val(buffer, offs, value) \
221
+ sysfs_show_gen_prop(buffer, offs, "%u\n", value)
222
+#define sysfs_show_str_val(buffer, offs, value) \
223
+ sysfs_show_gen_prop(buffer, offs, "%s\n", value)
203224
204225 static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
205226 char *buffer)
206227 {
207
- ssize_t ret;
228
+ int offs = 0;
208229
209230 /* Making sure that the buffer is an empty string */
210231 buffer[0] = 0;
211232
212233 if (attr == &sys_props.attr_genid) {
213
- ret = sysfs_show_32bit_val(buffer, sys_props.generation_count);
234
+ sysfs_show_32bit_val(buffer, offs,
235
+ sys_props.generation_count);
214236 } else if (attr == &sys_props.attr_props) {
215
- sysfs_show_64bit_prop(buffer, "platform_oem",
216
- sys_props.platform_oem);
217
- sysfs_show_64bit_prop(buffer, "platform_id",
218
- sys_props.platform_id);
219
- ret = sysfs_show_64bit_prop(buffer, "platform_rev",
220
- sys_props.platform_rev);
237
+ sysfs_show_64bit_prop(buffer, offs, "platform_oem",
238
+ sys_props.platform_oem);
239
+ sysfs_show_64bit_prop(buffer, offs, "platform_id",
240
+ sys_props.platform_id);
241
+ sysfs_show_64bit_prop(buffer, offs, "platform_rev",
242
+ sys_props.platform_rev);
221243 } else {
222
- ret = -EINVAL;
244
+ offs = -EINVAL;
223245 }
224246
225
- return ret;
247
+ return offs;
226248 }
227249
228250 static void kfd_topology_kobj_release(struct kobject *kobj)
....@@ -242,28 +264,32 @@
242264 static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
243265 char *buffer)
244266 {
245
- ssize_t ret;
267
+ int offs = 0;
246268 struct kfd_iolink_properties *iolink;
247269
248270 /* Making sure that the buffer is an empty string */
249271 buffer[0] = 0;
250272
251273 iolink = container_of(attr, struct kfd_iolink_properties, attr);
252
- sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type);
253
- sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj);
254
- sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min);
255
- sysfs_show_32bit_prop(buffer, "node_from", iolink->node_from);
256
- sysfs_show_32bit_prop(buffer, "node_to", iolink->node_to);
257
- sysfs_show_32bit_prop(buffer, "weight", iolink->weight);
258
- sysfs_show_32bit_prop(buffer, "min_latency", iolink->min_latency);
259
- sysfs_show_32bit_prop(buffer, "max_latency", iolink->max_latency);
260
- sysfs_show_32bit_prop(buffer, "min_bandwidth", iolink->min_bandwidth);
261
- sysfs_show_32bit_prop(buffer, "max_bandwidth", iolink->max_bandwidth);
262
- sysfs_show_32bit_prop(buffer, "recommended_transfer_size",
263
- iolink->rec_transfer_size);
264
- ret = sysfs_show_32bit_prop(buffer, "flags", iolink->flags);
274
+ if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu))
275
+ return -EPERM;
276
+ sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type);
277
+ sysfs_show_32bit_prop(buffer, offs, "version_major", iolink->ver_maj);
278
+ sysfs_show_32bit_prop(buffer, offs, "version_minor", iolink->ver_min);
279
+ sysfs_show_32bit_prop(buffer, offs, "node_from", iolink->node_from);
280
+ sysfs_show_32bit_prop(buffer, offs, "node_to", iolink->node_to);
281
+ sysfs_show_32bit_prop(buffer, offs, "weight", iolink->weight);
282
+ sysfs_show_32bit_prop(buffer, offs, "min_latency", iolink->min_latency);
283
+ sysfs_show_32bit_prop(buffer, offs, "max_latency", iolink->max_latency);
284
+ sysfs_show_32bit_prop(buffer, offs, "min_bandwidth",
285
+ iolink->min_bandwidth);
286
+ sysfs_show_32bit_prop(buffer, offs, "max_bandwidth",
287
+ iolink->max_bandwidth);
288
+ sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size",
289
+ iolink->rec_transfer_size);
290
+ sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags);
265291
266
- return ret;
292
+ return offs;
267293 }
268294
269295 static const struct sysfs_ops iolink_ops = {
....@@ -278,20 +304,24 @@
278304 static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
279305 char *buffer)
280306 {
281
- ssize_t ret;
307
+ int offs = 0;
282308 struct kfd_mem_properties *mem;
283309
284310 /* Making sure that the buffer is an empty string */
285311 buffer[0] = 0;
286312
287313 mem = container_of(attr, struct kfd_mem_properties, attr);
288
- sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type);
289
- sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes);
290
- sysfs_show_32bit_prop(buffer, "flags", mem->flags);
291
- sysfs_show_32bit_prop(buffer, "width", mem->width);
292
- ret = sysfs_show_32bit_prop(buffer, "mem_clk_max", mem->mem_clk_max);
314
+ if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu))
315
+ return -EPERM;
316
+ sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type);
317
+ sysfs_show_64bit_prop(buffer, offs, "size_in_bytes",
318
+ mem->size_in_bytes);
319
+ sysfs_show_32bit_prop(buffer, offs, "flags", mem->flags);
320
+ sysfs_show_32bit_prop(buffer, offs, "width", mem->width);
321
+ sysfs_show_32bit_prop(buffer, offs, "mem_clk_max",
322
+ mem->mem_clk_max);
293323
294
- return ret;
324
+ return offs;
295325 }
296326
297327 static const struct sysfs_ops mem_ops = {
....@@ -306,7 +336,7 @@
306336 static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
307337 char *buffer)
308338 {
309
- ssize_t ret;
339
+ int offs = 0;
310340 uint32_t i, j;
311341 struct kfd_cache_properties *cache;
312342
....@@ -314,30 +344,29 @@
314344 buffer[0] = 0;
315345
316346 cache = container_of(attr, struct kfd_cache_properties, attr);
317
- sysfs_show_32bit_prop(buffer, "processor_id_low",
347
+ if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
348
+ return -EPERM;
349
+ sysfs_show_32bit_prop(buffer, offs, "processor_id_low",
318350 cache->processor_id_low);
319
- sysfs_show_32bit_prop(buffer, "level", cache->cache_level);
320
- sysfs_show_32bit_prop(buffer, "size", cache->cache_size);
321
- sysfs_show_32bit_prop(buffer, "cache_line_size", cache->cacheline_size);
322
- sysfs_show_32bit_prop(buffer, "cache_lines_per_tag",
323
- cache->cachelines_per_tag);
324
- sysfs_show_32bit_prop(buffer, "association", cache->cache_assoc);
325
- sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency);
326
- sysfs_show_32bit_prop(buffer, "type", cache->cache_type);
327
- snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer);
351
+ sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level);
352
+ sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size);
353
+ sysfs_show_32bit_prop(buffer, offs, "cache_line_size",
354
+ cache->cacheline_size);
355
+ sysfs_show_32bit_prop(buffer, offs, "cache_lines_per_tag",
356
+ cache->cachelines_per_tag);
357
+ sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
358
+ sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
359
+ sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
360
+ offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
328361 for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
329
- for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) {
362
+ for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
330363 /* Check each bit */
331
- if (cache->sibling_map[i] & (1 << j))
332
- ret = snprintf(buffer, PAGE_SIZE,
333
- "%s%d%s", buffer, 1, ",");
334
- else
335
- ret = snprintf(buffer, PAGE_SIZE,
336
- "%s%d%s", buffer, 0, ",");
337
- }
364
+ offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
365
+ (cache->sibling_map[i] >> j) & 1);
366
+
338367 /* Replace the last "," with end of line */
339
- *(buffer + strlen(buffer) - 1) = 0xA;
340
- return ret;
368
+ buffer[offs-1] = '\n';
369
+ return offs;
341370 }
342371
343372 static const struct sysfs_ops cache_ops = {
....@@ -359,6 +388,7 @@
359388 static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
360389 char *buf)
361390 {
391
+ int offs = 0;
362392 struct kfd_perf_attr *attr;
363393
364394 buf[0] = 0;
....@@ -366,7 +396,7 @@
366396 if (!attr->data) /* invalid data for PMC */
367397 return 0;
368398 else
369
- return sysfs_show_32bit_val(buf, attr->data);
399
+ return sysfs_show_32bit_val(buf, offs, attr->data);
370400 }
371401
372402 #define KFD_PERF_DESC(_name, _data) \
....@@ -385,9 +415,8 @@
385415 static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
386416 char *buffer)
387417 {
418
+ int offs = 0;
388419 struct kfd_topology_device *dev;
389
- char public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
390
- uint32_t i;
391420 uint32_t log_max_watch_addr;
392421
393422 /* Making sure that the buffer is an empty string */
....@@ -396,64 +425,80 @@
396425 if (strcmp(attr->name, "gpu_id") == 0) {
397426 dev = container_of(attr, struct kfd_topology_device,
398427 attr_gpuid);
399
- return sysfs_show_32bit_val(buffer, dev->gpu_id);
428
+ if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
429
+ return -EPERM;
430
+ return sysfs_show_32bit_val(buffer, offs, dev->gpu_id);
400431 }
401432
402433 if (strcmp(attr->name, "name") == 0) {
403434 dev = container_of(attr, struct kfd_topology_device,
404435 attr_name);
405
- for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) {
406
- public_name[i] =
407
- (char)dev->node_props.marketing_name[i];
408
- if (dev->node_props.marketing_name[i] == 0)
409
- break;
410
- }
411
- public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0;
412
- return sysfs_show_str_val(buffer, public_name);
436
+
437
+ if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
438
+ return -EPERM;
439
+ return sysfs_show_str_val(buffer, offs, dev->node_props.name);
413440 }
414441
415442 dev = container_of(attr, struct kfd_topology_device,
416443 attr_props);
417
- sysfs_show_32bit_prop(buffer, "cpu_cores_count",
418
- dev->node_props.cpu_cores_count);
419
- sysfs_show_32bit_prop(buffer, "simd_count",
420
- dev->node_props.simd_count);
421
- sysfs_show_32bit_prop(buffer, "mem_banks_count",
422
- dev->node_props.mem_banks_count);
423
- sysfs_show_32bit_prop(buffer, "caches_count",
424
- dev->node_props.caches_count);
425
- sysfs_show_32bit_prop(buffer, "io_links_count",
426
- dev->node_props.io_links_count);
427
- sysfs_show_32bit_prop(buffer, "cpu_core_id_base",
428
- dev->node_props.cpu_core_id_base);
429
- sysfs_show_32bit_prop(buffer, "simd_id_base",
430
- dev->node_props.simd_id_base);
431
- sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
432
- dev->node_props.max_waves_per_simd);
433
- sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
434
- dev->node_props.lds_size_in_kb);
435
- sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
436
- dev->node_props.gds_size_in_kb);
437
- sysfs_show_32bit_prop(buffer, "wave_front_size",
438
- dev->node_props.wave_front_size);
439
- sysfs_show_32bit_prop(buffer, "array_count",
440
- dev->node_props.array_count);
441
- sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine",
442
- dev->node_props.simd_arrays_per_engine);
443
- sysfs_show_32bit_prop(buffer, "cu_per_simd_array",
444
- dev->node_props.cu_per_simd_array);
445
- sysfs_show_32bit_prop(buffer, "simd_per_cu",
446
- dev->node_props.simd_per_cu);
447
- sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu",
448
- dev->node_props.max_slots_scratch_cu);
449
- sysfs_show_32bit_prop(buffer, "vendor_id",
450
- dev->node_props.vendor_id);
451
- sysfs_show_32bit_prop(buffer, "device_id",
452
- dev->node_props.device_id);
453
- sysfs_show_32bit_prop(buffer, "location_id",
454
- dev->node_props.location_id);
455
- sysfs_show_32bit_prop(buffer, "drm_render_minor",
456
- dev->node_props.drm_render_minor);
444
+ if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
445
+ return -EPERM;
446
+ sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
447
+ dev->node_props.cpu_cores_count);
448
+ sysfs_show_32bit_prop(buffer, offs, "simd_count",
449
+ dev->gpu ? dev->node_props.simd_count : 0);
450
+ sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
451
+ dev->node_props.mem_banks_count);
452
+ sysfs_show_32bit_prop(buffer, offs, "caches_count",
453
+ dev->node_props.caches_count);
454
+ sysfs_show_32bit_prop(buffer, offs, "io_links_count",
455
+ dev->node_props.io_links_count);
456
+ sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base",
457
+ dev->node_props.cpu_core_id_base);
458
+ sysfs_show_32bit_prop(buffer, offs, "simd_id_base",
459
+ dev->node_props.simd_id_base);
460
+ sysfs_show_32bit_prop(buffer, offs, "max_waves_per_simd",
461
+ dev->node_props.max_waves_per_simd);
462
+ sysfs_show_32bit_prop(buffer, offs, "lds_size_in_kb",
463
+ dev->node_props.lds_size_in_kb);
464
+ sysfs_show_32bit_prop(buffer, offs, "gds_size_in_kb",
465
+ dev->node_props.gds_size_in_kb);
466
+ sysfs_show_32bit_prop(buffer, offs, "num_gws",
467
+ dev->node_props.num_gws);
468
+ sysfs_show_32bit_prop(buffer, offs, "wave_front_size",
469
+ dev->node_props.wave_front_size);
470
+ sysfs_show_32bit_prop(buffer, offs, "array_count",
471
+ dev->node_props.array_count);
472
+ sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine",
473
+ dev->node_props.simd_arrays_per_engine);
474
+ sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array",
475
+ dev->node_props.cu_per_simd_array);
476
+ sysfs_show_32bit_prop(buffer, offs, "simd_per_cu",
477
+ dev->node_props.simd_per_cu);
478
+ sysfs_show_32bit_prop(buffer, offs, "max_slots_scratch_cu",
479
+ dev->node_props.max_slots_scratch_cu);
480
+ sysfs_show_32bit_prop(buffer, offs, "vendor_id",
481
+ dev->node_props.vendor_id);
482
+ sysfs_show_32bit_prop(buffer, offs, "device_id",
483
+ dev->node_props.device_id);
484
+ sysfs_show_32bit_prop(buffer, offs, "location_id",
485
+ dev->node_props.location_id);
486
+ sysfs_show_32bit_prop(buffer, offs, "domain",
487
+ dev->node_props.domain);
488
+ sysfs_show_32bit_prop(buffer, offs, "drm_render_minor",
489
+ dev->node_props.drm_render_minor);
490
+ sysfs_show_64bit_prop(buffer, offs, "hive_id",
491
+ dev->node_props.hive_id);
492
+ sysfs_show_32bit_prop(buffer, offs, "num_sdma_engines",
493
+ dev->node_props.num_sdma_engines);
494
+ sysfs_show_32bit_prop(buffer, offs, "num_sdma_xgmi_engines",
495
+ dev->node_props.num_sdma_xgmi_engines);
496
+ sysfs_show_32bit_prop(buffer, offs, "num_sdma_queues_per_engine",
497
+ dev->node_props.num_sdma_queues_per_engine);
498
+ sysfs_show_32bit_prop(buffer, offs, "num_cp_queues",
499
+ dev->node_props.num_cp_queues);
500
+ sysfs_show_64bit_prop(buffer, offs, "unique_id",
501
+ dev->node_props.unique_id);
457502
458503 if (dev->gpu) {
459504 log_max_watch_addr =
....@@ -473,22 +518,21 @@
473518 dev->node_props.capability |=
474519 HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
475520
476
- sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
521
+ sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute",
477522 dev->node_props.max_engine_clk_fcompute);
478523
479
- sysfs_show_64bit_prop(buffer, "local_mem_size",
480
- (unsigned long long int) 0);
524
+ sysfs_show_64bit_prop(buffer, offs, "local_mem_size", 0ULL);
481525
482
- sysfs_show_32bit_prop(buffer, "fw_version",
483
- dev->gpu->kfd2kgd->get_fw_version(
484
- dev->gpu->kgd,
485
- KGD_ENGINE_MEC1));
486
- sysfs_show_32bit_prop(buffer, "capability",
487
- dev->node_props.capability);
526
+ sysfs_show_32bit_prop(buffer, offs, "fw_version",
527
+ dev->gpu->mec_fw_version);
528
+ sysfs_show_32bit_prop(buffer, offs, "capability",
529
+ dev->node_props.capability);
530
+ sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
531
+ dev->gpu->sdma_fw_version);
488532 }
489533
490
- return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
491
- cpufreq_quick_get_max(0)/1000);
534
+ return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute",
535
+ cpufreq_quick_get_max(0)/1000);
492536 }
493537
494538 static const struct sysfs_ops node_ops = {
....@@ -757,7 +801,6 @@
757801 {
758802 int ret;
759803
760
- pr_info("Creating topology SYSFS entries\n");
761804 if (!sys_props.kobj_topology) {
762805 sys_props.kobj_topology =
763806 kfd_alloc_struct(sys_props.kobj_topology);
....@@ -1020,7 +1063,6 @@
10201063 sys_props.generation_count++;
10211064 kfd_update_system_properties();
10221065 kfd_debug_print_topology();
1023
- pr_info("Finished initializing topology\n");
10241066 } else
10251067 pr_err("Failed to update topology in sysfs ret=%d\n", ret);
10261068
....@@ -1060,14 +1102,15 @@
10601102 if (!gpu)
10611103 return 0;
10621104
1063
- gpu->kfd2kgd->get_local_mem_info(gpu->kgd, &local_mem_info);
1105
+ amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info);
10641106
10651107 local_mem_size = local_mem_info.local_mem_size_private +
10661108 local_mem_info.local_mem_size_public;
10671109
10681110 buf[0] = gpu->pdev->devfn;
1069
- buf[1] = gpu->pdev->subsystem_vendor;
1070
- buf[2] = gpu->pdev->subsystem_device;
1111
+ buf[1] = gpu->pdev->subsystem_vendor |
1112
+ (gpu->pdev->subsystem_device << 16);
1113
+ buf[2] = pci_domain_nr(gpu->pdev->bus);
10711114 buf[3] = gpu->pdev->device;
10721115 buf[4] = gpu->pdev->bus->number;
10731116 buf[5] = lower_32_bits(local_mem_size);
....@@ -1087,19 +1130,29 @@
10871130 {
10881131 struct kfd_topology_device *dev;
10891132 struct kfd_topology_device *out_dev = NULL;
1133
+ struct kfd_mem_properties *mem;
1134
+ struct kfd_cache_properties *cache;
1135
+ struct kfd_iolink_properties *iolink;
10901136
10911137 down_write(&topology_lock);
10921138 list_for_each_entry(dev, &topology_device_list, list) {
10931139 /* Discrete GPUs need their own topology device list
10941140 * entries. Don't assign them to CPU/APU nodes.
10951141 */
1096
- if (!gpu->device_info->needs_iommu_device &&
1142
+ if (!gpu->use_iommu_v2 &&
10971143 dev->node_props.cpu_cores_count)
10981144 continue;
10991145
11001146 if (!dev->gpu && (dev->node_props.simd_count > 0)) {
11011147 dev->gpu = gpu;
11021148 out_dev = dev;
1149
+
1150
+ list_for_each_entry(mem, &dev->mem_props, list)
1151
+ mem->gpu = dev->gpu;
1152
+ list_for_each_entry(cache, &dev->cache_props, list)
1153
+ cache->gpu = dev->gpu;
1154
+ list_for_each_entry(iolink, &dev->io_link_props, list)
1155
+ iolink->gpu = dev->gpu;
11031156 break;
11041157 }
11051158 }
....@@ -1132,8 +1185,7 @@
11321185 * for APUs - If CRAT from ACPI reports more than one bank, then
11331186 * all the banks will report the same mem_clk_max information
11341187 */
1135
- dev->gpu->kfd2kgd->get_local_mem_info(dev->gpu->kgd,
1136
- &local_mem_info);
1188
+ amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info);
11371189
11381190 list_for_each_entry(mem, &dev->mem_props, list)
11391191 mem->mem_clk_max = local_mem_info.mem_clk_max;
....@@ -1141,17 +1193,40 @@
11411193
11421194 static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
11431195 {
1144
- struct kfd_iolink_properties *link;
1196
+ struct kfd_iolink_properties *link, *cpu_link;
1197
+ struct kfd_topology_device *cpu_dev;
1198
+ uint32_t cap;
1199
+ uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED;
1200
+ uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED;
11451201
11461202 if (!dev || !dev->gpu)
11471203 return;
11481204
1149
- /* GPU only creates direck links so apply flags setting to all */
1150
- if (dev->gpu->device_info->asic_family == CHIP_HAWAII)
1151
- list_for_each_entry(link, &dev->io_link_props, list)
1152
- link->flags = CRAT_IOLINK_FLAGS_ENABLED |
1153
- CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
1154
- CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
1205
+ pcie_capability_read_dword(dev->gpu->pdev,
1206
+ PCI_EXP_DEVCAP2, &cap);
1207
+
1208
+ if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
1209
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
1210
+ cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
1211
+ CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
1212
+
1213
+ if (!dev->gpu->pci_atomic_requested ||
1214
+ dev->gpu->device_info->asic_family == CHIP_HAWAII)
1215
+ flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
1216
+ CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
1217
+
1218
+ /* GPU only creates direct links so apply flags setting to all */
1219
+ list_for_each_entry(link, &dev->io_link_props, list) {
1220
+ link->flags = flag;
1221
+ cpu_dev = kfd_topology_device_by_proximity_domain(
1222
+ link->node_to);
1223
+ if (cpu_dev) {
1224
+ list_for_each_entry(cpu_link,
1225
+ &cpu_dev->io_link_props, list)
1226
+ if (cpu_link->node_to == link->node_from)
1227
+ cpu_link->flags = cpu_flag;
1228
+ }
1229
+ }
11551230 }
11561231
11571232 int kfd_topology_add_device(struct kfd_dev *gpu)
....@@ -1164,6 +1239,7 @@
11641239 void *crat_image = NULL;
11651240 size_t image_size = 0;
11661241 int proximity_domain;
1242
+ struct amdgpu_device *adev;
11671243
11681244 INIT_LIST_HEAD(&temp_topology_device_list);
11691245
....@@ -1231,20 +1307,40 @@
12311307 * needed for the topology
12321308 */
12331309
1234
- dev->gpu->kfd2kgd->get_cu_info(dev->gpu->kgd, &cu_info);
1310
+ amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info);
1311
+
1312
+ strncpy(dev->node_props.name, gpu->device_info->asic_name,
1313
+ KFD_TOPOLOGY_PUBLIC_NAME_SIZE);
1314
+
12351315 dev->node_props.simd_arrays_per_engine =
12361316 cu_info.num_shader_arrays_per_engine;
12371317
12381318 dev->node_props.vendor_id = gpu->pdev->vendor;
12391319 dev->node_props.device_id = gpu->pdev->device;
1240
- dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number,
1241
- gpu->pdev->devfn);
1320
+ dev->node_props.capability |=
1321
+ ((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) <<
1322
+ HSA_CAP_ASIC_REVISION_SHIFT) &
1323
+ HSA_CAP_ASIC_REVISION_MASK);
1324
+ dev->node_props.location_id = pci_dev_id(gpu->pdev);
1325
+ dev->node_props.domain = pci_domain_nr(gpu->pdev->bus);
12421326 dev->node_props.max_engine_clk_fcompute =
1243
- dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd);
1327
+ amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd);
12441328 dev->node_props.max_engine_clk_ccompute =
12451329 cpufreq_quick_get_max(0) / 1000;
12461330 dev->node_props.drm_render_minor =
12471331 gpu->shared_resources.drm_render_minor;
1332
+
1333
+ dev->node_props.hive_id = gpu->hive_id;
1334
+ dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
1335
+ dev->node_props.num_sdma_xgmi_engines =
1336
+ gpu->device_info->num_xgmi_sdma_engines;
1337
+ dev->node_props.num_sdma_queues_per_engine =
1338
+ gpu->device_info->num_sdma_queues_per_engine;
1339
+ dev->node_props.num_gws = (dev->gpu->gws &&
1340
+ dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
1341
+ amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
1342
+ dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);
1343
+ dev->node_props.unique_id = gpu->unique_id;
12481344
12491345 kfd_fill_mem_clk_max_info(dev);
12501346 kfd_fill_iolink_non_crat_info(dev);
....@@ -1261,13 +1357,24 @@
12611357 case CHIP_FIJI:
12621358 case CHIP_POLARIS10:
12631359 case CHIP_POLARIS11:
1360
+ case CHIP_POLARIS12:
1361
+ case CHIP_VEGAM:
12641362 pr_debug("Adding doorbell packet type capability\n");
12651363 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
12661364 HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
12671365 HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
12681366 break;
12691367 case CHIP_VEGA10:
1368
+ case CHIP_VEGA12:
1369
+ case CHIP_VEGA20:
12701370 case CHIP_RAVEN:
1371
+ case CHIP_RENOIR:
1372
+ case CHIP_ARCTURUS:
1373
+ case CHIP_NAVI10:
1374
+ case CHIP_NAVI12:
1375
+ case CHIP_NAVI14:
1376
+ case CHIP_SIENNA_CICHLID:
1377
+ case CHIP_NAVY_FLOUNDER:
12711378 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
12721379 HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
12731380 HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
....@@ -1277,18 +1384,37 @@
12771384 dev->gpu->device_info->asic_family);
12781385 }
12791386
1387
+ /*
1388
+ * Overwrite ATS capability according to needs_iommu_device to fix
1389
+ * potential missing corresponding bit in CRAT of BIOS.
1390
+ */
1391
+ if (dev->gpu->use_iommu_v2)
1392
+ dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
1393
+ else
1394
+ dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;
1395
+
12801396 /* Fix errors in CZ CRAT.
12811397 * simd_count: Carrizo CRAT reports wrong simd_count, probably
12821398 * because it doesn't consider masked out CUs
12831399 * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
1284
- * capability flag: Carrizo CRAT doesn't report IOMMU flags
12851400 */
12861401 if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
12871402 dev->node_props.simd_count =
12881403 cu_info.simd_per_cu * cu_info.cu_active_number;
12891404 dev->node_props.max_waves_per_simd = 10;
1290
- dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
12911405 }
1406
+
1407
+ adev = (struct amdgpu_device *)(dev->gpu->kgd);
1408
+ /* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
1409
+ dev->node_props.capability |=
1410
+ ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
1411
+ HSA_CAP_SRAM_EDCSUPPORTED : 0;
1412
+ dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
1413
+ HSA_CAP_MEM_EDCSUPPORTED : 0;
1414
+
1415
+ if (adev->asic_type != CHIP_VEGA10)
1416
+ dev->node_props.capability |= (adev->ras_features != 0) ?
1417
+ HSA_CAP_RASEVENTNOTIFY : 0;
12921418
12931419 kfd_debug_print_topology();
12941420
....@@ -1360,7 +1486,6 @@
13601486
13611487 static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
13621488 {
1363
- const struct cpuinfo_x86 *cpuinfo;
13641489 int first_cpu_of_numa_node;
13651490
13661491 if (!cpumask || cpumask == cpu_none_mask)
....@@ -1368,9 +1493,11 @@
13681493 first_cpu_of_numa_node = cpumask_first(cpumask);
13691494 if (first_cpu_of_numa_node >= nr_cpu_ids)
13701495 return -1;
1371
- cpuinfo = &cpu_data(first_cpu_of_numa_node);
1372
-
1373
- return cpuinfo->apicid;
1496
+#ifdef CONFIG_X86_64
1497
+ return cpu_data(first_cpu_of_numa_node).apicid;
1498
+#else
1499
+ return first_cpu_of_numa_node;
1500
+#endif
13741501 }
13751502
13761503 /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
....@@ -1386,6 +1513,29 @@
13861513 return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
13871514 }
13881515
1516
+void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
1517
+{
1518
+ struct kfd_topology_device *dev;
1519
+
1520
+ gpu->use_iommu_v2 = false;
1521
+
1522
+ if (!gpu->device_info->needs_iommu_device)
1523
+ return;
1524
+
1525
+ down_read(&topology_lock);
1526
+
1527
+ /* Only use IOMMUv2 if there is an APU topology node with no GPU
1528
+ * assigned yet. This GPU will be assigned to it.
1529
+ */
1530
+ list_for_each_entry(dev, &topology_device_list, list)
1531
+ if (dev->node_props.cpu_cores_count &&
1532
+ dev->node_props.simd_count &&
1533
+ !dev->gpu)
1534
+ gpu->use_iommu_v2 = true;
1535
+
1536
+ up_read(&topology_lock);
1537
+}
1538
+
13891539 #if defined(CONFIG_DEBUG_FS)
13901540
13911541 int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)