2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -22,93 +22,58 @@
 
 #include "amdgpu_amdkfd.h"
 #include "amd_shared.h"
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
+#include "amdgpu_dma_buf.h"
 #include <linux/module.h>
+#include <linux/dma-buf.h>
+#include "amdgpu_xgmi.h"
+#include <uapi/linux/kfd_ioctl.h>
 
-const struct kgd2kfd_calls *kgd2kfd;
-bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
+/* Total memory size in system memory and all GPU VRAM. Used to
+ * estimate worst case amount of memory to reserve for page tables
+ */
+uint64_t amdgpu_amdkfd_total_mem_size;
 
-static const unsigned int compute_vmid_bitmap = 0xFF00;
+static bool kfd_initialized;
 
 int amdgpu_amdkfd_init(void)
 {
+	struct sysinfo si;
 	int ret;
 
-#if defined(CONFIG_HSA_AMD_MODULE)
-	int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
+	si_meminfo(&si);
+	amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh;
+	amdgpu_amdkfd_total_mem_size *= si.mem_unit;
 
-	kgd2kfd_init_p = symbol_request(kgd2kfd_init);
-
-	if (kgd2kfd_init_p == NULL)
-		return -ENOENT;
-
-	ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
-	if (ret) {
-		symbol_put(kgd2kfd_init);
-		kgd2kfd = NULL;
-	}
-
-
-#elif defined(CONFIG_HSA_AMD)
-
-	ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
-	if (ret)
-		kgd2kfd = NULL;
-
-#else
-	kgd2kfd = NULL;
-	ret = -ENOENT;
-#endif
-
-#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD)
+	ret = kgd2kfd_init();
 	amdgpu_amdkfd_gpuvm_init_mem_limits();
-#endif
+	kfd_initialized = !ret;
 
 	return ret;
 }
 
 void amdgpu_amdkfd_fini(void)
 {
-	if (kgd2kfd) {
-		kgd2kfd->exit();
-		symbol_put(kgd2kfd_init);
+	if (kfd_initialized) {
+		kgd2kfd_exit();
+		kfd_initialized = false;
 	}
 }
 
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
-	const struct kfd2kgd_calls *kfd2kgd;
+	bool vf = amdgpu_sriov_vf(adev);
 
-	if (!kgd2kfd)
+	if (!kfd_initialized)
 		return;
 
-	switch (adev->asic_type) {
-#ifdef CONFIG_DRM_AMDGPU_CIK
-	case CHIP_KAVERI:
-	case CHIP_HAWAII:
-		kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
-		break;
-#endif
-	case CHIP_CARRIZO:
-	case CHIP_TONGA:
-	case CHIP_FIJI:
-	case CHIP_POLARIS10:
-	case CHIP_POLARIS11:
-		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
-		break;
-	case CHIP_VEGA10:
-	case CHIP_RAVEN:
-		kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
-		break;
-	default:
-		dev_info(adev->dev, "kfd not supported on this ASIC\n");
-		return;
-	}
+	adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev,
+				      adev->pdev, adev->asic_type, vf);
 
-	adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
-				   adev->pdev, kfd2kgd);
+	if (adev->kfd.dev)
+		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
 }
 
 /**
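The rewritten init path above drops the symbol_request() indirection in favor of direct kgd2kfd_*() calls, and starts tracking amdgpu_amdkfd_total_mem_size: low system memory at init time, plus the VRAM of each GPU that probes successfully. A minimal userspace sketch of that accounting, with hypothetical numbers standing in for the si_meminfo() results and gmc.real_vram_size (16 GiB of 4 KiB pages plus one 8 GiB card):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* hypothetical stand-ins for si.totalram/si.totalhigh/si.mem_unit */
        uint64_t totalram  = 4194304; /* pages */
        uint64_t totalhigh = 0;       /* highmem pages, excluded */
        uint64_t mem_unit  = 4096;    /* bytes per page */

        uint64_t total = (totalram - totalhigh) * mem_unit;
        assert(total == 16ULL << 30); /* 16 GiB of system memory */

        total += 8ULL << 30;          /* one probed GPU's real_vram_size */
        assert(total == 24ULL << 30); /* worst case used for page-table reserve */
        return 0;
    }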
@@ -148,97 +113,99 @@
 {
 	int i;
 	int last_valid_bit;
-	if (adev->kfd) {
+
+	if (adev->kfd.dev) {
 		struct kgd2kfd_shared_resources gpu_resources = {
-			.compute_vmid_bitmap = compute_vmid_bitmap,
+			.compute_vmid_bitmap =
+				((1 << AMDGPU_NUM_VMID) - 1) -
+				((1 << adev->vm_manager.first_kfd_vmid) - 1),
 			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
 			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
 			.gpuvm_size = min(adev->vm_manager.max_pfn
 					  << AMDGPU_GPU_PAGE_SHIFT,
-					  AMDGPU_VA_HOLE_START),
-			.drm_render_minor = adev->ddev->render->index
+					  AMDGPU_GMC_HOLE_START),
+			.drm_render_minor = adev_to_drm(adev)->render->index,
+			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
+
 		};
 
 		/* this is going to have a few of the MSBs set that we need to
-		 * clear */
-		bitmap_complement(gpu_resources.queue_bitmap,
+		 * clear
+		 */
+		bitmap_complement(gpu_resources.cp_queue_bitmap,
 				  adev->gfx.mec.queue_bitmap,
 				  KGD_MAX_QUEUES);
 
-		/* remove the KIQ bit as well */
-		if (adev->gfx.kiq.ring.ready)
-			clear_bit(amdgpu_gfx_queue_to_bit(adev,
-							  adev->gfx.kiq.ring.me - 1,
-							  adev->gfx.kiq.ring.pipe,
-							  adev->gfx.kiq.ring.queue),
-				  gpu_resources.queue_bitmap);
-
 		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
-		 * nbits is not compile time constant */
+		 * nbits is not compile time constant
+		 */
 		last_valid_bit = 1 /* only first MEC can have compute queues */
 				* adev->gfx.mec.num_pipe_per_mec
 				* adev->gfx.mec.num_queue_per_pipe;
 		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
-			clear_bit(i, gpu_resources.queue_bitmap);
+			clear_bit(i, gpu_resources.cp_queue_bitmap);
 
 		amdgpu_doorbell_get_kfd_info(adev,
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_start_offset);
+
+		/* Since SOC15, BIF starts to statically use the
+		 * lower 12 bits of doorbell addresses for routing
+		 * based on settings in registers like
+		 * SDMA0_DOORBELL_RANGE etc..
+		 * In order to route a doorbell to CP engine, the lower
+		 * 12 bits of its address has to be outside the range
+		 * set for SDMA, VCN, and IH blocks.
+		 */
 		if (adev->asic_type >= CHIP_VEGA10) {
-			/* On SOC15 the BIF is involved in routing
-			 * doorbells using the low 12 bits of the
-			 * address. Communicate the assignments to
-			 * KFD. KFD uses two doorbell pages per
-			 * process in case of 64-bit doorbells so we
-			 * can use each doorbell assignment twice.
-			 */
-			gpu_resources.sdma_doorbell[0][0] =
-				AMDGPU_DOORBELL64_sDMA_ENGINE0;
-			gpu_resources.sdma_doorbell[0][1] =
-				AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
-			gpu_resources.sdma_doorbell[1][0] =
-				AMDGPU_DOORBELL64_sDMA_ENGINE1;
-			gpu_resources.sdma_doorbell[1][1] =
-				AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
-			/* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
-			 * SDMA, IH and VCN. So don't use them for the CP.
-			 */
-			gpu_resources.reserved_doorbell_mask = 0x1f0;
-			gpu_resources.reserved_doorbell_val = 0x0f0;
+			gpu_resources.non_cp_doorbells_start =
+					adev->doorbell_index.first_non_cp;
+			gpu_resources.non_cp_doorbells_end =
+					adev->doorbell_index.last_non_cp;
 		}
 
-		kgd2kfd->device_init(adev->kfd, &gpu_resources);
+		kgd2kfd_device_init(adev->kfd.dev, adev_to_drm(adev), &gpu_resources);
 	}
 }
 
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
 {
-	if (adev->kfd) {
-		kgd2kfd->device_exit(adev->kfd);
-		adev->kfd = NULL;
+	if (adev->kfd.dev) {
+		kgd2kfd_device_exit(adev->kfd.dev);
+		adev->kfd.dev = NULL;
 	}
 }
 
 void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 		const void *ih_ring_entry)
 {
-	if (adev->kfd)
-		kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
+	if (adev->kfd.dev)
+		kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
 }
 
-void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
 {
-	if (adev->kfd)
-		kgd2kfd->suspend(adev->kfd);
+	if (adev->kfd.dev)
+		kgd2kfd_suspend(adev->kfd.dev, run_pm);
 }
 
-int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
+int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev)
 {
 	int r = 0;
 
-	if (adev->kfd)
-		r = kgd2kfd->resume(adev->kfd);
+	if (adev->kfd.dev)
+		r = kgd2kfd_resume_iommu(adev->kfd.dev);
+
+	return r;
+}
+
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
+{
+	int r = 0;
+
+	if (adev->kfd.dev)
+		r = kgd2kfd_resume(adev->kfd.dev, run_pm);
 
 	return r;
 }
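The open-coded compute_vmid_bitmap above generalizes the old 0xFF00 constant: the mask of all VMIDs minus the mask of graphics VMIDs below first_kfd_vmid. A quick userspace check under assumed values (16 VMIDs total, KFD owning 8-15; neither constant appears in this hunk):

    #include <assert.h>

    int main(void)
    {
        const unsigned int num_vmid = 16;      /* assumed AMDGPU_NUM_VMID */
        const unsigned int first_kfd_vmid = 8; /* assumed vm_manager.first_kfd_vmid */

        /* all-VMIDs mask minus the mask of graphics VMIDs 0..7 */
        unsigned int bitmap = ((1u << num_vmid) - 1) -
                              ((1u << first_kfd_vmid) - 1);

        assert(bitmap == 0xFF00); /* the constant the old code hardcoded */
        return 0;
    }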
@@ -247,8 +214,8 @@
 {
 	int r = 0;
 
-	if (adev->kfd)
-		r = kgd2kfd->pre_reset(adev->kfd);
+	if (adev->kfd.dev)
+		r = kgd2kfd_pre_reset(adev->kfd.dev);
 
 	return r;
 }
@@ -257,8 +224,8 @@
 {
 	int r = 0;
 
-	if (adev->kfd)
-		r = kgd2kfd->post_reset(adev->kfd);
+	if (adev->kfd.dev)
+		r = kgd2kfd_post_reset(adev->kfd.dev);
 
 	return r;
 }
@@ -267,12 +234,13 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-	amdgpu_device_gpu_recover(adev, NULL, false);
+	if (amdgpu_device_should_recover_gpu(adev))
+		amdgpu_device_gpu_recover(adev, NULL);
 }
 
-int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
-			void **mem_obj, uint64_t *gpu_addr,
-			void **cpu_ptr, bool mqd_gfx9)
+int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
+				void **mem_obj, uint64_t *gpu_addr,
+				void **cpu_ptr, bool cp_mqd_gfx9)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	struct amdgpu_bo *bo = NULL;
@@ -288,8 +256,8 @@
 	bp.type = ttm_bo_type_kernel;
 	bp.resv = NULL;
 
-	if (mqd_gfx9)
-		bp.flags |= AMDGPU_GEM_CREATE_MQD_GFX9;
+	if (cp_mqd_gfx9)
+		bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;
 
 	r = amdgpu_bo_create(adev, &bp, &bo);
 	if (r) {
@@ -342,7 +310,7 @@
 	return r;
 }
 
-void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
+void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 {
 	struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
 
@@ -353,8 +321,79 @@
 	amdgpu_bo_unref(&(bo));
 }
 
-void get_local_mem_info(struct kgd_dev *kgd,
-			struct kfd_local_mem_info *mem_info)
+int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
+				void **mem_obj)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_bo_param bp;
+	int r;
+
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = 1;
+	bp.domain = AMDGPU_GEM_DOMAIN_GWS;
+	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+	bp.type = ttm_bo_type_device;
+	bp.resv = NULL;
+
+	r = amdgpu_bo_create(adev, &bp, &bo);
+	if (r) {
+		dev_err(adev->dev,
+			"failed to allocate gws BO for amdkfd (%d)\n", r);
+		return r;
+	}
+
+	*mem_obj = bo;
+	return 0;
+}
+
+void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
+{
+	struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
+
+	amdgpu_bo_unref(&bo);
+}
+
+uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
+				      enum kgd_engine_type type)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	switch (type) {
+	case KGD_ENGINE_PFP:
+		return adev->gfx.pfp_fw_version;
+
+	case KGD_ENGINE_ME:
+		return adev->gfx.me_fw_version;
+
+	case KGD_ENGINE_CE:
+		return adev->gfx.ce_fw_version;
+
+	case KGD_ENGINE_MEC1:
+		return adev->gfx.mec_fw_version;
+
+	case KGD_ENGINE_MEC2:
+		return adev->gfx.mec2_fw_version;
+
+	case KGD_ENGINE_RLC:
+		return adev->gfx.rlc_fw_version;
+
+	case KGD_ENGINE_SDMA1:
+		return adev->sdma.instance[0].fw_version;
+
+	case KGD_ENGINE_SDMA2:
+		return adev->sdma.instance[1].fw_version;
+
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
+void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
+				      struct kfd_local_mem_info *mem_info)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
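amdgpu_amdkfd_alloc_gws() above returns only a buffer object: GWS has no CPU access and no GPU address, unlike the GTT allocations made by amdgpu_amdkfd_alloc_gtt_mem(). A hypothetical caller, not taken from this patch (the example_* names are illustrative), would pair it with the matching free:

    /* hypothetical KFD-side sketch; not part of this patch */
    static int example_setup_gws(struct kgd_dev *kgd, uint32_t gws_size,
                                 void **gws_bo)
    {
        int r;

        r = amdgpu_amdkfd_alloc_gws(kgd, gws_size, gws_bo);
        if (r)
            return r; /* allocation failed or no GWS on this ASIC */

        /* ... hand *gws_bo to the queue setup code ... */
        return 0;
    }

    static void example_teardown_gws(struct kgd_dev *kgd, void *gws_bo)
    {
        amdgpu_amdkfd_free_gws(kgd, gws_bo);
    }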
@@ -379,13 +418,16 @@
 
 	if (amdgpu_sriov_vf(adev))
 		mem_info->mem_clk_max = adev->clock.default_mclk / 100;
-	else if (adev->powerplay.pp_funcs)
-		mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
-	else
+	else if (adev->pm.dpm_enabled) {
+		if (amdgpu_emu_mode == 1)
+			mem_info->mem_clk_max = 0;
+		else
+			mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
+	} else
 		mem_info->mem_clk_max = 100;
 }
 
-uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
+uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
@@ -394,20 +436,20 @@
 	return 0;
 }
 
-uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
+uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
 	/* the sclk is in quantas of 10kHz */
 	if (amdgpu_sriov_vf(adev))
 		return adev->clock.default_sclk / 100;
-	else if (adev->powerplay.pp_funcs)
+	else if (adev->pm.dpm_enabled)
 		return amdgpu_dpm_get_sclk(adev, false) / 100;
 	else
 		return 100;
 }
 
-void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
+void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
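Both clock getters report MHz: the stored values are in 10 kHz units (the "quantas of 10kHz" comment above), so dividing by 100 converts them. A one-line check with a hypothetical 1.5 GHz sclk:

    #include <assert.h>

    int main(void)
    {
        unsigned int default_sclk = 150000; /* hypothetical: 1.5 GHz in 10 kHz units */

        assert(default_sclk / 100 == 1500); /* MHz value reported to KFD */
        return 0;
    }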
@@ -430,11 +472,126 @@
 	cu_info->lds_size = acu_info.lds_size;
 }
 
+int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
+				  struct kgd_dev **dma_buf_kgd,
+				  uint64_t *bo_size, void *metadata_buffer,
+				  size_t buffer_size, uint32_t *metadata_size,
+				  uint32_t *flags)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct dma_buf *dma_buf;
+	struct drm_gem_object *obj;
+	struct amdgpu_bo *bo;
+	uint64_t metadata_flags;
+	int r = -EINVAL;
+
+	dma_buf = dma_buf_get(dma_buf_fd);
+	if (IS_ERR(dma_buf))
+		return PTR_ERR(dma_buf);
+
+	if (dma_buf->ops != &amdgpu_dmabuf_ops)
+		/* Can't handle non-graphics buffers */
+		goto out_put;
+
+	obj = dma_buf->priv;
+	if (obj->dev->driver != adev_to_drm(adev)->driver)
+		/* Can't handle buffers from different drivers */
+		goto out_put;
+
+	adev = drm_to_adev(obj->dev);
+	bo = gem_to_amdgpu_bo(obj);
+	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
+				    AMDGPU_GEM_DOMAIN_GTT)))
+		/* Only VRAM and GTT BOs are supported */
+		goto out_put;
+
+	r = 0;
+	if (dma_buf_kgd)
+		*dma_buf_kgd = (struct kgd_dev *)adev;
+	if (bo_size)
+		*bo_size = amdgpu_bo_size(bo);
+	if (metadata_size)
+		*metadata_size = bo->metadata_size;
+	if (metadata_buffer)
+		r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
+					   metadata_size, &metadata_flags);
+	if (flags) {
+		*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+				KFD_IOC_ALLOC_MEM_FLAGS_VRAM
+				: KFD_IOC_ALLOC_MEM_FLAGS_GTT;
+
+		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
+			*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
+	}
+
+out_put:
+	dma_buf_put(dma_buf);
+	return r;
+}
+
 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
 
-	return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
+	return amdgpu_vram_mgr_usage(vram_man);
+}
+
+uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	return adev->gmc.xgmi.hive_id;
+}
+
+uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	return adev->unique_id;
+}
+
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
+{
+	struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
+	struct amdgpu_device *adev = (struct amdgpu_device *)dst;
+	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
+
+	if (ret < 0) {
+		DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
+			adev->gmc.xgmi.physical_node_id,
+			peer_adev->gmc.xgmi.physical_node_id, ret);
+		ret = 0;
+	}
+	return (uint8_t)ret;
+}
+
+uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	return adev->rmmio_remap.bus_addr;
+}
+
+uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	return adev->gds.gws_size;
+}
+
+uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	return adev->rev_id;
+}
+
+int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	return adev->gmc.noretry;
 }
 
 int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
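Every out-parameter of amdgpu_amdkfd_get_dmabuf_info() above is optional, and the function rejects anything that is not an amdgpu VRAM or GTT BO from the same driver. A hypothetical caller that only wants the size and placement flags (example_query_dmabuf is illustrative, not from this patch):

    /* hypothetical sketch; not part of this patch */
    static int example_query_dmabuf(struct kgd_dev *kgd, int fd)
    {
        uint64_t size;
        uint32_t flags;
        int r;

        /* NULL/0 for the kgd, metadata buffer and metadata size outputs */
        r = amdgpu_amdkfd_get_dmabuf_info(kgd, fd, NULL, &size, NULL, 0,
                                          NULL, &flags);
        if (r)
            return r; /* not an importable amdgpu BO: -EINVAL */

        /* flags is KFD_IOC_ALLOC_MEM_FLAGS_VRAM or ..._GTT, with
         * ..._PUBLIC added for CPU-accessible VRAM
         */
        return 0;
    }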
@@ -478,6 +635,7 @@
 	job->vmid = vmid;
 
 	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
+
 	if (ret) {
 		DRM_ERROR("amdgpu: failed to schedule IB.\n");
 		goto err_ib_sched;
@@ -496,60 +654,51 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-	if (adev->powerplay.pp_funcs &&
-	    adev->powerplay.pp_funcs->switch_power_profile)
-		amdgpu_dpm_switch_power_profile(adev,
-						PP_SMC_POWER_PROFILE_COMPUTE,
-						!idle);
+	amdgpu_dpm_switch_power_profile(adev,
+					PP_SMC_POWER_PROFILE_COMPUTE,
+					!idle);
 }
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
 {
-	if (adev->kfd) {
-		if ((1 << vmid) & compute_vmid_bitmap)
-			return true;
+	if (adev->kfd.dev)
+		return vmid >= adev->vm_manager.first_kfd_vmid;
+
+	return false;
+}
+
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	if (adev->family == AMDGPU_FAMILY_AI) {
+		int i;
+
+		for (i = 0; i < adev->num_vmhubs; i++)
+			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+	} else {
+		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
 	}
 
-	return false;
-}
-
-#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD)
-bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
-{
-	return false;
-}
-
-void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
-{
-}
-
-void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
-				    struct amdgpu_vm *vm)
-{
-}
-
-struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
-{
-	return NULL;
-}
-
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
-{
 	return 0;
 }
 
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
 {
-	return NULL;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	const uint32_t flush_type = 0;
+	bool all_hub = false;
+
+	if (adev->family == AMDGPU_FAMILY_AI ||
+	    adev->family == AMDGPU_FAMILY_RV)
+		all_hub = true;
+
+	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
 }
 
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
+bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
 {
-	return NULL;
-}
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
-{
-	return NULL;
+	return adev->have_atomics_support;
 }
-#endif
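The reworked amdgpu_amdkfd_is_kfd_vmid() replaces the bitmap test with a range compare. Under the layout the removed code hardcoded (compute VMIDs 8-15 out of 16), the two checks agree for every VMID, which a short userspace loop confirms (both constants assumed, as before):

    #include <assert.h>

    int main(void)
    {
        const unsigned int compute_vmid_bitmap = 0xFF00; /* old check */
        const unsigned int first_kfd_vmid = 8;           /* new check */
        unsigned int vmid;

        for (vmid = 0; vmid < 16; vmid++) {
            int old_way = !!((1u << vmid) & compute_vmid_bitmap);
            int new_way = (vmid >= first_kfd_vmid);

            assert(old_way == new_way); /* same verdict for every VMID */
        }
        return 0;
    }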