2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -21,22 +21,21 @@
  *
  */
 
+#include <linux/module.h>
+
+#include <drm/drm_drv.h>
+
 #include "amdgpu.h"
-#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
-#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
-#define MAX_KIQ_REG_TRY 20
+#include "amdgpu_ras.h"
+#include "vi.h"
+#include "soc15.h"
+#include "nv.h"
 
-uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
-{
-	uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
-
-	addr -= AMDGPU_VA_RESERVED_SIZE;
-
-	if (addr >= AMDGPU_VA_HOLE_START)
-		addr |= AMDGPU_VA_HOLE_END;
-
-	return addr;
-}
+#define POPULATE_UCODE_INFO(vf2pf_info, ucode, ver) \
+	do { \
+		vf2pf_info->ucode_info[ucode].id = ucode; \
+		vf2pf_info->ucode_info[ucode].version = ver; \
+	} while (0)
 
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
 {
@@ -46,178 +45,45 @@
 	return RREG32_NO_KIQ(0xc040) == 0xffffffff;
 }
 
-int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
-{
-	int r;
-	void *ptr;
-
-	r = amdgpu_bo_create_kernel(adev, AMDGPU_CSA_SIZE, PAGE_SIZE,
-				    AMDGPU_GEM_DOMAIN_VRAM, &adev->virt.csa_obj,
-				    &adev->virt.csa_vmid0_addr, &ptr);
-	if (r)
-		return r;
-
-	memset(ptr, 0, AMDGPU_CSA_SIZE);
-	return 0;
-}
-
-void amdgpu_free_static_csa(struct amdgpu_device *adev) {
-	amdgpu_bo_free_kernel(&adev->virt.csa_obj,
-			      &adev->virt.csa_vmid0_addr,
-			      NULL);
-}
-
-/*
- * amdgpu_map_static_csa should be called during amdgpu_vm_init
- * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
- * submission of GFX should use this virtual address within META_DATA init
- * package to support SRIOV gfx preemption.
- */
-int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			  struct amdgpu_bo_va **bo_va)
-{
-	uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_VA_HOLE_MASK;
-	struct ww_acquire_ctx ticket;
-	struct list_head list;
-	struct amdgpu_bo_list_entry pd;
-	struct ttm_validate_buffer csa_tv;
-	int r;
-
-	INIT_LIST_HEAD(&list);
-	INIT_LIST_HEAD(&csa_tv.head);
-	csa_tv.bo = &adev->virt.csa_obj->tbo;
-	csa_tv.shared = true;
-
-	list_add(&csa_tv.head, &list);
-	amdgpu_vm_get_pd_bo(vm, &list, &pd);
-
-	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
-	if (r) {
-		DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
-		return r;
-	}
-
-	*bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj);
-	if (!*bo_va) {
-		ttm_eu_backoff_reservation(&ticket, &list);
-		DRM_ERROR("failed to create bo_va for static CSA\n");
-		return -ENOMEM;
-	}
-
-	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
-				AMDGPU_CSA_SIZE);
-	if (r) {
-		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
-		amdgpu_vm_bo_rmv(adev, *bo_va);
-		ttm_eu_backoff_reservation(&ticket, &list);
-		return r;
-	}
-
-	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE,
-			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
-			     AMDGPU_PTE_EXECUTABLE);
-
-	if (r) {
-		DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
-		amdgpu_vm_bo_rmv(adev, *bo_va);
-		ttm_eu_backoff_reservation(&ticket, &list);
-		return r;
-	}
-
-	ttm_eu_backoff_reservation(&ticket, &list);
-	return 0;
-}
-
 void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 {
 	/* enable virtual display */
-	adev->mode_info.num_crtc = 1;
+	if (adev->mode_info.num_crtc == 0)
+		adev->mode_info.num_crtc = 1;
 	adev->enable_virtual_display = true;
+	adev_to_drm(adev)->driver->driver_features &= ~DRIVER_ATOMIC;
 	adev->cg_flags = 0;
 	adev->pg_flags = 0;
 }
 
-uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
+void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
+					uint32_t reg0, uint32_t reg1,
+					uint32_t ref, uint32_t mask)
 {
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+	struct amdgpu_ring *ring = &kiq->ring;
 	signed long r, cnt = 0;
 	unsigned long flags;
 	uint32_t seq;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-	struct amdgpu_ring *ring = &kiq->ring;
-
-	BUG_ON(!ring->funcs->emit_rreg);
 
 	spin_lock_irqsave(&kiq->ring_lock, flags);
 	amdgpu_ring_alloc(ring, 32);
-	amdgpu_ring_emit_rreg(ring, reg);
-	amdgpu_fence_emit_polling(ring, &seq);
+	amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
+					    ref, mask);
+	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+	if (r)
+		goto failed_undo;
+
 	amdgpu_ring_commit(ring);
 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
 
-	/* don't wait anymore for gpu reset case because this way may
-	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
-	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
-	 * never return if we keep waiting in virt_kiq_rreg, which cause
-	 * gpu_recover() hang there.
-	 *
-	 * also don't wait anymore for IRQ context
-	 * */
-	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
-		goto failed_kiq_read;
+	/* don't wait anymore for IRQ context */
+	if (r < 1 && in_interrupt())
+		goto failed_kiq;
 
-	if (in_interrupt())
-		might_sleep();
-
-	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
-		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
-		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-	}
-
-	if (cnt > MAX_KIQ_REG_TRY)
-		goto failed_kiq_read;
-
-	return adev->wb.wb[adev->virt.reg_val_offs];
-
-failed_kiq_read:
-	pr_err("failed to read reg:%x\n", reg);
-	return ~0;
-}
-
-void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
-{
-	signed long r, cnt = 0;
-	unsigned long flags;
-	uint32_t seq;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-	struct amdgpu_ring *ring = &kiq->ring;
-
-	BUG_ON(!ring->funcs->emit_wreg);
-
-	spin_lock_irqsave(&kiq->ring_lock, flags);
-	amdgpu_ring_alloc(ring, 32);
-	amdgpu_ring_emit_wreg(ring, reg, v);
-	amdgpu_fence_emit_polling(ring, &seq);
-	amdgpu_ring_commit(ring);
-	spin_unlock_irqrestore(&kiq->ring_lock, flags);
-
-	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-
-	/* don't wait anymore for gpu reset case because this way may
-	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
-	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
-	 * never return if we keep waiting in virt_kiq_rreg, which cause
-	 * gpu_recover() hang there.
-	 *
-	 * also don't wait anymore for IRQ context
-	 * */
-	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
-		goto failed_kiq_write;
-
-	if (in_interrupt())
-		might_sleep();
-
+	might_sleep();
 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
 
 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
@@ -225,12 +91,15 @@
 	}
 
 	if (cnt > MAX_KIQ_REG_TRY)
-		goto failed_kiq_write;
+		goto failed_kiq;
 
 	return;
 
-failed_kiq_write:
-	pr_err("failed to write reg:%x\n", reg);
+failed_undo:
+	amdgpu_ring_undo(ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq:
+	dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
 }
 
 /**
@@ -300,6 +169,19 @@
 	return 0;
 }
 
+void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+
+	if (virt->ops && virt->ops->req_init_data)
+		virt->ops->req_init_data(adev);
+
+	if (adev->virt.req_init_data_ver > 0)
+		DRM_INFO("host supports REQ_INIT_DATA handshake\n");
+	else
+		DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
+}
+
 /**
  * amdgpu_virt_wait_reset() - wait for reset gpu completed
  * @amdgpu: amdgpu device.
@@ -363,10 +245,10 @@
 }
 
 
-int amdgpu_virt_fw_reserve_get_checksum(void *obj,
-					unsigned long obj_size,
-					unsigned int key,
-					unsigned int chksum)
+unsigned int amd_sriov_msg_checksum(void *obj,
+				    unsigned long obj_size,
+				    unsigned int key,
+				    unsigned int checksum)
 {
 	unsigned int ret = key;
 	unsigned long i = 0;
@@ -376,64 +258,492 @@
 	/* calculate checksum */
 	for (i = 0; i < obj_size; ++i)
 		ret += *(pos + i);
-	/* minus the chksum itself */
-	pos = (char *)&chksum;
-	for (i = 0; i < sizeof(chksum); ++i)
+	/* minus the checksum itself */
+	pos = (char *)&checksum;
+	for (i = 0; i < sizeof(checksum); ++i)
 		ret -= *(pos + i);
 	return ret;
 }
 
-void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
+static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
 {
-	uint32_t pf2vf_size = 0;
-	uint32_t checksum = 0;
-	uint32_t checkval;
-	char *str;
+	struct amdgpu_virt *virt = &adev->virt;
+	struct amdgpu_virt_ras_err_handler_data **data = &virt->virt_eh_data;
+	/* GPU will be marked bad on host if bp count more then 10,
+	 * so alloc 512 is enough.
+	 */
+	unsigned int align_space = 512;
+	void *bps = NULL;
+	struct amdgpu_bo **bps_bo = NULL;
 
-	adev->virt.fw_reserve.p_pf2vf = NULL;
-	adev->virt.fw_reserve.p_vf2pf = NULL;
+	*data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL);
+	if (!*data)
+		return -ENOMEM;
 
-	if (adev->fw_vram_usage.va != NULL) {
-		adev->virt.fw_reserve.p_pf2vf =
-			(struct amdgim_pf2vf_info_header *)(
-			adev->fw_vram_usage.va + AMDGIM_DATAEXCHANGE_OFFSET);
-		AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size);
-		AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum);
-		AMDGPU_FW_VRAM_PF2VF_READ(adev, feature_flags, &adev->virt.gim_feature);
+	bps = kmalloc(align_space * sizeof((*data)->bps), GFP_KERNEL);
+	bps_bo = kmalloc(align_space * sizeof((*data)->bps_bo), GFP_KERNEL);
 
-		/* pf2vf message must be in 4K */
-		if (pf2vf_size > 0 && pf2vf_size < 4096) {
-			checkval = amdgpu_virt_fw_reserve_get_checksum(
-				adev->virt.fw_reserve.p_pf2vf, pf2vf_size,
-				adev->virt.fw_reserve.checksum_key, checksum);
-			if (checkval == checksum) {
-				adev->virt.fw_reserve.p_vf2pf =
-					((void *)adev->virt.fw_reserve.p_pf2vf +
-					pf2vf_size);
-				memset((void *)adev->virt.fw_reserve.p_vf2pf, 0,
-					sizeof(amdgim_vf2pf_info));
-				AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.version,
-					AMDGPU_FW_VRAM_VF2PF_VER);
-				AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.size,
-					sizeof(amdgim_vf2pf_info));
-				AMDGPU_FW_VRAM_VF2PF_READ(adev, driver_version,
-					&str);
-#ifdef MODULE
-				if (THIS_MODULE->version != NULL)
-					strcpy(str, THIS_MODULE->version);
-				else
-#endif
-					strcpy(str, "N/A");
-				AMDGPU_FW_VRAM_VF2PF_WRITE(adev, driver_cert,
-					0);
-				AMDGPU_FW_VRAM_VF2PF_WRITE(adev, checksum,
-					amdgpu_virt_fw_reserve_get_checksum(
-						adev->virt.fw_reserve.p_vf2pf,
-						pf2vf_size,
-						adev->virt.fw_reserve.checksum_key, 0));
-			}
+	if (!bps || !bps_bo) {
+		kfree(bps);
+		kfree(bps_bo);
+		kfree(*data);
+		return -ENOMEM;
+	}
+
+	(*data)->bps = bps;
+	(*data)->bps_bo = bps_bo;
+	(*data)->count = 0;
+	(*data)->last_reserved = 0;
+
+	virt->ras_init_done = true;
+
+	return 0;
+}
+
+static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+	struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+	struct amdgpu_bo *bo;
+	int i;
+
+	if (!data)
+		return;
+
+	for (i = data->last_reserved - 1; i >= 0; i--) {
+		bo = data->bps_bo[i];
+		amdgpu_bo_free_kernel(&bo, NULL, NULL);
+		data->bps_bo[i] = bo;
+		data->last_reserved = i;
+	}
+}
+
+void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+	struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+
+	virt->ras_init_done = false;
+
+	if (!data)
+		return;
+
+	amdgpu_virt_ras_release_bp(adev);
+
+	kfree(data->bps);
+	kfree(data->bps_bo);
+	kfree(data);
+	virt->virt_eh_data = NULL;
+}
+
+static void amdgpu_virt_ras_add_bps(struct amdgpu_device *adev,
+		struct eeprom_table_record *bps, int pages)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+	struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+
+	if (!data)
+		return;
+
+	memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
+	data->count += pages;
+}
+
+static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+	struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+	struct amdgpu_bo *bo = NULL;
+	uint64_t bp;
+	int i;
+
+	if (!data)
+		return;
+
+	for (i = data->last_reserved; i < data->count; i++) {
+		bp = data->bps[i].retired_page;
+
+		/* There are two cases of reserve error should be ignored:
+		 * 1) a ras bad page has been allocated (used by someone);
+		 * 2) a ras bad page has been reserved (duplicate error injection
+		 *    for one page);
+		 */
+		if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
+					       AMDGPU_GPU_PAGE_SIZE,
+					       AMDGPU_GEM_DOMAIN_VRAM,
+					       &bo, NULL))
+			DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
+
+		data->bps_bo[i] = bo;
+		data->last_reserved = i + 1;
+		bo = NULL;
+	}
+}
+
+static bool amdgpu_virt_ras_check_bad_page(struct amdgpu_device *adev,
+		uint64_t retired_page)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+	struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+	int i;
+
+	if (!data)
+		return true;
+
+	for (i = 0; i < data->count; i++)
+		if (retired_page == data->bps[i].retired_page)
+			return true;
+
+	return false;
+}
+
+static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
+		uint64_t bp_block_offset, uint32_t bp_block_size)
+{
+	struct eeprom_table_record bp;
+	uint64_t retired_page;
+	uint32_t bp_idx, bp_cnt;
+
+	if (bp_block_size) {
+		bp_cnt = bp_block_size / sizeof(uint64_t);
+		for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
+			retired_page = *(uint64_t *)(adev->mman.fw_vram_usage_va +
+					bp_block_offset + bp_idx * sizeof(uint64_t));
+			bp.retired_page = retired_page;
+
+			if (amdgpu_virt_ras_check_bad_page(adev, retired_page))
+				continue;
+
+			amdgpu_virt_ras_add_bps(adev, &bp, 1);
+
+			amdgpu_virt_ras_reserve_bps(adev);
 		}
 	}
 }
 
+static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
+{
+	struct amd_sriov_msg_pf2vf_info_header *pf2vf_info = adev->virt.fw_reserve.p_pf2vf;
+	uint32_t checksum;
+	uint32_t checkval;
 
+	if (adev->virt.fw_reserve.p_pf2vf == NULL)
+		return -EINVAL;
+
+	if (pf2vf_info->size > 1024) {
+		DRM_ERROR("invalid pf2vf message size\n");
+		return -EINVAL;
+	}
+
+	switch (pf2vf_info->version) {
+	case 1:
+		checksum = ((struct amdgim_pf2vf_info_v1 *)pf2vf_info)->checksum;
+		checkval = amd_sriov_msg_checksum(
+			adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
+			adev->virt.fw_reserve.checksum_key, checksum);
+		if (checksum != checkval) {
+			DRM_ERROR("invalid pf2vf message\n");
+			return -EINVAL;
+		}
+
+		adev->virt.gim_feature =
+			((struct amdgim_pf2vf_info_v1 *)pf2vf_info)->feature_flags;
+		break;
+	case 2:
+		/* TODO: missing key, need to add it later */
+		checksum = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->checksum;
+		checkval = amd_sriov_msg_checksum(
+			adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
+			0, checksum);
+		if (checksum != checkval) {
+			DRM_ERROR("invalid pf2vf message\n");
+			return -EINVAL;
+		}
+
+		adev->virt.vf2pf_update_interval_ms =
+			((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->vf2pf_update_interval_ms;
+		adev->virt.gim_feature =
+			((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->feature_flags.all;
+
+		break;
+	default:
+		DRM_ERROR("invalid pf2vf version\n");
+		return -EINVAL;
+	}
+
+	/* correct too large or too little interval value */
+	if (adev->virt.vf2pf_update_interval_ms < 200 || adev->virt.vf2pf_update_interval_ms > 10000)
+		adev->virt.vf2pf_update_interval_ms = 2000;
+
+	return 0;
+}
+
+static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev)
+{
+	struct amd_sriov_msg_vf2pf_info *vf2pf_info;
+	vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf;
+
+	if (adev->virt.fw_reserve.p_vf2pf == NULL)
+		return;
+
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_VCE, adev->vce.fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_UVD, adev->uvd.fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MC, adev->gmc.fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ME, adev->gfx.me_fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_PFP, adev->gfx.pfp_fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_CE, adev->gfx.ce_fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC, adev->gfx.rlc_fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLC, adev->gfx.rlc_srlc_fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLG, adev->gfx.rlc_srlg_fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos_fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD, adev->psp.asd_fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_RAS, adev->psp.ta_ras_ucode_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_XGMI, adev->psp.ta_xgmi_ucode_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SMC, adev->pm.fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA, adev->sdma.instance[0].fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA2, adev->sdma.instance[1].fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_VCN, adev->vcn.fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_DMCU, adev->dm.dmcu_fw_version);
+}
+
+static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
+{
+	struct amd_sriov_msg_vf2pf_info *vf2pf_info;
+	struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+
+	vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf;
+
+	if (adev->virt.fw_reserve.p_vf2pf == NULL)
+		return -EINVAL;
+
+	memset(vf2pf_info, 0, sizeof(struct amd_sriov_msg_vf2pf_info));
+
+	vf2pf_info->header.size = sizeof(struct amd_sriov_msg_vf2pf_info);
+	vf2pf_info->header.version = AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER;
+
+#ifdef MODULE
+	if (THIS_MODULE->version != NULL)
+		strcpy(vf2pf_info->driver_version, THIS_MODULE->version);
+	else
+#endif
+		strcpy(vf2pf_info->driver_version, "N/A");
+
+	vf2pf_info->pf2vf_version_required = 0; // no requirement, guest understands all
+	vf2pf_info->driver_cert = 0;
+	vf2pf_info->os_info.all = 0;
+
+	vf2pf_info->fb_usage = amdgpu_vram_mgr_usage(vram_man) >> 20;
+	vf2pf_info->fb_vis_usage = amdgpu_vram_mgr_vis_usage(vram_man) >> 20;
+	vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20;
+	vf2pf_info->fb_vis_size = adev->gmc.visible_vram_size >> 20;
+
+	amdgpu_virt_populate_vf2pf_ucode_info(adev);
+
+	/* TODO: read dynamic info */
+	vf2pf_info->gfx_usage = 0;
+	vf2pf_info->compute_usage = 0;
+	vf2pf_info->encode_usage = 0;
+	vf2pf_info->decode_usage = 0;
+
+	vf2pf_info->checksum =
+		amd_sriov_msg_checksum(
+		vf2pf_info, vf2pf_info->header.size, 0, 0);
+
+	return 0;
+}
+
+void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
+{
+	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, virt.vf2pf_work.work);
+	int ret;
+
+	ret = amdgpu_virt_read_pf2vf_data(adev);
+	if (ret)
+		goto out;
+	amdgpu_virt_write_vf2pf_data(adev);
+
+out:
+	schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
+}
+
+void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
+{
+	if (adev->virt.vf2pf_update_interval_ms != 0) {
+		DRM_INFO("clean up the vf2pf work item\n");
+		flush_delayed_work(&adev->virt.vf2pf_work);
+		cancel_delayed_work_sync(&adev->virt.vf2pf_work);
+	}
+}
+
+void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
+{
+	adev->virt.fw_reserve.p_pf2vf = NULL;
+	adev->virt.fw_reserve.p_vf2pf = NULL;
+	adev->virt.vf2pf_update_interval_ms = 0;
+
+	if (adev->mman.fw_vram_usage_va != NULL) {
+		/* go through this logic in ip_init and reset to init workqueue*/
+		amdgpu_virt_exchange_data(adev);
+
+		INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
+		schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
+	} else if (adev->bios != NULL) {
+		/* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
+		adev->virt.fw_reserve.p_pf2vf =
+			(struct amd_sriov_msg_pf2vf_info_header *)
+			(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+
+		amdgpu_virt_read_pf2vf_data(adev);
+	}
+}
+
+
+void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
+{
+	uint64_t bp_block_offset = 0;
+	uint32_t bp_block_size = 0;
+	struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
+
+	if (adev->mman.fw_vram_usage_va != NULL) {
+
+		adev->virt.fw_reserve.p_pf2vf =
+			(struct amd_sriov_msg_pf2vf_info_header *)
+			(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+		adev->virt.fw_reserve.p_vf2pf =
+			(struct amd_sriov_msg_vf2pf_info_header *)
+			(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+
+		amdgpu_virt_read_pf2vf_data(adev);
+		amdgpu_virt_write_vf2pf_data(adev);
+
+		/* bad page handling for version 2 */
+		if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
+			pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
+
+			bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
+				((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
+			bp_block_size = pf2vf_v2->bp_block_size;
+
+			if (bp_block_size && !adev->virt.ras_init_done)
+				amdgpu_virt_init_ras_err_handler_data(adev);
+
+			if (adev->virt.ras_init_done)
+				amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
+		}
+	}
+}
+
+
+void amdgpu_detect_virtualization(struct amdgpu_device *adev)
+{
+	uint32_t reg;
+
+	switch (adev->asic_type) {
+	case CHIP_TONGA:
+	case CHIP_FIJI:
+		reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
+		break;
+	case CHIP_VEGA10:
+	case CHIP_VEGA20:
+	case CHIP_NAVI10:
+	case CHIP_NAVI12:
+	case CHIP_SIENNA_CICHLID:
+	case CHIP_ARCTURUS:
+		reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER);
+		break;
+	default: /* other chip doesn't support SRIOV */
+		reg = 0;
+		break;
+	}
+
+	if (reg & 1)
+		adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
+
+	if (reg & 0x80000000)
+		adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
+
+	if (!reg) {
+		if (is_virtual_machine())	/* passthrough mode exclus sriov mod */
+			adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
+	}
+
+	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
+		/* VF MMIO access (except mailbox range) from CPU
+		 * will be blocked during sriov runtime
+		 */
+		adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
+
+	/* we have the ability to check now */
+	if (amdgpu_sriov_vf(adev)) {
+		switch (adev->asic_type) {
+		case CHIP_TONGA:
+		case CHIP_FIJI:
+			vi_set_virt_ops(adev);
+			break;
+		case CHIP_VEGA10:
+		case CHIP_VEGA20:
+		case CHIP_ARCTURUS:
+			soc15_set_virt_ops(adev);
+			break;
+		case CHIP_NAVI10:
+		case CHIP_NAVI12:
+		case CHIP_SIENNA_CICHLID:
+			nv_set_virt_ops(adev);
+			/* try send GPU_INIT_DATA request to host */
+			amdgpu_virt_request_init_data(adev);
+			break;
+		default: /* other chip doesn't support SRIOV */
+			DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
+			break;
+		}
+	}
+}
+
+static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
+{
+	return amdgpu_sriov_is_debug(adev) ? true : false;
+}
+
+static bool amdgpu_virt_access_debugfs_is_kiq(struct amdgpu_device *adev)
+{
+	return amdgpu_sriov_is_normal(adev) ? true : false;
+}
+
+int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev)
+{
+	if (!amdgpu_sriov_vf(adev) ||
+	    amdgpu_virt_access_debugfs_is_kiq(adev))
+		return 0;
+
+	if (amdgpu_virt_access_debugfs_is_mmio(adev))
+		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+	else
+		return -EPERM;
+
+	return 0;
+}
+
+void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev))
+		adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
+}
+
+enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev)
+{
+	enum amdgpu_sriov_vf_mode mode;
+
+	if (amdgpu_sriov_vf(adev)) {
+		if (amdgpu_sriov_is_pp_one_vf(adev))
+			mode = SRIOV_VF_MODE_ONE_VF;
+		else
+			mode = SRIOV_VF_MODE_MULTI_VF;
+	} else {
+		mode = SRIOV_VF_MODE_BARE_METAL;
+	}
+
+	return mode;
+}