2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -30,6 +30,7 @@
 #include "gc/gc_9_0_offset.h"
 #include "gc/gc_9_0_sh_mask.h"
 #include "sdma0/sdma0_4_0_sh_mask.h"
+#include "amdgpu_amdkfd.h"
 
 static inline struct v9_mqd *get_mqd(void *mqd)
 {
@@ -45,7 +46,7 @@
 				struct queue_properties *q)
 {
 	struct v9_mqd *m;
-	uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
+	uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
 
 	if (q->cu_mask_count == 0)
 		return;
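
This hunk resizes the per-shader-engine CU mask array: gfx9 parts can expose up to eight shader engines, so the hard-coded 4 gives way to KFD_MAX_NUM_SE. A minimal sketch of how the array is consumed, assuming KFD_MAX_NUM_SE is 8 and that the shared helper mqd_symmetrically_map_cu_mask() in the common mqd-manager code fills se_mask, as the surrounding function does:

    uint32_t se_mask[KFD_MAX_NUM_SE] = {0};	/* assumed: KFD_MAX_NUM_SE == 8 */

    /* Spread the user CU bitmap over the shader engines... */
    mqd_symmetrically_map_cu_mask(mm, q->cu_mask, q->cu_mask_count, se_mask);
    /* ...then each word lands in one compute_static_thread_mgmt_seN
     * register, as the next hunk shows for se4..se7.
     */
    m->compute_static_thread_mgmt_se0 = se_mask[0];
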
@@ -58,48 +59,83 @@
 	m->compute_static_thread_mgmt_se1 = se_mask[1];
 	m->compute_static_thread_mgmt_se2 = se_mask[2];
 	m->compute_static_thread_mgmt_se3 = se_mask[3];
+	m->compute_static_thread_mgmt_se4 = se_mask[4];
+	m->compute_static_thread_mgmt_se5 = se_mask[5];
+	m->compute_static_thread_mgmt_se6 = se_mask[6];
+	m->compute_static_thread_mgmt_se7 = se_mask[7];
 
-	pr_debug("update cu mask to %#x %#x %#x %#x\n",
+	pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
 		m->compute_static_thread_mgmt_se0,
 		m->compute_static_thread_mgmt_se1,
 		m->compute_static_thread_mgmt_se2,
-		m->compute_static_thread_mgmt_se3);
+		m->compute_static_thread_mgmt_se3,
+		m->compute_static_thread_mgmt_se4,
+		m->compute_static_thread_mgmt_se5,
+		m->compute_static_thread_mgmt_se6,
+		m->compute_static_thread_mgmt_se7);
 }
 
-static int init_mqd(struct mqd_manager *mm, void **mqd,
-		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
-		struct queue_properties *q)
+static void set_priority(struct v9_mqd *m, struct queue_properties *q)
+{
+	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
+	m->cp_hqd_queue_priority = q->priority;
+}
+
+static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
+		struct queue_properties *q)
 {
 	int retval;
-	uint64_t addr;
-	struct v9_mqd *m;
-	struct kfd_dev *kfd = mm->dev;
+	struct kfd_mem_obj *mqd_mem_obj = NULL;
 
-	*mqd_mem_obj = NULL;
-	/* From V9, for CWSR, the control stack is located on the next page
-	 * boundary after the mqd, we will use the gtt allocation function
-	 * instead of sub-allocation function.
+	/* For V9 only, due to a HW bug, the control stack of a user mode
+	 * compute queue needs to be allocated just behind the page boundary
+	 * of its regular MQD buffer. So we allocate an enlarged MQD buffer:
+	 * the first page of the buffer serves as the regular MQD buffer
+	 * purpose and the remaining is for control stack. Although the two
+	 * parts are in the same buffer object, they need different memory
+	 * types: MQD part needs UC (uncached) as usual, while control stack
+	 * needs NC (non coherent), which is different from the UC type which
+	 * is used when control stack is allocated in user space.
+	 *
+	 * Because of all those, we use the gtt allocation function instead
+	 * of sub-allocation function for this enlarged MQD buffer. Moreover,
+	 * in order to achieve two memory types in a single buffer object, we
+	 * pass a special bo flag AMDGPU_GEM_CREATE_CP_MQD_GFX9 to instruct
+	 * amdgpu memory functions to do so.
 	 */
 	if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
-		*mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
-		if (!*mqd_mem_obj)
-			return -ENOMEM;
-		retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd,
+		mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+		if (!mqd_mem_obj)
+			return NULL;
+		retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
 			ALIGN(q->ctl_stack_size, PAGE_SIZE) +
 				ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
-			&((*mqd_mem_obj)->gtt_mem),
-			&((*mqd_mem_obj)->gpu_addr),
-			(void *)&((*mqd_mem_obj)->cpu_ptr), true);
-	} else
-		retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
-				mqd_mem_obj);
-	if (retval) {
-		kfree(*mqd_mem_obj);
-		return -ENOMEM;
+			&(mqd_mem_obj->gtt_mem),
+			&(mqd_mem_obj->gpu_addr),
+			(void *)&(mqd_mem_obj->cpu_ptr), true);
+	} else {
+		retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd),
+				&mqd_mem_obj);
 	}
 
-	m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
-	addr = (*mqd_mem_obj)->gpu_addr;
+	if (retval) {
+		kfree(mqd_mem_obj);
+		return NULL;
+	}
+
+	return mqd_mem_obj;
+
+}
+
+static void init_mqd(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	uint64_t addr;
+	struct v9_mqd *m;
+
+	m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
+	addr = mqd_mem_obj->gpu_addr;
 
 	memset(m, 0, sizeof(struct v9_mqd));
@@ -109,6 +145,10 @@
 	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
 	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
 	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;
 
 	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
 			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
@@ -120,10 +160,7 @@
 
 	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
 			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
-			10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
-
-	m->cp_hqd_pipe_priority = 1;
-	m->cp_hqd_queue_priority = 15;
+			1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
 
 	if (q->format == KFD_QUEUE_FORMAT_AQL) {
 		m->cp_hqd_aql_control =
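
The hard-coded pipe/queue priority pair dropped here is replaced by the set_priority() helper added earlier in this patch; update_mqd() now calls it (see the update_mqd hunk later), so the MQD follows q->priority instead of a fixed value. A purely illustrative sketch, assuming the 0..15 queue-priority range KFD uses:

    q->priority = 15;		/* e.g. restore the old fixed top priority */
    mm->update_mqd(mm, m, q);	/* update_mqd() now ends with set_priority(m, q) */
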
@@ -151,9 +188,7 @@
 	*mqd = m;
 	if (gart_addr)
 		*gart_addr = addr;
-	retval = mm->update_mqd(mm, m, q);
-
-	return retval;
+	mm->update_mqd(mm, m, q);
 }
 
 static int load_mqd(struct mqd_manager *mm, void *mqd,
@@ -168,7 +203,15 @@
 			wptr_shift, 0, mms);
 }
 
-static int update_mqd(struct mqd_manager *mm, void *mqd,
+static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+			uint32_t pipe_id, uint32_t queue_id,
+			struct queue_properties *p, struct mm_struct *mms)
+{
+	return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
+					      queue_id, p->doorbell_off);
+}
+
+static void update_mqd(struct mqd_manager *mm, void *mqd,
 			struct queue_properties *q)
 {
 	struct v9_mqd *m;
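
The HIQ gets a dedicated load callback that goes through the KIQ-based hiq_mqd_load interface, passing the doorbell offset instead of the wptr-shift arguments the user-queue path uses. Callers never pick a path by hand; they dispatch through the ops table bound at the bottom of this file. Hypothetical caller sketch (the mqd_mgrs lookup name is an assumption, not part of this patch):

    struct mqd_manager *mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];	/* assumed lookup */

    /* For the HIQ this resolves to hiq_load_mqd_kiq() above. */
    mgr->load_mqd(mgr, mqd, pipe_id, queue_id, &q->properties, mms);
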
@@ -227,13 +270,9 @@
 	m->cp_hqd_ctx_save_control = 0;
 
 	update_cu_mask(mm, mqd, q);
+	set_priority(m, q);
 
-	q->is_active = (q->queue_size > 0 &&
-			q->queue_address != 0 &&
-			q->queue_percent > 0 &&
-			!q->is_evicted);
-
-	return 0;
+	q->is_active = QUEUE_IS_ACTIVE(*q);
 }
 
@@ -247,13 +286,13 @@
 			pipe_id, queue_id);
 }
 
-static void uninit_mqd(struct mqd_manager *mm, void *mqd,
+static void free_mqd(struct mqd_manager *mm, void *mqd,
 			struct kfd_mem_obj *mqd_mem_obj)
 {
 	struct kfd_dev *kfd = mm->dev;
 
 	if (mqd_mem_obj->gtt_mem) {
-		kfd->kfd2kgd->free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
+		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
 		kfree(mqd_mem_obj);
 	} else {
 		kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
@@ -269,71 +308,58 @@
 			pipe_id, queue_id);
 }
 
-static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
-		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+			  void __user *ctl_stack,
+			  u32 *ctl_stack_used_size,
+			  u32 *save_area_used_size)
+{
+	struct v9_mqd *m;
+
+	/* Control stack is located one page after MQD. */
+	void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
+
+	m = get_mqd(mqd);
+
+	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
+		m->cp_hqd_cntl_stack_offset;
+	*save_area_used_size = m->cp_hqd_wg_state_offset -
+		m->cp_hqd_cntl_stack_size;
+
+	if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
+		return -EFAULT;
+
+	return 0;
+}
+
+static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
 			struct queue_properties *q)
 {
 	struct v9_mqd *m;
-	int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
 
-	if (retval != 0)
-		return retval;
+	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
 
 	m = get_mqd(*mqd);
 
 	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
 			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
-
-	return retval;
 }
 
-static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
-			struct queue_properties *q)
-{
-	struct v9_mqd *m;
-	int retval = update_mqd(mm, mqd, q);
-
-	if (retval != 0)
-		return retval;
-
-	/* TODO: what's the point? update_mqd already does this. */
-	m = get_mqd(mqd);
-	m->cp_hqd_vmid = q->vmid;
-	return retval;
-}
-
-static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
-		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
 		struct queue_properties *q)
 {
-	int retval;
 	struct v9_sdma_mqd *m;
 
-
-	retval = kfd_gtt_sa_allocate(mm->dev,
-			sizeof(struct v9_sdma_mqd),
-			mqd_mem_obj);
-
-	if (retval != 0)
-		return -ENOMEM;
-
-	m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
+	m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;
 
 	memset(m, 0, sizeof(struct v9_sdma_mqd));
 
 	*mqd = m;
 	if (gart_addr)
-		*gart_addr = (*mqd_mem_obj)->gpu_addr;
+		*gart_addr = mqd_mem_obj->gpu_addr;
 
-	retval = mm->update_mqd(mm, m, q);
-
-	return retval;
-}
-
-static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
-		struct kfd_mem_obj *mqd_mem_obj)
-{
-	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+	mm->update_mqd(mm, m, q);
 }
 
 static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
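
get_wave_state() copies the saved control stack back to user space, relying on the layout allocate_mqd() set up: the control stack begins exactly one page after the MQD. The used sizes come straight from MQD registers; a sketch restating that arithmetic:

    /*   mqd                 mqd + PAGE_SIZE
     *   |-- struct v9_mqd --|-- control stack (cp_hqd_cntl_stack_size) --|
     *
     * The used portion of the stack is the span between
     * cp_hqd_cntl_stack_offset and cp_hqd_cntl_stack_size.
     */
    u32 ctl_used  = m->cp_hqd_cntl_stack_size - m->cp_hqd_cntl_stack_offset;
    u32 save_used = m->cp_hqd_wg_state_offset - m->cp_hqd_cntl_stack_size;
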
@@ -347,7 +373,7 @@
 
 #define SDMA_RLC_DUMMY_DEFAULT 0xf
 
-static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
 		struct queue_properties *q)
 {
 	struct v9_sdma_mqd *m;
370396 m->sdma_queue_id = q->sdma_queue_id;
371397 m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
372398
373
- q->is_active = (q->queue_size > 0 &&
374
- q->queue_address != 0 &&
375
- q->queue_percent > 0 &&
376
- !q->is_evicted);
377
-
378
- return 0;
399
+ q->is_active = QUEUE_IS_ACTIVE(*q);
379400 }
380401
381402 /*
@@ -431,35 +452,54 @@
 
 	switch (type) {
 	case KFD_MQD_TYPE_CP:
-	case KFD_MQD_TYPE_COMPUTE:
+		mqd->allocate_mqd = allocate_mqd;
 		mqd->init_mqd = init_mqd;
-		mqd->uninit_mqd = uninit_mqd;
+		mqd->free_mqd = free_mqd;
 		mqd->load_mqd = load_mqd;
 		mqd->update_mqd = update_mqd;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
+		mqd->get_wave_state = get_wave_state;
+		mqd->mqd_size = sizeof(struct v9_mqd);
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
 		break;
 	case KFD_MQD_TYPE_HIQ:
+		mqd->allocate_mqd = allocate_hiq_mqd;
 		mqd->init_mqd = init_mqd_hiq;
-		mqd->uninit_mqd = uninit_mqd;
-		mqd->load_mqd = load_mqd;
-		mqd->update_mqd = update_mqd_hiq;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->load_mqd = hiq_load_mqd_kiq;
+		mqd->update_mqd = update_mqd;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
+		mqd->mqd_size = sizeof(struct v9_mqd);
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_DIQ:
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->free_mqd = free_mqd;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = destroy_mqd;
+		mqd->is_occupied = is_occupied;
+		mqd->mqd_size = sizeof(struct v9_mqd);
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
 		break;
 	case KFD_MQD_TYPE_SDMA:
+		mqd->allocate_mqd = allocate_sdma_mqd;
 		mqd->init_mqd = init_mqd_sdma;
-		mqd->uninit_mqd = uninit_mqd_sdma;
+		mqd->free_mqd = free_mqd_hiq_sdma;
 		mqd->load_mqd = load_mqd_sdma;
 		mqd->update_mqd = update_mqd_sdma;
 		mqd->destroy_mqd = destroy_mqd_sdma;
 		mqd->is_occupied = is_occupied_sdma;
+		mqd->mqd_size = sizeof(struct v9_sdma_mqd);
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
 #endif
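
Taken together, the table now separates allocation, initialization, and teardown per queue type (allocate_hiq_mqd, allocate_sdma_mqd, and free_mqd_hiq_sdma are expected to come from the shared mqd-manager code touched elsewhere in this series). A sketch of how a caller drives the new ops; the function pointers are from this patch, while the surrounding variables and error handling are purely illustrative:

    struct kfd_mem_obj *mqd_mem_obj;
    void *m;
    uint64_t gart_addr;

    mqd_mem_obj = mqd->allocate_mqd(dev, &q->properties);
    if (!mqd_mem_obj)
    	return -ENOMEM;			/* allocation is the only failure point */

    /* init_mqd() and update_mqd() are void now; they cannot fail. */
    mqd->init_mqd(mqd, &m, mqd_mem_obj, &gart_addr, &q->properties);

    /* ... load_mqd(), destroy_mqd() ... */
    mqd->free_mqd(mqd, m, mqd_mem_obj);	/* gtt-aware teardown */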