hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
....@@ -30,6 +30,7 @@
3030 #include "gc/gc_9_0_offset.h"
3131 #include "gc/gc_9_0_sh_mask.h"
3232 #include "sdma0/sdma0_4_0_sh_mask.h"
33
+#include "amdgpu_amdkfd.h"
3334
3435 static inline struct v9_mqd *get_mqd(void *mqd)
3536 {
....@@ -45,7 +46,7 @@
4546 struct queue_properties *q)
4647 {
4748 struct v9_mqd *m;
48
- uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
49
+ uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
4950
5051 if (q->cu_mask_count == 0)
5152 return;
....@@ -58,48 +59,84 @@
5859 m->compute_static_thread_mgmt_se1 = se_mask[1];
5960 m->compute_static_thread_mgmt_se2 = se_mask[2];
6061 m->compute_static_thread_mgmt_se3 = se_mask[3];
62
+ m->compute_static_thread_mgmt_se4 = se_mask[4];
63
+ m->compute_static_thread_mgmt_se5 = se_mask[5];
64
+ m->compute_static_thread_mgmt_se6 = se_mask[6];
65
+ m->compute_static_thread_mgmt_se7 = se_mask[7];
6166
62
- pr_debug("update cu mask to %#x %#x %#x %#x\n",
67
+ pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
6368 m->compute_static_thread_mgmt_se0,
6469 m->compute_static_thread_mgmt_se1,
6570 m->compute_static_thread_mgmt_se2,
66
- m->compute_static_thread_mgmt_se3);
71
+ m->compute_static_thread_mgmt_se3,
72
+ m->compute_static_thread_mgmt_se4,
73
+ m->compute_static_thread_mgmt_se5,
74
+ m->compute_static_thread_mgmt_se6,
75
+ m->compute_static_thread_mgmt_se7);
6776 }
6877
69
-static int init_mqd(struct mqd_manager *mm, void **mqd,
70
- struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
71
- struct queue_properties *q)
78
+static void set_priority(struct v9_mqd *m, struct queue_properties *q)
79
+{
80
+ m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
81
+ m->cp_hqd_queue_priority = q->priority;
82
+}
83
+
84
+static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
85
+ struct queue_properties *q)
7286 {
7387 int retval;
74
- uint64_t addr;
75
- struct v9_mqd *m;
76
- struct kfd_dev *kfd = mm->dev;
88
+ struct kfd_mem_obj *mqd_mem_obj = NULL;
7789
78
- *mqd_mem_obj = NULL;
79
- /* From V9, for CWSR, the control stack is located on the next page
80
- * boundary after the mqd, we will use the gtt allocation function
81
- * instead of sub-allocation function.
90
+ /* For V9 only, due to a HW bug, the control stack of a user mode
91
+ * compute queue needs to be allocated just behind the page boundary
92
+ * of its regular MQD buffer. So we allocate an enlarged MQD buffer:
93
+ * the first page of the buffer serves as the regular MQD buffer
94
+ * and the remainder is for the control stack. Although the two
95
+ * parts are in the same buffer object, they need different memory
96
+ * types: MQD part needs UC (uncached) as usual, while control stack
97
+ * needs NC (non coherent), which is different from the UC type which
98
+ * is used when control stack is allocated in user space.
99
+ *
100
+ * Because of all of this, we use the gtt allocation function instead
101
+ * of sub-allocation function for this enlarged MQD buffer. Moreover,
102
+ * in order to achieve two memory types in a single buffer object, we
103
+ * pass a special bo flag AMDGPU_GEM_CREATE_CP_MQD_GFX9 to instruct
104
+ * amdgpu memory functions to do so.
82105 */
83106 if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
84
- *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
85
- if (!*mqd_mem_obj)
86
- return -ENOMEM;
87
- retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd,
107
+ mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
108
+ if (!mqd_mem_obj)
109
+ return NULL;
110
+ retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
88111 ALIGN(q->ctl_stack_size, PAGE_SIZE) +
89112 ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
90
- &((*mqd_mem_obj)->gtt_mem),
91
- &((*mqd_mem_obj)->gpu_addr),
92
- (void *)&((*mqd_mem_obj)->cpu_ptr), true);
93
- } else
94
- retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
95
- mqd_mem_obj);
96
- if (retval) {
97
- kfree(*mqd_mem_obj);
98
- return -ENOMEM;
113
+ &(mqd_mem_obj->gtt_mem),
114
+ &(mqd_mem_obj->gpu_addr),
115
+ (void *)&(mqd_mem_obj->cpu_ptr), true);
116
+
117
+ if (retval) {
118
+ kfree(mqd_mem_obj);
119
+ return NULL;
120
+ }
121
+ } else {
122
+ retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd),
123
+ &mqd_mem_obj);
124
+ if (retval)
125
+ return NULL;
99126 }
100127
101
- m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
102
- addr = (*mqd_mem_obj)->gpu_addr;
128
+ return mqd_mem_obj;
129
+}
130
+
131
+static void init_mqd(struct mqd_manager *mm, void **mqd,
132
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
133
+ struct queue_properties *q)
134
+{
135
+ uint64_t addr;
136
+ struct v9_mqd *m;
137
+
138
+ m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
139
+ addr = mqd_mem_obj->gpu_addr;
103140
104141 memset(m, 0, sizeof(struct v9_mqd));
105142
....@@ -109,6 +146,10 @@
109146 m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
110147 m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
111148 m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
149
+ m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
150
+ m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
151
+ m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
152
+ m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;
112153
113154 m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
114155 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
....@@ -120,10 +161,7 @@
120161
121162 m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
122163 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
123
- 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
124
-
125
- m->cp_hqd_pipe_priority = 1;
126
- m->cp_hqd_queue_priority = 15;
164
+ 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
127165
128166 if (q->format == KFD_QUEUE_FORMAT_AQL) {
129167 m->cp_hqd_aql_control =
....@@ -151,9 +189,7 @@
151189 *mqd = m;
152190 if (gart_addr)
153191 *gart_addr = addr;
154
- retval = mm->update_mqd(mm, m, q);
155
-
156
- return retval;
192
+ mm->update_mqd(mm, m, q);
157193 }
158194
159195 static int load_mqd(struct mqd_manager *mm, void *mqd,
....@@ -168,7 +204,15 @@
168204 wptr_shift, 0, mms);
169205 }
170206
171
-static int update_mqd(struct mqd_manager *mm, void *mqd,
207
+static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
208
+ uint32_t pipe_id, uint32_t queue_id,
209
+ struct queue_properties *p, struct mm_struct *mms)
210
+{
211
+ return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
212
+ queue_id, p->doorbell_off);
213
+}
214
+
215
+static void update_mqd(struct mqd_manager *mm, void *mqd,
172216 struct queue_properties *q)
173217 {
174218 struct v9_mqd *m;
....@@ -227,13 +271,9 @@
227271 m->cp_hqd_ctx_save_control = 0;
228272
229273 update_cu_mask(mm, mqd, q);
274
+ set_priority(m, q);
230275
231
- q->is_active = (q->queue_size > 0 &&
232
- q->queue_address != 0 &&
233
- q->queue_percent > 0 &&
234
- !q->is_evicted);
235
-
236
- return 0;
276
+ q->is_active = QUEUE_IS_ACTIVE(*q);
237277 }
238278
239279
....@@ -247,13 +287,13 @@
247287 pipe_id, queue_id);
248288 }
249289
250
-static void uninit_mqd(struct mqd_manager *mm, void *mqd,
290
+static void free_mqd(struct mqd_manager *mm, void *mqd,
251291 struct kfd_mem_obj *mqd_mem_obj)
252292 {
253293 struct kfd_dev *kfd = mm->dev;
254294
255295 if (mqd_mem_obj->gtt_mem) {
256
- kfd->kfd2kgd->free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
296
+ amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
257297 kfree(mqd_mem_obj);
258298 } else {
259299 kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
....@@ -269,71 +309,58 @@
269309 pipe_id, queue_id);
270310 }
271311
272
-static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
273
- struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
312
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
313
+ void __user *ctl_stack,
314
+ u32 *ctl_stack_used_size,
315
+ u32 *save_area_used_size)
316
+{
317
+ struct v9_mqd *m;
318
+
319
+ /* Control stack is located one page after MQD. */
320
+ void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
321
+
322
+ m = get_mqd(mqd);
323
+
324
+ *ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
325
+ m->cp_hqd_cntl_stack_offset;
326
+ *save_area_used_size = m->cp_hqd_wg_state_offset -
327
+ m->cp_hqd_cntl_stack_size;
328
+
329
+ if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
330
+ return -EFAULT;
331
+
332
+ return 0;
333
+}
334
+
335
+static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
336
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
274337 struct queue_properties *q)
275338 {
276339 struct v9_mqd *m;
277
- int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
278340
279
- if (retval != 0)
280
- return retval;
341
+ init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
281342
282343 m = get_mqd(*mqd);
283344
284345 m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
285346 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
286
-
287
- return retval;
288347 }
289348
290
-static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
291
- struct queue_properties *q)
292
-{
293
- struct v9_mqd *m;
294
- int retval = update_mqd(mm, mqd, q);
295
-
296
- if (retval != 0)
297
- return retval;
298
-
299
- /* TODO: what's the point? update_mqd already does this. */
300
- m = get_mqd(mqd);
301
- m->cp_hqd_vmid = q->vmid;
302
- return retval;
303
-}
304
-
305
-static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
306
- struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
349
+static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
350
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
307351 struct queue_properties *q)
308352 {
309
- int retval;
310353 struct v9_sdma_mqd *m;
311354
312
-
313
- retval = kfd_gtt_sa_allocate(mm->dev,
314
- sizeof(struct v9_sdma_mqd),
315
- mqd_mem_obj);
316
-
317
- if (retval != 0)
318
- return -ENOMEM;
319
-
320
- m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
355
+ m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;
321356
322357 memset(m, 0, sizeof(struct v9_sdma_mqd));
323358
324359 *mqd = m;
325360 if (gart_addr)
326
- *gart_addr = (*mqd_mem_obj)->gpu_addr;
361
+ *gart_addr = mqd_mem_obj->gpu_addr;
327362
328
- retval = mm->update_mqd(mm, m, q);
329
-
330
- return retval;
331
-}
332
-
333
-static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
334
- struct kfd_mem_obj *mqd_mem_obj)
335
-{
336
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
363
+ mm->update_mqd(mm, m, q);
337364 }
338365
339366 static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
....@@ -347,7 +374,7 @@
347374
348375 #define SDMA_RLC_DUMMY_DEFAULT 0xf
349376
350
-static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
377
+static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
351378 struct queue_properties *q)
352379 {
353380 struct v9_sdma_mqd *m;
....@@ -370,12 +397,7 @@
370397 m->sdma_queue_id = q->sdma_queue_id;
371398 m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
372399
373
- q->is_active = (q->queue_size > 0 &&
374
- q->queue_address != 0 &&
375
- q->queue_percent > 0 &&
376
- !q->is_evicted);
377
-
378
- return 0;
400
+ q->is_active = QUEUE_IS_ACTIVE(*q);
379401 }
380402
381403 /*
....@@ -431,35 +453,54 @@
431453
432454 switch (type) {
433455 case KFD_MQD_TYPE_CP:
434
- case KFD_MQD_TYPE_COMPUTE:
456
+ mqd->allocate_mqd = allocate_mqd;
435457 mqd->init_mqd = init_mqd;
436
- mqd->uninit_mqd = uninit_mqd;
458
+ mqd->free_mqd = free_mqd;
437459 mqd->load_mqd = load_mqd;
438460 mqd->update_mqd = update_mqd;
439461 mqd->destroy_mqd = destroy_mqd;
440462 mqd->is_occupied = is_occupied;
463
+ mqd->get_wave_state = get_wave_state;
464
+ mqd->mqd_size = sizeof(struct v9_mqd);
441465 #if defined(CONFIG_DEBUG_FS)
442466 mqd->debugfs_show_mqd = debugfs_show_mqd;
443467 #endif
444468 break;
445469 case KFD_MQD_TYPE_HIQ:
470
+ mqd->allocate_mqd = allocate_hiq_mqd;
446471 mqd->init_mqd = init_mqd_hiq;
447
- mqd->uninit_mqd = uninit_mqd;
448
- mqd->load_mqd = load_mqd;
449
- mqd->update_mqd = update_mqd_hiq;
472
+ mqd->free_mqd = free_mqd_hiq_sdma;
473
+ mqd->load_mqd = hiq_load_mqd_kiq;
474
+ mqd->update_mqd = update_mqd;
450475 mqd->destroy_mqd = destroy_mqd;
451476 mqd->is_occupied = is_occupied;
477
+ mqd->mqd_size = sizeof(struct v9_mqd);
478
+#if defined(CONFIG_DEBUG_FS)
479
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
480
+#endif
481
+ break;
482
+ case KFD_MQD_TYPE_DIQ:
483
+ mqd->allocate_mqd = allocate_mqd;
484
+ mqd->init_mqd = init_mqd_hiq;
485
+ mqd->free_mqd = free_mqd;
486
+ mqd->load_mqd = load_mqd;
487
+ mqd->update_mqd = update_mqd;
488
+ mqd->destroy_mqd = destroy_mqd;
489
+ mqd->is_occupied = is_occupied;
490
+ mqd->mqd_size = sizeof(struct v9_mqd);
452491 #if defined(CONFIG_DEBUG_FS)
453492 mqd->debugfs_show_mqd = debugfs_show_mqd;
454493 #endif
455494 break;
456495 case KFD_MQD_TYPE_SDMA:
496
+ mqd->allocate_mqd = allocate_sdma_mqd;
457497 mqd->init_mqd = init_mqd_sdma;
458
- mqd->uninit_mqd = uninit_mqd_sdma;
498
+ mqd->free_mqd = free_mqd_hiq_sdma;
459499 mqd->load_mqd = load_mqd_sdma;
460500 mqd->update_mqd = update_mqd_sdma;
461501 mqd->destroy_mqd = destroy_mqd_sdma;
462502 mqd->is_occupied = is_occupied_sdma;
503
+ mqd->mqd_size = sizeof(struct v9_sdma_mqd);
463504 #if defined(CONFIG_DEBUG_FS)
464505 mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
465506 #endif