forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-13 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -33,17 +33,14 @@
 #include "kfd_mqd_manager.h"
 #include "cik_regs.h"
 #include "kfd_kernel_queue.h"
+#include "amdgpu_amdkfd.h"
 
 /* Size of the per-pipe EOP queue */
 #define CIK_HPD_EOP_BYTES_LOG2 11
 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
 
 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
-                                unsigned int pasid, unsigned int vmid);
-
-static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
-                                struct queue *q,
-                                struct qcm_process_device *qpd);
+                                u32 pasid, unsigned int vmid);
 
 static int execute_queues_cpsch(struct device_queue_manager *dqm,
                                 enum kfd_unmap_queues_filter filter,
@@ -54,19 +51,20 @@
 
 static int map_queues_cpsch(struct device_queue_manager *dqm);
 
-static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
-                                struct queue *q,
-                                struct qcm_process_device *qpd);
-
 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
-                                unsigned int sdma_queue_id);
+                                struct queue *q);
 
+static inline void deallocate_hqd(struct device_queue_manager *dqm,
+                                struct queue *q);
+static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
+static int allocate_sdma_queue(struct device_queue_manager *dqm,
+                                struct queue *q);
 static void kfd_process_hw_exception(struct work_struct *work);
 
 static inline
 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
 {
-        if (type == KFD_QUEUE_TYPE_SDMA)
+        if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
                 return KFD_MQD_TYPE_SDMA;
         return KFD_MQD_TYPE_CP;
 }
@@ -80,14 +78,14 @@
         /* queue is available for KFD usage if bit is 1 */
         for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
                 if (test_bit(pipe_offset + i,
-                                dqm->dev->shared_resources.queue_bitmap))
+                                dqm->dev->shared_resources.cp_queue_bitmap))
                         return true;
         return false;
 }
 
-unsigned int get_queues_num(struct device_queue_manager *dqm)
+unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
 {
-        return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
+        return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap,
                                 KGD_MAX_QUEUES);
 }
 
@@ -106,10 +104,26 @@
         return dqm->dev->device_info->num_sdma_engines;
 }
 
+static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
+{
+        return dqm->dev->device_info->num_xgmi_sdma_engines;
+}
+
+static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
+{
+        return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
+}
+
 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
 {
         return dqm->dev->device_info->num_sdma_engines
-                        * KFD_SDMA_QUEUES_PER_ENGINE;
+                        * dqm->dev->device_info->num_sdma_queues_per_engine;
+}
+
+unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
+{
+        return dqm->dev->device_info->num_xgmi_sdma_engines
+                        * dqm->dev->device_info->num_sdma_queues_per_engine;
 }
 
 void program_sh_mem_settings(struct device_queue_manager *dqm,
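Note: a quick worked example of the new queue-count helpers, with illustrative values (the real counts come from device_info): on a part with num_sdma_engines = 2, num_xgmi_sdma_engines = 0 and num_sdma_queues_per_engine = 8, get_num_sdma_queues() returns 2 * 8 = 16 and get_num_xgmi_sdma_queues() returns 0, so only the low 16 bits of the SDMA queue bitmap are ever used.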
@@ -123,6 +137,36 @@
                         qpd->sh_mem_bases);
 }
 
+static void increment_queue_count(struct device_queue_manager *dqm,
+                                  struct qcm_process_device *qpd,
+                                  struct queue *q)
+{
+        dqm->active_queue_count++;
+        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+            q->properties.type == KFD_QUEUE_TYPE_DIQ)
+                dqm->active_cp_queue_count++;
+
+        if (q->properties.is_gws) {
+                dqm->gws_queue_count++;
+                qpd->mapped_gws_queue = true;
+        }
+}
+
+static void decrement_queue_count(struct device_queue_manager *dqm,
+                                  struct qcm_process_device *qpd,
+                                  struct queue *q)
+{
+        dqm->active_queue_count--;
+        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+            q->properties.type == KFD_QUEUE_TYPE_DIQ)
+                dqm->active_cp_queue_count--;
+
+        if (q->properties.is_gws) {
+                dqm->gws_queue_count--;
+                qpd->mapped_gws_queue = false;
+        }
+}
+
 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
 {
         struct kfd_dev *dev = qpd->dqm->dev;
@@ -132,13 +176,20 @@
                  * preserve the user mode ABI.
                  */
                 q->doorbell_id = q->properties.queue_id;
-        } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-                /* For SDMA queues on SOC15, use static doorbell
-                 * assignments based on the engine and queue.
+        } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+                        q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+                /* For SDMA queues on SOC15 with 8-byte doorbell, use static
+                 * doorbell assignments based on the engine and queue id.
+                 * The doobell index distance between RLC (2*i) and (2*i+1)
+                 * for a SDMA engine is 512.
                  */
-                q->doorbell_id = dev->shared_resources.sdma_doorbell
-                        [q->properties.sdma_engine_id]
-                        [q->properties.sdma_queue_id];
+                uint32_t *idx_offset =
+                                dev->shared_resources.sdma_doorbell_idx;
+
+                q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
+                        + (q->properties.sdma_queue_id & 1)
+                        * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
+                        + (q->properties.sdma_queue_id >> 1);
         } else {
                 /* For CP queues on SOC15 reserve a free doorbell ID */
                 unsigned int found;
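Note: a worked example of the SDMA doorbell assignment above, with illustrative values: for sdma_doorbell_idx[engine] = 0x200 and sdma_queue_id = 5, the low bit of the queue id selects the mirrored half, so doorbell_id = 0x200 + 1 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET + (5 >> 1) = 0x200 + 512 + 2 = 0x402, assuming KFD_QUEUE_DOORBELL_MIRROR_OFFSET is 512 as the comment states.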
@@ -154,9 +205,8 @@
         }
 
         q->properties.doorbell_off =
-                kfd_doorbell_id_to_offset(dev, q->process,
+                kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
                                           q->doorbell_id);
-
         return 0;
 }
 
@@ -167,7 +217,8 @@
         struct kfd_dev *dev = qpd->dqm->dev;
 
         if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
-            q->properties.type == KFD_QUEUE_TYPE_SDMA)
+            q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+            q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
                 return;
 
         old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
@@ -178,20 +229,30 @@
                                 struct qcm_process_device *qpd,
                                 struct queue *q)
 {
-        int bit, allocated_vmid;
+        int allocated_vmid = -1, i;
 
-        if (dqm->vmid_bitmap == 0)
-                return -ENOMEM;
+        for (i = dqm->dev->vm_info.first_vmid_kfd;
+                        i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
+                if (!dqm->vmid_pasid[i]) {
+                        allocated_vmid = i;
+                        break;
+                }
+        }
 
-        bit = ffs(dqm->vmid_bitmap) - 1;
-        dqm->vmid_bitmap &= ~(1 << bit);
+        if (allocated_vmid < 0) {
+                pr_err("no more vmid to allocate\n");
+                return -ENOSPC;
+        }
 
-        allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
-        pr_debug("vmid allocation %d\n", allocated_vmid);
+        pr_debug("vmid allocated: %d\n", allocated_vmid);
+
+        dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
+
+        set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
+
         qpd->vmid = allocated_vmid;
         q->properties.vmid = allocated_vmid;
 
-        set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
         program_sh_mem_settings(dqm, qpd);
 
         /* qpd->page_table_base is set earlier when register_process()
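Note: for example, if vm_info reserves VMIDs 8 through 15 for KFD (a typical split on these parts), the new loop scans vmid_pasid[8..15] and takes the first entry with no PASID bound, replacing the old ffs() search over vmid_bitmap; exhaustion now returns -ENOSPC instead of -ENOMEM.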
@@ -202,6 +263,10 @@
                         qpd->page_table_base);
         /* invalidate the VM context after pasid and vmid mapping is set up */
         kfd_flush_tlb(qpd_to_pdd(qpd));
+
+        if (dqm->dev->kfd2kgd->set_scratch_backing_va)
+                dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
+                        qpd->sh_hidden_private_base, qpd->vmid);
 
         return 0;
 }
@@ -219,7 +284,7 @@
         if (ret)
                 return ret;
 
-        return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
+        return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
                                 qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
                                 pmf->release_mem_size / sizeof(uint32_t));
 }
@@ -228,8 +293,6 @@
                                 struct qcm_process_device *qpd,
                                 struct queue *q)
 {
-        int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
-
         /* On GFX v7, CP doesn't flush TC at dequeue */
         if (q->device->device_info->asic_family == CHIP_HAWAII)
                 if (flush_texture_cache_nocpsch(q->device, qpd))
@@ -239,8 +302,8 @@
 
         /* Release the vmid mapping */
         set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
+        dqm->vmid_pasid[qpd->vmid] = 0;
 
-        dqm->vmid_bitmap |= (1 << bit);
         qpd->vmid = 0;
         q->properties.vmid = 0;
 }
@@ -249,9 +312,8 @@
                                 struct queue *q,
                                 struct qcm_process_device *qpd)
 {
+        struct mqd_manager *mqd_mgr;
         int retval;
-
-        print_queue(q);
 
         dqm_lock(dqm);
 
@@ -269,37 +331,67 @@
         }
         q->properties.vmid = qpd->vmid;
         /*
-         * Eviction state logic: we only mark active queues as evicted
-         * to avoid the overhead of restoring inactive queues later
+         * Eviction state logic: mark all queues as evicted, even ones
+         * not currently active. Restoring inactive queues later only
+         * updates the is_evicted flag but is a no-op otherwise.
          */
-        if (qpd->evicted)
-                q->properties.is_evicted = (q->properties.queue_size > 0 &&
-                                            q->properties.queue_percent > 0 &&
-                                            q->properties.queue_address != 0);
+        q->properties.is_evicted = !!qpd->evicted;
 
         q->properties.tba_addr = qpd->tba_addr;
         q->properties.tma_addr = qpd->tma_addr;
 
-        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
-                retval = create_compute_queue_nocpsch(dqm, q, qpd);
-        else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
-                retval = create_sdma_queue_nocpsch(dqm, q, qpd);
-        else
-                retval = -EINVAL;
-
-        if (retval) {
-                if (list_empty(&qpd->queues_list))
-                        deallocate_vmid(dqm, qpd, q);
-                goto out_unlock;
+        mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+                        q->properties.type)];
+        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
+                retval = allocate_hqd(dqm, q);
+                if (retval)
+                        goto deallocate_vmid;
+                pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
+                        q->pipe, q->queue);
+        } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+                q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+                retval = allocate_sdma_queue(dqm, q);
+                if (retval)
+                        goto deallocate_vmid;
+                dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
         }
 
+        retval = allocate_doorbell(qpd, q);
+        if (retval)
+                goto out_deallocate_hqd;
+
+        /* Temporarily release dqm lock to avoid a circular lock dependency */
+        dqm_unlock(dqm);
+        q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
+        dqm_lock(dqm);
+
+        if (!q->mqd_mem_obj) {
+                retval = -ENOMEM;
+                goto out_deallocate_doorbell;
+        }
+        mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
+                                &q->gart_mqd_addr, &q->properties);
+        if (q->properties.is_active) {
+                if (!dqm->sched_running) {
+                        WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
+                        goto add_queue_to_list;
+                }
+
+                if (WARN(q->process->mm != current->mm,
+                                        "should only run in user thread"))
+                        retval = -EFAULT;
+                else
+                        retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
+                                        q->queue, &q->properties, current->mm);
+                if (retval)
+                        goto out_free_mqd;
+        }
+
+add_queue_to_list:
         list_add(&q->list, &qpd->queues_list);
         qpd->queue_count++;
         if (q->properties.is_active)
-                dqm->queue_count++;
-
-        if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
-                dqm->sdma_queue_count++;
+                increment_queue_count(dqm, qpd, q);
 
         /*
          * Unconditionally increment this counter, regardless of the queue's
@@ -308,7 +400,21 @@
         dqm->total_queue_count++;
         pr_debug("Total of %d queues are accountable so far\n",
                         dqm->total_queue_count);
+        goto out_unlock;
 
+out_free_mqd:
+        mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+out_deallocate_doorbell:
+        deallocate_doorbell(qpd, q);
+out_deallocate_hqd:
+        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
+                deallocate_hqd(dqm, q);
+        else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+                q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+                deallocate_sdma_queue(dqm, q);
+deallocate_vmid:
+        if (list_empty(&qpd->queues_list))
+                deallocate_vmid(dqm, qpd, q);
 out_unlock:
         dqm_unlock(dqm);
         return retval;
@@ -354,60 +460,6 @@
         dqm->allocated_queues[q->pipe] |= (1 << q->queue);
 }
 
-static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
-                                struct queue *q,
-                                struct qcm_process_device *qpd)
-{
-        struct mqd_manager *mqd_mgr;
-        int retval;
-
-        mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
-        if (!mqd_mgr)
-                return -ENOMEM;
-
-        retval = allocate_hqd(dqm, q);
-        if (retval)
-                return retval;
-
-        retval = allocate_doorbell(qpd, q);
-        if (retval)
-                goto out_deallocate_hqd;
-
-        retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
-                                &q->gart_mqd_addr, &q->properties);
-        if (retval)
-                goto out_deallocate_doorbell;
-
-        pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
-                        q->pipe, q->queue);
-
-        dqm->dev->kfd2kgd->set_scratch_backing_va(
-                        dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);
-
-        if (!q->properties.is_active)
-                return 0;
-
-        if (WARN(q->process->mm != current->mm,
-                        "should only run in user thread"))
-                retval = -EFAULT;
-        else
-                retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
-                        &q->properties, current->mm);
-        if (retval)
-                goto out_uninit_mqd;
-
-        return 0;
-
-out_uninit_mqd:
-        mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
-out_deallocate_doorbell:
-        deallocate_doorbell(qpd, q);
-out_deallocate_hqd:
-        deallocate_hqd(dqm, q);
-
-        return retval;
-}
-
 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
  * to avoid asynchronized access
  */
....@@ -418,17 +470,16 @@
418470 int retval;
419471 struct mqd_manager *mqd_mgr;
420472
421
- mqd_mgr = dqm->ops.get_mqd_manager(dqm,
422
- get_mqd_type_from_queue_type(q->properties.type));
423
- if (!mqd_mgr)
424
- return -ENOMEM;
473
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
474
+ q->properties.type)];
425475
426
- if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
476
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
427477 deallocate_hqd(dqm, q);
428
- } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
429
- dqm->sdma_queue_count--;
430
- deallocate_sdma_queue(dqm, q->sdma_id);
431
- } else {
478
+ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
479
+ deallocate_sdma_queue(dqm, q);
480
+ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
481
+ deallocate_sdma_queue(dqm, q);
482
+ else {
432483 pr_debug("q->properties.type %d is invalid\n",
433484 q->properties.type);
434485 return -EINVAL;
....@@ -437,14 +488,17 @@
437488
438489 deallocate_doorbell(qpd, q);
439490
491
+ if (!dqm->sched_running) {
492
+ WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
493
+ return 0;
494
+ }
495
+
440496 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
441497 KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
442498 KFD_UNMAP_LATENCY_MS,
443499 q->pipe, q->queue);
444500 if (retval == -ETIME)
445501 qpd->reset_wavefronts = true;
446
-
447
- mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
448502
449503 list_del(&q->list);
450504 if (list_empty(&qpd->queues_list)) {
....@@ -463,7 +517,7 @@
463517 }
464518 qpd->queue_count--;
465519 if (q->properties.is_active)
466
- dqm->queue_count--;
520
+ decrement_queue_count(dqm, qpd, q);
467521
468522 return retval;
469523 }
....@@ -473,17 +527,35 @@
473527 struct queue *q)
474528 {
475529 int retval;
530
+ uint64_t sdma_val = 0;
531
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
532
+ struct mqd_manager *mqd_mgr =
533
+ dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
534
+
535
+ /* Get the SDMA queue stats */
536
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
537
+ (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
538
+ retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
539
+ &sdma_val);
540
+ if (retval)
541
+ pr_err("Failed to read SDMA queue counter for queue: %d\n",
542
+ q->properties.queue_id);
543
+ }
476544
477545 dqm_lock(dqm);
478546 retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
547
+ if (!retval)
548
+ pdd->sdma_past_activity_counter += sdma_val;
479549 dqm_unlock(dqm);
550
+
551
+ mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
480552
481553 return retval;
482554 }
483555
484556 static int update_queue(struct device_queue_manager *dqm, struct queue *q)
485557 {
486
- int retval;
558
+ int retval = 0;
487559 struct mqd_manager *mqd_mgr;
488560 struct kfd_process_device *pdd;
489561 bool prev_active = false;
....@@ -494,20 +566,8 @@
494566 retval = -ENODEV;
495567 goto out_unlock;
496568 }
497
- mqd_mgr = dqm->ops.get_mqd_manager(dqm,
498
- get_mqd_type_from_queue_type(q->properties.type));
499
- if (!mqd_mgr) {
500
- retval = -ENOMEM;
501
- goto out_unlock;
502
- }
503
- /*
504
- * Eviction state logic: we only mark active queues as evicted
505
- * to avoid the overhead of restoring inactive queues later
506
- */
507
- if (pdd->qpd.evicted)
508
- q->properties.is_evicted = (q->properties.queue_size > 0 &&
509
- q->properties.queue_percent > 0 &&
510
- q->properties.queue_address != 0);
569
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
570
+ q->properties.type)];
511571
512572 /* Save previous activity state for counters */
513573 prev_active = q->properties.is_active;
....@@ -522,7 +582,14 @@
522582 }
523583 } else if (prev_active &&
524584 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
525
- q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
585
+ q->properties.type == KFD_QUEUE_TYPE_SDMA ||
586
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
587
+
588
+ if (!dqm->sched_running) {
589
+ WARN_ONCE(1, "Update non-HWS queue while stopped\n");
590
+ goto out_unlock;
591
+ }
592
+
526593 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
527594 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
528595 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
....@@ -532,24 +599,38 @@
532599 }
533600 }
534601
535
- retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);
602
+ mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);
536603
537604 /*
538605 * check active state vs. the previous state and modify
539606 * counter accordingly. map_queues_cpsch uses the
540
- * dqm->queue_count to determine whether a new runlist must be
607
+ * dqm->active_queue_count to determine whether a new runlist must be
541608 * uploaded.
542609 */
543
- if (q->properties.is_active && !prev_active)
544
- dqm->queue_count++;
545
- else if (!q->properties.is_active && prev_active)
546
- dqm->queue_count--;
610
+ if (q->properties.is_active && !prev_active) {
611
+ increment_queue_count(dqm, &pdd->qpd, q);
612
+ } else if (!q->properties.is_active && prev_active) {
613
+ decrement_queue_count(dqm, &pdd->qpd, q);
614
+ } else if (q->gws && !q->properties.is_gws) {
615
+ if (q->properties.is_active) {
616
+ dqm->gws_queue_count++;
617
+ pdd->qpd.mapped_gws_queue = true;
618
+ }
619
+ q->properties.is_gws = true;
620
+ } else if (!q->gws && q->properties.is_gws) {
621
+ if (q->properties.is_active) {
622
+ dqm->gws_queue_count--;
623
+ pdd->qpd.mapped_gws_queue = false;
624
+ }
625
+ q->properties.is_gws = false;
626
+ }
547627
548628 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
549629 retval = map_queues_cpsch(dqm);
550630 else if (q->properties.is_active &&
551631 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
552
- q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
632
+ q->properties.type == KFD_QUEUE_TYPE_SDMA ||
633
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
553634 if (WARN(q->process->mm != current->mm,
554635 "should only run in user thread"))
555636 retval = -EFAULT;
....@@ -564,67 +645,52 @@
564645 return retval;
565646 }
566647
567
-static struct mqd_manager *get_mqd_manager(
568
- struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
569
-{
570
- struct mqd_manager *mqd_mgr;
571
-
572
- if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
573
- return NULL;
574
-
575
- pr_debug("mqd type %d\n", type);
576
-
577
- mqd_mgr = dqm->mqd_mgrs[type];
578
- if (!mqd_mgr) {
579
- mqd_mgr = mqd_manager_init(type, dqm->dev);
580
- if (!mqd_mgr)
581
- pr_err("mqd manager is NULL");
582
- dqm->mqd_mgrs[type] = mqd_mgr;
583
- }
584
-
585
- return mqd_mgr;
586
-}
587
-
588648 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
589649 struct qcm_process_device *qpd)
590650 {
591651 struct queue *q;
592652 struct mqd_manager *mqd_mgr;
593653 struct kfd_process_device *pdd;
594
- int retval = 0;
654
+ int retval, ret = 0;
595655
596656 dqm_lock(dqm);
597657 if (qpd->evicted++ > 0) /* already evicted, do nothing */
598658 goto out;
599659
600660 pdd = qpd_to_pdd(qpd);
601
- pr_info_ratelimited("Evicting PASID %u queues\n",
661
+ pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
602662 pdd->process->pasid);
603663
604
- /* unactivate all active queues on the qpd */
664
+ pdd->last_evict_timestamp = get_jiffies_64();
665
+ /* Mark all queues as evicted. Deactivate all active queues on
666
+ * the qpd.
667
+ */
605668 list_for_each_entry(q, &qpd->queues_list, list) {
669
+ q->properties.is_evicted = true;
606670 if (!q->properties.is_active)
607671 continue;
608
- mqd_mgr = dqm->ops.get_mqd_manager(dqm,
609
- get_mqd_type_from_queue_type(q->properties.type));
610
- if (!mqd_mgr) { /* should not be here */
611
- pr_err("Cannot evict queue, mqd mgr is NULL\n");
612
- retval = -ENOMEM;
613
- goto out;
614
- }
615
- q->properties.is_evicted = true;
672
+
673
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
674
+ q->properties.type)];
616675 q->properties.is_active = false;
676
+ decrement_queue_count(dqm, qpd, q);
677
+
678
+ if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
679
+ continue;
680
+
617681 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
618682 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
619683 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
620
- if (retval)
621
- goto out;
622
- dqm->queue_count--;
684
+ if (retval && !ret)
685
+ /* Return the first error, but keep going to
686
+ * maintain a consistent eviction state
687
+ */
688
+ ret = retval;
623689 }
624690
625691 out:
626692 dqm_unlock(dqm);
627
- return retval;
693
+ return ret;
628694 }
629695
630696 static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
....@@ -639,17 +705,21 @@
639705 goto out;
640706
641707 pdd = qpd_to_pdd(qpd);
642
- pr_info_ratelimited("Evicting PASID %u queues\n",
708
+ pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
643709 pdd->process->pasid);
644710
645
- /* unactivate all active queues on the qpd */
711
+ /* Mark all queues as evicted. Deactivate all active queues on
712
+ * the qpd.
713
+ */
646714 list_for_each_entry(q, &qpd->queues_list, list) {
715
+ q->properties.is_evicted = true;
647716 if (!q->properties.is_active)
648717 continue;
649
- q->properties.is_evicted = true;
718
+
650719 q->properties.is_active = false;
651
- dqm->queue_count--;
720
+ decrement_queue_count(dqm, qpd, q);
652721 }
722
+ pdd->last_evict_timestamp = get_jiffies_64();
653723 retval = execute_queues_cpsch(dqm,
654724 qpd->is_debug ?
655725 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
....@@ -667,12 +737,13 @@
667737 struct queue *q;
668738 struct mqd_manager *mqd_mgr;
669739 struct kfd_process_device *pdd;
670
- uint32_t pd_base;
671
- int retval = 0;
740
+ uint64_t pd_base;
741
+ uint64_t eviction_duration;
742
+ int retval, ret = 0;
672743
673744 pdd = qpd_to_pdd(qpd);
674745 /* Retrieve PD base */
675
- pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
746
+ pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
676747
677748 dqm_lock(dqm);
678749 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
....@@ -682,12 +753,12 @@
682753 goto out;
683754 }
684755
685
- pr_info_ratelimited("Restoring PASID %u queues\n",
756
+ pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
686757 pdd->process->pasid);
687758
688759 /* Update PD Base in QPD */
689760 qpd->page_table_base = pd_base;
690
- pr_debug("Updated PD address to 0x%08x\n", pd_base);
761
+ pr_debug("Updated PD address to 0x%llx\n", pd_base);
691762
692763 if (!list_empty(&qpd->queues_list)) {
693764 dqm->dev->kfd2kgd->set_vm_context_page_table_base(
....@@ -702,35 +773,42 @@
702773 */
703774 mm = get_task_mm(pdd->process->lead_thread);
704775 if (!mm) {
705
- retval = -EFAULT;
776
+ ret = -EFAULT;
706777 goto out;
707778 }
708779
709
- /* activate all active queues on the qpd */
780
+ /* Remove the eviction flags. Activate queues that are not
781
+ * inactive for other reasons.
782
+ */
710783 list_for_each_entry(q, &qpd->queues_list, list) {
711
- if (!q->properties.is_evicted)
712
- continue;
713
- mqd_mgr = dqm->ops.get_mqd_manager(dqm,
714
- get_mqd_type_from_queue_type(q->properties.type));
715
- if (!mqd_mgr) { /* should not be here */
716
- pr_err("Cannot restore queue, mqd mgr is NULL\n");
717
- retval = -ENOMEM;
718
- goto out;
719
- }
720784 q->properties.is_evicted = false;
785
+ if (!QUEUE_IS_ACTIVE(q->properties))
786
+ continue;
787
+
788
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
789
+ q->properties.type)];
721790 q->properties.is_active = true;
791
+ increment_queue_count(dqm, qpd, q);
792
+
793
+ if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
794
+ continue;
795
+
722796 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
723797 q->queue, &q->properties, mm);
724
- if (retval)
725
- goto out;
726
- dqm->queue_count++;
798
+ if (retval && !ret)
799
+ /* Return the first error, but keep going to
800
+ * maintain a consistent eviction state
801
+ */
802
+ ret = retval;
727803 }
728804 qpd->evicted = 0;
805
+ eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
806
+ atomic64_add(eviction_duration, &pdd->evict_duration_counter);
729807 out:
730808 if (mm)
731809 mmput(mm);
732810 dqm_unlock(dqm);
733
- return retval;
811
+ return ret;
734812 }
735813
736814 static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
....@@ -738,12 +816,13 @@
738816 {
739817 struct queue *q;
740818 struct kfd_process_device *pdd;
741
- uint32_t pd_base;
819
+ uint64_t pd_base;
820
+ uint64_t eviction_duration;
742821 int retval = 0;
743822
744823 pdd = qpd_to_pdd(qpd);
745824 /* Retrieve PD base */
746
- pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
825
+ pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
747826
748827 dqm_lock(dqm);
749828 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
....@@ -753,25 +832,27 @@
753832 goto out;
754833 }
755834
756
- pr_info_ratelimited("Restoring PASID %u queues\n",
835
+ pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
757836 pdd->process->pasid);
758837
759838 /* Update PD Base in QPD */
760839 qpd->page_table_base = pd_base;
761
- pr_debug("Updated PD address to 0x%08x\n", pd_base);
840
+ pr_debug("Updated PD address to 0x%llx\n", pd_base);
762841
763842 /* activate all active queues on the qpd */
764843 list_for_each_entry(q, &qpd->queues_list, list) {
765
- if (!q->properties.is_evicted)
766
- continue;
767844 q->properties.is_evicted = false;
845
+ if (!QUEUE_IS_ACTIVE(q->properties))
846
+ continue;
847
+
768848 q->properties.is_active = true;
769
- dqm->queue_count++;
849
+ increment_queue_count(dqm, &pdd->qpd, q);
770850 }
771851 retval = execute_queues_cpsch(dqm,
772852 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
773
- if (!retval)
774
- qpd->evicted = 0;
853
+ qpd->evicted = 0;
854
+ eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
855
+ atomic64_add(eviction_duration, &pdd->evict_duration_counter);
775856 out:
776857 dqm_unlock(dqm);
777858 return retval;
....@@ -782,7 +863,7 @@
782863 {
783864 struct device_process_node *n;
784865 struct kfd_process_device *pdd;
785
- uint32_t pd_base;
866
+ uint64_t pd_base;
786867 int retval;
787868
788869 n = kzalloc(sizeof(*n), GFP_KERNEL);
....@@ -793,20 +874,25 @@
793874
794875 pdd = qpd_to_pdd(qpd);
795876 /* Retrieve PD base */
796
- pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
877
+ pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
797878
798879 dqm_lock(dqm);
799880 list_add(&n->list, &dqm->queues);
800881
801882 /* Update PD Base in QPD */
802883 qpd->page_table_base = pd_base;
884
+ pr_debug("Updated PD address to 0x%llx\n", pd_base);
803885
804886 retval = dqm->asic_ops.update_qpd(dqm, qpd);
805887
806
- if (dqm->processes_count++ == 0)
807
- dqm->dev->kfd2kgd->set_compute_idle(dqm->dev->kgd, false);
888
+ dqm->processes_count++;
808889
809890 dqm_unlock(dqm);
891
+
892
+ /* Outside the DQM lock because under the DQM lock we can't do
893
+ * reclaim or take other locks that others hold while reclaiming.
894
+ */
895
+ kfd_inc_compute_active(dqm->dev);
810896
811897 return retval;
812898 }
....@@ -827,9 +913,7 @@
827913 if (qpd == cur->qpd) {
828914 list_del(&cur->list);
829915 kfree(cur);
830
- if (--dqm->processes_count == 0)
831
- dqm->dev->kfd2kgd->set_compute_idle(
832
- dqm->dev->kgd, true);
916
+ dqm->processes_count--;
833917 goto out;
834918 }
835919 }
....@@ -837,22 +921,22 @@
837921 retval = 1;
838922 out:
839923 dqm_unlock(dqm);
924
+
925
+ /* Outside the DQM lock because under the DQM lock we can't do
926
+ * reclaim or take other locks that others hold while reclaiming.
927
+ */
928
+ if (!retval)
929
+ kfd_dec_compute_active(dqm->dev);
930
+
840931 return retval;
841932 }
842933
843934 static int
844
-set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
935
+set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
845936 unsigned int vmid)
846937 {
847
- uint32_t pasid_mapping;
848
-
849
- pasid_mapping = (pasid == 0) ? 0 :
850
- (uint32_t)pasid |
851
- ATC_VMID_PASID_MAPPING_VALID;
852
-
853938 return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
854
- dqm->dev->kgd, pasid_mapping,
855
- vmid);
939
+ dqm->dev->kgd, pasid, vmid);
856940 }
857941
858942 static void init_interrupts(struct device_queue_manager *dqm)
....@@ -877,20 +961,23 @@
877961
878962 mutex_init(&dqm->lock_hidden);
879963 INIT_LIST_HEAD(&dqm->queues);
880
- dqm->queue_count = dqm->next_pipe_to_allocate = 0;
881
- dqm->sdma_queue_count = 0;
964
+ dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
965
+ dqm->active_cp_queue_count = 0;
966
+ dqm->gws_queue_count = 0;
882967
883968 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
884969 int pipe_offset = pipe * get_queues_per_pipe(dqm);
885970
886971 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
887972 if (test_bit(pipe_offset + queue,
888
- dqm->dev->shared_resources.queue_bitmap))
973
+ dqm->dev->shared_resources.cp_queue_bitmap))
889974 dqm->allocated_queues[pipe] |= 1 << queue;
890975 }
891976
892
- dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
893
- dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
977
+ memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
978
+
979
+ dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
980
+ dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
894981
895982 return 0;
896983 }
....@@ -899,97 +986,98 @@
899986 {
900987 int i;
901988
902
- WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
989
+ WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
903990
904991 kfree(dqm->allocated_queues);
905992 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
906993 kfree(dqm->mqd_mgrs[i]);
907994 mutex_destroy(&dqm->lock_hidden);
908
- kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
909995 }
910996
911997 static int start_nocpsch(struct device_queue_manager *dqm)
912998 {
999
+ pr_info("SW scheduler is used");
9131000 init_interrupts(dqm);
914
- return pm_init(&dqm->packets, dqm);
1001
+
1002
+ if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
1003
+ return pm_init(&dqm->packets, dqm);
1004
+ dqm->sched_running = true;
1005
+
1006
+ return 0;
9151007 }
9161008
9171009 static int stop_nocpsch(struct device_queue_manager *dqm)
9181010 {
919
- pm_uninit(&dqm->packets);
1011
+ if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
1012
+ pm_uninit(&dqm->packets, false);
1013
+ dqm->sched_running = false;
1014
+
9201015 return 0;
9211016 }
9221017
1018
+static void pre_reset(struct device_queue_manager *dqm)
1019
+{
1020
+ dqm_lock(dqm);
1021
+ dqm->is_resetting = true;
1022
+ dqm_unlock(dqm);
1023
+}
1024
+
9231025 static int allocate_sdma_queue(struct device_queue_manager *dqm,
924
- unsigned int *sdma_queue_id)
1026
+ struct queue *q)
9251027 {
9261028 int bit;
9271029
928
- if (dqm->sdma_bitmap == 0)
929
- return -ENOMEM;
1030
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1031
+ if (dqm->sdma_bitmap == 0) {
1032
+ pr_err("No more SDMA queue to allocate\n");
1033
+ return -ENOMEM;
1034
+ }
9301035
931
- bit = ffs(dqm->sdma_bitmap) - 1;
932
- dqm->sdma_bitmap &= ~(1 << bit);
933
- *sdma_queue_id = bit;
1036
+ bit = __ffs64(dqm->sdma_bitmap);
1037
+ dqm->sdma_bitmap &= ~(1ULL << bit);
1038
+ q->sdma_id = bit;
1039
+ q->properties.sdma_engine_id = q->sdma_id %
1040
+ get_num_sdma_engines(dqm);
1041
+ q->properties.sdma_queue_id = q->sdma_id /
1042
+ get_num_sdma_engines(dqm);
1043
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1044
+ if (dqm->xgmi_sdma_bitmap == 0) {
1045
+ pr_err("No more XGMI SDMA queue to allocate\n");
1046
+ return -ENOMEM;
1047
+ }
1048
+ bit = __ffs64(dqm->xgmi_sdma_bitmap);
1049
+ dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
1050
+ q->sdma_id = bit;
1051
+ /* sdma_engine_id is sdma id including
1052
+ * both PCIe-optimized SDMAs and XGMI-
1053
+ * optimized SDMAs. The calculation below
1054
+ * assumes the first N engines are always
1055
+ * PCIe-optimized ones
1056
+ */
1057
+ q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
1058
+ q->sdma_id % get_num_xgmi_sdma_engines(dqm);
1059
+ q->properties.sdma_queue_id = q->sdma_id /
1060
+ get_num_xgmi_sdma_engines(dqm);
1061
+ }
1062
+
1063
+ pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
1064
+ pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
9341065
9351066 return 0;
9361067 }
9371068
9381069 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
939
- unsigned int sdma_queue_id)
1070
+ struct queue *q)
9401071 {
941
- if (sdma_queue_id >= get_num_sdma_queues(dqm))
942
- return;
943
- dqm->sdma_bitmap |= (1 << sdma_queue_id);
944
-}
945
-
946
-static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
947
- struct queue *q,
948
- struct qcm_process_device *qpd)
949
-{
950
- struct mqd_manager *mqd_mgr;
951
- int retval;
952
-
953
- mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
954
- if (!mqd_mgr)
955
- return -ENOMEM;
956
-
957
- retval = allocate_sdma_queue(dqm, &q->sdma_id);
958
- if (retval)
959
- return retval;
960
-
961
- q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
962
- q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);
963
-
964
- retval = allocate_doorbell(qpd, q);
965
- if (retval)
966
- goto out_deallocate_sdma_queue;
967
-
968
- pr_debug("SDMA id is: %d\n", q->sdma_id);
969
- pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
970
- pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
971
-
972
- dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
973
- retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
974
- &q->gart_mqd_addr, &q->properties);
975
- if (retval)
976
- goto out_deallocate_doorbell;
977
-
978
- retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties,
979
- NULL);
980
- if (retval)
981
- goto out_uninit_mqd;
982
-
983
- return 0;
984
-
985
-out_uninit_mqd:
986
- mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
987
-out_deallocate_doorbell:
988
- deallocate_doorbell(qpd, q);
989
-out_deallocate_sdma_queue:
990
- deallocate_sdma_queue(dqm, q->sdma_id);
991
-
992
- return retval;
1072
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1073
+ if (q->sdma_id >= get_num_sdma_queues(dqm))
1074
+ return;
1075
+ dqm->sdma_bitmap |= (1ULL << q->sdma_id);
1076
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1077
+ if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
1078
+ return;
1079
+ dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
1080
+ }
9931081 }
9941082
9951083 /*
....@@ -1008,7 +1096,7 @@
10081096 mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
10091097 / dqm->dev->shared_resources.num_pipe_per_mec;
10101098
1011
- if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
1099
+ if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap))
10121100 continue;
10131101
10141102 /* only acquire queues from the first MEC */
....@@ -1024,10 +1112,12 @@
10241112 break;
10251113 }
10261114
1027
- res.queue_mask |= (1ull << i);
1115
+ res.queue_mask |= 1ull
1116
+ << amdgpu_queue_mask_bit_to_set_resource_bit(
1117
+ (struct amdgpu_device *)dqm->dev->kgd, i);
10281118 }
1029
- res.gws_mask = res.oac_mask = res.gds_heap_base =
1030
- res.gds_heap_size = 0;
1119
+ res.gws_mask = ~0ull;
1120
+ res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
10311121
10321122 pr_debug("Scheduling resources:\n"
10331123 "vmid mask: 0x%8X\n"
....@@ -1039,14 +1129,29 @@
10391129
10401130 static int initialize_cpsch(struct device_queue_manager *dqm)
10411131 {
1132
+ uint64_t num_sdma_queues;
1133
+ uint64_t num_xgmi_sdma_queues;
1134
+
10421135 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
10431136
10441137 mutex_init(&dqm->lock_hidden);
10451138 INIT_LIST_HEAD(&dqm->queues);
1046
- dqm->queue_count = dqm->processes_count = 0;
1047
- dqm->sdma_queue_count = 0;
1139
+ dqm->active_queue_count = dqm->processes_count = 0;
1140
+ dqm->active_cp_queue_count = 0;
1141
+ dqm->gws_queue_count = 0;
10481142 dqm->active_runlist = false;
1049
- dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
1143
+
1144
+ num_sdma_queues = get_num_sdma_queues(dqm);
1145
+ if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap))
1146
+ dqm->sdma_bitmap = ULLONG_MAX;
1147
+ else
1148
+ dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);
1149
+
1150
+ num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
1151
+ if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap))
1152
+ dqm->xgmi_sdma_bitmap = ULLONG_MAX;
1153
+ else
1154
+ dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1);
10501155
10511156 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
10521157
....@@ -1076,7 +1181,7 @@
10761181 if (retval)
10771182 goto fail_allocate_vidmem;
10781183
1079
- dqm->fence_addr = dqm->fence_mem->cpu_ptr;
1184
+ dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
10801185 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
10811186
10821187 init_interrupts(dqm);
....@@ -1084,27 +1189,39 @@
10841189 dqm_lock(dqm);
10851190 /* clear hang status when driver try to start the hw scheduler */
10861191 dqm->is_hws_hang = false;
1192
+ dqm->is_resetting = false;
1193
+ dqm->sched_running = true;
10871194 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
10881195 dqm_unlock(dqm);
10891196
10901197 return 0;
10911198 fail_allocate_vidmem:
10921199 fail_set_sched_resources:
1093
- pm_uninit(&dqm->packets);
1200
+ pm_uninit(&dqm->packets, false);
10941201 fail_packet_manager_init:
10951202 return retval;
10961203 }
10971204
10981205 static int stop_cpsch(struct device_queue_manager *dqm)
10991206 {
1207
+ bool hanging;
1208
+
11001209 dqm_lock(dqm);
1101
- unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1210
+ if (!dqm->sched_running) {
1211
+ dqm_unlock(dqm);
1212
+ return 0;
1213
+ }
1214
+
1215
+ if (!dqm->is_hws_hang)
1216
+ unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1217
+ hanging = dqm->is_hws_hang || dqm->is_resetting;
1218
+ dqm->sched_running = false;
11021219 dqm_unlock(dqm);
11031220
11041221 pm_release_ib(&dqm->packets);
11051222
11061223 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
1107
- pm_uninit(&dqm->packets);
1224
+ pm_uninit(&dqm->packets, hanging);
11081225
11091226 return 0;
11101227 }
....@@ -1130,7 +1247,7 @@
11301247 dqm->total_queue_count);
11311248
11321249 list_add(&kq->list, &qpd->priv_queue_list);
1133
- dqm->queue_count++;
1250
+ increment_queue_count(dqm, qpd, kq->queue);
11341251 qpd->is_debug = true;
11351252 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
11361253 dqm_unlock(dqm);
....@@ -1144,7 +1261,7 @@
11441261 {
11451262 dqm_lock(dqm);
11461263 list_del(&kq->list);
1147
- dqm->queue_count--;
1264
+ decrement_queue_count(dqm, qpd, kq->queue);
11481265 qpd->is_debug = false;
11491266 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
11501267 /*
....@@ -1163,66 +1280,60 @@
11631280 int retval;
11641281 struct mqd_manager *mqd_mgr;
11651282
1166
- retval = 0;
1167
-
1168
- dqm_lock(dqm);
1169
-
11701283 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
11711284 pr_warn("Can't create new usermode queue because %d queues were already created\n",
11721285 dqm->total_queue_count);
11731286 retval = -EPERM;
1174
- goto out_unlock;
1287
+ goto out;
11751288 }
11761289
1177
- if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1178
- retval = allocate_sdma_queue(dqm, &q->sdma_id);
1290
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1291
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1292
+ dqm_lock(dqm);
1293
+ retval = allocate_sdma_queue(dqm, q);
1294
+ dqm_unlock(dqm);
11791295 if (retval)
1180
- goto out_unlock;
1181
- q->properties.sdma_queue_id =
1182
- q->sdma_id / get_num_sdma_engines(dqm);
1183
- q->properties.sdma_engine_id =
1184
- q->sdma_id % get_num_sdma_engines(dqm);
1296
+ goto out;
11851297 }
11861298
11871299 retval = allocate_doorbell(qpd, q);
11881300 if (retval)
11891301 goto out_deallocate_sdma_queue;
11901302
1191
- mqd_mgr = dqm->ops.get_mqd_manager(dqm,
1192
- get_mqd_type_from_queue_type(q->properties.type));
1303
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1304
+ q->properties.type)];
11931305
1194
- if (!mqd_mgr) {
1306
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1307
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1308
+ dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1309
+ q->properties.tba_addr = qpd->tba_addr;
1310
+ q->properties.tma_addr = qpd->tma_addr;
1311
+ q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
1312
+ if (!q->mqd_mem_obj) {
11951313 retval = -ENOMEM;
11961314 goto out_deallocate_doorbell;
11971315 }
1316
+
1317
+ dqm_lock(dqm);
11981318 /*
1199
- * Eviction state logic: we only mark active queues as evicted
1200
- * to avoid the overhead of restoring inactive queues later
1319
+ * Eviction state logic: mark all queues as evicted, even ones
1320
+ * not currently active. Restoring inactive queues later only
1321
+ * updates the is_evicted flag but is a no-op otherwise.
12011322 */
1202
- if (qpd->evicted)
1203
- q->properties.is_evicted = (q->properties.queue_size > 0 &&
1204
- q->properties.queue_percent > 0 &&
1205
- q->properties.queue_address != 0);
1206
-
1207
- dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1208
-
1209
- q->properties.tba_addr = qpd->tba_addr;
1210
- q->properties.tma_addr = qpd->tma_addr;
1211
- retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
1323
+ q->properties.is_evicted = !!qpd->evicted;
1324
+ mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
12121325 &q->gart_mqd_addr, &q->properties);
1213
- if (retval)
1214
- goto out_deallocate_doorbell;
12151326
12161327 list_add(&q->list, &qpd->queues_list);
12171328 qpd->queue_count++;
1329
+
12181330 if (q->properties.is_active) {
1219
- dqm->queue_count++;
1220
- retval = execute_queues_cpsch(dqm,
1331
+ increment_queue_count(dqm, qpd, q);
1332
+
1333
+ execute_queues_cpsch(dqm,
12211334 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
12221335 }
12231336
1224
- if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
1225
- dqm->sdma_queue_count++;
12261337 /*
12271338 * Unconditionally increment this counter, regardless of the queue's
12281339 * type or whether the queue is active.
....@@ -1238,16 +1349,18 @@
12381349 out_deallocate_doorbell:
12391350 deallocate_doorbell(qpd, q);
12401351 out_deallocate_sdma_queue:
1241
- if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
1242
- deallocate_sdma_queue(dqm, q->sdma_id);
1243
-out_unlock:
1244
- dqm_unlock(dqm);
1245
-
1352
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1353
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1354
+ dqm_lock(dqm);
1355
+ deallocate_sdma_queue(dqm, q);
1356
+ dqm_unlock(dqm);
1357
+ }
1358
+out:
12461359 return retval;
12471360 }
12481361
1249
-int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
1250
- unsigned int fence_value,
1362
+int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
1363
+ uint64_t fence_value,
12511364 unsigned int timeout_ms)
12521365 {
12531366 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
....@@ -1270,31 +1383,20 @@
12701383 return 0;
12711384 }
12721385
1273
-static int unmap_sdma_queues(struct device_queue_manager *dqm)
1274
-{
1275
- int i, retval = 0;
1276
-
1277
- for (i = 0; i < dqm->dev->device_info->num_sdma_engines; i++) {
1278
- retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
1279
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i);
1280
- if (retval)
1281
- return retval;
1282
- }
1283
- return retval;
1284
-}
1285
-
12861386 /* dqm->lock mutex has to be locked before calling this function */
12871387 static int map_queues_cpsch(struct device_queue_manager *dqm)
12881388 {
12891389 int retval;
12901390
1291
- if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
1391
+ if (!dqm->sched_running)
12921392 return 0;
1293
-
1393
+ if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
1394
+ return 0;
12941395 if (dqm->active_runlist)
12951396 return 0;
12961397
12971398 retval = pm_send_runlist(&dqm->packets, &dqm->queues);
1399
+ pr_debug("%s sent runlist\n", __func__);
12981400 if (retval) {
12991401 pr_err("failed to execute runlist\n");
13001402 return retval;
....@@ -1311,16 +1413,12 @@
13111413 {
13121414 int retval = 0;
13131415
1416
+ if (!dqm->sched_running)
1417
+ return 0;
13141418 if (dqm->is_hws_hang)
13151419 return -EIO;
13161420 if (!dqm->active_runlist)
13171421 return retval;
1318
-
1319
- pr_debug("Before destroying queues, sdma queue count is : %u\n",
1320
- dqm->sdma_queue_count);
1321
-
1322
- if (dqm->sdma_queue_count > 0)
1323
- unmap_sdma_queues(dqm);
13241422
13251423 retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
13261424 filter, filter_param, false, 0);
....@@ -1332,9 +1430,18 @@
13321430 KFD_FENCE_COMPLETED);
13331431 /* should be timed out */
13341432 retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1335
- QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
1336
- if (retval)
1433
+ queue_preemption_timeout_ms);
1434
+ if (retval) {
1435
+ pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
1436
+ dqm->is_hws_hang = true;
1437
+ /* It's possible we're detecting a HWS hang in the
1438
+ * middle of a GPU reset. No need to schedule another
1439
+ * reset in this case.
1440
+ */
1441
+ if (!dqm->is_resetting)
1442
+ schedule_work(&dqm->hw_exception_work);
13371443 return retval;
1444
+ }
13381445
13391446 pm_release_ib(&dqm->packets);
13401447 dqm->active_runlist = false;
....@@ -1352,12 +1459,8 @@
13521459 if (dqm->is_hws_hang)
13531460 return -EIO;
13541461 retval = unmap_queues_cpsch(dqm, filter, filter_param);
1355
- if (retval) {
1356
- pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
1357
- dqm->is_hws_hang = true;
1358
- schedule_work(&dqm->hw_exception_work);
1462
+ if (retval)
13591463 return retval;
1360
- }
13611464
13621465 return map_queues_cpsch(dqm);
13631466 }
....@@ -1368,9 +1471,18 @@
13681471 {
13691472 int retval;
13701473 struct mqd_manager *mqd_mgr;
1371
- bool preempt_all_queues;
1474
+ uint64_t sdma_val = 0;
1475
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
13721476
1373
- preempt_all_queues = false;
1477
+ /* Get the SDMA queue stats */
1478
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1479
+ (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1480
+ retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
1481
+ &sdma_val);
1482
+ if (retval)
1483
+ pr_err("Failed to read SDMA queue counter for queue: %d\n",
1484
+ q->properties.queue_id);
1485
+ }
13741486
13751487 retval = 0;
13761488
....@@ -1387,31 +1499,26 @@
13871499
13881500 }
13891501
1390
- mqd_mgr = dqm->ops.get_mqd_manager(dqm,
1391
- get_mqd_type_from_queue_type(q->properties.type));
1392
- if (!mqd_mgr) {
1393
- retval = -ENOMEM;
1394
- goto failed;
1395
- }
1502
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1503
+ q->properties.type)];
13961504
13971505 deallocate_doorbell(qpd, q);
13981506
1399
- if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1400
- dqm->sdma_queue_count--;
1401
- deallocate_sdma_queue(dqm, q->sdma_id);
1507
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1508
+ (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1509
+ deallocate_sdma_queue(dqm, q);
1510
+ pdd->sdma_past_activity_counter += sdma_val;
14021511 }
14031512
14041513 list_del(&q->list);
14051514 qpd->queue_count--;
14061515 if (q->properties.is_active) {
1407
- dqm->queue_count--;
1516
+ decrement_queue_count(dqm, qpd, q);
14081517 retval = execute_queues_cpsch(dqm,
14091518 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
14101519 if (retval == -ETIME)
14111520 qpd->reset_wavefronts = true;
14121521 }
1413
-
1414
- mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
14151522
14161523 /*
14171524 * Unconditionally decrement this counter, regardless of the queue's
....@@ -1423,9 +1530,11 @@
14231530
14241531 dqm_unlock(dqm);
14251532
1533
+ /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
1534
+ mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1535
+
14261536 return retval;
14271537
1428
-failed:
14291538 failed_try_destroy_debugged_queue:
14301539
14311540 dqm_unlock(dqm);
....@@ -1525,19 +1634,27 @@
15251634 static int process_termination_nocpsch(struct device_queue_manager *dqm,
15261635 struct qcm_process_device *qpd)
15271636 {
1528
- struct queue *q, *next;
1637
+ struct queue *q;
15291638 struct device_process_node *cur, *next_dpn;
15301639 int retval = 0;
1640
+ bool found = false;
15311641
15321642 dqm_lock(dqm);
15331643
15341644 /* Clear all user mode queues */
1535
- list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1645
+ while (!list_empty(&qpd->queues_list)) {
1646
+ struct mqd_manager *mqd_mgr;
15361647 int ret;
15371648
1649
+ q = list_first_entry(&qpd->queues_list, struct queue, list);
1650
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1651
+ q->properties.type)];
15381652 ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
15391653 if (ret)
15401654 retval = ret;
1655
+ dqm_unlock(dqm);
1656
+ mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1657
+ dqm_lock(dqm);
15411658 }
15421659
15431660 /* Unregister process */
....@@ -1546,25 +1663,63 @@
15461663 list_del(&cur->list);
15471664 kfree(cur);
15481665 dqm->processes_count--;
1666
+ found = true;
15491667 break;
15501668 }
15511669 }
15521670
15531671 dqm_unlock(dqm);
1672
+
1673
+ /* Outside the DQM lock because under the DQM lock we can't do
1674
+ * reclaim or take other locks that others hold while reclaiming.
1675
+ */
1676
+ if (found)
1677
+ kfd_dec_compute_active(dqm->dev);
1678
+
15541679 return retval;
15551680 }
15561681
1682
+static int get_wave_state(struct device_queue_manager *dqm,
1683
+ struct queue *q,
1684
+ void __user *ctl_stack,
1685
+ u32 *ctl_stack_used_size,
1686
+ u32 *save_area_used_size)
1687
+{
1688
+ struct mqd_manager *mqd_mgr;
1689
+
1690
+ dqm_lock(dqm);
1691
+
1692
+ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
1693
+
1694
+ if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
1695
+ q->properties.is_active || !q->device->cwsr_enabled ||
1696
+ !mqd_mgr->get_wave_state) {
1697
+ dqm_unlock(dqm);
1698
+ return -EINVAL;
1699
+ }
1700
+
1701
+ dqm_unlock(dqm);
1702
+
1703
+ /*
1704
+ * get_wave_state is outside the dqm lock to prevent circular locking
1705
+ * and the queue should be protected against destruction by the process
1706
+ * lock.
1707
+ */
1708
+ return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
1709
+ ctl_stack_used_size, save_area_used_size);
1710
+}
15571711
15581712 static int process_termination_cpsch(struct device_queue_manager *dqm,
15591713 struct qcm_process_device *qpd)
15601714 {
15611715 int retval;
1562
- struct queue *q, *next;
1716
+ struct queue *q;
15631717 struct kernel_queue *kq, *kq_next;
15641718 struct mqd_manager *mqd_mgr;
15651719 struct device_process_node *cur, *next_dpn;
15661720 enum kfd_unmap_queues_filter filter =
15671721 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
1722
+ bool found = false;
15681723
15691724 retval = 0;
15701725
....@@ -1573,7 +1728,7 @@
15731728 /* Clean all kernel queues */
15741729 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
15751730 list_del(&kq->list);
1576
- dqm->queue_count--;
1731
+ decrement_queue_count(dqm, qpd, kq->queue);
15771732 qpd->is_debug = false;
15781733 dqm->total_queue_count--;
15791734 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
....@@ -1581,13 +1736,13 @@
15811736
15821737 /* Clear all user mode queues */
15831738 list_for_each_entry(q, &qpd->queues_list, list) {
1584
- if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1585
- dqm->sdma_queue_count--;
1586
- deallocate_sdma_queue(dqm, q->sdma_id);
1587
- }
1739
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
1740
+ deallocate_sdma_queue(dqm, q);
1741
+ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1742
+ deallocate_sdma_queue(dqm, q);
15881743
15891744 if (q->properties.is_active)
1590
- dqm->queue_count--;
1745
+ decrement_queue_count(dqm, qpd, q);
15911746
15921747 dqm->total_queue_count--;
15931748 }
....@@ -1598,6 +1753,7 @@
15981753 list_del(&cur->list);
15991754 kfree(cur);
16001755 dqm->processes_count--;
1756
+ found = true;
16011757 break;
16021758 }
16031759 }
....@@ -1609,21 +1765,70 @@
16091765 qpd->reset_wavefronts = false;
16101766 }
16111767
1612
- /* lastly, free mqd resources */
1613
- list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1614
- mqd_mgr = dqm->ops.get_mqd_manager(dqm,
1615
- get_mqd_type_from_queue_type(q->properties.type));
1616
- if (!mqd_mgr) {
1617
- retval = -ENOMEM;
1618
- goto out;
1619
- }
1768
+ /* Lastly, free mqd resources.
1769
+ * Do free_mqd() after dqm_unlock to avoid circular locking.
1770
+ */
1771
+ while (!list_empty(&qpd->queues_list)) {
1772
+ q = list_first_entry(&qpd->queues_list, struct queue, list);
1773
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1774
+ q->properties.type)];
16201775 list_del(&q->list);
16211776 qpd->queue_count--;
1622
- mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1777
+ dqm_unlock(dqm);
1778
+ mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1779
+ dqm_lock(dqm);
1780
+ }
1781
+ dqm_unlock(dqm);
1782
+
1783
+ /* Outside the DQM lock because under the DQM lock we can't do
1784
+ * reclaim or take other locks that others hold while reclaiming.
1785
+ */
1786
+ if (found)
1787
+ kfd_dec_compute_active(dqm->dev);
1788
+
1789
+ return retval;
1790
+}
1791
+
1792
+static int init_mqd_managers(struct device_queue_manager *dqm)
1793
+{
1794
+ int i, j;
1795
+ struct mqd_manager *mqd_mgr;
1796
+
1797
+ for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
1798
+ mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
1799
+ if (!mqd_mgr) {
1800
+ pr_err("mqd manager [%d] initialization failed\n", i);
1801
+ goto out_free;
1802
+ }
1803
+ dqm->mqd_mgrs[i] = mqd_mgr;
16231804 }
16241805
1625
-out:
1626
- dqm_unlock(dqm);
1806
+ return 0;
1807
+
1808
+out_free:
1809
+ for (j = 0; j < i; j++) {
1810
+ kfree(dqm->mqd_mgrs[j]);
1811
+ dqm->mqd_mgrs[j] = NULL;
1812
+ }
1813
+
1814
+ return -ENOMEM;
1815
+}
1816
+
1817
+/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/
1818
+static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
1819
+{
1820
+ int retval;
1821
+ struct kfd_dev *dev = dqm->dev;
1822
+ struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
1823
+ uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
1824
+ get_num_all_sdma_engines(dqm) *
1825
+ dev->device_info->num_sdma_queues_per_engine +
1826
+ dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
1827
+
1828
+ retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
1829
+ &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
1830
+ (void *)&(mem_obj->cpu_ptr), false);
1831
+
16271832 return retval;
16281833 }
16291834
....@@ -1662,9 +1867,9 @@
16621867 dqm->ops.initialize = initialize_cpsch;
16631868 dqm->ops.start = start_cpsch;
16641869 dqm->ops.stop = stop_cpsch;
1870
+ dqm->ops.pre_reset = pre_reset;
16651871 dqm->ops.destroy_queue = destroy_queue_cpsch;
16661872 dqm->ops.update_queue = update_queue;
1667
- dqm->ops.get_mqd_manager = get_mqd_manager;
16681873 dqm->ops.register_process = register_process;
16691874 dqm->ops.unregister_process = unregister_process;
16701875 dqm->ops.uninitialize = uninitialize;
....@@ -1675,15 +1880,16 @@
16751880 dqm->ops.process_termination = process_termination_cpsch;
16761881 dqm->ops.evict_process_queues = evict_process_queues_cpsch;
16771882 dqm->ops.restore_process_queues = restore_process_queues_cpsch;
1883
+ dqm->ops.get_wave_state = get_wave_state;
16781884 break;
16791885 case KFD_SCHED_POLICY_NO_HWS:
16801886 /* initialize dqm for no cp scheduling */
16811887 dqm->ops.start = start_nocpsch;
16821888 dqm->ops.stop = stop_nocpsch;
1889
+ dqm->ops.pre_reset = pre_reset;
16831890 dqm->ops.create_queue = create_queue_nocpsch;
16841891 dqm->ops.destroy_queue = destroy_queue_nocpsch;
16851892 dqm->ops.update_queue = update_queue;
1686
- dqm->ops.get_mqd_manager = get_mqd_manager;
16871893 dqm->ops.register_process = register_process;
16881894 dqm->ops.unregister_process = unregister_process;
16891895 dqm->ops.initialize = initialize_nocpsch;
....@@ -1694,6 +1900,7 @@
16941900 dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
16951901 dqm->ops.restore_process_queues =
16961902 restore_process_queues_nocpsch;
1903
+ dqm->ops.get_wave_state = get_wave_state;
16971904 break;
16981905 default:
16991906 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
....@@ -1717,16 +1924,37 @@
17171924 case CHIP_FIJI:
17181925 case CHIP_POLARIS10:
17191926 case CHIP_POLARIS11:
1927
+ case CHIP_POLARIS12:
1928
+ case CHIP_VEGAM:
17201929 device_queue_manager_init_vi_tonga(&dqm->asic_ops);
17211930 break;
17221931
17231932 case CHIP_VEGA10:
1933
+ case CHIP_VEGA12:
1934
+ case CHIP_VEGA20:
17241935 case CHIP_RAVEN:
1936
+ case CHIP_RENOIR:
1937
+ case CHIP_ARCTURUS:
17251938 device_queue_manager_init_v9(&dqm->asic_ops);
1939
+ break;
1940
+ case CHIP_NAVI10:
1941
+ case CHIP_NAVI12:
1942
+ case CHIP_NAVI14:
1943
+ case CHIP_SIENNA_CICHLID:
1944
+ case CHIP_NAVY_FLOUNDER:
1945
+ device_queue_manager_init_v10_navi10(&dqm->asic_ops);
17261946 break;
17271947 default:
17281948 WARN(1, "Unexpected ASIC family %u",
17291949 dev->device_info->asic_family);
1950
+ goto out_free;
1951
+ }
1952
+
1953
+ if (init_mqd_managers(dqm))
1954
+ goto out_free;
1955
+
1956
+ if (allocate_hiq_sdma_mqd(dqm)) {
1957
+ pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
17301958 goto out_free;
17311959 }
17321960
....@@ -1738,14 +1966,22 @@
17381966 return NULL;
17391967 }
17401968
1969
+static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
1970
+ struct kfd_mem_obj *mqd)
1971
+{
1972
+ WARN(!mqd, "No hiq sdma mqd trunk to free");
1973
+
1974
+ amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
1975
+}
1976
+
17411977 void device_queue_manager_uninit(struct device_queue_manager *dqm)
17421978 {
17431979 dqm->ops.uninitialize(dqm);
1980
+ deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
17441981 kfree(dqm);
17451982 }
17461983
1747
-int kfd_process_vm_fault(struct device_queue_manager *dqm,
1748
- unsigned int pasid)
1984
+int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid)
17491985 {
17501986 struct kfd_process_device *pdd;
17511987 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
....@@ -1753,6 +1989,7 @@
17531989
17541990 if (!p)
17551991 return -EINVAL;
1992
+ WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
17561993 pdd = kfd_get_process_device_data(dqm->dev, p);
17571994 if (pdd)
17581995 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
....@@ -1765,7 +2002,7 @@
17652002 {
17662003 struct device_queue_manager *dqm = container_of(work,
17672004 struct device_queue_manager, hw_exception_work);
1768
- dqm->dev->kfd2kgd->gpu_recover(dqm->dev->kgd);
2005
+ amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
17692006 }
17702007
17712008 #if defined(CONFIG_DEBUG_FS)
....@@ -1798,13 +2035,20 @@
17982035 int pipe, queue;
17992036 int r = 0;
18002037
2038
+ if (!dqm->sched_running) {
2039
+ seq_printf(m, " Device is stopped\n");
2040
+
2041
+ return 0;
2042
+ }
2043
+
18012044 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
1802
- KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs);
2045
+ KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
2046
+ &dump, &n_regs);
18032047 if (!r) {
18042048 seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n",
1805
- KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
1806
- KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
1807
- KFD_CIK_HIQ_QUEUE);
2049
+ KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
2050
+ KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
2051
+ KFD_CIK_HIQ_QUEUE);
18082052 seq_reg_dump(m, dump, n_regs);
18092053
18102054 kfree(dump);
....@@ -1815,7 +2059,7 @@
18152059
18162060 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
18172061 if (!test_bit(pipe_offset + queue,
1818
- dqm->dev->shared_resources.queue_bitmap))
2062
+ dqm->dev->shared_resources.cp_queue_bitmap))
18192063 continue;
18202064
18212065 r = dqm->dev->kfd2kgd->hqd_dump(
....@@ -1831,8 +2075,10 @@
18312075 }
18322076 }
18332077
1834
- for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) {
1835
- for (queue = 0; queue < KFD_SDMA_QUEUES_PER_ENGINE; queue++) {
2078
+ for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
2079
+ for (queue = 0;
2080
+ queue < dqm->dev->device_info->num_sdma_queues_per_engine;
2081
+ queue++) {
18362082 r = dqm->dev->kfd2kgd->hqd_sdma_dump(
18372083 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
18382084 if (r)