forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-10-09 244b2c5ca8b14627e4a17755e5922221e121c771
kernel/drivers/gpu/drm/amd/amdkfd/kfd_process.c
....@@ -25,12 +25,16 @@
2525 #include <linux/sched.h>
2626 #include <linux/sched/mm.h>
2727 #include <linux/sched/task.h>
28
+#include <linux/mmu_context.h>
2829 #include <linux/slab.h>
2930 #include <linux/amd-iommu.h>
3031 #include <linux/notifier.h>
3132 #include <linux/compat.h>
3233 #include <linux/mman.h>
3334 #include <linux/file.h>
35
+#include <linux/pm_runtime.h>
36
+#include "amdgpu_amdkfd.h"
37
+#include "amdgpu.h"
3438
3539 struct mm_struct;
3640
....@@ -61,12 +65,548 @@
6165
6266 static struct kfd_process *find_process(const struct task_struct *thread);
6367 static void kfd_process_ref_release(struct kref *ref);
64
-static struct kfd_process *create_process(const struct task_struct *thread,
65
- struct file *filep);
68
+static struct kfd_process *create_process(const struct task_struct *thread);
69
+static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
6670
6771 static void evict_process_worker(struct work_struct *work);
6872 static void restore_process_worker(struct work_struct *work);
6973
74
+struct kfd_procfs_tree {
75
+ struct kobject *kobj;
76
+};
77
+
78
+static struct kfd_procfs_tree procfs;
79
+
80
+/*
81
+ * Structure for SDMA activity tracking
82
+ */
83
+struct kfd_sdma_activity_handler_workarea {
84
+ struct work_struct sdma_activity_work;
85
+ struct kfd_process_device *pdd;
86
+ uint64_t sdma_activity_counter;
87
+};
88
+
89
+struct temp_sdma_queue_list {
90
+ uint64_t __user *rptr;
91
+ uint64_t sdma_val;
92
+ unsigned int queue_id;
93
+ struct list_head list;
94
+};
95
+
96
+static void kfd_sdma_activity_worker(struct work_struct *work)
97
+{
98
+ struct kfd_sdma_activity_handler_workarea *workarea;
99
+ struct kfd_process_device *pdd;
100
+ uint64_t val;
101
+ struct mm_struct *mm;
102
+ struct queue *q;
103
+ struct qcm_process_device *qpd;
104
+ struct device_queue_manager *dqm;
105
+ int ret = 0;
106
+ struct temp_sdma_queue_list sdma_q_list;
107
+ struct temp_sdma_queue_list *sdma_q, *next;
108
+
109
+ workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
110
+ sdma_activity_work);
111
+ if (!workarea)
112
+ return;
113
+
114
+ pdd = workarea->pdd;
115
+ if (!pdd)
116
+ return;
117
+ dqm = pdd->dev->dqm;
118
+ qpd = &pdd->qpd;
119
+ if (!dqm || !qpd)
120
+ return;
121
+ /*
122
+ * Total SDMA activity is current SDMA activity + past SDMA activity
123
+ * Past SDMA count is stored in pdd.
124
+ * To get the current activity counters for all active SDMA queues,
125
+ * we loop over all SDMA queues and get their counts from user-space.
126
+ *
127
+ * We cannot call get_user() with dqm_lock held as it can cause
128
+ * a circular lock dependency situation. To read the SDMA stats,
129
+ * we need to do the following:
130
+ *
131
+ * 1. Create a temporary list of SDMA queue nodes from the qpd->queues_list,
132
+ * with dqm_lock/dqm_unlock().
133
+ * 2. Call get_user() for each node in temporary list without dqm_lock.
134
+ * Save the SDMA count for each node and also add the count to the total
135
+ * SDMA count counter.
136
+ * Its possible, during this step, a few SDMA queue nodes got deleted
137
+ * from the qpd->queues_list.
138
+ * 3. Do a second pass over qpd->queues_list to check if any nodes got deleted.
139
+ * If any node got deleted, its SDMA count would be captured in the sdma
140
+ * past activity counter. So subtract the SDMA counter stored in step 2
141
+ * for this node from the total SDMA count.
142
+ */
143
+ INIT_LIST_HEAD(&sdma_q_list.list);
144
+
145
+ /*
146
+ * Create the temp list of all SDMA queues
147
+ */
148
+ dqm_lock(dqm);
149
+
150
+ list_for_each_entry(q, &qpd->queues_list, list) {
151
+ if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
152
+ (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
153
+ continue;
154
+
155
+ sdma_q = kzalloc(sizeof(struct temp_sdma_queue_list), GFP_KERNEL);
156
+ if (!sdma_q) {
157
+ dqm_unlock(dqm);
158
+ goto cleanup;
159
+ }
160
+
161
+ INIT_LIST_HEAD(&sdma_q->list);
162
+ sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr;
163
+ sdma_q->queue_id = q->properties.queue_id;
164
+ list_add_tail(&sdma_q->list, &sdma_q_list.list);
165
+ }
166
+
167
+ /*
168
+ * If the temp list is empty, then no SDMA queues nodes were found in
169
+ * qpd->queues_list. Return the past activity count as the total sdma
170
+ * count
171
+ */
172
+ if (list_empty(&sdma_q_list.list)) {
173
+ workarea->sdma_activity_counter = pdd->sdma_past_activity_counter;
174
+ dqm_unlock(dqm);
175
+ return;
176
+ }
177
+
178
+ dqm_unlock(dqm);
179
+
180
+ /*
181
+ * Get the usage count for each SDMA queue in temp_list.
182
+ */
183
+ mm = get_task_mm(pdd->process->lead_thread);
184
+ if (!mm)
185
+ goto cleanup;
186
+
187
+ kthread_use_mm(mm);
188
+
189
+ list_for_each_entry(sdma_q, &sdma_q_list.list, list) {
190
+ val = 0;
191
+ ret = read_sdma_queue_counter(sdma_q->rptr, &val);
192
+ if (ret) {
193
+ pr_debug("Failed to read SDMA queue active counter for queue id: %d",
194
+ sdma_q->queue_id);
195
+ } else {
196
+ sdma_q->sdma_val = val;
197
+ workarea->sdma_activity_counter += val;
198
+ }
199
+ }
200
+
201
+ kthread_unuse_mm(mm);
202
+ mmput(mm);
203
+
204
+ /*
205
+ * Do a second iteration over qpd_queues_list to check if any SDMA
206
+ * nodes got deleted while fetching SDMA counter.
207
+ */
208
+ dqm_lock(dqm);
209
+
210
+ workarea->sdma_activity_counter += pdd->sdma_past_activity_counter;
211
+
212
+ list_for_each_entry(q, &qpd->queues_list, list) {
213
+ if (list_empty(&sdma_q_list.list))
214
+ break;
215
+
216
+ if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
217
+ (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
218
+ continue;
219
+
220
+ list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
221
+ if (((uint64_t __user *)q->properties.read_ptr == sdma_q->rptr) &&
222
+ (sdma_q->queue_id == q->properties.queue_id)) {
223
+ list_del(&sdma_q->list);
224
+ kfree(sdma_q);
225
+ break;
226
+ }
227
+ }
228
+ }
229
+
230
+ dqm_unlock(dqm);
231
+
232
+ /*
233
+ * If temp list is not empty, it implies some queues got deleted
234
+ * from qpd->queues_list during SDMA usage read. Subtract the SDMA
235
+ * count for each node from the total SDMA count.
236
+ */
237
+ list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
238
+ workarea->sdma_activity_counter -= sdma_q->sdma_val;
239
+ list_del(&sdma_q->list);
240
+ kfree(sdma_q);
241
+ }
242
+
243
+ return;
244
+
245
+cleanup:
246
+ list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
247
+ list_del(&sdma_q->list);
248
+ kfree(sdma_q);
249
+ }
250
+}
251
+
252
+/**
253
+ * @kfd_get_cu_occupancy() - Collect number of waves in-flight on this device
254
+ * by current process. Translates acquired wave count into number of compute units
255
+ * that are occupied.
256
+ *
257
+ * @atr: Handle of attribute that allows reporting of wave count. The attribute
258
+ * handle encapsulates GPU device it is associated with, thereby allowing collection
259
+ * of waves in flight, etc
260
+ *
261
+ * @buffer: Handle of user provided buffer updated with wave count
262
+ *
263
+ * Return: Number of bytes written to user buffer or an error value
264
+ */
265
+static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
266
+{
267
+ int cu_cnt;
268
+ int wave_cnt;
269
+ int max_waves_per_cu;
270
+ struct kfd_dev *dev = NULL;
271
+ struct kfd_process *proc = NULL;
272
+ struct kfd_process_device *pdd = NULL;
273
+
274
+ pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
275
+ dev = pdd->dev;
276
+ if (dev->kfd2kgd->get_cu_occupancy == NULL)
277
+ return -EINVAL;
278
+
279
+ cu_cnt = 0;
280
+ proc = pdd->process;
281
+ if (pdd->qpd.queue_count == 0) {
282
+ pr_debug("Gpu-Id: %d has no active queues for process %d\n",
283
+ dev->id, proc->pasid);
284
+ return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
285
+ }
286
+
287
+ /* Collect wave count from device if it supports */
288
+ wave_cnt = 0;
289
+ max_waves_per_cu = 0;
290
+ dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt,
291
+ &max_waves_per_cu);
292
+
293
+ /* Translate wave count to number of compute units */
294
+ cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
295
+ return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
296
+}
297
+
298
+static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
299
+ char *buffer)
300
+{
301
+ if (strcmp(attr->name, "pasid") == 0) {
302
+ struct kfd_process *p = container_of(attr, struct kfd_process,
303
+ attr_pasid);
304
+
305
+ return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
306
+ } else if (strncmp(attr->name, "vram_", 5) == 0) {
307
+ struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
308
+ attr_vram);
309
+ return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
310
+ } else if (strncmp(attr->name, "sdma_", 5) == 0) {
311
+ struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
312
+ attr_sdma);
313
+ struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
314
+
315
+ INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
316
+ kfd_sdma_activity_worker);
317
+
318
+ sdma_activity_work_handler.pdd = pdd;
319
+ sdma_activity_work_handler.sdma_activity_counter = 0;
320
+
321
+ schedule_work(&sdma_activity_work_handler.sdma_activity_work);
322
+
323
+ flush_work(&sdma_activity_work_handler.sdma_activity_work);
324
+
325
+ return snprintf(buffer, PAGE_SIZE, "%llu\n",
326
+ (sdma_activity_work_handler.sdma_activity_counter)/
327
+ SDMA_ACTIVITY_DIVISOR);
328
+ } else {
329
+ pr_err("Invalid attribute");
330
+ return -EINVAL;
331
+ }
332
+
333
+ return 0;
334
+}
335
+
336
/* kobject release callback: procfs kobjects are plain heap allocations. */
static void kfd_procfs_kobj_release(struct kobject *kobj)
{
	kfree(kobj);
}
340
+
341
+static const struct sysfs_ops kfd_procfs_ops = {
342
+ .show = kfd_procfs_show,
343
+};
344
+
345
+static struct kobj_type procfs_type = {
346
+ .release = kfd_procfs_kobj_release,
347
+ .sysfs_ops = &kfd_procfs_ops,
348
+};
349
+
350
+void kfd_procfs_init(void)
351
+{
352
+ int ret = 0;
353
+
354
+ procfs.kobj = kfd_alloc_struct(procfs.kobj);
355
+ if (!procfs.kobj)
356
+ return;
357
+
358
+ ret = kobject_init_and_add(procfs.kobj, &procfs_type,
359
+ &kfd_device->kobj, "proc");
360
+ if (ret) {
361
+ pr_warn("Could not create procfs proc folder");
362
+ /* If we fail to create the procfs, clean up */
363
+ kfd_procfs_shutdown();
364
+ }
365
+}
366
+
367
+void kfd_procfs_shutdown(void)
368
+{
369
+ if (procfs.kobj) {
370
+ kobject_del(procfs.kobj);
371
+ kobject_put(procfs.kobj);
372
+ procfs.kobj = NULL;
373
+ }
374
+}
375
+
376
+static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
377
+ struct attribute *attr, char *buffer)
378
+{
379
+ struct queue *q = container_of(kobj, struct queue, kobj);
380
+
381
+ if (!strcmp(attr->name, "size"))
382
+ return snprintf(buffer, PAGE_SIZE, "%llu",
383
+ q->properties.queue_size);
384
+ else if (!strcmp(attr->name, "type"))
385
+ return snprintf(buffer, PAGE_SIZE, "%d", q->properties.type);
386
+ else if (!strcmp(attr->name, "gpuid"))
387
+ return snprintf(buffer, PAGE_SIZE, "%u", q->device->id);
388
+ else
389
+ pr_err("Invalid attribute");
390
+
391
+ return 0;
392
+}
393
+
394
+static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
395
+ struct attribute *attr, char *buffer)
396
+{
397
+ if (strcmp(attr->name, "evicted_ms") == 0) {
398
+ struct kfd_process_device *pdd = container_of(attr,
399
+ struct kfd_process_device,
400
+ attr_evict);
401
+ uint64_t evict_jiffies;
402
+
403
+ evict_jiffies = atomic64_read(&pdd->evict_duration_counter);
404
+
405
+ return snprintf(buffer,
406
+ PAGE_SIZE,
407
+ "%llu\n",
408
+ jiffies64_to_msecs(evict_jiffies));
409
+
410
+ /* Sysfs handle that gets CU occupancy is per device */
411
+ } else if (strcmp(attr->name, "cu_occupancy") == 0) {
412
+ return kfd_get_cu_occupancy(attr, buffer);
413
+ } else {
414
+ pr_err("Invalid attribute");
415
+ }
416
+
417
+ return 0;
418
+}
419
+
420
+static struct attribute attr_queue_size = {
421
+ .name = "size",
422
+ .mode = KFD_SYSFS_FILE_MODE
423
+};
424
+
425
+static struct attribute attr_queue_type = {
426
+ .name = "type",
427
+ .mode = KFD_SYSFS_FILE_MODE
428
+};
429
+
430
+static struct attribute attr_queue_gpuid = {
431
+ .name = "gpuid",
432
+ .mode = KFD_SYSFS_FILE_MODE
433
+};
434
+
435
+static struct attribute *procfs_queue_attrs[] = {
436
+ &attr_queue_size,
437
+ &attr_queue_type,
438
+ &attr_queue_gpuid,
439
+ NULL
440
+};
441
+
442
+static const struct sysfs_ops procfs_queue_ops = {
443
+ .show = kfd_procfs_queue_show,
444
+};
445
+
446
+static struct kobj_type procfs_queue_type = {
447
+ .sysfs_ops = &procfs_queue_ops,
448
+ .default_attrs = procfs_queue_attrs,
449
+};
450
+
451
+static const struct sysfs_ops procfs_stats_ops = {
452
+ .show = kfd_procfs_stats_show,
453
+};
454
+
455
+static struct kobj_type procfs_stats_type = {
456
+ .sysfs_ops = &procfs_stats_ops,
457
+ .release = kfd_procfs_kobj_release,
458
+};
459
+
460
+int kfd_procfs_add_queue(struct queue *q)
461
+{
462
+ struct kfd_process *proc;
463
+ int ret;
464
+
465
+ if (!q || !q->process)
466
+ return -EINVAL;
467
+ proc = q->process;
468
+
469
+ /* Create proc/<pid>/queues/<queue id> folder */
470
+ if (!proc->kobj_queues)
471
+ return -EFAULT;
472
+ ret = kobject_init_and_add(&q->kobj, &procfs_queue_type,
473
+ proc->kobj_queues, "%u", q->properties.queue_id);
474
+ if (ret < 0) {
475
+ pr_warn("Creating proc/<pid>/queues/%u failed",
476
+ q->properties.queue_id);
477
+ kobject_put(&q->kobj);
478
+ return ret;
479
+ }
480
+
481
+ return 0;
482
+}
483
+
484
+static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
485
+ char *name)
486
+{
487
+ int ret = 0;
488
+
489
+ if (!p || !attr || !name)
490
+ return -EINVAL;
491
+
492
+ attr->name = name;
493
+ attr->mode = KFD_SYSFS_FILE_MODE;
494
+ sysfs_attr_init(attr);
495
+
496
+ ret = sysfs_create_file(p->kobj, attr);
497
+
498
+ return ret;
499
+}
500
+
501
+static int kfd_procfs_add_sysfs_stats(struct kfd_process *p)
502
+{
503
+ int ret = 0;
504
+ struct kfd_process_device *pdd;
505
+ char stats_dir_filename[MAX_SYSFS_FILENAME_LEN];
506
+
507
+ if (!p)
508
+ return -EINVAL;
509
+
510
+ if (!p->kobj)
511
+ return -EFAULT;
512
+
513
+ /*
514
+ * Create sysfs files for each GPU:
515
+ * - proc/<pid>/stats_<gpuid>/
516
+ * - proc/<pid>/stats_<gpuid>/evicted_ms
517
+ * - proc/<pid>/stats_<gpuid>/cu_occupancy
518
+ */
519
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
520
+ struct kobject *kobj_stats;
521
+
522
+ snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN,
523
+ "stats_%u", pdd->dev->id);
524
+ kobj_stats = kfd_alloc_struct(kobj_stats);
525
+ if (!kobj_stats)
526
+ return -ENOMEM;
527
+
528
+ ret = kobject_init_and_add(kobj_stats,
529
+ &procfs_stats_type,
530
+ p->kobj,
531
+ stats_dir_filename);
532
+
533
+ if (ret) {
534
+ pr_warn("Creating KFD proc/stats_%s folder failed",
535
+ stats_dir_filename);
536
+ kobject_put(kobj_stats);
537
+ goto err;
538
+ }
539
+
540
+ pdd->kobj_stats = kobj_stats;
541
+ pdd->attr_evict.name = "evicted_ms";
542
+ pdd->attr_evict.mode = KFD_SYSFS_FILE_MODE;
543
+ sysfs_attr_init(&pdd->attr_evict);
544
+ ret = sysfs_create_file(kobj_stats, &pdd->attr_evict);
545
+ if (ret)
546
+ pr_warn("Creating eviction stats for gpuid %d failed",
547
+ (int)pdd->dev->id);
548
+
549
+ /* Add sysfs file to report compute unit occupancy */
550
+ if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) {
551
+ pdd->attr_cu_occupancy.name = "cu_occupancy";
552
+ pdd->attr_cu_occupancy.mode = KFD_SYSFS_FILE_MODE;
553
+ sysfs_attr_init(&pdd->attr_cu_occupancy);
554
+ ret = sysfs_create_file(kobj_stats,
555
+ &pdd->attr_cu_occupancy);
556
+ if (ret)
557
+ pr_warn("Creating %s failed for gpuid: %d",
558
+ pdd->attr_cu_occupancy.name,
559
+ (int)pdd->dev->id);
560
+ }
561
+ }
562
+err:
563
+ return ret;
564
+}
565
+
566
+
567
+static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
568
+{
569
+ int ret = 0;
570
+ struct kfd_process_device *pdd;
571
+
572
+ if (!p)
573
+ return -EINVAL;
574
+
575
+ if (!p->kobj)
576
+ return -EFAULT;
577
+
578
+ /*
579
+ * Create sysfs files for each GPU:
580
+ * - proc/<pid>/vram_<gpuid>
581
+ * - proc/<pid>/sdma_<gpuid>
582
+ */
583
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
584
+ snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
585
+ pdd->dev->id);
586
+ ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename);
587
+ if (ret)
588
+ pr_warn("Creating vram usage for gpu id %d failed",
589
+ (int)pdd->dev->id);
590
+
591
+ snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
592
+ pdd->dev->id);
593
+ ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename);
594
+ if (ret)
595
+ pr_warn("Creating sdma usage for gpu id %d failed",
596
+ (int)pdd->dev->id);
597
+ }
598
+
599
+ return ret;
600
+}
601
+
602
+void kfd_procfs_del_queue(struct queue *q)
603
+{
604
+ if (!q)
605
+ return;
606
+
607
+ kobject_del(&q->kobj);
608
+ kobject_put(&q->kobj);
609
+}
70610
71611 int kfd_process_create_wq(void)
72612 {
....@@ -100,8 +640,8 @@
100640 {
101641 struct kfd_dev *dev = pdd->dev;
102642
103
- dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd, mem, pdd->vm);
104
- dev->kfd2kgd->free_memory_of_gpu(dev->kgd, mem);
643
+ amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
644
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL);
105645 }
106646
107647 /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
....@@ -119,16 +659,16 @@
119659 int handle;
120660 int err;
121661
122
- err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
662
+ err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
123663 pdd->vm, &mem, NULL, flags);
124664 if (err)
125665 goto err_alloc_mem;
126666
127
- err = kdev->kfd2kgd->map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
667
+ err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
128668 if (err)
129669 goto err_map_mem;
130670
131
- err = kdev->kfd2kgd->sync_memory(kdev->kgd, mem, true);
671
+ err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
132672 if (err) {
133673 pr_debug("Sync memory failed, wait interrupted by user signal\n");
134674 goto sync_memory_failed;
....@@ -147,7 +687,7 @@
147687 }
148688
149689 if (kptr) {
150
- err = kdev->kfd2kgd->map_gtt_bo_to_kernel(kdev->kgd,
690
+ err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
151691 (struct kgd_mem *)mem, kptr, NULL);
152692 if (err) {
153693 pr_debug("Map GTT BO to kernel failed\n");
....@@ -165,7 +705,7 @@
165705 return err;
166706
167707 err_map_mem:
168
- kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem);
708
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL);
169709 err_alloc_mem:
170710 *kptr = NULL;
171711 return err;
....@@ -180,10 +720,10 @@
180720 static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
181721 {
182722 struct qcm_process_device *qpd = &pdd->qpd;
183
- uint32_t flags = ALLOC_MEM_FLAGS_GTT |
184
- ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
185
- ALLOC_MEM_FLAGS_WRITABLE |
186
- ALLOC_MEM_FLAGS_EXECUTABLE;
723
+ uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT |
724
+ KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
725
+ KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
726
+ KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
187727 void *kaddr;
188728 int ret;
189729
....@@ -205,6 +745,7 @@
205745 {
206746 struct kfd_process *process;
207747 struct task_struct *thread = current;
748
+ int ret;
208749
209750 if (!thread->mm)
210751 return ERR_PTR(-EINVAL);
....@@ -222,11 +763,62 @@
222763
223764 /* A prior open of /dev/kfd could have already created the process. */
224765 process = find_process(thread);
225
- if (process)
766
+ if (process) {
226767 pr_debug("Process already found\n");
227
- else
228
- process = create_process(thread, filep);
768
+ } else {
769
+ process = create_process(thread);
770
+ if (IS_ERR(process))
771
+ goto out;
229772
773
+ ret = kfd_process_init_cwsr_apu(process, filep);
774
+ if (ret) {
775
+ process = ERR_PTR(ret);
776
+ goto out;
777
+ }
778
+
779
+ if (!procfs.kobj)
780
+ goto out;
781
+
782
+ process->kobj = kfd_alloc_struct(process->kobj);
783
+ if (!process->kobj) {
784
+ pr_warn("Creating procfs kobject failed");
785
+ goto out;
786
+ }
787
+ ret = kobject_init_and_add(process->kobj, &procfs_type,
788
+ procfs.kobj, "%d",
789
+ (int)process->lead_thread->pid);
790
+ if (ret) {
791
+ pr_warn("Creating procfs pid directory failed");
792
+ kobject_put(process->kobj);
793
+ goto out;
794
+ }
795
+
796
+ process->attr_pasid.name = "pasid";
797
+ process->attr_pasid.mode = KFD_SYSFS_FILE_MODE;
798
+ sysfs_attr_init(&process->attr_pasid);
799
+ ret = sysfs_create_file(process->kobj, &process->attr_pasid);
800
+ if (ret)
801
+ pr_warn("Creating pasid for pid %d failed",
802
+ (int)process->lead_thread->pid);
803
+
804
+ process->kobj_queues = kobject_create_and_add("queues",
805
+ process->kobj);
806
+ if (!process->kobj_queues)
807
+ pr_warn("Creating KFD proc/queues folder failed");
808
+
809
+ ret = kfd_procfs_add_sysfs_stats(process);
810
+ if (ret)
811
+ pr_warn("Creating sysfs stats dir for pid %d failed",
812
+ (int)process->lead_thread->pid);
813
+
814
+ ret = kfd_procfs_add_sysfs_files(process);
815
+ if (ret)
816
+ pr_warn("Creating sysfs usage file for pid %d failed",
817
+ (int)process->lead_thread->pid);
818
+ }
819
+out:
820
+ if (!IS_ERR(process))
821
+ kref_get(&process->ref);
230822 mutex_unlock(&kfd_processes_mutex);
231823
232824 return process;
....@@ -296,11 +888,11 @@
296888 per_device_list) {
297889 if (!peer_pdd->vm)
298890 continue;
299
- peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu(
891
+ amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
300892 peer_pdd->dev->kgd, mem, peer_pdd->vm);
301893 }
302894
303
- pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd, mem);
895
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL);
304896 kfd_process_device_remove_obj_handle(pdd, id);
305897 }
306898 }
....@@ -319,13 +911,16 @@
319911
320912 list_for_each_entry_safe(pdd, temp, &p->per_device_data,
321913 per_device_list) {
322
- pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
914
+ pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
323915 pdd->dev->id, p->pasid);
324916
325
- if (pdd->drm_file)
917
+ if (pdd->drm_file) {
918
+ amdgpu_amdkfd_gpuvm_release_process_vm(
919
+ pdd->dev->kgd, pdd->vm);
326920 fput(pdd->drm_file);
921
+ }
327922 else if (pdd->vm)
328
- pdd->dev->kfd2kgd->destroy_process_vm(
923
+ amdgpu_amdkfd_gpuvm_destroy_process_vm(
329924 pdd->dev->kgd, pdd->vm);
330925
331926 list_del(&pdd->per_device_list);
....@@ -336,6 +931,18 @@
336931
337932 kfree(pdd->qpd.doorbell_bitmap);
338933 idr_destroy(&pdd->alloc_idr);
934
+
935
+ kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
936
+
937
+ /*
938
+ * before destroying pdd, make sure to report availability
939
+ * for auto suspend
940
+ */
941
+ if (pdd->runtime_inuse) {
942
+ pm_runtime_mark_last_busy(pdd->dev->ddev->dev);
943
+ pm_runtime_put_autosuspend(pdd->dev->ddev->dev);
944
+ pdd->runtime_inuse = false;
945
+ }
339946
340947 kfree(pdd);
341948 }
....@@ -350,6 +957,32 @@
350957 {
351958 struct kfd_process *p = container_of(work, struct kfd_process,
352959 release_work);
960
+ struct kfd_process_device *pdd;
961
+
962
+ /* Remove the procfs files */
963
+ if (p->kobj) {
964
+ sysfs_remove_file(p->kobj, &p->attr_pasid);
965
+ kobject_del(p->kobj_queues);
966
+ kobject_put(p->kobj_queues);
967
+ p->kobj_queues = NULL;
968
+
969
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
970
+ sysfs_remove_file(p->kobj, &pdd->attr_vram);
971
+ sysfs_remove_file(p->kobj, &pdd->attr_sdma);
972
+
973
+ sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict);
974
+ if (pdd->dev->kfd2kgd->get_cu_occupancy)
975
+ sysfs_remove_file(pdd->kobj_stats,
976
+ &pdd->attr_cu_occupancy);
977
+ kobject_del(pdd->kobj_stats);
978
+ kobject_put(pdd->kobj_stats);
979
+ pdd->kobj_stats = NULL;
980
+ }
981
+
982
+ kobject_del(p->kobj);
983
+ kobject_put(p->kobj);
984
+ p->kobj = NULL;
985
+ }
353986
354987 kfd_iommu_unbind_process(p);
355988
....@@ -361,8 +994,6 @@
361994 kfd_event_free_process(p);
362995
363996 kfd_pasid_free(p->pasid);
364
- kfd_free_process_doorbells(p);
365
-
366997 mutex_destroy(&p->mutex);
367998
368999 put_task_struct(p->lead_thread);
....@@ -378,11 +1009,9 @@
3781009 queue_work(kfd_process_wq, &p->release_work);
3791010 }
3801011
381
-static void kfd_process_destroy_delayed(struct rcu_head *rcu)
1012
+static void kfd_process_free_notifier(struct mmu_notifier *mn)
3821013 {
383
- struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);
384
-
385
- kfd_unref_process(p);
1014
+ kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
3861015 }
3871016
3881017 static void kfd_process_notifier_release(struct mmu_notifier *mn,
....@@ -431,15 +1060,20 @@
4311060
4321061 /* Indicate to other users that MM is no longer valid */
4331062 p->mm = NULL;
1063
+ /* Signal the eviction fence after user mode queues are
1064
+ * destroyed. This allows any BOs to be freed without
1065
+ * triggering pointless evictions or waiting for fences.
1066
+ */
1067
+ dma_fence_signal(p->ef);
4341068
4351069 mutex_unlock(&p->mutex);
4361070
437
- mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
438
- mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
1071
+ mmu_notifier_put(&p->mmu_notifier);
4391072 }
4401073
4411074 static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
4421075 .release = kfd_process_notifier_release,
1076
+ .free_notifier = kfd_process_free_notifier,
4431077 };
4441078
4451079 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
....@@ -454,8 +1088,7 @@
4541088 if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
4551089 continue;
4561090
457
- offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
458
- << PAGE_SHIFT;
1091
+ offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
4591092 qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
4601093 KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
4611094 MAP_SHARED, offset);
....@@ -483,8 +1116,9 @@
4831116 {
4841117 struct kfd_dev *dev = pdd->dev;
4851118 struct qcm_process_device *qpd = &pdd->qpd;
486
- uint32_t flags = ALLOC_MEM_FLAGS_GTT |
487
- ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE;
1119
+ uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
1120
+ | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
1121
+ | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
4881122 void *kaddr;
4891123 int ret;
4901124
....@@ -509,81 +1143,64 @@
5091143 return 0;
5101144 }
5111145
512
-static struct kfd_process *create_process(const struct task_struct *thread,
513
- struct file *filep)
1146
+/*
1147
+ * On return the kfd_process is fully operational and will be freed when the
1148
+ * mm is released
1149
+ */
1150
+static struct kfd_process *create_process(const struct task_struct *thread)
5141151 {
5151152 struct kfd_process *process;
5161153 int err = -ENOMEM;
5171154
5181155 process = kzalloc(sizeof(*process), GFP_KERNEL);
519
-
5201156 if (!process)
5211157 goto err_alloc_process;
1158
+
1159
+ kref_init(&process->ref);
1160
+ mutex_init(&process->mutex);
1161
+ process->mm = thread->mm;
1162
+ process->lead_thread = thread->group_leader;
1163
+ INIT_LIST_HEAD(&process->per_device_data);
1164
+ INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
1165
+ INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
1166
+ process->last_restore_timestamp = get_jiffies_64();
1167
+ kfd_event_init_process(process);
1168
+ process->is_32bit_user_mode = in_compat_syscall();
5221169
5231170 process->pasid = kfd_pasid_alloc();
5241171 if (process->pasid == 0)
5251172 goto err_alloc_pasid;
526
-
527
- if (kfd_alloc_process_doorbells(process) < 0)
528
- goto err_alloc_doorbells;
529
-
530
- kref_init(&process->ref);
531
-
532
- mutex_init(&process->mutex);
533
-
534
- process->mm = thread->mm;
535
-
536
- /* register notifier */
537
- process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
538
- err = mmu_notifier_register(&process->mmu_notifier, process->mm);
539
- if (err)
540
- goto err_mmu_notifier;
541
-
542
- hash_add_rcu(kfd_processes_table, &process->kfd_processes,
543
- (uintptr_t)process->mm);
544
-
545
- process->lead_thread = thread->group_leader;
546
- get_task_struct(process->lead_thread);
547
-
548
- INIT_LIST_HEAD(&process->per_device_data);
549
-
550
- kfd_event_init_process(process);
5511173
5521174 err = pqm_init(&process->pqm, process);
5531175 if (err != 0)
5541176 goto err_process_pqm_init;
5551177
5561178 /* init process apertures*/
557
- process->is_32bit_user_mode = in_compat_syscall();
5581179 err = kfd_init_apertures(process);
5591180 if (err != 0)
5601181 goto err_init_apertures;
5611182
562
- INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
563
- INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
564
- process->last_restore_timestamp = get_jiffies_64();
565
-
566
- err = kfd_process_init_cwsr_apu(process, filep);
1183
+ /* Must be last, have to use release destruction after this */
1184
+ process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
1185
+ err = mmu_notifier_register(&process->mmu_notifier, process->mm);
5671186 if (err)
568
- goto err_init_cwsr;
1187
+ goto err_register_notifier;
1188
+
1189
+ get_task_struct(process->lead_thread);
1190
+ hash_add_rcu(kfd_processes_table, &process->kfd_processes,
1191
+ (uintptr_t)process->mm);
5691192
5701193 return process;
5711194
572
-err_init_cwsr:
1195
+err_register_notifier:
5731196 kfd_process_free_outstanding_kfd_bos(process);
5741197 kfd_process_destroy_pdds(process);
5751198 err_init_apertures:
5761199 pqm_uninit(&process->pqm);
5771200 err_process_pqm_init:
578
- hash_del_rcu(&process->kfd_processes);
579
- synchronize_rcu();
580
- mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
581
-err_mmu_notifier:
582
- mutex_destroy(&process->mutex);
583
- kfd_free_process_doorbells(process);
584
-err_alloc_doorbells:
5851201 kfd_pasid_free(process->pasid);
5861202 err_alloc_pasid:
1203
+ mutex_destroy(&process->mutex);
5871204 kfree(process);
5881205 err_alloc_process:
5891206 return ERR_PTR(err);
....@@ -593,6 +1210,8 @@
5931210 struct kfd_dev *dev)
5941211 {
5951212 unsigned int i;
1213
+ int range_start = dev->shared_resources.non_cp_doorbells_start;
1214
+ int range_end = dev->shared_resources.non_cp_doorbells_end;
5961215
5971216 if (!KFD_IS_SOC15(dev->device_info->asic_family))
5981217 return 0;
....@@ -603,13 +1222,19 @@
6031222 if (!qpd->doorbell_bitmap)
6041223 return -ENOMEM;
6051224
606
- /* Mask out any reserved doorbells */
607
- for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++)
608
- if ((dev->shared_resources.reserved_doorbell_mask & i) ==
609
- dev->shared_resources.reserved_doorbell_val) {
1225
+ /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
1226
+ pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
1227
+ pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
1228
+ range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
1229
+ range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
1230
+
1231
+ for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
1232
+ if (i >= range_start && i <= range_end) {
6101233 set_bit(i, qpd->doorbell_bitmap);
611
- pr_debug("reserved doorbell 0x%03x\n", i);
1234
+ set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
1235
+ qpd->doorbell_bitmap);
6121236 }
1237
+ }
6131238
6141239 return 0;
6151240 }
....@@ -635,10 +1260,14 @@
6351260 if (!pdd)
6361261 return NULL;
6371262
1263
+ if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
1264
+ pr_err("Failed to alloc doorbell for pdd\n");
1265
+ goto err_free_pdd;
1266
+ }
1267
+
6381268 if (init_doorbell_bitmap(&pdd->qpd, dev)) {
6391269 pr_err("Failed to init doorbell for process\n");
640
- kfree(pdd);
641
- return NULL;
1270
+ goto err_free_pdd;
6421271 }
6431272
6441273 pdd->dev = dev;
....@@ -647,15 +1276,24 @@
6471276 pdd->qpd.dqm = dev->dqm;
6481277 pdd->qpd.pqm = &p->pqm;
6491278 pdd->qpd.evicted = 0;
1279
+ pdd->qpd.mapped_gws_queue = false;
6501280 pdd->process = p;
6511281 pdd->bound = PDD_UNBOUND;
6521282 pdd->already_dequeued = false;
1283
+ pdd->runtime_inuse = false;
1284
+ pdd->vram_usage = 0;
1285
+ pdd->sdma_past_activity_counter = 0;
1286
+ atomic64_set(&pdd->evict_duration_counter, 0);
6531287 list_add(&pdd->per_device_list, &p->per_device_data);
6541288
6551289 /* Init idr used for memory handle translation */
6561290 idr_init(&pdd->alloc_idr);
6571291
6581292 return pdd;
1293
+
1294
+err_free_pdd:
1295
+ kfree(pdd);
1296
+ return NULL;
6591297 }
6601298
6611299 /**
....@@ -686,16 +1324,18 @@
6861324 dev = pdd->dev;
6871325
6881326 if (drm_file)
689
- ret = dev->kfd2kgd->acquire_process_vm(
690
- dev->kgd, drm_file,
1327
+ ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
1328
+ dev->kgd, drm_file, p->pasid,
6911329 &pdd->vm, &p->kgd_process_info, &p->ef);
6921330 else
693
- ret = dev->kfd2kgd->create_process_vm(
694
- dev->kgd, &pdd->vm, &p->kgd_process_info, &p->ef);
1331
+ ret = amdgpu_amdkfd_gpuvm_create_process_vm(dev->kgd, p->pasid,
1332
+ &pdd->vm, &p->kgd_process_info, &p->ef);
6951333 if (ret) {
6961334 pr_err("Failed to create process VM object\n");
6971335 return ret;
6981336 }
1337
+
1338
+ amdgpu_vm_set_task_info(pdd->vm);
6991339
7001340 ret = kfd_process_device_reserve_ib_mem(pdd);
7011341 if (ret)
....@@ -712,7 +1352,7 @@
7121352 err_reserve_ib_mem:
7131353 kfd_process_device_free_bos(pdd);
7141354 if (!drm_file)
715
- dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);
1355
+ amdgpu_amdkfd_gpuvm_destroy_process_vm(dev->kgd, pdd->vm);
7161356 pdd->vm = NULL;
7171357
7181358 return ret;
....@@ -737,15 +1377,43 @@
7371377 return ERR_PTR(-ENOMEM);
7381378 }
7391379
1380
+ /*
1381
+ * signal runtime-pm system to auto resume and prevent
1382
+ * further runtime suspend once device pdd is created until
1383
+ * pdd is destroyed.
1384
+ */
1385
+ if (!pdd->runtime_inuse) {
1386
+ err = pm_runtime_get_sync(dev->ddev->dev);
1387
+ if (err < 0) {
1388
+ pm_runtime_put_autosuspend(dev->ddev->dev);
1389
+ return ERR_PTR(err);
1390
+ }
1391
+ }
1392
+
7401393 err = kfd_iommu_bind_process_to_device(pdd);
7411394 if (err)
742
- return ERR_PTR(err);
1395
+ goto out;
7431396
7441397 err = kfd_process_device_init_vm(pdd, NULL);
7451398 if (err)
746
- return ERR_PTR(err);
1399
+ goto out;
1400
+
1401
+ /*
1402
+ * make sure that runtime_usage counter is incremented just once
1403
+ * per pdd
1404
+ */
1405
+ pdd->runtime_inuse = true;
7471406
7481407 return pdd;
1408
+
1409
+out:
1410
+ /* balance runpm reference count and exit with error */
1411
+ if (!pdd->runtime_inuse) {
1412
+ pm_runtime_mark_last_busy(dev->ddev->dev);
1413
+ pm_runtime_put_autosuspend(dev->ddev->dev);
1414
+ }
1415
+
1416
+ return ERR_PTR(err);
7491417 }
7501418
7511419 struct kfd_process_device *kfd_get_first_process_device_data(
....@@ -802,7 +1470,7 @@
8021470 }
8031471
8041472 /* This increments the process->ref counter. */
805
-struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
1473
+struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
8061474 {
8071475 struct kfd_process *p, *ret_p = NULL;
8081476 unsigned int temp;
....@@ -838,7 +1506,7 @@
8381506 return p;
8391507 }
8401508
841
-/* process_evict_queues - Evict all user queues of a process
1509
+/* kfd_process_evict_queues - Evict all user queues of a process
8421510 *
8431511 * Eviction is reference-counted per process-device. This means multiple
8441512 * evictions from different sources can be nested safely.
....@@ -878,7 +1546,7 @@
8781546 return r;
8791547 }
8801548
881
-/* process_restore_queues - Restore all user queues of a process */
1549
+/* kfd_process_restore_queues - Restore all user queues of a process */
8821550 int kfd_process_restore_queues(struct kfd_process *p)
8831551 {
8841552 struct kfd_process_device *pdd;
....@@ -920,7 +1588,7 @@
9201588 */
9211589 flush_delayed_work(&p->restore_work);
9221590
923
- pr_debug("Started evicting pasid %d\n", p->pasid);
1591
+ pr_debug("Started evicting pasid 0x%x\n", p->pasid);
9241592 ret = kfd_process_evict_queues(p);
9251593 if (!ret) {
9261594 dma_fence_signal(p->ef);
....@@ -929,16 +1597,15 @@
9291597 queue_delayed_work(kfd_restore_wq, &p->restore_work,
9301598 msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
9311599
932
- pr_debug("Finished evicting pasid %d\n", p->pasid);
1600
+ pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
9331601 } else
934
- pr_err("Failed to evict queues of pasid %d\n", p->pasid);
1602
+ pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
9351603 }
9361604
9371605 static void restore_process_worker(struct work_struct *work)
9381606 {
9391607 struct delayed_work *dwork;
9401608 struct kfd_process *p;
941
- struct kfd_process_device *pdd;
9421609 int ret = 0;
9431610
9441611 dwork = to_delayed_work(work);
....@@ -947,17 +1614,7 @@
9471614 * lifetime of this thread, kfd_process p will be valid
9481615 */
9491616 p = container_of(dwork, struct kfd_process, restore_work);
950
-
951
- /* Call restore_process_bos on the first KGD device. This function
952
- * takes care of restoring the whole process including other devices.
953
- * Restore can fail if enough memory is not available. If so,
954
- * reschedule again.
955
- */
956
- pdd = list_first_entry(&p->per_device_data,
957
- struct kfd_process_device,
958
- per_device_list);
959
-
960
- pr_debug("Started restoring pasid %d\n", p->pasid);
1617
+ pr_debug("Started restoring pasid 0x%x\n", p->pasid);
9611618
9621619 /* Setting last_restore_timestamp before successful restoration.
9631620 * Otherwise this would have to be set by KGD (restore_process_bos)
....@@ -970,10 +1627,10 @@
9701627 */
9711628
9721629 p->last_restore_timestamp = get_jiffies_64();
973
- ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info,
1630
+ ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
9741631 &p->ef);
9751632 if (ret) {
976
- pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
1633
+ pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
9771634 p->pasid, PROCESS_BACK_OFF_TIME_MS);
9781635 ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
9791636 msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
....@@ -983,9 +1640,9 @@
9831640
9841641 ret = kfd_process_restore_queues(p);
9851642 if (!ret)
986
- pr_debug("Finished restoring pasid %d\n", p->pasid);
1643
+ pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
9871644 else
988
- pr_err("Failed to restore queues of pasid %d\n", p->pasid);
1645
+ pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
9891646 }
9901647
9911648 void kfd_suspend_all_processes(void)
....@@ -994,12 +1651,13 @@
9941651 unsigned int temp;
9951652 int idx = srcu_read_lock(&kfd_processes_srcu);
9961653
1654
+ WARN(debug_evictions, "Evicting all processes");
9971655 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
9981656 cancel_delayed_work_sync(&p->eviction_work);
9991657 cancel_delayed_work_sync(&p->restore_work);
10001658
10011659 if (kfd_process_evict_queues(p))
1002
- pr_err("Failed to suspend process %d\n", p->pasid);
1660
+ pr_err("Failed to suspend process 0x%x\n", p->pasid);
10031661 dma_fence_signal(p->ef);
10041662 dma_fence_put(p->ef);
10051663 p->ef = NULL;
....@@ -1058,16 +1716,17 @@
10581716 void kfd_flush_tlb(struct kfd_process_device *pdd)
10591717 {
10601718 struct kfd_dev *dev = pdd->dev;
1061
- const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
10621719
10631720 if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
10641721 /* Nothing to flush until a VMID is assigned, which
10651722 * only happens when the first queue is created.
10661723 */
10671724 if (pdd->qpd.vmid)
1068
- f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
1725
+ amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
1726
+ pdd->qpd.vmid);
10691727 } else {
1070
- f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
1728
+ amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
1729
+ pdd->process->pasid);
10711730 }
10721731 }
10731732
....@@ -1082,7 +1741,7 @@
10821741 int idx = srcu_read_lock(&kfd_processes_srcu);
10831742
10841743 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1085
- seq_printf(m, "Process %d PASID %d:\n",
1744
+ seq_printf(m, "Process %d PASID 0x%x:\n",
10861745 p->lead_thread->tgid, p->pasid);
10871746
10881747 mutex_lock(&p->mutex);
....@@ -1099,3 +1758,4 @@
10991758 }
11001759
11011760 #endif
1761
+