2024-05-10 cde9070d9970eef1f7ec2360586c802a16230ad8
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -24,53 +24,49 @@
  * Authors:
  *    Jerome Glisse <glisse@freedesktop.org>
  */
+
+#include <linux/file.h>
 #include <linux/pagemap.h>
 #include <linux/sync_file.h>
-#include <drm/drmP.h>
+#include <linux/dma-buf.h>
+
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_syncobj.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_gmc.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_ras.h"
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_cs_chunk_fence *data,
				      uint32_t *offset)
 {
 	struct drm_gem_object *gobj;
+	struct amdgpu_bo *bo;
 	unsigned long size;
-	int r;
 
 	gobj = drm_gem_object_lookup(p->filp, data->handle);
 	if (gobj == NULL)
 		return -EINVAL;
 
-	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
 	p->uf_entry.priority = 0;
-	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
-	p->uf_entry.tv.shared = true;
-	p->uf_entry.user_pages = NULL;
+	p->uf_entry.tv.bo = &bo->tbo;
+	/* One for TTM and one for the CS job */
+	p->uf_entry.tv.num_shared = 2;
 
-	drm_gem_object_put_unlocked(gobj);
+	drm_gem_object_put(gobj);
 
-	size = amdgpu_bo_size(p->uf_entry.robj);
-	if (size != PAGE_SIZE || (data->offset + 8) > size) {
-		r = -EINVAL;
-		goto error_unref;
-	}
+	size = amdgpu_bo_size(bo);
+	if (size != PAGE_SIZE || data->offset > (size - 8))
+		return -EINVAL;
 
-	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
-		r = -EINVAL;
-		goto error_unref;
-	}
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
+		return -EINVAL;
 
 	*offset = data->offset;
-
 	return 0;
-
-error_unref:
-	amdgpu_bo_unref(&p->uf_entry.robj);
-	return r;
 }
 
 static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
@@ -110,7 +106,7 @@
 	int ret;
 
 	if (cs->in.num_chunks == 0)
-		return 0;
+		return -EINVAL;
 
 	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
 	if (!chunk_array)
@@ -211,6 +207,9 @@
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
 			break;
 
 		default:
@@ -228,7 +227,7 @@
 		goto free_all_kdata;
 	}
 
-	if (p->uf_entry.robj)
+	if (p->uf_entry.tv.bo)
 		p->job->uf_addr = uf_offset;
 	kfree(chunk_array);
 
@@ -290,7 +289,7 @@
 {
 	s64 time_us, increment_us;
 	u64 free_vram, total_vram, used_vram;
-
+	struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
 	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
 	 * throttling.
 	 *
@@ -307,7 +306,7 @@
 	}
 
 	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
-	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
+	used_vram = amdgpu_vram_mgr_usage(vram_man);
 	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
 	spin_lock(&adev->mm_stats.lock);
@@ -354,7 +353,7 @@
 	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
 		u64 total_vis_vram = adev->gmc.visible_vram_size;
 		u64 used_vis_vram =
-			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
+			amdgpu_vram_mgr_vis_usage(vram_man);
 
 		if (used_vis_vram < total_vis_vram) {
 			u64 free_vis_vram = total_vis_vram - used_vis_vram;
@@ -395,7 +394,7 @@
 	struct ttm_operation_ctx ctx = {
 		.interruptible = true,
 		.no_wait_gpu = false,
-		.resv = bo->tbo.resv,
+		.resv = bo->tbo.base.resv,
 		.flags = 0
 	};
 	uint32_t domain;
@@ -407,7 +406,9 @@
 	/* Don't move this buffer if we have depleted our allowance
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
-	if (p->bytes_moved < p->bytes_moved_threshold) {
+	if (p->bytes_moved < p->bytes_moved_threshold &&
+	    (!bo->tbo.base.dma_buf ||
+	     list_empty(&bo->tbo.base.dma_buf->attachments))) {
 		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
 			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
@@ -442,75 +443,12 @@
 	return r;
 }
 
-/* Last resort, try to evict something from the current working set */
-static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
-				struct amdgpu_bo *validated)
-{
-	uint32_t domain = validated->allowed_domains;
-	struct ttm_operation_ctx ctx = { true, false };
-	int r;
-
-	if (!p->evictable)
-		return false;
-
-	for (;&p->evictable->tv.head != &p->validated;
-	     p->evictable = list_prev_entry(p->evictable, tv.head)) {
-
-		struct amdgpu_bo_list_entry *candidate = p->evictable;
-		struct amdgpu_bo *bo = candidate->robj;
-		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-		bool update_bytes_moved_vis;
-		uint32_t other;
-
-		/* If we reached our current BO we can forget it */
-		if (candidate->robj == validated)
-			break;
-
-		/* We can't move pinned BOs here */
-		if (bo->pin_count)
-			continue;
-
-		other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
-
-		/* Check if this BO is in one of the domains we need space for */
-		if (!(other & domain))
-			continue;
-
-		/* Check if we can move this BO somewhere else */
-		other = bo->allowed_domains & ~domain;
-		if (!other)
-			continue;
-
-		/* Good we can try to move this BO somewhere else */
-		update_bytes_moved_vis =
-			!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-			amdgpu_bo_in_cpu_visible_vram(bo);
-		amdgpu_bo_placement_from_domain(bo, other);
-		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-		p->bytes_moved += ctx.bytes_moved;
-		if (update_bytes_moved_vis)
-			p->bytes_moved_vis += ctx.bytes_moved;
-
-		if (unlikely(r))
-			break;
-
-		p->evictable = list_prev_entry(p->evictable, tv.head);
-		list_move(&candidate->tv.head, &p->validated);
-
-		return true;
-	}
-
-	return false;
-}
-
 static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
 {
 	struct amdgpu_cs_parser *p = param;
 	int r;
 
-	do {
-		r = amdgpu_cs_bo_validate(p, bo);
-	} while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
+	r = amdgpu_cs_bo_validate(p, bo);
 	if (r)
 		return r;
 
@@ -528,38 +466,31 @@
 	int r;
 
 	list_for_each_entry(lobj, validated, tv.head) {
-		struct amdgpu_bo *bo = lobj->robj;
-		bool binding_userptr = false;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
 		struct mm_struct *usermm;
 
 		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
 		if (usermm && usermm != current->mm)
 			return -EPERM;
 
-		/* Check if we have user pages and nobody bound the BO already */
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-		    lobj->user_pages) {
+		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
+		    lobj->user_invalidated && lobj->user_pages) {
 			amdgpu_bo_placement_from_domain(bo,
							AMDGPU_GEM_DOMAIN_CPU);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 			if (r)
 				return r;
+
 			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
						     lobj->user_pages);
-			binding_userptr = true;
 		}
-
-		if (p->evictable == lobj)
-			p->evictable = NULL;
 
 		r = amdgpu_cs_validate(p, bo);
 		if (r)
 			return r;
 
-		if (binding_userptr) {
-			kvfree(lobj->user_pages);
-			lobj->user_pages = NULL;
-		}
+		kvfree(lobj->user_pages);
+		lobj->user_pages = NULL;
 	}
 	return 0;
 }
@@ -574,7 +505,6 @@
 	struct amdgpu_bo *gds;
 	struct amdgpu_bo *gws;
 	struct amdgpu_bo *oa;
-	unsigned tries = 10;
 	int r;
 
 	INIT_LIST_HEAD(&p->validated);
@@ -596,98 +526,63 @@
 		return r;
 	}
 
+	/* One for TTM and one for the CS job */
+	amdgpu_bo_list_for_each_entry(e, p->bo_list)
+		e->tv.num_shared = 2;
+
 	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
-	if (p->bo_list->first_userptr != p->bo_list->num_entries)
-		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
 
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
-	if (p->uf_entry.robj && !p->uf_entry.robj->parent)
+	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
-	while (1) {
-		struct list_head need_pages;
+	/* Get userptr backing pages. If pages are updated after registered
+	 * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
+	 * amdgpu_ttm_backend_bind() to flush and invalidate new pages
+	 */
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		bool userpage_invalidated = false;
+		int i;
 
-		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-					   &duplicates);
-		if (unlikely(r != 0)) {
-			if (r != -ERESTARTSYS)
-				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
-			goto error_free_pages;
+		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
					       sizeof(struct page *),
					       GFP_KERNEL | __GFP_ZERO);
+		if (!e->user_pages) {
+			DRM_ERROR("calloc failure\n");
+			return -ENOMEM;
 		}
 
-		INIT_LIST_HEAD(&need_pages);
-		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-			struct amdgpu_bo *bo = e->robj;
-
-			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
-				 &e->user_invalidated) && e->user_pages) {
-
-				/* We acquired a page array, but somebody
-				 * invalidated it. Free it and try again
-				 */
-				release_pages(e->user_pages,
					      bo->tbo.ttm->num_pages);
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-			}
-
-			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-			    !e->user_pages) {
-				list_del(&e->tv.head);
-				list_add(&e->tv.head, &need_pages);
-
-				amdgpu_bo_unreserve(e->robj);
-			}
+		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
+		if (r) {
+			kvfree(e->user_pages);
+			e->user_pages = NULL;
+			return r;
 		}
 
-		if (list_empty(&need_pages))
-			break;
-
-		/* Unreserve everything again. */
-		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-		/* We tried too many times, just abort */
-		if (!--tries) {
-			r = -EDEADLK;
-			DRM_ERROR("deadlock in %s\n", __func__);
-			goto error_free_pages;
-		}
-
-		/* Fill the page arrays for all userptrs. */
-		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->robj->tbo.ttm;
-
-			e->user_pages = kvmalloc_array(ttm->num_pages,
						       sizeof(struct page*),
						       GFP_KERNEL | __GFP_ZERO);
-			if (!e->user_pages) {
-				r = -ENOMEM;
-				DRM_ERROR("calloc failure in %s\n", __func__);
-				goto error_free_pages;
-			}
-
-			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
-			if (r) {
-				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-				goto error_free_pages;
+		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
+			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+				userpage_invalidated = true;
+				break;
 			}
 		}
+		e->user_invalidated = userpage_invalidated;
+	}
 
-		/* And try again. */
-		list_splice(&need_pages, &p->validated);
+	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
				   &duplicates);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+		goto out;
 	}
 
 	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
 	p->bytes_moved = 0;
 	p->bytes_moved_vis = 0;
-	p->evictable = list_last_entry(&p->validated,
				       struct amdgpu_bo_list_entry,
				       tv.head);
 
 	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
				      amdgpu_cs_validate, p);
@@ -697,16 +592,12 @@
 	}
 
 	r = amdgpu_cs_list_validate(p, &duplicates);
-	if (r) {
-		DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n");
+	if (r)
 		goto error_validate;
-	}
 
 	r = amdgpu_cs_list_validate(p, &p->validated);
-	if (r) {
-		DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n");
+	if (r)
 		goto error_validate;
-	}
 
 	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);
@@ -715,24 +606,30 @@
 	gws = p->bo_list->gws_obj;
 	oa = p->bo_list->oa_obj;
 
-	amdgpu_bo_list_for_each_entry(e, p->bo_list)
-		e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		/* Make sure we use the exclusive slot for shared BOs */
+		if (bo->prime_shared_count)
+			e->tv.num_shared = 0;
+		e->bo_va = amdgpu_vm_bo_find(vm, bo);
+	}
 
 	if (gds) {
-		p->job->gds_base = amdgpu_bo_gpu_offset(gds);
-		p->job->gds_size = amdgpu_bo_size(gds);
+		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
 	}
 	if (gws) {
-		p->job->gws_base = amdgpu_bo_gpu_offset(gws);
-		p->job->gws_size = amdgpu_bo_size(gws);
+		p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+		p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
 	}
 	if (oa) {
-		p->job->oa_base = amdgpu_bo_gpu_offset(oa);
-		p->job->oa_size = amdgpu_bo_size(oa);
+		p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+		p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
 	}
 
-	if (!r && p->uf_entry.robj) {
-		struct amdgpu_bo *uf = p->uf_entry.robj;
+	if (!r && p->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
 
 		r = amdgpu_ttm_alloc_gart(&uf->tbo);
 		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
@@ -741,31 +638,25 @@
 error_validate:
 	if (r)
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-error_free_pages:
-
-	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		if (!e->user_pages)
-			continue;
-
-		release_pages(e->user_pages,
			      e->robj->tbo.ttm->num_pages);
-		kvfree(e->user_pages);
-	}
-
+out:
 	return r;
 }
 
 static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 {
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_bo_list_entry *e;
 	int r;
 
 	list_for_each_entry(e, &p->validated, tv.head) {
-		struct reservation_object *resv = e->robj->tbo.resv;
-		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
				     amdgpu_bo_explicit_sync(e->robj));
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		struct dma_resv *resv = bo->tbo.base.resv;
+		enum amdgpu_sync_mode sync_mode;
 
+		sync_mode = amdgpu_bo_explicit_sync(bo) ?
+			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
+		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
				     &fpriv->vm);
 		if (r)
 			return r;
 	}
@@ -789,9 +680,11 @@
 		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
 
-	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
-		drm_syncobj_put(parser->post_dep_syncobjs[i]);
-	kfree(parser->post_dep_syncobjs);
+	for (i = 0; i < parser->num_post_deps; i++) {
+		drm_syncobj_put(parser->post_deps[i].syncobj);
+		kfree(parser->post_deps[i].chain);
+	}
+	kfree(parser->post_deps);
 
 	dma_fence_put(parser->fence);
 
@@ -807,11 +700,16 @@
 	kfree(parser->chunks);
 	if (parser->job)
 		amdgpu_job_free(parser->job);
-	amdgpu_bo_unref(&parser->uf_entry.robj);
+	if (parser->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
+
+		amdgpu_bo_unref(&uf);
+	}
 }
 
-static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
+static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 {
+	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_device *adev = p->adev;
 	struct amdgpu_vm *vm = &fpriv->vm;
@@ -820,92 +718,8 @@
 	struct amdgpu_bo *bo;
 	int r;
 
-	r = amdgpu_vm_clear_freed(adev, vm, NULL);
-	if (r)
-		return r;
-
-	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
-	if (r)
-		return r;
-
-	r = amdgpu_sync_fence(adev, &p->job->sync,
			      fpriv->prt_va->last_pt_update, false);
-	if (r)
-		return r;
-
-	if (amdgpu_sriov_vf(adev)) {
-		struct dma_fence *f;
-
-		bo_va = fpriv->csa_va;
-		BUG_ON(!bo_va);
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
-		if (r)
-			return r;
-
-		f = bo_va->last_pt_update;
-		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
-		if (r)
-			return r;
-	}
-
-	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-		struct dma_fence *f;
-
-		/* ignore duplicates */
-		bo = e->robj;
-		if (!bo)
-			continue;
-
-		bo_va = e->bo_va;
-		if (bo_va == NULL)
-			continue;
-
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
-		if (r)
-			return r;
-
-		f = bo_va->last_pt_update;
-		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
-		if (r)
-			return r;
-	}
-
-	r = amdgpu_vm_handle_moved(adev, vm);
-	if (r)
-		return r;
-
-	r = amdgpu_vm_update_directories(adev, vm);
-	if (r)
-		return r;
-
-	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
-	if (r)
-		return r;
-
-	if (amdgpu_vm_debug) {
-		/* Invalidate all BOs to test for userspace bugs */
-		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-			/* ignore duplicates */
-			if (!e->robj)
-				continue;
-
-			amdgpu_vm_bo_invalidate(adev, e->robj, false);
-		}
-	}
-
-	return r;
-}
-
-static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
				 struct amdgpu_cs_parser *p)
-{
-	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_ring *ring = p->ring;
-	int r;
-
 	/* Only for UVD/VCE VM emulation */
-	if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
+	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
 		unsigned i, j;
 
 		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
@@ -924,7 +738,7 @@
 			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
				continue;
 
-			va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
+			va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
 			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
 			if (r) {
				DRM_ERROR("IB va_start is invalid\n");
@@ -946,7 +760,7 @@
 			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
 			kptr += va_start - offset;
 
-			if (p->ring->funcs->parse_cs) {
+			if (ring->funcs->parse_cs) {
				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
				amdgpu_bo_kunmap(aobj);
 
@@ -965,16 +779,78 @@
 		}
 	}
 
-	if (p->job->vm) {
-		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
+	if (!p->job->vm)
+		return amdgpu_cs_sync_rings(p);
 
-		r = amdgpu_bo_vm_update_pte(p);
+
+	r = amdgpu_vm_clear_freed(adev, vm, NULL);
+	if (r)
+		return r;
+
+	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+	if (r)
+		return r;
+
+	r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
+	if (r)
+		return r;
+
+	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+		bo_va = fpriv->csa_va;
+		BUG_ON(!bo_va);
+		r = amdgpu_vm_bo_update(adev, bo_va, false);
 		if (r)
 			return r;
 
-		r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
 		if (r)
 			return r;
+	}
+
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		/* ignore duplicates */
+		bo = ttm_to_amdgpu_bo(e->tv.bo);
+		if (!bo)
+			continue;
+
+		bo_va = e->bo_va;
+		if (bo_va == NULL)
+			continue;
+
+		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		if (r)
+			return r;
+
+		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+		if (r)
+			return r;
+	}
+
+	r = amdgpu_vm_handle_moved(adev, vm);
+	if (r)
+		return r;
+
+	r = amdgpu_vm_update_pdes(adev, vm, false);
+	if (r)
+		return r;
+
+	r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
+	if (r)
+		return r;
+
+	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
+
+	if (amdgpu_vm_debug) {
+		/* Invalidate all BOs to test for userspace bugs */
+		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+			/* ignore duplicates */
+			if (!bo)
+				continue;
+
+			amdgpu_vm_bo_invalidate(adev, bo, false);
+		}
 	}
 
 	return amdgpu_cs_sync_rings(p);
@@ -985,14 +861,15 @@
 {
 	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	int i, j;
 	int r, ce_preempt = 0, de_preempt = 0;
+	struct amdgpu_ring *ring;
+	int i, j;
 
 	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
 		struct amdgpu_cs_chunk *chunk;
 		struct amdgpu_ib *ib;
 		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-		struct amdgpu_ring *ring;
+		struct drm_sched_entity *entity;
 
 		chunk = &parser->chunks[i];
 		ib = &parser->job->ibs[j];
@@ -1001,7 +878,8 @@
 		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
 			continue;
 
-		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) {
+		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
+		    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
 			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
					ce_preempt++;
@@ -1014,8 +892,9 @@
				return -EINVAL;
 		}
 
-		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
+		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
					  chunk_ib->ip_instance, chunk_ib->ring,
					  &entity);
 		if (r)
 			return r;
 
@@ -1023,14 +902,20 @@
 			parser->job->preamble_status |=
				AMDGPU_PREAMBLE_IB_PRESENT;
 
-		if (parser->ring && parser->ring != ring)
+		if (parser->entity && parser->entity != entity)
 			return -EINVAL;
 
-		parser->ring = ring;
+		/* Return if there is no run queue associated with this entity.
+		 * Possibly because of disabled HW IP*/
+		if (entity->rq == NULL)
+			return -EINVAL;
 
-		r = amdgpu_ib_get(adev, vm,
				  ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
				  ib);
+		parser->entity = entity;
+
+		ring = to_amdgpu_ring(entity->rq->sched);
+		r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
				  chunk_ib->ib_bytes : 0,
				  AMDGPU_IB_POOL_DELAYED, ib);
 		if (r) {
 			DRM_ERROR("Failed to get ib !\n");
 			return r;
@@ -1043,13 +928,12 @@
 		j++;
 	}
 
-	/* UVD & VCE fw doesn't support user fences */
-	if (parser->job->uf_addr && (
	    parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
	    parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+	/* MM engine doesn't support user fences */
+	ring = to_amdgpu_ring(parser->entity->rq->sched);
+	if (parser->job->uf_addr && ring->funcs->no_user_fence)
 		return -EINVAL;
 
-	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1065,51 +949,62 @@
 		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
 	for (i = 0; i < num_deps; ++i) {
-		struct amdgpu_ring *ring;
 		struct amdgpu_ctx *ctx;
+		struct drm_sched_entity *entity;
 		struct dma_fence *fence;
 
 		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
 		if (ctx == NULL)
 			return -EINVAL;
 
-		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
					 deps[i].ip_type,
					 deps[i].ip_instance,
					 deps[i].ring, &ring);
+		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
					  deps[i].ip_instance,
					  deps[i].ring, &entity);
 		if (r) {
 			amdgpu_ctx_put(ctx);
 			return r;
 		}
 
-		fence = amdgpu_ctx_get_fence(ctx, ring,
					     deps[i].handle);
-		if (IS_ERR(fence)) {
-			r = PTR_ERR(fence);
-			amdgpu_ctx_put(ctx);
-			return r;
-		} else if (fence) {
-			r = amdgpu_sync_fence(p->adev, &p->job->sync, fence,
					      true);
-			dma_fence_put(fence);
-			amdgpu_ctx_put(ctx);
-			if (r)
-				return r;
+		fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
+		amdgpu_ctx_put(ctx);
+
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
+		else if (!fence)
+			continue;
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+			struct drm_sched_fence *s_fence;
+			struct dma_fence *old = fence;
+
+			s_fence = to_drm_sched_fence(fence);
+			fence = dma_fence_get(&s_fence->scheduled);
+			dma_fence_put(old);
 		}
+
+		r = amdgpu_sync_fence(&p->job->sync, fence);
+		dma_fence_put(fence);
+		if (r)
+			return r;
 	}
 	return 0;
 }
 
 static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
-						 uint32_t handle)
+						 uint32_t handle, u64 point,
+						 u64 flags)
 {
-	int r;
 	struct dma_fence *fence;
-	r = drm_syncobj_find_fence(p->filp, handle, &fence);
-	if (r)
-		return r;
+	int r;
 
-	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+	if (r) {
+		DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
			  handle, point, r);
+		return r;
+	}
+
+	r = amdgpu_sync_fence(&p->job->sync, fence);
 	dma_fence_put(fence);
 
 	return r;
@@ -1118,46 +1013,123 @@
 static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
					    struct amdgpu_cs_chunk *chunk)
 {
+	struct drm_amdgpu_cs_chunk_sem *deps;
 	unsigned num_deps;
 	int i, r;
-	struct drm_amdgpu_cs_chunk_sem *deps;
 
 	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
 	num_deps = chunk->length_dw * 4 /
 		sizeof(struct drm_amdgpu_cs_chunk_sem);
-
 	for (i = 0; i < num_deps; ++i) {
-		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
							  0, 0);
 		if (r)
 			return r;
 	}
+
+	return 0;
+}
+
+
+static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
						     struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+	unsigned num_deps;
+	int i, r;
+
+	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_syncobj_lookup_and_add_to_sync(p,
							  syncobj_deps[i].handle,
							  syncobj_deps[i].point,
							  syncobj_deps[i].flags);
+		if (r)
+			return r;
+	}
+
 	return 0;
 }
 
 static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
					     struct amdgpu_cs_chunk *chunk)
 {
+	struct drm_amdgpu_cs_chunk_sem *deps;
 	unsigned num_deps;
 	int i;
-	struct drm_amdgpu_cs_chunk_sem *deps;
+
 	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
 	num_deps = chunk->length_dw * 4 /
 		sizeof(struct drm_amdgpu_cs_chunk_sem);
 
-	p->post_dep_syncobjs = kmalloc_array(num_deps,
					     sizeof(struct drm_syncobj *),
					     GFP_KERNEL);
-	p->num_post_dep_syncobjs = 0;
+	if (p->post_deps)
+		return -EINVAL;
 
-	if (!p->post_dep_syncobjs)
+	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
+	p->num_post_deps = 0;
+
+	if (!p->post_deps)
+		return -ENOMEM;
+
+
+	for (i = 0; i < num_deps; ++i) {
+		p->post_deps[i].syncobj =
+			drm_syncobj_find(p->filp, deps[i].handle);
+		if (!p->post_deps[i].syncobj)
+			return -EINVAL;
+		p->post_deps[i].chain = NULL;
+		p->post_deps[i].point = 0;
+		p->num_post_deps++;
+	}
+
+	return 0;
+}
+
+
+static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
						      struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+	unsigned num_deps;
+	int i;
+
+	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+
+	if (p->post_deps)
+		return -EINVAL;
+
+	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
+	p->num_post_deps = 0;
+
+	if (!p->post_deps)
 		return -ENOMEM;
 
 	for (i = 0; i < num_deps; ++i) {
-		p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
-		if (!p->post_dep_syncobjs[i])
+		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
+
+		dep->chain = NULL;
+		if (syncobj_deps[i].point) {
+			dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
+			if (!dep->chain)
+				return -ENOMEM;
+		}
+
+		dep->syncobj = drm_syncobj_find(p->filp,
						syncobj_deps[i].handle);
+		if (!dep->syncobj) {
+			kfree(dep->chain);
 			return -EINVAL;
-		p->num_post_dep_syncobjs++;
+		}
+		dep->point = syncobj_deps[i].point;
+		p->num_post_deps++;
 	}
+
 	return 0;
 }
 
@@ -1171,18 +1143,33 @@
 
 		chunk = &p->chunks[i];
 
-		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+		switch (chunk->chunk_id) {
+		case AMDGPU_CHUNK_ID_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
 			r = amdgpu_cs_process_fence_dep(p, chunk);
 			if (r)
				return r;
-		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
 			if (r)
				return r;
-		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
 			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
 			if (r)
				return r;
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+			r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
+			if (r)
+				return r;
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+			r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
+			if (r)
+				return r;
+			break;
 		}
 	}
 
@@ -1193,53 +1180,58 @@
 {
 	int i;
 
-	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
-		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
+	for (i = 0; i < p->num_post_deps; ++i) {
+		if (p->post_deps[i].chain && p->post_deps[i].point) {
+			drm_syncobj_add_point(p->post_deps[i].syncobj,
					      p->post_deps[i].chain,
					      p->fence, p->post_deps[i].point);
+			p->post_deps[i].chain = NULL;
+		} else {
+			drm_syncobj_replace_fence(p->post_deps[i].syncobj,
						  p->fence);
+		}
+	}
 }
 
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_ring *ring = p->ring;
-	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
-	enum drm_sched_priority priority;
+	struct drm_sched_entity *entity = p->entity;
 	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
 	uint64_t seq;
-
 	int r;
 
 	job = p->job;
 	p->job = NULL;
 
-	r = drm_sched_job_init(&job->base, entity, p->filp);
+	r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
 	if (r)
 		goto error_unlock;
 
-	/* No memory allocation is allowed while holding the mn lock */
-	amdgpu_mn_lock(p->mn);
-	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		struct amdgpu_bo *bo = e->robj;
+	/* No memory allocation is allowed while holding the notifier lock.
	 * The lock is held until amdgpu_cs_submit is finished and fence is
	 * added to BOs.
	 */
+	mutex_lock(&p->adev->notifier_lock);
 
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-			r = -ERESTARTSYS;
-			goto error_abort;
-		}
+	/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
	 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
	 */
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+	}
+	if (r) {
+		r = -EAGAIN;
+		goto error_abort;
 	}
 
-	job->owner = p->filp;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
-	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
-	if (r) {
-		dma_fence_put(p->fence);
-		dma_fence_put(&job->base.s_fence->finished);
-		amdgpu_job_free(job);
-		amdgpu_mn_unlock(p->mn);
-		return r;
-	}
-
+	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
 	amdgpu_cs_post_dependencies(p);
 
 	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
@@ -1255,34 +1247,45 @@
 
 	trace_amdgpu_cs_ioctl(job);
 	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
-	priority = job->base.s_priority;
 	drm_sched_entity_push_job(&job->base, entity);
 
-	ring = to_amdgpu_ring(entity->rq->sched);
-	amdgpu_ring_priority_get(ring, priority);
+	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
 
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
-	amdgpu_mn_unlock(p->mn);
+	mutex_unlock(&p->adev->notifier_lock);
 
 	return 0;
 
 error_abort:
-	dma_fence_put(&job->base.s_fence->finished);
-	job->base.s_fence = NULL;
-	amdgpu_mn_unlock(p->mn);
+	drm_sched_job_cleanup(&job->base);
+	mutex_unlock(&p->adev->notifier_lock);
 
 error_unlock:
 	amdgpu_job_free(job);
 	return r;
 }
 
+static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
+{
+	int i;
+
+	if (!trace_amdgpu_cs_enabled())
+		return;
+
+	for (i = 0; i < parser->job->num_ibs; i++)
+		trace_amdgpu_cs(parser, i);
+}
+
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
-	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_device *adev = drm_to_adev(dev);
 	union drm_amdgpu_cs *cs = data;
 	struct amdgpu_cs_parser parser = {};
 	bool reserved_buffers = false;
-	int i, r;
+	int r;
+
+	if (amdgpu_ras_intr_triggered())
+		return -EHWPOISON;
 
 	if (!adev->accel_working)
 		return -EBUSY;
@@ -1292,7 +1295,8 @@
 
 	r = amdgpu_cs_parser_init(&parser, data);
 	if (r) {
-		DRM_ERROR("Failed to initialize parser !\n");
+		if (printk_ratelimit())
+			DRM_ERROR("Failed to initialize parser %d!\n", r);
 		goto out;
 	}
 
@@ -1300,27 +1304,26 @@
 	if (r)
 		goto out;
 
-	r = amdgpu_cs_parser_bos(&parser, data);
-	if (r) {
-		if (r == -ENOMEM)
-			DRM_ERROR("Not enough memory for command submission!\n");
-		else if (r != -ERESTARTSYS)
-			DRM_ERROR("Failed to process the buffer list %d!\n", r);
-		goto out;
-	}
-
-	reserved_buffers = true;
-
 	r = amdgpu_cs_dependencies(adev, &parser);
 	if (r) {
 		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
 		goto out;
 	}
 
-	for (i = 0; i < parser.job->num_ibs; i++)
-		trace_amdgpu_cs(&parser, i);
+	r = amdgpu_cs_parser_bos(&parser, data);
+	if (r) {
+		if (r == -ENOMEM)
+			DRM_ERROR("Not enough memory for command submission!\n");
+		else if (r != -ERESTARTSYS && r != -EAGAIN)
+			DRM_ERROR("Failed to process the buffer list %d!\n", r);
+		goto out;
+	}
 
-	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
+	reserved_buffers = true;
+
+	trace_amdgpu_cs_ibs(&parser);
+
+	r = amdgpu_cs_vm_handling(&parser);
 	if (r)
 		goto out;
 
@@ -1328,6 +1331,7 @@
 
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+
 	return r;
 }
 
@@ -1344,9 +1348,8 @@
			 struct drm_file *filp)
 {
 	union drm_amdgpu_wait_cs *wait = data;
-	struct amdgpu_device *adev = dev->dev_private;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
-	struct amdgpu_ring *ring = NULL;
+	struct drm_sched_entity *entity;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	long r;
@@ -1355,15 +1358,14 @@
 	if (ctx == NULL)
 		return -EINVAL;
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
				 wait->in.ip_type, wait->in.ip_instance,
				 wait->in.ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
				  wait->in.ring, &entity);
 	if (r) {
 		amdgpu_ctx_put(ctx);
 		return r;
 	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
 	if (IS_ERR(fence))
 		r = PTR_ERR(fence);
 	else if (fence) {
@@ -1395,7 +1397,7 @@
				    struct drm_file *filp,
				    struct drm_amdgpu_fence *user)
 {
-	struct amdgpu_ring *ring;
+	struct drm_sched_entity *entity;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	int r;
@@ -1404,14 +1406,14 @@
 	if (ctx == NULL)
 		return ERR_PTR(-EINVAL);
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
				 user->ip_instance, user->ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
				  user->ring, &entity);
 	if (r) {
 		amdgpu_ctx_put(ctx);
 		return ERR_PTR(r);
 	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
+	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
 	amdgpu_ctx_put(ctx);
 
 	return fence;
@@ -1420,7 +1422,7 @@
 int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *filp)
 {
-	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_device *adev = drm_to_adev(dev);
 	union drm_amdgpu_fence_to_handle *info = data;
 	struct dma_fence *fence;
 	struct drm_syncobj *syncobj;
@@ -1430,6 +1432,9 @@
 	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
 	if (IS_ERR(fence))
 		return PTR_ERR(fence);
+
+	if (!fence)
+		fence = dma_fence_get_stub();
 
 	switch (info->in.what) {
 	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
@@ -1469,6 +1474,7 @@
 		return 0;
 
 	default:
+		dma_fence_put(fence);
 		return -EINVAL;
 	}
 }
@@ -1501,15 +1507,15 @@
 			continue;
 
 		r = dma_fence_wait_timeout(fence, true, timeout);
+		if (r > 0 && fence->error)
+			r = fence->error;
+
 		dma_fence_put(fence);
 		if (r < 0)
 			return r;
 
 		if (r == 0)
 			break;
-
-		if (fence->error)
-			return fence->error;
 	}
 
 	memset(wait, 0, sizeof(*wait));
@@ -1593,7 +1599,7 @@
 int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp)
 {
-	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_device *adev = drm_to_adev(dev);
 	union drm_amdgpu_wait_fences *wait = data;
 	uint32_t fence_count = wait->in.fence_count;
 	struct drm_amdgpu_fence *fences_user;
@@ -1655,7 +1661,7 @@
 	*map = mapping;
 
 	/* Double check that the BO is reserved by this CS */
-	if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
+	if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
 		return -EINVAL;
 
 	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {