2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -24,20 +24,26 @@
  * Authors:
  *    Jerome Glisse <glisse@freedesktop.org>
  */
+
+#include <linux/file.h>
 #include <linux/pagemap.h>
 #include <linux/sync_file.h>
-#include <drm/drmP.h>
+#include <linux/dma-buf.h>
+
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_syncobj.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_gmc.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_ras.h"
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_cs_chunk_fence *data,
				      uint32_t *offset)
 {
	struct drm_gem_object *gobj;
+	struct amdgpu_bo *bo;
	unsigned long size;
	int r;
 
@@ -45,21 +51,21 @@
	if (gobj == NULL)
		return -EINVAL;
 
-	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	p->uf_entry.priority = 0;
-	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
-	p->uf_entry.tv.shared = true;
-	p->uf_entry.user_pages = NULL;
+	p->uf_entry.tv.bo = &bo->tbo;
+	/* One for TTM and one for the CS job */
+	p->uf_entry.tv.num_shared = 2;
 
-	drm_gem_object_put_unlocked(gobj);
+	drm_gem_object_put(gobj);
 
-	size = amdgpu_bo_size(p->uf_entry.robj);
+	size = amdgpu_bo_size(bo);
	if (size != PAGE_SIZE || (data->offset + 8) > size) {
		r = -EINVAL;
		goto error_unref;
	}
 
-	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		r = -EINVAL;
		goto error_unref;
	}
@@ -69,7 +75,7 @@
	return 0;
 
 error_unref:
-	amdgpu_bo_unref(&p->uf_entry.robj);
+	amdgpu_bo_unref(&bo);
	return r;
 }
 
@@ -110,7 +116,7 @@
	int ret;
 
	if (cs->in.num_chunks == 0)
-		return 0;
+		return -EINVAL;
 
	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (!chunk_array)
@@ -211,6 +217,9 @@
		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
			break;
 
		default:
@@ -228,7 +237,7 @@
		goto free_all_kdata;
	}
 
-	if (p->uf_entry.robj)
+	if (p->uf_entry.tv.bo)
		p->job->uf_addr = uf_offset;
	kfree(chunk_array);
 
@@ -290,7 +299,7 @@
 {
	s64 time_us, increment_us;
	u64 free_vram, total_vram, used_vram;
-
+	struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
	 * throttling.
	 *
@@ -307,7 +316,7 @@
	}
 
	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
-	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
+	used_vram = amdgpu_vram_mgr_usage(vram_man);
	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
	spin_lock(&adev->mm_stats.lock);
@@ -354,7 +363,7 @@
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
		u64 total_vis_vram = adev->gmc.visible_vram_size;
		u64 used_vis_vram =
-			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
+			amdgpu_vram_mgr_vis_usage(vram_man);
 
		if (used_vis_vram < total_vis_vram) {
			u64 free_vis_vram = total_vis_vram - used_vis_vram;
@@ -395,7 +404,7 @@
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
-		.resv = bo->tbo.resv,
+		.resv = bo->tbo.base.resv,
		.flags = 0
	};
	uint32_t domain;
@@ -407,7 +416,9 @@
	/* Don't move this buffer if we have depleted our allowance
	 * to move it. Don't move anything if the threshold is zero.
	 */
-	if (p->bytes_moved < p->bytes_moved_threshold) {
+	if (p->bytes_moved < p->bytes_moved_threshold &&
+	    (!bo->tbo.base.dma_buf ||
+	     list_empty(&bo->tbo.base.dma_buf->attachments))) {
		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
@@ -442,75 +453,12 @@
	return r;
 }
 
-/* Last resort, try to evict something from the current working set */
-static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
-				struct amdgpu_bo *validated)
-{
-	uint32_t domain = validated->allowed_domains;
-	struct ttm_operation_ctx ctx = { true, false };
-	int r;
-
-	if (!p->evictable)
-		return false;
-
-	for (;&p->evictable->tv.head != &p->validated;
-	     p->evictable = list_prev_entry(p->evictable, tv.head)) {
-
-		struct amdgpu_bo_list_entry *candidate = p->evictable;
-		struct amdgpu_bo *bo = candidate->robj;
-		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-		bool update_bytes_moved_vis;
-		uint32_t other;
-
-		/* If we reached our current BO we can forget it */
-		if (candidate->robj == validated)
-			break;
-
-		/* We can't move pinned BOs here */
-		if (bo->pin_count)
-			continue;
-
-		other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
-
-		/* Check if this BO is in one of the domains we need space for */
-		if (!(other & domain))
-			continue;
-
-		/* Check if we can move this BO somewhere else */
-		other = bo->allowed_domains & ~domain;
-		if (!other)
-			continue;
-
-		/* Good we can try to move this BO somewhere else */
-		update_bytes_moved_vis =
-			!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-			amdgpu_bo_in_cpu_visible_vram(bo);
-		amdgpu_bo_placement_from_domain(bo, other);
-		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-		p->bytes_moved += ctx.bytes_moved;
-		if (update_bytes_moved_vis)
-			p->bytes_moved_vis += ctx.bytes_moved;
-
-		if (unlikely(r))
-			break;
-
-		p->evictable = list_prev_entry(p->evictable, tv.head);
-		list_move(&candidate->tv.head, &p->validated);
-
-		return true;
-	}
-
-	return false;
-}
-
 static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
 {
	struct amdgpu_cs_parser *p = param;
	int r;
 
-	do {
-		r = amdgpu_cs_bo_validate(p, bo);
-	} while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
+	r = amdgpu_cs_bo_validate(p, bo);
	if (r)
		return r;
 
@@ -528,38 +476,31 @@
	int r;
 
	list_for_each_entry(lobj, validated, tv.head) {
-		struct amdgpu_bo *bo = lobj->robj;
-		bool binding_userptr = false;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
		struct mm_struct *usermm;
 
		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
		if (usermm && usermm != current->mm)
			return -EPERM;
 
-		/* Check if we have user pages and nobody bound the BO already */
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-		    lobj->user_pages) {
+		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
+		    lobj->user_invalidated && lobj->user_pages) {
			amdgpu_bo_placement_from_domain(bo,
							AMDGPU_GEM_DOMAIN_CPU);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			if (r)
				return r;
+
			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
						     lobj->user_pages);
-			binding_userptr = true;
		}
-
-		if (p->evictable == lobj)
-			p->evictable = NULL;
 
		r = amdgpu_cs_validate(p, bo);
		if (r)
			return r;
 
-		if (binding_userptr) {
-			kvfree(lobj->user_pages);
-			lobj->user_pages = NULL;
-		}
+		kvfree(lobj->user_pages);
+		lobj->user_pages = NULL;
	}
	return 0;
 }
@@ -574,7 +515,6 @@
	struct amdgpu_bo *gds;
	struct amdgpu_bo *gws;
	struct amdgpu_bo *oa;
-	unsigned tries = 10;
	int r;
 
	INIT_LIST_HEAD(&p->validated);
@@ -596,98 +536,63 @@
		return r;
	}
 
+	/* One for TTM and one for the CS job */
+	amdgpu_bo_list_for_each_entry(e, p->bo_list)
+		e->tv.num_shared = 2;
+
	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
-	if (p->bo_list->first_userptr != p->bo_list->num_entries)
-		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
 
	INIT_LIST_HEAD(&duplicates);
	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
-	if (p->uf_entry.robj && !p->uf_entry.robj->parent)
+	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
		list_add(&p->uf_entry.tv.head, &p->validated);
 
-	while (1) {
-		struct list_head need_pages;
+	/* Get userptr backing pages. If pages are updated after registered
+	 * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
+	 * amdgpu_ttm_backend_bind() to flush and invalidate new pages
+	 */
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		bool userpage_invalidated = false;
+		int i;
 
-		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-					   &duplicates);
-		if (unlikely(r != 0)) {
-			if (r != -ERESTARTSYS)
-				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
-			goto error_free_pages;
+		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+					       sizeof(struct page *),
+					       GFP_KERNEL | __GFP_ZERO);
+		if (!e->user_pages) {
+			DRM_ERROR("calloc failure\n");
+			return -ENOMEM;
		}
 
-		INIT_LIST_HEAD(&need_pages);
-		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-			struct amdgpu_bo *bo = e->robj;
-
-			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
-				 &e->user_invalidated) && e->user_pages) {
-
-				/* We acquired a page array, but somebody
-				 * invalidated it. Free it and try again
-				 */
-				release_pages(e->user_pages,
-					      bo->tbo.ttm->num_pages);
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-			}
-
-			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-			    !e->user_pages) {
-				list_del(&e->tv.head);
-				list_add(&e->tv.head, &need_pages);
-
-				amdgpu_bo_unreserve(e->robj);
-			}
+		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
+		if (r) {
+			kvfree(e->user_pages);
+			e->user_pages = NULL;
+			return r;
		}
 
-		if (list_empty(&need_pages))
-			break;
-
-		/* Unreserve everything again. */
-		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-		/* We tried too many times, just abort */
-		if (!--tries) {
-			r = -EDEADLK;
-			DRM_ERROR("deadlock in %s\n", __func__);
-			goto error_free_pages;
-		}
-
-		/* Fill the page arrays for all userptrs. */
-		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->robj->tbo.ttm;
-
-			e->user_pages = kvmalloc_array(ttm->num_pages,
-						       sizeof(struct page*),
-						       GFP_KERNEL | __GFP_ZERO);
-			if (!e->user_pages) {
-				r = -ENOMEM;
-				DRM_ERROR("calloc failure in %s\n", __func__);
-				goto error_free_pages;
-			}
-
-			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
-			if (r) {
-				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-				goto error_free_pages;
+		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
+			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+				userpage_invalidated = true;
+				break;
			}
		}
+		e->user_invalidated = userpage_invalidated;
+	}
 
-		/* And try again. */
-		list_splice(&need_pages, &p->validated);
+	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+				   &duplicates);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+		goto out;
	}
 
	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
	p->bytes_moved = 0;
	p->bytes_moved_vis = 0;
-	p->evictable = list_last_entry(&p->validated,
-				       struct amdgpu_bo_list_entry,
-				       tv.head);
 
	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
				      amdgpu_cs_validate, p);
@@ -697,16 +602,12 @@
	}
 
	r = amdgpu_cs_list_validate(p, &duplicates);
-	if (r) {
-		DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n");
+	if (r)
		goto error_validate;
-	}
 
	r = amdgpu_cs_list_validate(p, &p->validated);
-	if (r) {
-		DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n");
+	if (r)
		goto error_validate;
-	}
 
	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);
@@ -715,24 +616,30 @@
	gws = p->bo_list->gws_obj;
	oa = p->bo_list->oa_obj;
 
-	amdgpu_bo_list_for_each_entry(e, p->bo_list)
-		e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		/* Make sure we use the exclusive slot for shared BOs */
+		if (bo->prime_shared_count)
+			e->tv.num_shared = 0;
+		e->bo_va = amdgpu_vm_bo_find(vm, bo);
+	}
 
	if (gds) {
-		p->job->gds_base = amdgpu_bo_gpu_offset(gds);
-		p->job->gds_size = amdgpu_bo_size(gds);
+		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
	}
	if (gws) {
-		p->job->gws_base = amdgpu_bo_gpu_offset(gws);
-		p->job->gws_size = amdgpu_bo_size(gws);
+		p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+		p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
	}
	if (oa) {
-		p->job->oa_base = amdgpu_bo_gpu_offset(oa);
-		p->job->oa_size = amdgpu_bo_size(oa);
+		p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+		p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
	}
 
-	if (!r && p->uf_entry.robj) {
-		struct amdgpu_bo *uf = p->uf_entry.robj;
+	if (!r && p->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
 
		r = amdgpu_ttm_alloc_gart(&uf->tbo);
		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
@@ -741,31 +648,25 @@
 error_validate:
	if (r)
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-error_free_pages:
-
-	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		if (!e->user_pages)
-			continue;
-
-		release_pages(e->user_pages,
-			      e->robj->tbo.ttm->num_pages);
-		kvfree(e->user_pages);
-	}
-
+out:
	return r;
 }
 
 static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 {
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_bo_list_entry *e;
	int r;
 
	list_for_each_entry(e, &p->validated, tv.head) {
-		struct reservation_object *resv = e->robj->tbo.resv;
-		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
-				     amdgpu_bo_explicit_sync(e->robj));
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		struct dma_resv *resv = bo->tbo.base.resv;
+		enum amdgpu_sync_mode sync_mode;
 
+		sync_mode = amdgpu_bo_explicit_sync(bo) ?
+			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
+		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
+				     &fpriv->vm);
		if (r)
			return r;
	}
@@ -789,9 +690,11 @@
	ttm_eu_backoff_reservation(&parser->ticket,
				   &parser->validated);
 
-	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
-		drm_syncobj_put(parser->post_dep_syncobjs[i]);
-	kfree(parser->post_dep_syncobjs);
+	for (i = 0; i < parser->num_post_deps; i++) {
+		drm_syncobj_put(parser->post_deps[i].syncobj);
+		kfree(parser->post_deps[i].chain);
+	}
+	kfree(parser->post_deps);
 
	dma_fence_put(parser->fence);
 
@@ -807,11 +710,16 @@
	kfree(parser->chunks);
	if (parser->job)
		amdgpu_job_free(parser->job);
-	amdgpu_bo_unref(&parser->uf_entry.robj);
+	if (parser->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
+
+		amdgpu_bo_unref(&uf);
+	}
 }
 
-static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
+static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 {
+	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
@@ -820,92 +728,8 @@
	struct amdgpu_bo *bo;
	int r;
 
-	r = amdgpu_vm_clear_freed(adev, vm, NULL);
-	if (r)
-		return r;
-
-	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
-	if (r)
-		return r;
-
-	r = amdgpu_sync_fence(adev, &p->job->sync,
-			      fpriv->prt_va->last_pt_update, false);
-	if (r)
-		return r;
-
-	if (amdgpu_sriov_vf(adev)) {
-		struct dma_fence *f;
-
-		bo_va = fpriv->csa_va;
-		BUG_ON(!bo_va);
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
-		if (r)
-			return r;
-
-		f = bo_va->last_pt_update;
-		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
-		if (r)
-			return r;
-	}
-
-	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-		struct dma_fence *f;
-
-		/* ignore duplicates */
-		bo = e->robj;
-		if (!bo)
-			continue;
-
-		bo_va = e->bo_va;
-		if (bo_va == NULL)
-			continue;
-
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
-		if (r)
-			return r;
-
-		f = bo_va->last_pt_update;
-		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
-		if (r)
-			return r;
-	}
-
-	r = amdgpu_vm_handle_moved(adev, vm);
-	if (r)
-		return r;
-
-	r = amdgpu_vm_update_directories(adev, vm);
-	if (r)
-		return r;
-
-	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
-	if (r)
-		return r;
-
-	if (amdgpu_vm_debug) {
-		/* Invalidate all BOs to test for userspace bugs */
-		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-			/* ignore duplicates */
-			if (!e->robj)
-				continue;
-
-			amdgpu_vm_bo_invalidate(adev, e->robj, false);
-		}
-	}
-
-	return r;
-}
-
-static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
-				 struct amdgpu_cs_parser *p)
-{
-	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_ring *ring = p->ring;
-	int r;
-
	/* Only for UVD/VCE VM emulation */
-	if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
+	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
		unsigned i, j;
 
		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
@@ -924,7 +748,7 @@
			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
				continue;
 
-			va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
+			va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
			if (r) {
				DRM_ERROR("IB va_start is invalid\n");
@@ -946,7 +770,7 @@
			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
			kptr += va_start - offset;
 
-			if (p->ring->funcs->parse_cs) {
+			if (ring->funcs->parse_cs) {
				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
				amdgpu_bo_kunmap(aobj);
 
@@ -965,16 +789,78 @@
		}
	}
 
-	if (p->job->vm) {
-		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
+	if (!p->job->vm)
+		return amdgpu_cs_sync_rings(p);
 
-		r = amdgpu_bo_vm_update_pte(p);
+
+	r = amdgpu_vm_clear_freed(adev, vm, NULL);
+	if (r)
+		return r;
+
+	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+	if (r)
+		return r;
+
+	r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
+	if (r)
+		return r;
+
+	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+		bo_va = fpriv->csa_va;
+		BUG_ON(!bo_va);
+		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;
 
-		r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
		if (r)
			return r;
+	}
+
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		/* ignore duplicates */
+		bo = ttm_to_amdgpu_bo(e->tv.bo);
+		if (!bo)
+			continue;
+
+		bo_va = e->bo_va;
+		if (bo_va == NULL)
+			continue;
+
+		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		if (r)
+			return r;
+
+		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+		if (r)
+			return r;
+	}
+
+	r = amdgpu_vm_handle_moved(adev, vm);
+	if (r)
+		return r;
+
+	r = amdgpu_vm_update_pdes(adev, vm, false);
+	if (r)
+		return r;
+
+	r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
+	if (r)
+		return r;
+
+	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
+
+	if (amdgpu_vm_debug) {
+		/* Invalidate all BOs to test for userspace bugs */
+		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+			/* ignore duplicates */
+			if (!bo)
+				continue;
+
+			amdgpu_vm_bo_invalidate(adev, bo, false);
+		}
	}
 
	return amdgpu_cs_sync_rings(p);
@@ -985,14 +871,15 @@
 {
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
-	int i, j;
	int r, ce_preempt = 0, de_preempt = 0;
+	struct amdgpu_ring *ring;
+	int i, j;
 
	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
		struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-		struct amdgpu_ring *ring;
+		struct drm_sched_entity *entity;
 
		chunk = &parser->chunks[i];
		ib = &parser->job->ibs[j];
@@ -1001,7 +888,8 @@
		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;
 
-		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) {
+		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
+		    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
					ce_preempt++;
@@ -1014,8 +902,9 @@
				return -EINVAL;
		}
 
-		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
-					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
+		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
+					  chunk_ib->ip_instance, chunk_ib->ring,
+					  &entity);
		if (r)
			return r;
 
@@ -1023,14 +912,20 @@
			parser->job->preamble_status |=
				AMDGPU_PREAMBLE_IB_PRESENT;
 
-		if (parser->ring && parser->ring != ring)
+		if (parser->entity && parser->entity != entity)
			return -EINVAL;
 
-		parser->ring = ring;
+		/* Return if there is no run queue associated with this entity.
+		 * Possibly because of disabled HW IP*/
+		if (entity->rq == NULL)
+			return -EINVAL;
 
-		r = amdgpu_ib_get(adev, vm,
-				  ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
-				  ib);
+		parser->entity = entity;
+
+		ring = to_amdgpu_ring(entity->rq->sched);
+		r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
+				  chunk_ib->ib_bytes : 0,
+				  AMDGPU_IB_POOL_DELAYED, ib);
		if (r) {
			DRM_ERROR("Failed to get ib !\n");
			return r;
@@ -1043,13 +938,12 @@
		j++;
	}
 
-	/* UVD & VCE fw doesn't support user fences */
-	if (parser->job->uf_addr && (
-	    parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
-	    parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+	/* MM engine doesn't support user fences */
+	ring = to_amdgpu_ring(parser->entity->rq->sched);
+	if (parser->job->uf_addr && ring->funcs->no_user_fence)
		return -EINVAL;
 
-	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1065,51 +959,62 @@
		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
	for (i = 0; i < num_deps; ++i) {
-		struct amdgpu_ring *ring;
		struct amdgpu_ctx *ctx;
+		struct drm_sched_entity *entity;
		struct dma_fence *fence;
 
		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
		if (ctx == NULL)
			return -EINVAL;
 
-		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
-					 deps[i].ip_type,
-					 deps[i].ip_instance,
-					 deps[i].ring, &ring);
+		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
+					  deps[i].ip_instance,
+					  deps[i].ring, &entity);
		if (r) {
			amdgpu_ctx_put(ctx);
			return r;
		}
 
-		fence = amdgpu_ctx_get_fence(ctx, ring,
-					     deps[i].handle);
-		if (IS_ERR(fence)) {
-			r = PTR_ERR(fence);
-			amdgpu_ctx_put(ctx);
-			return r;
-		} else if (fence) {
-			r = amdgpu_sync_fence(p->adev, &p->job->sync, fence,
-					      true);
-			dma_fence_put(fence);
-			amdgpu_ctx_put(ctx);
-			if (r)
-				return r;
+		fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
+		amdgpu_ctx_put(ctx);
+
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
+		else if (!fence)
+			continue;
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+			struct drm_sched_fence *s_fence;
+			struct dma_fence *old = fence;
+
+			s_fence = to_drm_sched_fence(fence);
+			fence = dma_fence_get(&s_fence->scheduled);
+			dma_fence_put(old);
		}
+
+		r = amdgpu_sync_fence(&p->job->sync, fence);
+		dma_fence_put(fence);
+		if (r)
+			return r;
	}
	return 0;
 }
 
 static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
-						 uint32_t handle)
+						 uint32_t handle, u64 point,
+						 u64 flags)
 {
-	int r;
	struct dma_fence *fence;
-	r = drm_syncobj_find_fence(p->filp, handle, &fence);
-	if (r)
-		return r;
+	int r;
 
-	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+	if (r) {
+		DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
+			  handle, point, r);
+		return r;
+	}
+
+	r = amdgpu_sync_fence(&p->job->sync, fence);
	dma_fence_put(fence);
 
	return r;
@@ -1118,46 +1023,123 @@
 static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
					    struct amdgpu_cs_chunk *chunk)
 {
+	struct drm_amdgpu_cs_chunk_sem *deps;
	unsigned num_deps;
	int i, r;
-	struct drm_amdgpu_cs_chunk_sem *deps;
 
	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);
-
	for (i = 0; i < num_deps; ++i) {
-		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
+							  0, 0);
		if (r)
			return r;
	}
+
+	return 0;
+}
+
+
+static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
+						     struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+	unsigned num_deps;
+	int i, r;
+
+	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_syncobj_lookup_and_add_to_sync(p,
+							  syncobj_deps[i].handle,
+							  syncobj_deps[i].point,
+							  syncobj_deps[i].flags);
+		if (r)
+			return r;
+	}
+
	return 0;
 }
 
 static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
					     struct amdgpu_cs_chunk *chunk)
 {
+	struct drm_amdgpu_cs_chunk_sem *deps;
	unsigned num_deps;
	int i;
-	struct drm_amdgpu_cs_chunk_sem *deps;
+
	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);
 
-	p->post_dep_syncobjs = kmalloc_array(num_deps,
-					     sizeof(struct drm_syncobj *),
-					     GFP_KERNEL);
-	p->num_post_dep_syncobjs = 0;
+	if (p->post_deps)
+		return -EINVAL;
 
-	if (!p->post_dep_syncobjs)
+	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+				     GFP_KERNEL);
+	p->num_post_deps = 0;
+
+	if (!p->post_deps)
+		return -ENOMEM;
+
+
+	for (i = 0; i < num_deps; ++i) {
+		p->post_deps[i].syncobj =
+			drm_syncobj_find(p->filp, deps[i].handle);
+		if (!p->post_deps[i].syncobj)
+			return -EINVAL;
+		p->post_deps[i].chain = NULL;
+		p->post_deps[i].point = 0;
+		p->num_post_deps++;
+	}
+
+	return 0;
+}
+
+
+static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
+						      struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+	unsigned num_deps;
+	int i;
+
+	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+
+	if (p->post_deps)
+		return -EINVAL;
+
+	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+				     GFP_KERNEL);
+	p->num_post_deps = 0;
+
+	if (!p->post_deps)
		return -ENOMEM;
 
	for (i = 0; i < num_deps; ++i) {
-		p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
-		if (!p->post_dep_syncobjs[i])
+		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
+
+		dep->chain = NULL;
+		if (syncobj_deps[i].point) {
+			dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
+			if (!dep->chain)
+				return -ENOMEM;
+		}
+
+		dep->syncobj = drm_syncobj_find(p->filp,
						syncobj_deps[i].handle);
+		if (!dep->syncobj) {
+			kfree(dep->chain);
			return -EINVAL;
-		p->num_post_dep_syncobjs++;
+		}
+		dep->point = syncobj_deps[i].point;
+		p->num_post_deps++;
	}
+
	return 0;
 }
 
@@ -1171,18 +1153,33 @@
 
		chunk = &p->chunks[i];
 
-		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+		switch (chunk->chunk_id) {
+		case AMDGPU_CHUNK_ID_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
			r = amdgpu_cs_process_fence_dep(p, chunk);
			if (r)
				return r;
-		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
			if (r)
				return r;
-		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
			if (r)
				return r;
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+			r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
+			if (r)
+				return r;
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+			r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
+			if (r)
+				return r;
+			break;
		}
	}
 
@@ -1193,53 +1190,58 @@
 {
	int i;
 
-	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
-		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
+	for (i = 0; i < p->num_post_deps; ++i) {
+		if (p->post_deps[i].chain && p->post_deps[i].point) {
+			drm_syncobj_add_point(p->post_deps[i].syncobj,
+					      p->post_deps[i].chain,
+					      p->fence, p->post_deps[i].point);
+			p->post_deps[i].chain = NULL;
+		} else {
+			drm_syncobj_replace_fence(p->post_deps[i].syncobj,
+						  p->fence);
+		}
+	}
 }
 
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
 {
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_ring *ring = p->ring;
-	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
-	enum drm_sched_priority priority;
+	struct drm_sched_entity *entity = p->entity;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_job *job;
	uint64_t seq;
-
	int r;
 
	job = p->job;
	p->job = NULL;
 
-	r = drm_sched_job_init(&job->base, entity, p->filp);
+	r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
	if (r)
		goto error_unlock;
 
-	/* No memory allocation is allowed while holding the mn lock */
-	amdgpu_mn_lock(p->mn);
-	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		struct amdgpu_bo *bo = e->robj;
+	/* No memory allocation is allowed while holding the notifier lock.
+	 * The lock is held until amdgpu_cs_submit is finished and fence is
+	 * added to BOs.
+	 */
+	mutex_lock(&p->adev->notifier_lock);
 
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-			r = -ERESTARTSYS;
-			goto error_abort;
-		}
+	/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
+	 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
+	 */
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+	}
+	if (r) {
+		r = -EAGAIN;
+		goto error_abort;
	}
 
-	job->owner = p->filp;
	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
-	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
-	if (r) {
-		dma_fence_put(p->fence);
-		dma_fence_put(&job->base.s_fence->finished);
-		amdgpu_job_free(job);
-		amdgpu_mn_unlock(p->mn);
-		return r;
-	}
-
+	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
	amdgpu_cs_post_dependencies(p);
 
	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
@@ -1255,34 +1257,45 @@
 
	trace_amdgpu_cs_ioctl(job);
	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
-	priority = job->base.s_priority;
	drm_sched_entity_push_job(&job->base, entity);
 
-	ring = to_amdgpu_ring(entity->rq->sched);
-	amdgpu_ring_priority_get(ring, priority);
+	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
 
	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
-	amdgpu_mn_unlock(p->mn);
+	mutex_unlock(&p->adev->notifier_lock);
 
	return 0;
 
 error_abort:
-	dma_fence_put(&job->base.s_fence->finished);
-	job->base.s_fence = NULL;
-	amdgpu_mn_unlock(p->mn);
+	drm_sched_job_cleanup(&job->base);
+	mutex_unlock(&p->adev->notifier_lock);
 
 error_unlock:
	amdgpu_job_free(job);
	return r;
 }
 
+static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
+{
+	int i;
+
+	if (!trace_amdgpu_cs_enabled())
+		return;
+
+	for (i = 0; i < parser->job->num_ibs; i++)
+		trace_amdgpu_cs(parser, i);
+}
+
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
-	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_cs *cs = data;
	struct amdgpu_cs_parser parser = {};
	bool reserved_buffers = false;
-	int i, r;
+	int r;
+
+	if (amdgpu_ras_intr_triggered())
+		return -EHWPOISON;
 
	if (!adev->accel_working)
		return -EBUSY;
@@ -1292,7 +1305,8 @@
 
	r = amdgpu_cs_parser_init(&parser, data);
	if (r) {
-		DRM_ERROR("Failed to initialize parser !\n");
+		if (printk_ratelimit())
+			DRM_ERROR("Failed to initialize parser %d!\n", r);
		goto out;
	}
 
@@ -1300,27 +1314,26 @@
	if (r)
		goto out;
 
-	r = amdgpu_cs_parser_bos(&parser, data);
-	if (r) {
-		if (r == -ENOMEM)
-			DRM_ERROR("Not enough memory for command submission!\n");
-		else if (r != -ERESTARTSYS)
-			DRM_ERROR("Failed to process the buffer list %d!\n", r);
-		goto out;
-	}
-
-	reserved_buffers = true;
-
	r = amdgpu_cs_dependencies(adev, &parser);
	if (r) {
		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
		goto out;
	}
 
-	for (i = 0; i < parser.job->num_ibs; i++)
-		trace_amdgpu_cs(&parser, i);
+	r = amdgpu_cs_parser_bos(&parser, data);
+	if (r) {
+		if (r == -ENOMEM)
+			DRM_ERROR("Not enough memory for command submission!\n");
+		else if (r != -ERESTARTSYS && r != -EAGAIN)
+			DRM_ERROR("Failed to process the buffer list %d!\n", r);
+		goto out;
+	}
 
-	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
+	reserved_buffers = true;
+
+	trace_amdgpu_cs_ibs(&parser);
+
+	r = amdgpu_cs_vm_handling(&parser);
	if (r)
		goto out;
 
@@ -1328,6 +1341,7 @@
 
 out:
	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+
	return r;
 }
 
@@ -1344,9 +1358,8 @@
			 struct drm_file *filp)
 {
	union drm_amdgpu_wait_cs *wait = data;
-	struct amdgpu_device *adev = dev->dev_private;
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
-	struct amdgpu_ring *ring = NULL;
+	struct drm_sched_entity *entity;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	long r;
@@ -1355,15 +1368,14 @@
	if (ctx == NULL)
		return -EINVAL;
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
-				 wait->in.ip_type, wait->in.ip_instance,
-				 wait->in.ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
+				  wait->in.ring, &entity);
	if (r) {
		amdgpu_ctx_put(ctx);
		return r;
	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
	if (IS_ERR(fence))
		r = PTR_ERR(fence);
	else if (fence) {
@@ -1395,7 +1407,7 @@
					    struct drm_file *filp,
					    struct drm_amdgpu_fence *user)
 {
-	struct amdgpu_ring *ring;
+	struct drm_sched_entity *entity;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	int r;
@@ -1404,14 +1416,14 @@
	if (ctx == NULL)
		return ERR_PTR(-EINVAL);
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
-				 user->ip_instance, user->ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
+				  user->ring, &entity);
	if (r) {
		amdgpu_ctx_put(ctx);
		return ERR_PTR(r);
	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
+	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
	amdgpu_ctx_put(ctx);
 
	return fence;
@@ -1420,7 +1432,7 @@
 int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *filp)
 {
-	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_fence_to_handle *info = data;
	struct dma_fence *fence;
	struct drm_syncobj *syncobj;
@@ -1430,6 +1442,9 @@
	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
	if (IS_ERR(fence))
		return PTR_ERR(fence);
+
+	if (!fence)
+		fence = dma_fence_get_stub();
 
	switch (info->in.what) {
	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
@@ -1469,6 +1484,7 @@
		return 0;
 
	default:
+		dma_fence_put(fence);
		return -EINVAL;
	}
 }
@@ -1593,7 +1609,7 @@
 int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp)
 {
-	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_wait_fences *wait = data;
	uint32_t fence_count = wait->in.fence_count;
	struct drm_amdgpu_fence *fences_user;
@@ -1655,7 +1671,7 @@
	*map = mapping;
 
	/* Double check that the BO is reserved by this CS */
-	if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
+	if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
		return -EINVAL;
 
	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
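
Note on the new timeline-syncobj chunks handled above (AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT/SIGNAL): each entry is a drm_amdgpu_cs_chunk_syncobj carrying a syncobj handle, flags and a timeline point, parsed out of chunk->kdata by amdgpu_cs_process_syncobj_timeline_in_dep()/_out_dep(). The sketch below shows how a userspace submitter might describe one timeline-signal chunk; it is an illustrative assumption rather than part of this patch, and the chunk_data/header details come from the amdgpu UAPI headers, not from the diff itself.

/* Illustrative userspace sketch (assumption, not part of the patch):
 * building one AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL chunk for the CS
 * ioctl. The syncobj entry fields (handle/flags/point) match the kernel
 * parser above; chunk_data being a user pointer cast to u64 follows the
 * amdgpu_drm.h UAPI and should be checked against the headers in use.
 * Error handling and the IB chunk itself are omitted.
 */
#include <stdint.h>
#include <string.h>
#include <drm/amdgpu_drm.h>

static void fill_timeline_signal_chunk(struct drm_amdgpu_cs_chunk *chunk,
				       struct drm_amdgpu_cs_chunk_syncobj *sig,
				       uint32_t syncobj_handle, uint64_t point)
{
	memset(sig, 0, sizeof(*sig));
	sig->handle = syncobj_handle;	/* DRM syncobj to signal at submission */
	sig->point = point;		/* point == 0 falls back to the binary replace_fence path */

	chunk->chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL;
	chunk->length_dw = sizeof(*sig) / 4;		/* kernel derives the entry count from length_dw * 4 */
	chunk->chunk_data = (uint64_t)(uintptr_t)sig;	/* user pointer handed to the kernel */
}

On the kernel side, amdgpu_cs_process_syncobj_timeline_out_dep() above pre-allocates the dma_fence_chain for every entry with a non-zero point, so amdgpu_cs_post_dependencies() can add the timeline point at the end of submission without a failure path.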