2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -31,7 +31,8 @@
  */
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
+#include <linux/dma-buf.h>
+
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_cache.h>
 #include "amdgpu.h"
@@ -50,18 +51,6 @@
  * uvd, etc. for kernel managed allocations used by the GPU.
  *
  */
-
-static bool amdgpu_bo_need_backup(struct amdgpu_device *adev)
-{
-	if (adev->flags & AMD_IS_APU)
-		return false;
-
-	if (amdgpu_gpu_recovery == 0 ||
-	    (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))
-		return false;
-
-	return true;
-}

 /**
  * amdgpu_bo_subtract_pin_size - Remove BO from pin_size accounting
@@ -92,20 +81,19 @@
 	if (bo->pin_count > 0)
 		amdgpu_bo_subtract_pin_size(bo);

-	if (bo->kfd_bo)
-		amdgpu_amdkfd_unreserve_system_memory_limit(bo);
-
 	amdgpu_bo_kunmap(bo);

-	if (bo->gem_base.import_attach)
-		drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg);
-	drm_gem_object_release(&bo->gem_base);
-	amdgpu_bo_unref(&bo->parent);
+	if (bo->tbo.base.import_attach)
+		drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg);
+	drm_gem_object_release(&bo->tbo.base);
+	/* in case amdgpu_device_recover_vram got NULL of bo->parent */
 	if (!list_empty(&bo->shadow_list)) {
 		mutex_lock(&adev->shadow_list_lock);
 		list_del_init(&bo->shadow_list);
 		mutex_unlock(&adev->shadow_list_lock);
 	}
+	amdgpu_bo_unref(&bo->parent);
+
 	kfree(bo->metadata);
 	kfree(bo);
 }
@@ -148,8 +136,8 @@

 		places[c].fpfn = 0;
 		places[c].lpfn = 0;
-		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
-			TTM_PL_FLAG_VRAM;
+		places[c].mem_type = TTM_PL_VRAM;
+		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED;

 		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
 			places[c].lpfn = visible_pfn;
@@ -163,11 +151,9 @@

 	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
 		places[c].fpfn = 0;
-		if (flags & AMDGPU_GEM_CREATE_SHADOW)
-			places[c].lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
-		else
-			places[c].lpfn = 0;
-		places[c].flags = TTM_PL_FLAG_TT;
+		places[c].lpfn = 0;
+		places[c].mem_type = TTM_PL_TT;
+		places[c].flags = 0;
 		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
 			places[c].flags |= TTM_PL_FLAG_WC |
 				TTM_PL_FLAG_UNCACHED;
@@ -179,7 +165,8 @@
 	if (domain & AMDGPU_GEM_DOMAIN_CPU) {
 		places[c].fpfn = 0;
 		places[c].lpfn = 0;
-		places[c].flags = TTM_PL_FLAG_SYSTEM;
+		places[c].mem_type = TTM_PL_SYSTEM;
+		places[c].flags = 0;
 		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
 			places[c].flags |= TTM_PL_FLAG_WC |
 				TTM_PL_FLAG_UNCACHED;
@@ -191,28 +178,32 @@
 	if (domain & AMDGPU_GEM_DOMAIN_GDS) {
 		places[c].fpfn = 0;
 		places[c].lpfn = 0;
-		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GDS;
+		places[c].mem_type = AMDGPU_PL_GDS;
+		places[c].flags = TTM_PL_FLAG_UNCACHED;
 		c++;
 	}

 	if (domain & AMDGPU_GEM_DOMAIN_GWS) {
 		places[c].fpfn = 0;
 		places[c].lpfn = 0;
-		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GWS;
+		places[c].mem_type = AMDGPU_PL_GWS;
+		places[c].flags = TTM_PL_FLAG_UNCACHED;
 		c++;
 	}

 	if (domain & AMDGPU_GEM_DOMAIN_OA) {
 		places[c].fpfn = 0;
 		places[c].lpfn = 0;
-		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_OA;
+		places[c].mem_type = AMDGPU_PL_OA;
+		places[c].flags = TTM_PL_FLAG_UNCACHED;
 		c++;
 	}

 	if (!c) {
 		places[c].fpfn = 0;
 		places[c].lpfn = 0;
-		places[c].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+		places[c].mem_type = TTM_PL_SYSTEM;
+		places[c].flags = TTM_PL_MASK_CACHING;
 		c++;
 	}

@@ -253,12 +244,18 @@
 	bool free = false;
 	int r;

+	if (!size) {
+		amdgpu_bo_unref(bo_ptr);
+		return 0;
+	}
+
 	memset(&bp, 0, sizeof(bp));
 	bp.size = size;
 	bp.byte_align = align;
 	bp.domain = domain;
-	bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-		AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+	bp.flags = cpu_addr ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED
+		: AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+	bp.flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	bp.type = ttm_bo_type_kernel;
 	bp.resv = NULL;

@@ -346,9 +343,77 @@
 	if (r)
 		return r;

-	amdgpu_bo_unreserve(*bo_ptr);
+	if (*bo_ptr)
+		amdgpu_bo_unreserve(*bo_ptr);

 	return 0;
+}
+
+/**
+ * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location
+ *
+ * @adev: amdgpu device object
+ * @offset: offset of the BO
+ * @size: size of the BO
+ * @domain: where to place it
+ * @bo_ptr: used to initialize BOs in structures
+ * @cpu_addr: optional CPU address mapping
+ *
+ * Creates a kernel BO at a specific offset in the address space of the domain.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
+			       uint64_t offset, uint64_t size, uint32_t domain,
+			       struct amdgpu_bo **bo_ptr, void **cpu_addr)
+{
+	struct ttm_operation_ctx ctx = { false, false };
+	unsigned int i;
+	int r;
+
+	offset &= PAGE_MASK;
+	size = ALIGN(size, PAGE_SIZE);
+
+	r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr,
+				      NULL, cpu_addr);
+	if (r)
+		return r;
+
+	if ((*bo_ptr) == NULL)
+		return 0;
+
+	/*
+	 * Remove the original mem node and create a new one at the request
+	 * position.
+	 */
+	if (cpu_addr)
+		amdgpu_bo_kunmap(*bo_ptr);
+
+	ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.mem);
+
+	for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) {
+		(*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT;
+		(*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
+	}
+	r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement,
+			     &(*bo_ptr)->tbo.mem, &ctx);
+	if (r)
+		goto error;
+
+	if (cpu_addr) {
+		r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
+		if (r)
+			goto error;
+	}
+
+	amdgpu_bo_unreserve(*bo_ptr);
+	return 0;
+
+error:
+	amdgpu_bo_unreserve(*bo_ptr);
+	amdgpu_bo_unref(bo_ptr);
+	return r;
 }

 /**
@@ -386,14 +451,14 @@
 static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
 				    unsigned long size, u32 domain)
 {
-	struct ttm_mem_type_manager *man = NULL;
+	struct ttm_resource_manager *man = NULL;

 	/*
 	 * If GTT is part of requested domains the check must succeed to
 	 * allow fall back to GTT
 	 */
 	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
-		man = &adev->mman.bdev.man[TTM_PL_TT];
+		man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);

 		if (size < (man->size << PAGE_SHIFT))
 			return true;
@@ -402,7 +467,7 @@
 	}

 	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
-		man = &adev->mman.bdev.man[TTM_PL_VRAM];
+		man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);

 		if (size < (man->size << PAGE_SHIFT))
 			return true;
@@ -420,13 +485,47 @@
 	return false;
 }

+bool amdgpu_bo_support_uswc(u64 bo_flags)
+{
+
+#ifdef CONFIG_X86_32
+	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
+	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
+	 */
+	return false;
+#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
+	/* Don't try to enable write-combining when it can't work, or things
+	 * may be slow
+	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
+	 */
+
+#ifndef CONFIG_COMPILE_TEST
+#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
+	 thanks to write-combining
+#endif
+
+	if (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
+		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
+			      "better performance thanks to write-combining\n");
+	return false;
+#else
+	/* For architectures that don't support WC memory,
+	 * mask out the WC flag from the BO
+	 */
+	if (!drm_arch_can_wc_memory())
+		return false;
+
+	return true;
+#endif
+}
+
 static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 			       struct amdgpu_bo_param *bp,
 			       struct amdgpu_bo **bo_ptr)
 {
 	struct ttm_operation_ctx ctx = {
 		.interruptible = (bp->type != ttm_bo_type_kernel),
-		.no_wait_gpu = false,
+		.no_wait_gpu = bp->no_wait_gpu,
 		.resv = bp->resv,
 		.flags = bp->type != ttm_bo_type_kernel ?
 			TTM_OPT_FLAG_ALLOW_RES_EVICT : 0
@@ -436,8 +535,20 @@
 	size_t acc_size;
 	int r;

-	page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
-	size = ALIGN(size, PAGE_SIZE);
+	/* Note that GDS/GWS/OA allocates 1 page per byte/resource. */
+	if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
+		/* GWS and OA don't need any alignment. */
+		page_align = bp->byte_align;
+		size <<= PAGE_SHIFT;
+	} else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) {
+		/* Both size and alignment must be a multiple of 4. */
+		page_align = ALIGN(bp->byte_align, 4);
+		size = ALIGN(size, 4) << PAGE_SHIFT;
+	} else {
+		/* Memory should be aligned at least to a page size. */
+		page_align = ALIGN(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
+		size = ALIGN(size, PAGE_SIZE);
+	}

 	if (!amdgpu_bo_validate_size(adev, size, bp->domain))
 		return -ENOMEM;
@@ -450,9 +561,9 @@
 	bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL);
 	if (bo == NULL)
 		return -ENOMEM;
-	drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
+	drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size);
 	INIT_LIST_HEAD(&bo->shadow_list);
-	INIT_LIST_HEAD(&bo->va);
+	bo->vm_bo = NULL;
 	bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
 		bp->domain;
 	bo->allowed_domains = bo->preferred_domains;
@@ -462,36 +573,15 @@

 	bo->flags = bp->flags;

-#ifdef CONFIG_X86_32
-	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
-	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
-	 */
-	bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
-	/* Don't try to enable write-combining when it can't work, or things
-	 * may be slow
-	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
-	 */
-
-#ifndef CONFIG_COMPILE_TEST
-#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
-	 thanks to write-combining
-#endif
-
-	if (bo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
-		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
-			      "better performance thanks to write-combining\n");
-	bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-#else
-	/* For architectures that don't support WC memory,
-	 * mask out the WC flag from the BO
-	 */
-	if (!drm_arch_can_wc_memory())
+	if (!amdgpu_bo_support_uswc(bo->flags))
 		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-#endif

 	bo->tbo.bdev = &adev->mman.bdev;
-	amdgpu_bo_placement_from_domain(bo, bp->domain);
+	if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
+			  AMDGPU_GEM_DOMAIN_GDS))
+		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+	else
+		amdgpu_bo_placement_from_domain(bo, bp->domain);
 	if (bp->type == ttm_bo_type_kernel)
 		bo->tbo.priority = 1;

@@ -510,10 +600,10 @@
 	amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);

 	if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
-	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
+	    bo->tbo.mem.mem_type == TTM_PL_VRAM) {
 		struct dma_fence *fence;

-		r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
+		r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence);
 		if (unlikely(r))
 			goto fail_unreserve;

@@ -536,13 +626,13 @@

 fail_unreserve:
 	if (!bp->resv)
-		ww_mutex_unlock(&bo->tbo.resv->lock);
+		dma_resv_unlock(bo->tbo.base.resv);
 	amdgpu_bo_unref(&bo);
 	return r;
 }

 static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
-				   unsigned long size, int byte_align,
+				   unsigned long size,
 				   struct amdgpu_bo *bo)
 {
 	struct amdgpu_bo_param bp;
@@ -553,18 +643,17 @@

 	memset(&bp, 0, sizeof(bp));
 	bp.size = size;
-	bp.byte_align = byte_align;
 	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
 	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
 		AMDGPU_GEM_CREATE_SHADOW;
 	bp.type = ttm_bo_type_kernel;
-	bp.resv = bo->tbo.resv;
+	bp.resv = bo->tbo.base.resv;

 	r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);
 	if (!r) {
 		bo->shadow->parent = amdgpu_bo_ref(bo);
 		mutex_lock(&adev->shadow_list_lock);
-		list_add_tail(&bo->shadow_list, &adev->shadow_list);
+		list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list);
 		mutex_unlock(&adev->shadow_list_lock);
 	}

@@ -597,67 +686,20 @@
 	if (r)
 		return r;

-	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_bo_need_backup(adev)) {
+	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) {
 		if (!bp->resv)
-			WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
+			WARN_ON(dma_resv_lock((*bo_ptr)->tbo.base.resv,
							NULL));

-		r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr));
+		r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr);

 		if (!bp->resv)
-			reservation_object_unlock((*bo_ptr)->tbo.resv);
+			dma_resv_unlock((*bo_ptr)->tbo.base.resv);

 		if (r)
 			amdgpu_bo_unref(bo_ptr);
 	}

-	return r;
-}
-
-/**
- * amdgpu_bo_backup_to_shadow - Backs up an &amdgpu_bo buffer object
- * @adev: amdgpu device object
- * @ring: amdgpu_ring for the engine handling the buffer operations
- * @bo: &amdgpu_bo buffer to be backed up
- * @resv: reservation object with embedded fence
- * @fence: dma_fence associated with the operation
- * @direct: whether to submit the job directly
- *
- * Copies an &amdgpu_bo buffer object to its shadow object.
- * Not used for now.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
-			       struct amdgpu_ring *ring,
-			       struct amdgpu_bo *bo,
-			       struct reservation_object *resv,
-			       struct dma_fence **fence,
-			       bool direct)
-
-{
-	struct amdgpu_bo *shadow = bo->shadow;
-	uint64_t bo_addr, shadow_addr;
-	int r;
-
-	if (!shadow)
-		return -EINVAL;
-
-	bo_addr = amdgpu_bo_gpu_offset(bo);
-	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
-
-	r = reservation_object_reserve_shared(bo->tbo.resv);
-	if (r)
-		goto err;
-
-	r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr,
-			       amdgpu_bo_size(bo), resv, fence,
-			       direct, false);
-	if (!r)
-		amdgpu_bo_fence(bo, *fence, true);
-
-err:
 	return r;
 }

@@ -696,13 +738,10 @@
 }

 /**
- * amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object
- * @adev: amdgpu device object
- * @ring: amdgpu_ring for the engine handling the buffer operations
- * @bo: &amdgpu_bo buffer to be restored
- * @resv: reservation object with embedded fence
+ * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
+ *
+ * @shadow: &amdgpu_bo shadow to be restored
  * @fence: dma_fence associated with the operation
- * @direct: whether to submit the job directly
  *
  * Copies a buffer object's shadow content back to the object.
  * This is used for recovering a buffer from its shadow in case of a gpu
@@ -711,36 +750,19 @@
  * Returns:
  * 0 for success or a negative error code on failure.
  */
-int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
-				  struct amdgpu_ring *ring,
-				  struct amdgpu_bo *bo,
-				  struct reservation_object *resv,
-				  struct dma_fence **fence,
-				  bool direct)
+int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)

 {
-	struct amdgpu_bo *shadow = bo->shadow;
-	uint64_t bo_addr, shadow_addr;
-	int r;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+	uint64_t shadow_addr, parent_addr;

-	if (!shadow)
-		return -EINVAL;
+	shadow_addr = amdgpu_bo_gpu_offset(shadow);
+	parent_addr = amdgpu_bo_gpu_offset(shadow->parent);

-	bo_addr = amdgpu_bo_gpu_offset(bo);
-	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
-
-	r = reservation_object_reserve_shared(bo->tbo.resv);
-	if (r)
-		goto err;
-
-	r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
-			       amdgpu_bo_size(bo), resv, fence,
-			       direct, false);
-	if (!r)
-		amdgpu_bo_fence(bo, *fence, true);
-
-err:
-	return r;
+	return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
+				  amdgpu_bo_size(shadow), NULL, fence,
+				  true, false, false);
 }

 /**
@@ -769,7 +791,7 @@
 		return 0;
 	}

-	r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false,
+	r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, false, false,
 						MAX_SCHEDULE_TIMEOUT);
 	if (r < 0)
 		return r;
@@ -883,6 +905,10 @@
 	if (WARN_ON_ONCE(min_offset > max_offset))
 		return -EINVAL;

+	/* Check domain to be pinned to against preferred domains */
+	if (bo->preferred_domains & domain)
+		domain = bo->preferred_domains & domain;
+
 	/* A shared bo cannot be migrated to VRAM */
 	if (bo->prime_shared_count) {
 		if (domain & AMDGPU_GEM_DOMAIN_GTT)
@@ -905,13 +931,17 @@
 		bo->pin_count++;

 		if (max_offset != 0) {
-			u64 domain_start = bo->tbo.bdev->man[mem_type].gpu_offset;
+			u64 domain_start = amdgpu_ttm_domain_start(adev,
+								   mem_type);
 			WARN_ON_ONCE(max_offset <
 				     (amdgpu_bo_gpu_offset(bo) - domain_start));
 		}

 		return 0;
 	}
+
+	if (bo->tbo.base.import_attach)
+		dma_buf_pin(bo->tbo.base.import_attach);

 	bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	/* force to pin into visible video ram */
@@ -986,7 +1016,7 @@
 	struct ttm_operation_ctx ctx = { false, false };
 	int r, i;

-	if (!bo->pin_count) {
+	if (WARN_ON_ONCE(!bo->pin_count)) {
 		dev_warn(adev->dev, "%p unpin not necessary\n", bo);
 		return 0;
 	}
@@ -995,6 +1025,9 @@
 		return 0;

 	amdgpu_bo_subtract_pin_size(bo);
+
+	if (bo->tbo.base.import_attach)
+		dma_buf_unpin(bo->tbo.base.import_attach);

 	for (i = 0; i < bo->placement.num_placement; i++) {
 		bo->placements[i].lpfn = 0;
@@ -1020,10 +1053,12 @@
 int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
 {
 	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */
-	if (0 && (adev->flags & AMD_IS_APU)) {
+#ifndef CONFIG_HIBERNATION
+	if (adev->flags & AMD_IS_APU) {
 		/* Useless to evict on IGP chips */
 		return 0;
 	}
+#endif
 	return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
 }

@@ -1037,6 +1072,7 @@
 	"HBM",
 	"DDR3",
 	"DDR4",
+	"GDDR6",
 };

 /**
@@ -1108,7 +1144,10 @@
 int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
 			 struct vm_area_struct *vma)
 {
-	return ttm_fbdev_mmap(vma, &bo->tbo);
+	if (vma->vm_pgoff != 0)
+		return -EACCES;
+
+	return ttm_bo_mmap_obj(vma, &bo->tbo);
 }

 /**
@@ -1144,7 +1183,7 @@
  */
 void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
 {
-	lockdep_assert_held(&bo->tbo.resv->lock.base);
+	dma_resv_assert_held(bo->tbo.base.resv);

 	if (tiling_flags)
 		*tiling_flags = bo->tiling_flags;
@@ -1242,11 +1281,11 @@
  */
 void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 			   bool evict,
-			   struct ttm_mem_reg *new_mem)
+			   struct ttm_resource *new_mem)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_bo *abo;
-	struct ttm_mem_reg *old_mem = &bo->mem;
+	struct ttm_resource *old_mem = &bo->mem;

 	if (!amdgpu_bo_is_amdgpu_bo(bo))
 		return;
@@ -1255,6 +1294,10 @@
 	amdgpu_vm_bo_invalidate(adev, abo, evict);

 	amdgpu_bo_kunmap(abo);
+
+	if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach &&
+	    bo->mem.mem_type != TTM_PL_SYSTEM)
+		dma_buf_move_notify(abo->tbo.base.dma_buf);

 	/* remember the eviction */
 	if (evict)
@@ -1266,6 +1309,49 @@

 	/* move_notify is called before move happens */
 	trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
+}
+
+/**
+ * amdgpu_bo_release_notify - notification about a BO being released
+ * @bo: pointer to a buffer object
+ *
+ * Wipes VRAM buffers whose contents should not be leaked before the
+ * memory is released.
+ */
+void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
+{
+	struct dma_fence *fence = NULL;
+	struct amdgpu_bo *abo;
+	int r;
+
+	if (!amdgpu_bo_is_amdgpu_bo(bo))
+		return;
+
+	abo = ttm_to_amdgpu_bo(bo);
+
+	if (abo->kfd_bo)
+		amdgpu_amdkfd_unreserve_memory_limit(abo);
+
+	/* We only remove the fence if the resv has individualized. */
+	WARN_ON_ONCE(bo->type == ttm_bo_type_kernel
+		     && bo->base.resv != &bo->base._resv);
+	if (bo->base.resv == &bo->base._resv)
+		amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
+
+	if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
+	    !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
+		return;
+
+	if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
+		return;
+
+	r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
+	if (!WARN_ON(r)) {
+		amdgpu_bo_fence(abo, fence, false);
+		dma_fence_put(fence);
+	}
+
+	dma_resv_unlock(bo->base.resv);
 }

 /**
@@ -1340,12 +1426,58 @@
 void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
 		     bool shared)
 {
-	struct reservation_object *resv = bo->tbo.resv;
+	struct dma_resv *resv = bo->tbo.base.resv;

 	if (shared)
-		reservation_object_add_shared_fence(resv, fence);
+		dma_resv_add_shared_fence(resv, fence);
 	else
-		reservation_object_add_excl_fence(resv, fence);
+		dma_resv_add_excl_fence(resv, fence);
+}
+
+/**
+ * amdgpu_bo_sync_wait_resv - Wait for BO reservation fences
+ *
+ * @adev: amdgpu device pointer
+ * @resv: reservation object to sync to
+ * @sync_mode: synchronization mode
+ * @owner: fence owner
+ * @intr: Whether the wait is interruptible
+ *
+ * Extract the fences from the reservation object and waits for them to finish.
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
+ */
+int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,
+			     enum amdgpu_sync_mode sync_mode, void *owner,
+			     bool intr)
+{
+	struct amdgpu_sync sync;
+	int r;
+
+	amdgpu_sync_create(&sync);
+	amdgpu_sync_resv(adev, &sync, resv, sync_mode, owner);
+	r = amdgpu_sync_wait(&sync, intr);
+	amdgpu_sync_free(&sync);
+	return r;
+}
+
+/**
+ * amdgpu_bo_sync_wait - Wrapper for amdgpu_bo_sync_wait_resv
+ * @bo: buffer object to wait for
+ * @owner: fence owner
+ * @intr: Whether the wait is interruptible
+ *
+ * Wrapper to wait for fences in a BO.
+ * Returns:
+ * 0 on success, errno otherwise.
+ */
+int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+	return amdgpu_bo_sync_wait_resv(adev, bo->tbo.base.resv,
+					AMDGPU_SYNC_NE_OWNER, owner, intr);
 }

 /**
@@ -1361,15 +1493,31 @@
 u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
 {
 	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
-	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
-		     !amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem));
-	WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
-		     !bo->pin_count);
+	WARN_ON_ONCE(!dma_resv_is_locked(bo->tbo.base.resv) &&
+		     !bo->pin_count && bo->tbo.type != ttm_bo_type_kernel);
 	WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
 	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
 		     !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));

-	return bo->tbo.offset;
+	return amdgpu_bo_gpu_offset_no_check(bo);
+}
+
+/**
+ * amdgpu_bo_gpu_offset_no_check - return GPU offset of bo
+ * @bo: amdgpu object for which we query the offset
+ *
+ * Returns:
+ * current GPU offset of the object without raising warnings.
+ */
+u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	uint64_t offset;
+
+	offset = (bo->tbo.mem.start << PAGE_SHIFT) +
+		 amdgpu_ttm_domain_start(adev, bo->tbo.mem.mem_type);
+
+	return amdgpu_gmc_sign_extend(offset);
 }

 /**