2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -29,193 +29,50 @@
  * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
  * Dave Airlie
  */
+
+#include <linux/dma-mapping.h>
+#include <linux/iommu.h>
+#include <linux/hmm.h>
+#include <linux/pagemap.h>
+#include <linux/sched/task.h>
+#include <linux/sched/mm.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/swiotlb.h>
+#include <linux/dma-buf.h>
+#include <linux/sizes.h>
+
 #include <drm/ttm/ttm_bo_api.h>
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_module.h>
 #include <drm/ttm/ttm_page_alloc.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_debugfs.h>
 #include <drm/amdgpu_drm.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/swiotlb.h>
-#include <linux/swap.h>
-#include <linux/pagemap.h>
-#include <linux/debugfs.h>
-#include <linux/iommu.h>
+
 #include "amdgpu.h"
 #include "amdgpu_object.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_sdma.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_atomfirmware.h"
 #include "bif/bif_4_1_d.h"

-#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
+#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128

-static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
- struct ttm_mem_reg *mem, unsigned num_pages,
- uint64_t offset, unsigned window,
- struct amdgpu_ring *ring,
- uint64_t *addr);
+static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_resource *bo_mem);

-static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
-static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
-
-/*
- * Global memory.
- */
-
-/**
- * amdgpu_ttm_mem_global_init - Initialize and acquire reference to
- * memory object
- *
- * @ref: Object for initialization.
- *
- * This is called by drm_global_item_ref() when an object is being
- * initialized.
- */
-static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
+static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
+ unsigned int type,
+ uint64_t size_in_page)
 {
- return ttm_mem_global_init(ref->object);
-}
-
-/**
- * amdgpu_ttm_mem_global_release - Drop reference to a memory object
- *
- * @ref: Object being removed
- *
- * This is called by drm_global_item_unref() when an object is being
- * released.
- */
-static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
-{
- ttm_mem_global_release(ref->object);
-}
-
-/**
- * amdgpu_ttm_global_init - Initialize global TTM memory reference structures.
- *
- * @adev: AMDGPU device for which the global structures need to be registered.
- *
- * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()
- * during bring up.
- */
-static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
-{
- struct drm_global_reference *global_ref;
- int r;
-
- /* ensure reference is false in case init fails */
- adev->mman.mem_global_referenced = false;
-
- global_ref = &adev->mman.mem_global_ref;
- global_ref->global_type = DRM_GLOBAL_TTM_MEM;
- global_ref->size = sizeof(struct ttm_mem_global);
- global_ref->init = &amdgpu_ttm_mem_global_init;
- global_ref->release = &amdgpu_ttm_mem_global_release;
- r = drm_global_item_ref(global_ref);
- if (r) {
- DRM_ERROR("Failed setting up TTM memory accounting "
- "subsystem.\n");
- goto error_mem;
- }
-
- adev->mman.bo_global_ref.mem_glob =
- adev->mman.mem_global_ref.object;
- global_ref = &adev->mman.bo_global_ref.ref;
- global_ref->global_type = DRM_GLOBAL_TTM_BO;
- global_ref->size = sizeof(struct ttm_bo_global);
- global_ref->init = &ttm_bo_global_init;
- global_ref->release = &ttm_bo_global_release;
- r = drm_global_item_ref(global_ref);
- if (r) {
- DRM_ERROR("Failed setting up TTM BO subsystem.\n");
- goto error_bo;
- }
-
- mutex_init(&adev->mman.gtt_window_lock);
-
- adev->mman.mem_global_referenced = true;
-
- return 0;
-
-error_bo:
- drm_global_item_unref(&adev->mman.mem_global_ref);
-error_mem:
- return r;
-}
-
-static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
-{
- if (adev->mman.mem_global_referenced) {
- mutex_destroy(&adev->mman.gtt_window_lock);
- drm_global_item_unref(&adev->mman.bo_global_ref.ref);
- drm_global_item_unref(&adev->mman.mem_global_ref);
- adev->mman.mem_global_referenced = false;
- }
-}
-
-static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
-{
- return 0;
-}
-
-/**
- * amdgpu_init_mem_type - Initialize a memory manager for a specific type of
- * memory request.
- *
- * @bdev: The TTM BO device object (contains a reference to amdgpu_device)
- * @type: The type of memory requested
- * @man: The memory type manager for each domain
- *
- * This is called by ttm_bo_init_mm() when a buffer object is being
- * initialized.
- */
-static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
- struct ttm_mem_type_manager *man)
-{
- struct amdgpu_device *adev;
-
- adev = amdgpu_ttm_adev(bdev);
-
- switch (type) {
- case TTM_PL_SYSTEM:
- /* System memory */
- man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
- man->available_caching = TTM_PL_MASK_CACHING;
- man->default_caching = TTM_PL_FLAG_CACHED;
- break;
- case TTM_PL_TT:
- /* GTT memory */
- man->func = &amdgpu_gtt_mgr_func;
- man->gpu_offset = adev->gmc.gart_start;
- man->available_caching = TTM_PL_MASK_CACHING;
- man->default_caching = TTM_PL_FLAG_CACHED;
- man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
- break;
- case TTM_PL_VRAM:
- /* "On-card" video ram */
- man->func = &amdgpu_vram_mgr_func;
- man->gpu_offset = adev->gmc.vram_start;
- man->flags = TTM_MEMTYPE_FLAG_FIXED |
- TTM_MEMTYPE_FLAG_MAPPABLE;
- man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
- man->default_caching = TTM_PL_FLAG_WC;
- break;
- case AMDGPU_PL_GDS:
- case AMDGPU_PL_GWS:
- case AMDGPU_PL_OA:
- /* On-chip GDS memory*/
- man->func = &ttm_bo_manager_func;
- man->gpu_offset = 0;
- man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA;
- man->available_caching = TTM_PL_FLAG_UNCACHED;
- man->default_caching = TTM_PL_FLAG_UNCACHED;
- break;
- default:
- DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
- return -EINVAL;
- }
- return 0;
+ return ttm_range_man_init(&adev->mman.bdev, type,
+ false, size_in_page);
 }

 /**
@@ -234,7 +91,8 @@
 static const struct ttm_place placements = {
 .fpfn = 0,
 .lpfn = 0,
- .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
+ .mem_type = TTM_PL_SYSTEM,
+ .flags = TTM_PL_MASK_CACHING
 };

 /* Don't handle scatter gather BOs */
@@ -255,6 +113,13 @@

 abo = ttm_to_amdgpu_bo(bo);
 switch (bo->mem.mem_type) {
+ case AMDGPU_PL_GDS:
+ case AMDGPU_PL_GWS:
+ case AMDGPU_PL_OA:
+ placement->num_placement = 0;
+ placement->num_busy_placement = 0;
+ return;
+
 case TTM_PL_VRAM:
 if (!adev->mman.buffer_funcs_enabled) {
 /* Move to system memory */
@@ -282,6 +147,7 @@
 case TTM_PL_TT:
 default:
 amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+ break;
 }
 *placement = abo->placement;
 }
@@ -308,26 +174,8 @@

 if (amdgpu_ttm_tt_get_usermm(bo->ttm))
 return -EPERM;
- return drm_vma_node_verify_access(&abo->gem_base.vma_node,
+ return drm_vma_node_verify_access(&abo->tbo.base.vma_node,
 filp->private_data);
-}
-
-/**
- * amdgpu_move_null - Register memory for a buffer object
- *
- * @bo: The bo to assign the memory to
- * @new_mem: The memory to be assigned.
- *
- * Assign the memory from new_mem to the memory of the buffer object bo.
- */
-static void amdgpu_move_null(struct ttm_buffer_object *bo,
- struct ttm_mem_reg *new_mem)
-{
- struct ttm_mem_reg *old_mem = &bo->mem;
-
- BUG_ON(old_mem->mm_node != NULL);
- *old_mem = *new_mem;
- new_mem->mm_node = NULL;
 }

 /**
@@ -340,13 +188,14 @@
 */
 static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 struct drm_mm_node *mm_node,
- struct ttm_mem_reg *mem)
+ struct ttm_resource *mem)
 {
 uint64_t addr = 0;

- if (mem->mem_type != TTM_PL_TT || amdgpu_gtt_mgr_has_gart_addr(mem)) {
+ if (mm_node->start != AMDGPU_BO_INVALID_OFFSET) {
 addr = mm_node->start << PAGE_SHIFT;
- addr += bo->bdev->man[mem->mem_type].gpu_offset;
+ addr += amdgpu_ttm_domain_start(amdgpu_ttm_adev(bo->bdev),
+ mem->mem_type);
 }
 return addr;
 }
@@ -359,8 +208,8 @@
 * @offset: The offset that drm_mm_node is used for finding.
 *
 */
-static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
- unsigned long *offset)
+static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_resource *mem,
+ uint64_t *offset)
 {
 struct drm_mm_node *mm_node = mem->mm_node;

@@ -372,93 +221,201 @@
 }

 /**
+ * amdgpu_ttm_map_buffer - Map memory into the GART windows
+ * @bo: buffer object to map
+ * @mem: memory object to map
+ * @mm_node: drm_mm node object to map
+ * @num_pages: number of pages to map
+ * @offset: offset into @mm_node where to start
+ * @window: which GART window to use
+ * @ring: DMA ring to use for the copy
+ * @tmz: if we should setup a TMZ enabled mapping
+ * @addr: resulting address inside the MC address space
+ *
+ * Setup one of the GART windows to access a specific piece of memory or return
+ * the physical address for local memory.
+ */
+static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
+ struct ttm_resource *mem,
+ struct drm_mm_node *mm_node,
+ unsigned num_pages, uint64_t offset,
+ unsigned window, struct amdgpu_ring *ring,
+ bool tmz, uint64_t *addr)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_job *job;
+ unsigned num_dw, num_bytes;
+ struct dma_fence *fence;
+ uint64_t src_addr, dst_addr;
+ void *cpu_addr;
+ uint64_t flags;
+ unsigned int i;
+ int r;
+
+ BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
+ AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
+
+ /* Map only what can't be accessed directly */
+ if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
+ *addr = amdgpu_mm_node_addr(bo, mm_node, mem) + offset;
+ return 0;
+ }
+
+ *addr = adev->gmc.gart_start;
+ *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
+ AMDGPU_GPU_PAGE_SIZE;
+ *addr += offset & ~PAGE_MASK;
+
+ num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+ num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+
+ r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
+ AMDGPU_IB_POOL_DELAYED, &job);
+ if (r)
+ return r;
+
+ src_addr = num_dw * 4;
+ src_addr += job->ibs[0].gpu_addr;
+
+ dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+ dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
+ amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
+ dst_addr, num_bytes, false);
+
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+
+ flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
+ if (tmz)
+ flags |= AMDGPU_PTE_TMZ;
+
+ cpu_addr = &job->ibs[0].ptr[num_dw];
+
+ if (mem->mem_type == TTM_PL_TT) {
+ struct ttm_dma_tt *dma;
+ dma_addr_t *dma_address;
+
+ dma = container_of(bo->ttm, struct ttm_dma_tt, ttm);
+ dma_address = &dma->dma_address[offset >> PAGE_SHIFT];
+ r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
+ cpu_addr);
+ if (r)
+ goto error_free;
+ } else {
+ dma_addr_t dma_address;
+
+ dma_address = (mm_node->start << PAGE_SHIFT) + offset;
+ dma_address += adev->vm_manager.vram_base_offset;
+
+ for (i = 0; i < num_pages; ++i) {
+ r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1,
+ &dma_address, flags, cpu_addr);
+ if (r)
+ goto error_free;
+
+ dma_address += PAGE_SIZE;
+ }
+ }
+
+ r = amdgpu_job_submit(job, &adev->mman.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+ if (r)
+ goto error_free;
+
+ dma_fence_put(fence);
+
+ return r;
+
+error_free:
+ amdgpu_job_free(job);
+ return r;
+}
+
+/**
 * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
+ * @adev: amdgpu device
+ * @src: buffer/address where to read from
+ * @dst: buffer/address where to write to
+ * @size: number of bytes to copy
+ * @tmz: if a secure copy should be used
+ * @resv: resv object to sync to
+ * @f: Returns the last fence if multiple jobs are submitted.
 *
 * The function copies @size bytes from {src->mem + src->offset} to
 * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
 * move and different for a BO to BO copy.
 *
- * @f: Returns the last fence if multiple jobs are submitted.
 */
 int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
- struct amdgpu_copy_mem *src,
- struct amdgpu_copy_mem *dst,
- uint64_t size,
- struct reservation_object *resv,
+ const struct amdgpu_copy_mem *src,
+ const struct amdgpu_copy_mem *dst,
+ uint64_t size, bool tmz,
+ struct dma_resv *resv,
 struct dma_fence **f)
 {
+ const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
+ AMDGPU_GPU_PAGE_SIZE);
+
+ uint64_t src_node_size, dst_node_size, src_offset, dst_offset;
 struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 struct drm_mm_node *src_mm, *dst_mm;
- uint64_t src_node_start, dst_node_start, src_node_size,
- dst_node_size, src_page_offset, dst_page_offset;
 struct dma_fence *fence = NULL;
 int r = 0;
- const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
- AMDGPU_GPU_PAGE_SIZE);

 if (!adev->mman.buffer_funcs_enabled) {
 DRM_ERROR("Trying to move memory with ring turned off.\n");
 return -EINVAL;
 }

- src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
- src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
- src->offset;
- src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
- src_page_offset = src_node_start & (PAGE_SIZE - 1);
+ src_offset = src->offset;
+ if (src->mem->mm_node) {
+ src_mm = amdgpu_find_mm_node(src->mem, &src_offset);
+ src_node_size = (src_mm->size << PAGE_SHIFT) - src_offset;
+ } else {
+ src_mm = NULL;
+ src_node_size = ULLONG_MAX;
+ }

- dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
- dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
- dst->offset;
- dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
- dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
+ dst_offset = dst->offset;
+ if (dst->mem->mm_node) {
+ dst_mm = amdgpu_find_mm_node(dst->mem, &dst_offset);
+ dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst_offset;
+ } else {
+ dst_mm = NULL;
+ dst_node_size = ULLONG_MAX;
+ }

 mutex_lock(&adev->mman.gtt_window_lock);

 while (size) {
- unsigned long cur_size;
- uint64_t from = src_node_start, to = dst_node_start;
+ uint32_t src_page_offset = src_offset & ~PAGE_MASK;
+ uint32_t dst_page_offset = dst_offset & ~PAGE_MASK;
 struct dma_fence *next;
+ uint32_t cur_size;
+ uint64_t from, to;

 /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
 * begins at an offset, then adjust the size accordingly
 */
- cur_size = min3(min(src_node_size, dst_node_size), size,
- GTT_MAX_BYTES);
- if (cur_size + src_page_offset > GTT_MAX_BYTES ||
- cur_size + dst_page_offset > GTT_MAX_BYTES)
- cur_size -= max(src_page_offset, dst_page_offset);
+ cur_size = max(src_page_offset, dst_page_offset);
+ cur_size = min(min3(src_node_size, dst_node_size, size),
+ (uint64_t)(GTT_MAX_BYTES - cur_size));

- /* Map only what needs to be accessed. Map src to window 0 and
- * dst to window 1
- */
- if (src->mem->mem_type == TTM_PL_TT &&
- !amdgpu_gtt_mgr_has_gart_addr(src->mem)) {
- r = amdgpu_map_buffer(src->bo, src->mem,
- PFN_UP(cur_size + src_page_offset),
- src_node_start, 0, ring,
- &from);
- if (r)
- goto error;
- /* Adjust the offset because amdgpu_map_buffer returns
- * start of mapped page
- */
- from += src_page_offset;
- }
+ /* Map src to window 0 and dst to window 1. */
+ r = amdgpu_ttm_map_buffer(src->bo, src->mem, src_mm,
+ PFN_UP(cur_size + src_page_offset),
+ src_offset, 0, ring, tmz, &from);
+ if (r)
+ goto error;

- if (dst->mem->mem_type == TTM_PL_TT &&
- !amdgpu_gtt_mgr_has_gart_addr(dst->mem)) {
- r = amdgpu_map_buffer(dst->bo, dst->mem,
- PFN_UP(cur_size + dst_page_offset),
- dst_node_start, 1, ring,
- &to);
- if (r)
- goto error;
- to += dst_page_offset;
- }
+ r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, dst_mm,
+ PFN_UP(cur_size + dst_page_offset),
+ dst_offset, 1, ring, tmz, &to);
+ if (r)
+ goto error;

 r = amdgpu_copy_buffer(ring, from, to, cur_size,
- resv, &next, false, true);
+ resv, &next, false, true, tmz);
 if (r)
 goto error;

@@ -471,21 +428,20 @@

 src_node_size -= cur_size;
 if (!src_node_size) {
- src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
- src->mem);
- src_node_size = (src_mm->size << PAGE_SHIFT);
+ ++src_mm;
+ src_node_size = src_mm->size << PAGE_SHIFT;
+ src_offset = 0;
 } else {
- src_node_start += cur_size;
- src_page_offset = src_node_start & (PAGE_SIZE - 1);
+ src_offset += cur_size;
 }
+
 dst_node_size -= cur_size;
 if (!dst_node_size) {
- dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
- dst->mem);
- dst_node_size = (dst_mm->size << PAGE_SHIFT);
+ ++dst_mm;
+ dst_node_size = dst_mm->size << PAGE_SHIFT;
+ dst_offset = 0;
 } else {
- dst_node_start += cur_size;
- dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
+ dst_offset += cur_size;
 }
 }
 error:
@@ -503,11 +459,12 @@
 * help move buffers to and from VRAM.
 */
 static int amdgpu_move_blit(struct ttm_buffer_object *bo,
- bool evict, bool no_wait_gpu,
- struct ttm_mem_reg *new_mem,
- struct ttm_mem_reg *old_mem)
+ bool evict,
+ struct ttm_resource *new_mem,
+ struct ttm_resource *old_mem)
 {
 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 struct amdgpu_copy_mem src, dst;
 struct dma_fence *fence = NULL;
 int r;
@@ -521,11 +478,31 @@

 r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
 new_mem->num_pages << PAGE_SHIFT,
- bo->resv, &fence);
+ amdgpu_bo_encrypted(abo),
+ bo->base.resv, &fence);
 if (r)
 goto error;

- r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
+ /* clear the space being freed */
+ if (old_mem->mem_type == TTM_PL_VRAM &&
+ (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
+ struct dma_fence *wipe_fence = NULL;
+
+ r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
+ NULL, &wipe_fence);
+ if (r) {
+ goto error;
+ } else if (wipe_fence) {
+ dma_fence_put(fence);
+ fence = wipe_fence;
+ }
+ }
+
+ /* Always block for VM page tables before committing the new location */
+ if (bo->type == ttm_bo_type_kernel)
+ r = ttm_bo_move_accel_cleanup(bo, fence, true, false, new_mem);
+ else
+ r = ttm_bo_move_accel_cleanup(bo, fence, evict, true, new_mem);
 dma_fence_put(fence);
 return r;

@@ -543,16 +520,13 @@
 */
 static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
 struct ttm_operation_ctx *ctx,
- struct ttm_mem_reg *new_mem)
+ struct ttm_resource *new_mem)
 {
- struct amdgpu_device *adev;
- struct ttm_mem_reg *old_mem = &bo->mem;
- struct ttm_mem_reg tmp_mem;
+ struct ttm_resource *old_mem = &bo->mem;
+ struct ttm_resource tmp_mem;
 struct ttm_place placements;
 struct ttm_placement placement;
 int r;
-
- adev = amdgpu_ttm_adev(bo->bdev);

 /* create space/pages for new_mem in GTT space */
 tmp_mem = *new_mem;
@@ -563,9 +537,11 @@
 placement.busy_placement = &placements;
 placements.fpfn = 0;
 placements.lpfn = 0;
- placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
+ placements.mem_type = TTM_PL_TT;
+ placements.flags = TTM_PL_MASK_CACHING;
 r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
 if (unlikely(r)) {
+ pr_err("Failed to find GTT space for blit from VRAM\n");
 return r;
 }

@@ -575,14 +551,18 @@
 goto out_cleanup;
 }

+ r = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
+ if (unlikely(r))
+ goto out_cleanup;
+
 /* Bind the memory to the GTT space */
- r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx);
+ r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, &tmp_mem);
 if (unlikely(r)) {
 goto out_cleanup;
 }

 /* blit VRAM to GTT */
- r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, &tmp_mem, old_mem);
+ r = amdgpu_move_blit(bo, evict, &tmp_mem, old_mem);
 if (unlikely(r)) {
 goto out_cleanup;
 }
@@ -590,7 +570,7 @@
 /* move BO (in tmp_mem) to new_mem */
 r = ttm_bo_move_ttm(bo, ctx, new_mem);
 out_cleanup:
- ttm_bo_mem_put(bo, &tmp_mem);
+ ttm_resource_free(bo, &tmp_mem);
 return r;
 }

@@ -601,16 +581,13 @@
 */
 static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
 struct ttm_operation_ctx *ctx,
- struct ttm_mem_reg *new_mem)
+ struct ttm_resource *new_mem)
 {
- struct amdgpu_device *adev;
- struct ttm_mem_reg *old_mem = &bo->mem;
- struct ttm_mem_reg tmp_mem;
+ struct ttm_resource *old_mem = &bo->mem;
+ struct ttm_resource tmp_mem;
 struct ttm_placement placement;
 struct ttm_place placements;
 int r;
-
- adev = amdgpu_ttm_adev(bo->bdev);

 /* make space in GTT for old_mem buffer */
 tmp_mem = *new_mem;
@@ -621,9 +598,11 @@
 placement.busy_placement = &placements;
 placements.fpfn = 0;
 placements.lpfn = 0;
- placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
+ placements.mem_type = TTM_PL_TT;
+ placements.flags = TTM_PL_MASK_CACHING;
 r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
 if (unlikely(r)) {
+ pr_err("Failed to find GTT space for blit to VRAM\n");
 return r;
 }

@@ -634,13 +613,37 @@
 }

 /* copy to VRAM */
- r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, new_mem, old_mem);
+ r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
 if (unlikely(r)) {
 goto out_cleanup;
 }
 out_cleanup:
- ttm_bo_mem_put(bo, &tmp_mem);
+ ttm_resource_free(bo, &tmp_mem);
 return r;
+}
+
+/**
+ * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_mem_visible(struct amdgpu_device *adev,
+ struct ttm_resource *mem)
+{
+ struct drm_mm_node *nodes = mem->mm_node;
+
+ if (mem->mem_type == TTM_PL_SYSTEM ||
+ mem->mem_type == TTM_PL_TT)
+ return true;
+ if (mem->mem_type != TTM_PL_VRAM)
+ return false;
+
+ /* ttm_resource_ioremap only supports contiguous memory */
+ if (nodes->size != mem->num_pages)
+ return false;
+
+ return ((nodes->start + nodes->size) << PAGE_SHIFT)
+ <= adev->gmc.visible_vram_size;
 }

 /**
@@ -650,11 +653,11 @@
 */
 static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 struct ttm_operation_ctx *ctx,
- struct ttm_mem_reg *new_mem)
+ struct ttm_resource *new_mem)
 {
 struct amdgpu_device *adev;
 struct amdgpu_bo *abo;
- struct ttm_mem_reg *old_mem = &bo->mem;
+ struct ttm_resource *old_mem = &bo->mem;
 int r;

 /* Can't move a pinned BO */
@@ -665,7 +668,7 @@
 adev = amdgpu_ttm_adev(bo->bdev);

 if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
- amdgpu_move_null(bo, new_mem);
+ ttm_bo_move_null(bo, new_mem);
 return 0;
 }
 if ((old_mem->mem_type == TTM_PL_TT &&
@@ -673,12 +676,24 @@
 (old_mem->mem_type == TTM_PL_SYSTEM &&
 new_mem->mem_type == TTM_PL_TT)) {
 /* bind is enough */
- amdgpu_move_null(bo, new_mem);
+ ttm_bo_move_null(bo, new_mem);
+ return 0;
+ }
+ if (old_mem->mem_type == AMDGPU_PL_GDS ||
+ old_mem->mem_type == AMDGPU_PL_GWS ||
+ old_mem->mem_type == AMDGPU_PL_OA ||
+ new_mem->mem_type == AMDGPU_PL_GDS ||
+ new_mem->mem_type == AMDGPU_PL_GWS ||
+ new_mem->mem_type == AMDGPU_PL_OA) {
+ /* Nothing to save here */
+ ttm_bo_move_null(bo, new_mem);
 return 0;
 }

- if (!adev->mman.buffer_funcs_enabled)
+ if (!adev->mman.buffer_funcs_enabled) {
+ r = -ENODEV;
 goto memcpy;
+ }

 if (old_mem->mem_type == TTM_PL_VRAM &&
 new_mem->mem_type == TTM_PL_SYSTEM) {
@@ -687,16 +702,22 @@
 new_mem->mem_type == TTM_PL_VRAM) {
 r = amdgpu_move_ram_vram(bo, evict, ctx, new_mem);
 } else {
- r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu,
+ r = amdgpu_move_blit(bo, evict,
 new_mem, old_mem);
 }

 if (r) {
 memcpy:
- r = ttm_bo_move_memcpy(bo, ctx, new_mem);
- if (r) {
+ /* Check that all memory is CPU accessible */
+ if (!amdgpu_mem_visible(adev, old_mem) ||
+ !amdgpu_mem_visible(adev, new_mem)) {
+ pr_err("Move buffer fallback to memcpy unavailable\n");
 return r;
 }
+
+ r = ttm_bo_move_memcpy(bo, ctx, new_mem);
+ if (r)
+ return r;
 }

 if (bo->type == ttm_bo_type_device &&
@@ -718,19 +739,12 @@
 *
 * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
 */
-static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_resource *mem)
 {
- struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
 struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
 struct drm_mm_node *mm_node = mem->mm_node;
+ size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT;

- mem->bus.addr = NULL;
- mem->bus.offset = 0;
- mem->bus.size = mem->num_pages << PAGE_SHIFT;
- mem->bus.base = 0;
- mem->bus.is_iomem = false;
- if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
- return -EINVAL;
 switch (mem->mem_type) {
 case TTM_PL_SYSTEM:
 /* system memory */
@@ -740,18 +754,18 @@
 case TTM_PL_VRAM:
 mem->bus.offset = mem->start << PAGE_SHIFT;
 /* check if it's visible */
- if ((mem->bus.offset + mem->bus.size) > adev->gmc.visible_vram_size)
+ if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
 return -EINVAL;
 /* Only physically contiguous buffers apply. In a contiguous
 * buffer, size of the first mm_node would match the number of
- * pages in ttm_mem_reg.
+ * pages in ttm_resource.
 */
 if (adev->mman.aper_base_kaddr &&
 (mm_node->size == mem->num_pages))
 mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
 mem->bus.offset;

- mem->bus.base = adev->gmc.aper_base;
+ mem->bus.offset += adev->gmc.aper_base;
 mem->bus.is_iomem = true;
 break;
 default:
@@ -760,118 +774,196 @@
 return 0;
 }

-static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
-{
-}
-
 static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 unsigned long page_offset)
 {
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ uint64_t offset = (page_offset << PAGE_SHIFT);
 struct drm_mm_node *mm;
- unsigned long offset = (page_offset << PAGE_SHIFT);

 mm = amdgpu_find_mm_node(&bo->mem, &offset);
- return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
- (offset >> PAGE_SHIFT);
+ offset += adev->gmc.aper_base;
+ return mm->start + (offset >> PAGE_SHIFT);
+}
+
+/**
+ * amdgpu_ttm_domain_start - Returns GPU start address
+ * @adev: amdgpu device object
+ * @type: type of the memory
+ *
+ * Returns:
+ * GPU start address of a memory domain
+ */
+
+uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type)
+{
+ switch (type) {
+ case TTM_PL_TT:
+ return adev->gmc.gart_start;
+ case TTM_PL_VRAM:
+ return adev->gmc.vram_start;
+ }
+
+ return 0;
 }

 /*
 * TTM backend functions.
 */
-struct amdgpu_ttm_gup_task_list {
- struct list_head list;
- struct task_struct *task;
-};
-
 struct amdgpu_ttm_tt {
 struct ttm_dma_tt ttm;
+ struct drm_gem_object *gobj;
 u64 offset;
 uint64_t userptr;
 struct task_struct *usertask;
 uint32_t userflags;
- spinlock_t guptasklock;
- struct list_head guptasks;
- atomic_t mmu_invalidations;
- uint32_t last_set_pages;
+ bool bound;
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+ struct hmm_range *range;
+#endif
 };

+#ifdef CONFIG_DRM_AMDGPU_USERPTR
 /**
- * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
- * pointer to memory
+ * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
+ * memory and start HMM tracking CPU page table update
 *
- * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
- * This provides a wrapper around the get_user_pages() call to provide
- * device accessible pages that back user memory.
+ * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
+ * once afterwards to stop HMM tracking
 */
-int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 {
+ struct ttm_tt *ttm = bo->tbo.ttm;
 struct amdgpu_ttm_tt *gtt = (void *)ttm;
- struct mm_struct *mm = gtt->usertask->mm;
- unsigned int flags = 0;
- unsigned pinned = 0;
- int r;
+ unsigned long start = gtt->userptr;
+ struct vm_area_struct *vma;
+ struct hmm_range *range;
+ unsigned long timeout;
+ struct mm_struct *mm;
+ unsigned long i;
+ int r = 0;

- if (!mm) /* Happens during process shutdown */
- return -ESRCH;
-
- if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
- flags |= FOLL_WRITE;
-
- down_read(&mm->mmap_sem);
-
- if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
- /*
- * check that we only use anonymous memory to prevent problems
- * with writeback
- */
- unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
- struct vm_area_struct *vma;
-
- vma = find_vma(mm, gtt->userptr);
- if (!vma || vma->vm_file || vma->vm_end < end) {
- up_read(&mm->mmap_sem);
- return -EPERM;
- }
+ mm = bo->notifier.mm;
+ if (unlikely(!mm)) {
+ DRM_DEBUG_DRIVER("BO is not registered?\n");
+ return -EFAULT;
 }

- /* loop enough times using contiguous pages of memory */
- do {
- unsigned num_pages = ttm->num_pages - pinned;
- uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
- struct page **p = pages + pinned;
- struct amdgpu_ttm_gup_task_list guptask;
+ /* Another get_user_pages is running at the same time?? */
+ if (WARN_ON(gtt->range))
+ return -EFAULT;

- guptask.task = current;
- spin_lock(&gtt->guptasklock);
- list_add(&guptask.list, &gtt->guptasks);
- spin_unlock(&gtt->guptasklock);
+ if (!mmget_not_zero(mm)) /* Happens during process shutdown */
+ return -ESRCH;

- if (mm == current->mm)
- r = get_user_pages(userptr, num_pages, flags, p, NULL);
- else
- r = get_user_pages_remote(gtt->usertask,
- mm, userptr, num_pages,
- flags, p, NULL, NULL);
+ range = kzalloc(sizeof(*range), GFP_KERNEL);
+ if (unlikely(!range)) {
+ r = -ENOMEM;
+ goto out;
+ }
+ range->notifier = &bo->notifier;
+ range->start = bo->notifier.interval_tree.start;
+ range->end = bo->notifier.interval_tree.last + 1;
+ range->default_flags = HMM_PFN_REQ_FAULT;
+ if (!amdgpu_ttm_tt_is_readonly(ttm))
+ range->default_flags |= HMM_PFN_REQ_WRITE;

- spin_lock(&gtt->guptasklock);
- list_del(&guptask.list);
- spin_unlock(&gtt->guptasklock);
+ range->hmm_pfns = kvmalloc_array(ttm->num_pages,
+ sizeof(*range->hmm_pfns), GFP_KERNEL);
+ if (unlikely(!range->hmm_pfns)) {
+ r = -ENOMEM;
+ goto out_free_ranges;
+ }

- if (r < 0)
- goto release_pages;
+ mmap_read_lock(mm);
+ vma = find_vma(mm, start);
+ if (unlikely(!vma || start < vma->vm_start)) {
+ r = -EFAULT;
+ goto out_unlock;
+ }
+ if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+ vma->vm_file)) {
+ r = -EPERM;
+ goto out_unlock;
+ }
+ mmap_read_unlock(mm);
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);

- pinned += r;
+retry:
+ range->notifier_seq = mmu_interval_read_begin(&bo->notifier);

- } while (pinned < ttm->num_pages);
+ mmap_read_lock(mm);
+ r = hmm_range_fault(range);
+ mmap_read_unlock(mm);
+ if (unlikely(r)) {
+ /*
+ * FIXME: This timeout should encompass the retry from
+ * mmu_interval_read_retry() as well.
+ */
+ if (r == -EBUSY && !time_after(jiffies, timeout))
+ goto retry;
+ goto out_free_pfns;
+ }

- up_read(&mm->mmap_sem);
+ /*
+ * Due to default_flags, all pages are HMM_PFN_VALID or
+ * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
+ * the notifier_lock, and mmu_interval_read_retry() must be done first.
+ */
+ for (i = 0; i < ttm->num_pages; i++)
+ pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
+
+ gtt->range = range;
+ mmput(mm);
+
 return 0;

-release_pages:
- release_pages(pages, pinned);
- up_read(&mm->mmap_sem);
+out_unlock:
+ mmap_read_unlock(mm);
+out_free_pfns:
+ kvfree(range->hmm_pfns);
+out_free_ranges:
+ kfree(range);
+out:
+ mmput(mm);
 return r;
 }
+
+/**
+ * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
+ * Check if the pages backing this ttm range have been invalidated
+ *
+ * Returns: true if pages are still valid
+ */
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
+{
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ bool r = false;
+
+ if (!gtt || !gtt->userptr)
+ return false;
+
+ DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n",
+ gtt->userptr, ttm->num_pages);
+
+ WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns,
+ "No user pages to check\n");
+
+ if (gtt->range) {
+ /*
+ * FIXME: Must always hold notifier_lock for this, and must
+ * not ignore the return code.
+ */
+ r = mmu_interval_read_retry(gtt->range->notifier,
+ gtt->range->notifier_seq);
+ kvfree(gtt->range->hmm_pfns);
+ kfree(gtt->range);
+ gtt->range = NULL;
+ }
+
+ return !r;
+}
+#endif

 /**
 * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
@@ -882,39 +974,10 @@
 */
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- unsigned i;
+ unsigned long i;

- gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
- for (i = 0; i < ttm->num_pages; ++i) {
- if (ttm->pages[i])
- put_page(ttm->pages[i]);
-
+ for (i = 0; i < ttm->num_pages; ++i)
 ttm->pages[i] = pages ? pages[i] : NULL;
- }
-}
-
-/**
- * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty
- *
- * Called while unpinning userptr pages
- */
-void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
-{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- unsigned i;
-
- for (i = 0; i < ttm->num_pages; ++i) {
- struct page *page = ttm->pages[i];
-
- if (!page)
- continue;
-
- if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
- set_page_dirty(page);
-
- mark_page_accessed(page);
- }
 }

 /**
@@ -922,11 +985,11 @@
 *
 * Called by amdgpu_ttm_backend_bind()
 **/
-static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+static int amdgpu_ttm_tt_pin_userptr(struct ttm_bo_device *bdev,
+ struct ttm_tt *ttm)
 {
- struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
 struct amdgpu_ttm_tt *gtt = (void *)ttm;
- unsigned nents;
 int r;

 int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
@@ -941,9 +1004,8 @@
 goto release_sg;

 /* Map SG to device */
- r = -ENOMEM;
- nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
- if (nents != ttm->sg->nents)
+ r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
+ if (r)
 goto release_sg;

 /* convert SG to linear array of pages and dma addresses */
@@ -961,9 +1023,10 @@
 /**
 * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
 */
-static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
+static void amdgpu_ttm_tt_unpin_userptr(struct ttm_bo_device *bdev,
+ struct ttm_tt *ttm)
 {
- struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
 struct amdgpu_ttm_tt *gtt = (void *)ttm;

 int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
@@ -975,15 +1038,25 @@
 return;

 /* unmap the pages mapped to the device */
- dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
-
- /* mark the pages as dirty */
- amdgpu_ttm_tt_mark_user_pages(ttm);
-
+ dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
 sg_free_table(ttm->sg);
+
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+ if (gtt->range) {
+ unsigned long i;
+
+ for (i = 0; i < ttm->num_pages; i++) {
+ if (ttm->pages[i] !=
+ hmm_pfn_to_page(gtt->range->hmm_pfns[i]))
+ break;
+ }
+
+ WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
+ }
+#endif
 }

-int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
+static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
 struct ttm_buffer_object *tbo,
 uint64_t flags)
 {
@@ -992,7 +1065,10 @@
 struct amdgpu_ttm_tt *gtt = (void *)ttm;
 int r;

- if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) {
+ if (amdgpu_bo_encrypted(abo))
+ flags |= AMDGPU_PTE_TMZ;
+
+ if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
 uint64_t page_idx = 1;

 r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
@@ -1000,9 +1076,12 @@
 if (r)
 goto gart_bind_fail;

- /* Patch mtype of the second part BO */
- flags &= ~AMDGPU_PTE_MTYPE_MASK;
- flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC);
+ /* The memory type of the first page defaults to UC. Now
+ * modify the memory type to NC from the second page of
+ * the BO onward.
+ */
+ flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
+ flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);

 r = amdgpu_gart_bind(adev,
 gtt->offset + (page_idx << PAGE_SHIFT),
@@ -1028,16 +1107,23 @@
 * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
 * This handles binding GTT memory to the device address space.
 */
-static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
- struct ttm_mem_reg *bo_mem)
+static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_resource *bo_mem)
 {
- struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
 struct amdgpu_ttm_tt *gtt = (void*)ttm;
 uint64_t flags;
 int r = 0;

+ if (!bo_mem)
+ return -EINVAL;
+
+ if (gtt->bound)
+ return 0;
+
 if (gtt->userptr) {
- r = amdgpu_ttm_tt_pin_userptr(ttm);
+ r = amdgpu_ttm_tt_pin_userptr(bdev, ttm);
 if (r) {
 DRM_ERROR("failed to pin userptr\n");
 return r;
@@ -1069,58 +1155,67 @@
 if (r)
 DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
 ttm->num_pages, gtt->offset);
+ gtt->bound = true;
 return r;
 }

 /**
- * amdgpu_ttm_alloc_gart - Allocate GART memory for buffer object
+ * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either
+ * through AGP or GART aperture.
+ *
+ * If bo is accessible through AGP aperture, then use AGP aperture
+ * to access bo; otherwise allocate logical space in GART aperture
+ * and map bo to GART aperture.
 */
 int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 {
 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 struct ttm_operation_ctx ctx = { false, false };
 struct amdgpu_ttm_tt *gtt = (void*)bo->ttm;
- struct ttm_mem_reg tmp;
+ struct ttm_resource tmp;
 struct ttm_placement placement;
 struct ttm_place placements;
- uint64_t flags;
+ uint64_t addr, flags;
 int r;

- if (bo->mem.mem_type != TTM_PL_TT ||
- amdgpu_gtt_mgr_has_gart_addr(&bo->mem))
+ if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)
 return 0;

- /* allocate GTT space */
- tmp = bo->mem;
- tmp.mm_node = NULL;
- placement.num_placement = 1;
- placement.placement = &placements;
- placement.num_busy_placement = 1;
- placement.busy_placement = &placements;
- placements.fpfn = 0;
- placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
- placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
- TTM_PL_FLAG_TT;
+ addr = amdgpu_gmc_agp_addr(bo);
+ if (addr != AMDGPU_BO_INVALID_OFFSET) {
+ bo->mem.start = addr >> PAGE_SHIFT;
+ } else {

- r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
- if (unlikely(r))
- return r;
+ /* allocate GART space */
+ tmp = bo->mem;
+ tmp.mm_node = NULL;
+ placement.num_placement = 1;
+ placement.placement = &placements;
+ placement.num_busy_placement = 1;
+ placement.busy_placement = &placements;
+ placements.fpfn = 0;
+ placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
+ placements.mem_type = TTM_PL_TT;
+ placements.flags = bo->mem.placement;

- /* compute PTE flags for this buffer object */
- flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
+ r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
+ if (unlikely(r))
+ return r;

- /* Bind pages */
- gtt->offset = (u64)tmp.start << PAGE_SHIFT;
- r = amdgpu_ttm_gart_bind(adev, bo, flags);
- if (unlikely(r)) {
- ttm_bo_mem_put(bo, &tmp);
- return r;
+ /* compute PTE flags for this buffer object */
+ flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
+
+ /* Bind pages */
+ gtt->offset = (u64)tmp.start << PAGE_SHIFT;
+ r = amdgpu_ttm_gart_bind(adev, bo, flags);
+ if (unlikely(r)) {
+ ttm_resource_free(bo, &tmp);
+ return r;
+ }
+
+ ttm_resource_free(bo, &bo->mem);
+ bo->mem = tmp;
 }
-
- ttm_bo_mem_put(bo, &bo->mem);
- bo->mem = tmp;
- bo->offset = (bo->mem.start << PAGE_SHIFT) +
- bo->bdev->man[bo->mem.mem_type].gpu_offset;

 return 0;
 }
@@ -1152,43 +1247,44 @@
 * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
 * ttm_tt_destroy().
 */
-static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
+static void amdgpu_ttm_backend_unbind(struct ttm_bo_device *bdev,
+ struct ttm_tt *ttm)
 {
- struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
 struct amdgpu_ttm_tt *gtt = (void *)ttm;
 int r;

 /* if the pages have userptr pinning then clear that first */
 if (gtt->userptr)
- amdgpu_ttm_tt_unpin_userptr(ttm);
+ amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
+
+ if (!gtt->bound)
+ return;

 if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
- return 0;
+ return;

 /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
 r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
 if (r)
 DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
 gtt->ttm.ttm.num_pages, gtt->offset);
- return r;
+ gtt->bound = false;
 }

-static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
+static void amdgpu_ttm_backend_destroy(struct ttm_bo_device *bdev,
+ struct ttm_tt *ttm)
 {
 struct amdgpu_ttm_tt *gtt = (void *)ttm;

+ amdgpu_ttm_backend_unbind(bdev, ttm);
+ ttm_tt_destroy_common(bdev, ttm);
 if (gtt->usertask)
 put_task_struct(gtt->usertask);

 ttm_dma_tt_fini(&gtt->ttm);
 kfree(gtt);
 }
-
-static struct ttm_backend_func amdgpu_backend_func = {
- .bind = &amdgpu_ttm_backend_bind,
- .unbind = &amdgpu_ttm_backend_unbind,
- .destroy = &amdgpu_ttm_backend_destroy,
-};

 /**
 * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
@@ -1200,16 +1296,13 @@
 static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
 uint32_t page_flags)
 {
- struct amdgpu_device *adev;
 struct amdgpu_ttm_tt *gtt;
-
- adev = amdgpu_ttm_adev(bo->bdev);

 gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
 if (gtt == NULL) {
 return NULL;
 }
- gtt->ttm.ttm.func = &amdgpu_backend_func;
+ gtt->gobj = &bo->base;

 /* allocate space for the uninitialized page entries */
 if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
@@ -1225,12 +1318,12 @@
 * Map the pages of a ttm_tt object to an address space visible
 * to the underlying device.
 */
-static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
- struct ttm_operation_ctx *ctx)
+static int amdgpu_ttm_tt_populate(struct ttm_bo_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_operation_ctx *ctx)
 {
- struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
 struct amdgpu_ttm_tt *gtt = (void *)ttm;
- bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);

 /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
 if (gtt && gtt->userptr) {
@@ -1239,15 +1332,27 @@
 return -ENOMEM;

 ttm->page_flags |= TTM_PAGE_FLAG_SG;
- ttm->state = tt_unbound;
+ ttm_tt_set_populated(ttm);
 return 0;
 }

- if (slave && ttm->sg) {
+ if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
+ if (!ttm->sg) {
+ struct dma_buf_attachment *attach;
+ struct sg_table *sgt;
+
+ attach = gtt->gobj->import_attach;
+ sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
+
+ ttm->sg = sgt;
+ }
+
 drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 gtt->ttm.dma_address,
 ttm->num_pages);
- ttm->state = tt_unbound;
+ ttm_tt_set_populated(ttm);
 return 0;
 }

@@ -1268,11 +1373,10 @@
 * Unmaps pages of a ttm_tt object from the device address space and
 * unpopulates the page array backing it.
 */
-static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
+static void amdgpu_ttm_tt_unpopulate(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
 {
- struct amdgpu_device *adev;
 struct amdgpu_ttm_tt *gtt = (void *)ttm;
- bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+ struct amdgpu_device *adev;

 if (gtt && gtt->userptr) {
 amdgpu_ttm_tt_set_user_pages(ttm, NULL);
@@ -1282,10 +1386,19 @@
 return;
 }

- if (slave)
+ if (ttm->sg && gtt->gobj->import_attach) {
+ struct dma_buf_attachment *attach;
+
+ attach = gtt->gobj->import_attach;
+ dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
+ ttm->sg = NULL;
+ return;
+ }
+
+ if (ttm->page_flags & TTM_PAGE_FLAG_SG)
 return;

- adev = amdgpu_ttm_adev(ttm->bdev);
+ adev = amdgpu_ttm_adev(bdev);

 #ifdef CONFIG_SWIOTLB
 if (adev->need_swiotlb && swiotlb_nr_tbl()) {
@@ -1302,21 +1415,26 @@
 * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
 * task
 *
- * @ttm: The ttm_tt object to bind this userptr object to
+ * @bo: The ttm_buffer_object to bind this userptr to
 * @addr: The address in the current tasks VM space to use
 * @flags: Requirements of userptr object.
 *
 * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
 * to current task
 */
-int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
- uint32_t flags)
+int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
+ uint64_t addr, uint32_t flags)
 {
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt;

- if (gtt == NULL)
- return -EINVAL;
+ if (!bo->ttm) {
+ /* TODO: We want a separate TTM object type for userptrs */
+ bo->ttm = amdgpu_ttm_tt_create(bo, 0);
+ if (bo->ttm == NULL)
+ return -ENOMEM;
+ }

+ gtt = (void*)bo->ttm;
 gtt->userptr = addr;
 gtt->userflags = flags;

@@ -1324,11 +1442,6 @@
 put_task_struct(gtt->usertask);
 gtt->usertask = current->group_leader;
 get_task_struct(gtt->usertask);
-
- spin_lock_init(&gtt->guptasklock);
- INIT_LIST_HEAD(&gtt->guptasks);
- atomic_set(&gtt->mmu_invalidations, 0);
- gtt->last_set_pages = 0;

 return 0;
 }
@@ -1358,7 +1471,6 @@
 unsigned long end)
 {
 struct amdgpu_ttm_tt *gtt = (void *)ttm;
- struct amdgpu_ttm_gup_task_list *entry;
 unsigned long size;

 if (gtt == NULL || !gtt->userptr)
@@ -1371,48 +1483,20 @@
 if (gtt->userptr > end || gtt->userptr + size <= start)
 return false;

- /* Search the lists of tasks that hold this mapping and see
- * if current is one of them. If it is return false.
- */
- spin_lock(&gtt->guptasklock);
- list_for_each_entry(entry, &gtt->guptasks, list) {
- if (entry->task == current) {
- spin_unlock(&gtt->guptasklock);
- return false;
- }
- }
- spin_unlock(&gtt->guptasklock);
-
- atomic_inc(&gtt->mmu_invalidations);
-
 return true;
 }

 /**
- * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
+ * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
 */
-bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
- int *last_invalidated)
-{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- int prev_invalidated = *last_invalidated;
-
- *last_invalidated = atomic_read(&gtt->mmu_invalidations);
- return prev_invalidated != *last_invalidated;
-}
-
-/**
- * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
- * been invalidated since the last time they've been set?
- */
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
 {
 struct amdgpu_ttm_tt *gtt = (void *)ttm;

 if (gtt == NULL || !gtt->userptr)
 return false;

- return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
+ return true;
 }

 /**
@@ -1429,13 +1513,14 @@
 }

 /**
- * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
+ * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
 *
 * @ttm: The ttm_tt object to compute the flags for
 * @mem: The memory registry backing this ttm_tt object
+ *
+ * Figure out the flags to use for a VM PDE (Page Directory Entry).
 */
-uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
- struct ttm_mem_reg *mem)
+uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
 {
 uint64_t flags = 0;

@@ -1448,6 +1533,22 @@
 if (ttm->caching_state == tt_cached)
 flags |= AMDGPU_PTE_SNOOPED;
 }
+
+ return flags;
+}
+
+/**
+ * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
+ *
+ * @ttm: The ttm_tt object to compute the flags for
+ * @mem: The memory registry backing this ttm_tt object
+
+ * Figure out the flags to use for a VM PTE (Page Table Entry).
+ */
+uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
+ struct ttm_resource *mem)
+{
+ uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);

 flags |= adev->gart.gart_pte_flags;
 flags |= AMDGPU_PTE_READABLE;
....@@ -1472,19 +1573,23 @@
14721573 {
14731574 unsigned long num_pages = bo->mem.num_pages;
14741575 struct drm_mm_node *node = bo->mem.mm_node;
1475
- struct reservation_object_list *flist;
1576
+ struct dma_resv_list *flist;
14761577 struct dma_fence *f;
14771578 int i;
1579
+
1580
+ if (bo->type == ttm_bo_type_kernel &&
1581
+ !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
1582
+ return false;
14781583
14791584 /* If bo is a KFD BO, check if the bo belongs to the current process.
14801585 * If true, then return false as any KFD process needs all its BOs to
14811586 * be resident to run successfully
14821587 */
1483
- flist = reservation_object_get_list(bo->resv);
1588
+ flist = dma_resv_get_list(bo->base.resv);
14841589 if (flist) {
14851590 for (i = 0; i < flist->shared_count; ++i) {
14861591 f = rcu_dereference_protected(flist->shared[i],
1487
- reservation_object_held(bo->resv));
1592
+ dma_resv_held(bo->base.resv));
14881593 if (amdkfd_fence_check_mm(f, current->mm))
14891594 return false;
14901595 }
....@@ -1492,6 +1597,9 @@
14921597
14931598 switch (bo->mem.mem_type) {
14941599 case TTM_PL_TT:
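+ /* Encrypted (TMZ) BOs are never treated as valuable eviction candidates. */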
1600
+ if (amdgpu_bo_is_amdgpu_bo(bo) &&
1601
+ amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
1602
+ return false;
14951603 return true;
14961604
14971605 case TTM_PL_VRAM:
....@@ -1540,12 +1648,13 @@
15401648 if (bo->mem.mem_type != TTM_PL_VRAM)
15411649 return -EIO;
15421650
1543
- nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
1544
- pos = (nodes->start << PAGE_SHIFT) + offset;
1651
+ pos = offset;
1652
+ nodes = amdgpu_find_mm_node(&abo->tbo.mem, &pos);
1653
+ pos += (nodes->start << PAGE_SHIFT);
15451654
15461655 while (len && pos < adev->gmc.mc_vram_size) {
15471656 uint64_t aligned_pos = pos & ~(uint64_t)3;
1548
- uint32_t bytes = 4 - (pos & 3);
1657
+ uint64_t bytes = 4 - (pos & 3);
15491658 uint32_t shift = (pos & 3) * 8;
15501659 uint32_t mask = 0xffffffff << shift;
15511660
....@@ -1554,20 +1663,28 @@
15541663 bytes = len;
15551664 }
15561665
1557
- spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1558
- WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
1559
- WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
1560
- if (!write || mask != 0xffffffff)
1561
- value = RREG32_NO_KIQ(mmMM_DATA);
1562
- if (write) {
1563
- value &= ~mask;
1564
- value |= (*(uint32_t *)buf << shift) & mask;
1565
- WREG32_NO_KIQ(mmMM_DATA, value);
1566
- }
1567
- spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1568
- if (!write) {
1569
- value = (value & mask) >> shift;
1570
- memcpy(buf, &value, bytes);
1666
+ if (mask != 0xffffffff) {
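+ /* Partial dword: read-modify-write through the MM_INDEX/MM_DATA window. */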
1667
+ spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1668
+ WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
1669
+ WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
1670
+ if (!write || mask != 0xffffffff)
1671
+ value = RREG32_NO_KIQ(mmMM_DATA);
1672
+ if (write) {
1673
+ value &= ~mask;
1674
+ value |= (*(uint32_t *)buf << shift) & mask;
1675
+ WREG32_NO_KIQ(mmMM_DATA, value);
1676
+ }
1677
+ spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1678
+ if (!write) {
1679
+ value = (value & mask) >> shift;
1680
+ memcpy(buf, &value, bytes);
1681
+ }
1682
+ } else {
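+ /* Dword-aligned span: access the rest of this contiguous node in one helper call. */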
1683
+ bytes = (nodes->start + nodes->size) << PAGE_SHIFT;
1684
+ bytes = min(bytes - pos, (uint64_t)len & ~0x3ull);
1685
+
1686
+ amdgpu_device_vram_access(adev, pos, (uint32_t *)buf,
1687
+ bytes, write);
15711688 }
15721689
15731690 ret += bytes;
....@@ -1587,18 +1704,20 @@
15871704 .ttm_tt_create = &amdgpu_ttm_tt_create,
15881705 .ttm_tt_populate = &amdgpu_ttm_tt_populate,
15891706 .ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
1590
- .invalidate_caches = &amdgpu_invalidate_caches,
1591
- .init_mem_type = &amdgpu_init_mem_type,
1707
+ .ttm_tt_bind = &amdgpu_ttm_backend_bind,
1708
+ .ttm_tt_unbind = &amdgpu_ttm_backend_unbind,
1709
+ .ttm_tt_destroy = &amdgpu_ttm_backend_destroy,
15921710 .eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
15931711 .evict_flags = &amdgpu_evict_flags,
15941712 .move = &amdgpu_bo_move,
15951713 .verify_access = &amdgpu_verify_access,
15961714 .move_notify = &amdgpu_bo_move_notify,
1715
+ .release_notify = &amdgpu_bo_release_notify,
15971716 .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
15981717 .io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
1599
- .io_mem_free = &amdgpu_ttm_io_mem_free,
16001718 .io_mem_pfn = amdgpu_ttm_io_mem_pfn,
1601
- .access_memory = &amdgpu_ttm_access_memory
1719
+ .access_memory = &amdgpu_ttm_access_memory,
1720
+ .del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
16021721 };
16031722
16041723 /*
....@@ -1613,8 +1732,8 @@
16131732 */
16141733 static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
16151734 {
1616
- amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo,
1617
- NULL, &adev->fw_vram_usage.va);
1735
+ amdgpu_bo_free_kernel(&adev->mman.fw_vram_usage_reserved_bo,
1736
+ NULL, &adev->mman.fw_vram_usage_va);
16181737 }
16191738
16201739 /**
....@@ -1626,81 +1745,128 @@
16261745 */
16271746 static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
16281747 {
1629
- struct ttm_operation_ctx ctx = { false, false };
1630
- struct amdgpu_bo_param bp;
1631
- int r = 0;
1632
- int i;
1633
- u64 vram_size = adev->gmc.visible_vram_size;
1634
- u64 offset = adev->fw_vram_usage.start_offset;
1635
- u64 size = adev->fw_vram_usage.size;
1636
- struct amdgpu_bo *bo;
1748
+ uint64_t vram_size = adev->gmc.visible_vram_size;
16371749
1638
- memset(&bp, 0, sizeof(bp));
1639
- bp.size = adev->fw_vram_usage.size;
1640
- bp.byte_align = PAGE_SIZE;
1641
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
1642
- bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1643
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1644
- bp.type = ttm_bo_type_kernel;
1645
- bp.resv = NULL;
1646
- adev->fw_vram_usage.va = NULL;
1647
- adev->fw_vram_usage.reserved_bo = NULL;
1750
+ adev->mman.fw_vram_usage_va = NULL;
1751
+ adev->mman.fw_vram_usage_reserved_bo = NULL;
16481752
1649
- if (adev->fw_vram_usage.size > 0 &&
1650
- adev->fw_vram_usage.size <= vram_size) {
1753
+ if (adev->mman.fw_vram_usage_size == 0 ||
1754
+ adev->mman.fw_vram_usage_size > vram_size)
1755
+ return 0;
16511756
1652
- r = amdgpu_bo_create(adev, &bp,
1653
- &adev->fw_vram_usage.reserved_bo);
1654
- if (r)
1655
- goto error_create;
1656
-
1657
- r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false);
1658
- if (r)
1659
- goto error_reserve;
1660
-
1661
- /* remove the original mem node and create a new one at the
1662
- * request position
1663
- */
1664
- bo = adev->fw_vram_usage.reserved_bo;
1665
- offset = ALIGN(offset, PAGE_SIZE);
1666
- for (i = 0; i < bo->placement.num_placement; ++i) {
1667
- bo->placements[i].fpfn = offset >> PAGE_SHIFT;
1668
- bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
1669
- }
1670
-
1671
- ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem);
1672
- r = ttm_bo_mem_space(&bo->tbo, &bo->placement,
1673
- &bo->tbo.mem, &ctx);
1674
- if (r)
1675
- goto error_pin;
1676
-
1677
- r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo,
1678
- AMDGPU_GEM_DOMAIN_VRAM,
1679
- adev->fw_vram_usage.start_offset,
1680
- (adev->fw_vram_usage.start_offset +
1681
- adev->fw_vram_usage.size));
1682
- if (r)
1683
- goto error_pin;
1684
- r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
1685
- &adev->fw_vram_usage.va);
1686
- if (r)
1687
- goto error_kmap;
1688
-
1689
- amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
1690
- }
1691
- return r;
1692
-
1693
-error_kmap:
1694
- amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo);
1695
-error_pin:
1696
- amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
1697
-error_reserve:
1698
- amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo);
1699
-error_create:
1700
- adev->fw_vram_usage.va = NULL;
1701
- adev->fw_vram_usage.reserved_bo = NULL;
1702
- return r;
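+ /* Pin a kernel BO at the firmware-requested VRAM offset and keep a CPU mapping of it. */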
1757
+ return amdgpu_bo_create_kernel_at(adev,
1758
+ adev->mman.fw_vram_usage_start_offset,
1759
+ adev->mman.fw_vram_usage_size,
1760
+ AMDGPU_GEM_DOMAIN_VRAM,
1761
+ &adev->mman.fw_vram_usage_reserved_bo,
1762
+ &adev->mman.fw_vram_usage_va);
17031763 }
1764
+
1765
+/*
1766
+ * Memoy training reservation functions
1767
+ */
1768
+
1769
+/**
1770
+ * amdgpu_ttm_training_reserve_vram_fini - free the VRAM reserved for memory training
1771
+ *
1772
+ * @adev: amdgpu_device pointer
1773
+ *
1774
+ * Free the VRAM reserved for memory training, if it has been reserved.
1775
+ */
1776
+static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
1777
+{
1778
+ struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1779
+
1780
+ ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
1781
+ amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
1782
+ ctx->c2p_bo = NULL;
1783
+
1784
+ return 0;
1785
+}
1786
+
1787
+static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
1788
+{
1789
+ struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1790
+
1791
+ memset(ctx, 0, sizeof(*ctx));
1792
+
1793
+ ctx->c2p_train_data_offset =
1794
+ ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
1795
+ ctx->p2c_train_data_offset =
1796
+ (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
1797
+ ctx->train_data_size =
1798
+ GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
1799
+
1800
+ DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
1801
+ ctx->train_data_size,
1802
+ ctx->p2c_train_data_offset,
1803
+ ctx->c2p_train_data_offset);
1804
+}
1805
+
1806
+/*
1807
+ * reserve TMR memory at the top of VRAM which holds
1808
+ * IP Discovery data and is protected by PSP.
1809
+ */
1810
+static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
1811
+{
1812
+ int ret;
1813
+ struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1814
+ bool mem_train_support = false;
1815
+
1816
+ if (!amdgpu_sriov_vf(adev)) {
1817
+ ret = amdgpu_mem_train_support(adev);
1818
+ if (ret == 1)
1819
+ mem_train_support = true;
1820
+ else if (ret == -1)
1821
+ return -EINVAL;
1822
+ else
1823
+ DRM_DEBUG("memory training does not support!\n");
1824
+ }
1825
+
1826
+ /*
1827
+ * Query the reserved TMR size through atom firmwareinfo for Sienna_Cichlid and onwards for all
1828
+ * the use cases (IP discovery/G6 memory training/profiling/diagnostic data, etc.)
1829
+ *
1830
+ * Otherwise, fall back to the legacy approach of checking and reserving a TMR block for IP
1831
+ * discovery data and G6 memory training data respectively.
1832
+ */
1833
+ adev->mman.discovery_tmr_size =
1834
+ amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
1835
+ if (!adev->mman.discovery_tmr_size)
1836
+ adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET;
1837
+
1838
+ if (mem_train_support) {
1839
+ /* reserve VRAM for memory training according to the TMR location */
1840
+ amdgpu_ttm_training_data_block_init(adev);
1841
+ ret = amdgpu_bo_create_kernel_at(adev,
1842
+ ctx->c2p_train_data_offset,
1843
+ ctx->train_data_size,
1844
+ AMDGPU_GEM_DOMAIN_VRAM,
1845
+ &ctx->c2p_bo,
1846
+ NULL);
1847
+ if (ret) {
1848
+ DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
1849
+ amdgpu_ttm_training_reserve_vram_fini(adev);
1850
+ return ret;
1851
+ }
1852
+ ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
1853
+ }
1854
+
1855
+ ret = amdgpu_bo_create_kernel_at(adev,
1856
+ adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
1857
+ adev->mman.discovery_tmr_size,
1858
+ AMDGPU_GEM_DOMAIN_VRAM,
1859
+ &adev->mman.discovery_memory,
1860
+ NULL);
1861
+ if (ret) {
1862
+ DRM_ERROR("alloc tmr failed(%d)!\n", ret);
1863
+ amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
1864
+ return ret;
1865
+ }
1866
+
1867
+ return 0;
1868
+}
1869
+
17041870 /**
17051871 * amdgpu_ttm_init - Init the memory management (ttm) as well as various
17061872 * gtt/vram related fields.
....@@ -1716,18 +1882,14 @@
17161882 int r;
17171883 u64 vis_vram_limit;
17181884
1719
- /* initialize global references for vram/gtt */
1720
- r = amdgpu_ttm_global_init(adev);
1721
- if (r) {
1722
- return r;
1723
- }
1885
+ mutex_init(&adev->mman.gtt_window_lock);
1886
+
17241887 /* No others user of address space so set it to 0 */
17251888 r = ttm_bo_device_init(&adev->mman.bdev,
1726
- adev->mman.bo_global_ref.ref.object,
17271889 &amdgpu_bo_driver,
1728
- adev->ddev->anon_inode->i_mapping,
1729
- DRM_FILE_PAGE_OFFSET,
1730
- adev->need_dma32);
1890
+ adev_to_drm(adev)->anon_inode->i_mapping,
1891
+ adev_to_drm(adev)->vma_offset_manager,
1892
+ dma_addressing_limited(adev->dev));
17311893 if (r) {
17321894 DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
17331895 return r;
....@@ -1738,8 +1900,7 @@
17381900 adev->mman.bdev.no_retry = true;
17391901
17401902 /* Initialize VRAM pool with all of VRAM divided into pages */
1741
- r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
1742
- adev->gmc.real_vram_size >> PAGE_SHIFT);
1903
+ r = amdgpu_vram_mgr_init(adev);
17431904 if (r) {
17441905 DRM_ERROR("Failed initializing VRAM heap.\n");
17451906 return r;
....@@ -1767,18 +1928,35 @@
17671928 return r;
17681929 }
17691930
1931
+ /*
1932
+ * Only NAVI10 and onward ASICs support IP discovery.
1933
+ * If IP discovery is enabled, a block of memory should be
1934
+ * reserved for IP discovery.
1935
+ */
1936
+ if (adev->mman.discovery_bin) {
1937
+ r = amdgpu_ttm_reserve_tmr(adev);
1938
+ if (r)
1939
+ return r;
1940
+ }
1941
+
17701942 /* allocate memory as required for VGA
17711943 * This is used for VGA emulation and pre-OS scanout buffers to
17721944 * avoid display artifacts while transitioning between pre-OS
17731945 * and driver. */
1774
- if (adev->gmc.stolen_size) {
1775
- r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
1776
- AMDGPU_GEM_DOMAIN_VRAM,
1777
- &adev->stolen_vga_memory,
1778
- NULL, NULL);
1779
- if (r)
1780
- return r;
1781
- }
1946
+ r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
1947
+ AMDGPU_GEM_DOMAIN_VRAM,
1948
+ &adev->mman.stolen_vga_memory,
1949
+ NULL);
1950
+ if (r)
1951
+ return r;
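+ /* Also reserve the extended pre-OS region that follows the stolen VGA area. */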
1952
+ r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
1953
+ adev->mman.stolen_extended_size,
1954
+ AMDGPU_GEM_DOMAIN_VRAM,
1955
+ &adev->mman.stolen_extended_memory,
1956
+ NULL);
1957
+ if (r)
1958
+ return r;
1959
+
17821960 DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
17831961 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
17841962
....@@ -1796,7 +1974,7 @@
17961974 gtt_size = (uint64_t)amdgpu_gtt_size << 20;
17971975
17981976 /* Initialize GTT memory pool */
1799
- r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
1977
+ r = amdgpu_gtt_mgr_init(adev, gtt_size);
18001978 if (r) {
18011979 DRM_ERROR("Failed initializing GTT heap.\n");
18021980 return r;
....@@ -1805,51 +1983,24 @@
18051983 (unsigned)(gtt_size / (1024 * 1024)));
18061984
18071985 /* Initialize various on-chip memory pools */
1808
- adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
1809
- adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
1810
- adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
1811
- adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT;
1812
- adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT;
1813
- adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT;
1814
- adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT;
1815
- adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT;
1816
- adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT;
1817
- /* GDS Memory */
1818
- if (adev->gds.mem.total_size) {
1819
- r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
1820
- adev->gds.mem.total_size >> PAGE_SHIFT);
1821
- if (r) {
1822
- DRM_ERROR("Failed initializing GDS heap.\n");
1823
- return r;
1824
- }
1825
- }
1826
-
1827
- /* GWS */
1828
- if (adev->gds.gws.total_size) {
1829
- r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
1830
- adev->gds.gws.total_size >> PAGE_SHIFT);
1831
- if (r) {
1832
- DRM_ERROR("Failed initializing gws heap.\n");
1833
- return r;
1834
- }
1835
- }
1836
-
1837
- /* OA */
1838
- if (adev->gds.oa.total_size) {
1839
- r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
1840
- adev->gds.oa.total_size >> PAGE_SHIFT);
1841
- if (r) {
1842
- DRM_ERROR("Failed initializing oa heap.\n");
1843
- return r;
1844
- }
1845
- }
1846
-
1847
- /* Register debugfs entries for amdgpu_ttm */
1848
- r = amdgpu_ttm_debugfs_init(adev);
1986
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
18491987 if (r) {
1850
- DRM_ERROR("Failed to init debugfs\n");
1988
+ DRM_ERROR("Failed initializing GDS heap.\n");
18511989 return r;
18521990 }
1991
+
1992
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
1993
+ if (r) {
1994
+ DRM_ERROR("Failed initializing gws heap.\n");
1995
+ return r;
1996
+ }
1997
+
1998
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
1999
+ if (r) {
2000
+ DRM_ERROR("Failed initializing oa heap.\n");
2001
+ return r;
2002
+ }
2003
+
18532004 return 0;
18542005 }
18552006
....@@ -1859,7 +2010,9 @@
18592010 void amdgpu_ttm_late_init(struct amdgpu_device *adev)
18602011 {
18612012 /* return the VGA stolen memory (if any) back to VRAM */
1862
- amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
2013
+ if (!adev->mman.keep_stolen_vga_memory)
2014
+ amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
2015
+ amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
18632016 }
18642017
18652018 /**
....@@ -1870,22 +2023,24 @@
18702023 if (!adev->mman.initialized)
18712024 return;
18722025
1873
- amdgpu_ttm_debugfs_fini(adev);
2026
+ amdgpu_ttm_training_reserve_vram_fini(adev);
2027
+ /* return the stolen vga memory back to VRAM */
2028
+ if (adev->mman.keep_stolen_vga_memory)
2029
+ amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
2030
+ /* return the IP Discovery TMR memory back to VRAM */
2031
+ amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
18742032 amdgpu_ttm_fw_reserve_vram_fini(adev);
2033
+
18752034 if (adev->mman.aper_base_kaddr)
18762035 iounmap(adev->mman.aper_base_kaddr);
18772036 adev->mman.aper_base_kaddr = NULL;
18782037
1879
- ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
1880
- ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
1881
- if (adev->gds.mem.total_size)
1882
- ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
1883
- if (adev->gds.gws.total_size)
1884
- ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
1885
- if (adev->gds.oa.total_size)
1886
- ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
2038
+ amdgpu_vram_mgr_fini(adev);
2039
+ amdgpu_gtt_mgr_fini(adev);
2040
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
2041
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
2042
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
18872043 ttm_bo_device_release(&adev->mman.bdev);
1888
- amdgpu_ttm_global_fini(adev);
18892044 adev->mman.initialized = false;
18902045 DRM_INFO("amdgpu: ttm finalized\n");
18912046 }
....@@ -1901,21 +2056,23 @@
19012056 */
19022057 void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
19032058 {
1904
- struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM];
2059
+ struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
19052060 uint64_t size;
19062061 int r;
19072062
1908
- if (!adev->mman.initialized || adev->in_gpu_reset ||
2063
+ if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
19092064 adev->mman.buffer_funcs_enabled == enable)
19102065 return;
19112066
19122067 if (enable) {
19132068 struct amdgpu_ring *ring;
1914
- struct drm_sched_rq *rq;
2069
+ struct drm_gpu_scheduler *sched;
19152070
19162071 ring = adev->mman.buffer_funcs_ring;
1917
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
1918
- r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL);
2072
+ sched = &ring->sched;
2073
+ r = drm_sched_entity_init(&adev->mman.entity,
2074
+ DRM_SCHED_PRIORITY_KERNEL, &sched,
2075
+ 1, NULL);
19192076 if (r) {
19202077 DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
19212078 r);
....@@ -1938,92 +2095,23 @@
19382095
19392096 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
19402097 {
1941
- struct drm_file *file_priv;
1942
- struct amdgpu_device *adev;
2098
+ struct drm_file *file_priv = filp->private_data;
2099
+ struct amdgpu_device *adev = drm_to_adev(file_priv->minor->dev);
19432100
1944
- if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET))
1945
- return -EINVAL;
1946
-
1947
- file_priv = filp->private_data;
1948
- adev = file_priv->minor->dev->dev_private;
19492101 if (adev == NULL)
19502102 return -EINVAL;
19512103
19522104 return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
19532105 }
19542106
1955
-static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
1956
- struct ttm_mem_reg *mem, unsigned num_pages,
1957
- uint64_t offset, unsigned window,
1958
- struct amdgpu_ring *ring,
1959
- uint64_t *addr)
1960
-{
1961
- struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
1962
- struct amdgpu_device *adev = ring->adev;
1963
- struct ttm_tt *ttm = bo->ttm;
1964
- struct amdgpu_job *job;
1965
- unsigned num_dw, num_bytes;
1966
- dma_addr_t *dma_address;
1967
- struct dma_fence *fence;
1968
- uint64_t src_addr, dst_addr;
1969
- uint64_t flags;
1970
- int r;
1971
-
1972
- BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
1973
- AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
1974
-
1975
- *addr = adev->gmc.gart_start;
1976
- *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
1977
- AMDGPU_GPU_PAGE_SIZE;
1978
-
1979
- num_dw = adev->mman.buffer_funcs->copy_num_dw;
1980
- while (num_dw & 0x7)
1981
- num_dw++;
1982
-
1983
- num_bytes = num_pages * 8;
1984
-
1985
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
1986
- if (r)
1987
- return r;
1988
-
1989
- src_addr = num_dw * 4;
1990
- src_addr += job->ibs[0].gpu_addr;
1991
-
1992
- dst_addr = adev->gart.table_addr;
1993
- dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
1994
- amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
1995
- dst_addr, num_bytes);
1996
-
1997
- amdgpu_ring_pad_ib(ring, &job->ibs[0]);
1998
- WARN_ON(job->ibs[0].length_dw > num_dw);
1999
-
2000
- dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT];
2001
- flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem);
2002
- r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
2003
- &job->ibs[0].ptr[num_dw]);
2004
- if (r)
2005
- goto error_free;
2006
-
2007
- r = amdgpu_job_submit(job, &adev->mman.entity,
2008
- AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
2009
- if (r)
2010
- goto error_free;
2011
-
2012
- dma_fence_put(fence);
2013
-
2014
- return r;
2015
-
2016
-error_free:
2017
- amdgpu_job_free(job);
2018
- return r;
2019
-}
2020
-
20212107 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
20222108 uint64_t dst_offset, uint32_t byte_count,
2023
- struct reservation_object *resv,
2109
+ struct dma_resv *resv,
20242110 struct dma_fence **fence, bool direct_submit,
2025
- bool vm_needs_flush)
2111
+ bool vm_needs_flush, bool tmz)
20262112 {
2113
+ enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT :
2114
+ AMDGPU_IB_POOL_DELAYED;
20272115 struct amdgpu_device *adev = ring->adev;
20282116 struct amdgpu_job *job;
20292117
....@@ -2032,28 +2120,27 @@
20322120 unsigned i;
20332121 int r;
20342122
2035
- if (direct_submit && !ring->ready) {
2123
+ if (!direct_submit && !ring->sched.ready) {
20362124 DRM_ERROR("Trying to move memory with ring turned off.\n");
20372125 return -EINVAL;
20382126 }
20392127
20402128 max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
20412129 num_loops = DIV_ROUND_UP(byte_count, max_bytes);
2042
- num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
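+ /* Round the dword count up to a multiple of 8 for IB padding. */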
2130
+ num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
20432131
2044
- /* for IB padding */
2045
- while (num_dw & 0x7)
2046
- num_dw++;
2047
-
2048
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
2132
+ r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job);
20492133 if (r)
20502134 return r;
20512135
2052
- job->vm_needs_flush = vm_needs_flush;
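+ /* When a flush is needed, point the job at the GART page directory. */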
2136
+ if (vm_needs_flush) {
2137
+ job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
2138
+ job->vm_needs_flush = true;
2139
+ }
20532140 if (resv) {
20542141 r = amdgpu_sync_resv(adev, &job->sync, resv,
2055
- AMDGPU_FENCE_OWNER_UNDEFINED,
2056
- false);
2142
+ AMDGPU_SYNC_ALWAYS,
2143
+ AMDGPU_FENCE_OWNER_UNDEFINED);
20572144 if (r) {
20582145 DRM_ERROR("sync failed (%d).\n", r);
20592146 goto error_free;
....@@ -2064,7 +2151,7 @@
20642151 uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
20652152
20662153 amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
2067
- dst_offset, cur_size_in_bytes);
2154
+ dst_offset, cur_size_in_bytes, tmz);
20682155
20692156 src_offset += cur_size_in_bytes;
20702157 dst_offset += cur_size_in_bytes;
....@@ -2091,7 +2178,7 @@
20912178
20922179 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
20932180 uint32_t src_data,
2094
- struct reservation_object *resv,
2181
+ struct dma_resv *resv,
20952182 struct dma_fence **fence)
20962183 {
20972184 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
....@@ -2120,9 +2207,9 @@
21202207 mm_node = bo->tbo.mem.mm_node;
21212208 num_loops = 0;
21222209 while (num_pages) {
2123
- uint32_t byte_count = mm_node->size << PAGE_SHIFT;
2210
+ uint64_t byte_count = mm_node->size << PAGE_SHIFT;
21242211
2125
- num_loops += DIV_ROUND_UP(byte_count, max_bytes);
2212
+ num_loops += DIV_ROUND_UP_ULL(byte_count, max_bytes);
21262213 num_pages -= mm_node->size;
21272214 ++mm_node;
21282215 }
....@@ -2131,13 +2218,15 @@
21312218 /* for IB padding */
21322219 num_dw += 64;
21332220
2134
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
2221
+ r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED,
2222
+ &job);
21352223 if (r)
21362224 return r;
21372225
21382226 if (resv) {
21392227 r = amdgpu_sync_resv(adev, &job->sync, resv,
2140
- AMDGPU_FENCE_OWNER_UNDEFINED, false);
2228
+ AMDGPU_SYNC_ALWAYS,
2229
+ AMDGPU_FENCE_OWNER_UNDEFINED);
21412230 if (r) {
21422231 DRM_ERROR("sync failed (%d).\n", r);
21432232 goto error_free;
....@@ -2148,12 +2237,13 @@
21482237 mm_node = bo->tbo.mem.mm_node;
21492238
21502239 while (num_pages) {
2151
- uint32_t byte_count = mm_node->size << PAGE_SHIFT;
2240
+ uint64_t byte_count = mm_node->size << PAGE_SHIFT;
21522241 uint64_t dst_addr;
21532242
21542243 dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
21552244 while (byte_count) {
2156
- uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
2245
+ uint32_t cur_size_in_bytes = min_t(uint64_t, byte_count,
2246
+ max_bytes);
21572247
21582248 amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
21592249 dst_addr, cur_size_in_bytes);
....@@ -2185,22 +2275,22 @@
21852275 static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
21862276 {
21872277 struct drm_info_node *node = (struct drm_info_node *)m->private;
2188
- unsigned ttm_pl = *(int *)node->info_ent->data;
2278
+ unsigned ttm_pl = (uintptr_t)node->info_ent->data;
21892279 struct drm_device *dev = node->minor->dev;
2190
- struct amdgpu_device *adev = dev->dev_private;
2191
- struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
2280
+ struct amdgpu_device *adev = drm_to_adev(dev);
2281
+ struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, ttm_pl);
21922282 struct drm_printer p = drm_seq_file_printer(m);
21932283
21942284 man->func->debug(man, &p);
21952285 return 0;
21962286 }
21972287
2198
-static int ttm_pl_vram = TTM_PL_VRAM;
2199
-static int ttm_pl_tt = TTM_PL_TT;
2200
-
22012288 static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
2202
- {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram},
2203
- {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt},
2289
+ {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_VRAM},
2290
+ {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_TT},
2291
+ {"amdgpu_gds_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GDS},
2292
+ {"amdgpu_gws_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GWS},
2293
+ {"amdgpu_oa_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_OA},
22042294 {"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL},
22052295 #ifdef CONFIG_SWIOTLB
22062296 {"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL}
....@@ -2217,7 +2307,6 @@
22172307 {
22182308 struct amdgpu_device *adev = file_inode(f)->i_private;
22192309 ssize_t result = 0;
2220
- int r;
22212310
22222311 if (size & 0x3 || *pos & 0x3)
22232312 return -EINVAL;
....@@ -2225,27 +2314,19 @@
22252314 if (*pos >= adev->gmc.mc_vram_size)
22262315 return -ENXIO;
22272316
2317
+ size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos));
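+ /* Copy out in chunks of up to AMDGPU_TTM_VRAM_MAX_DW_READ dwords via the VRAM access helper. */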
22282318 while (size) {
2229
- unsigned long flags;
2230
- uint32_t value;
2319
+ size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4);
2320
+ uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ];
22312321
2232
- if (*pos >= adev->gmc.mc_vram_size)
2233
- return result;
2322
+ amdgpu_device_vram_access(adev, *pos, value, bytes, false);
2323
+ if (copy_to_user(buf, value, bytes))
2324
+ return -EFAULT;
22342325
2235
- spin_lock_irqsave(&adev->mmio_idx_lock, flags);
2236
- WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
2237
- WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
2238
- value = RREG32_NO_KIQ(mmMM_DATA);
2239
- spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
2240
-
2241
- r = put_user(value, (uint32_t *)buf);
2242
- if (r)
2243
- return r;
2244
-
2245
- result += 4;
2246
- buf += 4;
2247
- *pos += 4;
2248
- size -= 4;
2326
+ result += bytes;
2327
+ buf += bytes;
2328
+ *pos += bytes;
2329
+ size -= bytes;
22492330 }
22502331
22512332 return result;
....@@ -2482,12 +2563,12 @@
24822563
24832564 #endif
24842565
2485
-static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
2566
+int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
24862567 {
24872568 #if defined(CONFIG_DEBUG_FS)
24882569 unsigned count;
24892570
2490
- struct drm_minor *minor = adev->ddev->primary;
2571
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
24912572 struct dentry *ent, *root = minor->debugfs_root;
24922573
24932574 for (count = 0; count < ARRAY_SIZE(ttm_debugfs_entries); count++) {
....@@ -2515,15 +2596,5 @@
25152596 return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count);
25162597 #else
25172598 return 0;
2518
-#endif
2519
-}
2520
-
2521
-static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev)
2522
-{
2523
-#if defined(CONFIG_DEBUG_FS)
2524
- unsigned i;
2525
-
2526
- for (i = 0; i < ARRAY_SIZE(ttm_debugfs_entries); i++)
2527
- debugfs_remove(adev->mman.debugfs_entries[i]);
25282599 #endif
25292600 }