hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
....@@ -28,12 +28,15 @@
2828 #include <linux/dma-fence-array.h>
2929 #include <linux/interval_tree_generic.h>
3030 #include <linux/idr.h>
31
-#include <drm/drmP.h>
31
+#include <linux/dma-buf.h>
32
+
3233 #include <drm/amdgpu_drm.h>
3334 #include "amdgpu.h"
3435 #include "amdgpu_trace.h"
3536 #include "amdgpu_amdkfd.h"
3637 #include "amdgpu_gmc.h"
38
+#include "amdgpu_xgmi.h"
39
+#include "amdgpu_dma_buf.h"
3740
3841 /**
3942 * DOC: GPUVM
....@@ -66,58 +69,6 @@
6669 #undef LAST
6770
6871 /**
69
- * struct amdgpu_pte_update_params - Local structure
70
- *
71
- * Encapsulate some VM table update parameters to reduce
72
- * the number of function parameters
73
- *
74
- */
75
-struct amdgpu_pte_update_params {
76
-
77
- /**
78
- * @adev: amdgpu device we do this update for
79
- */
80
- struct amdgpu_device *adev;
81
-
82
- /**
83
- * @vm: optional amdgpu_vm we do this update for
84
- */
85
- struct amdgpu_vm *vm;
86
-
87
- /**
88
- * @src: address where to copy page table entries from
89
- */
90
- uint64_t src;
91
-
92
- /**
93
- * @ib: indirect buffer to fill with commands
94
- */
95
- struct amdgpu_ib *ib;
96
-
97
- /**
98
- * @func: Function which actually does the update
99
- */
100
- void (*func)(struct amdgpu_pte_update_params *params,
101
- struct amdgpu_bo *bo, uint64_t pe,
102
- uint64_t addr, unsigned count, uint32_t incr,
103
- uint64_t flags);
104
- /**
105
- * @pages_addr:
106
- *
107
- * DMA addresses to use for mapping, used during VM update by CPU
108
- */
109
- dma_addr_t *pages_addr;
110
-
111
- /**
112
- * @kptr:
113
- *
114
- * Kernel pointer of PD/PT BO that needs to be updated,
115
- * used during VM update by CPU
116
- */
117
- void *kptr;
118
-};
119
-
120
-/**
12172 * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback
12273 */
12374 struct amdgpu_prt_cb {
....@@ -133,46 +84,30 @@
13384 struct dma_fence_cb cb;
13485 };
13586
136
-/**
137
- * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
138
- *
139
- * @base: base structure for tracking BO usage in a VM
140
- * @vm: vm to which bo is to be added
141
- * @bo: amdgpu buffer object
142
- *
143
- * Initialize a bo_va_base structure and add it to the appropriate lists
144
- *
87
+/*
88
+ * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
89
+ * happens while holding this lock anywhere to prevent deadlocks when
90
+ * an MMU notifier runs in reclaim-FS context.
14591 */
146
-static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
147
- struct amdgpu_vm *vm,
148
- struct amdgpu_bo *bo)
92
+static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
14993 {
150
- base->vm = vm;
151
- base->bo = bo;
152
- INIT_LIST_HEAD(&base->bo_list);
153
- INIT_LIST_HEAD(&base->vm_status);
94
+ mutex_lock(&vm->eviction_lock);
95
+ vm->saved_flags = memalloc_nofs_save();
96
+}
15497
155
- if (!bo)
156
- return;
157
- list_add_tail(&base->bo_list, &bo->va);
98
+static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
99
+{
100
+ if (mutex_trylock(&vm->eviction_lock)) {
101
+ vm->saved_flags = memalloc_nofs_save();
102
+ return 1;
103
+ }
104
+ return 0;
105
+}
158106
159
- if (bo->tbo.type == ttm_bo_type_kernel)
160
- list_move(&base->vm_status, &vm->relocated);
161
-
162
- if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
163
- return;
164
-
165
- if (bo->preferred_domains &
166
- amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
167
- return;
168
-
169
- /*
170
- * we checked all the prerequisites, but it looks like this per vm bo
171
- * is currently evicted. add the bo to the evicted list to make sure it
172
- * is validated on next vm use to avoid fault.
173
- * */
174
- list_move_tail(&base->vm_status, &vm->evicted);
175
- base->moved = true;
107
+static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
108
+{
109
+ memalloc_nofs_restore(vm->saved_flags);
110
+ mutex_unlock(&vm->eviction_lock);
176111 }
177112
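The three helpers above pair vm->eviction_lock with memalloc_nofs_save()/memalloc_nofs_restore() so that nothing allocated inside the critical section can recurse into FS reclaim and, from there, into an MMU notifier that needs the same lock. A minimal sketch of that pattern outside the driver (the struct and function names below are illustrative, not amdgpu API):

#include <linux/mutex.h>
#include <linux/sched/mm.h>	/* memalloc_nofs_save()/memalloc_nofs_restore() */

struct my_ctx {
	struct mutex lock;
	unsigned int saved_flags;	/* valid while the lock is held */
};

static inline void my_ctx_lock(struct my_ctx *c)
{
	mutex_lock(&c->lock);
	/* Any allocation made under c->lock must not enter FS reclaim,
	 * because an MMU notifier running in reclaim-FS context may also
	 * try to take c->lock and deadlock. */
	c->saved_flags = memalloc_nofs_save();
}

static inline void my_ctx_unlock(struct my_ctx *c)
{
	memalloc_nofs_restore(c->saved_flags);
	mutex_unlock(&c->lock);
}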
178113 /**
....@@ -187,23 +122,17 @@
187122 static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
188123 unsigned level)
189124 {
190
- unsigned shift = 0xff;
191
-
192125 switch (level) {
193126 case AMDGPU_VM_PDB2:
194127 case AMDGPU_VM_PDB1:
195128 case AMDGPU_VM_PDB0:
196
- shift = 9 * (AMDGPU_VM_PDB0 - level) +
129
+ return 9 * (AMDGPU_VM_PDB0 - level) +
197130 adev->vm_manager.block_size;
198
- break;
199131 case AMDGPU_VM_PTB:
200
- shift = 0;
201
- break;
132
+ return 0;
202133 default:
203
- dev_err(adev->dev, "the level%d isn't supported.\n", level);
134
+ return ~0;
204135 }
205
-
206
- return shift;
207136 }
208137
209138 /**
....@@ -223,13 +152,50 @@
223152
224153 if (level == adev->vm_manager.root_level)
225154 /* For the root directory */
226
- return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
155
+ return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
156
+ >> shift;
227157 else if (level != AMDGPU_VM_PTB)
228158 /* Everything in between */
229159 return 512;
230160 else
231161 /* For the page tables on the leaves */
232162 return AMDGPU_VM_PTE_COUNT(adev);
163
+}
164
+
165
+/**
166
+ * amdgpu_vm_num_ats_entries - return the number of ATS entries in the root PD
167
+ *
168
+ * @adev: amdgpu_device pointer
169
+ *
170
+ * Returns:
171
+ * The number of entries in the root page directory which needs the ATS setting.
172
+ */
173
+static unsigned amdgpu_vm_num_ats_entries(struct amdgpu_device *adev)
174
+{
175
+ unsigned shift;
176
+
177
+ shift = amdgpu_vm_level_shift(adev, adev->vm_manager.root_level);
178
+ return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
179
+}
180
+
181
+/**
182
+ * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT
183
+ *
184
+ * @adev: amdgpu_device pointer
185
+ * @level: VMPT level
186
+ *
187
+ * Returns:
188
+ * The mask to extract the entry number of a PD/PT from an address.
189
+ */
190
+static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev,
191
+ unsigned int level)
192
+{
193
+ if (level <= adev->vm_manager.root_level)
194
+ return 0xffffffff;
195
+ else if (level != AMDGPU_VM_PTB)
196
+ return 0x1ff;
197
+ else
198
+ return AMDGPU_VM_PTE_COUNT(adev) - 1;
233199 }
234200
235201 /**
....@@ -247,6 +213,365 @@
247213 }
248214
249215 /**
216
+ * amdgpu_vm_bo_evicted - vm_bo is evicted
217
+ *
218
+ * @vm_bo: vm_bo which is evicted
219
+ *
220
+ * State for PDs/PTs and per VM BOs which are not at the location they should
221
+ * be.
222
+ */
223
+static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
224
+{
225
+ struct amdgpu_vm *vm = vm_bo->vm;
226
+ struct amdgpu_bo *bo = vm_bo->bo;
227
+
228
+ vm_bo->moved = true;
229
+ if (bo->tbo.type == ttm_bo_type_kernel)
230
+ list_move(&vm_bo->vm_status, &vm->evicted);
231
+ else
232
+ list_move_tail(&vm_bo->vm_status, &vm->evicted);
233
+}
234
+/**
235
+ * amdgpu_vm_bo_moved - vm_bo is moved
236
+ *
237
+ * @vm_bo: vm_bo which is moved
238
+ *
239
+ * State for per VM BOs which are moved, but that change is not yet reflected
240
+ * in the page tables.
241
+ */
242
+static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
243
+{
244
+ list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
245
+}
246
+
247
+/**
248
+ * amdgpu_vm_bo_idle - vm_bo is idle
249
+ *
250
+ * @vm_bo: vm_bo which is now idle
251
+ *
252
+ * State for PDs/PTs and per VM BOs which have gone through the state machine
253
+ * and are now idle.
254
+ */
255
+static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
256
+{
257
+ list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
258
+ vm_bo->moved = false;
259
+}
260
+
261
+/**
262
+ * amdgpu_vm_bo_invalidated - vm_bo is invalidated
263
+ *
264
+ * @vm_bo: vm_bo which is now invalidated
265
+ *
266
+ * State for normal BOs which are invalidated and that change is not yet reflected
267
+ * in the PTs.
268
+ */
269
+static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
270
+{
271
+ spin_lock(&vm_bo->vm->invalidated_lock);
272
+ list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
273
+ spin_unlock(&vm_bo->vm->invalidated_lock);
274
+}
275
+
276
+/**
277
+ * amdgpu_vm_bo_relocated - vm_bo is relocated
278
+ *
279
+ * @vm_bo: vm_bo which is relocated
280
+ *
281
+ * State for PDs/PTs which need to update their parent PD.
282
+ * For the root PD, just move to idle state.
283
+ */
284
+static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
285
+{
286
+ if (vm_bo->bo->parent)
287
+ list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
288
+ else
289
+ amdgpu_vm_bo_idle(vm_bo);
290
+}
291
+
292
+/**
293
+ * amdgpu_vm_bo_done - vm_bo is done
294
+ *
295
+ * @vm_bo: vm_bo which is now done
296
+ *
297
+ * State for normal BOs which are invalidated and that change has been updated
298
+ * in the PTs.
299
+ */
300
+static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
301
+{
302
+ spin_lock(&vm_bo->vm->invalidated_lock);
303
+ list_del_init(&vm_bo->vm_status);
304
+ spin_unlock(&vm_bo->vm->invalidated_lock);
305
+}
306
+
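The six helpers above implement the per-BO state machine used throughout this file; a condensed summary of the lists they move entries between (derived from the comments above, slightly simplified):

/*
 * evicted     - PDs/PTs and per-VM BOs that are not where they need to be;
 *               revalidated in amdgpu_vm_validate_pt_bos().
 * moved       - per-VM BOs whose placement changed but whose PTEs are stale.
 * relocated   - PDs/PTs whose entry in the parent PD must be rewritten.
 * idle        - PDs/PTs and per-VM BOs with no pending work.
 * invalidated - normal BOs with stale mappings (protected by invalidated_lock).
 * done        - normal BOs taken off the invalidated list once the PTs are
 *               up to date (list_del_init(), i.e. on no list at all).
 */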
307
+/**
308
+ * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
309
+ *
310
+ * @base: base structure for tracking BO usage in a VM
311
+ * @vm: vm to which bo is to be added
312
+ * @bo: amdgpu buffer object
313
+ *
314
+ * Initialize a bo_va_base structure and add it to the appropriate lists
315
+ *
316
+ */
317
+static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
318
+ struct amdgpu_vm *vm,
319
+ struct amdgpu_bo *bo)
320
+{
321
+ base->vm = vm;
322
+ base->bo = bo;
323
+ base->next = NULL;
324
+ INIT_LIST_HEAD(&base->vm_status);
325
+
326
+ if (!bo)
327
+ return;
328
+ base->next = bo->vm_bo;
329
+ bo->vm_bo = base;
330
+
331
+ if (bo->tbo.base.resv != vm->root.base.bo->tbo.base.resv)
332
+ return;
333
+
334
+ vm->bulk_moveable = false;
335
+ if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
336
+ amdgpu_vm_bo_relocated(base);
337
+ else
338
+ amdgpu_vm_bo_idle(base);
339
+
340
+ if (bo->preferred_domains &
341
+ amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
342
+ return;
343
+
344
+ /*
345
+ * we checked all the prerequisites, but it looks like this per vm bo
346
+ * is currently evicted. add the bo to the evicted list to make sure it
347
+ * is validated on next vm use to avoid fault.
348
+ * */
349
+ amdgpu_vm_bo_evicted(base);
350
+}
351
+
352
+/**
353
+ * amdgpu_vm_pt_parent - get the parent page directory
354
+ *
355
+ * @pt: child page table
356
+ *
357
+ * Helper to get the parent entry for the child page table. NULL if we are at
358
+ * the root page directory.
359
+ */
360
+static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt)
361
+{
362
+ struct amdgpu_bo *parent = pt->base.bo->parent;
363
+
364
+ if (!parent)
365
+ return NULL;
366
+
367
+ return container_of(parent->vm_bo, struct amdgpu_vm_pt, base);
368
+}
369
+
370
+/*
371
+ * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
372
+ */
373
+struct amdgpu_vm_pt_cursor {
374
+ uint64_t pfn;
375
+ struct amdgpu_vm_pt *parent;
376
+ struct amdgpu_vm_pt *entry;
377
+ unsigned level;
378
+};
379
+
380
+/**
381
+ * amdgpu_vm_pt_start - start PD/PT walk
382
+ *
383
+ * @adev: amdgpu_device pointer
384
+ * @vm: amdgpu_vm structure
385
+ * @start: start address of the walk
386
+ * @cursor: state to initialize
387
+ *
388
+ * Initialize a amdgpu_vm_pt_cursor to start a walk.
389
+ */
390
+static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
391
+ struct amdgpu_vm *vm, uint64_t start,
392
+ struct amdgpu_vm_pt_cursor *cursor)
393
+{
394
+ cursor->pfn = start;
395
+ cursor->parent = NULL;
396
+ cursor->entry = &vm->root;
397
+ cursor->level = adev->vm_manager.root_level;
398
+}
399
+
400
+/**
401
+ * amdgpu_vm_pt_descendant - go to child node
402
+ *
403
+ * @adev: amdgpu_device pointer
404
+ * @cursor: current state
405
+ *
406
+ * Walk to the child node of the current node.
407
+ * Returns:
408
+ * True if the walk was possible, false otherwise.
409
+ */
410
+static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
411
+ struct amdgpu_vm_pt_cursor *cursor)
412
+{
413
+ unsigned mask, shift, idx;
414
+
415
+ if (!cursor->entry->entries)
416
+ return false;
417
+
418
+ BUG_ON(!cursor->entry->base.bo);
419
+ mask = amdgpu_vm_entries_mask(adev, cursor->level);
420
+ shift = amdgpu_vm_level_shift(adev, cursor->level);
421
+
422
+ ++cursor->level;
423
+ idx = (cursor->pfn >> shift) & mask;
424
+ cursor->parent = cursor->entry;
425
+ cursor->entry = &cursor->entry->entries[idx];
426
+ return true;
427
+}
428
+
429
+/**
430
+ * amdgpu_vm_pt_sibling - go to sibling node
431
+ *
432
+ * @adev: amdgpu_device pointer
433
+ * @cursor: current state
434
+ *
435
+ * Walk to the sibling node of the current node.
436
+ * Returns:
437
+ * True if the walk was possible, false otherwise.
438
+ */
439
+static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
440
+ struct amdgpu_vm_pt_cursor *cursor)
441
+{
442
+ unsigned shift, num_entries;
443
+
444
+ /* Root doesn't have a sibling */
445
+ if (!cursor->parent)
446
+ return false;
447
+
448
+ /* Go to our parents and see if we got a sibling */
449
+ shift = amdgpu_vm_level_shift(adev, cursor->level - 1);
450
+ num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1);
451
+
452
+ if (cursor->entry == &cursor->parent->entries[num_entries - 1])
453
+ return false;
454
+
455
+ cursor->pfn += 1ULL << shift;
456
+ cursor->pfn &= ~((1ULL << shift) - 1);
457
+ ++cursor->entry;
458
+ return true;
459
+}
460
+
461
+/**
462
+ * amdgpu_vm_pt_ancestor - go to parent node
463
+ *
464
+ * @cursor: current state
465
+ *
466
+ * Walk to the parent node of the current node.
467
+ * Returns:
468
+ * True if the walk was possible, false otherwise.
469
+ */
470
+static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
471
+{
472
+ if (!cursor->parent)
473
+ return false;
474
+
475
+ --cursor->level;
476
+ cursor->entry = cursor->parent;
477
+ cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
478
+ return true;
479
+}
480
+
481
+/**
482
+ * amdgpu_vm_pt_next - get next PD/PT in hierarchy
483
+ *
484
+ * @adev: amdgpu_device pointer
485
+ * @cursor: current state
486
+ *
487
+ * Walk the PD/PT tree to the next node.
488
+ */
489
+static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
490
+ struct amdgpu_vm_pt_cursor *cursor)
491
+{
492
+ /* First try a newborn child */
493
+ if (amdgpu_vm_pt_descendant(adev, cursor))
494
+ return;
495
+
496
+ /* If that didn't work, try to find a sibling */
497
+ while (!amdgpu_vm_pt_sibling(adev, cursor)) {
498
+ /* No sibling, go to our parents and grandparents */
499
+ if (!amdgpu_vm_pt_ancestor(cursor)) {
500
+ cursor->pfn = ~0ll;
501
+ return;
502
+ }
503
+ }
504
+}
505
+
506
+/**
507
+ * amdgpu_vm_pt_first_dfs - start a depth-first search
508
+ *
509
+ * @adev: amdgpu_device structure
510
+ * @vm: amdgpu_vm structure
511
+ * @start: optional cursor to start with
512
+ * @cursor: state to initialize
513
+ *
514
+ * Starts a depth-first traversal of the PD/PT tree.
515
+ */
516
+static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
517
+ struct amdgpu_vm *vm,
518
+ struct amdgpu_vm_pt_cursor *start,
519
+ struct amdgpu_vm_pt_cursor *cursor)
520
+{
521
+ if (start)
522
+ *cursor = *start;
523
+ else
524
+ amdgpu_vm_pt_start(adev, vm, 0, cursor);
525
+ while (amdgpu_vm_pt_descendant(adev, cursor));
526
+}
527
+
528
+/**
529
+ * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue
530
+ *
531
+ * @start: starting point for the search
532
+ * @entry: current entry
533
+ *
534
+ * Returns:
535
+ * True when the search should continue, false otherwise.
536
+ */
537
+static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
538
+ struct amdgpu_vm_pt *entry)
539
+{
540
+ return entry && (!start || entry != start->entry);
541
+}
542
+
543
+/**
544
+ * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
545
+ *
546
+ * @adev: amdgpu_device structure
547
+ * @cursor: current state
548
+ *
549
+ * Move the cursor to the next node in a depth-first search.
550
+ */
551
+static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
552
+ struct amdgpu_vm_pt_cursor *cursor)
553
+{
554
+ if (!cursor->entry)
555
+ return;
556
+
557
+ if (!cursor->parent)
558
+ cursor->entry = NULL;
559
+ else if (amdgpu_vm_pt_sibling(adev, cursor))
560
+ while (amdgpu_vm_pt_descendant(adev, cursor));
561
+ else
562
+ amdgpu_vm_pt_ancestor(cursor);
563
+}
564
+
565
+/*
566
+ * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
567
+ */
568
+#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \
569
+ for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \
570
+ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
571
+ amdgpu_vm_pt_continue_dfs((start), (entry)); \
572
+ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
573
+
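The macro above yields every PD/PT in post-order (children before their parent, root last), which is what makes it safe for teardown; amdgpu_vm_free_pts() later in this patch uses it exactly this way. A hedged usage sketch (do_something_with() is a hypothetical helper):

	struct amdgpu_vm_pt_cursor cursor;
	struct amdgpu_vm_pt *entry;

	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {
		/* 'entry' is visited after all of its children; its BO may be
		 * NULL if this level of the hierarchy was never allocated. */
		if (entry->base.bo)
			do_something_with(entry);
	}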
574
+/**
250575 * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
251576 *
252577 * @vm: vm providing the BOs
....@@ -260,12 +585,82 @@
260585 struct list_head *validated,
261586 struct amdgpu_bo_list_entry *entry)
262587 {
263
- entry->robj = vm->root.base.bo;
264588 entry->priority = 0;
265
- entry->tv.bo = &entry->robj->tbo;
266
- entry->tv.shared = true;
589
+ entry->tv.bo = &vm->root.base.bo->tbo;
590
+ /* Two for VM updates, one for TTM and one for the CS job */
591
+ entry->tv.num_shared = 4;
267592 entry->user_pages = NULL;
268593 list_add(&entry->tv.head, validated);
594
+}
595
+
596
+/**
597
+ * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag
598
+ *
599
+ * @bo: BO which was removed from the LRU
600
+ *
601
+ * Make sure the bulk_moveable flag is updated when a BO is removed from the
602
+ * LRU.
603
+ */
604
+void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
605
+{
606
+ struct amdgpu_bo *abo;
607
+ struct amdgpu_vm_bo_base *bo_base;
608
+
609
+ if (!amdgpu_bo_is_amdgpu_bo(bo))
610
+ return;
611
+
612
+ if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT)
613
+ return;
614
+
615
+ abo = ttm_to_amdgpu_bo(bo);
616
+ if (!abo->parent)
617
+ return;
618
+ for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) {
619
+ struct amdgpu_vm *vm = bo_base->vm;
620
+
621
+ if (abo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)
622
+ vm->bulk_moveable = false;
623
+ }
624
+
625
+}
626
+/**
627
+ * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
628
+ *
629
+ * @adev: amdgpu device pointer
630
+ * @vm: vm providing the BOs
631
+ *
632
+ * Move all BOs to the end of LRU and remember their positions to put them
633
+ * together.
634
+ */
635
+void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
636
+ struct amdgpu_vm *vm)
637
+{
638
+ struct amdgpu_vm_bo_base *bo_base;
639
+
640
+ if (vm->bulk_moveable) {
641
+ spin_lock(&ttm_bo_glob.lru_lock);
642
+ ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
643
+ spin_unlock(&ttm_bo_glob.lru_lock);
644
+ return;
645
+ }
646
+
647
+ memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
648
+
649
+ spin_lock(&ttm_bo_glob.lru_lock);
650
+ list_for_each_entry(bo_base, &vm->idle, vm_status) {
651
+ struct amdgpu_bo *bo = bo_base->bo;
652
+
653
+ if (!bo->parent)
654
+ continue;
655
+
656
+ ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move);
657
+ if (bo->shadow)
658
+ ttm_bo_move_to_lru_tail(&bo->shadow->tbo,
659
+ &vm->lru_bulk_move);
660
+ }
661
+ spin_unlock(&ttm_bo_glob.lru_lock);
662
+
663
+ vm->bulk_moveable = true;
269664 }
270665
271666 /**
....@@ -285,48 +680,31 @@
285680 int (*validate)(void *p, struct amdgpu_bo *bo),
286681 void *param)
287682 {
288
- struct ttm_bo_global *glob = adev->mman.bdev.glob;
289683 struct amdgpu_vm_bo_base *bo_base, *tmp;
290
- int r = 0;
684
+ int r;
685
+
686
+ vm->bulk_moveable &= list_empty(&vm->evicted);
291687
292688 list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
293689 struct amdgpu_bo *bo = bo_base->bo;
294690
295
- if (bo->parent) {
296
- r = validate(param, bo);
297
- if (r)
298
- break;
299
-
300
- spin_lock(&glob->lru_lock);
301
- ttm_bo_move_to_lru_tail(&bo->tbo);
302
- if (bo->shadow)
303
- ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
304
- spin_unlock(&glob->lru_lock);
305
- }
691
+ r = validate(param, bo);
692
+ if (r)
693
+ return r;
306694
307695 if (bo->tbo.type != ttm_bo_type_kernel) {
308
- spin_lock(&vm->moved_lock);
309
- list_move(&bo_base->vm_status, &vm->moved);
310
- spin_unlock(&vm->moved_lock);
696
+ amdgpu_vm_bo_moved(bo_base);
311697 } else {
312
- list_move(&bo_base->vm_status, &vm->relocated);
698
+ vm->update_funcs->map_table(bo);
699
+ amdgpu_vm_bo_relocated(bo_base);
313700 }
314701 }
315702
316
- spin_lock(&glob->lru_lock);
317
- list_for_each_entry(bo_base, &vm->idle, vm_status) {
318
- struct amdgpu_bo *bo = bo_base->bo;
703
+ amdgpu_vm_eviction_lock(vm);
704
+ vm->evicting = false;
705
+ amdgpu_vm_eviction_unlock(vm);
319706
320
- if (!bo->parent)
321
- continue;
322
-
323
- ttm_bo_move_to_lru_tail(&bo->tbo);
324
- if (bo->shadow)
325
- ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
326
- }
327
- spin_unlock(&glob->lru_lock);
328
-
329
- return r;
707
+ return 0;
330708 }
331709
332710 /**
....@@ -337,11 +715,17 @@
337715 * Check if all VM PDs/PTs are ready for updates
338716 *
339717 * Returns:
340
- * True if eviction list is empty.
718
+ * True if VM is not evicting.
341719 */
342720 bool amdgpu_vm_ready(struct amdgpu_vm *vm)
343721 {
344
- return list_empty(&vm->evicted);
722
+ bool ret;
723
+
724
+ amdgpu_vm_eviction_lock(vm);
725
+ ret = !vm->evicting;
726
+ amdgpu_vm_eviction_unlock(vm);
727
+
728
+ return ret && list_empty(&vm->evicted);
345729 }
346730
347731 /**
....@@ -350,8 +734,7 @@
350734 * @adev: amdgpu_device pointer
351735 * @vm: VM to clear BO from
352736 * @bo: BO to clear
353
- * @level: level this BO is at
354
- * @pte_support_ats: indicate ATS support from PTE
737
+ * @immediate: use an immediate update
355738 *
356739 * Root PD needs to be reserved when calling this.
357740 *
....@@ -359,248 +742,249 @@
359742 * 0 on success, errno otherwise.
360743 */
361744 static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
362
- struct amdgpu_vm *vm, struct amdgpu_bo *bo,
363
- unsigned level, bool pte_support_ats)
745
+ struct amdgpu_vm *vm,
746
+ struct amdgpu_bo *bo,
747
+ bool immediate)
364748 {
365749 struct ttm_operation_ctx ctx = { true, false };
366
- struct dma_fence *fence = NULL;
750
+ unsigned level = adev->vm_manager.root_level;
751
+ struct amdgpu_vm_update_params params;
752
+ struct amdgpu_bo *ancestor = bo;
367753 unsigned entries, ats_entries;
368
- struct amdgpu_ring *ring;
369
- struct amdgpu_job *job;
370754 uint64_t addr;
371755 int r;
372756
373
- entries = amdgpu_bo_size(bo) / 8;
757
+ /* Figure out our place in the hierarchy */
758
+ if (ancestor->parent) {
759
+ ++level;
760
+ while (ancestor->parent->parent) {
761
+ ++level;
762
+ ancestor = ancestor->parent;
763
+ }
764
+ }
374765
375
- if (pte_support_ats) {
376
- if (level == adev->vm_manager.root_level) {
377
- ats_entries = amdgpu_vm_level_shift(adev, level);
378
- ats_entries += AMDGPU_GPU_PAGE_SHIFT;
379
- ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
380
- ats_entries = min(ats_entries, entries);
381
- entries -= ats_entries;
766
+ entries = amdgpu_bo_size(bo) / 8;
767
+ if (!vm->pte_support_ats) {
768
+ ats_entries = 0;
769
+
770
+ } else if (!bo->parent) {
771
+ ats_entries = amdgpu_vm_num_ats_entries(adev);
772
+ ats_entries = min(ats_entries, entries);
773
+ entries -= ats_entries;
774
+
775
+ } else {
776
+ struct amdgpu_vm_pt *pt;
777
+
778
+ pt = container_of(ancestor->vm_bo, struct amdgpu_vm_pt, base);
779
+ ats_entries = amdgpu_vm_num_ats_entries(adev);
780
+ if ((pt - vm->root.entries) >= ats_entries) {
781
+ ats_entries = 0;
382782 } else {
383783 ats_entries = entries;
384784 entries = 0;
385785 }
386
- } else {
387
- ats_entries = 0;
388786 }
389
-
390
- ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
391
-
392
- r = reservation_object_reserve_shared(bo->tbo.resv);
393
- if (r)
394
- return r;
395787
396788 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
397789 if (r)
398
- goto error;
790
+ return r;
399791
400
- r = amdgpu_job_alloc_with_ib(adev, 64, &job);
792
+ if (bo->shadow) {
793
+ r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow->placement,
794
+ &ctx);
795
+ if (r)
796
+ return r;
797
+ }
798
+
799
+ r = vm->update_funcs->map_table(bo);
401800 if (r)
402
- goto error;
801
+ return r;
403802
404
- addr = amdgpu_bo_gpu_offset(bo);
803
+ memset(&params, 0, sizeof(params));
804
+ params.adev = adev;
805
+ params.vm = vm;
806
+ params.immediate = immediate;
807
+
808
+ r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
809
+ if (r)
810
+ return r;
811
+
812
+ addr = 0;
405813 if (ats_entries) {
406
- uint64_t ats_value;
814
+ uint64_t value = 0, flags;
407815
408
- ats_value = AMDGPU_PTE_DEFAULT_ATC;
409
- if (level != AMDGPU_VM_PTB)
410
- ats_value |= AMDGPU_PDE_PTE;
816
+ flags = AMDGPU_PTE_DEFAULT_ATC;
817
+ if (level != AMDGPU_VM_PTB) {
818
+ /* Handle leaf PDEs as PTEs */
819
+ flags |= AMDGPU_PDE_PTE;
820
+ amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
821
+ }
411822
412
- amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
413
- ats_entries, 0, ats_value);
823
+ r = vm->update_funcs->update(&params, bo, addr, 0, ats_entries,
824
+ value, flags);
825
+ if (r)
826
+ return r;
827
+
414828 addr += ats_entries * 8;
415829 }
416830
417
- if (entries)
418
- amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
419
- entries, 0, 0);
831
+ if (entries) {
832
+ uint64_t value = 0, flags = 0;
420833
421
- amdgpu_ring_pad_ib(ring, &job->ibs[0]);
834
+ if (adev->asic_type >= CHIP_VEGA10) {
835
+ if (level != AMDGPU_VM_PTB) {
836
+ /* Handle leaf PDEs as PTEs */
837
+ flags |= AMDGPU_PDE_PTE;
838
+ amdgpu_gmc_get_vm_pde(adev, level,
839
+ &value, &flags);
840
+ } else {
841
+ /* Workaround for fault priority problem on GMC9 */
842
+ flags = AMDGPU_PTE_EXECUTABLE;
843
+ }
844
+ }
422845
423
- WARN_ON(job->ibs[0].length_dw > 64);
424
- r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
425
- AMDGPU_FENCE_OWNER_UNDEFINED, false);
846
+ r = vm->update_funcs->update(&params, bo, addr, 0, entries,
847
+ value, flags);
848
+ if (r)
849
+ return r;
850
+ }
851
+
852
+ return vm->update_funcs->commit(&params, NULL);
853
+}
854
+
855
+/**
856
+ * amdgpu_vm_bo_param - fill in parameters for PD/PT allocation
857
+ *
858
+ * @adev: amdgpu_device pointer
859
+ * @vm: requesting vm
860
+ * @level: the page table level
861
+ * @immediate: use an immediate update
862
+ * @bp: resulting BO allocation parameters
863
+ */
864
+static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
865
+ int level, bool immediate,
866
+ struct amdgpu_bo_param *bp)
867
+{
868
+ memset(bp, 0, sizeof(*bp));
869
+
870
+ bp->size = amdgpu_vm_bo_size(adev, level);
871
+ bp->byte_align = AMDGPU_GPU_PAGE_SIZE;
872
+ bp->domain = AMDGPU_GEM_DOMAIN_VRAM;
873
+ bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain);
874
+ bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
875
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC;
876
+ if (vm->use_cpu_for_update)
877
+ bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
878
+ else if (!vm->root.base.bo || vm->root.base.bo->shadow)
879
+ bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
880
+ bp->type = ttm_bo_type_kernel;
881
+ bp->no_wait_gpu = immediate;
882
+ if (vm->root.base.bo)
883
+ bp->resv = vm->root.base.bo->tbo.base.resv;
884
+}
885
+
886
+/**
887
+ * amdgpu_vm_alloc_pts - Allocate a specific page table
888
+ *
889
+ * @adev: amdgpu_device pointer
890
+ * @vm: VM to allocate page tables for
891
+ * @cursor: Which page table to allocate
892
+ * @immediate: use an immediate update
893
+ *
894
+ * Make sure a specific page table or directory is allocated.
895
+ *
896
+ * Returns:
897
+ * 1 if page table needed to be allocated, 0 if page table was already
898
+ * allocated, negative errno if an error occurred.
899
+ */
900
+static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
901
+ struct amdgpu_vm *vm,
902
+ struct amdgpu_vm_pt_cursor *cursor,
903
+ bool immediate)
904
+{
905
+ struct amdgpu_vm_pt *entry = cursor->entry;
906
+ struct amdgpu_bo_param bp;
907
+ struct amdgpu_bo *pt;
908
+ int r;
909
+
910
+ if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
911
+ unsigned num_entries;
912
+
913
+ num_entries = amdgpu_vm_num_entries(adev, cursor->level);
914
+ entry->entries = kvmalloc_array(num_entries,
915
+ sizeof(*entry->entries),
916
+ GFP_KERNEL | __GFP_ZERO);
917
+ if (!entry->entries)
918
+ return -ENOMEM;
919
+ }
920
+
921
+ if (entry->base.bo)
922
+ return 0;
923
+
924
+ amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp);
925
+
926
+ r = amdgpu_bo_create(adev, &bp, &pt);
426927 if (r)
427
- goto error_free;
928
+ return r;
428929
429
- r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_UNDEFINED,
430
- &fence);
930
+ /* Keep a reference to the root directory to avoid
931
+ * freeing them up in the wrong order.
932
+ */
933
+ pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
934
+ amdgpu_vm_bo_base_init(&entry->base, vm, pt);
935
+
936
+ r = amdgpu_vm_clear_bo(adev, vm, pt, immediate);
431937 if (r)
432
- goto error_free;
433
-
434
- amdgpu_bo_fence(bo, fence, true);
435
- dma_fence_put(fence);
436
-
437
- if (bo->shadow)
438
- return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
439
- level, pte_support_ats);
938
+ goto error_free_pt;
440939
441940 return 0;
442941
443
-error_free:
444
- amdgpu_job_free(job);
445
-
446
-error:
942
+error_free_pt:
943
+ amdgpu_bo_unref(&pt->shadow);
944
+ amdgpu_bo_unref(&pt);
447945 return r;
448946 }
449947
450948 /**
451
- * amdgpu_vm_alloc_levels - allocate the PD/PT levels
949
+ * amdgpu_vm_free_table - free one PD/PT
452950 *
453
- * @adev: amdgpu_device pointer
454
- * @vm: requested vm
455
- * @parent: parent PT
456
- * @saddr: start of the address range
457
- * @eaddr: end of the address range
458
- * @level: VMPT level
459
- * @ats: indicate ATS support from PTE
460
- *
461
- * Make sure the page directories and page tables are allocated
462
- *
463
- * Returns:
464
- * 0 on success, errno otherwise.
951
+ * @entry: PDE to free
465952 */
466
-static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
467
- struct amdgpu_vm *vm,
468
- struct amdgpu_vm_pt *parent,
469
- uint64_t saddr, uint64_t eaddr,
470
- unsigned level, bool ats)
953
+static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
471954 {
472
- unsigned shift = amdgpu_vm_level_shift(adev, level);
473
- unsigned pt_idx, from, to;
474
- u64 flags;
475
- int r;
476
-
477
- if (!parent->entries) {
478
- unsigned num_entries = amdgpu_vm_num_entries(adev, level);
479
-
480
- parent->entries = kvmalloc_array(num_entries,
481
- sizeof(struct amdgpu_vm_pt),
482
- GFP_KERNEL | __GFP_ZERO);
483
- if (!parent->entries)
484
- return -ENOMEM;
485
- memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
955
+ if (entry->base.bo) {
956
+ entry->base.bo->vm_bo = NULL;
957
+ list_del(&entry->base.vm_status);
958
+ amdgpu_bo_unref(&entry->base.bo->shadow);
959
+ amdgpu_bo_unref(&entry->base.bo);
486960 }
487
-
488
- from = saddr >> shift;
489
- to = eaddr >> shift;
490
- if (from >= amdgpu_vm_num_entries(adev, level) ||
491
- to >= amdgpu_vm_num_entries(adev, level))
492
- return -EINVAL;
493
-
494
- ++level;
495
- saddr = saddr & ((1 << shift) - 1);
496
- eaddr = eaddr & ((1 << shift) - 1);
497
-
498
- flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
499
- if (vm->root.base.bo->shadow)
500
- flags |= AMDGPU_GEM_CREATE_SHADOW;
501
- if (vm->use_cpu_for_update)
502
- flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
503
- else
504
- flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
505
-
506
- /* walk over the address space and allocate the page tables */
507
- for (pt_idx = from; pt_idx <= to; ++pt_idx) {
508
- struct reservation_object *resv = vm->root.base.bo->tbo.resv;
509
- struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
510
- struct amdgpu_bo *pt;
511
-
512
- if (!entry->base.bo) {
513
- struct amdgpu_bo_param bp;
514
-
515
- memset(&bp, 0, sizeof(bp));
516
- bp.size = amdgpu_vm_bo_size(adev, level);
517
- bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
518
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
519
- bp.flags = flags;
520
- bp.type = ttm_bo_type_kernel;
521
- bp.resv = resv;
522
- r = amdgpu_bo_create(adev, &bp, &pt);
523
- if (r)
524
- return r;
525
-
526
- r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
527
- if (r) {
528
- amdgpu_bo_unref(&pt->shadow);
529
- amdgpu_bo_unref(&pt);
530
- return r;
531
- }
532
-
533
- if (vm->use_cpu_for_update) {
534
- r = amdgpu_bo_kmap(pt, NULL);
535
- if (r) {
536
- amdgpu_bo_unref(&pt->shadow);
537
- amdgpu_bo_unref(&pt);
538
- return r;
539
- }
540
- }
541
-
542
- /* Keep a reference to the root directory to avoid
543
- * freeing them up in the wrong order.
544
- */
545
- pt->parent = amdgpu_bo_ref(parent->base.bo);
546
-
547
- amdgpu_vm_bo_base_init(&entry->base, vm, pt);
548
- }
549
-
550
- if (level < AMDGPU_VM_PTB) {
551
- uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
552
- uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
553
- ((1 << shift) - 1);
554
- r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
555
- sub_eaddr, level, ats);
556
- if (r)
557
- return r;
558
- }
559
- }
560
-
561
- return 0;
961
+ kvfree(entry->entries);
962
+ entry->entries = NULL;
562963 }
563964
564965 /**
565
- * amdgpu_vm_alloc_pts - Allocate page tables.
966
+ * amdgpu_vm_free_pts - free PD/PT levels
566967 *
567
- * @adev: amdgpu_device pointer
568
- * @vm: VM to allocate page tables for
569
- * @saddr: Start address which needs to be allocated
570
- * @size: Size from start address we need.
968
+ * @adev: amdgpu device structure
969
+ * @vm: amdgpu vm structure
970
+ * @start: optional cursor where to start freeing PDs/PTs
571971 *
572
- * Make sure the page tables are allocated.
573
- *
574
- * Returns:
575
- * 0 on success, errno otherwise.
972
+ * Free the page directory or page table level and all sub levels.
576973 */
577
-int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
578
- struct amdgpu_vm *vm,
579
- uint64_t saddr, uint64_t size)
974
+static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
975
+ struct amdgpu_vm *vm,
976
+ struct amdgpu_vm_pt_cursor *start)
580977 {
581
- uint64_t eaddr;
582
- bool ats = false;
978
+ struct amdgpu_vm_pt_cursor cursor;
979
+ struct amdgpu_vm_pt *entry;
583980
584
- /* validate the parameters */
585
- if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
586
- return -EINVAL;
981
+ vm->bulk_moveable = false;
587982
588
- eaddr = saddr + size - 1;
983
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
984
+ amdgpu_vm_free_table(entry);
589985
590
- if (vm->pte_support_ats)
591
- ats = saddr < AMDGPU_VA_HOLE_START;
592
-
593
- saddr /= AMDGPU_GPU_PAGE_SIZE;
594
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
595
-
596
- if (eaddr >= adev->vm_manager.max_pfn) {
597
- dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
598
- eaddr, adev->vm_manager.max_pfn);
599
- return -EINVAL;
600
- }
601
-
602
- return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
603
- adev->vm_manager.root_level, ats);
986
+ if (start)
987
+ amdgpu_vm_free_table(start->entry);
604988 }
605989
606990 /**
....@@ -686,7 +1070,8 @@
6861070 * Returns:
6871071 * 0 on success, errno otherwise.
6881072 */
689
-int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
1073
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
1074
+ bool need_pipe_sync)
6901075 {
6911076 struct amdgpu_device *adev = ring->adev;
6921077 unsigned vmhub = ring->funcs->vmhub;
....@@ -703,7 +1088,11 @@
7031088 struct dma_fence *fence = NULL;
7041089 bool pasid_mapping_needed = false;
7051090 unsigned patch_offset = 0;
1091
+ bool update_spm_vmid_needed = (job->vm && (job->vm->reserved_vmid[vmhub] != NULL));
7061092 int r;
1093
+
1094
+ if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid)
1095
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
7071096
7081097 if (amdgpu_vmid_had_gpu_reset(adev, id)) {
7091098 gds_switch_needed = true;
....@@ -806,74 +1195,15 @@
8061195 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
8071196 struct amdgpu_bo *bo)
8081197 {
809
- struct amdgpu_bo_va *bo_va;
1198
+ struct amdgpu_vm_bo_base *base;
8101199
811
- list_for_each_entry(bo_va, &bo->va, base.bo_list) {
812
- if (bo_va->base.vm == vm) {
813
- return bo_va;
814
- }
1200
+ for (base = bo->vm_bo; base; base = base->next) {
1201
+ if (base->vm != vm)
1202
+ continue;
1203
+
1204
+ return container_of(base, struct amdgpu_bo_va, base);
8151205 }
8161206 return NULL;
817
-}
818
-
819
-/**
820
- * amdgpu_vm_do_set_ptes - helper to call the right asic function
821
- *
822
- * @params: see amdgpu_pte_update_params definition
823
- * @bo: PD/PT to update
824
- * @pe: addr of the page entry
825
- * @addr: dst addr to write into pe
826
- * @count: number of page entries to update
827
- * @incr: increase next addr by incr bytes
828
- * @flags: hw access flags
829
- *
830
- * Traces the parameters and calls the right asic functions
831
- * to setup the page table using the DMA.
832
- */
833
-static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
834
- struct amdgpu_bo *bo,
835
- uint64_t pe, uint64_t addr,
836
- unsigned count, uint32_t incr,
837
- uint64_t flags)
838
-{
839
- pe += amdgpu_bo_gpu_offset(bo);
840
- trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
841
-
842
- if (count < 3) {
843
- amdgpu_vm_write_pte(params->adev, params->ib, pe,
844
- addr | flags, count, incr);
845
-
846
- } else {
847
- amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
848
- count, incr, flags);
849
- }
850
-}
851
-
852
-/**
853
- * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
854
- *
855
- * @params: see amdgpu_pte_update_params definition
856
- * @bo: PD/PT to update
857
- * @pe: addr of the page entry
858
- * @addr: dst addr to write into pe
859
- * @count: number of page entries to update
860
- * @incr: increase next addr by incr bytes
861
- * @flags: hw access flags
862
- *
863
- * Traces the parameters and calls the DMA function to copy the PTEs.
864
- */
865
-static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
866
- struct amdgpu_bo *bo,
867
- uint64_t pe, uint64_t addr,
868
- unsigned count, uint32_t incr,
869
- uint64_t flags)
870
-{
871
- uint64_t src = (params->src + (addr >> 12) * 8);
872
-
873
- pe += amdgpu_bo_gpu_offset(bo);
874
- trace_amdgpu_vm_copy_ptes(pe, src, count);
875
-
876
- amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
8771207 }
8781208
8791209 /**
....@@ -888,7 +1218,7 @@
8881218 * Returns:
8891219 * The pointer for the page table entry.
8901220 */
891
-static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
1221
+uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
8921222 {
8931223 uint64_t result;
8941224
....@@ -904,401 +1234,146 @@
9041234 }
9051235
9061236 /**
907
- * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
908
- *
909
- * @params: see amdgpu_pte_update_params definition
910
- * @bo: PD/PT to update
911
- * @pe: kmap addr of the page entry
912
- * @addr: dst addr to write into pe
913
- * @count: number of page entries to update
914
- * @incr: increase next addr by incr bytes
915
- * @flags: hw access flags
916
- *
917
- * Write count number of PT/PD entries directly.
918
- */
919
-static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
920
- struct amdgpu_bo *bo,
921
- uint64_t pe, uint64_t addr,
922
- unsigned count, uint32_t incr,
923
- uint64_t flags)
924
-{
925
- unsigned int i;
926
- uint64_t value;
927
-
928
- pe += (unsigned long)amdgpu_bo_kptr(bo);
929
-
930
- trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
931
-
932
- for (i = 0; i < count; i++) {
933
- value = params->pages_addr ?
934
- amdgpu_vm_map_gart(params->pages_addr, addr) :
935
- addr;
936
- amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
937
- i, value, flags);
938
- addr += incr;
939
- }
940
-}
941
-
942
-
943
-/**
944
- * amdgpu_vm_wait_pd - Wait for PT BOs to be free.
945
- *
946
- * @adev: amdgpu_device pointer
947
- * @vm: related vm
948
- * @owner: fence owner
949
- *
950
- * Returns:
951
- * 0 on success, errno otherwise.
952
- */
953
-static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
954
- void *owner)
955
-{
956
- struct amdgpu_sync sync;
957
- int r;
958
-
959
- amdgpu_sync_create(&sync);
960
- amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
961
- r = amdgpu_sync_wait(&sync, true);
962
- amdgpu_sync_free(&sync);
963
-
964
- return r;
965
-}
966
-
967
-/*
9681237 * amdgpu_vm_update_pde - update a single level in the hierarchy
9691238 *
970
- * @param: parameters for the update
1239
+ * @params: parameters for the update
9711240 * @vm: requested vm
972
- * @parent: parent directory
9731241 * @entry: entry to update
9741242 *
9751243 * Makes sure the requested entry in parent is up to date.
9761244 */
977
-static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
978
- struct amdgpu_vm *vm,
979
- struct amdgpu_vm_pt *parent,
980
- struct amdgpu_vm_pt *entry)
1245
+static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params,
1246
+ struct amdgpu_vm *vm,
1247
+ struct amdgpu_vm_pt *entry)
9811248 {
1249
+ struct amdgpu_vm_pt *parent = amdgpu_vm_pt_parent(entry);
9821250 struct amdgpu_bo *bo = parent->base.bo, *pbo;
9831251 uint64_t pde, pt, flags;
9841252 unsigned level;
985
-
986
- /* Don't update huge pages here */
987
- if (entry->huge)
988
- return;
9891253
9901254 for (level = 0, pbo = bo->parent; pbo; ++level)
9911255 pbo = pbo->parent;
9921256
9931257 level += params->adev->vm_manager.root_level;
994
- pt = amdgpu_bo_gpu_offset(entry->base.bo);
995
- flags = AMDGPU_PTE_VALID;
996
- amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
1258
+ amdgpu_gmc_get_pde_for_bo(entry->base.bo, level, &pt, &flags);
9971259 pde = (entry - parent->entries) * 8;
998
- if (bo->shadow)
999
- params->func(params, bo->shadow, pde, pt, 1, 0, flags);
1000
- params->func(params, bo, pde, pt, 1, 0, flags);
1260
+ return vm->update_funcs->update(params, bo, pde, pt, 1, 0, flags);
10011261 }
10021262
1003
-/*
1004
- * amdgpu_vm_invalidate_level - mark all PD levels as invalid
1263
+/**
1264
+ * amdgpu_vm_invalidate_pds - mark all PDs as invalid
10051265 *
10061266 * @adev: amdgpu_device pointer
10071267 * @vm: related vm
1008
- * @parent: parent PD
1009
- * @level: VMPT level
10101268 *
10111269 * Mark all PD level as invalid after an error.
10121270 */
1013
-static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
1014
- struct amdgpu_vm *vm,
1015
- struct amdgpu_vm_pt *parent,
1016
- unsigned level)
1271
+static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
1272
+ struct amdgpu_vm *vm)
10171273 {
1018
- unsigned pt_idx, num_entries;
1274
+ struct amdgpu_vm_pt_cursor cursor;
1275
+ struct amdgpu_vm_pt *entry;
10191276
1020
- /*
1021
- * Recurse into the subdirectories. This recursion is harmless because
1022
- * we only have a maximum of 5 layers.
1023
- */
1024
- num_entries = amdgpu_vm_num_entries(adev, level);
1025
- for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
1026
- struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
1027
-
1028
- if (!entry->base.bo)
1029
- continue;
1030
-
1031
- if (!entry->base.moved)
1032
- list_move(&entry->base.vm_status, &vm->relocated);
1033
- amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
1034
- }
1277
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
1278
+ if (entry->base.bo && !entry->base.moved)
1279
+ amdgpu_vm_bo_relocated(&entry->base);
10351280 }
10361281
1037
-/*
1038
- * amdgpu_vm_update_directories - make sure that all directories are valid
1282
+/**
1283
+ * amdgpu_vm_update_pdes - make sure that all directories are valid
10391284 *
10401285 * @adev: amdgpu_device pointer
10411286 * @vm: requested vm
1287
+ * @immediate: submit immediately to the paging queue
10421288 *
10431289 * Makes sure all directories are up to date.
10441290 *
10451291 * Returns:
10461292 * 0 for success, error for failure.
10471293 */
1048
-int amdgpu_vm_update_directories(struct amdgpu_device *adev,
1049
- struct amdgpu_vm *vm)
1294
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
1295
+ struct amdgpu_vm *vm, bool immediate)
10501296 {
1051
- struct amdgpu_pte_update_params params;
1052
- struct amdgpu_job *job;
1053
- unsigned ndw = 0;
1054
- int r = 0;
1297
+ struct amdgpu_vm_update_params params;
1298
+ int r;
10551299
10561300 if (list_empty(&vm->relocated))
10571301 return 0;
10581302
1059
-restart:
10601303 memset(&params, 0, sizeof(params));
10611304 params.adev = adev;
1305
+ params.vm = vm;
1306
+ params.immediate = immediate;
10621307
1063
- if (vm->use_cpu_for_update) {
1064
- struct amdgpu_vm_bo_base *bo_base;
1065
-
1066
- list_for_each_entry(bo_base, &vm->relocated, vm_status) {
1067
- r = amdgpu_bo_kmap(bo_base->bo, NULL);
1068
- if (unlikely(r))
1069
- return r;
1070
- }
1071
-
1072
- r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
1073
- if (unlikely(r))
1074
- return r;
1075
-
1076
- params.func = amdgpu_vm_cpu_set_ptes;
1077
- } else {
1078
- ndw = 512 * 8;
1079
- r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
1080
- if (r)
1081
- return r;
1082
-
1083
- params.ib = &job->ibs[0];
1084
- params.func = amdgpu_vm_do_set_ptes;
1085
- }
1308
+ r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
1309
+ if (r)
1310
+ return r;
10861311
10871312 while (!list_empty(&vm->relocated)) {
1088
- struct amdgpu_vm_bo_base *bo_base, *parent;
1089
- struct amdgpu_vm_pt *pt, *entry;
1090
- struct amdgpu_bo *bo;
1313
+ struct amdgpu_vm_pt *entry;
10911314
1092
- bo_base = list_first_entry(&vm->relocated,
1093
- struct amdgpu_vm_bo_base,
1094
- vm_status);
1095
- bo_base->moved = false;
1096
- list_del_init(&bo_base->vm_status);
1315
+ entry = list_first_entry(&vm->relocated, struct amdgpu_vm_pt,
1316
+ base.vm_status);
1317
+ amdgpu_vm_bo_idle(&entry->base);
10971318
1098
- bo = bo_base->bo->parent;
1099
- if (!bo)
1100
- continue;
1101
-
1102
- parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
1103
- bo_list);
1104
- pt = container_of(parent, struct amdgpu_vm_pt, base);
1105
- entry = container_of(bo_base, struct amdgpu_vm_pt, base);
1106
-
1107
- amdgpu_vm_update_pde(&params, vm, pt, entry);
1108
-
1109
- if (!vm->use_cpu_for_update &&
1110
- (ndw - params.ib->length_dw) < 32)
1111
- break;
1112
- }
1113
-
1114
- if (vm->use_cpu_for_update) {
1115
- /* Flush HDP */
1116
- mb();
1117
- amdgpu_asic_flush_hdp(adev, NULL);
1118
- } else if (params.ib->length_dw == 0) {
1119
- amdgpu_job_free(job);
1120
- } else {
1121
- struct amdgpu_bo *root = vm->root.base.bo;
1122
- struct amdgpu_ring *ring;
1123
- struct dma_fence *fence;
1124
-
1125
- ring = container_of(vm->entity.rq->sched, struct amdgpu_ring,
1126
- sched);
1127
-
1128
- amdgpu_ring_pad_ib(ring, params.ib);
1129
- amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
1130
- AMDGPU_FENCE_OWNER_VM, false);
1131
- WARN_ON(params.ib->length_dw > ndw);
1132
- r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM,
1133
- &fence);
1319
+ r = amdgpu_vm_update_pde(&params, vm, entry);
11341320 if (r)
11351321 goto error;
1136
-
1137
- amdgpu_bo_fence(root, fence, true);
1138
- dma_fence_put(vm->last_update);
1139
- vm->last_update = fence;
11401322 }
11411323
1142
- if (!list_empty(&vm->relocated))
1143
- goto restart;
1144
-
1324
+ r = vm->update_funcs->commit(&params, &vm->last_update);
1325
+ if (r)
1326
+ goto error;
11451327 return 0;
11461328
11471329 error:
1148
- amdgpu_vm_invalidate_level(adev, vm, &vm->root,
1149
- adev->vm_manager.root_level);
1150
- amdgpu_job_free(job);
1330
+ amdgpu_vm_invalidate_pds(adev, vm);
11511331 return r;
11521332 }
11531333
1154
-/**
1155
- * amdgpu_vm_find_entry - find the entry for an address
1156
- *
1157
- * @p: see amdgpu_pte_update_params definition
1158
- * @addr: virtual address in question
1159
- * @entry: resulting entry or NULL
1160
- * @parent: parent entry
1161
- *
1162
- * Find the vm_pt entry and it's parent for the given address.
1163
- */
1164
-void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
1165
- struct amdgpu_vm_pt **entry,
1166
- struct amdgpu_vm_pt **parent)
1167
-{
1168
- unsigned level = p->adev->vm_manager.root_level;
1169
-
1170
- *parent = NULL;
1171
- *entry = &p->vm->root;
1172
- while ((*entry)->entries) {
1173
- unsigned shift = amdgpu_vm_level_shift(p->adev, level++);
1174
-
1175
- *parent = *entry;
1176
- *entry = &(*entry)->entries[addr >> shift];
1177
- addr &= (1ULL << shift) - 1;
1178
- }
1179
-
1180
- if (level != AMDGPU_VM_PTB)
1181
- *entry = NULL;
1182
-}
1183
-
1184
-/**
1185
- * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
1186
- *
1187
- * @p: see amdgpu_pte_update_params definition
1188
- * @entry: vm_pt entry to check
1189
- * @parent: parent entry
1190
- * @nptes: number of PTEs updated with this operation
1191
- * @dst: destination address where the PTEs should point to
1192
- * @flags: access flags fro the PTEs
1193
- *
1194
- * Check if we can update the PD with a huge page.
1195
- */
1196
-static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
1197
- struct amdgpu_vm_pt *entry,
1198
- struct amdgpu_vm_pt *parent,
1199
- unsigned nptes, uint64_t dst,
1200
- uint64_t flags)
1201
-{
1202
- uint64_t pde;
1203
-
1204
- /* In the case of a mixed PT the PDE must point to it*/
1205
- if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
1206
- nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
1207
- /* Set the huge page flag to stop scanning at this PDE */
1208
- flags |= AMDGPU_PDE_PTE;
1209
- }
1210
-
1211
- if (!(flags & AMDGPU_PDE_PTE)) {
1212
- if (entry->huge) {
1213
- /* Add the entry to the relocated list to update it. */
1214
- entry->huge = false;
1215
- list_move(&entry->base.vm_status, &p->vm->relocated);
1216
- }
1217
- return;
1218
- }
1219
-
1220
- entry->huge = true;
1221
- amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
1222
-
1223
- pde = (entry - parent->entries) * 8;
1224
- if (parent->base.bo->shadow)
1225
- p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
1226
- p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
1227
-}
1228
-
1229
-/**
1230
- * amdgpu_vm_update_ptes - make sure that page tables are valid
1231
- *
1232
- * @params: see amdgpu_pte_update_params definition
1233
- * @start: start of GPU address range
1234
- * @end: end of GPU address range
1235
- * @dst: destination address to map to, the next dst inside the function
1236
- * @flags: mapping flags
1237
- *
1238
- * Update the page tables in the range @start - @end.
1239
- *
1240
- * Returns:
1241
- * 0 for success, -EINVAL for failure.
1242
- */
1243
-static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1244
- uint64_t start, uint64_t end,
1245
- uint64_t dst, uint64_t flags)
1246
-{
1247
- struct amdgpu_device *adev = params->adev;
1248
- const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
1249
-
1250
- uint64_t addr, pe_start;
1251
- struct amdgpu_bo *pt;
1252
- unsigned nptes;
1253
-
1254
- /* walk over the address space and update the page tables */
1255
- for (addr = start; addr < end; addr += nptes,
1256
- dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
1257
- struct amdgpu_vm_pt *entry, *parent;
1258
-
1259
- amdgpu_vm_get_entry(params, addr, &entry, &parent);
1260
- if (!entry)
1261
- return -ENOENT;
1262
-
1263
- if ((addr & ~mask) == (end & ~mask))
1264
- nptes = end - addr;
1265
- else
1266
- nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
1267
-
1268
- amdgpu_vm_handle_huge_pages(params, entry, parent,
1269
- nptes, dst, flags);
1270
- /* We don't need to update PTEs for huge pages */
1271
- if (entry->huge)
1272
- continue;
1273
-
1274
- pt = entry->base.bo;
1275
- pe_start = (addr & mask) * 8;
1276
- if (pt->shadow)
1277
- params->func(params, pt->shadow, pe_start, dst, nptes,
1278
- AMDGPU_GPU_PAGE_SIZE, flags);
1279
- params->func(params, pt, pe_start, dst, nptes,
1280
- AMDGPU_GPU_PAGE_SIZE, flags);
1281
- }
1282
-
1283
- return 0;
1284
-}
1285
-
12861334 /*
1287
- * amdgpu_vm_frag_ptes - add fragment information to PTEs
1335
+ * amdgpu_vm_update_flags - figure out flags for PTE updates
12881336 *
1289
- * @params: see amdgpu_pte_update_params definition
1290
- * @vm: requested vm
1337
+ * Make sure to set the right flags for the PTEs at the desired level.
1338
+ */
1339
+static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params,
1340
+ struct amdgpu_bo *bo, unsigned level,
1341
+ uint64_t pe, uint64_t addr,
1342
+ unsigned count, uint32_t incr,
1343
+ uint64_t flags)
1344
+
1345
+{
1346
+ if (level != AMDGPU_VM_PTB) {
1347
+ flags |= AMDGPU_PDE_PTE;
1348
+ amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
1349
+
1350
+ } else if (params->adev->asic_type >= CHIP_VEGA10 &&
1351
+ !(flags & AMDGPU_PTE_VALID) &&
1352
+ !(flags & AMDGPU_PTE_PRT)) {
1353
+
1354
+ /* Workaround for fault priority problem on GMC9 */
1355
+ flags |= AMDGPU_PTE_EXECUTABLE;
1356
+ }
1357
+
1358
+ params->vm->update_funcs->update(params, bo, pe, addr, count, incr,
1359
+ flags);
1360
+}
1361
+
1362
+/**
1363
+ * amdgpu_vm_fragment - get fragment for PTEs
1364
+ *
1365
+ * @params: see amdgpu_vm_update_params definition
12911366 * @start: first PTE to handle
12921367 * @end: last PTE to handle
1293
- * @dst: addr those PTEs should point to
12941368 * @flags: hw mapping flags
1369
+ * @frag: resulting fragment size
1370
+ * @frag_end: end of this fragment
12951371 *
1296
- * Returns:
1297
- * 0 for success, -EINVAL for failure.
1372
+ * Returns the first possible fragment for the start and end address.
12981373 */
1299
-static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
1300
- uint64_t start, uint64_t end,
1301
- uint64_t dst, uint64_t flags)
1374
+static void amdgpu_vm_fragment(struct amdgpu_vm_update_params *params,
1375
+ uint64_t start, uint64_t end, uint64_t flags,
1376
+ unsigned int *frag, uint64_t *frag_end)
13021377 {
13031378 /**
13041379 * The MC L1 TLB supports variable sized pages, based on a fragment
....@@ -1317,36 +1392,182 @@
13171392 * larger. Thus, we try to use large fragments wherever possible.
13181393 * Userspace can support this by aligning virtual base address and
13191394 * allocation size to the fragment size.
1395
+ *
1396
+ * Starting with Vega10 the fragment size only controls the L1. The L2
1397
+ * is now directly feed with small/huge/giant pages from the walker.
13201398 */
1321
- unsigned max_frag = params->adev->vm_manager.fragment_size;
1322
- int r;
1399
+ unsigned max_frag;
1400
+
1401
+ if (params->adev->asic_type < CHIP_VEGA10)
1402
+ max_frag = params->adev->vm_manager.fragment_size;
1403
+ else
1404
+ max_frag = 31;
13231405
13241406 /* system pages are non continuously */
1325
- if (params->src || !(flags & AMDGPU_PTE_VALID))
1326
- return amdgpu_vm_update_ptes(params, start, end, dst, flags);
1407
+ if (params->pages_addr) {
1408
+ *frag = 0;
1409
+ *frag_end = end;
1410
+ return;
1411
+ }
13271412
1328
- while (start != end) {
1329
- uint64_t frag_flags, frag_end;
1330
- unsigned frag;
1413
+ /* This intentionally wraps around if no bit is set */
1414
+ *frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1);
1415
+ if (*frag >= max_frag) {
1416
+ *frag = max_frag;
1417
+ *frag_end = end & ~((1ULL << max_frag) - 1);
1418
+ } else {
1419
+ *frag_end = start + (1 << *frag);
1420
+ }
1421
+}
13311422
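A small worked example of the fragment selection in amdgpu_vm_fragment() above, written as stand-alone C (the __builtin_* calls stand in for the kernel's ffs()/fls64(); the page numbers and max_frag value are made up):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t start = 0x2460, end = 0x2600;	/* GPU page numbers (made up) */
	unsigned int max_frag = 9;		/* assumed fragment_size */

	/* min(ffs(start) - 1, fls64(end - start) - 1), as in the code above */
	unsigned int from_start = __builtin_ffsll(start) - 1;		/* bit 5 */
	unsigned int from_range = 63 - __builtin_clzll(end - start);	/* bit 8 */
	unsigned int frag = from_start < from_range ? from_start : from_range;
	uint64_t frag_end;

	if (frag >= max_frag)
		frag_end = end & ~((1ULL << max_frag) - 1);
	else
		frag_end = start + (1ULL << frag);

	/* Pages [0x2460, 0x2480) form one 2^5 = 32 page fragment, so their
	 * PTEs would carry AMDGPU_PTE_FRAG(5). */
	assert(frag == 5 && frag_end == 0x2480);
	return 0;
}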
1332
- /* This intentionally wraps around if no bit is set */
1333
- frag = min((unsigned)ffs(start) - 1,
1334
- (unsigned)fls64(end - start) - 1);
1335
- if (frag >= max_frag) {
1336
- frag_flags = AMDGPU_PTE_FRAG(max_frag);
1337
- frag_end = end & ~((1ULL << max_frag) - 1);
1338
- } else {
1339
- frag_flags = AMDGPU_PTE_FRAG(frag);
1340
- frag_end = start + (1 << frag);
1423
+/**
1424
+ * amdgpu_vm_update_ptes - make sure that page tables are valid
1425
+ *
1426
+ * @params: see amdgpu_vm_update_params definition
1427
+ * @start: start of GPU address range
1428
+ * @end: end of GPU address range
1429
+ * @dst: destination address to map to, the next dst inside the function
1430
+ * @flags: mapping flags
1431
+ *
1432
+ * Update the page tables in the range @start - @end.
1433
+ *
1434
+ * Returns:
1435
+ * 0 for success, -EINVAL for failure.
1436
+ */
1437
+static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
1438
+ uint64_t start, uint64_t end,
1439
+ uint64_t dst, uint64_t flags)
1440
+{
1441
+ struct amdgpu_device *adev = params->adev;
1442
+ struct amdgpu_vm_pt_cursor cursor;
1443
+ uint64_t frag_start = start, frag_end;
1444
+ unsigned int frag;
1445
+ int r;
1446
+
1447
+ /* figure out the initial fragment */
1448
+ amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
1449
+
1450
+ /* walk over the address space and update the PTs */
1451
+ amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
1452
+ while (cursor.pfn < end) {
1453
+ unsigned shift, parent_shift, mask;
1454
+ uint64_t incr, entry_end, pe_start;
1455
+ struct amdgpu_bo *pt;
1456
+
1457
+ if (!params->unlocked) {
1458
+ /* make sure that the page tables covering the
1459
+ * address range are actually allocated
1460
+ */
1461
+ r = amdgpu_vm_alloc_pts(params->adev, params->vm,
1462
+ &cursor, params->immediate);
1463
+ if (r)
1464
+ return r;
13411465 }
13421466
1343
- r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
1344
- flags | frag_flags);
1345
- if (r)
1346
- return r;
1467
+ shift = amdgpu_vm_level_shift(adev, cursor.level);
1468
+ parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
1469
+ if (params->unlocked) {
1470
+ /* Unlocked updates are only allowed on the leaves */
1471
+ if (amdgpu_vm_pt_descendant(adev, &cursor))
1472
+ continue;
1473
+ } else if (adev->asic_type < CHIP_VEGA10 &&
1474
+ (flags & AMDGPU_PTE_VALID)) {
1475
+ /* No huge page support before GMC v9 */
1476
+ if (cursor.level != AMDGPU_VM_PTB) {
1477
+ if (!amdgpu_vm_pt_descendant(adev, &cursor))
1478
+ return -ENOENT;
1479
+ continue;
1480
+ }
1481
+ } else if (frag < shift) {
1482
+ /* We can't use this level when the fragment size is
1483
+ * smaller than the address shift. Go to the next
1484
+ * child entry and try again.
1485
+ */
1486
+ if (amdgpu_vm_pt_descendant(adev, &cursor))
1487
+ continue;
1488
+ } else if (frag >= parent_shift) {
1489
+ /* If the fragment size is even larger than the parent
1490
+ * shift we should go up one level and check it again.
1491
+ */
1492
+ if (!amdgpu_vm_pt_ancestor(&cursor))
1493
+ return -EINVAL;
1494
+ continue;
1495
+ }
13471496
1348
- dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
1349
- start = frag_end;
1497
+ pt = cursor.entry->base.bo;
1498
+ if (!pt) {
1499
+ /* We need all PDs and PTs for mapping something, */
1500
+ if (flags & AMDGPU_PTE_VALID)
1501
+ return -ENOENT;
1502
+
1503
+ /* but unmapping something can happen at a higher
1504
+ * level.
1505
+ */
1506
+ if (!amdgpu_vm_pt_ancestor(&cursor))
1507
+ return -EINVAL;
1508
+
1509
+ pt = cursor.entry->base.bo;
1510
+ shift = parent_shift;
1511
+ frag_end = max(frag_end, ALIGN(frag_start + 1,
1512
+ 1ULL << shift));
1513
+ }
1514
+
1515
+ /* Looks good so far, calculate parameters for the update */
1516
+ incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
1517
+ mask = amdgpu_vm_entries_mask(adev, cursor.level);
1518
+ pe_start = ((cursor.pfn >> shift) & mask) * 8;
1519
+ entry_end = ((uint64_t)mask + 1) << shift;
1520
+ entry_end += cursor.pfn & ~(entry_end - 1);
1521
+ entry_end = min(entry_end, end);
1522
+
1523
+ do {
1524
+ struct amdgpu_vm *vm = params->vm;
1525
+ uint64_t upd_end = min(entry_end, frag_end);
1526
+ unsigned nptes = (upd_end - frag_start) >> shift;
1527
+ uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
1528
+
1529
+ /* This can happen when we set higher level PDs to
1530
+ * silent to stop fault floods.
1531
+ */
1532
+ nptes = max(nptes, 1u);
1533
+
1534
+ trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
1535
+ nptes, dst, incr, upd_flags,
1536
+ vm->task_info.pid,
1537
+ vm->immediate.fence_context);
1538
+ amdgpu_vm_update_flags(params, pt, cursor.level,
1539
+ pe_start, dst, nptes, incr,
1540
+ upd_flags);
1541
+
1542
+ pe_start += nptes * 8;
1543
+ dst += nptes * incr;
1544
+
1545
+ frag_start = upd_end;
1546
+ if (frag_start >= frag_end) {
1547
+ /* figure out the next fragment */
1548
+ amdgpu_vm_fragment(params, frag_start, end,
1549
+ flags, &frag, &frag_end);
1550
+ if (frag < shift)
1551
+ break;
1552
+ }
1553
+ } while (frag_start < entry_end);
1554
+
1555
+ if (amdgpu_vm_pt_descendant(adev, &cursor)) {
1556
+ /* Free all child entries.
1557
+ * Update the tables with the flags and addresses and free up subsequent
1558
+ * tables in the case of huge pages or freed up areas.
1559
+ * This is the maximum you can free, because all other page tables are not
1560
+ * completely covered by the range and so potentially still in use.
1561
+ */
1562
+ while (cursor.pfn < frag_start) {
1563
+ amdgpu_vm_free_pts(adev, params->vm, &cursor);
1564
+ amdgpu_vm_pt_next(adev, &cursor);
1565
+ }
1566
+
1567
+ } else if (frag >= shift) {
1568
+ /* or just move on to the next on the same level. */
1569
+ amdgpu_vm_pt_next(adev, &cursor);
1570
+ }
13501571 }
13511572
13521573 return 0;
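Apart from the unlocked and pre-Vega10 special cases handled first, the walk above picks the page-table level to write purely from the fragment size: it descends while the fragment is smaller than the level's address shift and ascends while the fragment covers at least one entry of the parent. A stand-alone restatement of that rule, with GMC9-style 9-bit levels used only as an example:

	#include <stdio.h>

	static const char *pick_level(unsigned frag, unsigned shift, unsigned parent_shift)
	{
		if (frag < shift)
			return "descend";	/* fragment too small for this level */
		if (frag >= parent_shift)
			return "ascend";	/* one parent entry covers the fragment */
		return "update here";		/* write the entries at this level */
	}

	int main(void)
	{
		/* a 2 MiB aligned and sized mapping gives frag = 9 (512 x 4K pages) */
		printf("PTB  (shift 0, parent 9):  %s\n", pick_level(9, 0, 9));
		printf("PDB0 (shift 9, parent 18): %s\n", pick_level(9, 9, 18));
		return 0;
	}

In that example a single huge PDE is written at PDB0 instead of 512 individual PTEs.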
....@@ -1356,13 +1577,15 @@
13561577 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
13571578 *
13581579 * @adev: amdgpu_device pointer
1359
- * @exclusive: fence we need to sync to
1360
- * @pages_addr: DMA addresses to use for mapping
13611580 * @vm: requested vm
1581
+ * @immediate: immediate submission in a page fault
1582
+ * @unlocked: unlocked invalidation during MM callback
1583
+ * @resv: fences we need to sync to
13621584 * @start: start of mapped range
13631585 * @last: last mapped entry
13641586 * @flags: flags for the entries
13651587 * @addr: addr to set the area to
1588
+ * @pages_addr: DMA addresses to use for mapping
13661589 * @fence: optional resulting fence
13671590 *
13681591 * Fill in the page table entries between @start and @last.
....@@ -1371,140 +1594,58 @@
13711594 * 0 for success, -EINVAL for failure.
13721595 */
13731596 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1374
- struct dma_fence *exclusive,
1375
- dma_addr_t *pages_addr,
1376
- struct amdgpu_vm *vm,
1597
+ struct amdgpu_vm *vm, bool immediate,
1598
+ bool unlocked, struct dma_resv *resv,
13771599 uint64_t start, uint64_t last,
13781600 uint64_t flags, uint64_t addr,
1601
+ dma_addr_t *pages_addr,
13791602 struct dma_fence **fence)
13801603 {
1381
- struct amdgpu_ring *ring;
1382
- void *owner = AMDGPU_FENCE_OWNER_VM;
1383
- unsigned nptes, ncmds, ndw;
1384
- struct amdgpu_job *job;
1385
- struct amdgpu_pte_update_params params;
1386
- struct dma_fence *f = NULL;
1604
+ struct amdgpu_vm_update_params params;
1605
+ enum amdgpu_sync_mode sync_mode;
13871606 int r;
13881607
13891608 memset(&params, 0, sizeof(params));
13901609 params.adev = adev;
13911610 params.vm = vm;
1611
+ params.immediate = immediate;
1612
+ params.pages_addr = pages_addr;
1613
+ params.unlocked = unlocked;
13921614
1393
- /* sync to everything on unmapping */
1394
- if (!(flags & AMDGPU_PTE_VALID))
1395
- owner = AMDGPU_FENCE_OWNER_UNDEFINED;
1396
-
1397
- if (vm->use_cpu_for_update) {
1398
- /* params.src is used as flag to indicate system Memory */
1399
- if (pages_addr)
1400
- params.src = ~0;
1401
-
1402
- /* Wait for PT BOs to be free. PTs share the same resv. object
1403
- * as the root PD BO
1404
- */
1405
- r = amdgpu_vm_wait_pd(adev, vm, owner);
1406
- if (unlikely(r))
1407
- return r;
1408
-
1409
- params.func = amdgpu_vm_cpu_set_ptes;
1410
- params.pages_addr = pages_addr;
1411
- return amdgpu_vm_frag_ptes(&params, start, last + 1,
1412
- addr, flags);
1413
- }
1414
-
1415
- ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
1416
-
1417
- nptes = last - start + 1;
1418
-
1419
- /*
1420
- * reserve space for two commands every (1 << BLOCK_SIZE)
1421
- * entries or 2k dwords (whatever is smaller)
1422
- *
1423
- * The second command is for the shadow pagetables.
1615
+ /* Implicitly sync to command submissions in the same VM before
1616
+ * unmapping. Sync to moving fences before mapping.
14241617 */
1425
- if (vm->root.base.bo->shadow)
1426
- ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
1618
+ if (!(flags & AMDGPU_PTE_VALID))
1619
+ sync_mode = AMDGPU_SYNC_EQ_OWNER;
14271620 else
1428
- ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
1621
+ sync_mode = AMDGPU_SYNC_EXPLICIT;
14291622
1430
- /* padding, etc. */
1431
- ndw = 64;
1432
-
1433
- if (pages_addr) {
1434
- /* copy commands needed */
1435
- ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
1436
-
1437
- /* and also PTEs */
1438
- ndw += nptes * 2;
1439
-
1440
- params.func = amdgpu_vm_do_copy_ptes;
1441
-
1442
- } else {
1443
- /* set page commands needed */
1444
- ndw += ncmds * 10;
1445
-
1446
- /* extra commands for begin/end fragments */
1447
- if (vm->root.base.bo->shadow)
1448
- ndw += 2 * 10 * adev->vm_manager.fragment_size * 2;
1449
- else
1450
- ndw += 2 * 10 * adev->vm_manager.fragment_size;
1451
-
1452
- params.func = amdgpu_vm_do_set_ptes;
1623
+ amdgpu_vm_eviction_lock(vm);
1624
+ if (vm->evicting) {
1625
+ r = -EBUSY;
1626
+ goto error_unlock;
14531627 }
14541628
1455
- r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
1456
- if (r)
1457
- return r;
1629
+ if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
1630
+ struct dma_fence *tmp = dma_fence_get_stub();
14581631
1459
- params.ib = &job->ibs[0];
1460
-
1461
- if (pages_addr) {
1462
- uint64_t *pte;
1463
- unsigned i;
1464
-
1465
- /* Put the PTEs at the end of the IB. */
1466
- i = ndw - nptes * 2;
1467
- pte= (uint64_t *)&(job->ibs->ptr[i]);
1468
- params.src = job->ibs->gpu_addr + i * 4;
1469
-
1470
- for (i = 0; i < nptes; ++i) {
1471
- pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
1472
- AMDGPU_GPU_PAGE_SIZE);
1473
- pte[i] |= flags;
1474
- }
1475
- addr = 0;
1632
+ amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true);
1633
+ swap(vm->last_unlocked, tmp);
1634
+ dma_fence_put(tmp);
14761635 }
14771636
1478
- r = amdgpu_sync_fence(adev, &job->sync, exclusive, false);
1637
+ r = vm->update_funcs->prepare(&params, resv, sync_mode);
14791638 if (r)
1480
- goto error_free;
1639
+ goto error_unlock;
14811640
1482
- r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
1483
- owner, false);
1641
+ r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
14841642 if (r)
1485
- goto error_free;
1643
+ goto error_unlock;
14861644
1487
- r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
1488
- if (r)
1489
- goto error_free;
1645
+ r = vm->update_funcs->commit(&params, fence);
14901646
1491
- r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
1492
- if (r)
1493
- goto error_free;
1494
-
1495
- amdgpu_ring_pad_ib(ring, params.ib);
1496
- WARN_ON(params.ib->length_dw > ndw);
1497
- r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM, &f);
1498
- if (r)
1499
- goto error_free;
1500
-
1501
- amdgpu_bo_fence(vm->root.base.bo, f, true);
1502
- dma_fence_put(*fence);
1503
- *fence = f;
1504
- return 0;
1505
-
1506
-error_free:
1507
- amdgpu_job_free(job);
1647
+error_unlock:
1648
+ amdgpu_vm_eviction_unlock(vm);
15081649 return r;
15091650 }
15101651
....@@ -1512,11 +1653,12 @@
15121653 * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
15131654 *
15141655 * @adev: amdgpu_device pointer
1515
- * @exclusive: fence we need to sync to
1656
+ * @resv: fences we need to sync to
15161657 * @pages_addr: DMA addresses to use for mapping
15171658 * @vm: requested vm
15181659 * @mapping: mapped range and flags to use for the update
15191660 * @flags: HW flags for the mapping
1661
+ * @bo_adev: amdgpu_device pointer of the device the BO was actually allocated on
15201662 * @nodes: array of drm_mm_nodes with the MC addresses
15211663 * @fence: optional resulting fence
15221664 *
....@@ -1527,11 +1669,12 @@
15271669 * 0 for success, -EINVAL for failure.
15281670 */
15291671 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
1530
- struct dma_fence *exclusive,
1672
+ struct dma_resv *resv,
15311673 dma_addr_t *pages_addr,
15321674 struct amdgpu_vm *vm,
15331675 struct amdgpu_bo_va_mapping *mapping,
15341676 uint64_t flags,
1677
+ struct amdgpu_device *bo_adev,
15351678 struct drm_mm_node *nodes,
15361679 struct dma_fence **fence)
15371680 {
....@@ -1547,17 +1690,8 @@
15471690 if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
15481691 flags &= ~AMDGPU_PTE_WRITEABLE;
15491692
1550
- flags &= ~AMDGPU_PTE_EXECUTABLE;
1551
- flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
1552
-
1553
- flags &= ~AMDGPU_PTE_MTYPE_MASK;
1554
- flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);
1555
-
1556
- if ((mapping->flags & AMDGPU_PTE_PRT) &&
1557
- (adev->asic_type >= CHIP_VEGA10)) {
1558
- flags |= AMDGPU_PTE_PRT;
1559
- flags &= ~AMDGPU_PTE_VALID;
1560
- }
1693
+ /* Apply ASIC specific mapping flags */
1694
+ amdgpu_gmc_get_vm_pte(adev, mapping, &flags);
15611695
15621696 trace_amdgpu_vm_bo_update(mapping);
15631697
....@@ -1574,19 +1708,18 @@
15741708 uint64_t max_entries;
15751709 uint64_t addr, last;
15761710
1711
+ max_entries = mapping->last - start + 1;
15771712 if (nodes) {
15781713 addr = nodes->start << PAGE_SHIFT;
1579
- max_entries = (nodes->size - pfn) *
1580
- AMDGPU_GPU_PAGES_IN_CPU_PAGE;
1714
+ max_entries = min((nodes->size - pfn) *
1715
+ AMDGPU_GPU_PAGES_IN_CPU_PAGE, max_entries);
15811716 } else {
15821717 addr = 0;
1583
- max_entries = S64_MAX;
15841718 }
15851719
15861720 if (pages_addr) {
15871721 uint64_t count;
15881722
1589
- max_entries = min(max_entries, 16ull * 1024ull);
15901723 for (count = 1;
15911724 count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
15921725 ++count) {
....@@ -1602,18 +1735,19 @@
16021735 dma_addr = pages_addr;
16031736 } else {
16041737 addr = pages_addr[pfn];
1605
- max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
1738
+ max_entries = count *
1739
+ AMDGPU_GPU_PAGES_IN_CPU_PAGE;
16061740 }
16071741
1608
- } else if (flags & AMDGPU_PTE_VALID) {
1609
- addr += adev->vm_manager.vram_base_offset;
1742
+ } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
1743
+ addr += bo_adev->vm_manager.vram_base_offset;
16101744 addr += pfn << PAGE_SHIFT;
16111745 }
16121746
1613
- last = min((uint64_t)mapping->last, start + max_entries - 1);
1614
- r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
1747
+ last = start + max_entries - 1;
1748
+ r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
16151749 start, last, flags, addr,
1616
- fence);
1750
+ dma_addr, fence);
16171751 if (r)
16181752 return r;
16191753
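For system memory the loop above only maps as many GPU pages per call as have physically contiguous DMA addresses in pages_addr[]; anything else falls back to per-page addressing through pages_addr. A rough user-space sketch of that contiguity count, assuming one GPU page per CPU page for simplicity:

	#include <stdint.h>

	typedef uint64_t dma_addr_t;
	#define EXAMPLE_PAGE_SIZE 0x1000ull

	/* count CPU pages whose DMA addresses are consecutive, starting at pfn */
	static uint64_t count_contiguous(const dma_addr_t *pages_addr,
					 uint64_t pfn, uint64_t max_entries)
	{
		uint64_t count;

		for (count = 1; count < max_entries; ++count) {
			uint64_t idx = pfn + count;

			if (pages_addr[idx] != pages_addr[idx - 1] + EXAMPLE_PAGE_SIZE)
				break;
		}
		return count;
	}

	int main(void)
	{
		/* three consecutive pages followed by a gap -> count is 3 */
		dma_addr_t pages[] = { 0x1000, 0x2000, 0x3000, 0x9000 };

		return count_contiguous(pages, 0, 4) == 3 ? 0 : 1;
	}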
....@@ -1641,42 +1775,59 @@
16411775 * Returns:
16421776 * 0 for success, -EINVAL for failure.
16431777 */
1644
-int amdgpu_vm_bo_update(struct amdgpu_device *adev,
1645
- struct amdgpu_bo_va *bo_va,
1778
+int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
16461779 bool clear)
16471780 {
16481781 struct amdgpu_bo *bo = bo_va->base.bo;
16491782 struct amdgpu_vm *vm = bo_va->base.vm;
16501783 struct amdgpu_bo_va_mapping *mapping;
16511784 dma_addr_t *pages_addr = NULL;
1652
- struct ttm_mem_reg *mem;
1785
+ struct ttm_resource *mem;
16531786 struct drm_mm_node *nodes;
1654
- struct dma_fence *exclusive, **last_update;
1787
+ struct dma_fence **last_update;
1788
+ struct dma_resv *resv;
16551789 uint64_t flags;
1790
+ struct amdgpu_device *bo_adev = adev;
16561791 int r;
16571792
16581793 if (clear || !bo) {
16591794 mem = NULL;
16601795 nodes = NULL;
1661
- exclusive = NULL;
1796
+ resv = vm->root.base.bo->tbo.base.resv;
16621797 } else {
1798
+ struct drm_gem_object *obj = &bo->tbo.base;
16631799 struct ttm_dma_tt *ttm;
16641800
1801
+ resv = bo->tbo.base.resv;
1802
+ if (obj->import_attach && bo_va->is_xgmi) {
1803
+ struct dma_buf *dma_buf = obj->import_attach->dmabuf;
1804
+ struct drm_gem_object *gobj = dma_buf->priv;
1805
+ struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
1806
+
1807
+ if (abo->tbo.mem.mem_type == TTM_PL_VRAM)
1808
+ bo = gem_to_amdgpu_bo(gobj);
1809
+ }
16651810 mem = &bo->tbo.mem;
16661811 nodes = mem->mm_node;
16671812 if (mem->mem_type == TTM_PL_TT) {
16681813 ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
16691814 pages_addr = ttm->dma_address;
16701815 }
1671
- exclusive = reservation_object_get_excl(bo->tbo.resv);
16721816 }
16731817
1674
- if (bo)
1818
+ if (bo) {
16751819 flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
1676
- else
1677
- flags = 0x0;
16781820
1679
- if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
1821
+ if (amdgpu_bo_encrypted(bo))
1822
+ flags |= AMDGPU_PTE_TMZ;
1823
+
1824
+ bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
1825
+ } else {
1826
+ flags = 0x0;
1827
+ }
1828
+
1829
+ if (clear || (bo && bo->tbo.base.resv ==
1830
+ vm->root.base.bo->tbo.base.resv))
16801831 last_update = &vm->last_update;
16811832 else
16821833 last_update = &bo_va->last_pt_update;
....@@ -1690,34 +1841,27 @@
16901841 }
16911842
16921843 list_for_each_entry(mapping, &bo_va->invalids, list) {
1693
- r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
1694
- mapping, flags, nodes,
1844
+ r = amdgpu_vm_bo_split_mapping(adev, resv, pages_addr, vm,
1845
+ mapping, flags, bo_adev, nodes,
16951846 last_update);
16961847 if (r)
16971848 return r;
16981849 }
16991850
1700
- if (vm->use_cpu_for_update) {
1701
- /* Flush HDP */
1702
- mb();
1703
- amdgpu_asic_flush_hdp(adev, NULL);
1704
- }
1705
-
1706
- spin_lock(&vm->moved_lock);
1707
- list_del_init(&bo_va->base.vm_status);
1708
- spin_unlock(&vm->moved_lock);
1709
-
17101851 /* If the BO is not in its preferred location add it back to
17111852 * the evicted list so that it gets validated again on the
17121853 * next command submission.
17131854 */
1714
- if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
1855
+ if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) {
17151856 uint32_t mem_type = bo->tbo.mem.mem_type;
17161857
1717
- if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
1718
- list_add_tail(&bo_va->base.vm_status, &vm->evicted);
1858
+ if (!(bo->preferred_domains &
1859
+ amdgpu_mem_type_to_domain(mem_type)))
1860
+ amdgpu_vm_bo_evicted(&bo_va->base);
17191861 else
1720
- list_add(&bo_va->base.vm_status, &vm->idle);
1862
+ amdgpu_vm_bo_idle(&bo_va->base);
1863
+ } else {
1864
+ amdgpu_vm_bo_done(&bo_va->base);
17211865 }
17221866
17231867 list_splice_init(&bo_va->invalids, &bo_va->valids);
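The helpers used above (amdgpu_vm_bo_evicted/_idle/_done) replace the open-coded list moves of the old code. The per-BO state reached after an update can be summed up by a small decision function; the enum and helper below are illustrative only:

	#include <stdbool.h>
	#include <stdio.h>

	enum bova_state { BOVA_EVICTED, BOVA_IDLE, BOVA_DONE };

	static enum bova_state bova_next_state(bool shares_root_resv,
					       bool in_preferred_domain)
	{
		if (!shares_root_resv)
			return BOVA_DONE;	/* independently reserved BO, fully updated */

		/* per-VM BOs get revalidated on the next submission if misplaced */
		return in_preferred_domain ? BOVA_IDLE : BOVA_EVICTED;
	}

	int main(void)
	{
		/* a per-VM BO outside its preferred domain goes back to the evicted list */
		printf("%d\n", bova_next_state(true, false) == BOVA_EVICTED);
		return 0;
	}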
....@@ -1845,18 +1989,18 @@
18451989 */
18461990 static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
18471991 {
1848
- struct reservation_object *resv = vm->root.base.bo->tbo.resv;
1992
+ struct dma_resv *resv = vm->root.base.bo->tbo.base.resv;
18491993 struct dma_fence *excl, **shared;
18501994 unsigned i, shared_count;
18511995 int r;
18521996
1853
- r = reservation_object_get_fences_rcu(resv, &excl,
1997
+ r = dma_resv_get_fences_rcu(resv, &excl,
18541998 &shared_count, &shared);
18551999 if (r) {
18562000 /* Not enough memory to grab the fence list, as last resort
18572001 * block for all the fences to complete.
18582002 */
1859
- reservation_object_wait_timeout_rcu(resv, true, false,
2003
+ dma_resv_wait_timeout_rcu(resv, true, false,
18602004 MAX_SCHEDULE_TIMEOUT);
18612005 return;
18622006 }
....@@ -1892,6 +2036,7 @@
18922036 struct amdgpu_vm *vm,
18932037 struct dma_fence **fence)
18942038 {
2039
+ struct dma_resv *resv = vm->root.base.bo->tbo.base.resv;
18952040 struct amdgpu_bo_va_mapping *mapping;
18962041 uint64_t init_pte_value = 0;
18972042 struct dma_fence *f = NULL;
....@@ -1902,12 +2047,13 @@
19022047 struct amdgpu_bo_va_mapping, list);
19032048 list_del(&mapping->list);
19042049
1905
- if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
2050
+ if (vm->pte_support_ats &&
2051
+ mapping->start < AMDGPU_GMC_HOLE_START)
19062052 init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
19072053
1908
- r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
2054
+ r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
19092055 mapping->start, mapping->last,
1910
- init_pte_value, 0, &f);
2056
+ init_pte_value, 0, NULL, &f);
19112057 amdgpu_vm_free_mapping(adev, vm, mapping, f);
19122058 if (r) {
19132059 dma_fence_put(f);
....@@ -1943,40 +2089,40 @@
19432089 struct amdgpu_vm *vm)
19442090 {
19452091 struct amdgpu_bo_va *bo_va, *tmp;
1946
- struct list_head moved;
2092
+ struct dma_resv *resv;
19472093 bool clear;
19482094 int r;
19492095
1950
- INIT_LIST_HEAD(&moved);
1951
- spin_lock(&vm->moved_lock);
1952
- list_splice_init(&vm->moved, &moved);
1953
- spin_unlock(&vm->moved_lock);
1954
-
1955
- list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) {
1956
- struct reservation_object *resv = bo_va->base.bo->tbo.resv;
1957
-
2096
+ list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
19582097 /* Per VM BOs never need to be cleared in the page tables */
1959
- if (resv == vm->root.base.bo->tbo.resv)
1960
- clear = false;
2098
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
2099
+ if (r)
2100
+ return r;
2101
+ }
2102
+
2103
+ spin_lock(&vm->invalidated_lock);
2104
+ while (!list_empty(&vm->invalidated)) {
2105
+ bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
2106
+ base.vm_status);
2107
+ resv = bo_va->base.bo->tbo.base.resv;
2108
+ spin_unlock(&vm->invalidated_lock);
2109
+
19612110 /* Try to reserve the BO to avoid clearing its ptes */
1962
- else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
2111
+ if (!amdgpu_vm_debug && dma_resv_trylock(resv))
19632112 clear = false;
19642113 /* Somebody else is using the BO right now */
19652114 else
19662115 clear = true;
19672116
19682117 r = amdgpu_vm_bo_update(adev, bo_va, clear);
1969
- if (r) {
1970
- spin_lock(&vm->moved_lock);
1971
- list_splice(&moved, &vm->moved);
1972
- spin_unlock(&vm->moved_lock);
2118
+ if (r)
19732119 return r;
1974
- }
19752120
1976
- if (!clear && resv != vm->root.base.bo->tbo.resv)
1977
- reservation_object_unlock(resv);
1978
-
2121
+ if (!clear)
2122
+ dma_resv_unlock(resv);
2123
+ spin_lock(&vm->invalidated_lock);
19792124 }
2125
+ spin_unlock(&vm->invalidated_lock);
19802126
19812127 return 0;
19822128 }
....@@ -2012,6 +2158,15 @@
20122158 INIT_LIST_HEAD(&bo_va->valids);
20132159 INIT_LIST_HEAD(&bo_va->invalids);
20142160
2161
+ if (!bo)
2162
+ return bo_va;
2163
+
2164
+ if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) {
2165
+ bo_va->is_xgmi = true;
2166
+ /* Power up XGMI if it can be potentially used */
2167
+ amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MAX_VEGA20);
2168
+ }
2169
+
20152170 return bo_va;
20162171 }
20172172
....@@ -2039,11 +2194,9 @@
20392194 if (mapping->flags & AMDGPU_PTE_PRT)
20402195 amdgpu_vm_prt_get(adev);
20412196
2042
- if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
2197
+ if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv &&
20432198 !bo_va->base.moved) {
2044
- spin_lock(&vm->moved_lock);
20452199 list_move(&bo_va->base.vm_status, &vm->moved);
2046
- spin_unlock(&vm->moved_lock);
20472200 }
20482201 trace_amdgpu_vm_bo_map(bo_va, mapping);
20492202 }
....@@ -2083,7 +2236,8 @@
20832236 /* make sure the object fits at this offset */
20842237 eaddr = saddr + size - 1;
20852238 if (saddr >= eaddr ||
2086
- (bo && offset + size > amdgpu_bo_size(bo)))
2239
+ (bo && offset + size > amdgpu_bo_size(bo)) ||
2240
+ (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
20872241 return -EINVAL;
20882242
20892243 saddr /= AMDGPU_GPU_PAGE_SIZE;
....@@ -2148,7 +2302,8 @@
21482302 /* make sure the object fits at this offset */
21492303 eaddr = saddr + size - 1;
21502304 if (saddr >= eaddr ||
2151
- (bo && offset + size > amdgpu_bo_size(bo)))
2305
+ (bo && offset + size > amdgpu_bo_size(bo)) ||
2306
+ (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
21522307 return -EINVAL;
21532308
21542309 /* Allocate all the needed memory */
....@@ -2373,7 +2528,8 @@
23732528 struct amdgpu_bo *bo;
23742529
23752530 bo = mapping->bo_va->base.bo;
2376
- if (READ_ONCE(bo->tbo.resv->lock.ctx) != ticket)
2531
+ if (dma_resv_locking_ctx(bo->tbo.base.resv) !=
2532
+ ticket)
23772533 continue;
23782534 }
23792535
....@@ -2395,13 +2551,27 @@
23952551 struct amdgpu_bo_va *bo_va)
23962552 {
23972553 struct amdgpu_bo_va_mapping *mapping, *next;
2554
+ struct amdgpu_bo *bo = bo_va->base.bo;
23982555 struct amdgpu_vm *vm = bo_va->base.vm;
2556
+ struct amdgpu_vm_bo_base **base;
23992557
2400
- list_del(&bo_va->base.bo_list);
2558
+ if (bo) {
2559
+ if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)
2560
+ vm->bulk_moveable = false;
24012561
2402
- spin_lock(&vm->moved_lock);
2562
+ for (base = &bo_va->base.bo->vm_bo; *base;
2563
+ base = &(*base)->next) {
2564
+ if (*base != &bo_va->base)
2565
+ continue;
2566
+
2567
+ *base = bo_va->base.next;
2568
+ break;
2569
+ }
2570
+ }
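The for loop above walks bo->vm_bo with a pointer-to-pointer so that unlinking works the same whether bo_va is the head of the list or somewhere in the middle. The idiom in isolation, as a generic sketch:

	#include <stddef.h>

	struct node { struct node *next; };

	/* remove victim from a singly linked list without a special head case */
	static void unlink(struct node **head, struct node *victim)
	{
		struct node **p;

		for (p = head; *p; p = &(*p)->next) {
			if (*p != victim)
				continue;
			*p = victim->next;	/* splice the victim out */
			break;
		}
	}

	int main(void)
	{
		struct node c = { NULL }, b = { &c }, a = { &b };
		struct node *head = &a;

		unlink(&head, &b);		/* list becomes a -> c */
		return (head == &a && a.next == &c) ? 0 : 1;
	}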
2571
+
2572
+ spin_lock(&vm->invalidated_lock);
24032573 list_del(&bo_va->base.vm_status);
2404
- spin_unlock(&vm->moved_lock);
2574
+ spin_unlock(&vm->invalidated_lock);
24052575
24062576 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
24072577 list_del(&mapping->list);
....@@ -2418,7 +2588,45 @@
24182588 }
24192589
24202590 dma_fence_put(bo_va->last_pt_update);
2591
+
2592
+ if (bo && bo_va->is_xgmi)
2593
+ amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MIN);
2594
+
24212595 kfree(bo_va);
2596
+}
2597
+
2598
+/**
2599
+ * amdgpu_vm_evictable - check if we can evict a VM
2600
+ *
2601
+ * @bo: A page table of the VM.
2602
+ *
2603
+ * Check if it is possible to evict a VM.
2604
+ */
2605
+bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
2606
+{
2607
+ struct amdgpu_vm_bo_base *bo_base = bo->vm_bo;
2608
+
2609
+ /* Page tables of a destroyed VM can go away immediately */
2610
+ if (!bo_base || !bo_base->vm)
2611
+ return true;
2612
+
2613
+ /* Don't evict VM page tables while they are busy */
2614
+ if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true))
2615
+ return false;
2616
+
2617
+ /* Try to block ongoing updates */
2618
+ if (!amdgpu_vm_eviction_trylock(bo_base->vm))
2619
+ return false;
2620
+
2621
+ /* Don't evict VM page tables while they are updated */
2622
+ if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
2623
+ amdgpu_vm_eviction_unlock(bo_base->vm);
2624
+ return false;
2625
+ }
2626
+
2627
+ bo_base->vm->evicting = true;
2628
+ amdgpu_vm_eviction_unlock(bo_base->vm);
2629
+ return true;
24222630 }
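Together with the eviction_lock/evicting check in amdgpu_vm_bo_update_mapping() above, this forms a small handshake: updaters hold the lock and bail out with -EBUSY once a VM is marked evicting, while the eviction path only sets that flag if it can take the lock (ignoring the additional fence checks here for simplicity). A stand-alone sketch of the same handshake using plain pthreads instead of the kernel primitives:

	#include <pthread.h>
	#include <stdbool.h>

	struct vm_sketch {
		pthread_mutex_t eviction_lock;
		bool evicting;
	};

	/* page-table update path: refuse to touch PTs of a VM that is being evicted */
	static int update_page_tables(struct vm_sketch *vm)
	{
		int r = 0;

		pthread_mutex_lock(&vm->eviction_lock);
		if (vm->evicting)
			r = -1;			/* -EBUSY in the driver */
		/* ... otherwise the page table update would be submitted here ... */
		pthread_mutex_unlock(&vm->eviction_lock);
		return r;
	}

	/* eviction path: only mark the VM evicting if no update is in flight */
	static bool vm_evictable(struct vm_sketch *vm)
	{
		if (pthread_mutex_trylock(&vm->eviction_lock) != 0)
			return false;		/* an updater holds the lock right now */
		vm->evicting = true;
		pthread_mutex_unlock(&vm->eviction_lock);
		return true;
	}

	int main(void)
	{
		struct vm_sketch vm = { PTHREAD_MUTEX_INITIALIZER, false };

		return vm_evictable(&vm) && update_page_tables(&vm) != 0 ? 0 : 1;
	}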
24232631
24242632 /**
....@@ -2439,30 +2647,24 @@
24392647 if (bo->parent && bo->parent->shadow == bo)
24402648 bo = bo->parent;
24412649
2442
- list_for_each_entry(bo_base, &bo->va, bo_list) {
2650
+ for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
24432651 struct amdgpu_vm *vm = bo_base->vm;
2444
- bool was_moved = bo_base->moved;
24452652
2653
+ if (evicted && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) {
2654
+ amdgpu_vm_bo_evicted(bo_base);
2655
+ continue;
2656
+ }
2657
+
2658
+ if (bo_base->moved)
2659
+ continue;
24462660 bo_base->moved = true;
2447
- if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
2448
- if (bo->tbo.type == ttm_bo_type_kernel)
2449
- list_move(&bo_base->vm_status, &vm->evicted);
2450
- else
2451
- list_move_tail(&bo_base->vm_status,
2452
- &vm->evicted);
2453
- continue;
2454
- }
24552661
2456
- if (was_moved)
2457
- continue;
2458
-
2459
- if (bo->tbo.type == ttm_bo_type_kernel) {
2460
- list_move(&bo_base->vm_status, &vm->relocated);
2461
- } else {
2462
- spin_lock(&bo_base->vm->moved_lock);
2463
- list_move(&bo_base->vm_status, &vm->moved);
2464
- spin_unlock(&bo_base->vm->moved_lock);
2465
- }
2662
+ if (bo->tbo.type == ttm_bo_type_kernel)
2663
+ amdgpu_vm_bo_relocated(bo_base);
2664
+ else if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)
2665
+ amdgpu_vm_bo_moved(bo_base);
2666
+ else
2667
+ amdgpu_vm_bo_invalidated(bo_base);
24662668 }
24672669 }
24682670
....@@ -2582,6 +2784,22 @@
25822784 }
25832785
25842786 /**
2787
+ * amdgpu_vm_wait_idle - wait for the VM to become idle
2788
+ *
2789
+ * @vm: VM object to wait for
2790
+ * @timeout: timeout to wait for VM to become idle
2791
+ */
2792
+long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
2793
+{
2794
+ timeout = dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv,
2795
+ true, true, timeout);
2796
+ if (timeout <= 0)
2797
+ return timeout;
2798
+
2799
+ return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
2800
+}
2801
+
2802
+/**
25852803 * amdgpu_vm_init - initialize a vm instance
25862804 *
25872805 * @adev: amdgpu_device pointer
....@@ -2595,17 +2813,10 @@
25952813 * 0 for success, error for failure.
25962814 */
25972815 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2598
- int vm_context, unsigned int pasid)
2816
+ int vm_context, u32 pasid)
25992817 {
26002818 struct amdgpu_bo_param bp;
26012819 struct amdgpu_bo *root;
2602
- const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
2603
- AMDGPU_VM_PTE_COUNT(adev) * 8);
2604
- unsigned ring_instance;
2605
- struct amdgpu_ring *ring;
2606
- struct drm_sched_rq *rq;
2607
- unsigned long size;
2608
- uint64_t flags;
26092820 int r, i;
26102821
26112822 vm->va = RB_ROOT_CACHED;
....@@ -2613,22 +2824,28 @@
26132824 vm->reserved_vmid[i] = NULL;
26142825 INIT_LIST_HEAD(&vm->evicted);
26152826 INIT_LIST_HEAD(&vm->relocated);
2616
- spin_lock_init(&vm->moved_lock);
26172827 INIT_LIST_HEAD(&vm->moved);
26182828 INIT_LIST_HEAD(&vm->idle);
2829
+ INIT_LIST_HEAD(&vm->invalidated);
2830
+ spin_lock_init(&vm->invalidated_lock);
26192831 INIT_LIST_HEAD(&vm->freed);
26202832
2621
- /* create scheduler entity for page table updates */
26222833
2623
- ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
2624
- ring_instance %= adev->vm_manager.vm_pte_num_rings;
2625
- ring = adev->vm_manager.vm_pte_rings[ring_instance];
2626
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
2627
- r = drm_sched_entity_init(&vm->entity, &rq, 1, NULL);
2834
+ /* create scheduler entities for page table updates */
2835
+ r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
2836
+ adev->vm_manager.vm_pte_scheds,
2837
+ adev->vm_manager.vm_pte_num_scheds, NULL);
26282838 if (r)
26292839 return r;
26302840
2841
+ r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
2842
+ adev->vm_manager.vm_pte_scheds,
2843
+ adev->vm_manager.vm_pte_num_scheds, NULL);
2844
+ if (r)
2845
+ goto error_free_immediate;
2846
+
26312847 vm->pte_support_ats = false;
2848
+ vm->is_compute_context = false;
26322849
26332850 if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
26342851 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
....@@ -2642,39 +2859,41 @@
26422859 }
26432860 DRM_DEBUG_DRIVER("VM update mode is %s\n",
26442861 vm->use_cpu_for_update ? "CPU" : "SDMA");
2645
- WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
2862
+ WARN_ONCE((vm->use_cpu_for_update &&
2863
+ !amdgpu_gmc_vram_full_visible(&adev->gmc)),
26462864 "CPU update of VM recommended only for large BAR system\n");
2647
- vm->last_update = NULL;
26482865
2649
- flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
26502866 if (vm->use_cpu_for_update)
2651
- flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
2652
- else if (vm_context != AMDGPU_VM_CONTEXT_COMPUTE)
2653
- flags |= AMDGPU_GEM_CREATE_SHADOW;
2867
+ vm->update_funcs = &amdgpu_vm_cpu_funcs;
2868
+ else
2869
+ vm->update_funcs = &amdgpu_vm_sdma_funcs;
2870
+ vm->last_update = NULL;
2871
+ vm->last_unlocked = dma_fence_get_stub();
26542872
2655
- size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
2656
- memset(&bp, 0, sizeof(bp));
2657
- bp.size = size;
2658
- bp.byte_align = align;
2659
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
2660
- bp.flags = flags;
2661
- bp.type = ttm_bo_type_kernel;
2662
- bp.resv = NULL;
2873
+ mutex_init(&vm->eviction_lock);
2874
+ vm->evicting = false;
2875
+
2876
+ amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp);
2877
+ if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
2878
+ bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
26632879 r = amdgpu_bo_create(adev, &bp, &root);
26642880 if (r)
2665
- goto error_free_sched_entity;
2881
+ goto error_free_delayed;
26662882
26672883 r = amdgpu_bo_reserve(root, true);
26682884 if (r)
26692885 goto error_free_root;
26702886
2671
- r = amdgpu_vm_clear_bo(adev, vm, root,
2672
- adev->vm_manager.root_level,
2673
- vm->pte_support_ats);
2887
+ r = dma_resv_reserve_shared(root->tbo.base.resv, 1);
26742888 if (r)
26752889 goto error_unreserve;
26762890
26772891 amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
2892
+
2893
+ r = amdgpu_vm_clear_bo(adev, vm, root, false);
2894
+ if (r)
2895
+ goto error_unreserve;
2896
+
26782897 amdgpu_bo_unreserve(vm->root.base.bo);
26792898
26802899 if (pasid) {
....@@ -2691,7 +2910,6 @@
26912910 }
26922911
26932912 INIT_KFIFO(vm->faults);
2694
- vm->fault_credit = 16;
26952913
26962914 return 0;
26972915
....@@ -2703,10 +2921,45 @@
27032921 amdgpu_bo_unref(&vm->root.base.bo);
27042922 vm->root.base.bo = NULL;
27052923
2706
-error_free_sched_entity:
2707
- drm_sched_entity_destroy(&vm->entity);
2924
+error_free_delayed:
2925
+ dma_fence_put(vm->last_unlocked);
2926
+ drm_sched_entity_destroy(&vm->delayed);
2927
+
2928
+error_free_immediate:
2929
+ drm_sched_entity_destroy(&vm->immediate);
27082930
27092931 return r;
2932
+}
2933
+
2934
+/**
2935
+ * amdgpu_vm_check_clean_reserved - check if a VM is clean
2936
+ *
2937
+ * @adev: amdgpu_device pointer
2938
+ * @vm: the VM to check
2939
+ *
2940
+ * Check all entries of the root PD. If any subsequent PDs are allocated,
2941
+ * page tables are still being created and filled, so this is not a clean
2942
+ * VM.
2943
+ *
2944
+ * Returns:
2945
+ * 0 if this VM is clean
2946
+ */
2947
+static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
2948
+ struct amdgpu_vm *vm)
2949
+{
2950
+ enum amdgpu_vm_level root = adev->vm_manager.root_level;
2951
+ unsigned int entries = amdgpu_vm_num_entries(adev, root);
2952
+ unsigned int i = 0;
2953
+
2954
+ if (!(vm->root.entries))
2955
+ return 0;
2956
+
2957
+ for (i = 0; i < entries; i++) {
2958
+ if (vm->root.entries[i].base.bo)
2959
+ return -EINVAL;
2960
+ }
2961
+
2962
+ return 0;
27102963 }
27112964
27122965 /**
....@@ -2714,6 +2967,7 @@
27142967 *
27152968 * @adev: amdgpu_device pointer
27162969 * @vm: requested vm
2970
+ * @pasid: pasid to use
27172971 *
27182972 * This only works on GFX VMs that don't have any BOs added and no
27192973 * page tables allocated yet.
....@@ -2729,7 +2983,8 @@
27292983 * Returns:
27302984 * 0 for success, -errno for errors.
27312985 */
2732
-int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2986
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2987
+ u32 pasid)
27332988 {
27342989 bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
27352990 int r;
....@@ -2739,30 +2994,56 @@
27392994 return r;
27402995
27412996 /* Sanity checks */
2742
- if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
2743
- r = -EINVAL;
2744
- goto error;
2997
+ r = amdgpu_vm_check_clean_reserved(adev, vm);
2998
+ if (r)
2999
+ goto unreserve_bo;
3000
+
3001
+ if (pasid) {
3002
+ unsigned long flags;
3003
+
3004
+ spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
3005
+ r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
3006
+ GFP_ATOMIC);
3007
+ spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
3008
+
3009
+ if (r == -ENOSPC)
3010
+ goto unreserve_bo;
3011
+ r = 0;
27453012 }
27463013
27473014 /* Check if PD needs to be reinitialized and do it before
27483015 * changing any other state, in case it fails.
27493016 */
27503017 if (pte_support_ats != vm->pte_support_ats) {
2751
- r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
2752
- adev->vm_manager.root_level,
2753
- pte_support_ats);
3018
+ vm->pte_support_ats = pte_support_ats;
3019
+ r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, false);
27543020 if (r)
2755
- goto error;
3021
+ goto free_idr;
27563022 }
27573023
27583024 /* Update VM state */
27593025 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
27603026 AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2761
- vm->pte_support_ats = pte_support_ats;
27623027 DRM_DEBUG_DRIVER("VM update mode is %s\n",
27633028 vm->use_cpu_for_update ? "CPU" : "SDMA");
2764
- WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
3029
+ WARN_ONCE((vm->use_cpu_for_update &&
3030
+ !amdgpu_gmc_vram_full_visible(&adev->gmc)),
27653031 "CPU update of VM recommended only for large BAR system\n");
3032
+
3033
+ if (vm->use_cpu_for_update) {
3034
+ /* Sync with last SDMA update/clear before switching to CPU */
3035
+ r = amdgpu_bo_sync_wait(vm->root.base.bo,
3036
+ AMDGPU_FENCE_OWNER_UNDEFINED, true);
3037
+ if (r)
3038
+ goto free_idr;
3039
+
3040
+ vm->update_funcs = &amdgpu_vm_cpu_funcs;
3041
+ } else {
3042
+ vm->update_funcs = &amdgpu_vm_sdma_funcs;
3043
+ }
3044
+ dma_fence_put(vm->last_update);
3045
+ vm->last_update = NULL;
3046
+ vm->is_compute_context = true;
27663047
27673048 if (vm->pasid) {
27683049 unsigned long flags;
....@@ -2771,45 +3052,53 @@
27713052 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
27723053 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
27733054
3055
+ /* Free the originally amdgpu-allocated pasid;
3056
+ * it will be replaced with a kfd-allocated pasid.
3057
+ */
3058
+ amdgpu_pasid_free(vm->pasid);
27743059 vm->pasid = 0;
27753060 }
27763061
27773062 /* Free the shadow bo for compute VM */
27783063 amdgpu_bo_unref(&vm->root.base.bo->shadow);
27793064
2780
-error:
3065
+ if (pasid)
3066
+ vm->pasid = pasid;
3067
+
3068
+ goto unreserve_bo;
3069
+
3070
+free_idr:
3071
+ if (pasid) {
3072
+ unsigned long flags;
3073
+
3074
+ spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
3075
+ idr_remove(&adev->vm_manager.pasid_idr, pasid);
3076
+ spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
3077
+ }
3078
+unreserve_bo:
27813079 amdgpu_bo_unreserve(vm->root.base.bo);
27823080 return r;
27833081 }
27843082
27853083 /**
2786
- * amdgpu_vm_free_levels - free PD/PT levels
3084
+ * amdgpu_vm_release_compute - release a compute vm
3085
+ * @adev: amdgpu_device pointer
3086
+ * @vm: a vm turned into compute vm by calling amdgpu_vm_make_compute
27873087 *
2788
- * @adev: amdgpu device structure
2789
- * @parent: PD/PT starting level to free
2790
- * @level: level of parent structure
2791
- *
2792
- * Free the page directory or page table level and all sub levels.
3088
+ * This is a correspondant of amdgpu_vm_make_compute. It decouples compute
3089
+ * pasid from vm. Compute should stop use of vm after this call.
27933090 */
2794
-static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
2795
- struct amdgpu_vm_pt *parent,
2796
- unsigned level)
3091
+void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
27973092 {
2798
- unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
3093
+ if (vm->pasid) {
3094
+ unsigned long flags;
27993095
2800
- if (parent->base.bo) {
2801
- list_del(&parent->base.bo_list);
2802
- list_del(&parent->base.vm_status);
2803
- amdgpu_bo_unref(&parent->base.bo->shadow);
2804
- amdgpu_bo_unref(&parent->base.bo);
3096
+ spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
3097
+ idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
3098
+ spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
28053099 }
2806
-
2807
- if (parent->entries)
2808
- for (i = 0; i < num_entries; i++)
2809
- amdgpu_vm_free_levels(adev, &parent->entries[i],
2810
- level + 1);
2811
-
2812
- kvfree(parent->entries);
3100
+ vm->pasid = 0;
3101
+ vm->is_compute_context = false;
28133102 }
28143103
28153104 /**
....@@ -2826,34 +3115,24 @@
28263115 struct amdgpu_bo_va_mapping *mapping, *tmp;
28273116 bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
28283117 struct amdgpu_bo *root;
2829
- u64 fault;
2830
- int i, r;
3118
+ int i;
28313119
28323120 amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
28333121
2834
- /* Clear pending page faults from IH when the VM is destroyed */
2835
- while (kfifo_get(&vm->faults, &fault))
2836
- amdgpu_ih_clear_fault(adev, fault);
2837
-
3122
+ root = amdgpu_bo_ref(vm->root.base.bo);
3123
+ amdgpu_bo_reserve(root, true);
28383124 if (vm->pasid) {
28393125 unsigned long flags;
28403126
28413127 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
28423128 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
28433129 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
3130
+ vm->pasid = 0;
28443131 }
28453132
2846
- drm_sched_entity_destroy(&vm->entity);
3133
+ dma_fence_wait(vm->last_unlocked, false);
3134
+ dma_fence_put(vm->last_unlocked);
28473135
2848
- if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
2849
- dev_err(adev->dev, "still active bo inside vm\n");
2850
- }
2851
- rbtree_postorder_for_each_entry_safe(mapping, tmp,
2852
- &vm->va.rb_root, rb) {
2853
- list_del(&mapping->list);
2854
- amdgpu_vm_it_remove(mapping, &vm->va);
2855
- kfree(mapping);
2856
- }
28573136 list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
28583137 if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
28593138 amdgpu_vm_prt_fini(adev, vm);
....@@ -2864,55 +3143,29 @@
28643143 amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
28653144 }
28663145
2867
- root = amdgpu_bo_ref(vm->root.base.bo);
2868
- r = amdgpu_bo_reserve(root, true);
2869
- if (r) {
2870
- dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
2871
- } else {
2872
- amdgpu_vm_free_levels(adev, &vm->root,
2873
- adev->vm_manager.root_level);
2874
- amdgpu_bo_unreserve(root);
2875
- }
3146
+ amdgpu_vm_free_pts(adev, vm, NULL);
3147
+ amdgpu_bo_unreserve(root);
28763148 amdgpu_bo_unref(&root);
3149
+ WARN_ON(vm->root.base.bo);
3150
+
3151
+ drm_sched_entity_destroy(&vm->immediate);
3152
+ drm_sched_entity_destroy(&vm->delayed);
3153
+
3154
+ if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
3155
+ dev_err(adev->dev, "still active bo inside vm\n");
3156
+ }
3157
+ rbtree_postorder_for_each_entry_safe(mapping, tmp,
3158
+ &vm->va.rb_root, rb) {
3159
+ /* Don't remove the mapping here, we don't want to trigger a
3160
+ * rebalance and the tree is about to be destroyed anyway.
3161
+ */
3162
+ list_del(&mapping->list);
3163
+ kfree(mapping);
3164
+ }
3165
+
28773166 dma_fence_put(vm->last_update);
28783167 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
28793168 amdgpu_vmid_free_reserved(adev, vm, i);
2880
-}
2881
-
2882
-/**
2883
- * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
2884
- *
2885
- * @adev: amdgpu_device pointer
2886
- * @pasid: PASID do identify the VM
2887
- *
2888
- * This function is expected to be called in interrupt context.
2889
- *
2890
- * Returns:
2891
- * True if there was fault credit, false otherwise
2892
- */
2893
-bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
2894
- unsigned int pasid)
2895
-{
2896
- struct amdgpu_vm *vm;
2897
-
2898
- spin_lock(&adev->vm_manager.pasid_lock);
2899
- vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
2900
- if (!vm) {
2901
- /* VM not found, can't track fault credit */
2902
- spin_unlock(&adev->vm_manager.pasid_lock);
2903
- return true;
2904
- }
2905
-
2906
- /* No lock needed. only accessed by IRQ handler */
2907
- if (!vm->fault_credit) {
2908
- /* Too many faults in this VM */
2909
- spin_unlock(&adev->vm_manager.pasid_lock);
2910
- return false;
2911
- }
2912
-
2913
- vm->fault_credit--;
2914
- spin_unlock(&adev->vm_manager.pasid_lock);
2915
- return true;
29163169 }
29173170
29183171 /**
....@@ -2926,6 +3179,12 @@
29263179 {
29273180 unsigned i;
29283181
3182
+ /* Concurrent flushes are only possible starting with Vega10 and
3183
+ * are broken on Navi10 and Navi14.
3184
+ */
3185
+ adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 ||
3186
+ adev->asic_type == CHIP_NAVI10 ||
3187
+ adev->asic_type == CHIP_NAVI14);
29293188 amdgpu_vmid_mgr_init(adev);
29303189
29313190 adev->vm_manager.fence_context =
....@@ -2933,7 +3192,6 @@
29333192 for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
29343193 adev->vm_manager.seqno[i] = 0;
29353194
2936
- atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
29373195 spin_lock_init(&adev->vm_manager.prt_lock);
29383196 atomic_set(&adev->vm_manager.num_prt_users, 0);
29393197
....@@ -2942,7 +3200,11 @@
29423200 */
29433201 #ifdef CONFIG_X86_64
29443202 if (amdgpu_vm_update_mode == -1) {
2945
- if (amdgpu_gmc_vram_full_visible(&adev->gmc))
3203
+ /* For asic with VF MMIO access protection
3204
+ * avoid using CPU for VM table updates
3205
+ */
3206
+ if (amdgpu_gmc_vram_full_visible(&adev->gmc) &&
3207
+ !amdgpu_sriov_vf_mmio_access_protection(adev))
29463208 adev->vm_manager.vm_update_mode =
29473209 AMDGPU_VM_USE_CPU_FOR_COMPUTE;
29483210 else
....@@ -2985,19 +3247,36 @@
29853247 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
29863248 {
29873249 union drm_amdgpu_vm *args = data;
2988
- struct amdgpu_device *adev = dev->dev_private;
3250
+ struct amdgpu_device *adev = drm_to_adev(dev);
29893251 struct amdgpu_fpriv *fpriv = filp->driver_priv;
3252
+ long timeout = msecs_to_jiffies(2000);
29903253 int r;
29913254
29923255 switch (args->in.op) {
29933256 case AMDGPU_VM_OP_RESERVE_VMID:
2994
- /* current, we only have requirement to reserve vmid from gfxhub */
2995
- r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
3257
+ /* We only need to reserve a vmid from the gfxhub */
3258
+ r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm,
3259
+ AMDGPU_GFXHUB_0);
29963260 if (r)
29973261 return r;
29983262 break;
29993263 case AMDGPU_VM_OP_UNRESERVE_VMID:
3000
- amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
3264
+ if (amdgpu_sriov_runtime(adev))
3265
+ timeout = 8 * timeout;
3266
+
3267
+ /* Wait vm idle to make sure the vmid set in SPM_VMID is
3268
+ * not referenced anymore.
3269
+ */
3270
+ r = amdgpu_bo_reserve(fpriv->vm.root.base.bo, true);
3271
+ if (r)
3272
+ return r;
3273
+
3274
+ r = amdgpu_vm_wait_idle(&fpriv->vm, timeout);
3275
+ if (r < 0)
3276
+ return r;
3277
+
3278
+ amdgpu_bo_unreserve(fpriv->vm.root.base.bo);
3279
+ amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
30013280 break;
30023281 default:
30033282 return -EINVAL;
....@@ -3009,11 +3288,11 @@
30093288 /**
30103289 * amdgpu_vm_get_task_info - Extracts task info for a PASID.
30113290 *
3012
- * @dev: drm device pointer
3291
+ * @adev: drm device pointer
30133292 * @pasid: PASID identifier for VM
30143293 * @task_info: task_info to fill.
30153294 */
3016
-void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
3295
+void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
30173296 struct amdgpu_task_info *task_info)
30183297 {
30193298 struct amdgpu_vm *vm;
....@@ -3035,13 +3314,97 @@
30353314 */
30363315 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
30373316 {
3038
- if (!vm->task_info.pid) {
3039
- vm->task_info.pid = current->pid;
3040
- get_task_comm(vm->task_info.task_name, current);
3317
+ if (vm->task_info.pid)
3318
+ return;
30413319
3042
- if (current->group_leader->mm == current->mm) {
3043
- vm->task_info.tgid = current->group_leader->pid;
3044
- get_task_comm(vm->task_info.process_name, current->group_leader);
3045
- }
3320
+ vm->task_info.pid = current->pid;
3321
+ get_task_comm(vm->task_info.task_name, current);
3322
+
3323
+ if (current->group_leader->mm != current->mm)
3324
+ return;
3325
+
3326
+ vm->task_info.tgid = current->group_leader->pid;
3327
+ get_task_comm(vm->task_info.process_name, current->group_leader);
3328
+}
3329
+
3330
+/**
3331
+ * amdgpu_vm_handle_fault - graceful handling of VM faults.
3332
+ * @adev: amdgpu device pointer
3333
+ * @pasid: PASID of the VM
3334
+ * @addr: Address of the fault
3335
+ *
3336
+ * Try to gracefully handle a VM fault. Return true if the fault was handled and
3337
+ * shouldn't be reported any more.
3338
+ */
3339
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
3340
+ uint64_t addr)
3341
+{
3342
+ struct amdgpu_bo *root;
3343
+ uint64_t value, flags;
3344
+ struct amdgpu_vm *vm;
3345
+ long r;
3346
+
3347
+ spin_lock(&adev->vm_manager.pasid_lock);
3348
+ vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
3349
+ if (vm)
3350
+ root = amdgpu_bo_ref(vm->root.base.bo);
3351
+ else
3352
+ root = NULL;
3353
+ spin_unlock(&adev->vm_manager.pasid_lock);
3354
+
3355
+ if (!root)
3356
+ return false;
3357
+
3358
+ r = amdgpu_bo_reserve(root, true);
3359
+ if (r)
3360
+ goto error_unref;
3361
+
3362
+ /* Double check that the VM still exists */
3363
+ spin_lock(&adev->vm_manager.pasid_lock);
3364
+ vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
3365
+ if (vm && vm->root.base.bo != root)
3366
+ vm = NULL;
3367
+ spin_unlock(&adev->vm_manager.pasid_lock);
3368
+ if (!vm)
3369
+ goto error_unlock;
3370
+
3371
+ addr /= AMDGPU_GPU_PAGE_SIZE;
3372
+ flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
3373
+ AMDGPU_PTE_SYSTEM;
3374
+
3375
+ if (vm->is_compute_context) {
3376
+ /* Intentionally setting invalid PTE flag
3377
+ * combination to force a no-retry-fault
3378
+ */
3379
+ flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
3380
+ AMDGPU_PTE_TF;
3381
+ value = 0;
3382
+
3383
+ } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
3384
+ /* Redirect the access to the dummy page */
3385
+ value = adev->dummy_page_addr;
3386
+ flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
3387
+ AMDGPU_PTE_WRITEABLE;
3388
+
3389
+ } else {
3390
+ /* Let the hw retry silently on the PTE */
3391
+ value = 0;
30463392 }
3393
+
3394
+ r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr,
3395
+ addr + 1, flags, value, NULL, NULL);
3396
+ if (r)
3397
+ goto error_unlock;
3398
+
3399
+ r = amdgpu_vm_update_pdes(adev, vm, true);
3400
+
3401
+error_unlock:
3402
+ amdgpu_bo_unreserve(root);
3403
+ if (r < 0)
3404
+ DRM_ERROR("Can't handle page fault (%ld)\n", r);
3405
+
3406
+error_unref:
3407
+ amdgpu_bo_unref(&root);
3408
+
3409
+ return false;
30473410 }
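The PTE written by the fault handler above falls into one of three cases, depending on whether the VM belongs to a compute context and on the amdgpu_vm_fault_stop module option. A compact summary of that choice; the PTE_ values below are arbitrary stand-ins so the sketch compiles on its own, not the driver's real bit definitions:

	#include <stdint.h>
	#include <stdio.h>

	/* arbitrary stand-ins for the driver's AMDGPU_PTE_ and AMDGPU_PDE_ bits */
	#define PTE_VALID      (1ull << 0)
	#define PTE_SYSTEM     (1ull << 1)
	#define PTE_SNOOPED    (1ull << 2)
	#define PTE_EXECUTABLE (1ull << 3)
	#define PTE_READABLE   (1ull << 4)
	#define PTE_WRITEABLE  (1ull << 5)
	#define PDE_PTE        (1ull << 6)
	#define PTE_TF         (1ull << 7)

	struct fault_pte { uint64_t flags, value; };

	/* mirrors the three cases handled in amdgpu_vm_handle_fault() */
	static struct fault_pte pick_fault_pte(int is_compute, int fault_stop_never,
					       uint64_t dummy_page)
	{
		struct fault_pte pte = { PTE_VALID | PTE_SNOOPED | PTE_SYSTEM, 0 };

		if (is_compute) {
			/* deliberately invalid combination -> hw raises a no-retry fault */
			pte.flags = PTE_EXECUTABLE | PDE_PTE | PTE_TF;
		} else if (fault_stop_never) {
			/* silently redirect the faulting access to the dummy page */
			pte.flags |= PTE_EXECUTABLE | PTE_READABLE | PTE_WRITEABLE;
			pte.value = dummy_page;
		}
		/* default graphics case: value stays 0 and the hw retries silently */
		return pte;
	}

	int main(void)
	{
		printf("compute flags: %#llx\n",
		       (unsigned long long)pick_fault_pte(1, 0, 0).flags);
		return 0;
	}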