2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
....@@ -19,21 +19,17 @@
1919 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
2020 * OTHER DEALINGS IN THE SOFTWARE.
2121 */
22
-
23
-#define pr_fmt(fmt) "kfd2kgd: " fmt
24
-
22
+#include <linux/dma-buf.h>
2523 #include <linux/list.h>
2624 #include <linux/pagemap.h>
2725 #include <linux/sched/mm.h>
28
-#include <drm/drmP.h>
26
+#include <linux/sched/task.h>
27
+
2928 #include "amdgpu_object.h"
3029 #include "amdgpu_vm.h"
3130 #include "amdgpu_amdkfd.h"
32
-
33
-/* Special VM and GART address alignment needed for VI pre-Fiji due to
34
- * a HW bug.
35
- */
36
-#define VI_BO_SIZE_ALIGN (0x8000)
31
+#include "amdgpu_dma_buf.h"
32
+#include <uapi/linux/kfd_ioctl.h>
3733
3834 /* BO flag to indicate a KFD userptr BO */
3935 #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
....@@ -46,17 +42,11 @@
4642 /* Impose limit on how much memory KFD can use */
4743 static struct {
4844 uint64_t max_system_mem_limit;
49
- uint64_t max_userptr_mem_limit;
45
+ uint64_t max_ttm_mem_limit;
5046 int64_t system_mem_used;
51
- int64_t userptr_mem_used;
47
+ int64_t ttm_mem_used;
5248 spinlock_t mem_limit_lock;
5349 } kfd_mem_limit;
54
-
55
-/* Struct used for amdgpu_amdkfd_bo_validate */
56
-struct amdgpu_vm_parser {
57
- uint32_t domain;
58
- bool wait;
59
-};
6050
6151 static const char * const domain_bit_to_string[] = {
6252 "CPU",
....@@ -90,8 +80,8 @@
9080 }
9181
9282 /* Set memory usage limits. Current, limits are
93
- * System (kernel) memory - 3/8th System RAM
94
- * Userptr memory - 3/4th System RAM
83
+ * System (TTM + userptr) memory - 15/16th System RAM
84
+ * TTM memory - 3/8th System RAM
9585 */
9686 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
9787 {
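As a worked example of the new limits (assuming a hypothetical machine with 64 GiB of system RAM): the combined system (TTM + userptr) limit becomes 64 GiB - 64/16 GiB = 60 GiB and the TTM limit becomes 64/2 GiB - 64/8 GiB = 24 GiB, whereas the old scheme allowed 24 GiB of kernel (GTT) memory and 48 GiB of userptr memory.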
....@@ -103,48 +93,78 @@
10393 mem *= si.mem_unit;
10494
10595 spin_lock_init(&kfd_mem_limit.mem_limit_lock);
106
- kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
107
- kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
108
- pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
96
+ kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
97
+ kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
98
+ pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
10999 (kfd_mem_limit.max_system_mem_limit >> 20),
110
- (kfd_mem_limit.max_userptr_mem_limit >> 20));
100
+ (kfd_mem_limit.max_ttm_mem_limit >> 20));
111101 }
112102
113
-static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
114
- uint64_t size, u32 domain)
103
+/* Estimate page table size needed to represent a given memory size
104
+ *
105
+ * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
106
+ * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
107
+ * of memory (factor 256K, >> 18). ROCm user mode tries to optimize
108
+ * for 2MB pages for TLB efficiency. However, small allocations and
109
+ * fragmented system memory still need some 4KB pages. We choose a
110
+ * compromise that should work in most cases without reserving too
111
+ * much memory for page tables unnecessarily (factor 16K, >> 14).
112
+ */
113
+#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
114
+
115
+static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
116
+ uint64_t size, u32 domain, bool sg)
115117 {
116
- size_t acc_size;
118
+ uint64_t reserved_for_pt =
119
+ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
120
+ size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
117121 int ret = 0;
118122
119123 acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
120124 sizeof(struct amdgpu_bo));
121125
122
- spin_lock(&kfd_mem_limit.mem_limit_lock);
126
+ vram_needed = 0;
123127 if (domain == AMDGPU_GEM_DOMAIN_GTT) {
124
- if (kfd_mem_limit.system_mem_used + (acc_size + size) >
125
- kfd_mem_limit.max_system_mem_limit) {
126
- ret = -ENOMEM;
127
- goto err_no_mem;
128
- }
129
- kfd_mem_limit.system_mem_used += (acc_size + size);
130
- } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
131
- if ((kfd_mem_limit.system_mem_used + acc_size >
132
- kfd_mem_limit.max_system_mem_limit) ||
133
- (kfd_mem_limit.userptr_mem_used + (size + acc_size) >
134
- kfd_mem_limit.max_userptr_mem_limit)) {
135
- ret = -ENOMEM;
136
- goto err_no_mem;
137
- }
138
- kfd_mem_limit.system_mem_used += acc_size;
139
- kfd_mem_limit.userptr_mem_used += size;
128
+ /* TTM GTT memory */
129
+ system_mem_needed = acc_size + size;
130
+ ttm_mem_needed = acc_size + size;
131
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
132
+ /* Userptr */
133
+ system_mem_needed = acc_size + size;
134
+ ttm_mem_needed = acc_size;
135
+ } else {
136
+ /* VRAM and SG */
137
+ system_mem_needed = acc_size;
138
+ ttm_mem_needed = acc_size;
139
+ if (domain == AMDGPU_GEM_DOMAIN_VRAM)
140
+ vram_needed = size;
140141 }
141
-err_no_mem:
142
+
143
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
144
+
145
+ if (kfd_mem_limit.system_mem_used + system_mem_needed >
146
+ kfd_mem_limit.max_system_mem_limit)
147
+ pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
148
+
149
+ if ((kfd_mem_limit.system_mem_used + system_mem_needed >
150
+ kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
151
+ (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
152
+ kfd_mem_limit.max_ttm_mem_limit) ||
153
+ (adev->kfd.vram_used + vram_needed >
154
+ adev->gmc.real_vram_size - reserved_for_pt)) {
155
+ ret = -ENOMEM;
156
+ } else {
157
+ kfd_mem_limit.system_mem_used += system_mem_needed;
158
+ kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
159
+ adev->kfd.vram_used += vram_needed;
160
+ }
161
+
142162 spin_unlock(&kfd_mem_limit.mem_limit_lock);
143163 return ret;
144164 }
145165
146
-static void unreserve_system_mem_limit(struct amdgpu_device *adev,
147
- uint64_t size, u32 domain)
166
+static void unreserve_mem_limit(struct amdgpu_device *adev,
167
+ uint64_t size, u32 domain, bool sg)
148168 {
149169 size_t acc_size;
150170
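To make the reservation math above concrete: ESTIMATE_PT_SIZE() shifts the total memory size right by 14, so an assumed 64 GiB of total memory (amdgpu_amdkfd_total_mem_size) withholds 4 MiB of VRAM for page tables, versus 128 MiB if every mapping used 4 KiB pages (>> 9) and 256 KiB if every mapping used 2 MiB pages (>> 18). The per-domain accounting follows the same pattern in both directions: GTT BOs charge size + acc_size to both the system and TTM counters, userptr BOs charge size + acc_size to system but only acc_size to TTM, and VRAM/SG BOs charge only acc_size to both, with VRAM BOs additionally charging size against real_vram_size minus the page-table reservation; unreserve_mem_limit() below reverses exactly these charges. A minimal standalone sketch of the shift arithmetic (not driver code; the 64 GiB figure is only an illustrative assumption):

    #include <stdint.h>
    #include <stdio.h>

    #define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)

    int main(void)
    {
            uint64_t mem = 64ULL << 30;     /* assumed 64 GiB of total memory */

            /* compromise estimate used by the driver: 4 MiB */
            printf("PT reservation : %llu MiB\n",
                   (unsigned long long)(ESTIMATE_PT_SIZE(mem) >> 20));
            /* if everything used 4 KiB pages (>> 9): 128 MiB */
            printf("all 4 KiB pages: %llu MiB\n",
                   (unsigned long long)((mem >> 9) >> 20));
            /* if everything used 2 MiB pages (>> 18): 256 KiB */
            printf("all 2 MiB pages: %llu KiB\n",
                   (unsigned long long)((mem >> 18) >> 10));
            return 0;
    }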
....@@ -154,71 +174,62 @@
154174 spin_lock(&kfd_mem_limit.mem_limit_lock);
155175 if (domain == AMDGPU_GEM_DOMAIN_GTT) {
156176 kfd_mem_limit.system_mem_used -= (acc_size + size);
157
- } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
177
+ kfd_mem_limit.ttm_mem_used -= (acc_size + size);
178
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
179
+ kfd_mem_limit.system_mem_used -= (acc_size + size);
180
+ kfd_mem_limit.ttm_mem_used -= acc_size;
181
+ } else {
158182 kfd_mem_limit.system_mem_used -= acc_size;
159
- kfd_mem_limit.userptr_mem_used -= size;
183
+ kfd_mem_limit.ttm_mem_used -= acc_size;
184
+ if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
185
+ adev->kfd.vram_used -= size;
186
+ WARN_ONCE(adev->kfd.vram_used < 0,
187
+ "kfd VRAM memory accounting unbalanced");
188
+ }
160189 }
161190 WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
162191 "kfd system memory accounting unbalanced");
163
- WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
164
- "kfd userptr memory accounting unbalanced");
192
+ WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
193
+ "kfd TTM memory accounting unbalanced");
165194
166195 spin_unlock(&kfd_mem_limit.mem_limit_lock);
167196 }
168197
169
-void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
198
+void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
170199 {
171
- spin_lock(&kfd_mem_limit.mem_limit_lock);
200
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
201
+ u32 domain = bo->preferred_domains;
202
+ bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
172203
173204 if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
174
- kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
175
- kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
176
- } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
177
- kfd_mem_limit.system_mem_used -=
178
- (bo->tbo.acc_size + amdgpu_bo_size(bo));
205
+ domain = AMDGPU_GEM_DOMAIN_CPU;
206
+ sg = false;
179207 }
180
- WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
181
- "kfd system memory accounting unbalanced");
182
- WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
183
- "kfd userptr memory accounting unbalanced");
184208
185
- spin_unlock(&kfd_mem_limit.mem_limit_lock);
209
+ unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
186210 }
187211
188212
189
-/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
213
+/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
190214 * reservation object.
191215 *
192216 * @bo: [IN] Remove eviction fence(s) from this BO
193
- * @ef: [IN] If ef is specified, then this eviction fence is removed if it
217
+ * @ef: [IN] This eviction fence is removed if it
194218 * is present in the shared list.
195
- * @ef_list: [OUT] Returns list of eviction fences. These fences are removed
196
- * from BO's reservation object shared list.
197
- * @ef_count: [OUT] Number of fences in ef_list.
198219 *
199
- * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
200
- * called to restore the eviction fences and to avoid memory leak. This is
201
- * useful for shared BOs.
202220 * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
203221 */
204222 static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
205
- struct amdgpu_amdkfd_fence *ef,
206
- struct amdgpu_amdkfd_fence ***ef_list,
207
- unsigned int *ef_count)
223
+ struct amdgpu_amdkfd_fence *ef)
208224 {
209
- struct reservation_object *resv = bo->tbo.resv;
210
- struct reservation_object_list *old, *new;
225
+ struct dma_resv *resv = bo->tbo.base.resv;
226
+ struct dma_resv_list *old, *new;
211227 unsigned int i, j, k;
212228
213
- if (!ef && !ef_list)
229
+ if (!ef)
214230 return -EINVAL;
215231
216
- if (ef_list) {
217
- *ef_list = NULL;
218
- *ef_count = 0;
219
- }
220
-
221
- old = reservation_object_get_list(resv);
232
+ old = dma_resv_get_list(resv);
222233 if (!old)
223234 return 0;
224235
....@@ -234,10 +245,9 @@
234245 struct dma_fence *f;
235246
236247 f = rcu_dereference_protected(old->shared[i],
237
- reservation_object_held(resv));
248
+ dma_resv_held(resv));
238249
239
- if ((ef && f->context == ef->base.context) ||
240
- (!ef && to_amdgpu_amdkfd_fence(f)))
250
+ if (f->context == ef->base.context)
241251 RCU_INIT_POINTER(new->shared[--j], f);
242252 else
243253 RCU_INIT_POINTER(new->shared[k++], f);
....@@ -245,72 +255,58 @@
245255 new->shared_max = old->shared_max;
246256 new->shared_count = k;
247257
248
- if (!ef) {
249
- unsigned int count = old->shared_count - j;
250
-
251
- /* Alloc memory for count number of eviction fence pointers.
252
- * Fill the ef_list array and ef_count
253
- */
254
- *ef_list = kcalloc(count, sizeof(**ef_list), GFP_KERNEL);
255
- *ef_count = count;
256
-
257
- if (!*ef_list) {
258
- kfree(new);
259
- return -ENOMEM;
260
- }
261
- }
262
-
263258 /* Install the new fence list, seqcount provides the barriers */
264
- preempt_disable();
265259 write_seqcount_begin(&resv->seq);
266260 RCU_INIT_POINTER(resv->fence, new);
267261 write_seqcount_end(&resv->seq);
268
- preempt_enable();
269262
270263 /* Drop the references to the removed fences or move them to ef_list */
271264 for (i = j, k = 0; i < old->shared_count; ++i) {
272265 struct dma_fence *f;
273266
274267 f = rcu_dereference_protected(new->shared[i],
275
- reservation_object_held(resv));
276
- if (!ef)
277
- (*ef_list)[k++] = to_amdgpu_amdkfd_fence(f);
278
- else
279
- dma_fence_put(f);
268
+ dma_resv_held(resv));
269
+ dma_fence_put(f);
280270 }
281271 kfree_rcu(old, rcu);
282272
283273 return 0;
284274 }
285275
286
-/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
287
- * reservation object.
288
- *
289
- * @bo: [IN] Add eviction fences to this BO
290
- * @ef_list: [IN] List of eviction fences to be added
291
- * @ef_count: [IN] Number of fences in ef_list.
292
- *
293
- * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
294
- * function.
295
- */
296
-static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
297
- struct amdgpu_amdkfd_fence **ef_list,
298
- unsigned int ef_count)
276
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
299277 {
300
- int i;
278
+ struct amdgpu_bo *root = bo;
279
+ struct amdgpu_vm_bo_base *vm_bo;
280
+ struct amdgpu_vm *vm;
281
+ struct amdkfd_process_info *info;
282
+ struct amdgpu_amdkfd_fence *ef;
283
+ int ret;
301284
302
- if (!ef_list || !ef_count)
303
- return;
285
+ /* We can always get vm_bo from the root PD BO. */
286
+ while (root->parent)
287
+ root = root->parent;
304288
305
- for (i = 0; i < ef_count; i++) {
306
- amdgpu_bo_fence(bo, &ef_list[i]->base, true);
307
- /* Re-adding the fence takes an additional reference. Drop that
308
- * reference.
309
- */
310
- dma_fence_put(&ef_list[i]->base);
311
- }
289
+ vm_bo = root->vm_bo;
290
+ if (!vm_bo)
291
+ return 0;
312292
313
- kfree(ef_list);
293
+ vm = vm_bo->vm;
294
+ if (!vm)
295
+ return 0;
296
+
297
+ info = vm->process_info;
298
+ if (!info || !info->eviction_fence)
299
+ return 0;
300
+
301
+ ef = container_of(dma_fence_get(&info->eviction_fence->base),
302
+ struct amdgpu_amdkfd_fence, base);
303
+
304
+ BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
305
+ ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
306
+ dma_resv_unlock(bo->tbo.base.resv);
307
+
308
+ dma_fence_put(&ef->base);
309
+ return ret;
314310 }
315311
316312 static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
....@@ -328,28 +324,16 @@
328324 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
329325 if (ret)
330326 goto validate_fail;
331
- if (wait) {
332
- struct amdgpu_amdkfd_fence **ef_list;
333
- unsigned int ef_count;
334
-
335
- ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
336
- &ef_count);
337
- if (ret)
338
- goto validate_fail;
339
-
340
- ttm_bo_wait(&bo->tbo, false, false);
341
- amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
342
- }
327
+ if (wait)
328
+ amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
343329
344330 validate_fail:
345331 return ret;
346332 }
347333
348
-static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
334
+static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
349335 {
350
- struct amdgpu_vm_parser *p = param;
351
-
352
- return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
336
+ return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
353337 }
354338
355339 /* vm_validate_pt_pd_bos - Validate page table and directory BOs
....@@ -363,56 +347,31 @@
363347 {
364348 struct amdgpu_bo *pd = vm->root.base.bo;
365349 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
366
- struct amdgpu_vm_parser param;
367
- uint64_t addr, flags = AMDGPU_PTE_VALID;
368350 int ret;
369351
370
- param.domain = AMDGPU_GEM_DOMAIN_VRAM;
371
- param.wait = false;
372
-
373
- ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
374
- &param);
352
+ ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL);
375353 if (ret) {
376
- pr_err("amdgpu: failed to validate PT BOs\n");
354
+ pr_err("failed to validate PT BOs\n");
377355 return ret;
378356 }
379357
380
- ret = amdgpu_amdkfd_validate(&param, pd);
358
+ ret = amdgpu_amdkfd_validate_vm_bo(NULL, pd);
381359 if (ret) {
382
- pr_err("amdgpu: failed to validate PD\n");
360
+ pr_err("failed to validate PD\n");
383361 return ret;
384362 }
385363
386
- addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
387
- amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
388
- vm->pd_phys_addr = addr;
364
+ vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
389365
390366 if (vm->use_cpu_for_update) {
391367 ret = amdgpu_bo_kmap(pd, NULL);
392368 if (ret) {
393
- pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
369
+ pr_err("failed to kmap PD, ret=%d\n", ret);
394370 return ret;
395371 }
396372 }
397373
398374 return 0;
399
-}
400
-
401
-static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
402
- struct dma_fence *f)
403
-{
404
- int ret = amdgpu_sync_fence(adev, sync, f, false);
405
-
406
- /* Sync objects can't handle multiple GPUs (contexts) updating
407
- * sync->last_vm_update. Fortunately we don't need it for
408
- * KFD's purposes, so we can just drop that fence.
409
- */
410
- if (sync->last_vm_update) {
411
- dma_fence_put(sync->last_vm_update);
412
- sync->last_vm_update = NULL;
413
- }
414
-
415
- return ret;
416375 }
417376
418377 static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
....@@ -421,11 +380,44 @@
421380 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
422381 int ret;
423382
424
- ret = amdgpu_vm_update_directories(adev, vm);
383
+ ret = amdgpu_vm_update_pdes(adev, vm, false);
425384 if (ret)
426385 return ret;
427386
428
- return sync_vm_fence(adev, sync, vm->last_update);
387
+ return amdgpu_sync_fence(sync, vm->last_update);
388
+}
389
+
390
+static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
391
+{
392
+ struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
393
+ bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
394
+ uint32_t mapping_flags;
395
+
396
+ mapping_flags = AMDGPU_VM_PAGE_READABLE;
397
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
398
+ mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
399
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
400
+ mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
401
+
402
+ switch (adev->asic_type) {
403
+ case CHIP_ARCTURUS:
404
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
405
+ if (bo_adev == adev)
406
+ mapping_flags |= coherent ?
407
+ AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
408
+ else
409
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
410
+ } else {
411
+ mapping_flags |= coherent ?
412
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
413
+ }
414
+ break;
415
+ default:
416
+ mapping_flags |= coherent ?
417
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
418
+ }
419
+
420
+ return amdgpu_gem_va_map_flags(adev, mapping_flags);
429421 }
430422
431423 /* add_bo_to_vm - Add a BO to a VM
....@@ -446,7 +438,6 @@
446438 {
447439 int ret;
448440 struct kfd_bo_va_list *bo_va_entry;
449
- struct amdgpu_bo *pd = vm->root.base.bo;
450441 struct amdgpu_bo *bo = mem->bo;
451442 uint64_t va = mem->va;
452443 struct list_head *list_bo_va = &mem->bo_va_list;
....@@ -477,42 +468,23 @@
477468 }
478469
479470 bo_va_entry->va = va;
480
- bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev,
481
- mem->mapping_flags);
471
+ bo_va_entry->pte_flags = get_pte_flags(adev, mem);
482472 bo_va_entry->kgd_dev = (void *)adev;
483473 list_add(&bo_va_entry->bo_list, list_bo_va);
484474
485475 if (p_bo_va_entry)
486476 *p_bo_va_entry = bo_va_entry;
487477
488
- /* Allocate new page tables if needed and validate
489
- * them. Clearing of new page tables and validate need to wait
490
- * on move fences. We don't want that to trigger the eviction
491
- * fence, so remove it temporarily.
492
- */
493
- amdgpu_amdkfd_remove_eviction_fence(pd,
494
- vm->process_info->eviction_fence,
495
- NULL, NULL);
496
-
497
- ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
498
- if (ret) {
499
- pr_err("Failed to allocate pts, err=%d\n", ret);
500
- goto err_alloc_pts;
501
- }
502
-
478
+ /* Allocate and validate page tables if needed */
503479 ret = vm_validate_pt_pd_bos(vm);
504480 if (ret) {
505481 pr_err("validate_pt_pd_bos() failed\n");
506482 goto err_alloc_pts;
507483 }
508484
509
- /* Add the eviction fence back */
510
- amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
511
-
512485 return 0;
513486
514487 err_alloc_pts:
515
- amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
516488 amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
517489 list_del(&bo_va_entry->bo_list);
518490 err_vmadd:
....@@ -539,13 +511,24 @@
539511 struct amdgpu_bo *bo = mem->bo;
540512
541513 INIT_LIST_HEAD(&entry->head);
542
- entry->shared = true;
514
+ entry->num_shared = 1;
543515 entry->bo = &bo->tbo;
544516 mutex_lock(&process_info->lock);
545517 if (userptr)
546518 list_add_tail(&entry->head, &process_info->userptr_valid_list);
547519 else
548520 list_add_tail(&entry->head, &process_info->kfd_bo_list);
521
+ mutex_unlock(&process_info->lock);
522
+}
523
+
524
+static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
525
+ struct amdkfd_process_info *process_info)
526
+{
527
+ struct ttm_validate_buffer *bo_list_entry;
528
+
529
+ bo_list_entry = &mem->validate_list;
530
+ mutex_lock(&process_info->lock);
531
+ list_del(&bo_list_entry->head);
549532 mutex_unlock(&process_info->lock);
550533 }
551534
....@@ -561,8 +544,7 @@
561544 *
562545 * Returns 0 for success, negative errno for errors.
563546 */
564
-static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
565
- uint64_t user_addr)
547
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
566548 {
567549 struct amdkfd_process_info *process_info = mem->process_info;
568550 struct amdgpu_bo *bo = mem->bo;
....@@ -571,7 +553,7 @@
571553
572554 mutex_lock(&process_info->lock);
573555
574
- ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
556
+ ret = amdgpu_ttm_tt_set_userptr(&bo->tbo, user_addr, 0);
575557 if (ret) {
576558 pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
577559 goto out;
....@@ -584,27 +566,11 @@
584566 goto out;
585567 }
586568
587
- /* If no restore worker is running concurrently, user_pages
588
- * should not be allocated
589
- */
590
- WARN(mem->user_pages, "Leaking user_pages array");
591
-
592
- mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
593
- sizeof(struct page *),
594
- GFP_KERNEL | __GFP_ZERO);
595
- if (!mem->user_pages) {
596
- pr_err("%s: Failed to allocate pages array\n", __func__);
597
- ret = -ENOMEM;
598
- goto unregister_out;
599
- }
600
-
601
- ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
569
+ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
602570 if (ret) {
603571 pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
604
- goto free_out;
572
+ goto unregister_out;
605573 }
606
-
607
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
608574
609575 ret = amdgpu_bo_reserve(bo, true);
610576 if (ret) {
....@@ -618,11 +584,7 @@
618584 amdgpu_bo_unreserve(bo);
619585
620586 release_out:
621
- if (ret)
622
- release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
623
-free_out:
624
- kvfree(mem->user_pages);
625
- mem->user_pages = NULL;
587
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
626588 unregister_out:
627589 if (ret)
628590 amdgpu_mn_unregister(bo);
....@@ -678,26 +640,24 @@
678640 if (!ctx->vm_pd)
679641 return -ENOMEM;
680642
681
- ctx->kfd_bo.robj = bo;
682643 ctx->kfd_bo.priority = 0;
683644 ctx->kfd_bo.tv.bo = &bo->tbo;
684
- ctx->kfd_bo.tv.shared = true;
685
- ctx->kfd_bo.user_pages = NULL;
645
+ ctx->kfd_bo.tv.num_shared = 1;
686646 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
687647
688648 amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
689649
690650 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
691651 false, &ctx->duplicates);
692
- if (!ret)
693
- ctx->reserved = true;
694
- else {
695
- pr_err("Failed to reserve buffers in ttm\n");
652
+ if (ret) {
653
+ pr_err("Failed to reserve buffers in ttm.\n");
696654 kfree(ctx->vm_pd);
697655 ctx->vm_pd = NULL;
656
+ return ret;
698657 }
699658
700
- return ret;
659
+ ctx->reserved = true;
660
+ return 0;
701661 }
702662
703663 /**
....@@ -743,11 +703,9 @@
743703 return -ENOMEM;
744704 }
745705
746
- ctx->kfd_bo.robj = bo;
747706 ctx->kfd_bo.priority = 0;
748707 ctx->kfd_bo.tv.bo = &bo->tbo;
749
- ctx->kfd_bo.tv.shared = true;
750
- ctx->kfd_bo.user_pages = NULL;
708
+ ctx->kfd_bo.tv.num_shared = 1;
751709 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
752710
753711 i = 0;
....@@ -764,17 +722,15 @@
764722
765723 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
766724 false, &ctx->duplicates);
767
- if (!ret)
768
- ctx->reserved = true;
769
- else
770
- pr_err("Failed to reserve buffers in ttm.\n");
771
-
772725 if (ret) {
726
+ pr_err("Failed to reserve buffers in ttm.\n");
773727 kfree(ctx->vm_pd);
774728 ctx->vm_pd = NULL;
729
+ return ret;
775730 }
776731
777
- return ret;
732
+ ctx->reserved = true;
733
+ return 0;
778734 }
779735
780736 /**
....@@ -813,25 +769,12 @@
813769 {
814770 struct amdgpu_bo_va *bo_va = entry->bo_va;
815771 struct amdgpu_vm *vm = bo_va->base.vm;
816
- struct amdgpu_bo *pd = vm->root.base.bo;
817772
818
- /* Remove eviction fence from PD (and thereby from PTs too as
819
- * they share the resv. object). Otherwise during PT update
820
- * job (see amdgpu_vm_bo_update_mapping), eviction fence would
821
- * get added to job->sync object and job execution would
822
- * trigger the eviction fence.
823
- */
824
- amdgpu_amdkfd_remove_eviction_fence(pd,
825
- vm->process_info->eviction_fence,
826
- NULL, NULL);
827773 amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
828774
829775 amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
830776
831
- /* Add the eviction fence back */
832
- amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
833
-
834
- sync_vm_fence(adev, sync, bo_va->last_pt_update);
777
+ amdgpu_sync_fence(sync, bo_va->last_pt_update);
835778
836779 return 0;
837780 }
....@@ -841,13 +784,7 @@
841784 struct amdgpu_sync *sync)
842785 {
843786 int ret;
844
- struct amdgpu_vm *vm;
845
- struct amdgpu_bo_va *bo_va;
846
- struct amdgpu_bo *bo;
847
-
848
- bo_va = entry->bo_va;
849
- vm = bo_va->base.vm;
850
- bo = bo_va->base.bo;
787
+ struct amdgpu_bo_va *bo_va = entry->bo_va;
851788
852789 /* Update the page tables */
853790 ret = amdgpu_vm_bo_update(adev, bo_va, false);
....@@ -856,7 +793,7 @@
856793 return ret;
857794 }
858795
859
- return sync_vm_fence(adev, sync, bo_va->last_pt_update);
796
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update);
860797 }
861798
862799 static int map_bo_to_gpuvm(struct amdgpu_device *adev,
....@@ -891,6 +828,24 @@
891828 return ret;
892829 }
893830
831
+static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
832
+{
833
+ struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
834
+
835
+ if (!sg)
836
+ return NULL;
837
+ if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
838
+ kfree(sg);
839
+ return NULL;
840
+ }
841
+ sg->sgl->dma_address = addr;
842
+ sg->sgl->length = size;
843
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
844
+ sg->sgl->dma_length = size;
845
+#endif
846
+ return sg;
847
+}
848
+
894849 static int process_validate_vms(struct amdkfd_process_info *process_info)
895850 {
896851 struct amdgpu_vm *peer_vm;
....@@ -899,6 +854,26 @@
899854 list_for_each_entry(peer_vm, &process_info->vm_list_head,
900855 vm_list_node) {
901856 ret = vm_validate_pt_pd_bos(peer_vm);
857
+ if (ret)
858
+ return ret;
859
+ }
860
+
861
+ return 0;
862
+}
863
+
864
+static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
865
+ struct amdgpu_sync *sync)
866
+{
867
+ struct amdgpu_vm *peer_vm;
868
+ int ret;
869
+
870
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
871
+ vm_list_node) {
872
+ struct amdgpu_bo *pd = peer_vm->root.base.bo;
873
+
874
+ ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv,
875
+ AMDGPU_SYNC_NE_OWNER,
876
+ AMDGPU_FENCE_OWNER_KFD);
902877 if (ret)
903878 return ret;
904879 }
....@@ -968,9 +943,13 @@
968943 pr_err("validate_pt_pd_bos() failed\n");
969944 goto validate_pd_fail;
970945 }
971
- ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false);
946
+ ret = amdgpu_bo_sync_wait(vm->root.base.bo,
947
+ AMDGPU_FENCE_OWNER_KFD, false);
972948 if (ret)
973949 goto wait_pd_fail;
950
+ ret = dma_resv_reserve_shared(vm->root.base.bo->tbo.base.resv, 1);
951
+ if (ret)
952
+ goto reserve_shared_fail;
974953 amdgpu_bo_fence(vm->root.base.bo,
975954 &vm->process_info->eviction_fence->base, true);
976955 amdgpu_bo_unreserve(vm->root.base.bo);
....@@ -984,6 +963,7 @@
984963
985964 return 0;
986965
966
+reserve_shared_fail:
987967 wait_pd_fail:
988968 validate_pd_fail:
989969 amdgpu_bo_unreserve(vm->root.base.bo);
....@@ -1003,8 +983,8 @@
1003983 return ret;
1004984 }
1005985
1006
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
1007
- void **process_info,
986
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, u32 pasid,
987
+ void **vm, void **process_info,
1008988 struct dma_fence **ef)
1009989 {
1010990 struct amdgpu_device *adev = get_amdgpu_device(kgd);
....@@ -1016,7 +996,7 @@
1016996 return -ENOMEM;
1017997
1018998 /* Initialize AMDGPU part of the VM */
1019
- ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
999
+ ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid);
10201000 if (ret) {
10211001 pr_err("Failed init vm ret %d\n", ret);
10221002 goto amdgpu_vm_init_fail;
....@@ -1039,22 +1019,26 @@
10391019 }
10401020
10411021 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
1042
- struct file *filp,
1022
+ struct file *filp, u32 pasid,
10431023 void **vm, void **process_info,
10441024 struct dma_fence **ef)
10451025 {
10461026 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1047
- struct drm_file *drm_priv = filp->private_data;
1048
- struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
1049
- struct amdgpu_vm *avm = &drv_priv->vm;
1027
+ struct amdgpu_fpriv *drv_priv;
1028
+ struct amdgpu_vm *avm;
10501029 int ret;
1030
+
1031
+ ret = amdgpu_file_to_fpriv(filp, &drv_priv);
1032
+ if (ret)
1033
+ return ret;
1034
+ avm = &drv_priv->vm;
10511035
10521036 /* Already a compute VM? */
10531037 if (avm->process_info)
10541038 return -EINVAL;
10551039
10561040 /* Convert VM into a compute VM */
1057
- ret = amdgpu_vm_make_compute(adev, avm);
1041
+ ret = amdgpu_vm_make_compute(adev, avm, pasid);
10581042 if (ret)
10591043 return ret;
10601044
....@@ -1088,6 +1072,8 @@
10881072 list_del(&vm->vm_list_node);
10891073 mutex_unlock(&process_info->lock);
10901074
1075
+ vm->process_info = NULL;
1076
+
10911077 /* Release per-process resources when last compute VM is destroyed */
10921078 if (!process_info->n_vms) {
10931079 WARN_ON(!list_empty(&process_info->kfd_bo_list));
....@@ -1117,11 +1103,34 @@
11171103 kfree(vm);
11181104 }
11191105
1120
-uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
1106
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
1107
+{
1108
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
1109
+ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1110
+
1111
+ if (WARN_ON(!kgd || !vm))
1112
+ return;
1113
+
1114
+ pr_debug("Releasing process vm %p\n", vm);
1115
+
1116
+ /* The original pasid of amdgpu vm has already been
1117
+ * released during making a amdgpu vm to a compute vm
1118
+ * The current pasid is managed by kfd and will be
1119
+ * released on kfd process destroy. Set amdgpu pasid
1120
+ * to 0 to avoid duplicate release.
1121
+ */
1122
+ amdgpu_vm_release_compute(adev, avm);
1123
+}
1124
+
1125
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
11211126 {
11221127 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1128
+ struct amdgpu_bo *pd = avm->root.base.bo;
1129
+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
11231130
1124
- return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
1131
+ if (adev->asic_type < CHIP_VEGA10)
1132
+ return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
1133
+ return avm->pd_phys_addr;
11251134 }
11261135
11271136 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
....@@ -1131,44 +1140,57 @@
11311140 {
11321141 struct amdgpu_device *adev = get_amdgpu_device(kgd);
11331142 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1143
+ enum ttm_bo_type bo_type = ttm_bo_type_device;
1144
+ struct sg_table *sg = NULL;
11341145 uint64_t user_addr = 0;
11351146 struct amdgpu_bo *bo;
11361147 struct amdgpu_bo_param bp;
1137
- int byte_align;
11381148 u32 domain, alloc_domain;
11391149 u64 alloc_flags;
1140
- uint32_t mapping_flags;
11411150 int ret;
11421151
11431152 /*
11441153 * Check on which domain to allocate BO
11451154 */
1146
- if (flags & ALLOC_MEM_FLAGS_VRAM) {
1155
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
11471156 domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
1148
- alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
1149
- alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
1157
+ alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
1158
+ alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
11501159 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
11511160 AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
1152
- } else if (flags & ALLOC_MEM_FLAGS_GTT) {
1161
+ } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
11531162 domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
11541163 alloc_flags = 0;
1155
- } else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
1164
+ } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
11561165 domain = AMDGPU_GEM_DOMAIN_GTT;
11571166 alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
11581167 alloc_flags = 0;
11591168 if (!offset || !*offset)
11601169 return -EINVAL;
11611170 user_addr = untagged_addr(*offset);
1171
+ } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
1172
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
1173
+ domain = AMDGPU_GEM_DOMAIN_GTT;
1174
+ alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
1175
+ bo_type = ttm_bo_type_sg;
1176
+ alloc_flags = 0;
1177
+ if (size > UINT_MAX)
1178
+ return -EINVAL;
1179
+ sg = create_doorbell_sg(*offset, size);
1180
+ if (!sg)
1181
+ return -ENOMEM;
11621182 } else {
11631183 return -EINVAL;
11641184 }
11651185
11661186 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
1167
- if (!*mem)
1168
- return -ENOMEM;
1187
+ if (!*mem) {
1188
+ ret = -ENOMEM;
1189
+ goto err;
1190
+ }
11691191 INIT_LIST_HEAD(&(*mem)->bo_va_list);
11701192 mutex_init(&(*mem)->lock);
1171
- (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
1193
+ (*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
11721194
11731195 /* Workaround for AQL queue wraparound bug. Map the same
11741196 * memory twice. That means we only actually allocate half
....@@ -1177,30 +1199,14 @@
11771199 if ((*mem)->aql_queue)
11781200 size = size >> 1;
11791201
1180
- /* Workaround for TLB bug on older VI chips */
1181
- byte_align = (adev->family == AMDGPU_FAMILY_VI &&
1182
- adev->asic_type != CHIP_FIJI &&
1183
- adev->asic_type != CHIP_POLARIS10 &&
1184
- adev->asic_type != CHIP_POLARIS11) ?
1185
- VI_BO_SIZE_ALIGN : 1;
1186
-
1187
- mapping_flags = AMDGPU_VM_PAGE_READABLE;
1188
- if (flags & ALLOC_MEM_FLAGS_WRITABLE)
1189
- mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
1190
- if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
1191
- mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
1192
- if (flags & ALLOC_MEM_FLAGS_COHERENT)
1193
- mapping_flags |= AMDGPU_VM_MTYPE_UC;
1194
- else
1195
- mapping_flags |= AMDGPU_VM_MTYPE_NC;
1196
- (*mem)->mapping_flags = mapping_flags;
1202
+ (*mem)->alloc_flags = flags;
11971203
11981204 amdgpu_sync_create(&(*mem)->sync);
11991205
1200
- ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
1206
+ ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
12011207 if (ret) {
12021208 pr_debug("Insufficient system memory\n");
1203
- goto err_reserve_system_mem;
1209
+ goto err_reserve_limit;
12041210 }
12051211
12061212 pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
....@@ -1208,16 +1214,20 @@
12081214
12091215 memset(&bp, 0, sizeof(bp));
12101216 bp.size = size;
1211
- bp.byte_align = byte_align;
1217
+ bp.byte_align = 1;
12121218 bp.domain = alloc_domain;
12131219 bp.flags = alloc_flags;
1214
- bp.type = ttm_bo_type_device;
1220
+ bp.type = bo_type;
12151221 bp.resv = NULL;
12161222 ret = amdgpu_bo_create(adev, &bp, &bo);
12171223 if (ret) {
12181224 pr_debug("Failed to create BO on domain %s. ret %d\n",
12191225 domain_string(alloc_domain), ret);
12201226 goto err_bo_create;
1227
+ }
1228
+ if (bo_type == ttm_bo_type_sg) {
1229
+ bo->tbo.sg = sg;
1230
+ bo->tbo.ttm->sg = sg;
12211231 }
12221232 bo->kfd_bo = *mem;
12231233 (*mem)->bo = bo;
....@@ -1231,13 +1241,9 @@
12311241 add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
12321242
12331243 if (user_addr) {
1234
- ret = init_user_pages(*mem, current->mm, user_addr);
1235
- if (ret) {
1236
- mutex_lock(&avm->process_info->lock);
1237
- list_del(&(*mem)->validate_list.head);
1238
- mutex_unlock(&avm->process_info->lock);
1244
+ ret = init_user_pages(*mem, user_addr);
1245
+ if (ret)
12391246 goto allocate_init_user_pages_failed;
1240
- }
12411247 }
12421248
12431249 if (offset)
....@@ -1246,43 +1252,48 @@
12461252 return 0;
12471253
12481254 allocate_init_user_pages_failed:
1255
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
12491256 amdgpu_bo_unref(&bo);
12501257 /* Don't unreserve system mem limit twice */
1251
- goto err_reserve_system_mem;
1258
+ goto err_reserve_limit;
12521259 err_bo_create:
1253
- unreserve_system_mem_limit(adev, size, alloc_domain);
1254
-err_reserve_system_mem:
1260
+ unreserve_mem_limit(adev, size, alloc_domain, !!sg);
1261
+err_reserve_limit:
12551262 mutex_destroy(&(*mem)->lock);
12561263 kfree(*mem);
1264
+err:
1265
+ if (sg) {
1266
+ sg_free_table(sg);
1267
+ kfree(sg);
1268
+ }
12571269 return ret;
12581270 }
12591271
12601272 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
1261
- struct kgd_dev *kgd, struct kgd_mem *mem)
1273
+ struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
12621274 {
12631275 struct amdkfd_process_info *process_info = mem->process_info;
12641276 unsigned long bo_size = mem->bo->tbo.mem.size;
12651277 struct kfd_bo_va_list *entry, *tmp;
12661278 struct bo_vm_reservation_context ctx;
12671279 struct ttm_validate_buffer *bo_list_entry;
1280
+ unsigned int mapped_to_gpu_memory;
12681281 int ret;
1282
+ bool is_imported = 0;
12691283
12701284 mutex_lock(&mem->lock);
1271
-
1272
- if (mem->mapped_to_gpu_memory > 0) {
1273
- pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
1274
- mem->va, bo_size);
1275
- mutex_unlock(&mem->lock);
1276
- return -EBUSY;
1277
- }
1278
-
1285
+ mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
1286
+ is_imported = mem->is_imported;
12791287 mutex_unlock(&mem->lock);
12801288 /* lock is not needed after this, since mem is unused and will
12811289 * be freed anyway
12821290 */
12831291
1284
- /* No more MMU notifiers */
1285
- amdgpu_mn_unregister(mem->bo);
1292
+ if (mapped_to_gpu_memory > 0) {
1293
+ pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
1294
+ mem->va, bo_size);
1295
+ return -EBUSY;
1296
+ }
12861297
12871298 /* Make sure restore workers don't access the BO any more */
12881299 bo_list_entry = &mem->validate_list;
....@@ -1290,14 +1301,8 @@
12901301 list_del(&bo_list_entry->head);
12911302 mutex_unlock(&process_info->lock);
12921303
1293
- /* Free user pages if necessary */
1294
- if (mem->user_pages) {
1295
- pr_debug("%s: Freeing user_pages array\n", __func__);
1296
- if (mem->user_pages[0])
1297
- release_pages(mem->user_pages,
1298
- mem->bo->tbo.ttm->num_pages);
1299
- kvfree(mem->user_pages);
1300
- }
1304
+ /* No more MMU notifiers */
1305
+ amdgpu_mn_unregister(mem->bo);
13011306
13021307 ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
13031308 if (unlikely(ret))
....@@ -1308,8 +1313,7 @@
13081313 * attached
13091314 */
13101315 amdgpu_amdkfd_remove_eviction_fence(mem->bo,
1311
- process_info->eviction_fence,
1312
- NULL, NULL);
1316
+ process_info->eviction_fence);
13131317 pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
13141318 mem->va + bo_size * (1 + mem->aql_queue));
13151319
....@@ -1323,8 +1327,27 @@
13231327 /* Free the sync object */
13241328 amdgpu_sync_free(&mem->sync);
13251329
1330
+ /* If the SG is not NULL, it's one we created for a doorbell or mmio
1331
+ * remap BO. We need to free it.
1332
+ */
1333
+ if (mem->bo->tbo.sg) {
1334
+ sg_free_table(mem->bo->tbo.sg);
1335
+ kfree(mem->bo->tbo.sg);
1336
+ }
1337
+
1338
+ /* Update the size of the BO being freed if it was allocated from
1339
+ * VRAM and is not imported.
1340
+ */
1341
+ if (size) {
1342
+ if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
1343
+ (!is_imported))
1344
+ *size = bo_size;
1345
+ else
1346
+ *size = 0;
1347
+ }
1348
+
13261349 /* Free the BO*/
1327
- amdgpu_bo_unref(&mem->bo);
1350
+ drm_gem_object_put(&mem->bo->tbo.base);
13281351 mutex_destroy(&mem->lock);
13291352 kfree(mem);
13301353
....@@ -1363,9 +1386,9 @@
13631386 * concurrently and the queues are actually stopped
13641387 */
13651388 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
1366
- down_write(&current->mm->mmap_sem);
1389
+ mmap_write_lock(current->mm);
13671390 is_invalid_userptr = atomic_read(&mem->invalid);
1368
- up_write(&current->mm->mmap_sem);
1391
+ mmap_write_unlock(current->mm);
13691392 }
13701393
13711394 mutex_lock(&mem->lock);
....@@ -1387,7 +1410,8 @@
13871410 * the queues are still stopped and we can leave mapping for
13881411 * the next restore worker
13891412 */
1390
- if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
1413
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
1414
+ bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
13911415 is_invalid_userptr = true;
13921416
13931417 if (check_if_add_bo_to_vm(avm, mem)) {
....@@ -1429,7 +1453,7 @@
14291453 ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
14301454 is_invalid_userptr);
14311455 if (ret) {
1432
- pr_err("Failed to map radeon bo to gpuvm\n");
1456
+ pr_err("Failed to map bo to gpuvm\n");
14331457 goto map_bo_to_gpuvm_failed;
14341458 }
14351459
....@@ -1527,8 +1551,7 @@
15271551 if (mem->mapped_to_gpu_memory == 0 &&
15281552 !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
15291553 amdgpu_amdkfd_remove_eviction_fence(mem->bo,
1530
- process_info->eviction_fence,
1531
- NULL, NULL);
1554
+ process_info->eviction_fence);
15321555
15331556 unreserve_out:
15341557 unreserve_bo_and_vms(&ctx, false, false);
....@@ -1589,7 +1612,7 @@
15891612 }
15901613
15911614 amdgpu_amdkfd_remove_eviction_fence(
1592
- bo, mem->process_info->eviction_fence, NULL, NULL);
1615
+ bo, mem->process_info->eviction_fence);
15931616 list_del_init(&mem->validate_list.head);
15941617
15951618 if (size)
....@@ -1624,6 +1647,65 @@
16241647 return 0;
16251648 }
16261649
1650
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
1651
+ struct dma_buf *dma_buf,
1652
+ uint64_t va, void *vm,
1653
+ struct kgd_mem **mem, uint64_t *size,
1654
+ uint64_t *mmap_offset)
1655
+{
1656
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
1657
+ struct drm_gem_object *obj;
1658
+ struct amdgpu_bo *bo;
1659
+ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1660
+
1661
+ if (dma_buf->ops != &amdgpu_dmabuf_ops)
1662
+ /* Can't handle non-graphics buffers */
1663
+ return -EINVAL;
1664
+
1665
+ obj = dma_buf->priv;
1666
+ if (drm_to_adev(obj->dev) != adev)
1667
+ /* Can't handle buffers from other devices */
1668
+ return -EINVAL;
1669
+
1670
+ bo = gem_to_amdgpu_bo(obj);
1671
+ if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
1672
+ AMDGPU_GEM_DOMAIN_GTT)))
1673
+ /* Only VRAM and GTT BOs are supported */
1674
+ return -EINVAL;
1675
+
1676
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
1677
+ if (!*mem)
1678
+ return -ENOMEM;
1679
+
1680
+ if (size)
1681
+ *size = amdgpu_bo_size(bo);
1682
+
1683
+ if (mmap_offset)
1684
+ *mmap_offset = amdgpu_bo_mmap_offset(bo);
1685
+
1686
+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
1687
+ mutex_init(&(*mem)->lock);
1688
+
1689
+ (*mem)->alloc_flags =
1690
+ ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
1691
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
1692
+ | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
1693
+ | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
1694
+
1695
+ drm_gem_object_get(&bo->tbo.base);
1696
+ (*mem)->bo = bo;
1697
+ (*mem)->va = va;
1698
+ (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
1699
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
1700
+ (*mem)->mapped_to_gpu_memory = 0;
1701
+ (*mem)->process_info = avm->process_info;
1702
+ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
1703
+ amdgpu_sync_create(&(*mem)->sync);
1704
+ (*mem)->is_imported = true;
1705
+
1706
+ return 0;
1707
+}
1708
+
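As a concrete example of the dma-buf import path (assuming a dma-buf exported from a VRAM BO on the same device): the checks above require the exporter to be amdgpu on the same adev and the BO to prefer VRAM or GTT; the resulting kgd_mem then carries alloc_flags VRAM | WRITABLE | EXECUTABLE, domain VRAM and is_imported = true, and it holds an extra GEM reference on the existing BO instead of making a new allocation, which is why amdgpu_amdkfd_gpuvm_free_memory_of_gpu() reports *size = 0 for imported BOs.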
16271709 /* Evict a userptr BO by stopping the queues if necessary
16281710 *
16291711 * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
....@@ -1639,14 +1721,14 @@
16391721 struct mm_struct *mm)
16401722 {
16411723 struct amdkfd_process_info *process_info = mem->process_info;
1642
- int invalid, evicted_bos;
1724
+ int evicted_bos;
16431725 int r = 0;
16441726
1645
- invalid = atomic_inc_return(&mem->invalid);
1727
+ atomic_inc(&mem->invalid);
16461728 evicted_bos = atomic_inc_return(&process_info->evicted_bos);
16471729 if (evicted_bos == 1) {
16481730 /* First eviction, stop the queues */
1649
- r = kgd2kfd->quiesce_mm(mm);
1731
+ r = kgd2kfd_quiesce_mm(mm);
16501732 if (r)
16511733 pr_err("Failed to quiesce KFD\n");
16521734 schedule_delayed_work(&process_info->restore_userptr_work,
....@@ -1711,36 +1793,24 @@
17111793
17121794 bo = mem->bo;
17131795
1714
- if (!mem->user_pages) {
1715
- mem->user_pages =
1716
- kvmalloc_array(bo->tbo.ttm->num_pages,
1717
- sizeof(struct page *),
1718
- GFP_KERNEL | __GFP_ZERO);
1719
- if (!mem->user_pages) {
1720
- pr_err("%s: Failed to allocate pages array\n",
1721
- __func__);
1722
- return -ENOMEM;
1723
- }
1724
- } else if (mem->user_pages[0]) {
1725
- release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
1796
+ /* Get updated user pages */
1797
+ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
1798
+ if (ret) {
1799
+ pr_debug("%s: Failed to get user pages: %d\n",
1800
+ __func__, ret);
1801
+
1802
+ /* Return error -EBUSY or -ENOMEM, retry restore */
1803
+ return ret;
17261804 }
17271805
1728
- /* Get updated user pages */
1729
- ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
1730
- mem->user_pages);
1731
- if (ret) {
1732
- mem->user_pages[0] = NULL;
1733
- pr_info("%s: Failed to get user pages: %d\n",
1734
- __func__, ret);
1735
- /* Pretend it succeeded. It will fail later
1736
- * with a VM fault if the GPU tries to access
1737
- * it. Better than hanging indefinitely with
1738
- * stalled user mode queues.
1739
- */
1740
- }
1806
+ /*
1807
+ * FIXME: Cannot ignore the return code, must hold
1808
+ * notifier_lock
1809
+ */
1810
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
17411811
17421812 /* Mark the BO as valid unless it was invalidated
1743
- * again concurrently
1813
+ * again concurrently.
17441814 */
17451815 if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
17461816 return -EAGAIN;
....@@ -1773,7 +1843,8 @@
17731843 GFP_KERNEL);
17741844 if (!pd_bo_list_entries) {
17751845 pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
1776
- return -ENOMEM;
1846
+ ret = -ENOMEM;
1847
+ goto out_no_mem;
17771848 }
17781849
17791850 INIT_LIST_HEAD(&resv_list);
....@@ -1790,26 +1861,16 @@
17901861 validate_list.head) {
17911862 list_add_tail(&mem->resv_list.head, &resv_list);
17921863 mem->resv_list.bo = mem->validate_list.bo;
1793
- mem->resv_list.shared = mem->validate_list.shared;
1864
+ mem->resv_list.num_shared = mem->validate_list.num_shared;
17941865 }
17951866
17961867 /* Reserve all BOs and page tables for validation */
17971868 ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
17981869 WARN(!list_empty(&duplicates), "Duplicates should be empty");
17991870 if (ret)
1800
- goto out;
1871
+ goto out_free;
18011872
18021873 amdgpu_sync_create(&sync);
1803
-
1804
- /* Avoid triggering eviction fences when unmapping invalid
1805
- * userptr BOs (waits for all fences, doesn't use
1806
- * FENCE_OWNER_VM)
1807
- */
1808
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
1809
- vm_list_node)
1810
- amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
1811
- process_info->eviction_fence,
1812
- NULL, NULL);
18131874
18141875 ret = process_validate_vms(process_info);
18151876 if (ret)
....@@ -1823,10 +1884,8 @@
18231884
18241885 bo = mem->bo;
18251886
1826
- /* Copy pages array and validate the BO if we got user pages */
1827
- if (mem->user_pages[0]) {
1828
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
1829
- mem->user_pages);
1887
+ /* Validate the BO if we got user pages */
1888
+ if (bo->tbo.ttm->pages[0]) {
18301889 amdgpu_bo_placement_from_domain(bo, mem->domain);
18311890 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
18321891 if (ret) {
....@@ -1835,13 +1894,6 @@
18351894 }
18361895 }
18371896
1838
- /* Validate succeeded, now the BO owns the pages, free
1839
- * our copy of the pointer array. Put this BO back on
1840
- * the userptr_valid_list. If we need to revalidate
1841
- * it, we need to start from scratch.
1842
- */
1843
- kvfree(mem->user_pages);
1844
- mem->user_pages = NULL;
18451897 list_move_tail(&mem->validate_list.head,
18461898 &process_info->userptr_valid_list);
18471899
....@@ -1871,15 +1923,12 @@
18711923 ret = process_update_pds(process_info, &sync);
18721924
18731925 unreserve_out:
1874
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
1875
- vm_list_node)
1876
- amdgpu_bo_fence(peer_vm->root.base.bo,
1877
- &process_info->eviction_fence->base, true);
18781926 ttm_eu_backoff_reservation(&ticket, &resv_list);
18791927 amdgpu_sync_wait(&sync, false);
18801928 amdgpu_sync_free(&sync);
1881
-out:
1929
+out_free:
18821930 kfree(pd_bo_list_entries);
1931
+out_no_mem:
18831932
18841933 return ret;
18851934 }
....@@ -1938,12 +1987,13 @@
19381987 evicted_bos)
19391988 goto unlock_out;
19401989 evicted_bos = 0;
1941
- if (kgd2kfd->resume_mm(mm)) {
1990
+ if (kgd2kfd_resume_mm(mm)) {
19421991 pr_err("%s: Failed to resume KFD\n", __func__);
19431992 /* No recovery from this failure. Probably the CP is
19441993 * hanging. No point trying again.
19451994 */
19461995 }
1996
+
19471997 unlock_out:
19481998 mutex_unlock(&process_info->lock);
19491999 mmput(mm);
....@@ -2009,7 +2059,7 @@
20092059
20102060 list_add_tail(&mem->resv_list.head, &ctx.list);
20112061 mem->resv_list.bo = mem->validate_list.bo;
2012
- mem->resv_list.shared = mem->validate_list.shared;
2062
+ mem->resv_list.num_shared = mem->validate_list.num_shared;
20132063 }
20142064
20152065 ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
....@@ -2026,13 +2076,10 @@
20262076 if (ret)
20272077 goto validate_map_fail;
20282078
2029
- /* Wait for PD/PTs validate to finish */
2030
- /* FIXME: I think this isn't needed */
2031
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
2032
- vm_list_node) {
2033
- struct amdgpu_bo *bo = peer_vm->root.base.bo;
2034
-
2035
- ttm_bo_wait(&bo->tbo, false, false);
2079
+ ret = process_sync_pds_resv(process_info, &sync_obj);
2080
+ if (ret) {
2081
+ pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
2082
+ goto validate_map_fail;
20362083 }
20372084
20382085 /* Validate BOs and map them to GPUVM (update VM page tables). */
....@@ -2048,7 +2095,11 @@
20482095 pr_debug("Memory eviction: Validate BOs failed. Try again\n");
20492096 goto validate_map_fail;
20502097 }
2051
-
2098
+ ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
2099
+ if (ret) {
2100
+ pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
2101
+ goto validate_map_fail;
2102
+ }
20522103 list_for_each_entry(bo_va_entry, &mem->bo_va_list,
20532104 bo_list) {
20542105 ret = update_gpuvm_pte((struct amdgpu_device *)
....@@ -2069,6 +2120,7 @@
20692120 goto validate_map_fail;
20702121 }
20712122
2123
+ /* Wait for validate and PT updates to finish */
20722124 amdgpu_sync_wait(&sync_obj, false);
20732125
20742126 /* Release old eviction fence and create new one, because fence only
....@@ -2087,10 +2139,7 @@
20872139 process_info->eviction_fence = new_fence;
20882140 *ef = dma_fence_get(&new_fence->base);
20892141
2090
- /* Wait for validate to finish and attach new eviction fence */
2091
- list_for_each_entry(mem, &process_info->kfd_bo_list,
2092
- validate_list.head)
2093
- ttm_bo_wait(&mem->bo->tbo, false, false);
2142
+ /* Attach new eviction fence to all BOs */
20942143 list_for_each_entry(mem, &process_info->kfd_bo_list,
20952144 validate_list.head)
20962145 amdgpu_bo_fence(mem->bo,
....@@ -2112,3 +2161,115 @@
21122161 kfree(pd_bo_list);
21132162 return ret;
21142163 }
2164
+
2165
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
2166
+{
2167
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
2168
+ struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
2169
+ int ret;
2170
+
2171
+ if (!info || !gws)
2172
+ return -EINVAL;
2173
+
2174
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
2175
+ if (!*mem)
2176
+ return -ENOMEM;
2177
+
2178
+ mutex_init(&(*mem)->lock);
2179
+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
2180
+ (*mem)->bo = amdgpu_bo_ref(gws_bo);
2181
+ (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
2182
+ (*mem)->process_info = process_info;
2183
+ add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
2184
+ amdgpu_sync_create(&(*mem)->sync);
2185
+
2186
+
2187
+ /* Validate gws bo the first time it is added to process */
2188
+ mutex_lock(&(*mem)->process_info->lock);
2189
+ ret = amdgpu_bo_reserve(gws_bo, false);
2190
+ if (unlikely(ret)) {
2191
+ pr_err("Reserve gws bo failed %d\n", ret);
2192
+ goto bo_reservation_failure;
2193
+ }
2194
+
2195
+ ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
2196
+ if (ret) {
2197
+ pr_err("GWS BO validate failed %d\n", ret);
2198
+ goto bo_validation_failure;
2199
+ }
2200
+ /* GWS resource is shared b/t amdgpu and amdkfd
2201
+ * Add process eviction fence to bo so they can
2202
+ * evict each other.
2203
+ */
2204
+ ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1);
2205
+ if (ret)
2206
+ goto reserve_shared_fail;
2207
+ amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
2208
+ amdgpu_bo_unreserve(gws_bo);
2209
+ mutex_unlock(&(*mem)->process_info->lock);
2210
+
2211
+ return ret;
2212
+
2213
+reserve_shared_fail:
2214
+bo_validation_failure:
2215
+ amdgpu_bo_unreserve(gws_bo);
2216
+bo_reservation_failure:
2217
+ mutex_unlock(&(*mem)->process_info->lock);
2218
+ amdgpu_sync_free(&(*mem)->sync);
2219
+ remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
2220
+ amdgpu_bo_unref(&gws_bo);
2221
+ mutex_destroy(&(*mem)->lock);
2222
+ kfree(*mem);
2223
+ *mem = NULL;
2224
+ return ret;
2225
+}
2226
+
2227
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
2228
+{
2229
+ int ret;
2230
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
2231
+ struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
2232
+ struct amdgpu_bo *gws_bo = kgd_mem->bo;
2233
+
2234
+ /* Remove BO from process's validate list so restore worker won't touch
2235
+ * it anymore
2236
+ */
2237
+ remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
2238
+
2239
+ ret = amdgpu_bo_reserve(gws_bo, false);
2240
+ if (unlikely(ret)) {
2241
+ pr_err("Reserve gws bo failed %d\n", ret);
2242
+ //TODO add BO back to validate_list?
2243
+ return ret;
2244
+ }
2245
+ amdgpu_amdkfd_remove_eviction_fence(gws_bo,
2246
+ process_info->eviction_fence);
2247
+ amdgpu_bo_unreserve(gws_bo);
2248
+ amdgpu_sync_free(&kgd_mem->sync);
2249
+ amdgpu_bo_unref(&gws_bo);
2250
+ mutex_destroy(&kgd_mem->lock);
2251
+ kfree(mem);
2252
+ return 0;
2253
+}
2254
+
2255
+/* Returns GPU-specific tiling mode information */
2256
+int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
2257
+ struct tile_config *config)
2258
+{
2259
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
2260
+
2261
+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
2262
+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
2263
+ config->num_tile_configs =
2264
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2265
+ config->macro_tile_config_ptr =
2266
+ adev->gfx.config.macrotile_mode_array;
2267
+ config->num_macro_tile_configs =
2268
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2269
+
2270
+ /* Those values are not set from GFX9 onwards */
2271
+ config->num_banks = adev->gfx.config.num_banks;
2272
+ config->num_ranks = adev->gfx.config.num_ranks;
2273
+
2274
+ return 0;
2275
+}
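
A minimal caller sketch for the tile-config helper (hypothetical usage, not part of this patch; it assumes a valid struct kgd_dev *kgd and only reads fields that the function fills in above):

    /* Hypothetical helper: dump the tile configuration reported for a device. */
    static void kfd_dump_tile_config(struct kgd_dev *kgd)
    {
            struct tile_config cfg;

            /* amdgpu_amdkfd_get_tile_config() always returns 0 above */
            if (!amdgpu_amdkfd_get_tile_config(kgd, &cfg))
                    pr_debug("gb_addr_config 0x%08x, %u tile configs, %u macro tile configs\n",
                             cfg.gb_addr_config, cfg.num_tile_configs,
                             cfg.num_macro_tile_configs);
    }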