2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
....@@ -19,21 +19,17 @@
1919 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
2020 * OTHER DEALINGS IN THE SOFTWARE.
2121 */
22
-
23
-#define pr_fmt(fmt) "kfd2kgd: " fmt
24
-
22
+#include <linux/dma-buf.h>
2523 #include <linux/list.h>
2624 #include <linux/pagemap.h>
2725 #include <linux/sched/mm.h>
28
-#include <drm/drmP.h>
26
+#include <linux/sched/task.h>
27
+
2928 #include "amdgpu_object.h"
3029 #include "amdgpu_vm.h"
3130 #include "amdgpu_amdkfd.h"
32
-
33
-/* Special VM and GART address alignment needed for VI pre-Fiji due to
34
- * a HW bug.
35
- */
36
-#define VI_BO_SIZE_ALIGN (0x8000)
31
+#include "amdgpu_dma_buf.h"
32
+#include <uapi/linux/kfd_ioctl.h>
3733
3834 /* BO flag to indicate a KFD userptr BO */
3935 #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
....@@ -46,17 +42,11 @@
4642 /* Impose limit on how much memory KFD can use */
4743 static struct {
4844 uint64_t max_system_mem_limit;
49
- uint64_t max_userptr_mem_limit;
45
+ uint64_t max_ttm_mem_limit;
5046 int64_t system_mem_used;
51
- int64_t userptr_mem_used;
47
+ int64_t ttm_mem_used;
5248 spinlock_t mem_limit_lock;
5349 } kfd_mem_limit;
54
-
55
-/* Struct used for amdgpu_amdkfd_bo_validate */
56
-struct amdgpu_vm_parser {
57
- uint32_t domain;
58
- bool wait;
59
-};
6050
6151 static const char * const domain_bit_to_string[] = {
6252 "CPU",
....@@ -90,8 +80,8 @@
9080 }
9181
9282 /* Set memory usage limits. Current, limits are
93
- * System (kernel) memory - 3/8th System RAM
94
- * Userptr memory - 3/4th System RAM
83
+ * System (TTM + userptr) memory - 15/16th System RAM
84
+ * TTM memory - 3/8th System RAM
9585 */
9686 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
9787 {
....@@ -103,48 +93,78 @@
10393 mem *= si.mem_unit;
10494
10595 spin_lock_init(&kfd_mem_limit.mem_limit_lock);
106
- kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
107
- kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
108
- pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
96
+ kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
97
+ kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
98
+ pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
10999 (kfd_mem_limit.max_system_mem_limit >> 20),
110
- (kfd_mem_limit.max_userptr_mem_limit >> 20));
100
+ (kfd_mem_limit.max_ttm_mem_limit >> 20));
111101 }
112102
113
-static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
114
- uint64_t size, u32 domain)
103
+/* Estimate page table size needed to represent a given memory size
104
+ *
105
+ * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
106
+ * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
107
+ * of memory (factor 256K, >> 18). ROCm user mode tries to optimize
108
+ * for 2MB pages for TLB efficiency. However, small allocations and
109
+ * fragmented system memory still need some 4KB pages. We choose a
110
+ * compromise that should work in most cases without reserving too
111
+ * much memory for page tables unnecessarily (factor 16K, >> 14).
112
+ */
113
+#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
114
+
115
+static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
116
+ uint64_t size, u32 domain, bool sg)
115117 {
116
- size_t acc_size;
118
+ uint64_t reserved_for_pt =
119
+ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
120
+ size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
117121 int ret = 0;
118122
119123 acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
120124 sizeof(struct amdgpu_bo));
121125
122
- spin_lock(&kfd_mem_limit.mem_limit_lock);
126
+ vram_needed = 0;
123127 if (domain == AMDGPU_GEM_DOMAIN_GTT) {
124
- if (kfd_mem_limit.system_mem_used + (acc_size + size) >
125
- kfd_mem_limit.max_system_mem_limit) {
126
- ret = -ENOMEM;
127
- goto err_no_mem;
128
- }
129
- kfd_mem_limit.system_mem_used += (acc_size + size);
130
- } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
131
- if ((kfd_mem_limit.system_mem_used + acc_size >
132
- kfd_mem_limit.max_system_mem_limit) ||
133
- (kfd_mem_limit.userptr_mem_used + (size + acc_size) >
134
- kfd_mem_limit.max_userptr_mem_limit)) {
135
- ret = -ENOMEM;
136
- goto err_no_mem;
137
- }
138
- kfd_mem_limit.system_mem_used += acc_size;
139
- kfd_mem_limit.userptr_mem_used += size;
128
+ /* TTM GTT memory */
129
+ system_mem_needed = acc_size + size;
130
+ ttm_mem_needed = acc_size + size;
131
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
132
+ /* Userptr */
133
+ system_mem_needed = acc_size + size;
134
+ ttm_mem_needed = acc_size;
135
+ } else {
136
+ /* VRAM and SG */
137
+ system_mem_needed = acc_size;
138
+ ttm_mem_needed = acc_size;
139
+ if (domain == AMDGPU_GEM_DOMAIN_VRAM)
140
+ vram_needed = size;
140141 }
141
-err_no_mem:
142
+
143
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
144
+
145
+ if (kfd_mem_limit.system_mem_used + system_mem_needed >
146
+ kfd_mem_limit.max_system_mem_limit)
147
+ pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
148
+
149
+ if ((kfd_mem_limit.system_mem_used + system_mem_needed >
150
+ kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
151
+ (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
152
+ kfd_mem_limit.max_ttm_mem_limit) ||
153
+ (adev->kfd.vram_used + vram_needed >
154
+ adev->gmc.real_vram_size - reserved_for_pt)) {
155
+ ret = -ENOMEM;
156
+ } else {
157
+ kfd_mem_limit.system_mem_used += system_mem_needed;
158
+ kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
159
+ adev->kfd.vram_used += vram_needed;
160
+ }
161
+
142162 spin_unlock(&kfd_mem_limit.mem_limit_lock);
143163 return ret;
144164 }
145165
146
-static void unreserve_system_mem_limit(struct amdgpu_device *adev,
147
- uint64_t size, u32 domain)
166
+static void unreserve_mem_limit(struct amdgpu_device *adev,
167
+ uint64_t size, u32 domain, bool sg)
148168 {
149169 size_t acc_size;
150170
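A quick way to sanity-check the new accounting above, outside the kernel: the sketch below (plain C, standalone, not part of this patch) reproduces the formulas from amdgpu_amdkfd_gpuvm_init_mem_limits(), ESTIMATE_PT_SIZE() and amdgpu_amdkfd_reserve_mem_limit(). The 64 GiB RAM and 16 GiB VRAM figures are made up for illustration, and ESTIMATE_PT_SIZE() is simply applied here to RAM + VRAM rather than to the driver's accumulated amdgpu_amdkfd_total_mem_size.

```c
/* Standalone illustration, not part of this patch. */
#include <stdint.h>
#include <stdio.h>

#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)	/* 1/16K, as above */

int main(void)
{
	uint64_t mem  = 64ULL << 30;	/* hypothetical system RAM  */
	uint64_t vram = 16ULL << 30;	/* hypothetical device VRAM */

	uint64_t max_system = mem - (mem >> 4);		/* 15/16 of RAM */
	uint64_t max_ttm    = (mem >> 1) - (mem >> 3);	/*  3/8 of RAM  */
	uint64_t pt_reserve = ESTIMATE_PT_SIZE(mem + vram);

	printf("system limit %lluM, TTM limit %lluM, PT reserve %lluM\n",
	       (unsigned long long)(max_system >> 20),
	       (unsigned long long)(max_ttm >> 20),
	       (unsigned long long)(pt_reserve >> 20));

	/* Charging rules in amdgpu_amdkfd_reserve_mem_limit():
	 *   GTT BO:      system += size + acc_size,  ttm += size + acc_size
	 *   userptr BO:  system += size + acc_size,  ttm += acc_size
	 *   VRAM/SG BO:  system += acc_size,         ttm += acc_size,
	 *                and for VRAM, vram_used += size is checked against
	 *                real_vram_size - pt_reserve
	 */
	return 0;
}
```

For these made-up figures that works out to a 60 GiB system limit, a 24 GiB TTM limit, and 5 MiB held back from VRAM for page tables.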
....@@ -154,71 +174,62 @@
154174 spin_lock(&kfd_mem_limit.mem_limit_lock);
155175 if (domain == AMDGPU_GEM_DOMAIN_GTT) {
156176 kfd_mem_limit.system_mem_used -= (acc_size + size);
157
- } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
177
+ kfd_mem_limit.ttm_mem_used -= (acc_size + size);
178
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
179
+ kfd_mem_limit.system_mem_used -= (acc_size + size);
180
+ kfd_mem_limit.ttm_mem_used -= acc_size;
181
+ } else {
158182 kfd_mem_limit.system_mem_used -= acc_size;
159
- kfd_mem_limit.userptr_mem_used -= size;
183
+ kfd_mem_limit.ttm_mem_used -= acc_size;
184
+ if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
185
+ adev->kfd.vram_used -= size;
186
+ WARN_ONCE(adev->kfd.vram_used < 0,
187
+ "kfd VRAM memory accounting unbalanced");
188
+ }
160189 }
161190 WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
162191 "kfd system memory accounting unbalanced");
163
- WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
164
- "kfd userptr memory accounting unbalanced");
192
+ WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
193
+ "kfd TTM memory accounting unbalanced");
165194
166195 spin_unlock(&kfd_mem_limit.mem_limit_lock);
167196 }
168197
169
-void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
198
+void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
170199 {
171
- spin_lock(&kfd_mem_limit.mem_limit_lock);
200
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
201
+ u32 domain = bo->preferred_domains;
202
+ bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
172203
173204 if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
174
- kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
175
- kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
176
- } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
177
- kfd_mem_limit.system_mem_used -=
178
- (bo->tbo.acc_size + amdgpu_bo_size(bo));
205
+ domain = AMDGPU_GEM_DOMAIN_CPU;
206
+ sg = false;
179207 }
180
- WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
181
- "kfd system memory accounting unbalanced");
182
- WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
183
- "kfd userptr memory accounting unbalanced");
184208
185
- spin_unlock(&kfd_mem_limit.mem_limit_lock);
209
+ unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
186210 }
187211
188212
189
-/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
213
+/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
190214 * reservation object.
191215 *
192216 * @bo: [IN] Remove eviction fence(s) from this BO
193
- * @ef: [IN] If ef is specified, then this eviction fence is removed if it
217
+ * @ef: [IN] This eviction fence is removed if it
194218 * is present in the shared list.
195
- * @ef_list: [OUT] Returns list of eviction fences. These fences are removed
196
- * from BO's reservation object shared list.
197
- * @ef_count: [OUT] Number of fences in ef_list.
198219 *
199
- * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
200
- * called to restore the eviction fences and to avoid memory leak. This is
201
- * useful for shared BOs.
202220 * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
203221 */
204222 static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
205
- struct amdgpu_amdkfd_fence *ef,
206
- struct amdgpu_amdkfd_fence ***ef_list,
207
- unsigned int *ef_count)
223
+ struct amdgpu_amdkfd_fence *ef)
208224 {
209
- struct reservation_object *resv = bo->tbo.resv;
210
- struct reservation_object_list *old, *new;
225
+ struct dma_resv *resv = bo->tbo.base.resv;
226
+ struct dma_resv_list *old, *new;
211227 unsigned int i, j, k;
212228
213
- if (!ef && !ef_list)
229
+ if (!ef)
214230 return -EINVAL;
215231
216
- if (ef_list) {
217
- *ef_list = NULL;
218
- *ef_count = 0;
219
- }
220
-
221
- old = reservation_object_get_list(resv);
232
+ old = dma_resv_get_list(resv);
222233 if (!old)
223234 return 0;
224235
....@@ -234,10 +245,9 @@
234245 struct dma_fence *f;
235246
236247 f = rcu_dereference_protected(old->shared[i],
237
- reservation_object_held(resv));
248
+ dma_resv_held(resv));
238249
239
- if ((ef && f->context == ef->base.context) ||
240
- (!ef && to_amdgpu_amdkfd_fence(f)))
250
+ if (f->context == ef->base.context)
241251 RCU_INIT_POINTER(new->shared[--j], f);
242252 else
243253 RCU_INIT_POINTER(new->shared[k++], f);
....@@ -245,70 +255,58 @@
245255 new->shared_max = old->shared_max;
246256 new->shared_count = k;
247257
248
- if (!ef) {
249
- unsigned int count = old->shared_count - j;
250
-
251
- /* Alloc memory for count number of eviction fence pointers.
252
- * Fill the ef_list array and ef_count
253
- */
254
- *ef_list = kcalloc(count, sizeof(**ef_list), GFP_KERNEL);
255
- *ef_count = count;
256
-
257
- if (!*ef_list) {
258
- kfree(new);
259
- return -ENOMEM;
260
- }
261
- }
262
-
263258 /* Install the new fence list, seqcount provides the barriers */
264
- write_seqlock(&resv->seq);
259
+ write_seqcount_begin(&resv->seq);
265260 RCU_INIT_POINTER(resv->fence, new);
266
- write_sequnlock(&resv->seq);
261
+ write_seqcount_end(&resv->seq);
267262
268263 /* Drop the references to the removed fences or move them to ef_list */
269264 for (i = j, k = 0; i < old->shared_count; ++i) {
270265 struct dma_fence *f;
271266
272267 f = rcu_dereference_protected(new->shared[i],
273
- reservation_object_held(resv));
274
- if (!ef)
275
- (*ef_list)[k++] = to_amdgpu_amdkfd_fence(f);
276
- else
277
- dma_fence_put(f);
268
+ dma_resv_held(resv));
269
+ dma_fence_put(f);
278270 }
279271 kfree_rcu(old, rcu);
280272
281273 return 0;
282274 }
283275
284
-/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
285
- * reservation object.
286
- *
287
- * @bo: [IN] Add eviction fences to this BO
288
- * @ef_list: [IN] List of eviction fences to be added
289
- * @ef_count: [IN] Number of fences in ef_list.
290
- *
291
- * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
292
- * function.
293
- */
294
-static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
295
- struct amdgpu_amdkfd_fence **ef_list,
296
- unsigned int ef_count)
276
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
297277 {
298
- int i;
278
+ struct amdgpu_bo *root = bo;
279
+ struct amdgpu_vm_bo_base *vm_bo;
280
+ struct amdgpu_vm *vm;
281
+ struct amdkfd_process_info *info;
282
+ struct amdgpu_amdkfd_fence *ef;
283
+ int ret;
299284
300
- if (!ef_list || !ef_count)
301
- return;
285
+ /* we can always get vm_bo from root PD bo.*/
286
+ while (root->parent)
287
+ root = root->parent;
302288
303
- for (i = 0; i < ef_count; i++) {
304
- amdgpu_bo_fence(bo, &ef_list[i]->base, true);
305
- /* Re-adding the fence takes an additional reference. Drop that
306
- * reference.
307
- */
308
- dma_fence_put(&ef_list[i]->base);
309
- }
289
+ vm_bo = root->vm_bo;
290
+ if (!vm_bo)
291
+ return 0;
310292
311
- kfree(ef_list);
293
+ vm = vm_bo->vm;
294
+ if (!vm)
295
+ return 0;
296
+
297
+ info = vm->process_info;
298
+ if (!info || !info->eviction_fence)
299
+ return 0;
300
+
301
+ ef = container_of(dma_fence_get(&info->eviction_fence->base),
302
+ struct amdgpu_amdkfd_fence, base);
303
+
304
+ BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
305
+ ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
306
+ dma_resv_unlock(bo->tbo.base.resv);
307
+
308
+ dma_fence_put(&ef->base);
309
+ return ret;
312310 }
313311
314312 static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
....@@ -326,28 +324,16 @@
326324 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
327325 if (ret)
328326 goto validate_fail;
329
- if (wait) {
330
- struct amdgpu_amdkfd_fence **ef_list;
331
- unsigned int ef_count;
332
-
333
- ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
334
- &ef_count);
335
- if (ret)
336
- goto validate_fail;
337
-
338
- ttm_bo_wait(&bo->tbo, false, false);
339
- amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
340
- }
327
+ if (wait)
328
+ amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
341329
342330 validate_fail:
343331 return ret;
344332 }
345333
346
-static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
334
+static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
347335 {
348
- struct amdgpu_vm_parser *p = param;
349
-
350
- return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
336
+ return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
351337 }
352338
353339 /* vm_validate_pt_pd_bos - Validate page table and directory BOs
....@@ -361,56 +347,31 @@
361347 {
362348 struct amdgpu_bo *pd = vm->root.base.bo;
363349 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
364
- struct amdgpu_vm_parser param;
365
- uint64_t addr, flags = AMDGPU_PTE_VALID;
366350 int ret;
367351
368
- param.domain = AMDGPU_GEM_DOMAIN_VRAM;
369
- param.wait = false;
370
-
371
- ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
372
- &param);
352
+ ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL);
373353 if (ret) {
374
- pr_err("amdgpu: failed to validate PT BOs\n");
354
+ pr_err("failed to validate PT BOs\n");
375355 return ret;
376356 }
377357
378
- ret = amdgpu_amdkfd_validate(&param, pd);
358
+ ret = amdgpu_amdkfd_validate_vm_bo(NULL, pd);
379359 if (ret) {
380
- pr_err("amdgpu: failed to validate PD\n");
360
+ pr_err("failed to validate PD\n");
381361 return ret;
382362 }
383363
384
- addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
385
- amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
386
- vm->pd_phys_addr = addr;
364
+ vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
387365
388366 if (vm->use_cpu_for_update) {
389367 ret = amdgpu_bo_kmap(pd, NULL);
390368 if (ret) {
391
- pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
369
+ pr_err("failed to kmap PD, ret=%d\n", ret);
392370 return ret;
393371 }
394372 }
395373
396374 return 0;
397
-}
398
-
399
-static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
400
- struct dma_fence *f)
401
-{
402
- int ret = amdgpu_sync_fence(adev, sync, f, false);
403
-
404
- /* Sync objects can't handle multiple GPUs (contexts) updating
405
- * sync->last_vm_update. Fortunately we don't need it for
406
- * KFD's purposes, so we can just drop that fence.
407
- */
408
- if (sync->last_vm_update) {
409
- dma_fence_put(sync->last_vm_update);
410
- sync->last_vm_update = NULL;
411
- }
412
-
413
- return ret;
414375 }
415376
416377 static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
....@@ -419,11 +380,44 @@
419380 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
420381 int ret;
421382
422
- ret = amdgpu_vm_update_directories(adev, vm);
383
+ ret = amdgpu_vm_update_pdes(adev, vm, false);
423384 if (ret)
424385 return ret;
425386
426
- return sync_vm_fence(adev, sync, vm->last_update);
387
+ return amdgpu_sync_fence(sync, vm->last_update);
388
+}
389
+
390
+static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
391
+{
392
+ struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
393
+ bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
394
+ uint32_t mapping_flags;
395
+
396
+ mapping_flags = AMDGPU_VM_PAGE_READABLE;
397
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
398
+ mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
399
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
400
+ mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
401
+
402
+ switch (adev->asic_type) {
403
+ case CHIP_ARCTURUS:
404
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
405
+ if (bo_adev == adev)
406
+ mapping_flags |= coherent ?
407
+ AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
408
+ else
409
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
410
+ } else {
411
+ mapping_flags |= coherent ?
412
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
413
+ }
414
+ break;
415
+ default:
416
+ mapping_flags |= coherent ?
417
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
418
+ }
419
+
420
+ return amdgpu_gem_va_map_flags(adev, mapping_flags);
427421 }
428422
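The MTYPE selection in get_pte_flags() above boils down to a small decision table. Here is a hedged restatement as a standalone function (stub enum and parameter names invented for illustration; only the branching mirrors the driver code):

```c
/* Simplified restatement of the MTYPE choice in get_pte_flags() above.
 * Names are stubs for illustration; this is not driver code.
 */
#include <stdbool.h>
#include <stdio.h>

enum mtype { MTYPE_NC, MTYPE_UC, MTYPE_CC, MTYPE_RW };

static enum mtype pick_mtype(bool is_arcturus, bool is_vram_bo,
			     bool bo_is_local, bool coherent)
{
	if (is_arcturus && is_vram_bo) {
		if (bo_is_local)	/* VRAM of the GPU doing the mapping */
			return coherent ? MTYPE_CC : MTYPE_RW;
		return MTYPE_UC;	/* VRAM of a peer GPU: always uncached */
	}
	/* GTT/system memory, or any BO on non-Arcturus ASICs */
	return coherent ? MTYPE_UC : MTYPE_NC;
}

int main(void)
{
	/* Local, non-coherent VRAM on Arcturus maps as MTYPE_RW (3) */
	printf("%d\n", pick_mtype(true, true, true, false));
	return 0;
}
```

The design point encoded here: on Arcturus, VRAM belonging to a peer GPU is always mapped uncached, local VRAM gets the cacheable RW type (or CC when the COHERENT flag is set), and everything else keeps the UC/NC split driven by KFD_IOC_ALLOC_MEM_FLAGS_COHERENT.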
429423 /* add_bo_to_vm - Add a BO to a VM
....@@ -444,7 +438,6 @@
444438 {
445439 int ret;
446440 struct kfd_bo_va_list *bo_va_entry;
447
- struct amdgpu_bo *pd = vm->root.base.bo;
448441 struct amdgpu_bo *bo = mem->bo;
449442 uint64_t va = mem->va;
450443 struct list_head *list_bo_va = &mem->bo_va_list;
....@@ -475,42 +468,23 @@
475468 }
476469
477470 bo_va_entry->va = va;
478
- bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev,
479
- mem->mapping_flags);
471
+ bo_va_entry->pte_flags = get_pte_flags(adev, mem);
480472 bo_va_entry->kgd_dev = (void *)adev;
481473 list_add(&bo_va_entry->bo_list, list_bo_va);
482474
483475 if (p_bo_va_entry)
484476 *p_bo_va_entry = bo_va_entry;
485477
486
- /* Allocate new page tables if needed and validate
487
- * them. Clearing of new page tables and validate need to wait
488
- * on move fences. We don't want that to trigger the eviction
489
- * fence, so remove it temporarily.
490
- */
491
- amdgpu_amdkfd_remove_eviction_fence(pd,
492
- vm->process_info->eviction_fence,
493
- NULL, NULL);
494
-
495
- ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
496
- if (ret) {
497
- pr_err("Failed to allocate pts, err=%d\n", ret);
498
- goto err_alloc_pts;
499
- }
500
-
478
+ /* Allocate validate page tables if needed */
501479 ret = vm_validate_pt_pd_bos(vm);
502480 if (ret) {
503481 pr_err("validate_pt_pd_bos() failed\n");
504482 goto err_alloc_pts;
505483 }
506484
507
- /* Add the eviction fence back */
508
- amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
509
-
510485 return 0;
511486
512487 err_alloc_pts:
513
- amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
514488 amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
515489 list_del(&bo_va_entry->bo_list);
516490 err_vmadd:
....@@ -537,13 +511,24 @@
537511 struct amdgpu_bo *bo = mem->bo;
538512
539513 INIT_LIST_HEAD(&entry->head);
540
- entry->shared = true;
514
+ entry->num_shared = 1;
541515 entry->bo = &bo->tbo;
542516 mutex_lock(&process_info->lock);
543517 if (userptr)
544518 list_add_tail(&entry->head, &process_info->userptr_valid_list);
545519 else
546520 list_add_tail(&entry->head, &process_info->kfd_bo_list);
521
+ mutex_unlock(&process_info->lock);
522
+}
523
+
524
+static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
525
+ struct amdkfd_process_info *process_info)
526
+{
527
+ struct ttm_validate_buffer *bo_list_entry;
528
+
529
+ bo_list_entry = &mem->validate_list;
530
+ mutex_lock(&process_info->lock);
531
+ list_del(&bo_list_entry->head);
547532 mutex_unlock(&process_info->lock);
548533 }
549534
....@@ -559,8 +544,7 @@
559544 *
560545 * Returns 0 for success, negative errno for errors.
561546 */
562
-static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
563
- uint64_t user_addr)
547
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
564548 {
565549 struct amdkfd_process_info *process_info = mem->process_info;
566550 struct amdgpu_bo *bo = mem->bo;
....@@ -569,7 +553,7 @@
569553
570554 mutex_lock(&process_info->lock);
571555
572
- ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
556
+ ret = amdgpu_ttm_tt_set_userptr(&bo->tbo, user_addr, 0);
573557 if (ret) {
574558 pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
575559 goto out;
....@@ -582,27 +566,11 @@
582566 goto out;
583567 }
584568
585
- /* If no restore worker is running concurrently, user_pages
586
- * should not be allocated
587
- */
588
- WARN(mem->user_pages, "Leaking user_pages array");
589
-
590
- mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
591
- sizeof(struct page *),
592
- GFP_KERNEL | __GFP_ZERO);
593
- if (!mem->user_pages) {
594
- pr_err("%s: Failed to allocate pages array\n", __func__);
595
- ret = -ENOMEM;
596
- goto unregister_out;
597
- }
598
-
599
- ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
569
+ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
600570 if (ret) {
601571 pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
602
- goto free_out;
572
+ goto unregister_out;
603573 }
604
-
605
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
606574
607575 ret = amdgpu_bo_reserve(bo, true);
608576 if (ret) {
....@@ -616,11 +584,7 @@
616584 amdgpu_bo_unreserve(bo);
617585
618586 release_out:
619
- if (ret)
620
- release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
621
-free_out:
622
- kvfree(mem->user_pages);
623
- mem->user_pages = NULL;
587
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
624588 unregister_out:
625589 if (ret)
626590 amdgpu_mn_unregister(bo);
....@@ -676,26 +640,24 @@
676640 if (!ctx->vm_pd)
677641 return -ENOMEM;
678642
679
- ctx->kfd_bo.robj = bo;
680643 ctx->kfd_bo.priority = 0;
681644 ctx->kfd_bo.tv.bo = &bo->tbo;
682
- ctx->kfd_bo.tv.shared = true;
683
- ctx->kfd_bo.user_pages = NULL;
645
+ ctx->kfd_bo.tv.num_shared = 1;
684646 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
685647
686648 amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
687649
688650 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
689651 false, &ctx->duplicates);
690
- if (!ret)
691
- ctx->reserved = true;
692
- else {
693
- pr_err("Failed to reserve buffers in ttm\n");
652
+ if (ret) {
653
+ pr_err("Failed to reserve buffers in ttm.\n");
694654 kfree(ctx->vm_pd);
695655 ctx->vm_pd = NULL;
656
+ return ret;
696657 }
697658
698
- return ret;
659
+ ctx->reserved = true;
660
+ return 0;
699661 }
700662
701663 /**
....@@ -741,11 +703,9 @@
741703 return -ENOMEM;
742704 }
743705
744
- ctx->kfd_bo.robj = bo;
745706 ctx->kfd_bo.priority = 0;
746707 ctx->kfd_bo.tv.bo = &bo->tbo;
747
- ctx->kfd_bo.tv.shared = true;
748
- ctx->kfd_bo.user_pages = NULL;
708
+ ctx->kfd_bo.tv.num_shared = 1;
749709 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
750710
751711 i = 0;
....@@ -762,17 +722,15 @@
762722
763723 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
764724 false, &ctx->duplicates);
765
- if (!ret)
766
- ctx->reserved = true;
767
- else
768
- pr_err("Failed to reserve buffers in ttm.\n");
769
-
770725 if (ret) {
726
+ pr_err("Failed to reserve buffers in ttm.\n");
771727 kfree(ctx->vm_pd);
772728 ctx->vm_pd = NULL;
729
+ return ret;
773730 }
774731
775
- return ret;
732
+ ctx->reserved = true;
733
+ return 0;
776734 }
777735
778736 /**
....@@ -811,25 +769,12 @@
811769 {
812770 struct amdgpu_bo_va *bo_va = entry->bo_va;
813771 struct amdgpu_vm *vm = bo_va->base.vm;
814
- struct amdgpu_bo *pd = vm->root.base.bo;
815772
816
- /* Remove eviction fence from PD (and thereby from PTs too as
817
- * they share the resv. object). Otherwise during PT update
818
- * job (see amdgpu_vm_bo_update_mapping), eviction fence would
819
- * get added to job->sync object and job execution would
820
- * trigger the eviction fence.
821
- */
822
- amdgpu_amdkfd_remove_eviction_fence(pd,
823
- vm->process_info->eviction_fence,
824
- NULL, NULL);
825773 amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
826774
827775 amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
828776
829
- /* Add the eviction fence back */
830
- amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
831
-
832
- sync_vm_fence(adev, sync, bo_va->last_pt_update);
777
+ amdgpu_sync_fence(sync, bo_va->last_pt_update);
833778
834779 return 0;
835780 }
....@@ -839,13 +784,7 @@
839784 struct amdgpu_sync *sync)
840785 {
841786 int ret;
842
- struct amdgpu_vm *vm;
843
- struct amdgpu_bo_va *bo_va;
844
- struct amdgpu_bo *bo;
845
-
846
- bo_va = entry->bo_va;
847
- vm = bo_va->base.vm;
848
- bo = bo_va->base.bo;
787
+ struct amdgpu_bo_va *bo_va = entry->bo_va;
849788
850789 /* Update the page tables */
851790 ret = amdgpu_vm_bo_update(adev, bo_va, false);
....@@ -854,7 +793,7 @@
854793 return ret;
855794 }
856795
857
- return sync_vm_fence(adev, sync, bo_va->last_pt_update);
796
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update);
858797 }
859798
860799 static int map_bo_to_gpuvm(struct amdgpu_device *adev,
....@@ -889,6 +828,24 @@
889828 return ret;
890829 }
891830
831
+static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
832
+{
833
+ struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
834
+
835
+ if (!sg)
836
+ return NULL;
837
+ if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
838
+ kfree(sg);
839
+ return NULL;
840
+ }
841
+ sg->sgl->dma_address = addr;
842
+ sg->sgl->length = size;
843
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
844
+ sg->sgl->dma_length = size;
845
+#endif
846
+ return sg;
847
+}
848
+
892849 static int process_validate_vms(struct amdkfd_process_info *process_info)
893850 {
894851 struct amdgpu_vm *peer_vm;
....@@ -897,6 +854,26 @@
897854 list_for_each_entry(peer_vm, &process_info->vm_list_head,
898855 vm_list_node) {
899856 ret = vm_validate_pt_pd_bos(peer_vm);
857
+ if (ret)
858
+ return ret;
859
+ }
860
+
861
+ return 0;
862
+}
863
+
864
+static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
865
+ struct amdgpu_sync *sync)
866
+{
867
+ struct amdgpu_vm *peer_vm;
868
+ int ret;
869
+
870
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
871
+ vm_list_node) {
872
+ struct amdgpu_bo *pd = peer_vm->root.base.bo;
873
+
874
+ ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv,
875
+ AMDGPU_SYNC_NE_OWNER,
876
+ AMDGPU_FENCE_OWNER_KFD);
900877 if (ret)
901878 return ret;
902879 }
....@@ -966,9 +943,13 @@
966943 pr_err("validate_pt_pd_bos() failed\n");
967944 goto validate_pd_fail;
968945 }
969
- ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false);
946
+ ret = amdgpu_bo_sync_wait(vm->root.base.bo,
947
+ AMDGPU_FENCE_OWNER_KFD, false);
970948 if (ret)
971949 goto wait_pd_fail;
950
+ ret = dma_resv_reserve_shared(vm->root.base.bo->tbo.base.resv, 1);
951
+ if (ret)
952
+ goto reserve_shared_fail;
972953 amdgpu_bo_fence(vm->root.base.bo,
973954 &vm->process_info->eviction_fence->base, true);
974955 amdgpu_bo_unreserve(vm->root.base.bo);
....@@ -982,6 +963,7 @@
982963
983964 return 0;
984965
966
+reserve_shared_fail:
985967 wait_pd_fail:
986968 validate_pd_fail:
987969 amdgpu_bo_unreserve(vm->root.base.bo);
....@@ -1001,8 +983,8 @@
1001983 return ret;
1002984 }
1003985
1004
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
1005
- void **process_info,
986
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, u32 pasid,
987
+ void **vm, void **process_info,
1006988 struct dma_fence **ef)
1007989 {
1008990 struct amdgpu_device *adev = get_amdgpu_device(kgd);
....@@ -1014,7 +996,7 @@
1014996 return -ENOMEM;
1015997
1016998 /* Initialize AMDGPU part of the VM */
1017
- ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
999
+ ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid);
10181000 if (ret) {
10191001 pr_err("Failed init vm ret %d\n", ret);
10201002 goto amdgpu_vm_init_fail;
....@@ -1037,22 +1019,26 @@
10371019 }
10381020
10391021 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
1040
- struct file *filp,
1022
+ struct file *filp, u32 pasid,
10411023 void **vm, void **process_info,
10421024 struct dma_fence **ef)
10431025 {
10441026 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1045
- struct drm_file *drm_priv = filp->private_data;
1046
- struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
1047
- struct amdgpu_vm *avm = &drv_priv->vm;
1027
+ struct amdgpu_fpriv *drv_priv;
1028
+ struct amdgpu_vm *avm;
10481029 int ret;
1030
+
1031
+ ret = amdgpu_file_to_fpriv(filp, &drv_priv);
1032
+ if (ret)
1033
+ return ret;
1034
+ avm = &drv_priv->vm;
10491035
10501036 /* Already a compute VM? */
10511037 if (avm->process_info)
10521038 return -EINVAL;
10531039
10541040 /* Convert VM into a compute VM */
1055
- ret = amdgpu_vm_make_compute(adev, avm);
1041
+ ret = amdgpu_vm_make_compute(adev, avm, pasid);
10561042 if (ret)
10571043 return ret;
10581044
....@@ -1086,6 +1072,8 @@
10861072 list_del(&vm->vm_list_node);
10871073 mutex_unlock(&process_info->lock);
10881074
1075
+ vm->process_info = NULL;
1076
+
10891077 /* Release per-process resources when last compute VM is destroyed */
10901078 if (!process_info->n_vms) {
10911079 WARN_ON(!list_empty(&process_info->kfd_bo_list));
....@@ -1115,11 +1103,34 @@
11151103 kfree(vm);
11161104 }
11171105
1118
-uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
1106
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
1107
+{
1108
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
1109
+ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1110
+
1111
+ if (WARN_ON(!kgd || !vm))
1112
+ return;
1113
+
1114
+ pr_debug("Releasing process vm %p\n", vm);
1115
+
1116
+ /* The original pasid of amdgpu vm has already been
1117
+ * released during making a amdgpu vm to a compute vm
1118
+ * The current pasid is managed by kfd and will be
1119
+ * released on kfd process destroy. Set amdgpu pasid
1120
+ * to 0 to avoid duplicate release.
1121
+ */
1122
+ amdgpu_vm_release_compute(adev, avm);
1123
+}
1124
+
1125
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
11191126 {
11201127 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1128
+ struct amdgpu_bo *pd = avm->root.base.bo;
1129
+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
11211130
1122
- return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
1131
+ if (adev->asic_type < CHIP_VEGA10)
1132
+ return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
1133
+ return avm->pd_phys_addr;
11231134 }
11241135
11251136 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
....@@ -1129,44 +1140,57 @@
11291140 {
11301141 struct amdgpu_device *adev = get_amdgpu_device(kgd);
11311142 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1143
+ enum ttm_bo_type bo_type = ttm_bo_type_device;
1144
+ struct sg_table *sg = NULL;
11321145 uint64_t user_addr = 0;
11331146 struct amdgpu_bo *bo;
11341147 struct amdgpu_bo_param bp;
1135
- int byte_align;
11361148 u32 domain, alloc_domain;
11371149 u64 alloc_flags;
1138
- uint32_t mapping_flags;
11391150 int ret;
11401151
11411152 /*
11421153 * Check on which domain to allocate BO
11431154 */
1144
- if (flags & ALLOC_MEM_FLAGS_VRAM) {
1155
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
11451156 domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
1146
- alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
1147
- alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
1157
+ alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
1158
+ alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
11481159 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
11491160 AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
1150
- } else if (flags & ALLOC_MEM_FLAGS_GTT) {
1161
+ } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
11511162 domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
11521163 alloc_flags = 0;
1153
- } else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
1164
+ } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
11541165 domain = AMDGPU_GEM_DOMAIN_GTT;
11551166 alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
11561167 alloc_flags = 0;
11571168 if (!offset || !*offset)
11581169 return -EINVAL;
11591170 user_addr = untagged_addr(*offset);
1171
+ } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
1172
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
1173
+ domain = AMDGPU_GEM_DOMAIN_GTT;
1174
+ alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
1175
+ bo_type = ttm_bo_type_sg;
1176
+ alloc_flags = 0;
1177
+ if (size > UINT_MAX)
1178
+ return -EINVAL;
1179
+ sg = create_doorbell_sg(*offset, size);
1180
+ if (!sg)
1181
+ return -ENOMEM;
11601182 } else {
11611183 return -EINVAL;
11621184 }
11631185
11641186 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
1165
- if (!*mem)
1166
- return -ENOMEM;
1187
+ if (!*mem) {
1188
+ ret = -ENOMEM;
1189
+ goto err;
1190
+ }
11671191 INIT_LIST_HEAD(&(*mem)->bo_va_list);
11681192 mutex_init(&(*mem)->lock);
1169
- (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
1193
+ (*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
11701194
11711195 /* Workaround for AQL queue wraparound bug. Map the same
11721196 * memory twice. That means we only actually allocate half
....@@ -1175,30 +1199,14 @@
11751199 if ((*mem)->aql_queue)
11761200 size = size >> 1;
11771201
1178
- /* Workaround for TLB bug on older VI chips */
1179
- byte_align = (adev->family == AMDGPU_FAMILY_VI &&
1180
- adev->asic_type != CHIP_FIJI &&
1181
- adev->asic_type != CHIP_POLARIS10 &&
1182
- adev->asic_type != CHIP_POLARIS11) ?
1183
- VI_BO_SIZE_ALIGN : 1;
1184
-
1185
- mapping_flags = AMDGPU_VM_PAGE_READABLE;
1186
- if (flags & ALLOC_MEM_FLAGS_WRITABLE)
1187
- mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
1188
- if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
1189
- mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
1190
- if (flags & ALLOC_MEM_FLAGS_COHERENT)
1191
- mapping_flags |= AMDGPU_VM_MTYPE_UC;
1192
- else
1193
- mapping_flags |= AMDGPU_VM_MTYPE_NC;
1194
- (*mem)->mapping_flags = mapping_flags;
1202
+ (*mem)->alloc_flags = flags;
11951203
11961204 amdgpu_sync_create(&(*mem)->sync);
11971205
1198
- ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
1206
+ ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
11991207 if (ret) {
12001208 pr_debug("Insufficient system memory\n");
1201
- goto err_reserve_system_mem;
1209
+ goto err_reserve_limit;
12021210 }
12031211
12041212 pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
....@@ -1206,16 +1214,20 @@
12061214
12071215 memset(&bp, 0, sizeof(bp));
12081216 bp.size = size;
1209
- bp.byte_align = byte_align;
1217
+ bp.byte_align = 1;
12101218 bp.domain = alloc_domain;
12111219 bp.flags = alloc_flags;
1212
- bp.type = ttm_bo_type_device;
1220
+ bp.type = bo_type;
12131221 bp.resv = NULL;
12141222 ret = amdgpu_bo_create(adev, &bp, &bo);
12151223 if (ret) {
12161224 pr_debug("Failed to create BO on domain %s. ret %d\n",
12171225 domain_string(alloc_domain), ret);
12181226 goto err_bo_create;
1227
+ }
1228
+ if (bo_type == ttm_bo_type_sg) {
1229
+ bo->tbo.sg = sg;
1230
+ bo->tbo.ttm->sg = sg;
12191231 }
12201232 bo->kfd_bo = *mem;
12211233 (*mem)->bo = bo;
....@@ -1229,13 +1241,9 @@
12291241 add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
12301242
12311243 if (user_addr) {
1232
- ret = init_user_pages(*mem, current->mm, user_addr);
1233
- if (ret) {
1234
- mutex_lock(&avm->process_info->lock);
1235
- list_del(&(*mem)->validate_list.head);
1236
- mutex_unlock(&avm->process_info->lock);
1244
+ ret = init_user_pages(*mem, user_addr);
1245
+ if (ret)
12371246 goto allocate_init_user_pages_failed;
1238
- }
12391247 }
12401248
12411249 if (offset)
....@@ -1244,43 +1252,48 @@
12441252 return 0;
12451253
12461254 allocate_init_user_pages_failed:
1255
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
12471256 amdgpu_bo_unref(&bo);
12481257 /* Don't unreserve system mem limit twice */
1249
- goto err_reserve_system_mem;
1258
+ goto err_reserve_limit;
12501259 err_bo_create:
1251
- unreserve_system_mem_limit(adev, size, alloc_domain);
1252
-err_reserve_system_mem:
1260
+ unreserve_mem_limit(adev, size, alloc_domain, !!sg);
1261
+err_reserve_limit:
12531262 mutex_destroy(&(*mem)->lock);
12541263 kfree(*mem);
1264
+err:
1265
+ if (sg) {
1266
+ sg_free_table(sg);
1267
+ kfree(sg);
1268
+ }
12551269 return ret;
12561270 }
12571271
12581272 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
1259
- struct kgd_dev *kgd, struct kgd_mem *mem)
1273
+ struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
12601274 {
12611275 struct amdkfd_process_info *process_info = mem->process_info;
12621276 unsigned long bo_size = mem->bo->tbo.mem.size;
12631277 struct kfd_bo_va_list *entry, *tmp;
12641278 struct bo_vm_reservation_context ctx;
12651279 struct ttm_validate_buffer *bo_list_entry;
1280
+ unsigned int mapped_to_gpu_memory;
12661281 int ret;
1282
+ bool is_imported = 0;
12671283
12681284 mutex_lock(&mem->lock);
1269
-
1270
- if (mem->mapped_to_gpu_memory > 0) {
1271
- pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
1272
- mem->va, bo_size);
1273
- mutex_unlock(&mem->lock);
1274
- return -EBUSY;
1275
- }
1276
-
1285
+ mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
1286
+ is_imported = mem->is_imported;
12771287 mutex_unlock(&mem->lock);
12781288 /* lock is not needed after this, since mem is unused and will
12791289 * be freed anyway
12801290 */
12811291
1282
- /* No more MMU notifiers */
1283
- amdgpu_mn_unregister(mem->bo);
1292
+ if (mapped_to_gpu_memory > 0) {
1293
+ pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
1294
+ mem->va, bo_size);
1295
+ return -EBUSY;
1296
+ }
12841297
12851298 /* Make sure restore workers don't access the BO any more */
12861299 bo_list_entry = &mem->validate_list;
....@@ -1288,14 +1301,8 @@
12881301 list_del(&bo_list_entry->head);
12891302 mutex_unlock(&process_info->lock);
12901303
1291
- /* Free user pages if necessary */
1292
- if (mem->user_pages) {
1293
- pr_debug("%s: Freeing user_pages array\n", __func__);
1294
- if (mem->user_pages[0])
1295
- release_pages(mem->user_pages,
1296
- mem->bo->tbo.ttm->num_pages);
1297
- kvfree(mem->user_pages);
1298
- }
1304
+ /* No more MMU notifiers */
1305
+ amdgpu_mn_unregister(mem->bo);
12991306
13001307 ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
13011308 if (unlikely(ret))
....@@ -1306,8 +1313,7 @@
13061313 * attached
13071314 */
13081315 amdgpu_amdkfd_remove_eviction_fence(mem->bo,
1309
- process_info->eviction_fence,
1310
- NULL, NULL);
1316
+ process_info->eviction_fence);
13111317 pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
13121318 mem->va + bo_size * (1 + mem->aql_queue));
13131319
....@@ -1321,8 +1327,27 @@
13211327 /* Free the sync object */
13221328 amdgpu_sync_free(&mem->sync);
13231329
1330
+ /* If the SG is not NULL, it's one we created for a doorbell or mmio
1331
+ * remap BO. We need to free it.
1332
+ */
1333
+ if (mem->bo->tbo.sg) {
1334
+ sg_free_table(mem->bo->tbo.sg);
1335
+ kfree(mem->bo->tbo.sg);
1336
+ }
1337
+
1338
+ /* Update the size of the BO being freed if it was allocated from
1339
+ * VRAM and is not imported.
1340
+ */
1341
+ if (size) {
1342
+ if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
1343
+ (!is_imported))
1344
+ *size = bo_size;
1345
+ else
1346
+ *size = 0;
1347
+ }
1348
+
13241349 /* Free the BO*/
1325
- amdgpu_bo_unref(&mem->bo);
1350
+ drm_gem_object_put(&mem->bo->tbo.base);
13261351 mutex_destroy(&mem->lock);
13271352 kfree(mem);
13281353
....@@ -1361,9 +1386,9 @@
13611386 * concurrently and the queues are actually stopped
13621387 */
13631388 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
1364
- down_write(&current->mm->mmap_sem);
1389
+ mmap_write_lock(current->mm);
13651390 is_invalid_userptr = atomic_read(&mem->invalid);
1366
- up_write(&current->mm->mmap_sem);
1391
+ mmap_write_unlock(current->mm);
13671392 }
13681393
13691394 mutex_lock(&mem->lock);
....@@ -1385,7 +1410,8 @@
13851410 * the queues are still stopped and we can leave mapping for
13861411 * the next restore worker
13871412 */
1388
- if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
1413
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
1414
+ bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
13891415 is_invalid_userptr = true;
13901416
13911417 if (check_if_add_bo_to_vm(avm, mem)) {
....@@ -1427,7 +1453,7 @@
14271453 ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
14281454 is_invalid_userptr);
14291455 if (ret) {
1430
- pr_err("Failed to map radeon bo to gpuvm\n");
1456
+ pr_err("Failed to map bo to gpuvm\n");
14311457 goto map_bo_to_gpuvm_failed;
14321458 }
14331459
....@@ -1525,8 +1551,7 @@
15251551 if (mem->mapped_to_gpu_memory == 0 &&
15261552 !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
15271553 amdgpu_amdkfd_remove_eviction_fence(mem->bo,
1528
- process_info->eviction_fence,
1529
- NULL, NULL);
1554
+ process_info->eviction_fence);
15301555
15311556 unreserve_out:
15321557 unreserve_bo_and_vms(&ctx, false, false);
....@@ -1587,7 +1612,7 @@
15871612 }
15881613
15891614 amdgpu_amdkfd_remove_eviction_fence(
1590
- bo, mem->process_info->eviction_fence, NULL, NULL);
1615
+ bo, mem->process_info->eviction_fence);
15911616 list_del_init(&mem->validate_list.head);
15921617
15931618 if (size)
....@@ -1622,6 +1647,65 @@
16221647 return 0;
16231648 }
16241649
1650
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
1651
+ struct dma_buf *dma_buf,
1652
+ uint64_t va, void *vm,
1653
+ struct kgd_mem **mem, uint64_t *size,
1654
+ uint64_t *mmap_offset)
1655
+{
1656
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
1657
+ struct drm_gem_object *obj;
1658
+ struct amdgpu_bo *bo;
1659
+ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1660
+
1661
+ if (dma_buf->ops != &amdgpu_dmabuf_ops)
1662
+ /* Can't handle non-graphics buffers */
1663
+ return -EINVAL;
1664
+
1665
+ obj = dma_buf->priv;
1666
+ if (drm_to_adev(obj->dev) != adev)
1667
+ /* Can't handle buffers from other devices */
1668
+ return -EINVAL;
1669
+
1670
+ bo = gem_to_amdgpu_bo(obj);
1671
+ if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
1672
+ AMDGPU_GEM_DOMAIN_GTT)))
1673
+ /* Only VRAM and GTT BOs are supported */
1674
+ return -EINVAL;
1675
+
1676
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
1677
+ if (!*mem)
1678
+ return -ENOMEM;
1679
+
1680
+ if (size)
1681
+ *size = amdgpu_bo_size(bo);
1682
+
1683
+ if (mmap_offset)
1684
+ *mmap_offset = amdgpu_bo_mmap_offset(bo);
1685
+
1686
+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
1687
+ mutex_init(&(*mem)->lock);
1688
+
1689
+ (*mem)->alloc_flags =
1690
+ ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
1691
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
1692
+ | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
1693
+ | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
1694
+
1695
+ drm_gem_object_get(&bo->tbo.base);
1696
+ (*mem)->bo = bo;
1697
+ (*mem)->va = va;
1698
+ (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
1699
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
1700
+ (*mem)->mapped_to_gpu_memory = 0;
1701
+ (*mem)->process_info = avm->process_info;
1702
+ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
1703
+ amdgpu_sync_create(&(*mem)->sync);
1704
+ (*mem)->is_imported = true;
1705
+
1706
+ return 0;
1707
+}
1708
+
16251709 /* Evict a userptr BO by stopping the queues if necessary
16261710 *
16271711 * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
....@@ -1637,14 +1721,14 @@
16371721 struct mm_struct *mm)
16381722 {
16391723 struct amdkfd_process_info *process_info = mem->process_info;
1640
- int invalid, evicted_bos;
1724
+ int evicted_bos;
16411725 int r = 0;
16421726
1643
- invalid = atomic_inc_return(&mem->invalid);
1727
+ atomic_inc(&mem->invalid);
16441728 evicted_bos = atomic_inc_return(&process_info->evicted_bos);
16451729 if (evicted_bos == 1) {
16461730 /* First eviction, stop the queues */
1647
- r = kgd2kfd->quiesce_mm(mm);
1731
+ r = kgd2kfd_quiesce_mm(mm);
16481732 if (r)
16491733 pr_err("Failed to quiesce KFD\n");
16501734 schedule_delayed_work(&process_info->restore_userptr_work,
....@@ -1709,36 +1793,24 @@
17091793
17101794 bo = mem->bo;
17111795
1712
- if (!mem->user_pages) {
1713
- mem->user_pages =
1714
- kvmalloc_array(bo->tbo.ttm->num_pages,
1715
- sizeof(struct page *),
1716
- GFP_KERNEL | __GFP_ZERO);
1717
- if (!mem->user_pages) {
1718
- pr_err("%s: Failed to allocate pages array\n",
1719
- __func__);
1720
- return -ENOMEM;
1721
- }
1722
- } else if (mem->user_pages[0]) {
1723
- release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
1796
+ /* Get updated user pages */
1797
+ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
1798
+ if (ret) {
1799
+ pr_debug("%s: Failed to get user pages: %d\n",
1800
+ __func__, ret);
1801
+
1802
+ /* Return error -EBUSY or -ENOMEM, retry restore */
1803
+ return ret;
17241804 }
17251805
1726
- /* Get updated user pages */
1727
- ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
1728
- mem->user_pages);
1729
- if (ret) {
1730
- mem->user_pages[0] = NULL;
1731
- pr_info("%s: Failed to get user pages: %d\n",
1732
- __func__, ret);
1733
- /* Pretend it succeeded. It will fail later
1734
- * with a VM fault if the GPU tries to access
1735
- * it. Better than hanging indefinitely with
1736
- * stalled user mode queues.
1737
- */
1738
- }
1806
+ /*
1807
+ * FIXME: Cannot ignore the return code, must hold
1808
+ * notifier_lock
1809
+ */
1810
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
17391811
17401812 /* Mark the BO as valid unless it was invalidated
1741
- * again concurrently
1813
+ * again concurrently.
17421814 */
17431815 if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
17441816 return -EAGAIN;
....@@ -1771,7 +1843,8 @@
17711843 GFP_KERNEL);
17721844 if (!pd_bo_list_entries) {
17731845 pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
1774
- return -ENOMEM;
1846
+ ret = -ENOMEM;
1847
+ goto out_no_mem;
17751848 }
17761849
17771850 INIT_LIST_HEAD(&resv_list);
....@@ -1788,26 +1861,16 @@
17881861 validate_list.head) {
17891862 list_add_tail(&mem->resv_list.head, &resv_list);
17901863 mem->resv_list.bo = mem->validate_list.bo;
1791
- mem->resv_list.shared = mem->validate_list.shared;
1864
+ mem->resv_list.num_shared = mem->validate_list.num_shared;
17921865 }
17931866
17941867 /* Reserve all BOs and page tables for validation */
17951868 ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
17961869 WARN(!list_empty(&duplicates), "Duplicates should be empty");
17971870 if (ret)
1798
- goto out;
1871
+ goto out_free;
17991872
18001873 amdgpu_sync_create(&sync);
1801
-
1802
- /* Avoid triggering eviction fences when unmapping invalid
1803
- * userptr BOs (waits for all fences, doesn't use
1804
- * FENCE_OWNER_VM)
1805
- */
1806
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
1807
- vm_list_node)
1808
- amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
1809
- process_info->eviction_fence,
1810
- NULL, NULL);
18111874
18121875 ret = process_validate_vms(process_info);
18131876 if (ret)
....@@ -1821,10 +1884,8 @@
18211884
18221885 bo = mem->bo;
18231886
1824
- /* Copy pages array and validate the BO if we got user pages */
1825
- if (mem->user_pages[0]) {
1826
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
1827
- mem->user_pages);
1887
+ /* Validate the BO if we got user pages */
1888
+ if (bo->tbo.ttm->pages[0]) {
18281889 amdgpu_bo_placement_from_domain(bo, mem->domain);
18291890 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
18301891 if (ret) {
....@@ -1833,13 +1894,6 @@
18331894 }
18341895 }
18351896
1836
- /* Validate succeeded, now the BO owns the pages, free
1837
- * our copy of the pointer array. Put this BO back on
1838
- * the userptr_valid_list. If we need to revalidate
1839
- * it, we need to start from scratch.
1840
- */
1841
- kvfree(mem->user_pages);
1842
- mem->user_pages = NULL;
18431897 list_move_tail(&mem->validate_list.head,
18441898 &process_info->userptr_valid_list);
18451899
....@@ -1869,15 +1923,12 @@
18691923 ret = process_update_pds(process_info, &sync);
18701924
18711925 unreserve_out:
1872
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
1873
- vm_list_node)
1874
- amdgpu_bo_fence(peer_vm->root.base.bo,
1875
- &process_info->eviction_fence->base, true);
18761926 ttm_eu_backoff_reservation(&ticket, &resv_list);
18771927 amdgpu_sync_wait(&sync, false);
18781928 amdgpu_sync_free(&sync);
1879
-out:
1929
+out_free:
18801930 kfree(pd_bo_list_entries);
1931
+out_no_mem:
18811932
18821933 return ret;
18831934 }
....@@ -1936,12 +1987,13 @@
19361987 evicted_bos)
19371988 goto unlock_out;
19381989 evicted_bos = 0;
1939
- if (kgd2kfd->resume_mm(mm)) {
1990
+ if (kgd2kfd_resume_mm(mm)) {
19401991 pr_err("%s: Failed to resume KFD\n", __func__);
19411992 /* No recovery from this failure. Probably the CP is
19421993 * hanging. No point trying again.
19431994 */
19441995 }
1996
+
19451997 unlock_out:
19461998 mutex_unlock(&process_info->lock);
19471999 mmput(mm);
....@@ -2007,7 +2059,7 @@
20072059
20082060 list_add_tail(&mem->resv_list.head, &ctx.list);
20092061 mem->resv_list.bo = mem->validate_list.bo;
2010
- mem->resv_list.shared = mem->validate_list.shared;
2062
+ mem->resv_list.num_shared = mem->validate_list.num_shared;
20112063 }
20122064
20132065 ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
....@@ -2024,13 +2076,10 @@
20242076 if (ret)
20252077 goto validate_map_fail;
20262078
2027
- /* Wait for PD/PTs validate to finish */
2028
- /* FIXME: I think this isn't needed */
2029
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
2030
- vm_list_node) {
2031
- struct amdgpu_bo *bo = peer_vm->root.base.bo;
2032
-
2033
- ttm_bo_wait(&bo->tbo, false, false);
2079
+ ret = process_sync_pds_resv(process_info, &sync_obj);
2080
+ if (ret) {
2081
+ pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
2082
+ goto validate_map_fail;
20342083 }
20352084
20362085 /* Validate BOs and map them to GPUVM (update VM page tables). */
....@@ -2046,7 +2095,11 @@
20462095 pr_debug("Memory eviction: Validate BOs failed. Try again\n");
20472096 goto validate_map_fail;
20482097 }
2049
-
2098
+ ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
2099
+ if (ret) {
2100
+ pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
2101
+ goto validate_map_fail;
2102
+ }
20502103 list_for_each_entry(bo_va_entry, &mem->bo_va_list,
20512104 bo_list) {
20522105 ret = update_gpuvm_pte((struct amdgpu_device *)
....@@ -2067,6 +2120,7 @@
20672120 goto validate_map_fail;
20682121 }
20692122
2123
+ /* Wait for validate and PT updates to finish */
20702124 amdgpu_sync_wait(&sync_obj, false);
20712125
20722126 /* Release old eviction fence and create new one, because fence only
....@@ -2085,10 +2139,7 @@
20852139 process_info->eviction_fence = new_fence;
20862140 *ef = dma_fence_get(&new_fence->base);
20872141
2088
- /* Wait for validate to finish and attach new eviction fence */
2089
- list_for_each_entry(mem, &process_info->kfd_bo_list,
2090
- validate_list.head)
2091
- ttm_bo_wait(&mem->bo->tbo, false, false);
2142
+ /* Attach new eviction fence to all BOs */
20922143 list_for_each_entry(mem, &process_info->kfd_bo_list,
20932144 validate_list.head)
20942145 amdgpu_bo_fence(mem->bo,
....@@ -2110,3 +2161,115 @@
21102161 kfree(pd_bo_list);
21112162 return ret;
21122163 }
2164
+
2165
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
2166
+{
2167
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
2168
+ struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
2169
+ int ret;
2170
+
2171
+ if (!info || !gws)
2172
+ return -EINVAL;
2173
+
2174
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
2175
+ if (!*mem)
2176
+ return -ENOMEM;
2177
+
2178
+ mutex_init(&(*mem)->lock);
2179
+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
2180
+ (*mem)->bo = amdgpu_bo_ref(gws_bo);
2181
+ (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
2182
+ (*mem)->process_info = process_info;
2183
+ add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
2184
+ amdgpu_sync_create(&(*mem)->sync);
2185
+
2186
+
2187
+ /* Validate gws bo the first time it is added to process */
2188
+ mutex_lock(&(*mem)->process_info->lock);
2189
+ ret = amdgpu_bo_reserve(gws_bo, false);
2190
+ if (unlikely(ret)) {
2191
+ pr_err("Reserve gws bo failed %d\n", ret);
2192
+ goto bo_reservation_failure;
2193
+ }
2194
+
2195
+ ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
2196
+ if (ret) {
2197
+ pr_err("GWS BO validate failed %d\n", ret);
2198
+ goto bo_validation_failure;
2199
+ }
2200
+ /* GWS resource is shared b/t amdgpu and amdkfd
2201
+ * Add process eviction fence to bo so they can
2202
+ * evict each other.
2203
+ */
2204
+ ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1);
2205
+ if (ret)
2206
+ goto reserve_shared_fail;
2207
+ amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
2208
+ amdgpu_bo_unreserve(gws_bo);
2209
+ mutex_unlock(&(*mem)->process_info->lock);
2210
+
2211
+ return ret;
2212
+
2213
+reserve_shared_fail:
2214
+bo_validation_failure:
2215
+ amdgpu_bo_unreserve(gws_bo);
2216
+bo_reservation_failure:
2217
+ mutex_unlock(&(*mem)->process_info->lock);
2218
+ amdgpu_sync_free(&(*mem)->sync);
2219
+ remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
2220
+ amdgpu_bo_unref(&gws_bo);
2221
+ mutex_destroy(&(*mem)->lock);
2222
+ kfree(*mem);
2223
+ *mem = NULL;
2224
+ return ret;
2225
+}
2226
+
2227
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
2228
+{
2229
+ int ret;
2230
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
2231
+ struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
2232
+ struct amdgpu_bo *gws_bo = kgd_mem->bo;
2233
+
2234
+ /* Remove BO from process's validate list so restore worker won't touch
2235
+ * it anymore
2236
+ */
2237
+ remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
2238
+
2239
+ ret = amdgpu_bo_reserve(gws_bo, false);
2240
+ if (unlikely(ret)) {
2241
+ pr_err("Reserve gws bo failed %d\n", ret);
2242
+ //TODO add BO back to validate_list?
2243
+ return ret;
2244
+ }
2245
+ amdgpu_amdkfd_remove_eviction_fence(gws_bo,
2246
+ process_info->eviction_fence);
2247
+ amdgpu_bo_unreserve(gws_bo);
2248
+ amdgpu_sync_free(&kgd_mem->sync);
2249
+ amdgpu_bo_unref(&gws_bo);
2250
+ mutex_destroy(&kgd_mem->lock);
2251
+ kfree(mem);
2252
+ return 0;
2253
+}
2254
+
2255
+/* Returns GPU-specific tiling mode information */
2256
+int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
2257
+ struct tile_config *config)
2258
+{
2259
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
2260
+
2261
+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
2262
+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
2263
+ config->num_tile_configs =
2264
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2265
+ config->macro_tile_config_ptr =
2266
+ adev->gfx.config.macrotile_mode_array;
2267
+ config->num_macro_tile_configs =
2268
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2269
+
2270
+ /* Those values are not set from GFX9 onwards */
2271
+ config->num_banks = adev->gfx.config.num_banks;
2272
+ config->num_ranks = adev->gfx.config.num_ranks;
2273
+
2274
+ return 0;
2275
+}