2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
....@@ -19,21 +19,17 @@
1919 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
2020 * OTHER DEALINGS IN THE SOFTWARE.
2121 */
22
-
23
-#define pr_fmt(fmt) "kfd2kgd: " fmt
24
-
22
+#include <linux/dma-buf.h>
2523 #include <linux/list.h>
2624 #include <linux/pagemap.h>
2725 #include <linux/sched/mm.h>
28
-#include <drm/drmP.h>
26
+#include <linux/sched/task.h>
27
+
2928 #include "amdgpu_object.h"
3029 #include "amdgpu_vm.h"
3130 #include "amdgpu_amdkfd.h"
32
-
33
-/* Special VM and GART address alignment needed for VI pre-Fiji due to
34
- * a HW bug.
35
- */
36
-#define VI_BO_SIZE_ALIGN (0x8000)
31
+#include "amdgpu_dma_buf.h"
32
+#include <uapi/linux/kfd_ioctl.h>
3733
3834 /* BO flag to indicate a KFD userptr BO */
3935 #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
....@@ -46,17 +42,11 @@
4642 /* Impose limit on how much memory KFD can use */
4743 static struct {
4844 uint64_t max_system_mem_limit;
49
- uint64_t max_userptr_mem_limit;
45
+ uint64_t max_ttm_mem_limit;
5046 int64_t system_mem_used;
51
- int64_t userptr_mem_used;
47
+ int64_t ttm_mem_used;
5248 spinlock_t mem_limit_lock;
5349 } kfd_mem_limit;
54
-
55
-/* Struct used for amdgpu_amdkfd_bo_validate */
56
-struct amdgpu_vm_parser {
57
- uint32_t domain;
58
- bool wait;
59
-};
6050
6151 static const char * const domain_bit_to_string[] = {
6252 "CPU",
....@@ -90,8 +80,8 @@
9080 }
9181
9282 /* Set memory usage limits. Current, limits are
93
- * System (kernel) memory - 3/8th System RAM
94
- * Userptr memory - 3/4th System RAM
83
+ * System (TTM + userptr) memory - 15/16th System RAM
84
+ * TTM memory - 3/8th System RAM
9585 */
9686 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
9787 {
....@@ -103,48 +93,78 @@
10393 mem *= si.mem_unit;
10494
10595 spin_lock_init(&kfd_mem_limit.mem_limit_lock);
106
- kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
107
- kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
108
- pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
96
+ kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
97
+ kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
98
+ pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
10999 (kfd_mem_limit.max_system_mem_limit >> 20),
110
- (kfd_mem_limit.max_userptr_mem_limit >> 20));
100
+ (kfd_mem_limit.max_ttm_mem_limit >> 20));
111101 }
112102
113
-static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
114
- uint64_t size, u32 domain)
103
+/* Estimate page table size needed to represent a given memory size
104
+ *
105
+ * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
106
+ * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
107
+ * of memory (factor 256K, >> 18). ROCm user mode tries to optimize
108
+ * for 2MB pages for TLB efficiency. However, small allocations and
109
+ * fragmented system memory still need some 4KB pages. We choose a
110
+ * compromise that should work in most cases without reserving too
111
+ * much memory for page tables unnecessarily (factor 16K, >> 14).
112
+ */
113
+#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
114
+
115
+static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
116
+ uint64_t size, u32 domain, bool sg)
115117 {
116
- size_t acc_size;
118
+ uint64_t reserved_for_pt =
119
+ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
120
+ size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
117121 int ret = 0;
118122
119123 acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
120124 sizeof(struct amdgpu_bo));
121125
122
- spin_lock(&kfd_mem_limit.mem_limit_lock);
126
+ vram_needed = 0;
123127 if (domain == AMDGPU_GEM_DOMAIN_GTT) {
124
- if (kfd_mem_limit.system_mem_used + (acc_size + size) >
125
- kfd_mem_limit.max_system_mem_limit) {
126
- ret = -ENOMEM;
127
- goto err_no_mem;
128
- }
129
- kfd_mem_limit.system_mem_used += (acc_size + size);
130
- } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
131
- if ((kfd_mem_limit.system_mem_used + acc_size >
132
- kfd_mem_limit.max_system_mem_limit) ||
133
- (kfd_mem_limit.userptr_mem_used + (size + acc_size) >
134
- kfd_mem_limit.max_userptr_mem_limit)) {
135
- ret = -ENOMEM;
136
- goto err_no_mem;
137
- }
138
- kfd_mem_limit.system_mem_used += acc_size;
139
- kfd_mem_limit.userptr_mem_used += size;
128
+ /* TTM GTT memory */
129
+ system_mem_needed = acc_size + size;
130
+ ttm_mem_needed = acc_size + size;
131
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
132
+ /* Userptr */
133
+ system_mem_needed = acc_size + size;
134
+ ttm_mem_needed = acc_size;
135
+ } else {
136
+ /* VRAM and SG */
137
+ system_mem_needed = acc_size;
138
+ ttm_mem_needed = acc_size;
139
+ if (domain == AMDGPU_GEM_DOMAIN_VRAM)
140
+ vram_needed = size;
140141 }
141
-err_no_mem:
142
+
143
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
144
+
145
+ if (kfd_mem_limit.system_mem_used + system_mem_needed >
146
+ kfd_mem_limit.max_system_mem_limit)
147
+ pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
148
+
149
+ if ((kfd_mem_limit.system_mem_used + system_mem_needed >
150
+ kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
151
+ (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
152
+ kfd_mem_limit.max_ttm_mem_limit) ||
153
+ (adev->kfd.vram_used + vram_needed >
154
+ adev->gmc.real_vram_size - reserved_for_pt)) {
155
+ ret = -ENOMEM;
156
+ } else {
157
+ kfd_mem_limit.system_mem_used += system_mem_needed;
158
+ kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
159
+ adev->kfd.vram_used += vram_needed;
160
+ }
161
+
142162 spin_unlock(&kfd_mem_limit.mem_limit_lock);
143163 return ret;
144164 }
145165
146
-static void unreserve_system_mem_limit(struct amdgpu_device *adev,
147
- uint64_t size, u32 domain)
166
+static void unreserve_mem_limit(struct amdgpu_device *adev,
167
+ uint64_t size, u32 domain, bool sg)
148168 {
149169 size_t acc_size;
150170
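A quick way to sanity-check the new accounting above, outside the kernel: the sketch below (plain C, standalone, not part of this patch) reproduces the formulas from amdgpu_amdkfd_gpuvm_init_mem_limits(), ESTIMATE_PT_SIZE() and amdgpu_amdkfd_reserve_mem_limit(). The 64 GiB RAM and 16 GiB VRAM figures are made up for illustration, and ESTIMATE_PT_SIZE() is simply applied here to RAM + VRAM rather than to the driver's accumulated amdgpu_amdkfd_total_mem_size.

```c
/* Standalone illustration, not part of this patch. */
#include <stdint.h>
#include <stdio.h>

#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)	/* 1/16K, as above */

int main(void)
{
	uint64_t mem  = 64ULL << 30;	/* hypothetical system RAM  */
	uint64_t vram = 16ULL << 30;	/* hypothetical device VRAM */

	uint64_t max_system = mem - (mem >> 4);		/* 15/16 of RAM */
	uint64_t max_ttm    = (mem >> 1) - (mem >> 3);	/*  3/8 of RAM  */
	uint64_t pt_reserve = ESTIMATE_PT_SIZE(mem + vram);

	printf("system limit %lluM, TTM limit %lluM, PT reserve %lluM\n",
	       (unsigned long long)(max_system >> 20),
	       (unsigned long long)(max_ttm >> 20),
	       (unsigned long long)(pt_reserve >> 20));

	/* Charging rules in amdgpu_amdkfd_reserve_mem_limit():
	 *   GTT BO:      system += size + acc_size,  ttm += size + acc_size
	 *   userptr BO:  system += size + acc_size,  ttm += acc_size
	 *   VRAM/SG BO:  system += acc_size,         ttm += acc_size,
	 *                and for VRAM, vram_used += size is checked against
	 *                real_vram_size - pt_reserve
	 */
	return 0;
}
```

For these made-up figures that works out to a 60 GiB system limit, a 24 GiB TTM limit, and 5 MiB held back from VRAM for page tables.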
....@@ -154,71 +174,62 @@
154174 spin_lock(&kfd_mem_limit.mem_limit_lock);
155175 if (domain == AMDGPU_GEM_DOMAIN_GTT) {
156176 kfd_mem_limit.system_mem_used -= (acc_size + size);
157
- } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
177
+ kfd_mem_limit.ttm_mem_used -= (acc_size + size);
178
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
179
+ kfd_mem_limit.system_mem_used -= (acc_size + size);
180
+ kfd_mem_limit.ttm_mem_used -= acc_size;
181
+ } else {
158182 kfd_mem_limit.system_mem_used -= acc_size;
159
- kfd_mem_limit.userptr_mem_used -= size;
183
+ kfd_mem_limit.ttm_mem_used -= acc_size;
184
+ if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
185
+ adev->kfd.vram_used -= size;
186
+ WARN_ONCE(adev->kfd.vram_used < 0,
187
+ "kfd VRAM memory accounting unbalanced");
188
+ }
160189 }
161190 WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
162191 "kfd system memory accounting unbalanced");
163
- WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
164
- "kfd userptr memory accounting unbalanced");
192
+ WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
193
+ "kfd TTM memory accounting unbalanced");
165194
166195 spin_unlock(&kfd_mem_limit.mem_limit_lock);
167196 }
168197
169
-void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
198
+void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
170199 {
171
- spin_lock(&kfd_mem_limit.mem_limit_lock);
200
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
201
+ u32 domain = bo->preferred_domains;
202
+ bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
172203
173204 if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
174
- kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
175
- kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
176
- } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
177
- kfd_mem_limit.system_mem_used -=
178
- (bo->tbo.acc_size + amdgpu_bo_size(bo));
205
+ domain = AMDGPU_GEM_DOMAIN_CPU;
206
+ sg = false;
179207 }
180
- WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
181
- "kfd system memory accounting unbalanced");
182
- WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
183
- "kfd userptr memory accounting unbalanced");
184208
185
- spin_unlock(&kfd_mem_limit.mem_limit_lock);
209
+ unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
186210 }
187211
188212
189
-/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
213
+/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
190214 * reservation object.
191215 *
192216 * @bo: [IN] Remove eviction fence(s) from this BO
193
- * @ef: [IN] If ef is specified, then this eviction fence is removed if it
217
+ * @ef: [IN] This eviction fence is removed if it
194218 * is present in the shared list.
195
- * @ef_list: [OUT] Returns list of eviction fences. These fences are removed
196
- * from BO's reservation object shared list.
197
- * @ef_count: [OUT] Number of fences in ef_list.
198219 *
199
- * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
200
- * called to restore the eviction fences and to avoid memory leak. This is
201
- * useful for shared BOs.
202220 * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
203221 */
204222 static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
205
- struct amdgpu_amdkfd_fence *ef,
206
- struct amdgpu_amdkfd_fence ***ef_list,
207
- unsigned int *ef_count)
223
+ struct amdgpu_amdkfd_fence *ef)
208224 {
209
- struct reservation_object *resv = bo->tbo.resv;
210
- struct reservation_object_list *old, *new;
225
+ struct dma_resv *resv = bo->tbo.base.resv;
226
+ struct dma_resv_list *old, *new;
211227 unsigned int i, j, k;
212228
213
- if (!ef && !ef_list)
229
+ if (!ef)
214230 return -EINVAL;
215231
216
- if (ef_list) {
217
- *ef_list = NULL;
218
- *ef_count = 0;
219
- }
220
-
221
- old = reservation_object_get_list(resv);
232
+ old = dma_resv_get_list(resv);
222233 if (!old)
223234 return 0;
224235
....@@ -234,10 +245,9 @@
234245 struct dma_fence *f;
235246
236247 f = rcu_dereference_protected(old->shared[i],
237
- reservation_object_held(resv));
248
+ dma_resv_held(resv));
238249
239
- if ((ef && f->context == ef->base.context) ||
240
- (!ef && to_amdgpu_amdkfd_fence(f)))
250
+ if (f->context == ef->base.context)
241251 RCU_INIT_POINTER(new->shared[--j], f);
242252 else
243253 RCU_INIT_POINTER(new->shared[k++], f);
....@@ -245,70 +255,58 @@
245255 new->shared_max = old->shared_max;
246256 new->shared_count = k;
247257
248
- if (!ef) {
249
- unsigned int count = old->shared_count - j;
250
-
251
- /* Alloc memory for count number of eviction fence pointers.
252
- * Fill the ef_list array and ef_count
253
- */
254
- *ef_list = kcalloc(count, sizeof(**ef_list), GFP_KERNEL);
255
- *ef_count = count;
256
-
257
- if (!*ef_list) {
258
- kfree(new);
259
- return -ENOMEM;
260
- }
261
- }
262
-
263258 /* Install the new fence list, seqcount provides the barriers */
264
- write_seqlock(&resv->seq);
259
+ write_seqcount_begin(&resv->seq);
265260 RCU_INIT_POINTER(resv->fence, new);
266
- write_sequnlock(&resv->seq);
261
+ write_seqcount_end(&resv->seq);
267262
268263 /* Drop the references to the removed fences or move them to ef_list */
269264 for (i = j, k = 0; i < old->shared_count; ++i) {
270265 struct dma_fence *f;
271266
272267 f = rcu_dereference_protected(new->shared[i],
273
- reservation_object_held(resv));
274
- if (!ef)
275
- (*ef_list)[k++] = to_amdgpu_amdkfd_fence(f);
276
- else
277
- dma_fence_put(f);
268
+ dma_resv_held(resv));
269
+ dma_fence_put(f);
278270 }
279271 kfree_rcu(old, rcu);
280272
281273 return 0;
282274 }
283275
284
-/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
285
- * reservation object.
286
- *
287
- * @bo: [IN] Add eviction fences to this BO
288
- * @ef_list: [IN] List of eviction fences to be added
289
- * @ef_count: [IN] Number of fences in ef_list.
290
- *
291
- * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
292
- * function.
293
- */
294
-static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
295
- struct amdgpu_amdkfd_fence **ef_list,
296
- unsigned int ef_count)
276
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
297277 {
298
- int i;
278
+ struct amdgpu_bo *root = bo;
279
+ struct amdgpu_vm_bo_base *vm_bo;
280
+ struct amdgpu_vm *vm;
281
+ struct amdkfd_process_info *info;
282
+ struct amdgpu_amdkfd_fence *ef;
283
+ int ret;
299284
300
- if (!ef_list || !ef_count)
301
- return;
285
+ /* we can always get vm_bo from root PD bo.*/
286
+ while (root->parent)
287
+ root = root->parent;
302288
303
- for (i = 0; i < ef_count; i++) {
304
- amdgpu_bo_fence(bo, &ef_list[i]->base, true);
305
- /* Re-adding the fence takes an additional reference. Drop that
306
- * reference.
307
- */
308
- dma_fence_put(&ef_list[i]->base);
309
- }
289
+ vm_bo = root->vm_bo;
290
+ if (!vm_bo)
291
+ return 0;
310292
311
- kfree(ef_list);
293
+ vm = vm_bo->vm;
294
+ if (!vm)
295
+ return 0;
296
+
297
+ info = vm->process_info;
298
+ if (!info || !info->eviction_fence)
299
+ return 0;
300
+
301
+ ef = container_of(dma_fence_get(&info->eviction_fence->base),
302
+ struct amdgpu_amdkfd_fence, base);
303
+
304
+ BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
305
+ ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
306
+ dma_resv_unlock(bo->tbo.base.resv);
307
+
308
+ dma_fence_put(&ef->base);
309
+ return ret;
312310 }
313311
314312 static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
....@@ -326,28 +324,16 @@
326324 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
327325 if (ret)
328326 goto validate_fail;
329
- if (wait) {
330
- struct amdgpu_amdkfd_fence **ef_list;
331
- unsigned int ef_count;
332
-
333
- ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
334
- &ef_count);
335
- if (ret)
336
- goto validate_fail;
337
-
338
- ttm_bo_wait(&bo->tbo, false, false);
339
- amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
340
- }
327
+ if (wait)
328
+ amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
341329
342330 validate_fail:
343331 return ret;
344332 }
345333
346
-static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
334
+static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
347335 {
348
- struct amdgpu_vm_parser *p = param;
349
-
350
- return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
336
+ return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
351337 }
352338
353339 /* vm_validate_pt_pd_bos - Validate page table and directory BOs
....@@ -361,56 +347,31 @@
361347 {
362348 struct amdgpu_bo *pd = vm->root.base.bo;
363349 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
364
- struct amdgpu_vm_parser param;
365
- uint64_t addr, flags = AMDGPU_PTE_VALID;
366350 int ret;
367351
368
- param.domain = AMDGPU_GEM_DOMAIN_VRAM;
369
- param.wait = false;
370
-
371
- ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
372
- &param);
352
+ ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL);
373353 if (ret) {
374
- pr_err("amdgpu: failed to validate PT BOs\n");
354
+ pr_err("failed to validate PT BOs\n");
375355 return ret;
376356 }
377357
378
- ret = amdgpu_amdkfd_validate(&param, pd);
358
+ ret = amdgpu_amdkfd_validate_vm_bo(NULL, pd);
379359 if (ret) {
380
- pr_err("amdgpu: failed to validate PD\n");
360
+ pr_err("failed to validate PD\n");
381361 return ret;
382362 }
383363
384
- addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
385
- amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
386
- vm->pd_phys_addr = addr;
364
+ vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
387365
388366 if (vm->use_cpu_for_update) {
389367 ret = amdgpu_bo_kmap(pd, NULL);
390368 if (ret) {
391
- pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
369
+ pr_err("failed to kmap PD, ret=%d\n", ret);
392370 return ret;
393371 }
394372 }
395373
396374 return 0;
397
-}
398
-
399
-static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
400
- struct dma_fence *f)
401
-{
402
- int ret = amdgpu_sync_fence(adev, sync, f, false);
403
-
404
- /* Sync objects can't handle multiple GPUs (contexts) updating
405
- * sync->last_vm_update. Fortunately we don't need it for
406
- * KFD's purposes, so we can just drop that fence.
407
- */
408
- if (sync->last_vm_update) {
409
- dma_fence_put(sync->last_vm_update);
410
- sync->last_vm_update = NULL;
411
- }
412
-
413
- return ret;
414375 }
415376
416377 static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
....@@ -419,11 +380,44 @@
419380 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
420381 int ret;
421382
422
- ret = amdgpu_vm_update_directories(adev, vm);
383
+ ret = amdgpu_vm_update_pdes(adev, vm, false);
423384 if (ret)
424385 return ret;
425386
426
- return sync_vm_fence(adev, sync, vm->last_update);
387
+ return amdgpu_sync_fence(sync, vm->last_update);
388
+}
389
+
390
+static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
391
+{
392
+ struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
393
+ bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
394
+ uint32_t mapping_flags;
395
+
396
+ mapping_flags = AMDGPU_VM_PAGE_READABLE;
397
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
398
+ mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
399
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
400
+ mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
401
+
402
+ switch (adev->asic_type) {
403
+ case CHIP_ARCTURUS:
404
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
405
+ if (bo_adev == adev)
406
+ mapping_flags |= coherent ?
407
+ AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
408
+ else
409
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
410
+ } else {
411
+ mapping_flags |= coherent ?
412
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
413
+ }
414
+ break;
415
+ default:
416
+ mapping_flags |= coherent ?
417
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
418
+ }
419
+
420
+ return amdgpu_gem_va_map_flags(adev, mapping_flags);
427421 }
428422
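The MTYPE selection in get_pte_flags() above boils down to a small decision table. Here is a hedged restatement as a standalone function (stub enum and parameter names invented for illustration; only the branching mirrors the driver code):

```c
/* Simplified restatement of the MTYPE choice in get_pte_flags() above.
 * Names are stubs for illustration; this is not driver code.
 */
#include <stdbool.h>
#include <stdio.h>

enum mtype { MTYPE_NC, MTYPE_UC, MTYPE_CC, MTYPE_RW };

static enum mtype pick_mtype(bool is_arcturus, bool is_vram_bo,
			     bool bo_is_local, bool coherent)
{
	if (is_arcturus && is_vram_bo) {
		if (bo_is_local)	/* VRAM of the GPU doing the mapping */
			return coherent ? MTYPE_CC : MTYPE_RW;
		return MTYPE_UC;	/* VRAM of a peer GPU: always uncached */
	}
	/* GTT/system memory, or any BO on non-Arcturus ASICs */
	return coherent ? MTYPE_UC : MTYPE_NC;
}

int main(void)
{
	/* Local, non-coherent VRAM on Arcturus maps as MTYPE_RW (3) */
	printf("%d\n", pick_mtype(true, true, true, false));
	return 0;
}
```

The design point encoded here: on Arcturus, VRAM belonging to a peer GPU is always mapped uncached, local VRAM gets the cacheable RW type (or CC when the COHERENT flag is set), and everything else keeps the UC/NC split driven by KFD_IOC_ALLOC_MEM_FLAGS_COHERENT.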
429423 /* add_bo_to_vm - Add a BO to a VM
....@@ -444,7 +438,6 @@
444438 {
445439 int ret;
446440 struct kfd_bo_va_list *bo_va_entry;
447
- struct amdgpu_bo *pd = vm->root.base.bo;
448441 struct amdgpu_bo *bo = mem->bo;
449442 uint64_t va = mem->va;
450443 struct list_head *list_bo_va = &mem->bo_va_list;
....@@ -475,42 +468,23 @@
475468 }
476469
477470 bo_va_entry->va = va;
478
- bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev,
479
- mem->mapping_flags);
471
+ bo_va_entry->pte_flags = get_pte_flags(adev, mem);
480472 bo_va_entry->kgd_dev = (void *)adev;
481473 list_add(&bo_va_entry->bo_list, list_bo_va);
482474
483475 if (p_bo_va_entry)
484476 *p_bo_va_entry = bo_va_entry;
485477
486
- /* Allocate new page tables if needed and validate
487
- * them. Clearing of new page tables and validate need to wait
488
- * on move fences. We don't want that to trigger the eviction
489
- * fence, so remove it temporarily.
490
- */
491
- amdgpu_amdkfd_remove_eviction_fence(pd,
492
- vm->process_info->eviction_fence,
493
- NULL, NULL);
494
-
495
- ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
496
- if (ret) {
497
- pr_err("Failed to allocate pts, err=%d\n", ret);
498
- goto err_alloc_pts;
499
- }
500
-
478
+ /* Allocate validate page tables if needed */
501479 ret = vm_validate_pt_pd_bos(vm);
502480 if (ret) {
503481 pr_err("validate_pt_pd_bos() failed\n");
504482 goto err_alloc_pts;
505483 }
506484
507
- /* Add the eviction fence back */
508
- amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
509
-
510485 return 0;
511486
512487 err_alloc_pts:
513
- amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
514488 amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
515489 list_del(&bo_va_entry->bo_list);
516490 err_vmadd:
....@@ -537,13 +511,24 @@
537511 struct amdgpu_bo *bo = mem->bo;
538512
539513 INIT_LIST_HEAD(&entry->head);
540
- entry->shared = true;
514
+ entry->num_shared = 1;
541515 entry->bo = &bo->tbo;
542516 mutex_lock(&process_info->lock);
543517 if (userptr)
544518 list_add_tail(&entry->head, &process_info->userptr_valid_list);
545519 else
546520 list_add_tail(&entry->head, &process_info->kfd_bo_list);
521
+ mutex_unlock(&process_info->lock);
522
+}
523
+
524
+static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
525
+ struct amdkfd_process_info *process_info)
526
+{
527
+ struct ttm_validate_buffer *bo_list_entry;
528
+
529
+ bo_list_entry = &mem->validate_list;
530
+ mutex_lock(&process_info->lock);
531
+ list_del(&bo_list_entry->head);
547532 mutex_unlock(&process_info->lock);
548533 }
549534
....@@ -559,8 +544,7 @@
559544 *
560545 * Returns 0 for success, negative errno for errors.
561546 */
562
-static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
563
- uint64_t user_addr)
547
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
564548 {
565549 struct amdkfd_process_info *process_info = mem->process_info;
566550 struct amdgpu_bo *bo = mem->bo;
....@@ -569,7 +553,7 @@
569553
570554 mutex_lock(&process_info->lock);
571555
572
- ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
556
+ ret = amdgpu_ttm_tt_set_userptr(&bo->tbo, user_addr, 0);
573557 if (ret) {
574558 pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
575559 goto out;
....@@ -582,27 +566,11 @@
582566 goto out;
583567 }
584568
585
- /* If no restore worker is running concurrently, user_pages
586
- * should not be allocated
587
- */
588
- WARN(mem->user_pages, "Leaking user_pages array");
589
-
590
- mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
591
- sizeof(struct page *),
592
- GFP_KERNEL | __GFP_ZERO);
593
- if (!mem->user_pages) {
594
- pr_err("%s: Failed to allocate pages array\n", __func__);
595
- ret = -ENOMEM;
596
- goto unregister_out;
597
- }
598
-
599
- ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
569
+ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
600570 if (ret) {
601571 pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
602
- goto free_out;
572
+ goto unregister_out;
603573 }
604
-
605
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
606574
607575 ret = amdgpu_bo_reserve(bo, true);
608576 if (ret) {
....@@ -616,11 +584,7 @@
616584 amdgpu_bo_unreserve(bo);
617585
618586 release_out:
619
- if (ret)
620
- release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
621
-free_out:
622
- kvfree(mem->user_pages);
623
- mem->user_pages = NULL;
587
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
624588 unregister_out:
625589 if (ret)
626590 amdgpu_mn_unregister(bo);
....@@ -676,26 +640,24 @@
676640 if (!ctx->vm_pd)
677641 return -ENOMEM;
678642
679
- ctx->kfd_bo.robj = bo;
680643 ctx->kfd_bo.priority = 0;
681644 ctx->kfd_bo.tv.bo = &bo->tbo;
682
- ctx->kfd_bo.tv.shared = true;
683
- ctx->kfd_bo.user_pages = NULL;
645
+ ctx->kfd_bo.tv.num_shared = 1;
684646 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
685647
686648 amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
687649
688650 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
689651 false, &ctx->duplicates);
690
- if (!ret)
691
- ctx->reserved = true;
692
- else {
693
- pr_err("Failed to reserve buffers in ttm\n");
652
+ if (ret) {
653
+ pr_err("Failed to reserve buffers in ttm.\n");
694654 kfree(ctx->vm_pd);
695655 ctx->vm_pd = NULL;
656
+ return ret;
696657 }
697658
698
- return ret;
659
+ ctx->reserved = true;
660
+ return 0;
699661 }
700662
701663 /**
....@@ -741,11 +703,9 @@
741703 return -ENOMEM;
742704 }
743705
744
- ctx->kfd_bo.robj = bo;
745706 ctx->kfd_bo.priority = 0;
746707 ctx->kfd_bo.tv.bo = &bo->tbo;
747
- ctx->kfd_bo.tv.shared = true;
748
- ctx->kfd_bo.user_pages = NULL;
708
+ ctx->kfd_bo.tv.num_shared = 1;
749709 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
750710
751711 i = 0;
....@@ -762,17 +722,15 @@
762722
763723 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
764724 false, &ctx->duplicates);
765
- if (!ret)
766
- ctx->reserved = true;
767
- else
768
- pr_err("Failed to reserve buffers in ttm.\n");
769
-
770725 if (ret) {
726
+ pr_err("Failed to reserve buffers in ttm.\n");
771727 kfree(ctx->vm_pd);
772728 ctx->vm_pd = NULL;
729
+ return ret;
773730 }
774731
775
- return ret;
732
+ ctx->reserved = true;
733
+ return 0;
776734 }
777735
778736 /**
....@@ -811,25 +769,12 @@
811769 {
812770 struct amdgpu_bo_va *bo_va = entry->bo_va;
813771 struct amdgpu_vm *vm = bo_va->base.vm;
814
- struct amdgpu_bo *pd = vm->root.base.bo;
815772
816
- /* Remove eviction fence from PD (and thereby from PTs too as
817
- * they share the resv. object). Otherwise during PT update
818
- * job (see amdgpu_vm_bo_update_mapping), eviction fence would
819
- * get added to job->sync object and job execution would
820
- * trigger the eviction fence.
821
- */
822
- amdgpu_amdkfd_remove_eviction_fence(pd,
823
- vm->process_info->eviction_fence,
824
- NULL, NULL);
825773 amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
826774
827775 amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
828776
829
- /* Add the eviction fence back */
830
- amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
831
-
832
- sync_vm_fence(adev, sync, bo_va->last_pt_update);
777
+ amdgpu_sync_fence(sync, bo_va->last_pt_update);
833778
834779 return 0;
835780 }
....@@ -839,13 +784,7 @@
839784 struct amdgpu_sync *sync)
840785 {
841786 int ret;
842
- struct amdgpu_vm *vm;
843
- struct amdgpu_bo_va *bo_va;
844
- struct amdgpu_bo *bo;
845
-
846
- bo_va = entry->bo_va;
847
- vm = bo_va->base.vm;
848
- bo = bo_va->base.bo;
787
+ struct amdgpu_bo_va *bo_va = entry->bo_va;
849788
850789 /* Update the page tables */
851790 ret = amdgpu_vm_bo_update(adev, bo_va, false);
....@@ -854,7 +793,7 @@
854793 return ret;
855794 }
856795
857
- return sync_vm_fence(adev, sync, bo_va->last_pt_update);
796
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update);
858797 }
859798
860799 static int map_bo_to_gpuvm(struct amdgpu_device *adev,
....@@ -889,6 +828,24 @@
889828 return ret;
890829 }
891830
831
+static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
832
+{
833
+ struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
834
+
835
+ if (!sg)
836
+ return NULL;
837
+ if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
838
+ kfree(sg);
839
+ return NULL;
840
+ }
841
+ sg->sgl->dma_address = addr;
842
+ sg->sgl->length = size;
843
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
844
+ sg->sgl->dma_length = size;
845
+#endif
846
+ return sg;
847
+}
848
+
892849 static int process_validate_vms(struct amdkfd_process_info *process_info)
893850 {
894851 struct amdgpu_vm *peer_vm;
....@@ -897,6 +854,26 @@
897854 list_for_each_entry(peer_vm, &process_info->vm_list_head,
898855 vm_list_node) {
899856 ret = vm_validate_pt_pd_bos(peer_vm);
857
+ if (ret)
858
+ return ret;
859
+ }
860
+
861
+ return 0;
862
+}
863
+
864
+static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
865
+ struct amdgpu_sync *sync)
866
+{
867
+ struct amdgpu_vm *peer_vm;
868
+ int ret;
869
+
870
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
871
+ vm_list_node) {
872
+ struct amdgpu_bo *pd = peer_vm->root.base.bo;
873
+
874
+ ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv,
875
+ AMDGPU_SYNC_NE_OWNER,
876
+ AMDGPU_FENCE_OWNER_KFD);
900877 if (ret)
901878 return ret;
902879 }
....@@ -966,9 +943,13 @@
966943 pr_err("validate_pt_pd_bos() failed\n");
967944 goto validate_pd_fail;
968945 }
969
- ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false);
946
+ ret = amdgpu_bo_sync_wait(vm->root.base.bo,
947
+ AMDGPU_FENCE_OWNER_KFD, false);
970948 if (ret)
971949 goto wait_pd_fail;
950
+ ret = dma_resv_reserve_shared(vm->root.base.bo->tbo.base.resv, 1);
951
+ if (ret)
952
+ goto reserve_shared_fail;
972953 amdgpu_bo_fence(vm->root.base.bo,
973954 &vm->process_info->eviction_fence->base, true);
974955 amdgpu_bo_unreserve(vm->root.base.bo);
....@@ -982,6 +963,7 @@
982963
983964 return 0;
984965
966
+reserve_shared_fail:
985967 wait_pd_fail:
986968 validate_pd_fail:
987969 amdgpu_bo_unreserve(vm->root.base.bo);
....@@ -1001,8 +983,8 @@
1001983 return ret;
1002984 }
1003985
1004
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
1005
- void **process_info,
986
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, u32 pasid,
987
+ void **vm, void **process_info,
1006988 struct dma_fence **ef)
1007989 {
1008990 struct amdgpu_device *adev = get_amdgpu_device(kgd);
....@@ -1014,7 +996,7 @@
1014996 return -ENOMEM;
1015997
1016998 /* Initialize AMDGPU part of the VM */
1017
- ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
999
+ ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid);
10181000 if (ret) {
10191001 pr_err("Failed init vm ret %d\n", ret);
10201002 goto amdgpu_vm_init_fail;
....@@ -1037,22 +1019,26 @@
10371019 }
10381020
10391021 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
1040
- struct file *filp,
1022
+ struct file *filp, u32 pasid,
10411023 void **vm, void **process_info,
10421024 struct dma_fence **ef)
10431025 {
10441026 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1045
- struct drm_file *drm_priv = filp->private_data;
1046
- struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
1047
- struct amdgpu_vm *avm = &drv_priv->vm;
1027
+ struct amdgpu_fpriv *drv_priv;
1028
+ struct amdgpu_vm *avm;
10481029 int ret;
1030
+
1031
+ ret = amdgpu_file_to_fpriv(filp, &drv_priv);
1032
+ if (ret)
1033
+ return ret;
1034
+ avm = &drv_priv->vm;
10491035
10501036 /* Already a compute VM? */
10511037 if (avm->process_info)
10521038 return -EINVAL;
10531039
10541040 /* Convert VM into a compute VM */
1055
- ret = amdgpu_vm_make_compute(adev, avm);
1041
+ ret = amdgpu_vm_make_compute(adev, avm, pasid);
10561042 if (ret)
10571043 return ret;
10581044
....@@ -1086,6 +1072,8 @@
10861072 list_del(&vm->vm_list_node);
10871073 mutex_unlock(&process_info->lock);
10881074
1075
+ vm->process_info = NULL;
1076
+
10891077 /* Release per-process resources when last compute VM is destroyed */
10901078 if (!process_info->n_vms) {
10911079 WARN_ON(!list_empty(&process_info->kfd_bo_list));
....@@ -1115,11 +1103,34 @@
11151103 kfree(vm);
11161104 }
11171105
1118
-uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
1106
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
1107
+{
1108
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
1109
+ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1110
+
1111
+ if (WARN_ON(!kgd || !vm))
1112
+ return;
1113
+
1114
+ pr_debug("Releasing process vm %p\n", vm);
1115
+
1116
+ /* The original pasid of amdgpu vm has already been
1117
+ * released during making a amdgpu vm to a compute vm
1118
+ * The current pasid is managed by kfd and will be
1119
+ * released on kfd process destroy. Set amdgpu pasid
1120
+ * to 0 to avoid duplicate release.
1121
+ */
1122
+ amdgpu_vm_release_compute(adev, avm);
1123
+}
1124
+
1125
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
11191126 {
11201127 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1128
+ struct amdgpu_bo *pd = avm->root.base.bo;
1129
+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
11211130
1122
- return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
1131
+ if (adev->asic_type < CHIP_VEGA10)
1132
+ return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
1133
+ return avm->pd_phys_addr;
11231134 }
11241135
11251136 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
....@@ -1129,44 +1140,57 @@
11291140 {
11301141 struct amdgpu_device *adev = get_amdgpu_device(kgd);
11311142 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1143
+ enum ttm_bo_type bo_type = ttm_bo_type_device;
1144
+ struct sg_table *sg = NULL;
11321145 uint64_t user_addr = 0;
11331146 struct amdgpu_bo *bo;
11341147 struct amdgpu_bo_param bp;
1135
- int byte_align;
11361148 u32 domain, alloc_domain;
11371149 u64 alloc_flags;
1138
- uint32_t mapping_flags;
11391150 int ret;
11401151
11411152 /*
11421153 * Check on which domain to allocate BO
11431154 */
1144
- if (flags & ALLOC_MEM_FLAGS_VRAM) {
1155
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
11451156 domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
1146
- alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
1147
- alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
1157
+ alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
1158
+ alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
11481159 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
11491160 AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
1150
- } else if (flags & ALLOC_MEM_FLAGS_GTT) {
1161
+ } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
11511162 domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
11521163 alloc_flags = 0;
1153
- } else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
1164
+ } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
11541165 domain = AMDGPU_GEM_DOMAIN_GTT;
11551166 alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
11561167 alloc_flags = 0;
11571168 if (!offset || !*offset)
11581169 return -EINVAL;
11591170 user_addr = untagged_addr(*offset);
1171
+ } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
1172
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
1173
+ domain = AMDGPU_GEM_DOMAIN_GTT;
1174
+ alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
1175
+ bo_type = ttm_bo_type_sg;
1176
+ alloc_flags = 0;
1177
+ if (size > UINT_MAX)
1178
+ return -EINVAL;
1179
+ sg = create_doorbell_sg(*offset, size);
1180
+ if (!sg)
1181
+ return -ENOMEM;
11601182 } else {
11611183 return -EINVAL;
11621184 }
11631185
11641186 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
1165
- if (!*mem)
1166
- return -ENOMEM;
1187
+ if (!*mem) {
1188
+ ret = -ENOMEM;
1189
+ goto err;
1190
+ }
11671191 INIT_LIST_HEAD(&(*mem)->bo_va_list);
11681192 mutex_init(&(*mem)->lock);
1169
- (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
1193
+ (*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
11701194
11711195 /* Workaround for AQL queue wraparound bug. Map the same
11721196 * memory twice. That means we only actually allocate half
....@@ -1175,30 +1199,14 @@
11751199 if ((*mem)->aql_queue)
11761200 size = size >> 1;
11771201
1178
- /* Workaround for TLB bug on older VI chips */
1179
- byte_align = (adev->family == AMDGPU_FAMILY_VI &&
1180
- adev->asic_type != CHIP_FIJI &&
1181
- adev->asic_type != CHIP_POLARIS10 &&
1182
- adev->asic_type != CHIP_POLARIS11) ?
1183
- VI_BO_SIZE_ALIGN : 1;
1184
-
1185
- mapping_flags = AMDGPU_VM_PAGE_READABLE;
1186
- if (flags & ALLOC_MEM_FLAGS_WRITABLE)
1187
- mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
1188
- if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
1189
- mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
1190
- if (flags & ALLOC_MEM_FLAGS_COHERENT)
1191
- mapping_flags |= AMDGPU_VM_MTYPE_UC;
1192
- else
1193
- mapping_flags |= AMDGPU_VM_MTYPE_NC;
1194
- (*mem)->mapping_flags = mapping_flags;
1202
+ (*mem)->alloc_flags = flags;
11951203
11961204 amdgpu_sync_create(&(*mem)->sync);
11971205
1198
- ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
1206
+ ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
11991207 if (ret) {
12001208 pr_debug("Insufficient system memory\n");
1201
- goto err_reserve_system_mem;
1209
+ goto err_reserve_limit;
12021210 }
12031211
12041212 pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
....@@ -1206,16 +1214,20 @@
12061214
12071215 memset(&bp, 0, sizeof(bp));
12081216 bp.size = size;
1209
- bp.byte_align = byte_align;
1217
+ bp.byte_align = 1;
12101218 bp.domain = alloc_domain;
12111219 bp.flags = alloc_flags;
1212
- bp.type = ttm_bo_type_device;
1220
+ bp.type = bo_type;
12131221 bp.resv = NULL;
12141222 ret = amdgpu_bo_create(adev, &bp, &bo);
12151223 if (ret) {
12161224 pr_debug("Failed to create BO on domain %s. ret %d\n",
12171225 domain_string(alloc_domain), ret);
12181226 goto err_bo_create;
1227
+ }
1228
+ if (bo_type == ttm_bo_type_sg) {
1229
+ bo->tbo.sg = sg;
1230
+ bo->tbo.ttm->sg = sg;
12191231 }
12201232 bo->kfd_bo = *mem;
12211233 (*mem)->bo = bo;
....@@ -1229,13 +1241,9 @@
12291241 add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
12301242
12311243 if (user_addr) {
1232
- ret = init_user_pages(*mem, current->mm, user_addr);
1233
- if (ret) {
1234
- mutex_lock(&avm->process_info->lock);
1235
- list_del(&(*mem)->validate_list.head);
1236
- mutex_unlock(&avm->process_info->lock);
1244
+ ret = init_user_pages(*mem, user_addr);
1245
+ if (ret)
12371246 goto allocate_init_user_pages_failed;
1238
- }
12391247 }
12401248
12411249 if (offset)
....@@ -1244,43 +1252,48 @@
12441252 return 0;
12451253
12461254 allocate_init_user_pages_failed:
1255
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
12471256 amdgpu_bo_unref(&bo);
12481257 /* Don't unreserve system mem limit twice */
1249
- goto err_reserve_system_mem;
1258
+ goto err_reserve_limit;
12501259 err_bo_create:
1251
- unreserve_system_mem_limit(adev, size, alloc_domain);
1252
-err_reserve_system_mem:
1260
+ unreserve_mem_limit(adev, size, alloc_domain, !!sg);
1261
+err_reserve_limit:
12531262 mutex_destroy(&(*mem)->lock);
12541263 kfree(*mem);
1264
+err:
1265
+ if (sg) {
1266
+ sg_free_table(sg);
1267
+ kfree(sg);
1268
+ }
12551269 return ret;
12561270 }
12571271
12581272 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
1259
- struct kgd_dev *kgd, struct kgd_mem *mem)
1273
+ struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
12601274 {
12611275 struct amdkfd_process_info *process_info = mem->process_info;
12621276 unsigned long bo_size = mem->bo->tbo.mem.size;
12631277 struct kfd_bo_va_list *entry, *tmp;
12641278 struct bo_vm_reservation_context ctx;
12651279 struct ttm_validate_buffer *bo_list_entry;
1280
+ unsigned int mapped_to_gpu_memory;
12661281 int ret;
1282
+ bool is_imported = 0;
12671283
12681284 mutex_lock(&mem->lock);
1269
-
1270
- if (mem->mapped_to_gpu_memory > 0) {
1271
- pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
1272
- mem->va, bo_size);
1273
- mutex_unlock(&mem->lock);
1274
- return -EBUSY;
1275
- }
1276
-
1285
+ mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
1286
+ is_imported = mem->is_imported;
12771287 mutex_unlock(&mem->lock);
12781288 /* lock is not needed after this, since mem is unused and will
12791289 * be freed anyway
12801290 */
12811291
1282
- /* No more MMU notifiers */
1283
- amdgpu_mn_unregister(mem->bo);
1292
+ if (mapped_to_gpu_memory > 0) {
1293
+ pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
1294
+ mem->va, bo_size);
1295
+ return -EBUSY;
1296
+ }
12841297
12851298 /* Make sure restore workers don't access the BO any more */
12861299 bo_list_entry = &mem->validate_list;
....@@ -1288,14 +1301,8 @@
12881301 list_del(&bo_list_entry->head);
12891302 mutex_unlock(&process_info->lock);
12901303
1291
- /* Free user pages if necessary */
1292
- if (mem->user_pages) {
1293
- pr_debug("%s: Freeing user_pages array\n", __func__);
1294
- if (mem->user_pages[0])
1295
- release_pages(mem->user_pages,
1296
- mem->bo->tbo.ttm->num_pages);
1297
- kvfree(mem->user_pages);
1298
- }
1304
+ /* No more MMU notifiers */
1305
+ amdgpu_mn_unregister(mem->bo);
12991306
13001307 ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
13011308 if (unlikely(ret))
....@@ -1306,8 +1313,7 @@
13061313 * attached
13071314 */
13081315 amdgpu_amdkfd_remove_eviction_fence(mem->bo,
1309
- process_info->eviction_fence,
1310
- NULL, NULL);
1316
+ process_info->eviction_fence);
13111317 pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
13121318 mem->va + bo_size * (1 + mem->aql_queue));
13131319
....@@ -1321,8 +1327,27 @@
13211327 /* Free the sync object */
13221328 amdgpu_sync_free(&mem->sync);
13231329
1330
+ /* If the SG is not NULL, it's one we created for a doorbell or mmio
1331
+ * remap BO. We need to free it.
1332
+ */
1333
+ if (mem->bo->tbo.sg) {
1334
+ sg_free_table(mem->bo->tbo.sg);
1335
+ kfree(mem->bo->tbo.sg);
1336
+ }
1337
+
1338
+ /* Update the size of the BO being freed if it was allocated from
1339
+ * VRAM and is not imported.
1340
+ */
1341
+ if (size) {
1342
+ if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
1343
+ (!is_imported))
1344
+ *size = bo_size;
1345
+ else
1346
+ *size = 0;
1347
+ }
1348
+
13241349 /* Free the BO*/
1325
- amdgpu_bo_unref(&mem->bo);
1350
+ drm_gem_object_put(&mem->bo->tbo.base);
13261351 mutex_destroy(&mem->lock);
13271352 kfree(mem);
13281353
....@@ -1361,9 +1386,9 @@
13611386 * concurrently and the queues are actually stopped
13621387 */
13631388 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
1364
- down_write(&current->mm->mmap_sem);
1389
+ mmap_write_lock(current->mm);
13651390 is_invalid_userptr = atomic_read(&mem->invalid);
1366
- up_write(&current->mm->mmap_sem);
1391
+ mmap_write_unlock(current->mm);
13671392 }
13681393
13691394 mutex_lock(&mem->lock);
....@@ -1385,7 +1410,8 @@
13851410 * the queues are still stopped and we can leave mapping for
13861411 * the next restore worker
13871412 */
1388
- if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
1413
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
1414
+ bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
13891415 is_invalid_userptr = true;
13901416
13911417 if (check_if_add_bo_to_vm(avm, mem)) {
....@@ -1427,7 +1453,7 @@
14271453 ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
14281454 is_invalid_userptr);
14291455 if (ret) {
1430
- pr_err("Failed to map radeon bo to gpuvm\n");
1456
+ pr_err("Failed to map bo to gpuvm\n");
14311457 goto map_bo_to_gpuvm_failed;
14321458 }
14331459
....@@ -1525,8 +1551,7 @@
15251551 if (mem->mapped_to_gpu_memory == 0 &&
15261552 !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
15271553 amdgpu_amdkfd_remove_eviction_fence(mem->bo,
1528
- process_info->eviction_fence,
1529
- NULL, NULL);
1554
+ process_info->eviction_fence);
15301555
15311556 unreserve_out:
15321557 unreserve_bo_and_vms(&ctx, false, false);
....@@ -1587,7 +1612,7 @@
15871612 }
15881613
15891614 amdgpu_amdkfd_remove_eviction_fence(
1590
- bo, mem->process_info->eviction_fence, NULL, NULL);
1615
+ bo, mem->process_info->eviction_fence);
15911616 list_del_init(&mem->validate_list.head);
15921617
15931618 if (size)
....@@ -1622,6 +1647,65 @@
16221647 return 0;
16231648 }
16241649
1650
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
1651
+ struct dma_buf *dma_buf,
1652
+ uint64_t va, void *vm,
1653
+ struct kgd_mem **mem, uint64_t *size,
1654
+ uint64_t *mmap_offset)
1655
+{
1656
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
1657
+ struct drm_gem_object *obj;
1658
+ struct amdgpu_bo *bo;
1659
+ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1660
+
1661
+ if (dma_buf->ops != &amdgpu_dmabuf_ops)
1662
+ /* Can't handle non-graphics buffers */
1663
+ return -EINVAL;
1664
+
1665
+ obj = dma_buf->priv;
1666
+ if (drm_to_adev(obj->dev) != adev)
1667
+ /* Can't handle buffers from other devices */
1668
+ return -EINVAL;
1669
+
1670
+ bo = gem_to_amdgpu_bo(obj);
1671
+ if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
1672
+ AMDGPU_GEM_DOMAIN_GTT)))
1673
+ /* Only VRAM and GTT BOs are supported */
1674
+ return -EINVAL;
1675
+
1676
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
1677
+ if (!*mem)
1678
+ return -ENOMEM;
1679
+
1680
+ if (size)
1681
+ *size = amdgpu_bo_size(bo);
1682
+
1683
+ if (mmap_offset)
1684
+ *mmap_offset = amdgpu_bo_mmap_offset(bo);
1685
+
1686
+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
1687
+ mutex_init(&(*mem)->lock);
1688
+
1689
+ (*mem)->alloc_flags =
1690
+ ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
1691
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
1692
+ | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
1693
+ | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
1694
+
1695
+ drm_gem_object_get(&bo->tbo.base);
1696
+ (*mem)->bo = bo;
1697
+ (*mem)->va = va;
1698
+ (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
1699
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
1700
+ (*mem)->mapped_to_gpu_memory = 0;
1701
+ (*mem)->process_info = avm->process_info;
1702
+ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
1703
+ amdgpu_sync_create(&(*mem)->sync);
1704
+ (*mem)->is_imported = true;
1705
+
1706
+ return 0;
1707
+}
1708
+
16251709 /* Evict a userptr BO by stopping the queues if necessary
16261710 *
16271711 * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
....@@ -1637,14 +1721,14 @@
16371721 struct mm_struct *mm)
16381722 {
16391723 struct amdkfd_process_info *process_info = mem->process_info;
1640
- int invalid, evicted_bos;
1724
+ int evicted_bos;
16411725 int r = 0;
16421726
1643
- invalid = atomic_inc_return(&mem->invalid);
1727
+ atomic_inc(&mem->invalid);
16441728 evicted_bos = atomic_inc_return(&process_info->evicted_bos);
16451729 if (evicted_bos == 1) {
16461730 /* First eviction, stop the queues */
1647
- r = kgd2kfd->quiesce_mm(mm);
1731
+ r = kgd2kfd_quiesce_mm(mm);
16481732 if (r)
16491733 pr_err("Failed to quiesce KFD\n");
16501734 schedule_delayed_work(&process_info->restore_userptr_work,
....@@ -1709,36 +1793,24 @@
17091793
17101794 bo = mem->bo;
17111795
1712
- if (!mem->user_pages) {
1713
- mem->user_pages =
1714
- kvmalloc_array(bo->tbo.ttm->num_pages,
1715
- sizeof(struct page *),
1716
- GFP_KERNEL | __GFP_ZERO);
1717
- if (!mem->user_pages) {
1718
- pr_err("%s: Failed to allocate pages array\n",
1719
- __func__);
1720
- return -ENOMEM;
1721
- }
1722
- } else if (mem->user_pages[0]) {
1723
- release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
1796
+ /* Get updated user pages */
1797
+ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
1798
+ if (ret) {
1799
+ pr_debug("%s: Failed to get user pages: %d\n",
1800
+ __func__, ret);
1801
+
1802
+ /* Return error -EBUSY or -ENOMEM, retry restore */
1803
+ return ret;
17241804 }
17251805
1726
- /* Get updated user pages */
1727
- ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
1728
- mem->user_pages);
1729
- if (ret) {
1730
- mem->user_pages[0] = NULL;
1731
- pr_info("%s: Failed to get user pages: %d\n",
1732
- __func__, ret);
1733
- /* Pretend it succeeded. It will fail later
1734
- * with a VM fault if the GPU tries to access
1735
- * it. Better than hanging indefinitely with
1736
- * stalled user mode queues.
1737
- */
1738
- }
1806
+ /*
1807
+ * FIXME: Cannot ignore the return code, must hold
1808
+ * notifier_lock
1809
+ */
1810
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
17391811
17401812 /* Mark the BO as valid unless it was invalidated
1741
- * again concurrently
1813
+ * again concurrently.
17421814 */
17431815 if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
17441816 return -EAGAIN;
....@@ -1771,7 +1843,8 @@
17711843 GFP_KERNEL);
17721844 if (!pd_bo_list_entries) {
17731845 pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
1774
- return -ENOMEM;
1846
+ ret = -ENOMEM;
1847
+ goto out_no_mem;
17751848 }
17761849
17771850 INIT_LIST_HEAD(&resv_list);
....@@ -1788,26 +1861,16 @@
17881861 validate_list.head) {
17891862 list_add_tail(&mem->resv_list.head, &resv_list);
17901863 mem->resv_list.bo = mem->validate_list.bo;
1791
- mem->resv_list.shared = mem->validate_list.shared;
1864
+ mem->resv_list.num_shared = mem->validate_list.num_shared;
17921865 }
17931866
17941867 /* Reserve all BOs and page tables for validation */
17951868 ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
17961869 WARN(!list_empty(&duplicates), "Duplicates should be empty");
17971870 if (ret)
1798
- goto out;
1871
+ goto out_free;
17991872
18001873 amdgpu_sync_create(&sync);
1801
-
1802
- /* Avoid triggering eviction fences when unmapping invalid
1803
- * userptr BOs (waits for all fences, doesn't use
1804
- * FENCE_OWNER_VM)
1805
- */
1806
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
1807
- vm_list_node)
1808
- amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
1809
- process_info->eviction_fence,
1810
- NULL, NULL);
18111874
18121875 ret = process_validate_vms(process_info);
18131876 if (ret)
....@@ -1821,10 +1884,8 @@
18211884
18221885 bo = mem->bo;
18231886
1824
- /* Copy pages array and validate the BO if we got user pages */
1825
- if (mem->user_pages[0]) {
1826
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
1827
- mem->user_pages);
1887
+ /* Validate the BO if we got user pages */
1888
+ if (bo->tbo.ttm->pages[0]) {
18281889 amdgpu_bo_placement_from_domain(bo, mem->domain);
18291890 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
18301891 if (ret) {
....@@ -1833,13 +1894,6 @@
18331894 }
18341895 }
18351896
1836
- /* Validate succeeded, now the BO owns the pages, free
1837
- * our copy of the pointer array. Put this BO back on
1838
- * the userptr_valid_list. If we need to revalidate
1839
- * it, we need to start from scratch.
1840
- */
1841
- kvfree(mem->user_pages);
1842
- mem->user_pages = NULL;
18431897 list_move_tail(&mem->validate_list.head,
18441898 &process_info->userptr_valid_list);
18451899
....@@ -1869,15 +1923,12 @@
18691923 ret = process_update_pds(process_info, &sync);
18701924
18711925 unreserve_out:
1872
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
1873
- vm_list_node)
1874
- amdgpu_bo_fence(peer_vm->root.base.bo,
1875
- &process_info->eviction_fence->base, true);
18761926 ttm_eu_backoff_reservation(&ticket, &resv_list);
18771927 amdgpu_sync_wait(&sync, false);
18781928 amdgpu_sync_free(&sync);
1879
-out:
1929
+out_free:
18801930 kfree(pd_bo_list_entries);
1931
+out_no_mem:
18811932
18821933 return ret;
18831934 }
....@@ -1936,12 +1987,13 @@
19361987 evicted_bos)
19371988 goto unlock_out;
19381989 evicted_bos = 0;
1939
- if (kgd2kfd->resume_mm(mm)) {
1990
+ if (kgd2kfd_resume_mm(mm)) {
19401991 pr_err("%s: Failed to resume KFD\n", __func__);
19411992 /* No recovery from this failure. Probably the CP is
19421993 * hanging. No point trying again.
19431994 */
19441995 }
1996
+
19451997 unlock_out:
19461998 mutex_unlock(&process_info->lock);
19471999 mmput(mm);
....@@ -2007,7 +2059,7 @@
20072059
20082060 list_add_tail(&mem->resv_list.head, &ctx.list);
20092061 mem->resv_list.bo = mem->validate_list.bo;
2010
- mem->resv_list.shared = mem->validate_list.shared;
2062
+ mem->resv_list.num_shared = mem->validate_list.num_shared;
20112063 }
20122064
20132065 ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
....@@ -2024,13 +2076,10 @@
20242076 if (ret)
20252077 goto validate_map_fail;
20262078
2027
- /* Wait for PD/PTs validate to finish */
2028
- /* FIXME: I think this isn't needed */
2029
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
2030
- vm_list_node) {
2031
- struct amdgpu_bo *bo = peer_vm->root.base.bo;
2032
-
2033
- ttm_bo_wait(&bo->tbo, false, false);
2079
+ ret = process_sync_pds_resv(process_info, &sync_obj);
2080
+ if (ret) {
2081
+ pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
2082
+ goto validate_map_fail;
20342083 }
20352084
20362085 /* Validate BOs and map them to GPUVM (update VM page tables). */
....@@ -2046,7 +2095,11 @@
20462095 pr_debug("Memory eviction: Validate BOs failed. Try again\n");
20472096 goto validate_map_fail;
20482097 }
2049
-
2098
+ ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
2099
+ if (ret) {
2100
+ pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
2101
+ goto validate_map_fail;
2102
+ }
20502103 list_for_each_entry(bo_va_entry, &mem->bo_va_list,
20512104 bo_list) {
20522105 ret = update_gpuvm_pte((struct amdgpu_device *)
....@@ -2067,6 +2120,7 @@
20672120 goto validate_map_fail;
20682121 }
20692122
2123
+ /* Wait for validate and PT updates to finish */
20702124 amdgpu_sync_wait(&sync_obj, false);
20712125
20722126 /* Release old eviction fence and create new one, because fence only
....@@ -2085,10 +2139,7 @@
20852139 process_info->eviction_fence = new_fence;
20862140 *ef = dma_fence_get(&new_fence->base);
20872141
2088
- /* Wait for validate to finish and attach new eviction fence */
2089
- list_for_each_entry(mem, &process_info->kfd_bo_list,
2090
- validate_list.head)
2091
- ttm_bo_wait(&mem->bo->tbo, false, false);
2142
+ /* Attach new eviction fence to all BOs */
20922143 list_for_each_entry(mem, &process_info->kfd_bo_list,
20932144 validate_list.head)
20942145 amdgpu_bo_fence(mem->bo,
....@@ -2110,3 +2161,115 @@
21102161 kfree(pd_bo_list);
21112162 return ret;
21122163 }
2164
+
2165
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
2166
+{
2167
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
2168
+ struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
2169
+ int ret;
2170
+
2171
+ if (!info || !gws)
2172
+ return -EINVAL;
2173
+
2174
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
2175
+ if (!*mem)
2176
+ return -ENOMEM;
2177
+
2178
+ mutex_init(&(*mem)->lock);
2179
+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
2180
+ (*mem)->bo = amdgpu_bo_ref(gws_bo);
2181
+ (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
2182
+ (*mem)->process_info = process_info;
2183
+ add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
2184
+ amdgpu_sync_create(&(*mem)->sync);
2185
+
2186
+
2187
+ /* Validate gws bo the first time it is added to process */
2188
+ mutex_lock(&(*mem)->process_info->lock);
2189
+ ret = amdgpu_bo_reserve(gws_bo, false);
2190
+ if (unlikely(ret)) {
2191
+ pr_err("Reserve gws bo failed %d\n", ret);
2192
+ goto bo_reservation_failure;
2193
+ }
2194
+
2195
+ ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
2196
+ if (ret) {
2197
+ pr_err("GWS BO validate failed %d\n", ret);
2198
+ goto bo_validation_failure;
2199
+ }
2200
+ /* GWS resource is shared b/t amdgpu and amdkfd
2201
+ * Add process eviction fence to bo so they can
2202
+ * evict each other.
2203
+ */
2204
+ ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1);
2205
+ if (ret)
2206
+ goto reserve_shared_fail;
2207
+ amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
2208
+ amdgpu_bo_unreserve(gws_bo);
2209
+ mutex_unlock(&(*mem)->process_info->lock);
2210
+
2211
+ return ret;
2212
+
2213
+reserve_shared_fail:
2214
+bo_validation_failure:
2215
+ amdgpu_bo_unreserve(gws_bo);
2216
+bo_reservation_failure:
2217
+ mutex_unlock(&(*mem)->process_info->lock);
2218
+ amdgpu_sync_free(&(*mem)->sync);
2219
+ remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
2220
+ amdgpu_bo_unref(&gws_bo);
2221
+ mutex_destroy(&(*mem)->lock);
2222
+ kfree(*mem);
2223
+ *mem = NULL;
2224
+ return ret;
2225
+}
2226
+
2227
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
2228
+{
2229
+ int ret;
2230
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
2231
+ struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
2232
+ struct amdgpu_bo *gws_bo = kgd_mem->bo;
2233
+
2234
+ /* Remove BO from process's validate list so restore worker won't touch
2235
+ * it anymore
2236
+ */
2237
+ remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
2238
+
2239
+ ret = amdgpu_bo_reserve(gws_bo, false);
2240
+ if (unlikely(ret)) {
2241
+ pr_err("Reserve gws bo failed %d\n", ret);
2242
+ //TODO add BO back to validate_list?
2243
+ return ret;
2244
+ }
2245
+ amdgpu_amdkfd_remove_eviction_fence(gws_bo,
2246
+ process_info->eviction_fence);
2247
+ amdgpu_bo_unreserve(gws_bo);
2248
+ amdgpu_sync_free(&kgd_mem->sync);
2249
+ amdgpu_bo_unref(&gws_bo);
2250
+ mutex_destroy(&kgd_mem->lock);
2251
+ kfree(mem);
2252
+ return 0;
2253
+}
2254
+
2255
+/* Returns GPU-specific tiling mode information */
2256
+int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
2257
+ struct tile_config *config)
2258
+{
2259
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
2260
+
2261
+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
2262
+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
2263
+ config->num_tile_configs =
2264
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2265
+ config->macro_tile_config_ptr =
2266
+ adev->gfx.config.macrotile_mode_array;
2267
+ config->num_macro_tile_configs =
2268
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2269
+
2270
+ /* Those values are not set from GFX9 onwards */
2271
+ config->num_banks = adev->gfx.config.num_banks;
2272
+ config->num_ranks = adev->gfx.config.num_ranks;
2273
+
2274
+ return 0;
2275
+}