@@ -29,6 +29,8 @@
 #include <linux/rbtree.h>
 #include <drm/gpu_scheduler.h>
 #include <drm/drm_file.h>
+#include <drm/ttm/ttm_bo_driver.h>
+#include <linux/sched/mm.h>
 
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
@@ -48,12 +50,12 @@
 /* number of entries in page table */
 #define AMDGPU_VM_PTE_COUNT(adev) (1 << (adev)->vm_manager.block_size)
 
-/* PTBs (Page Table Blocks) need to be aligned to 32K */
-#define AMDGPU_VM_PTB_ALIGN_SIZE	32768
-
 #define AMDGPU_PTE_VALID	(1ULL << 0)
 #define AMDGPU_PTE_SYSTEM	(1ULL << 1)
 #define AMDGPU_PTE_SNOOPED	(1ULL << 2)
+
+/* RV+ */
+#define AMDGPU_PTE_TMZ		(1ULL << 3)
 
 /* VI only */
 #define AMDGPU_PTE_EXECUTABLE	(1ULL << 4)
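The PTE attribute bits above combine by simple OR into the low bits of the 64-bit page table entry; the new TMZ bit marks a page as belonging to the trusted memory zone on Raven and later. The following standalone userspace sketch shows that composition. The macro values are copied from this hunk, but the usage itself is illustrative, not driver code:

#include <stdint.h>
#include <stdio.h>

/* Flag definitions copied from the hunk above; each PTE is a 64-bit
 * word whose low bits carry per-page attributes. */
#define AMDGPU_PTE_VALID	(1ULL << 0)
#define AMDGPU_PTE_SYSTEM	(1ULL << 1)
#define AMDGPU_PTE_SNOOPED	(1ULL << 2)
#define AMDGPU_PTE_TMZ		(1ULL << 3)	/* RV+ */
#define AMDGPU_PTE_EXECUTABLE	(1ULL << 4)	/* VI only */

int main(void)
{
	/* A valid system page that must be snooped and, on RV+, kept
	 * inside the trusted memory zone. */
	uint64_t flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM |
			 AMDGPU_PTE_SNOOPED | AMDGPU_PTE_TMZ;

	printf("pte flags: 0x%llx\n", (unsigned long long)flags);
	printf("tmz set:   %d\n", !!(flags & AMDGPU_PTE_TMZ));
	return 0;
}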
@@ -69,6 +71,8 @@
 /* PDE is handled as PTE for VEGA10 */
 #define AMDGPU_PDE_PTE		(1ULL << 54)
 
+#define AMDGPU_PTE_LOG		(1ULL << 55)
+
 /* PTE is handled as PDE for VEGA10 (Translate Further) */
 #define AMDGPU_PTE_TF		(1ULL << 56)
 
@@ -77,8 +81,8 @@
 
 
 /* For GFX9 */
-#define AMDGPU_PTE_MTYPE(a)	((uint64_t)a << 57)
-#define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
+#define AMDGPU_PTE_MTYPE_VG10(a)	((uint64_t)(a) << 57)
+#define AMDGPU_PTE_MTYPE_VG10_MASK	AMDGPU_PTE_MTYPE_VG10(3ULL)
 
 #define AMDGPU_MTYPE_NC 0
 #define AMDGPU_MTYPE_CC 2
@@ -88,33 +92,28 @@
 				| AMDGPU_PTE_EXECUTABLE	\
 				| AMDGPU_PTE_READABLE	\
 				| AMDGPU_PTE_WRITEABLE	\
-				| AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_CC))
+				| AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC))
 
-/* How to programm VM fault handling */
+/* gfx10 */
+#define AMDGPU_PTE_MTYPE_NV10(a)	((uint64_t)(a) << 48)
+#define AMDGPU_PTE_MTYPE_NV10_MASK	AMDGPU_PTE_MTYPE_NV10(7ULL)
+
+/* How to program VM fault handling */
 #define AMDGPU_VM_FAULT_STOP_NEVER	0
 #define AMDGPU_VM_FAULT_STOP_FIRST	1
 #define AMDGPU_VM_FAULT_STOP_ALWAYS	2
 
+/* Reserve 4MB VRAM for page tables */
+#define AMDGPU_VM_RESERVED_VRAM		(4ULL << 20)
+
 /* max number of VMHUB */
-#define AMDGPU_MAX_VMHUBS	2
-#define AMDGPU_GFXHUB	0
-#define AMDGPU_MMHUB	1
+#define AMDGPU_MAX_VMHUBS	3
+#define AMDGPU_GFXHUB_0		0
+#define AMDGPU_MMHUB_0		1
+#define AMDGPU_MMHUB_1		2
 
-/* hardcode that limit for now */
-#define AMDGPU_VA_RESERVED_SIZE		(1ULL << 20)
-
-/* VA hole for 48bit addresses on Vega10 */
-#define AMDGPU_VA_HOLE_START		0x0000800000000000ULL
-#define AMDGPU_VA_HOLE_END		0xffff800000000000ULL
-
-/*
- * Hardware is programmed as if the hole doesn't exists with start and end
- * address values.
- *
- * This mask is used to remove the upper 16bits of the VA and so come up with
- * the linear addr value.
- */
-#define AMDGPU_VA_HOLE_MASK		0x0000ffffffffffffULL
+/* Reserve 2MB at top/bottom of address space for kernel use */
+#define AMDGPU_VA_RESERVED_SIZE		(2ULL << 20)
 
 /* max vmids dedicated for process */
 #define AMDGPU_VM_MAX_RESERVED_VMID	1
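Note the memory-type (MTYPE) field moves between generations: GFX9/Vega10 keeps a 2-bit field at bit 57, while gfx10/Navi10 keeps a 3-bit field at bit 48, hence the split into _VG10 and _NV10 variants with distinct masks. A userspace sketch of encoding and decoding the field, with the macros copied from the hunks above and the usage purely illustrative:

#include <stdint.h>
#include <stdio.h>

/* Definitions copied from the hunks above. */
#define AMDGPU_PTE_MTYPE_VG10(a)	((uint64_t)(a) << 57)
#define AMDGPU_PTE_MTYPE_VG10_MASK	AMDGPU_PTE_MTYPE_VG10(3ULL)
#define AMDGPU_PTE_MTYPE_NV10(a)	((uint64_t)(a) << 48)
#define AMDGPU_PTE_MTYPE_NV10_MASK	AMDGPU_PTE_MTYPE_NV10(7ULL)

#define AMDGPU_MTYPE_NC 0
#define AMDGPU_MTYPE_CC 2

int main(void)
{
	uint64_t pte = 0;

	/* Select the cache-coherent type on a Vega10-style PTE:
	 * clear the field first, then OR in the new value. */
	pte &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
	pte |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC);

	printf("vg10 mtype: %llu\n",
	       (unsigned long long)((pte & AMDGPU_PTE_MTYPE_VG10_MASK) >> 57));
	return 0;
}

The extra parentheses around (a) in the new macros also fix an operator-precedence hazard the old AMDGPU_PTE_MTYPE(a) had when passed an expression.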
@@ -143,7 +142,7 @@
 	struct amdgpu_bo		*bo;
 
 	/* protected by bo being reserved */
-	struct list_head		bo_list;
+	struct amdgpu_vm_bo_base	*next;
 
 	/* protected by spinlock */
 	struct list_head		vm_status;
@@ -154,16 +153,31 @@
 
 struct amdgpu_vm_pt {
 	struct amdgpu_vm_bo_base	base;
-	bool				huge;
 
 	/* array of page tables, one for each directory entry */
 	struct amdgpu_vm_pt		*entries;
 };
 
-#define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
-#define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48)
-#define AMDGPU_VM_FAULT_ADDR(fault) ((u64)(fault) & 0xfffffffff000ULL)
+/* provided by hw blocks that can write ptes, e.g., sdma */
+struct amdgpu_vm_pte_funcs {
+	/* number of dw to reserve per operation */
+	unsigned	copy_pte_num_dw;
 
+	/* copy pte entries from GART */
+	void (*copy_pte)(struct amdgpu_ib *ib,
+			 uint64_t pe, uint64_t src,
+			 unsigned count);
+
+	/* write pte one entry at a time with addr mapping */
+	void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
+			  uint64_t value, unsigned count,
+			  uint32_t incr);
+	/* for linear pte/pde updates without addr mapping */
+	void (*set_pte_pde)(struct amdgpu_ib *ib,
+			    uint64_t pe,
+			    uint64_t addr, unsigned count,
+			    uint32_t incr, uint64_t flags);
+};
 
 struct amdgpu_task_info {
 	char	process_name[TASK_COMM_LEN];
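struct amdgpu_vm_pte_funcs is a per-IP function table: each hardware block that can write PTEs (e.g., SDMA) fills it in, and generic VM code calls through it. A self-contained toy showing the pattern; struct amdgpu_ib and the demo_* backend below are stand-ins, not driver code:

#include <stdint.h>
#include <stdio.h>

/* Minimal stand-in so the sketch compiles on its own; the real
 * struct amdgpu_ib lives elsewhere in the driver. */
struct amdgpu_ib { int unused; };

struct amdgpu_vm_pte_funcs {
	unsigned copy_pte_num_dw;
	void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
			  uint64_t value, unsigned count, uint32_t incr);
};

/* Hypothetical backend: prints instead of emitting real packets. */
static void demo_write_pte(struct amdgpu_ib *ib, uint64_t pe,
			   uint64_t value, unsigned count, uint32_t incr)
{
	for (unsigned i = 0; i < count; i++, pe += 8, value += incr)
		printf("pte @0x%llx = 0x%llx\n",
		       (unsigned long long)pe, (unsigned long long)value);
}

static const struct amdgpu_vm_pte_funcs demo_funcs = {
	.copy_pte_num_dw = 7,	/* illustrative dw budget */
	.write_pte = demo_write_pte,
};

int main(void)
{
	struct amdgpu_ib ib;

	/* Write four consecutive PTEs, each mapping the next 4 KiB page. */
	demo_funcs.write_pte(&ib, 0x1000, 0x400000 | 1, 4, 0x1000);
	return 0;
}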
@@ -172,9 +186,74 @@
 	pid_t	tgid;
 };
 
+/**
+ * struct amdgpu_vm_update_params
+ *
+ * Encapsulate some VM table update parameters to reduce
+ * the number of function parameters
+ *
+ */
+struct amdgpu_vm_update_params {
+
+	/**
+	 * @adev: amdgpu device we do this update for
+	 */
+	struct amdgpu_device *adev;
+
+	/**
+	 * @vm: optional amdgpu_vm we do this update for
+	 */
+	struct amdgpu_vm *vm;
+
+	/**
+	 * @immediate: if changes should be made immediately
+	 */
+	bool immediate;
+
+	/**
+	 * @unlocked: true if the root BO is not locked
+	 */
+	bool unlocked;
+
+	/**
+	 * @pages_addr:
+	 *
+	 * DMA addresses to use for mapping
+	 */
+	dma_addr_t *pages_addr;
+
+	/**
+	 * @job: job to use for hw submission
+	 */
+	struct amdgpu_job *job;
+
+	/**
+	 * @num_dw_left: number of dw left for the IB
+	 */
+	unsigned int num_dw_left;
+};
+
+struct amdgpu_vm_update_funcs {
+	int (*map_table)(struct amdgpu_bo *bo);
+	int (*prepare)(struct amdgpu_vm_update_params *p, struct dma_resv *resv,
+		       enum amdgpu_sync_mode sync_mode);
+	int (*update)(struct amdgpu_vm_update_params *p,
+		      struct amdgpu_bo *bo, uint64_t pe, uint64_t addr,
+		      unsigned count, uint32_t incr, uint64_t flags);
+	int (*commit)(struct amdgpu_vm_update_params *p,
+		      struct dma_fence **fence);
+};
+
 struct amdgpu_vm {
 	/* tree of virtual addresses mapped */
 	struct rb_root_cached	va;
+
+	/* Lock to prevent eviction while we are updating page tables
+	 * use vm_eviction_lock/unlock(vm)
+	 */
+	struct mutex		eviction_lock;
+	bool			evicting;
+	unsigned int		saved_flags;
 
 	/* BOs who needs a validation */
 	struct list_head	evicted;
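The amdgpu_vm_update_funcs vtable decouples page-table walking from the submission backend; the driver ships a CPU and an SDMA implementation (see the externs later in this header), selected per VM. A minimal userspace sketch of the prepare/update/commit contract under a CPU-style backend; every demo_* name is an illustrative stand-in for the real amdgpu_bo/dma_resv/dma_fence plumbing:

#include <stdint.h>
#include <stdio.h>

struct demo_params { unsigned num_dw_left; };

struct demo_update_funcs {
	int (*prepare)(struct demo_params *p);
	int (*update)(struct demo_params *p, uint64_t pe, uint64_t addr,
		      unsigned count, uint32_t incr, uint64_t flags);
	int (*commit)(struct demo_params *p);
};

static int cpu_prepare(struct demo_params *p)
{
	p->num_dw_left = 0;	/* CPU path reserves no IB space */
	return 0;
}

static int cpu_update(struct demo_params *p, uint64_t pe, uint64_t addr,
		      unsigned count, uint32_t incr, uint64_t flags)
{
	/* A CPU backend would write PTEs through a kernel mapping;
	 * here we just show each entry it would produce. */
	for (unsigned i = 0; i < count; i++, pe += 8, addr += incr)
		printf("PTE 0x%llx = 0x%llx\n", (unsigned long long)pe,
		       (unsigned long long)(addr | flags));
	return 0;
}

static int cpu_commit(struct demo_params *p)
{
	return 0;	/* nothing queued, nothing to fence */
}

static const struct demo_update_funcs demo_cpu_funcs = {
	.prepare = cpu_prepare,
	.update = cpu_update,
	.commit = cpu_commit,
};

int main(void)
{
	struct demo_params p;

	/* Map two 4 KiB pages starting at GPU address 0x100000. */
	demo_cpu_funcs.prepare(&p);
	demo_cpu_funcs.update(&p, 0x1000, 0x100000 | 1, 2, 0x1000, 0);
	demo_cpu_funcs.commit(&p);
	return 0;
}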
@@ -182,12 +261,15 @@
 	/* PT BOs which relocated and their parent need an update */
 	struct list_head	relocated;
 
-	/* BOs moved, but not yet updated in the PT */
+	/* per VM BOs moved, but not yet updated in the PT */
 	struct list_head	moved;
-	spinlock_t		moved_lock;
 
 	/* All BOs of this VM not currently in the state machine */
 	struct list_head	idle;
+
+	/* regular invalidated BOs, but not yet updated in the PT */
+	struct list_head	invalidated;
+	spinlock_t		invalidated_lock;
 
 	/* BO mappings freed, but not yet updated in the PT */
 	struct list_head	freed;
@@ -196,24 +278,28 @@
 	struct amdgpu_vm_pt	root;
 	struct dma_fence	*last_update;
 
-	/* Scheduler entity for page table updates */
-	struct drm_sched_entity	entity;
+	/* Scheduler entities for page table updates */
+	struct drm_sched_entity	immediate;
+	struct drm_sched_entity	delayed;
+
+	/* Last unlocked submission to the scheduler entities */
+	struct dma_fence	*last_unlocked;
 
 	unsigned int		pasid;
 	/* dedicated to vm */
 	struct amdgpu_vmid	*reserved_vmid[AMDGPU_MAX_VMHUBS];
 
 	/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
-	bool                    use_cpu_for_update;
+	bool			use_cpu_for_update;
+
+	/* Functions to use for VM table updates */
+	const struct amdgpu_vm_update_funcs	*update_funcs;
 
 	/* Flag to indicate ATS support from PTE for GFX9 */
 	bool			pte_support_ats;
 
 	/* Up to 128 pending retry page faults */
 	DECLARE_KFIFO(faults, u64, 128);
-
-	/* Limit non-retry fault storms */
-	unsigned int		fault_credit;
 
 	/* Points to the KFD process VM info */
 	struct amdkfd_process_info *process_info;
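The single scheduler entity becomes two: updates that must land right away (e.g., fixing up a page fault) go through the immediate entity, while ordinary map/unmap work is deferred through the delayed one. A toy dispatcher under that assumption; all demo_* names are stand-ins, not driver code:

#include <stdio.h>

/* Stand-in for drm_sched_entity from gpu_scheduler.h. */
struct demo_entity { const char *name; };

struct demo_vm {
	struct demo_entity immediate;	/* updates that cannot wait */
	struct demo_entity delayed;	/* regular deferred updates */
};

/* Pick the entity the way an update path might: the immediate flag in
 * amdgpu_vm_update_params decides which queue the job lands on. */
static struct demo_entity *pick_entity(struct demo_vm *vm, int immediate)
{
	return immediate ? &vm->immediate : &vm->delayed;
}

int main(void)
{
	struct demo_vm vm = {
		.immediate = { "immediate" },
		.delayed   = { "delayed" },
	};

	printf("fault fixup  -> %s\n", pick_entity(&vm, 1)->name);
	printf("normal unmap -> %s\n", pick_entity(&vm, 0)->name);
	return 0;
}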
@@ -226,11 +312,20 @@
 
 	/* Some basic info about the task */
 	struct amdgpu_task_info task_info;
+
+	/* Store positions of group of BOs */
+	struct ttm_lru_bulk_move lru_bulk_move;
+	/* mark whether we can do the bulk move */
+	bool			bulk_moveable;
+	/* Flag to indicate if VM is used for compute */
+	bool			is_compute_context;
 };
 
 struct amdgpu_vm_manager {
 	/* Handling of VMIDs */
 	struct amdgpu_vmid_mgr			id_mgr[AMDGPU_MAX_VMHUBS];
+	unsigned int				first_kfd_vmid;
+	bool					concurrent_flush;
 
 	/* Handling of VM fences */
 	u64					fence_context;
@@ -244,10 +339,10 @@
 	/* vram base address for page table entry */
 	u64					vram_base_offset;
 	/* vm pte handling */
-	const struct amdgpu_vm_pte_funcs        *vm_pte_funcs;
-	struct amdgpu_ring                      *vm_pte_rings[AMDGPU_MAX_RINGS];
-	unsigned                                vm_pte_num_rings;
-	atomic_t                                vm_pte_next_ring;
+	const struct amdgpu_vm_pte_funcs	*vm_pte_funcs;
+	struct drm_gpu_scheduler		*vm_pte_scheds[AMDGPU_MAX_RINGS];
+	unsigned				vm_pte_num_scheds;
+	struct amdgpu_ring			*page_fault;
 
 	/* partial resident texture handling */
 	spinlock_t				prt_lock;
@@ -266,14 +361,22 @@
 	spinlock_t				pasid_lock;
 };
 
+#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
+#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
+#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
+
+extern const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs;
+extern const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs;
+
 void amdgpu_vm_manager_init(struct amdgpu_device *adev);
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
+
+long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-		   int vm_context, unsigned int pasid);
-int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+		   int vm_context, u32 pasid);
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid);
+void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
-				  unsigned int pasid);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);
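The three wrapper macros above hide the vm_pte_funcs indirection so IP-specific code reads like a direct call. A self-contained sketch of the same shape, with reduced stand-in structs (the real ones carry far more state) and a hypothetical print-only backend:

#include <stdint.h>
#include <stdio.h>

struct amdgpu_ib { int unused; };	/* stand-in */

struct amdgpu_vm_pte_funcs {
	void (*copy_pte)(struct amdgpu_ib *ib, uint64_t pe, uint64_t src,
			 unsigned count);
};

struct amdgpu_vm_manager { const struct amdgpu_vm_pte_funcs *vm_pte_funcs; };
struct amdgpu_device { struct amdgpu_vm_manager vm_manager; };

/* Same shape as the wrapper in the hunk above. */
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) \
	((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))

static void demo_copy_pte(struct amdgpu_ib *ib, uint64_t pe, uint64_t src,
			  unsigned count)
{
	printf("copy %u PTEs from GART table 0x%llx to 0x%llx\n", count,
	       (unsigned long long)src, (unsigned long long)pe);
}

static const struct amdgpu_vm_pte_funcs demo_pte_funcs = {
	.copy_pte = demo_copy_pte,
};

int main(void)
{
	struct amdgpu_device adev = {
		.vm_manager = { .vm_pte_funcs = &demo_pte_funcs },
	};
	struct amdgpu_ib ib;

	amdgpu_vm_copy_pte(&adev, &ib, 0x2000, 0x8000, 16);
	return 0;
}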
@@ -281,12 +384,9 @@
 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*callback)(void *p, struct amdgpu_bo *bo),
 			      void *param);
-int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
-			struct amdgpu_vm *vm,
-			uint64_t saddr, uint64_t size);
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
-int amdgpu_vm_update_directories(struct amdgpu_device *adev,
-				 struct amdgpu_vm *vm);
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
+			  struct amdgpu_vm *vm, bool immediate);
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct amdgpu_vm *vm,
 			  struct dma_fence **fence);
@@ -295,8 +395,10 @@
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
 			bool clear);
+bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 			     struct amdgpu_bo *bo, bool evicted);
+uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
 				       struct amdgpu_bo *bo);
 struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
@@ -329,9 +431,15 @@
 			struct amdgpu_job *job);
 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
 
-void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
-			     struct amdgpu_task_info *task_info);
+void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
+			     struct amdgpu_task_info *task_info);
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
+			    uint64_t addr);
 
 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
 
+void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm);
+void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
+
 #endif
---|