2023-12-06 08f87f769b595151be1afeff53e144f543faa614
kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -25,15 +25,35 @@
 #include "mali_kbase_csf_tiler_heap_def.h"
 #include "mali_kbase_csf_heap_context_alloc.h"
 
+/* Tiler heap shrink stop limit for maintaining a minimum number of chunks */
+#define HEAP_SHRINK_STOP_LIMIT (1)
+
+/**
+ * struct kbase_csf_gpu_buffer_heap - A gpu buffer object specific to tiler heap
+ *
+ * @cdsbp_0: Descriptor_type and buffer_type
+ * @size: The size of the current heap chunk
+ * @pointer: Pointer to the current heap chunk
+ * @low_pointer: Pointer to low end of current heap chunk
+ * @high_pointer: Pointer to high end of current heap chunk
+ */
+struct kbase_csf_gpu_buffer_heap {
+ u32 cdsbp_0;
+ u32 size;
+ u64 pointer;
+ u64 low_pointer;
+ u64 high_pointer;
+} __packed;
+
 /**
 * encode_chunk_ptr - Encode the address and size of a chunk as an integer.
+ *
+ * @chunk_size: Size of a tiler heap chunk, in bytes.
+ * @chunk_addr: GPU virtual address of the same tiler heap chunk.
 *
 * The size and address of the next chunk in a list are packed into a single
 * 64-bit value for storage in a chunk's header. This function returns that
 * value.
- *
- * @chunk_size: Size of a tiler heap chunk, in bytes.
- * @chunk_addr: GPU virtual address of the same tiler heap chunk.
 *
 * Return: Next chunk pointer suitable for writing into a chunk header.
 */
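The encode_chunk_ptr() helper documented above packs the next chunk's size and GPU address into a single 64-bit header word. As a rough illustration of that kind of packing, the standalone C sketch below encodes a page-aligned address together with a size-in-pages field; the field widths, shifts and names here are assumptions for illustration only and do not reproduce the driver's actual CHUNK_* encoding from mali_kbase_csf_tiler_heap_def.h.

/* Illustrative sketch only: pack a chunk size (in 4 KiB pages) into the low
 * bits of a 64-bit word and a page-aligned address into the remaining bits.
 * The real layout is defined by the CHUNK_* macros in
 * mali_kbase_csf_tiler_heap_def.h and may use different widths.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_SIZE_BITS  12u  /* assumed width of the encoded size field */
#define EXAMPLE_SIZE_MASK  ((1ull << EXAMPLE_SIZE_BITS) - 1)
#define EXAMPLE_PAGE_SHIFT 12u  /* 4 KiB pages */

static uint64_t example_encode_chunk_ptr(uint64_t chunk_size, uint64_t chunk_addr)
{
	uint64_t encoded_size = (chunk_size >> EXAMPLE_PAGE_SHIFT) & EXAMPLE_SIZE_MASK;
	uint64_t encoded_addr = chunk_addr >> EXAMPLE_PAGE_SHIFT;

	return (encoded_addr << EXAMPLE_SIZE_BITS) | encoded_size;
}

int main(void)
{
	uint64_t hdr = example_encode_chunk_ptr(2ull << 20, 0x80004000ull);
	uint64_t size = (hdr & EXAMPLE_SIZE_MASK) << EXAMPLE_PAGE_SHIFT;
	uint64_t addr = (hdr >> EXAMPLE_SIZE_BITS) << EXAMPLE_PAGE_SHIFT;

	/* Round-trip check: decoding recovers the original size and address. */
	assert(size == (2ull << 20) && addr == 0x80004000ull);
	printf("header 0x%016llx -> size 0x%llx, addr 0x%llx\n",
	       (unsigned long long)hdr, (unsigned long long)size,
	       (unsigned long long)addr);
	return 0;
}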
@@ -66,8 +86,6 @@
 static struct kbase_csf_tiler_heap_chunk *get_last_chunk(
 struct kbase_csf_tiler_heap *const heap)
 {
- lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
-
 if (list_empty(&heap->chunks_list))
 return NULL;
 
@@ -76,15 +94,44 @@
7694 }
7795
7896 /**
97
+ * remove_external_chunk_mappings - Remove external mappings from a chunk that
98
+ * is being transitioned to the tiler heap
99
+ * memory system.
100
+ *
101
+ * @kctx: kbase context the chunk belongs to.
102
+ * @chunk: The chunk whose external mappings are going to be removed.
103
+ *
104
+ * This function marks the region as DONT NEED. Along with NO_USER_FREE, this indicates
105
+ * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other
106
+ * parts of kbase outside of tiler heap management should not take references on its physical
107
+ * pages, and should not modify them.
108
+ */
109
+static void remove_external_chunk_mappings(struct kbase_context *const kctx,
110
+ struct kbase_csf_tiler_heap_chunk *chunk)
111
+{
112
+ lockdep_assert_held(&kctx->reg_lock);
113
+
114
+ if (chunk->region->cpu_alloc != NULL) {
115
+ kbase_mem_shrink_cpu_mapping(kctx, chunk->region, 0,
116
+ chunk->region->cpu_alloc->nents);
117
+ }
118
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
119
+ chunk->region->flags |= KBASE_REG_DONT_NEED;
120
+#endif
121
+
122
+ dev_dbg(kctx->kbdev->dev, "Removed external mappings from chunk 0x%llX", chunk->gpu_va);
123
+}
124
+
125
+/**
79126 * link_chunk - Link a chunk into a tiler heap
127
+ *
128
+ * @heap: Pointer to the tiler heap.
129
+ * @chunk: Pointer to the heap chunk to be linked.
80130 *
81131 * Unless the @chunk is the first in the kernel's list of chunks belonging to
82132 * a given tiler heap, this function stores the size and address of the @chunk
83133 * in the header of the preceding chunk. This requires the GPU memory region
84
- * containing the header to be be mapped temporarily, which can fail.
85
- *
86
- * @heap: Pointer to the tiler heap.
87
- * @chunk: Pointer to the heap chunk to be linked.
134
+ * containing the header to be mapped temporarily, which can fail.
88135 *
89136 * Return: 0 if successful or a negative error code on failure.
90137 */
@@ -95,19 +142,12 @@
95142
96143 if (prev) {
97144 struct kbase_context *const kctx = heap->kctx;
98
- struct kbase_vmap_struct map;
99
- u64 *const prev_hdr = kbase_vmap_prot(kctx, prev->gpu_va,
100
- sizeof(*prev_hdr), KBASE_REG_CPU_WR, &map);
145
+ u64 *prev_hdr = prev->map.addr;
101146
102
- if (unlikely(!prev_hdr)) {
103
- dev_err(kctx->kbdev->dev,
104
- "Failed to map tiler heap chunk 0x%llX\n",
105
- prev->gpu_va);
106
- return -ENOMEM;
107
- }
147
+ WARN((prev->region->flags & KBASE_REG_CPU_CACHED),
148
+ "Cannot support CPU cached chunks without sync operations");
108149
109150 *prev_hdr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);
110
- kbase_vunmap(kctx, &map);
111151
112152 dev_dbg(kctx->kbdev->dev,
113153 "Linked tiler heap chunks, 0x%llX -> 0x%llX\n",
@@ -120,23 +160,25 @@
120160 /**
121161 * init_chunk - Initialize and link a tiler heap chunk
122162 *
123
- * Zero-initialize a new chunk's header (including its pointer to the next
124
- * chunk, which doesn't exist yet) and then update the previous chunk's
125
- * header to link the new chunk into the chunk list.
126
- *
127163 * @heap: Pointer to the tiler heap.
128164 * @chunk: Pointer to the heap chunk to be initialized and linked.
129165 * @link_with_prev: Flag to indicate if the new chunk needs to be linked with
130166 * the previously allocated chunk.
167
+ *
168
+ * Zero-initialize a new chunk's header (including its pointer to the next
169
+ * chunk, which doesn't exist yet) and then update the previous chunk's
170
+ * header to link the new chunk into the chunk list.
131171 *
132172 * Return: 0 if successful or a negative error code on failure.
133173 */
134174 static int init_chunk(struct kbase_csf_tiler_heap *const heap,
135175 struct kbase_csf_tiler_heap_chunk *const chunk, bool link_with_prev)
136176 {
137
- struct kbase_vmap_struct map;
138
- struct u64 *chunk_hdr = NULL;
177
+ int err = 0;
178
+ u64 *chunk_hdr;
139179 struct kbase_context *const kctx = heap->kctx;
180
+
181
+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
140182
141183 if (unlikely(chunk->gpu_va & ~CHUNK_ADDR_MASK)) {
142184 dev_err(kctx->kbdev->dev,
@@ -144,155 +186,283 @@
144186 return -EINVAL;
145187 }
146188
147
- chunk_hdr = kbase_vmap_prot(kctx,
148
- chunk->gpu_va, CHUNK_HDR_SIZE, KBASE_REG_CPU_WR, &map);
149
-
150
- if (unlikely(!chunk_hdr)) {
151
- dev_err(kctx->kbdev->dev,
152
- "Failed to map a tiler heap chunk header\n");
153
- return -ENOMEM;
189
+ WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
190
+ "Cannot support CPU cached chunks without sync operations");
191
+ chunk_hdr = chunk->map.addr;
192
+ if (WARN(chunk->map.size < CHUNK_HDR_SIZE,
193
+ "Tiler chunk kernel mapping was not large enough for zero-init")) {
194
+ return -EINVAL;
154195 }
155196
156197 memset(chunk_hdr, 0, CHUNK_HDR_SIZE);
157
- kbase_vunmap(kctx, &map);
198
+ INIT_LIST_HEAD(&chunk->link);
158199
159200 if (link_with_prev)
160
- return link_chunk(heap, chunk);
161
- else
162
- return 0;
163
-}
164
-
165
-/**
166
- * create_chunk - Create a tiler heap chunk
167
- *
168
- * This function allocates a chunk of memory for a tiler heap and adds it to
169
- * the end of the list of chunks associated with that heap. The size of the
170
- * chunk is not a parameter because it is configured per-heap not per-chunk.
171
- *
172
- * @heap: Pointer to the tiler heap for which to allocate memory.
173
- * @link_with_prev: Flag to indicate if the chunk to be allocated needs to be
174
- * linked with the previously allocated chunk.
175
- *
176
- * Return: 0 if successful or a negative error code on failure.
177
- */
178
-static int create_chunk(struct kbase_csf_tiler_heap *const heap,
179
- bool link_with_prev)
180
-{
181
- int err = 0;
182
- struct kbase_context *const kctx = heap->kctx;
183
- u64 nr_pages = PFN_UP(heap->chunk_size);
184
- u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
185
- BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE |
186
- BASE_MEM_COHERENT_LOCAL;
187
- struct kbase_csf_tiler_heap_chunk *chunk = NULL;
188
-
189
- flags |= base_mem_group_id_set(kctx->jit_group_id);
190
-
191
-#if defined(CONFIG_MALI_BIFROST_DEBUG) || defined(CONFIG_MALI_VECTOR_DUMP)
192
- flags |= BASE_MEM_PROT_CPU_RD;
193
-#endif
194
-
195
- lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
196
-
197
- chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
198
- if (unlikely(!chunk)) {
199
- dev_err(kctx->kbdev->dev,
200
- "No kernel memory for a new tiler heap chunk\n");
201
- return -ENOMEM;
202
- }
203
-
204
- /* Allocate GPU memory for the new chunk. */
205
- INIT_LIST_HEAD(&chunk->link);
206
- chunk->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0,
207
- &flags, &chunk->gpu_va);
208
-
209
- if (unlikely(!chunk->region)) {
210
- dev_err(kctx->kbdev->dev,
211
- "Failed to allocate a tiler heap chunk\n");
212
- err = -ENOMEM;
213
- } else {
214
- err = init_chunk(heap, chunk, link_with_prev);
215
- if (unlikely(err)) {
216
- kbase_gpu_vm_lock(kctx);
217
- chunk->region->flags &= ~KBASE_REG_NO_USER_FREE;
218
- kbase_mem_free_region(kctx, chunk->region);
219
- kbase_gpu_vm_unlock(kctx);
220
- }
221
- }
201
+ err = link_chunk(heap, chunk);
222202
223203 if (unlikely(err)) {
224
- kfree(chunk);
225
- } else {
226
- list_add_tail(&chunk->link, &heap->chunks_list);
227
- heap->chunk_count++;
228
-
229
- dev_dbg(kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n",
230
- chunk->gpu_va);
204
+ dev_err(kctx->kbdev->dev, "Failed to link a chunk to a tiler heap\n");
205
+ return -EINVAL;
231206 }
207
+
208
+ list_add_tail(&chunk->link, &heap->chunks_list);
209
+ heap->chunk_count++;
232210
233211 return err;
234212 }
235213
236214 /**
237
- * delete_chunk - Delete a tiler heap chunk
215
+ * remove_unlinked_chunk - Remove a chunk that is not currently linked into a
216
+ * heap.
238217 *
239
- * This function frees a tiler heap chunk previously allocated by @create_chunk
240
- * and removes it from the list of chunks associated with the heap.
241
- *
242
- * WARNING: The deleted chunk is not unlinked from the list of chunks used by
243
- * the GPU, therefore it is only safe to use this function when
244
- * deleting a heap.
245
- *
246
- * @heap: Pointer to the tiler heap for which @chunk was allocated.
247
- * @chunk: Pointer to a chunk to be deleted.
218
+ * @kctx: Kbase context that was used to allocate the memory.
219
+ * @chunk: Chunk that has been allocated, but not linked into a heap.
248220 */
249
-static void delete_chunk(struct kbase_csf_tiler_heap *const heap,
250
- struct kbase_csf_tiler_heap_chunk *const chunk)
221
+static void remove_unlinked_chunk(struct kbase_context *kctx,
222
+ struct kbase_csf_tiler_heap_chunk *chunk)
251223 {
252
- struct kbase_context *const kctx = heap->kctx;
253
-
254
- lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
224
+ if (WARN_ON(!list_empty(&chunk->link)))
225
+ return;
255226
256227 kbase_gpu_vm_lock(kctx);
257
- chunk->region->flags &= ~KBASE_REG_NO_USER_FREE;
228
+ kbase_vunmap(kctx, &chunk->map);
229
+ /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
230
+ * regions), and so we must clear that flag too before freeing.
231
+ * For "no user free count", we check that the count is 1 as it is a shrinkable region;
232
+ * no other code part within kbase can take a reference to it.
233
+ */
234
+ WARN_ON(atomic_read(&chunk->region->no_user_free_count) > 1);
235
+ kbase_va_region_no_user_free_dec(chunk->region);
236
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
237
+ chunk->region->flags &= ~KBASE_REG_DONT_NEED;
238
+#endif
258239 kbase_mem_free_region(kctx, chunk->region);
259240 kbase_gpu_vm_unlock(kctx);
260
- list_del(&chunk->link);
261
- heap->chunk_count--;
241
+
262242 kfree(chunk);
263243 }
264244
265245 /**
266
- * delete_all_chunks - Delete all chunks belonging to a tiler heap
246
+ * alloc_new_chunk - Allocate new chunk metadata for the tiler heap, reserve a fully backed VA
247
+ * region for the chunk, and provide a kernel mapping.
248
+ * @kctx: kbase context with which the chunk will be linked
249
+ * @chunk_size: the size of the chunk from the corresponding heap
267250 *
268
- * This function empties the list of chunks associated with a tiler heap by
269
- * freeing all chunks previously allocated by @create_chunk.
251
+ * Allocate the chunk tracking metadata and a corresponding fully backed VA region for the
252
+ * chunk. The kernel may need to invoke the reclaim path while trying to fulfill the allocation, so
253
+ * we cannot hold any lock that would be held in the shrinker paths (JIT evict lock or tiler heap
254
+ * lock).
255
+ *
256
+ * Since the chunk may have its physical backing removed, to prevent use-after-free scenarios we
257
+ * ensure that it is protected from being mapped by other parts of kbase.
258
+ *
259
+ * The chunk's GPU memory can be accessed via its 'map' member, but should only be done so by the
260
+ * shrinker path, as it may be otherwise shrunk at any time.
261
+ *
262
+ * Return: pointer to kbase_csf_tiler_heap_chunk on success or a NULL pointer
263
+ * on failure
264
+ */
265
+static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *kctx,
266
+ u64 chunk_size)
267
+{
268
+ u64 nr_pages = PFN_UP(chunk_size);
269
+ u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
270
+ BASEP_MEM_NO_USER_FREE | BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD;
271
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
272
+ /* The chunk kernel mapping needs to be large enough to:
273
+ * - initially zero the CHUNK_HDR_SIZE area
274
+ * - on shrinking, access the NEXT_CHUNK_ADDR_SIZE area
275
+ */
276
+ const size_t chunk_kernel_map_size = max(CHUNK_HDR_SIZE, NEXT_CHUNK_ADDR_SIZE);
277
+
278
+ /* Calls to this function are inherently synchronous, with respect to
279
+ * MMU operations.
280
+ */
281
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
282
+ flags |= kbase_mem_group_id_set(kctx->jit_group_id);
283
+
284
+ chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
285
+ if (unlikely(!chunk)) {
286
+ dev_err(kctx->kbdev->dev,
287
+ "No kernel memory for a new tiler heap chunk\n");
288
+ return NULL;
289
+ }
290
+
291
+ /* Allocate GPU memory for the new chunk. */
292
+ chunk->region =
293
+ kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info);
294
+
295
+ if (unlikely(!chunk->region)) {
296
+ dev_err(kctx->kbdev->dev, "Failed to allocate a tiler heap chunk!\n");
297
+ goto unroll_chunk;
298
+ }
299
+
300
+ kbase_gpu_vm_lock(kctx);
301
+
302
+ /* Some checks done here as NO_USER_FREE still allows such things to be made
303
+ * whilst we had dropped the region lock
304
+ */
305
+ if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) {
306
+ dev_err(kctx->kbdev->dev, "Chunk region has active kernel mappings!\n");
307
+ goto unroll_region;
308
+ }
309
+
310
+ /* There is a race condition with regard to KBASE_REG_DONT_NEED, where another
311
+ * thread can have the "no user free" refcount increased between kbase_mem_alloc
312
+ * and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by
313
+ * remove_external_chunk_mappings (below).
314
+ *
315
+ * It should be fine and not a security risk if we let the region leak till
316
+ * region tracker termination in such a case.
317
+ */
318
+ if (unlikely(atomic_read(&chunk->region->no_user_free_count) > 1)) {
319
+ dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_count > 1!\n");
320
+ goto unroll_region;
321
+ }
322
+
323
+ /* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE
324
+ * being requested, it's useful to document in code what those restrictions are, and ensure
325
+ * they remain in place in future.
326
+ */
327
+ if (WARN(!chunk->region->gpu_alloc,
328
+ "NO_USER_FREE chunks should not have had their alloc freed")) {
329
+ goto unroll_region;
330
+ }
331
+
332
+ if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE,
333
+ "NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) {
334
+ goto unroll_region;
335
+ }
336
+
337
+ if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC),
338
+ "NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) {
339
+ goto unroll_region;
340
+ }
341
+
342
+ if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED),
343
+ "NO_USER_FREE chunks should not have been made ephemeral")) {
344
+ goto unroll_region;
345
+ }
346
+
347
+ if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1,
348
+ "NO_USER_FREE chunks should not have been aliased")) {
349
+ goto unroll_region;
350
+ }
351
+
352
+ if (unlikely(!kbase_vmap_reg(kctx, chunk->region, chunk->gpu_va, chunk_kernel_map_size,
353
+ (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &chunk->map,
354
+ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING))) {
355
+ dev_err(kctx->kbdev->dev, "Failed to map chunk header for shrinking!\n");
356
+ goto unroll_region;
357
+ }
358
+
359
+ remove_external_chunk_mappings(kctx, chunk);
360
+ kbase_gpu_vm_unlock(kctx);
361
+
362
+ /* If page migration is enabled, we don't want to migrate tiler heap pages.
363
+ * This does not change if the constituent pages are already marked as isolated.
364
+ */
365
+ if (kbase_page_migration_enabled)
366
+ kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE);
367
+
368
+ return chunk;
369
+
370
+unroll_region:
371
+ /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
372
+ * regions), and so we must clear that flag too before freeing.
373
+ */
374
+ kbase_va_region_no_user_free_dec(chunk->region);
375
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
376
+ chunk->region->flags &= ~KBASE_REG_DONT_NEED;
377
+#endif
378
+ kbase_mem_free_region(kctx, chunk->region);
379
+ kbase_gpu_vm_unlock(kctx);
380
+unroll_chunk:
381
+ kfree(chunk);
382
+ return NULL;
383
+}
384
+
385
+/**
386
+ * create_chunk - Create a tiler heap chunk
387
+ *
388
+ * @heap: Pointer to the tiler heap for which to allocate memory.
389
+ *
390
+ * This function allocates a chunk of memory for a tiler heap, adds it to the
391
+ * list of chunks associated with that heap both on the host side and in GPU
392
+ * memory.
393
+ *
394
+ * Return: 0 if successful or a negative error code on failure.
395
+ */
396
+static int create_chunk(struct kbase_csf_tiler_heap *const heap)
397
+{
398
+ int err = 0;
399
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
400
+
401
+ chunk = alloc_new_chunk(heap->kctx, heap->chunk_size);
402
+ if (unlikely(!chunk)) {
403
+ err = -ENOMEM;
404
+ goto allocation_failure;
405
+ }
406
+
407
+ mutex_lock(&heap->kctx->csf.tiler_heaps.lock);
408
+ err = init_chunk(heap, chunk, true);
409
+ mutex_unlock(&heap->kctx->csf.tiler_heaps.lock);
410
+
411
+ if (unlikely(err))
412
+ goto initialization_failure;
413
+
414
+ dev_dbg(heap->kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n", chunk->gpu_va);
415
+
416
+ return 0;
417
+initialization_failure:
418
+ remove_unlinked_chunk(heap->kctx, chunk);
419
+allocation_failure:
420
+ return err;
421
+}
422
+
423
+/**
424
+ * delete_all_chunks - Delete all chunks belonging to an unlinked tiler heap
270425 *
271426 * @heap: Pointer to a tiler heap.
427
+ *
428
+ * This function empties the list of chunks associated with a tiler heap by freeing all chunks
429
+ * previously allocated by @create_chunk.
430
+ *
431
+ * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the
432
+ * tiler_heaps lock cannot be held whilst deleting its chunks due to also needing the &struct
433
+ * kbase_context.region_lock.
434
+ *
435
+ * WARNING: Whilst the deleted chunks are unlinked from host memory, they are not unlinked from the
436
+ * list of chunks used by the GPU, therefore it is only safe to use this function when
437
+ * deleting a heap.
272438 */
273439 static void delete_all_chunks(struct kbase_csf_tiler_heap *heap)
274440 {
275
- struct list_head *entry = NULL, *tmp = NULL;
276441 struct kbase_context *const kctx = heap->kctx;
442
+ struct list_head *entry = NULL, *tmp = NULL;
277443
278
- lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
444
+ WARN(!list_empty(&heap->link),
445
+ "Deleting a heap's chunks when that heap is still linked requires the tiler_heaps lock, which cannot be held by the caller");
279446
280447 list_for_each_safe(entry, tmp, &heap->chunks_list) {
281448 struct kbase_csf_tiler_heap_chunk *chunk = list_entry(
282449 entry, struct kbase_csf_tiler_heap_chunk, link);
283450
284
- delete_chunk(heap, chunk);
451
+ list_del_init(&chunk->link);
452
+ heap->chunk_count--;
453
+
454
+ remove_unlinked_chunk(kctx, chunk);
285455 }
286456 }
287457
288458 /**
289459 * create_initial_chunks - Create the initial list of chunks for a tiler heap
290460 *
291
- * This function allocates a given number of chunks for a tiler heap and
292
- * adds them to the list of chunks associated with that heap.
293
- *
294461 * @heap: Pointer to the tiler heap for which to allocate memory.
295462 * @nchunks: Number of chunks to create.
463
+ *
464
+ * This function allocates a given number of chunks for a tiler heap and
465
+ * adds them to the list of chunks associated with that heap.
296466 *
297467 * Return: 0 if successful or a negative error code on failure.
298468 */
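create_chunk() above separates allocation from linking: alloc_new_chunk() runs with no tiler_heaps lock held, because the allocation may enter the reclaim/shrinker path that takes the same lock, and only the linking step (init_chunk()) happens under the lock, with the unlinked chunk rolled back if linking fails. The sketch below is a simplified userspace analogue of that pattern using pthreads; the names and the shutting_down revalidation check are assumptions for illustration, not driver code.

/* Simplified userspace analogue of the allocate-outside-the-lock,
 * link-under-the-lock pattern used by create_chunk()/alloc_new_chunk().
 * Names and the validity check are illustrative assumptions.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct example_chunk {
	struct example_chunk *next;
	char payload[4096];
};

struct example_heap {
	pthread_mutex_t lock;        /* stands in for kctx->csf.tiler_heaps.lock */
	struct example_chunk *head;
	bool shutting_down;          /* state that must be revalidated under the lock */
};

static int example_add_chunk(struct example_heap *heap)
{
	/* Phase 1: allocate without the heap lock, so a reclaim path that
	 * needs the same lock cannot deadlock against this allocation.
	 */
	struct example_chunk *chunk = calloc(1, sizeof(*chunk));

	if (!chunk)
		return -1;

	/* Phase 2: take the lock and revalidate before linking, because the
	 * heap state may have changed while no lock was held.
	 */
	pthread_mutex_lock(&heap->lock);
	if (heap->shutting_down) {
		pthread_mutex_unlock(&heap->lock);
		free(chunk);         /* roll back the still-unlinked allocation */
		return -1;
	}
	chunk->next = heap->head;
	heap->head = chunk;
	pthread_mutex_unlock(&heap->lock);
	return 0;
}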
@@ -303,7 +473,7 @@
 u32 i;
 
 for (i = 0; (i < nchunks) && likely(!err); i++)
- err = create_chunk(heap, true);
+ err = create_chunk(heap);
 
 if (unlikely(err))
 delete_all_chunks(heap);
@@ -312,14 +482,17 @@
312482 }
313483
314484 /**
315
- * delete_heap - Delete a tiler heap
316
- *
317
- * This function frees any chunks allocated for a tiler heap previously
318
- * initialized by @kbase_csf_tiler_heap_init and removes it from the list of
319
- * heaps associated with the kbase context. The heap context structure used by
320
- * the firmware is also freed.
485
+ * delete_heap - Delete an unlinked tiler heap
321486 *
322487 * @heap: Pointer to a tiler heap to be deleted.
488
+ *
489
+ * This function frees any chunks allocated for a tiler heap previously
490
+ * initialized by @kbase_csf_tiler_heap_init. The heap context structure used by
491
+ * the firmware is also freed.
492
+ *
493
+ * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the
494
+ * tiler_heaps lock cannot be held whilst deleting it due to also needing the &struct
495
+ * kbase_context.region_lock.
323496 */
324497 static void delete_heap(struct kbase_csf_tiler_heap *heap)
325498 {
@@ -327,22 +500,40 @@
327500
328501 dev_dbg(kctx->kbdev->dev, "Deleting tiler heap 0x%llX\n", heap->gpu_va);
329502
330
- lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
503
+ WARN(!list_empty(&heap->link),
504
+ "Deleting a heap that is still linked requires the tiler_heaps lock, which cannot be held by the caller");
331505
506
+ /* Make sure that all of the VA regions corresponding to the chunks are
507
+ * freed at this time and that the work queue is not trying to access freed
508
+ * memory.
509
+ *
510
+ * Note: since the heap is unlinked, and that no references are made to chunks other
511
+ * than from their heap, there is no need to separately move the chunks out of the
512
+ * heap->chunks_list to delete them.
513
+ */
332514 delete_all_chunks(heap);
333515
516
+ kbase_vunmap(kctx, &heap->gpu_va_map);
334517 /* We could optimize context destruction by not freeing leaked heap
335
- * contexts but it doesn't seem worth the extra complexity.
518
+ * contexts but it doesn't seem worth the extra complexity. After this
519
+ * point, the suballocation is returned to the heap context allocator and
520
+ * may be overwritten with new data, meaning heap->gpu_va should not
521
+ * be used past this point.
336522 */
337523 kbase_csf_heap_context_allocator_free(&kctx->csf.tiler_heaps.ctx_alloc,
338524 heap->gpu_va);
339
-
340
- list_del(&heap->link);
341525
342526 WARN_ON(heap->chunk_count);
343527 KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id,
344528 heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0,
345529 heap->target_in_flight, 0);
530
+
531
+ if (heap->buf_desc_reg) {
532
+ kbase_vunmap(kctx, &heap->buf_desc_map);
533
+ kbase_gpu_vm_lock(kctx);
534
+ kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
535
+ kbase_gpu_vm_unlock(kctx);
536
+ }
346537
347538 kfree(heap);
348539 }
@@ -350,14 +541,14 @@
350541 /**
351542 * find_tiler_heap - Find a tiler heap from the address of its heap context
352543 *
544
+ * @kctx: Pointer to the kbase context to search for a tiler heap.
545
+ * @heap_gpu_va: GPU virtual address of a heap context structure.
546
+ *
353547 * Each tiler heap managed by the kernel has an associated heap context
354548 * structure used by the firmware. This function finds a tiler heap object from
355549 * the GPU virtual address of its associated heap context. The heap context
356550 * should have been allocated by @kbase_csf_heap_context_allocator_alloc in the
357551 * same @kctx.
358
- *
359
- * @kctx: Pointer to the kbase context to search for a tiler heap.
360
- * @heap_gpu_va: GPU virtual address of a heap context structure.
361552 *
362553 * Return: pointer to the tiler heap object, or NULL if not found.
363554 */
@@ -375,6 +566,23 @@
375566
376567 dev_dbg(kctx->kbdev->dev, "Tiler heap 0x%llX was not found\n",
377568 heap_gpu_va);
569
+
570
+ return NULL;
571
+}
572
+
573
+static struct kbase_csf_tiler_heap_chunk *find_chunk(struct kbase_csf_tiler_heap *heap,
574
+ u64 const chunk_gpu_va)
575
+{
576
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
577
+
578
+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
579
+
580
+ list_for_each_entry(chunk, &heap->chunks_list, link) {
581
+ if (chunk->gpu_va == chunk_gpu_va)
582
+ return chunk;
583
+ }
584
+
585
+ dev_dbg(heap->kctx->kbdev->dev, "Tiler heap chunk 0x%llX was not found\n", chunk_gpu_va);
378586
379587 return NULL;
380588 }
@@ -397,37 +605,91 @@
397605
398606 void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx)
399607 {
608
+ LIST_HEAD(local_heaps_list);
400609 struct list_head *entry = NULL, *tmp = NULL;
401610
402611 dev_dbg(kctx->kbdev->dev, "Terminating a context for tiler heaps\n");
403612
404613 mutex_lock(&kctx->csf.tiler_heaps.lock);
614
+ list_splice_init(&kctx->csf.tiler_heaps.list, &local_heaps_list);
615
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
405616
406
- list_for_each_safe(entry, tmp, &kctx->csf.tiler_heaps.list) {
617
+ list_for_each_safe(entry, tmp, &local_heaps_list) {
407618 struct kbase_csf_tiler_heap *heap = list_entry(
408619 entry, struct kbase_csf_tiler_heap, link);
620
+
621
+ list_del_init(&heap->link);
409622 delete_heap(heap);
410623 }
411624
412
- mutex_unlock(&kctx->csf.tiler_heaps.lock);
413625 mutex_destroy(&kctx->csf.tiler_heaps.lock);
414626
415627 kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc);
416628 }
417629
418
-int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
419
- u32 const chunk_size, u32 const initial_chunks, u32 const max_chunks,
420
- u16 const target_in_flight, u64 *const heap_gpu_va,
421
- u64 *const first_chunk_va)
630
+/**
631
+ * kbasep_is_buffer_descriptor_region_suitable - Check if a VA region chosen to house
632
+ * the tiler heap buffer descriptor
633
+ * is suitable for the purpose.
634
+ * @kctx: kbase context of the tiler heap
635
+ * @reg: VA region being checked for suitability
636
+ *
637
+ * The tiler heap buffer descriptor memory does not admit page faults according
638
+ * to its design, so it must have the entirety of the backing upon allocation,
639
+ * and it has to remain alive as long as the tiler heap is alive, meaning it
640
+ * cannot be allocated from JIT/Ephemeral, or user freeable memory.
641
+ *
642
+ * Return: true on suitability, false otherwise.
643
+ */
644
+static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *const kctx,
645
+ struct kbase_va_region *const reg)
646
+{
647
+ if (kbase_is_region_invalid_or_free(reg)) {
648
+ dev_err(kctx->kbdev->dev, "Region is either invalid or free!\n");
649
+ return false;
650
+ }
651
+
652
+ if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) ||
653
+ (reg->flags & KBASE_REG_PF_GROW)) {
654
+ dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags);
655
+ return false;
656
+ }
657
+
658
+ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
659
+ dev_err(kctx->kbdev->dev, "Region has invalid type!\n");
660
+ return false;
661
+ }
662
+
663
+ if ((reg->nr_pages != kbase_reg_current_backed_size(reg)) ||
664
+ (reg->nr_pages < PFN_UP(sizeof(struct kbase_csf_gpu_buffer_heap)))) {
665
+ dev_err(kctx->kbdev->dev, "Region has invalid backing!\n");
666
+ return false;
667
+ }
668
+
669
+ return true;
670
+}
671
+
672
+#define TILER_BUF_DESC_SIZE (sizeof(struct kbase_csf_gpu_buffer_heap))
673
+
674
+int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_size,
675
+ u32 const initial_chunks, u32 const max_chunks,
676
+ u16 const target_in_flight, u64 const buf_desc_va,
677
+ u64 *const heap_gpu_va, u64 *const first_chunk_va)
422678 {
423679 int err = 0;
424680 struct kbase_csf_tiler_heap *heap = NULL;
425681 struct kbase_csf_heap_context_allocator *const ctx_alloc =
426682 &kctx->csf.tiler_heaps.ctx_alloc;
683
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
684
+ struct kbase_va_region *gpu_va_reg = NULL;
685
+ void *vmap_ptr = NULL;
427686
428687 dev_dbg(kctx->kbdev->dev,
429
- "Creating a tiler heap with %u chunks (limit: %u) of size %u\n",
430
- initial_chunks, max_chunks, chunk_size);
688
+ "Creating a tiler heap with %u chunks (limit: %u) of size %u, buf_desc_va: 0x%llx\n",
689
+ initial_chunks, max_chunks, chunk_size, buf_desc_va);
690
+
691
+ if (!kbase_mem_allow_alloc(kctx))
692
+ return -EINVAL;
431693
432694 if (chunk_size == 0)
433695 return -EINVAL;
@@ -446,8 +708,7 @@
 
 heap = kzalloc(sizeof(*heap), GFP_KERNEL);
 if (unlikely(!heap)) {
- dev_err(kctx->kbdev->dev,
- "No kernel memory for a new tiler heap\n");
+ dev_err(kctx->kbdev->dev, "No kernel memory for a new tiler heap");
 return -ENOMEM;
 }
 
@@ -455,51 +716,130 @@
455716 heap->chunk_size = chunk_size;
456717 heap->max_chunks = max_chunks;
457718 heap->target_in_flight = target_in_flight;
719
+ heap->buf_desc_checked = false;
458720 INIT_LIST_HEAD(&heap->chunks_list);
721
+ INIT_LIST_HEAD(&heap->link);
459722
460
- heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc);
723
+ /* Check on the buffer descriptor virtual Address */
724
+ if (buf_desc_va) {
725
+ struct kbase_va_region *buf_desc_reg;
461726
462
- mutex_lock(&kctx->csf.tiler_heaps.lock);
727
+ kbase_gpu_vm_lock(kctx);
728
+ buf_desc_reg =
729
+ kbase_region_tracker_find_region_enclosing_address(kctx, buf_desc_va);
463730
464
- if (unlikely(!heap->gpu_va)) {
465
- dev_err(kctx->kbdev->dev,
466
- "Failed to allocate a tiler heap context\n");
467
- err = -ENOMEM;
468
- } else {
469
- err = create_initial_chunks(heap, initial_chunks);
470
- if (unlikely(err)) {
471
- kbase_csf_heap_context_allocator_free(ctx_alloc,
472
- heap->gpu_va);
731
+ if (!kbasep_is_buffer_descriptor_region_suitable(kctx, buf_desc_reg)) {
732
+ kbase_gpu_vm_unlock(kctx);
733
+ dev_err(kctx->kbdev->dev,
734
+ "Could not find a suitable VA region for the tiler heap buf desc!\n");
735
+ err = -EINVAL;
736
+ goto buf_desc_not_suitable;
737
+ }
738
+
739
+ /* If we don't prevent userspace from unmapping this, we may run into
740
+ * use-after-free, as we don't check for the existence of the region throughout.
741
+ */
742
+
743
+ heap->buf_desc_va = buf_desc_va;
744
+ heap->buf_desc_reg = buf_desc_reg;
745
+ kbase_va_region_no_user_free_inc(buf_desc_reg);
746
+
747
+ vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE,
748
+ KBASE_REG_CPU_RD, &heap->buf_desc_map,
749
+ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
750
+
751
+ if (kbase_page_migration_enabled)
752
+ kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE);
753
+
754
+ kbase_gpu_vm_unlock(kctx);
755
+
756
+ if (unlikely(!vmap_ptr)) {
757
+ dev_err(kctx->kbdev->dev,
758
+ "Could not vmap buffer descriptor into kernel memory (err %d)\n",
759
+ err);
760
+ err = -ENOMEM;
761
+ goto buf_desc_vmap_failed;
473762 }
474763 }
475764
476
- if (unlikely(err)) {
477
- kfree(heap);
478
- } else {
479
- struct kbase_csf_tiler_heap_chunk const *first_chunk =
480
- list_first_entry(&heap->chunks_list,
481
- struct kbase_csf_tiler_heap_chunk, link);
482
-
483
- kctx->csf.tiler_heaps.nr_of_heaps++;
484
- heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps;
485
- list_add(&heap->link, &kctx->csf.tiler_heaps.list);
486
-
487
- *heap_gpu_va = heap->gpu_va;
488
- *first_chunk_va = first_chunk->gpu_va;
489
-
490
- KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
491
- kctx->kbdev, kctx->id, heap->heap_id,
492
- PFN_UP(heap->chunk_size * heap->max_chunks),
493
- PFN_UP(heap->chunk_size * heap->chunk_count),
494
- heap->max_chunks, heap->chunk_size, heap->chunk_count,
495
- heap->target_in_flight, 0);
496
-
497
- dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n",
498
- heap->gpu_va);
765
+ heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc);
766
+ if (unlikely(!heap->gpu_va)) {
767
+ dev_dbg(kctx->kbdev->dev, "Failed to allocate a tiler heap context\n");
768
+ err = -ENOMEM;
769
+ goto heap_context_alloc_failed;
499770 }
500771
772
+ gpu_va_reg = ctx_alloc->region;
773
+
774
+ kbase_gpu_vm_lock(kctx);
775
+ /* gpu_va_reg was created with BASEP_MEM_NO_USER_FREE, the code to unset this only happens
776
+ * on kctx termination (after all syscalls on kctx have finished), and so it is safe to
777
+ * assume that gpu_va_reg is still present.
778
+ */
779
+ vmap_ptr = kbase_vmap_reg(kctx, gpu_va_reg, heap->gpu_va, NEXT_CHUNK_ADDR_SIZE,
780
+ (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &heap->gpu_va_map,
781
+ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
782
+ kbase_gpu_vm_unlock(kctx);
783
+ if (unlikely(!vmap_ptr)) {
784
+ dev_dbg(kctx->kbdev->dev, "Failed to vmap the correct heap GPU VA address\n");
785
+ err = -ENOMEM;
786
+ goto heap_context_vmap_failed;
787
+ }
788
+
789
+ err = create_initial_chunks(heap, initial_chunks);
790
+ if (unlikely(err)) {
791
+ dev_dbg(kctx->kbdev->dev, "Failed to create the initial tiler heap chunks\n");
792
+ goto create_chunks_failed;
793
+ }
794
+ chunk = list_first_entry(&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link);
795
+
796
+ *heap_gpu_va = heap->gpu_va;
797
+ *first_chunk_va = chunk->gpu_va;
798
+
799
+ mutex_lock(&kctx->csf.tiler_heaps.lock);
800
+ kctx->csf.tiler_heaps.nr_of_heaps++;
801
+ heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps;
802
+ list_add(&heap->link, &kctx->csf.tiler_heaps.list);
803
+
804
+ KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id,
805
+ PFN_UP(heap->chunk_size * heap->max_chunks),
806
+ PFN_UP(heap->chunk_size * heap->chunk_count),
807
+ heap->max_chunks, heap->chunk_size, heap->chunk_count,
808
+ heap->target_in_flight, 0);
809
+
810
+#if defined(CONFIG_MALI_VECTOR_DUMP)
811
+ list_for_each_entry(chunk, &heap->chunks_list, link) {
812
+ KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(kctx->kbdev, kctx->id, heap->heap_id,
813
+ chunk->gpu_va);
814
+ }
815
+#endif
816
+ kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
817
+ kctx->running_total_tiler_heap_memory += (u64)heap->chunk_size * heap->chunk_count;
818
+ if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
819
+ kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
820
+
821
+ dev_dbg(kctx->kbdev->dev,
822
+ "Created tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", heap->gpu_va,
823
+ buf_desc_va, kctx->tgid, kctx->id);
501824 mutex_unlock(&kctx->csf.tiler_heaps.lock);
502825
826
+ return 0;
827
+
828
+create_chunks_failed:
829
+ kbase_vunmap(kctx, &heap->gpu_va_map);
830
+heap_context_vmap_failed:
831
+ kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va);
832
+heap_context_alloc_failed:
833
+ if (heap->buf_desc_reg)
834
+ kbase_vunmap(kctx, &heap->buf_desc_map);
835
+buf_desc_vmap_failed:
836
+ if (heap->buf_desc_reg) {
837
+ kbase_gpu_vm_lock(kctx);
838
+ kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
839
+ kbase_gpu_vm_unlock(kctx);
840
+ }
841
+buf_desc_not_suitable:
842
+ kfree(heap);
503843 return err;
504844 }
505845
@@ -508,47 +848,73 @@
508848 {
509849 int err = 0;
510850 struct kbase_csf_tiler_heap *heap = NULL;
851
+ u32 chunk_count = 0;
852
+ u64 heap_size = 0;
511853
512854 mutex_lock(&kctx->csf.tiler_heaps.lock);
513
-
514855 heap = find_tiler_heap(kctx, heap_gpu_va);
856
+ if (likely(heap)) {
857
+ chunk_count = heap->chunk_count;
858
+ heap_size = heap->chunk_size * chunk_count;
859
+
860
+ list_del_init(&heap->link);
861
+ } else {
862
+ err = -EINVAL;
863
+ }
864
+
865
+ /* Update stats whilst still holding the lock so they are in sync with the tiler_heaps.list
866
+ * at all times
867
+ */
868
+ if (likely(kctx->running_total_tiler_heap_memory >= heap_size))
869
+ kctx->running_total_tiler_heap_memory -= heap_size;
870
+ else
871
+ dev_warn(kctx->kbdev->dev,
872
+ "Running total tiler heap memory lower than expected!");
873
+ if (likely(kctx->running_total_tiler_heap_nr_chunks >= chunk_count))
874
+ kctx->running_total_tiler_heap_nr_chunks -= chunk_count;
875
+ else
876
+ dev_warn(kctx->kbdev->dev,
877
+ "Running total tiler chunk count lower than expected!");
878
+ if (!err)
879
+ dev_dbg(kctx->kbdev->dev,
880
+ "Terminated tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n",
881
+ heap->gpu_va, heap->buf_desc_va, kctx->tgid, kctx->id);
882
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
883
+
884
+ /* Deletion requires the kctx->reg_lock, so must only operate on it whilst unlinked from
885
+ * the kctx's csf.tiler_heaps.list, and without holding the csf.tiler_heaps.lock
886
+ */
515887 if (likely(heap))
516888 delete_heap(heap);
517
- else
518
- err = -EINVAL;
519
-
520
- mutex_unlock(&kctx->csf.tiler_heaps.lock);
521889
522890 return err;
523891 }
524892
525893 /**
526
- * alloc_new_chunk - Allocate a new chunk for the tiler heap.
894
+ * validate_allocation_request - Check whether the chunk allocation request
895
+ * received on tiler OOM should be handled at
896
+ * current time.
527897 *
528
- * This function will allocate a new chunk for the chunked tiler heap depending
529
- * on the settings provided by userspace when the heap was created and the
530
- * heap's statistics (like number of render passes in-flight).
898
+ * @heap: The tiler heap the OOM is associated with
899
+ * @nr_in_flight: Number of fragment jobs in flight
900
+ * @pending_frag_count: Number of pending fragment jobs
531901 *
532
- * @heap: Pointer to the tiler heap.
533
- * @nr_in_flight: Number of render passes that are in-flight, must not be zero.
534
- * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage.
535
- * The minimum value is zero but it must be less or equal to
536
- * the total number of render passes in flight
537
- * @new_chunk_ptr: Where to store the GPU virtual address & size of the new
538
- * chunk allocated for the heap.
902
+ * Context: must hold the tiler heap lock to guarantee its lifetime
539903 *
540
- * Return: 0 if a new chunk was allocated otherwise an appropriate negative
541
- * error code.
904
+ * Return:
905
+ * * 0 - allowed to allocate an additional chunk
906
+ * * -EINVAL - invalid
907
+ * * -EBUSY - there are fragment jobs still in flight, which may free chunks
908
+ * after completing
909
+ * * -ENOMEM - the targeted number of in-flight chunks has been reached and
910
+ * no new ones will be allocated
542911 */
543
-static int alloc_new_chunk(struct kbase_csf_tiler_heap *heap,
544
- u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
912
+static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight,
913
+ u32 pending_frag_count)
545914 {
546
- int err = -ENOMEM;
547
-
548915 lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
549916
550
- if (WARN_ON(!nr_in_flight) ||
551
- WARN_ON(pending_frag_count > nr_in_flight))
917
+ if (WARN_ON(!nr_in_flight) || WARN_ON(pending_frag_count > nr_in_flight))
552918 return -EINVAL;
553919
554920 if (nr_in_flight <= heap->target_in_flight) {
@@ -556,56 +922,451 @@
556922 /* Not exceeded the target number of render passes yet so be
557923 * generous with memory.
558924 */
559
- err = create_chunk(heap, false);
560
-
561
- if (likely(!err)) {
562
- struct kbase_csf_tiler_heap_chunk *new_chunk =
563
- get_last_chunk(heap);
564
- if (!WARN_ON(!new_chunk)) {
565
- *new_chunk_ptr =
566
- encode_chunk_ptr(heap->chunk_size,
567
- new_chunk->gpu_va);
568
- return 0;
569
- }
570
- }
925
+ return 0;
571926 } else if (pending_frag_count > 0) {
572
- err = -EBUSY;
927
+ return -EBUSY;
573928 } else {
574
- err = -ENOMEM;
929
+ return -ENOMEM;
575930 }
576931 } else {
577932 /* Reached target number of render passes in flight.
578933 * Wait for some of them to finish
579934 */
580
- err = -EBUSY;
935
+ return -EBUSY;
581936 }
582
-
583
- return err;
937
+ return -ENOMEM;
584938 }
585939
586940 int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
587941 u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
588942 {
589943 struct kbase_csf_tiler_heap *heap;
944
+ struct kbase_csf_tiler_heap_chunk *chunk;
590945 int err = -EINVAL;
946
+ u64 chunk_size = 0;
947
+ u64 heap_id = 0;
948
+
949
+ /* To avoid potential locking issues during allocation, this is handled
950
+ * in three phases:
951
+ * 1. Take the lock, find the corresponding heap, and find its chunk size
952
+ * (this is always 2 MB, but may change down the line).
953
+ * 2. Allocate memory for the chunk and its region.
954
+ * 3. If the heap still exists, link it to the end of the list. If it
955
+ * doesn't, roll back the allocation.
956
+ */
591957
592958 mutex_lock(&kctx->csf.tiler_heaps.lock);
959
+ heap = find_tiler_heap(kctx, gpu_heap_va);
960
+ if (likely(heap)) {
961
+ chunk_size = heap->chunk_size;
962
+ heap_id = heap->heap_id;
963
+ } else {
964
+ dev_err(kctx->kbdev->dev, "Heap 0x%llX does not exist", gpu_heap_va);
965
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
966
+ goto prelink_failure;
967
+ }
593968
969
+ err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
970
+ if (unlikely(err)) {
971
+ /* The allocation request can be legitimate, but be invoked on a heap
972
+ * that has already reached the maximum pre-configured capacity. This
973
+ * is useful debug information, but should not be treated as an error,
974
+ * since the request will be re-sent at a later point.
975
+ */
976
+ dev_dbg(kctx->kbdev->dev,
977
+ "Not allocating new chunk for heap 0x%llX due to current heap state (err %d)",
978
+ gpu_heap_va, err);
979
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
980
+ goto prelink_failure;
981
+ }
982
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
983
+ /* this heap must not be used whilst we have dropped the lock */
984
+ heap = NULL;
985
+
986
+ chunk = alloc_new_chunk(kctx, chunk_size);
987
+ if (unlikely(!chunk)) {
988
+ dev_err(kctx->kbdev->dev, "Could not allocate chunk of size %lld for ctx %d_%d",
989
+ chunk_size, kctx->tgid, kctx->id);
990
+ goto prelink_failure;
991
+ }
992
+
993
+ /* After this point, the heap that we were targeting could already have had the needed
994
+ * chunks allocated, if we were handling multiple OoM events on multiple threads, so
995
+ * we need to revalidate the need for the allocation.
996
+ */
997
+ mutex_lock(&kctx->csf.tiler_heaps.lock);
594998 heap = find_tiler_heap(kctx, gpu_heap_va);
595999
596
- if (likely(heap)) {
597
- err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count,
598
- new_chunk_ptr);
599
-
600
- KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
601
- kctx->kbdev, kctx->id, heap->heap_id,
602
- PFN_UP(heap->chunk_size * heap->max_chunks),
603
- PFN_UP(heap->chunk_size * heap->chunk_count),
604
- heap->max_chunks, heap->chunk_size, heap->chunk_count,
605
- heap->target_in_flight, nr_in_flight);
1000
+ if (unlikely(!heap)) {
1001
+ dev_err(kctx->kbdev->dev, "Tiler heap 0x%llX no longer exists!\n", gpu_heap_va);
1002
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
1003
+ goto unroll_chunk;
6061004 }
1005
+
1006
+ if (heap_id != heap->heap_id) {
1007
+ dev_err(kctx->kbdev->dev,
1008
+ "Tiler heap 0x%llX was removed from ctx %d_%d while allocating chunk of size %lld!",
1009
+ gpu_heap_va, kctx->tgid, kctx->id, chunk_size);
1010
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
1011
+ goto unroll_chunk;
1012
+ }
1013
+
1014
+ if (WARN_ON(chunk_size != heap->chunk_size)) {
1015
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
1016
+ goto unroll_chunk;
1017
+ }
1018
+
1019
+ err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
1020
+ if (unlikely(err)) {
1021
+ dev_warn(
1022
+ kctx->kbdev->dev,
1023
+ "Aborting linking chunk to heap 0x%llX: heap state changed during allocation (err %d)",
1024
+ gpu_heap_va, err);
1025
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
1026
+ goto unroll_chunk;
1027
+ }
1028
+
1029
+ err = init_chunk(heap, chunk, false);
1030
+
1031
+ /* On error, the chunk would not be linked, so we can still treat it as an unlinked
1032
+ * chunk for error handling.
1033
+ */
1034
+ if (unlikely(err)) {
1035
+ dev_err(kctx->kbdev->dev,
1036
+ "Could not link chunk(0x%llX) with tiler heap 0%llX in ctx %d_%d due to error %d",
1037
+ chunk->gpu_va, gpu_heap_va, kctx->tgid, kctx->id, err);
1038
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
1039
+ goto unroll_chunk;
1040
+ }
1041
+
1042
+ *new_chunk_ptr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);
1043
+
1044
+ /* update total and peak tiler heap memory record */
1045
+ kctx->running_total_tiler_heap_nr_chunks++;
1046
+ kctx->running_total_tiler_heap_memory += heap->chunk_size;
1047
+
1048
+ if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
1049
+ kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
1050
+
1051
+ KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id,
1052
+ PFN_UP(heap->chunk_size * heap->max_chunks),
1053
+ PFN_UP(heap->chunk_size * heap->chunk_count),
1054
+ heap->max_chunks, heap->chunk_size, heap->chunk_count,
1055
+ heap->target_in_flight, nr_in_flight);
6071056
6081057 mutex_unlock(&kctx->csf.tiler_heaps.lock);
6091058
6101059 return err;
1060
+unroll_chunk:
1061
+ remove_unlinked_chunk(kctx, chunk);
1062
+prelink_failure:
1063
+ return err;
1064
+}
1065
+
1066
+static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 chunk_gpu_va,
1067
+ u64 *hdr_val)
1068
+{
1069
+ int err;
1070
+ u64 *chunk_hdr;
1071
+ struct kbase_context *kctx = heap->kctx;
1072
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
1073
+
1074
+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
1075
+
1076
+ chunk = find_chunk(heap, chunk_gpu_va);
1077
+ if (unlikely(!chunk)) {
1078
+ dev_warn(kctx->kbdev->dev,
1079
+ "Failed to find tiler heap(0x%llX) chunk(0x%llX) for reclaim-delete\n",
1080
+ heap->gpu_va, chunk_gpu_va);
1081
+ return false;
1082
+ }
1083
+
1084
+ WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
1085
+ "Cannot support CPU cached chunks without sync operations");
1086
+ chunk_hdr = chunk->map.addr;
1087
+ *hdr_val = *chunk_hdr;
1088
+
1089
+ dev_dbg(kctx->kbdev->dev,
1090
+ "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n",
1091
+ chunk_gpu_va, heap->gpu_va, *hdr_val);
1092
+
1093
+ err = kbase_mem_shrink_gpu_mapping(kctx, chunk->region, 0, chunk->region->gpu_alloc->nents);
1094
+ if (unlikely(err)) {
1095
+ dev_warn(
1096
+ kctx->kbdev->dev,
1097
+ "Reclaim: shrinking GPU mapping failed on chunk(0x%llx) in heap(0x%llx) (err %d)\n",
1098
+ chunk_gpu_va, heap->gpu_va, err);
1099
+
1100
+ /* Cannot free the pages whilst references on the GPU remain, so keep the chunk on
1101
+ * the heap's chunk list and try a different heap.
1102
+ */
1103
+
1104
+ return false;
1105
+ }
1106
+ /* Destroy the mapping before the physical pages which are mapped are destroyed. */
1107
+ kbase_vunmap(kctx, &chunk->map);
1108
+
1109
+ err = kbase_free_phy_pages_helper(chunk->region->gpu_alloc,
1110
+ chunk->region->gpu_alloc->nents);
1111
+ if (unlikely(err)) {
1112
+ dev_warn(
1113
+ kctx->kbdev->dev,
1114
+ "Reclaim: remove physical backing failed on chunk(0x%llx) in heap(0x%llx) (err %d), continuing with deferred removal\n",
1115
+ chunk_gpu_va, heap->gpu_va, err);
1116
+
1117
+ /* kbase_free_phy_pages_helper() should only fail on invalid input, and WARNs
1118
+ * anyway, so continue instead of returning early.
1119
+ *
1120
+ * Indeed, we don't want to leave the chunk on the heap's chunk list whilst it has
1121
+ * its mapping removed, as that could lead to problems. It's safest to instead
1122
+ * continue with deferred destruction of the chunk.
1123
+ */
1124
+ }
1125
+
1126
+ dev_dbg(kctx->kbdev->dev,
1127
+ "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n",
1128
+ chunk_gpu_va, heap->gpu_va, *hdr_val);
1129
+
1130
+ mutex_lock(&heap->kctx->jit_evict_lock);
1131
+ list_move(&chunk->region->jit_node, &kctx->jit_destroy_head);
1132
+ mutex_unlock(&heap->kctx->jit_evict_lock);
1133
+
1134
+ list_del(&chunk->link);
1135
+ heap->chunk_count--;
1136
+ kfree(chunk);
1137
+
1138
+ return true;
1139
+}
1140
+
1141
+static void sanity_check_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap,
1142
+ struct kbase_csf_gpu_buffer_heap *desc)
1143
+{
1144
+ u64 first_hoarded_chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK;
1145
+
1146
+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
1147
+
1148
+ if (first_hoarded_chunk_gpu_va) {
1149
+ struct kbase_csf_tiler_heap_chunk *chunk =
1150
+ find_chunk(heap, first_hoarded_chunk_gpu_va);
1151
+
1152
+ if (likely(chunk)) {
1153
+ dev_dbg(heap->kctx->kbdev->dev,
1154
+ "Buffer descriptor 0x%llX sanity check ok, HW reclaim allowed\n",
1155
+ heap->buf_desc_va);
1156
+
1157
+ heap->buf_desc_checked = true;
1158
+ return;
1159
+ }
1160
+ }
1161
+ /* If there is no match, defer the check to next time */
1162
+ dev_dbg(heap->kctx->kbdev->dev, "Buffer descriptor 0x%llX runtime sanity check deferred\n",
1163
+ heap->buf_desc_va);
1164
+}
1165
+
1166
+static bool can_read_hw_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, u64 *chunk_gpu_va_ptr)
1167
+{
1168
+ struct kbase_context *kctx = heap->kctx;
1169
+
1170
+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
1171
+
1172
+ /* Initialize the descriptor pointer value to 0 */
1173
+ *chunk_gpu_va_ptr = 0;
1174
+
1175
+ /* The BufferDescriptor on heap is a hint on creation, do a sanity check at runtime */
1176
+ if (heap->buf_desc_reg && !heap->buf_desc_checked) {
1177
+ struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;
1178
+
1179
+ /* BufferDescriptor is supplied by userspace, so could be CPU-cached */
1180
+ if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
1181
+ kbase_sync_mem_regions(kctx, &heap->buf_desc_map, KBASE_SYNC_TO_CPU);
1182
+
1183
+ sanity_check_gpu_buffer_heap(heap, desc);
1184
+ if (heap->buf_desc_checked)
1185
+ *chunk_gpu_va_ptr = desc->pointer & CHUNK_ADDR_MASK;
1186
+ }
1187
+
1188
+ return heap->buf_desc_checked;
1189
+}
1190
+
1191
+static u32 delete_hoarded_chunks(struct kbase_csf_tiler_heap *heap)
1192
+{
1193
+ u32 freed = 0;
1194
+ u64 chunk_gpu_va = 0;
1195
+ struct kbase_context *kctx = heap->kctx;
1196
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
1197
+
1198
+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
1199
+
1200
+ if (can_read_hw_gpu_buffer_heap(heap, &chunk_gpu_va)) {
1201
+ u64 chunk_hdr_val;
1202
+ u64 *hw_hdr;
1203
+
1204
+ if (!chunk_gpu_va) {
1205
+ struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;
1206
+
1207
+ /* BufferDescriptor is supplied by userspace, so could be CPU-cached */
1208
+ if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
1209
+ kbase_sync_mem_regions(kctx, &heap->buf_desc_map,
1210
+ KBASE_SYNC_TO_CPU);
1211
+ chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK;
1212
+
1213
+ if (!chunk_gpu_va) {
1214
+ dev_dbg(kctx->kbdev->dev,
1215
+ "Buffer descriptor 0x%llX has no chunks (NULL) for reclaim scan\n",
1216
+ heap->buf_desc_va);
1217
+ goto out;
1218
+ }
1219
+ }
1220
+
1221
+ chunk = find_chunk(heap, chunk_gpu_va);
1222
+ if (unlikely(!chunk))
1223
+ goto out;
1224
+
1225
+ WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
1226
+ "Cannot support CPU cached chunks without sync operations");
1227
+ hw_hdr = chunk->map.addr;
1228
+
1229
+ /* Move onto the next chunk relevant information */
1230
+ chunk_hdr_val = *hw_hdr;
1231
+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1232
+
1233
+ while (chunk_gpu_va && heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
1234
+ bool success =
1235
+ delete_chunk_physical_pages(heap, chunk_gpu_va, &chunk_hdr_val);
1236
+
1237
+ if (!success)
1238
+ break;
1239
+
1240
+ freed++;
1241
+ /* On success, chunk_hdr_val is updated, extract the next chunk address */
1242
+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1243
+ }
1244
+
1245
+ /* Update the existing hardware chunk header, after reclaim deletion of chunks */
1246
+ *hw_hdr = chunk_hdr_val;
1247
+
1248
+ dev_dbg(heap->kctx->kbdev->dev,
1249
+ "HW reclaim scan freed chunks: %u, set hw_hdr[0]: 0x%llX\n", freed,
1250
+ chunk_hdr_val);
1251
+ } else {
1252
+ dev_dbg(kctx->kbdev->dev,
1253
+ "Skip HW reclaim scan, (disabled: buffer descriptor 0x%llX)\n",
1254
+ heap->buf_desc_va);
1255
+ }
1256
+out:
1257
+ return freed;
1258
+}
1259
+
1260
+static u64 delete_unused_chunk_pages(struct kbase_csf_tiler_heap *heap)
1261
+{
1262
+ u32 freed_chunks = 0;
1263
+ u64 freed_pages = 0;
1264
+ u64 chunk_gpu_va;
1265
+ u64 chunk_hdr_val;
1266
+ struct kbase_context *kctx = heap->kctx;
1267
+ u64 *ctx_ptr;
1268
+
1269
+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
1270
+
1271
+ WARN(heap->gpu_va_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED,
1272
+ "Cannot support CPU cached heap context without sync operations");
1273
+
1274
+ ctx_ptr = heap->gpu_va_map.addr;
1275
+
1276
+ /* Extract the first chunk address from the context's free_list_head */
1277
+ chunk_hdr_val = *ctx_ptr;
1278
+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1279
+
1280
+ while (chunk_gpu_va) {
1281
+ u64 hdr_val;
1282
+ bool success = delete_chunk_physical_pages(heap, chunk_gpu_va, &hdr_val);
1283
+
1284
+ if (!success)
1285
+ break;
1286
+
1287
+ freed_chunks++;
1288
+ chunk_hdr_val = hdr_val;
1289
+ /* extract the next chunk address */
1290
+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1291
+ }
1292
+
1293
+ /* Update the post-scan deletion to context header */
1294
+ *ctx_ptr = chunk_hdr_val;
1295
+
1296
+ /* Try to scan the HW hoarded list of unused chunks */
1297
+ freed_chunks += delete_hoarded_chunks(heap);
1298
+ freed_pages = freed_chunks * PFN_UP(heap->chunk_size);
1299
+ dev_dbg(heap->kctx->kbdev->dev,
1300
+ "Scan reclaim freed chunks/pages %u/%llu, set heap-ctx_u64[0]: 0x%llX\n",
1301
+ freed_chunks, freed_pages, chunk_hdr_val);
1302
+
1303
+ /* Update context tiler heaps memory usage */
1304
+ kctx->running_total_tiler_heap_memory -= freed_pages << PAGE_SHIFT;
1305
+ kctx->running_total_tiler_heap_nr_chunks -= freed_chunks;
1306
+ return freed_pages;
1307
+}
1308
+
1309
+u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free)
1310
+{
1311
+ u64 freed = 0;
1312
+ struct kbase_csf_tiler_heap *heap;
1313
+
1314
+ mutex_lock(&kctx->csf.tiler_heaps.lock);
1315
+
1316
+ list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) {
1317
+ freed += delete_unused_chunk_pages(heap);
1318
+
1319
+ /* If freed enough, then stop here */
1320
+ if (freed >= to_free)
1321
+ break;
1322
+ }
1323
+
1324
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
1325
+ /* The scan is surely not more than 4-G pages, but for logic flow limit it */
1326
+ if (WARN_ON(unlikely(freed > U32_MAX)))
1327
+ return U32_MAX;
1328
+ else
1329
+ return (u32)freed;
1330
+}
1331
+
1332
+static u64 count_unused_heap_pages(struct kbase_csf_tiler_heap *heap)
1333
+{
1334
+ u32 chunk_cnt = 0;
1335
+ u64 page_cnt = 0;
1336
+
1337
+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
1338
+
1339
+ /* Here the count is basically an informed estimate, avoiding the costly mapping/unmapping
1340
+ * in the chunk list walk. The downside is that the number is a less reliable guide for
1341
+ * later on scan (free) calls on this heap for what actually is freeable.
1342
+ */
1343
+ if (heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
1344
+ chunk_cnt = heap->chunk_count - HEAP_SHRINK_STOP_LIMIT;
1345
+ page_cnt = chunk_cnt * PFN_UP(heap->chunk_size);
1346
+ }
1347
+
1348
+ dev_dbg(heap->kctx->kbdev->dev,
1349
+ "Reclaim count chunks/pages %u/%llu (estimated), heap_va: 0x%llX\n", chunk_cnt,
1350
+ page_cnt, heap->gpu_va);
1351
+
1352
+ return page_cnt;
1353
+}
1354
+
1355
+u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx)
1356
+{
1357
+ u64 page_cnt = 0;
1358
+ struct kbase_csf_tiler_heap *heap;
1359
+
1360
+ mutex_lock(&kctx->csf.tiler_heaps.lock);
1361
+
1362
+ list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link)
1363
+ page_cnt += count_unused_heap_pages(heap);
1364
+
1365
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
1366
+
1367
+ /* The count is surely not more than 4-G pages, but for logic flow limit it */
1368
+ if (WARN_ON(unlikely(page_cnt > U32_MAX)))
1369
+ return U32_MAX;
1370
+ else
1371
+ return (u32)page_cnt;
6111372 }