From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] mali_kbase: rework CSF tiler heap chunk management for reclaim

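Rework the CSF chunked tiler heap so that unused chunks can be
reclaimed at runtime:

- Chunks now keep a permanent kernel mapping (chunk->map) instead of
  transient kbase_vmap_prot()/kbase_vunmap() calls, so chunk headers
  can be read and updated from the reclaim path.
- Chunk regions are marked KBASE_REG_DONT_NEED (in addition to
  NO_USER_FREE) and their external CPU mappings are removed, so only
  the tiler heap code owns their physical pages.
- Heap creation takes an optional userspace buffer descriptor address
  (buf_desc_va) which, after a runtime sanity check, lets the driver
  walk the list of unused chunks hoarded by the hardware.
- New entry points kbase_csf_tiler_heap_count_kctx_unused_pages() and
  kbase_csf_tiler_heap_scan_kctx_unused_pages() estimate and free
  unused chunk pages for a context, aiming to keep at least
  HEAP_SHRINK_STOP_LIMIT chunk(s) per heap.
- Locking is reworked: heaps are unlinked from the context's
  tiler_heaps list before deletion, and chunk allocation no longer
  holds the tiler_heaps lock while taking the region lock or while
  the allocator may enter the reclaim path.
- Per-context running and peak tiler heap memory usage is now tracked.
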
---
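Note: the kbase_csf_tiler_heap_count_kctx_unused_pages() /
kbase_csf_tiler_heap_scan_kctx_unused_pages() pair added by this patch maps
naturally onto the kernel's count_objects/scan_objects shrinker interface. A
minimal sketch of how a shrinker could drive them is below; it is not part of
this patch. The tiler_heap_shrinker member of kbase_device, the unlocked
kctx_list/kctx_list_link walk, and the error handling are assumptions for
illustration, and register_shrinker()'s signature varies by kernel version.

#include <linux/shrinker.h>

static unsigned long tiler_heap_shrink_count(struct shrinker *s,
					     struct shrink_control *sc)
{
	/* Hypothetical embedding of the shrinker in kbase_device */
	struct kbase_device *kbdev =
		container_of(s, struct kbase_device, tiler_heap_shrinker);
	struct kbase_context *kctx;
	unsigned long count = 0;

	/* Sum the (estimated) reclaimable pages across all contexts */
	list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link)
		count += kbase_csf_tiler_heap_count_kctx_unused_pages(kctx);

	return count ? count : SHRINK_EMPTY;
}

static unsigned long tiler_heap_shrink_scan(struct shrinker *s,
					    struct shrink_control *sc)
{
	struct kbase_device *kbdev =
		container_of(s, struct kbase_device, tiler_heap_shrinker);
	struct kbase_context *kctx;
	unsigned long freed = 0;

	/* Free unused chunk pages until the requested target is met */
	list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
		freed += kbase_csf_tiler_heap_scan_kctx_unused_pages(
			kctx, sc->nr_to_scan - freed);
		if (freed >= sc->nr_to_scan)
			break;
	}

	return freed ? freed : SHRINK_STOP;
}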
 kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c | 1239 +++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 1000 insertions(+), 239 deletions(-)
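
A note on the chunk header format this patch relies on: encode_chunk_ptr()
packs the next chunk's size and address into a single 64-bit header word, and
the reclaim walks recover the address by masking with CHUNK_ADDR_MASK (a zero
address terminates the list). A sketch of that decode step, using the
permanent chunk->map kernel mapping this patch introduces:

/* Sketch of one step of the reclaim walk, not part of this patch;
 * CHUNK_ADDR_MASK is defined in mali_kbase_csf_tiler_heap_def.h.
 */
static u64 next_chunk_gpu_va(const struct kbase_csf_tiler_heap_chunk *chunk)
{
	const u64 chunk_hdr_val = *(u64 *)chunk->map.addr; /* packed size | address */

	return chunk_hdr_val & CHUNK_ADDR_MASK; /* zero means end of list */
}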

diff --git a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c
index b31c82e..8072a8b 100644
--- a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c
+++ b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -25,15 +25,35 @@
 #include "mali_kbase_csf_tiler_heap_def.h"
 #include "mali_kbase_csf_heap_context_alloc.h"
 
+/* Tiler heap shrink stop limit for maintaining a minimum number of chunks */
+#define HEAP_SHRINK_STOP_LIMIT (1)
+
+/**
+ * struct kbase_csf_gpu_buffer_heap - A gpu buffer object specific to tiler heap
+ *
+ * @cdsbp_0:       Descriptor_type and buffer_type
+ * @size:          The size of the current heap chunk
+ * @pointer:       Pointer to the current heap chunk
+ * @low_pointer:   Pointer to low end of current heap chunk
+ * @high_pointer:  Pointer to high end of current heap chunk
+ */
+struct kbase_csf_gpu_buffer_heap {
+	u32 cdsbp_0;
+	u32 size;
+	u64 pointer;
+	u64 low_pointer;
+	u64 high_pointer;
+} __packed;
+
 /**
  * encode_chunk_ptr - Encode the address and size of a chunk as an integer.
+ *
+ * @chunk_size: Size of a tiler heap chunk, in bytes.
+ * @chunk_addr: GPU virtual address of the same tiler heap chunk.
  *
  * The size and address of the next chunk in a list are packed into a single
  * 64-bit value for storage in a chunk's header. This function returns that
  * value.
- *
- * @chunk_size: Size of a tiler heap chunk, in bytes.
- * @chunk_addr: GPU virtual address of the same tiler heap chunk.
  *
  * Return: Next chunk pointer suitable for writing into a chunk header.
  */
@@ -66,8 +86,6 @@
 static struct kbase_csf_tiler_heap_chunk *get_last_chunk(
 	struct kbase_csf_tiler_heap *const heap)
 {
-	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
-
 	if (list_empty(&heap->chunks_list))
 		return NULL;
 
@@ -76,15 +94,44 @@
 }
 
 /**
+ * remove_external_chunk_mappings - Remove external mappings from a chunk that
+ *                                  is being transitioned to the tiler heap
+ *                                  memory system.
+ *
+ * @kctx:  kbase context the chunk belongs to.
+ * @chunk: The chunk whose external mappings are going to be removed.
+ *
+ * This function marks the region as DONT NEED. Along with NO_USER_FREE, this indicates
+ * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other
+ * parts of kbase outside of tiler heap management should not take references on its physical
+ * pages, and should not modify them.
+ */
+static void remove_external_chunk_mappings(struct kbase_context *const kctx,
+					   struct kbase_csf_tiler_heap_chunk *chunk)
+{
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (chunk->region->cpu_alloc != NULL) {
+		kbase_mem_shrink_cpu_mapping(kctx, chunk->region, 0,
+					     chunk->region->cpu_alloc->nents);
+	}
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
+	chunk->region->flags |= KBASE_REG_DONT_NEED;
+#endif
+
+	dev_dbg(kctx->kbdev->dev, "Removed external mappings from chunk 0x%llX", chunk->gpu_va);
+}
+
+/**
  * link_chunk - Link a chunk into a tiler heap
+ *
+ * @heap:  Pointer to the tiler heap.
+ * @chunk: Pointer to the heap chunk to be linked.
  *
  * Unless the @chunk is the first in the kernel's list of chunks belonging to
  * a given tiler heap, this function stores the size and address of the @chunk
  * in the header of the preceding chunk. This requires the GPU memory region
- * containing the header to be be mapped temporarily, which can fail.
- *
- * @heap:  Pointer to the tiler heap.
- * @chunk: Pointer to the heap chunk to be linked.
+ * containing the header to be mapped temporarily, which can fail.
  *
  * Return: 0 if successful or a negative error code on failure.
  */
@@ -95,19 +142,12 @@
 
 	if (prev) {
 		struct kbase_context *const kctx = heap->kctx;
-		struct kbase_vmap_struct map;
-		u64 *const prev_hdr = kbase_vmap_prot(kctx, prev->gpu_va,
-			sizeof(*prev_hdr), KBASE_REG_CPU_WR, &map);
+		u64 *prev_hdr = prev->map.addr;
 
-		if (unlikely(!prev_hdr)) {
-			dev_err(kctx->kbdev->dev,
-				"Failed to map tiler heap chunk 0x%llX\n",
-				prev->gpu_va);
-			return -ENOMEM;
-		}
+		WARN((prev->region->flags & KBASE_REG_CPU_CACHED),
+		     "Cannot support CPU cached chunks without sync operations");
 
 		*prev_hdr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);
-		kbase_vunmap(kctx, &map);
 
 		dev_dbg(kctx->kbdev->dev,
 			"Linked tiler heap chunks, 0x%llX -> 0x%llX\n",
@@ -120,23 +160,25 @@
 /**
  * init_chunk - Initialize and link a tiler heap chunk
  *
- * Zero-initialize a new chunk's header (including its pointer to the next
- * chunk, which doesn't exist yet) and then update the previous chunk's
- * header to link the new chunk into the chunk list.
- *
  * @heap:  Pointer to the tiler heap.
  * @chunk: Pointer to the heap chunk to be initialized and linked.
  * @link_with_prev: Flag to indicate if the new chunk needs to be linked with
  *                  the previously allocated chunk.
+ *
+ * Zero-initialize a new chunk's header (including its pointer to the next
+ * chunk, which doesn't exist yet) and then update the previous chunk's
+ * header to link the new chunk into the chunk list.
  *
  * Return: 0 if successful or a negative error code on failure.
  */
 static int init_chunk(struct kbase_csf_tiler_heap *const heap,
 	struct kbase_csf_tiler_heap_chunk *const chunk, bool link_with_prev)
 {
-	struct kbase_vmap_struct map;
-	struct u64 *chunk_hdr = NULL;
+	int err = 0;
+	u64 *chunk_hdr;
 	struct kbase_context *const kctx = heap->kctx;
+
+	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
 
 	if (unlikely(chunk->gpu_va & ~CHUNK_ADDR_MASK)) {
 		dev_err(kctx->kbdev->dev,
@@ -144,155 +186,283 @@
 		return -EINVAL;
 	}
 
-	chunk_hdr = kbase_vmap_prot(kctx,
-		chunk->gpu_va, CHUNK_HDR_SIZE, KBASE_REG_CPU_WR, &map);
-
-	if (unlikely(!chunk_hdr)) {
-		dev_err(kctx->kbdev->dev,
-			"Failed to map a tiler heap chunk header\n");
-		return -ENOMEM;
+	WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
+	     "Cannot support CPU cached chunks without sync operations");
+	chunk_hdr = chunk->map.addr;
+	if (WARN(chunk->map.size < CHUNK_HDR_SIZE,
+		 "Tiler chunk kernel mapping was not large enough for zero-init")) {
+		return -EINVAL;
 	}
 
 	memset(chunk_hdr, 0, CHUNK_HDR_SIZE);
-	kbase_vunmap(kctx, &map);
+	INIT_LIST_HEAD(&chunk->link);
 
 	if (link_with_prev)
-		return link_chunk(heap, chunk);
-	else
-		return 0;
-}
-
-/**
- * create_chunk - Create a tiler heap chunk
- *
- * This function allocates a chunk of memory for a tiler heap and adds it to
- * the end of the list of chunks associated with that heap. The size of the
- * chunk is not a parameter because it is configured per-heap not per-chunk.
- *
- * @heap: Pointer to the tiler heap for which to allocate memory.
- * @link_with_prev: Flag to indicate if the chunk to be allocated needs to be
- *                  linked with the previously allocated chunk.
- *
- * Return: 0 if successful or a negative error code on failure.
- */
-static int create_chunk(struct kbase_csf_tiler_heap *const heap,
-		bool link_with_prev)
-{
-	int err = 0;
-	struct kbase_context *const kctx = heap->kctx;
-	u64 nr_pages = PFN_UP(heap->chunk_size);
-	u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
-		BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE |
-		BASE_MEM_COHERENT_LOCAL;
-	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
-
-	flags |= base_mem_group_id_set(kctx->jit_group_id);
-
-#if defined(CONFIG_MALI_BIFROST_DEBUG) || defined(CONFIG_MALI_VECTOR_DUMP)
-	flags |= BASE_MEM_PROT_CPU_RD;
-#endif
-
-	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
-
-	chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
-	if (unlikely(!chunk)) {
-		dev_err(kctx->kbdev->dev,
-			"No kernel memory for a new tiler heap chunk\n");
-		return -ENOMEM;
-	}
-
-	/* Allocate GPU memory for the new chunk. */
-	INIT_LIST_HEAD(&chunk->link);
-	chunk->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0,
-		&flags, &chunk->gpu_va);
-
-	if (unlikely(!chunk->region)) {
-		dev_err(kctx->kbdev->dev,
-			"Failed to allocate a tiler heap chunk\n");
-		err = -ENOMEM;
-	} else {
-		err = init_chunk(heap, chunk, link_with_prev);
-		if (unlikely(err)) {
-			kbase_gpu_vm_lock(kctx);
-			chunk->region->flags &= ~KBASE_REG_NO_USER_FREE;
-			kbase_mem_free_region(kctx, chunk->region);
-			kbase_gpu_vm_unlock(kctx);
-		}
-	}
+		err = link_chunk(heap, chunk);
 
 	if (unlikely(err)) {
-		kfree(chunk);
-	} else {
-		list_add_tail(&chunk->link, &heap->chunks_list);
-		heap->chunk_count++;
-
-		dev_dbg(kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n",
-			chunk->gpu_va);
+		dev_err(kctx->kbdev->dev, "Failed to link a chunk to a tiler heap\n");
+		return -EINVAL;
 	}
+
+	list_add_tail(&chunk->link, &heap->chunks_list);
+	heap->chunk_count++;
 
 	return err;
 }
 
 /**
- * delete_chunk - Delete a tiler heap chunk
+ * remove_unlinked_chunk - Remove a chunk that is not currently linked into a
+ *                         heap.
  *
- * This function frees a tiler heap chunk previously allocated by @create_chunk
- * and removes it from the list of chunks associated with the heap.
- *
- * WARNING: The deleted chunk is not unlinked from the list of chunks used by
- *          the GPU, therefore it is only safe to use this function when
- *          deleting a heap.
- *
- * @heap:  Pointer to the tiler heap for which @chunk was allocated.
- * @chunk: Pointer to a chunk to be deleted.
+ * @kctx:  Kbase context that was used to allocate the memory.
+ * @chunk: Chunk that has been allocated, but not linked into a heap.
  */
-static void delete_chunk(struct kbase_csf_tiler_heap *const heap,
-	struct kbase_csf_tiler_heap_chunk *const chunk)
+static void remove_unlinked_chunk(struct kbase_context *kctx,
+				  struct kbase_csf_tiler_heap_chunk *chunk)
 {
-	struct kbase_context *const kctx = heap->kctx;
-
-	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
+	if (WARN_ON(!list_empty(&chunk->link)))
+		return;
 
 	kbase_gpu_vm_lock(kctx);
-	chunk->region->flags &= ~KBASE_REG_NO_USER_FREE;
+	kbase_vunmap(kctx, &chunk->map);
+	/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
+	 * regions), and so we must clear that flag too before freeing.
+	 * For "no user free count", we check that the count is 1 as it is a shrinkable region;
+	 * no other code part within kbase can take a reference to it.
+	 */
+	WARN_ON(atomic_read(&chunk->region->no_user_free_count) > 1);
+	kbase_va_region_no_user_free_dec(chunk->region);
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
+	chunk->region->flags &= ~KBASE_REG_DONT_NEED;
+#endif
 	kbase_mem_free_region(kctx, chunk->region);
 	kbase_gpu_vm_unlock(kctx);
-	list_del(&chunk->link);
-	heap->chunk_count--;
+
 	kfree(chunk);
 }
 
 /**
- * delete_all_chunks - Delete all chunks belonging to a tiler heap
+ * alloc_new_chunk - Allocate new chunk metadata for the tiler heap, reserve a fully backed VA
+ *                   region for the chunk, and provide a kernel mapping.
+ * @kctx:       kbase context with which the chunk will be linked
+ * @chunk_size: the size of the chunk from the corresponding heap
  *
- * This function empties the list of chunks associated with a tiler heap by
- * freeing all chunks previously allocated by @create_chunk.
+ * Allocate the chunk tracking metadata and a corresponding fully backed VA region for the
+ * chunk. The kernel may need to invoke the reclaim path while trying to fulfill the allocation, so
+ * we cannot hold any lock that would be held in the shrinker paths (JIT evict lock or tiler heap
+ * lock).
+ *
+ * Since the chunk may have its physical backing removed, to prevent use-after-free scenarios we
+ * ensure that it is protected from being mapped by other parts of kbase.
+ *
+ * The chunk's GPU memory can be accessed via its 'map' member, but should only be done so by the
+ * shrinker path, as it may be otherwise shrunk at any time.
+ *
+ * Return: pointer to kbase_csf_tiler_heap_chunk on success or a NULL pointer
+ *         on failure
+ */
+static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *kctx,
+							  u64 chunk_size)
+{
+	u64 nr_pages = PFN_UP(chunk_size);
+	u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
+		    BASEP_MEM_NO_USER_FREE | BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD;
+	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
+	/* The chunk kernel mapping needs to be large enough to:
+	 * - initially zero the CHUNK_HDR_SIZE area
+	 * - on shrinking, access the NEXT_CHUNK_ADDR_SIZE area
+	 */
+	const size_t chunk_kernel_map_size = max(CHUNK_HDR_SIZE, NEXT_CHUNK_ADDR_SIZE);
+
+	/* Calls to this function are inherently synchronous, with respect to
+	 * MMU operations.
+	 */
+	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
+	flags |= kbase_mem_group_id_set(kctx->jit_group_id);
+
+	chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
+	if (unlikely(!chunk)) {
+		dev_err(kctx->kbdev->dev,
+			"No kernel memory for a new tiler heap chunk\n");
+		return NULL;
+	}
+
+	/* Allocate GPU memory for the new chunk. */
+	chunk->region =
+		kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info);
+
+	if (unlikely(!chunk->region)) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate a tiler heap chunk!\n");
+		goto unroll_chunk;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Some checks are done here because NO_USER_FREE still allows such
+	 * changes to be made whilst the region lock was dropped
+	 */
+	if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) {
+		dev_err(kctx->kbdev->dev, "Chunk region has active kernel mappings!\n");
+		goto unroll_region;
+	}
+
+	/* There is a race condition with regard to KBASE_REG_DONT_NEED, where another
+	 * thread can have the "no user free" refcount increased between kbase_mem_alloc
+	 * and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by
+	 * remove_external_chunk_mappings (below).
+	 *
+	 * It should be fine and not a security risk if we let the region leak till
+	 * region tracker termination in such a case.
+	 */
+	if (unlikely(atomic_read(&chunk->region->no_user_free_count) > 1)) {
+		dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_count > 1!\n");
+		goto unroll_region;
+	}
+
+	/* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE
+	 * being requested, it's useful to document in code what those restrictions are, and ensure
+	 * they remain in place in future.
+	 */
+	if (WARN(!chunk->region->gpu_alloc,
+		 "NO_USER_FREE chunks should not have had their alloc freed")) {
+		goto unroll_region;
+	}
+
+	if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE,
+		 "NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) {
+		goto unroll_region;
+	}
+
+	if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC),
+		 "NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) {
+		goto unroll_region;
+	}
+
+	if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED),
+		 "NO_USER_FREE chunks should not have been made ephemeral")) {
+		goto unroll_region;
+	}
+
+	if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1,
+		 "NO_USER_FREE chunks should not have been aliased")) {
+		goto unroll_region;
+	}
+
+	if (unlikely(!kbase_vmap_reg(kctx, chunk->region, chunk->gpu_va, chunk_kernel_map_size,
+				     (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &chunk->map,
+				     KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING))) {
+		dev_err(kctx->kbdev->dev, "Failed to map chunk header for shrinking!\n");
+		goto unroll_region;
+	}
+
+	remove_external_chunk_mappings(kctx, chunk);
+	kbase_gpu_vm_unlock(kctx);
+
+	/* If page migration is enabled, we don't want to migrate tiler heap pages.
+	 * This does not change if the constituent pages are already marked as isolated.
+	 */
+	if (kbase_page_migration_enabled)
+		kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE);
+
+	return chunk;
+
+unroll_region:
+	/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
+	 * regions), and so we must clear that flag too before freeing.
+	 */
+	kbase_va_region_no_user_free_dec(chunk->region);
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
+	chunk->region->flags &= ~KBASE_REG_DONT_NEED;
+#endif
+	kbase_mem_free_region(kctx, chunk->region);
+	kbase_gpu_vm_unlock(kctx);
+unroll_chunk:
+	kfree(chunk);
+	return NULL;
+}
+
+/**
+ * create_chunk - Create a tiler heap chunk
+ *
+ * @heap: Pointer to the tiler heap for which to allocate memory.
+ *
+ * This function allocates a chunk of memory for a tiler heap and adds it to
+ * the list of chunks associated with that heap, both on the host side and in GPU
+ * memory.
+ *
+ * Return: 0 if successful or a negative error code on failure.
+ */
+static int create_chunk(struct kbase_csf_tiler_heap *const heap)
+{
+	int err = 0;
+	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
+
+	chunk = alloc_new_chunk(heap->kctx, heap->chunk_size);
+	if (unlikely(!chunk)) {
+		err = -ENOMEM;
+		goto allocation_failure;
+	}
+
+	mutex_lock(&heap->kctx->csf.tiler_heaps.lock);
+	err = init_chunk(heap, chunk, true);
+	mutex_unlock(&heap->kctx->csf.tiler_heaps.lock);
+
+	if (unlikely(err))
+		goto initialization_failure;
+
+	dev_dbg(heap->kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n", chunk->gpu_va);
+
+	return 0;
+initialization_failure:
+	remove_unlinked_chunk(heap->kctx, chunk);
+allocation_failure:
+	return err;
+}
+
+/**
+ * delete_all_chunks - Delete all chunks belonging to an unlinked tiler heap
  *
  * @heap: Pointer to a tiler heap.
+ *
+ * This function empties the list of chunks associated with a tiler heap by freeing all chunks
+ * previously allocated by @create_chunk.
+ *
+ * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the
+ * tiler_heaps lock cannot be held whilst deleting its chunks due to also needing the &struct
+ * kbase_context.region_lock.
+ *
+ * WARNING: Whilst the deleted chunks are unlinked from host memory, they are not unlinked from the
+ *          list of chunks used by the GPU, therefore it is only safe to use this function when
+ *          deleting a heap.
  */
 static void delete_all_chunks(struct kbase_csf_tiler_heap *heap)
 {
-	struct list_head *entry = NULL, *tmp = NULL;
 	struct kbase_context *const kctx = heap->kctx;
+	struct list_head *entry = NULL, *tmp = NULL;
 
-	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
+	WARN(!list_empty(&heap->link),
+	     "Deleting a heap's chunks when that heap is still linked requires the tiler_heaps lock, which cannot be held by the caller");
 
 	list_for_each_safe(entry, tmp, &heap->chunks_list) {
 		struct kbase_csf_tiler_heap_chunk *chunk = list_entry(
 			entry, struct kbase_csf_tiler_heap_chunk, link);
 
-		delete_chunk(heap, chunk);
+		list_del_init(&chunk->link);
+		heap->chunk_count--;
+
+		remove_unlinked_chunk(kctx, chunk);
 	}
 }
 
 /**
  * create_initial_chunks - Create the initial list of chunks for a tiler heap
  *
- * This function allocates a given number of chunks for a tiler heap and
- * adds them to the list of chunks associated with that heap.
- *
  * @heap:    Pointer to the tiler heap for which to allocate memory.
  * @nchunks: Number of chunks to create.
+ *
+ * This function allocates a given number of chunks for a tiler heap and
+ * adds them to the list of chunks associated with that heap.
  *
  * Return: 0 if successful or a negative error code on failure.
  */
@@ -303,7 +473,7 @@
 	u32 i;
 
 	for (i = 0; (i < nchunks) && likely(!err); i++)
-		err = create_chunk(heap, true);
+		err = create_chunk(heap);
 
 	if (unlikely(err))
 		delete_all_chunks(heap);
@@ -312,14 +482,17 @@
 }
 
 /**
- * delete_heap - Delete a tiler heap
- *
- * This function frees any chunks allocated for a tiler heap previously
- * initialized by @kbase_csf_tiler_heap_init and removes it from the list of
- * heaps associated with the kbase context. The heap context structure used by
- * the firmware is also freed.
+ * delete_heap - Delete an unlinked tiler heap
  *
  * @heap: Pointer to a tiler heap to be deleted.
+ *
+ * This function frees any chunks allocated for a tiler heap previously
+ * initialized by @kbase_csf_tiler_heap_init. The heap context structure used by
+ * the firmware is also freed.
+ *
+ * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the
+ * tiler_heaps lock cannot be held whilst deleting it due to also needing the &struct
+ * kbase_context.region_lock.
  */
 static void delete_heap(struct kbase_csf_tiler_heap *heap)
 {
@@ -327,22 +500,40 @@
 
 	dev_dbg(kctx->kbdev->dev, "Deleting tiler heap 0x%llX\n", heap->gpu_va);
 
-	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
+	WARN(!list_empty(&heap->link),
+	     "Deleting a heap that is still linked requires the tiler_heaps lock, which cannot be held by the caller");
 
+	/* Make sure that all of the VA regions corresponding to the chunks are
+	 * freed at this time and that the work queue is not trying to access freed
+	 * memory.
+	 *
+	 * Note: since the heap is unlinked, and that no references are made to chunks other
+	 * than from their heap, there is no need to separately move the chunks out of the
+	 * heap->chunks_list to delete them.
+	 */
 	delete_all_chunks(heap);
 
+	kbase_vunmap(kctx, &heap->gpu_va_map);
 	/* We could optimize context destruction by not freeing leaked heap
-	 * contexts but it doesn't seem worth the extra complexity.
+	 * contexts but it doesn't seem worth the extra complexity. After this
+	 * point, the suballocation is returned to the heap context allocator and
+	 * may be overwritten with new data, meaning heap->gpu_va should not
+	 * be used past this point.
 	 */
 	kbase_csf_heap_context_allocator_free(&kctx->csf.tiler_heaps.ctx_alloc,
 		heap->gpu_va);
-
-	list_del(&heap->link);
 
 	WARN_ON(heap->chunk_count);
 	KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id,
 		heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0,
 		heap->target_in_flight, 0);
+
+	if (heap->buf_desc_reg) {
+		kbase_vunmap(kctx, &heap->buf_desc_map);
+		kbase_gpu_vm_lock(kctx);
+		kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
+		kbase_gpu_vm_unlock(kctx);
+	}
 
 	kfree(heap);
 }
@@ -350,14 +541,14 @@
 /**
  * find_tiler_heap - Find a tiler heap from the address of its heap context
  *
+ * @kctx:        Pointer to the kbase context to search for a tiler heap.
+ * @heap_gpu_va: GPU virtual address of a heap context structure.
+ *
  * Each tiler heap managed by the kernel has an associated heap context
  * structure used by the firmware. This function finds a tiler heap object from
  * the GPU virtual address of its associated heap context. The heap context
  * should have been allocated by @kbase_csf_heap_context_allocator_alloc in the
  * same @kctx.
- *
- * @kctx:        Pointer to the kbase context to search for a tiler heap.
- * @heap_gpu_va: GPU virtual address of a heap context structure.
  *
  * Return: pointer to the tiler heap object, or NULL if not found.
  */
@@ -375,6 +566,23 @@
 
 	dev_dbg(kctx->kbdev->dev, "Tiler heap 0x%llX was not found\n",
 		heap_gpu_va);
+
+	return NULL;
+}
+
+static struct kbase_csf_tiler_heap_chunk *find_chunk(struct kbase_csf_tiler_heap *heap,
+						     u64 const chunk_gpu_va)
+{
+	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
+
+	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
+
+	list_for_each_entry(chunk, &heap->chunks_list, link) {
+		if (chunk->gpu_va == chunk_gpu_va)
+			return chunk;
+	}
+
+	dev_dbg(heap->kctx->kbdev->dev, "Tiler heap chunk 0x%llX was not found\n", chunk_gpu_va);
 
 	return NULL;
 }
@@ -397,37 +605,91 @@
 
 void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx)
 {
+	LIST_HEAD(local_heaps_list);
 	struct list_head *entry = NULL, *tmp = NULL;
 
 	dev_dbg(kctx->kbdev->dev, "Terminating a context for tiler heaps\n");
 
 	mutex_lock(&kctx->csf.tiler_heaps.lock);
+	list_splice_init(&kctx->csf.tiler_heaps.list, &local_heaps_list);
+	mutex_unlock(&kctx->csf.tiler_heaps.lock);
 
-	list_for_each_safe(entry, tmp, &kctx->csf.tiler_heaps.list) {
+	list_for_each_safe(entry, tmp, &local_heaps_list) {
 		struct kbase_csf_tiler_heap *heap = list_entry(
 			entry, struct kbase_csf_tiler_heap, link);
+
+		list_del_init(&heap->link);
 		delete_heap(heap);
 	}
 
-	mutex_unlock(&kctx->csf.tiler_heaps.lock);
 	mutex_destroy(&kctx->csf.tiler_heaps.lock);
 
 	kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc);
 }
 
-int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
-	u32 const chunk_size, u32 const initial_chunks, u32 const max_chunks,
-	u16 const target_in_flight, u64 *const heap_gpu_va,
-	u64 *const first_chunk_va)
+/**
+ * kbasep_is_buffer_descriptor_region_suitable - Check if a VA region chosen to house
+ *                                               the tiler heap buffer descriptor
+ *                                               is suitable for the purpose.
+ * @kctx: kbase context of the tiler heap
+ * @reg:  VA region being checked for suitability
+ *
+ * By design, the tiler heap buffer descriptor memory cannot tolerate page
+ * faults, so it must be fully backed upon allocation and must remain alive
+ * for as long as the tiler heap itself, meaning it cannot be allocated from
+ * JIT/ephemeral or user-freeable memory.
+ *
+ * Return: true on suitability, false otherwise.
+ */
+static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *const kctx,
+							struct kbase_va_region *const reg)
+{
+	if (kbase_is_region_invalid_or_free(reg)) {
+		dev_err(kctx->kbdev->dev, "Region is either invalid or free!\n");
+		return false;
+	}
+
+	if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) ||
+	    (reg->flags & KBASE_REG_PF_GROW)) {
+		dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags);
+		return false;
+	}
+
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+		dev_err(kctx->kbdev->dev, "Region has invalid type!\n");
+		return false;
+	}
+
+	if ((reg->nr_pages != kbase_reg_current_backed_size(reg)) ||
+	    (reg->nr_pages < PFN_UP(sizeof(struct kbase_csf_gpu_buffer_heap)))) {
+		dev_err(kctx->kbdev->dev, "Region has invalid backing!\n");
+		return false;
+	}
+
+	return true;
+}
+
+#define TILER_BUF_DESC_SIZE (sizeof(struct kbase_csf_gpu_buffer_heap))
+
+int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_size,
+			      u32 const initial_chunks, u32 const max_chunks,
+			      u16 const target_in_flight, u64 const buf_desc_va,
+			      u64 *const heap_gpu_va, u64 *const first_chunk_va)
 {
 	int err = 0;
 	struct kbase_csf_tiler_heap *heap = NULL;
 	struct kbase_csf_heap_context_allocator *const ctx_alloc =
 		&kctx->csf.tiler_heaps.ctx_alloc;
+	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
+	struct kbase_va_region *gpu_va_reg = NULL;
+	void *vmap_ptr = NULL;
 
 	dev_dbg(kctx->kbdev->dev,
-		"Creating a tiler heap with %u chunks (limit: %u) of size %u\n",
-		initial_chunks, max_chunks, chunk_size);
+		"Creating a tiler heap with %u chunks (limit: %u) of size %u, buf_desc_va: 0x%llx\n",
+		initial_chunks, max_chunks, chunk_size, buf_desc_va);
+
+	if (!kbase_mem_allow_alloc(kctx))
+		return -EINVAL;
 
 	if (chunk_size == 0)
 		return -EINVAL;
@@ -446,8 +708,7 @@
 
 	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
 	if (unlikely(!heap)) {
-		dev_err(kctx->kbdev->dev,
-			"No kernel memory for a new tiler heap\n");
+		dev_err(kctx->kbdev->dev, "No kernel memory for a new tiler heap");
 		return -ENOMEM;
 	}
 
@@ -455,51 +716,130 @@
 	heap->chunk_size = chunk_size;
 	heap->max_chunks = max_chunks;
 	heap->target_in_flight = target_in_flight;
+	heap->buf_desc_checked = false;
 	INIT_LIST_HEAD(&heap->chunks_list);
+	INIT_LIST_HEAD(&heap->link);
 
-	heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc);
+	/* Check the buffer descriptor virtual address */
+	if (buf_desc_va) {
+		struct kbase_va_region *buf_desc_reg;
 
-	mutex_lock(&kctx->csf.tiler_heaps.lock);
+		kbase_gpu_vm_lock(kctx);
+		buf_desc_reg =
+			kbase_region_tracker_find_region_enclosing_address(kctx, buf_desc_va);
 
-	if (unlikely(!heap->gpu_va)) {
-		dev_err(kctx->kbdev->dev,
-			"Failed to allocate a tiler heap context\n");
-		err = -ENOMEM;
-	} else {
-		err = create_initial_chunks(heap, initial_chunks);
-		if (unlikely(err)) {
-			kbase_csf_heap_context_allocator_free(ctx_alloc,
-				heap->gpu_va);
+		if (!kbasep_is_buffer_descriptor_region_suitable(kctx, buf_desc_reg)) {
+			kbase_gpu_vm_unlock(kctx);
+			dev_err(kctx->kbdev->dev,
+				"Could not find a suitable VA region for the tiler heap buf desc!\n");
+			err = -EINVAL;
+			goto buf_desc_not_suitable;
+		}
+
+		/* If we don't prevent userspace from unmapping this, we may run into
+		 * use-after-free, as we don't check for the existence of the region throughout.
+		 */
+
+		heap->buf_desc_va = buf_desc_va;
+		heap->buf_desc_reg = buf_desc_reg;
+		kbase_va_region_no_user_free_inc(buf_desc_reg);
+
+		vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE,
+					  KBASE_REG_CPU_RD, &heap->buf_desc_map,
+					  KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
+
+		if (kbase_page_migration_enabled)
+			kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE);
+
+		kbase_gpu_vm_unlock(kctx);
+
+		if (unlikely(!vmap_ptr)) {
+			err = -ENOMEM;
+			dev_err(kctx->kbdev->dev,
+				"Could not vmap buffer descriptor into kernel memory (err %d)\n",
+				err);
+			goto buf_desc_vmap_failed;
 		}
 	}
 
-	if (unlikely(err)) {
-		kfree(heap);
-	} else {
-		struct kbase_csf_tiler_heap_chunk const *first_chunk =
-			list_first_entry(&heap->chunks_list,
-				struct kbase_csf_tiler_heap_chunk, link);
-
-		kctx->csf.tiler_heaps.nr_of_heaps++;
-		heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps;
-		list_add(&heap->link, &kctx->csf.tiler_heaps.list);
-
-		*heap_gpu_va = heap->gpu_va;
-		*first_chunk_va = first_chunk->gpu_va;
-
-		KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
-			kctx->kbdev, kctx->id, heap->heap_id,
-			PFN_UP(heap->chunk_size * heap->max_chunks),
-			PFN_UP(heap->chunk_size * heap->chunk_count),
-			heap->max_chunks, heap->chunk_size, heap->chunk_count,
-			heap->target_in_flight, 0);
-
-		dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n",
-			heap->gpu_va);
+	heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc);
+	if (unlikely(!heap->gpu_va)) {
+		dev_dbg(kctx->kbdev->dev, "Failed to allocate a tiler heap context\n");
+		err = -ENOMEM;
+		goto heap_context_alloc_failed;
 	}
 
+	gpu_va_reg = ctx_alloc->region;
+
+	kbase_gpu_vm_lock(kctx);
+	/* gpu_va_reg was created with BASEP_MEM_NO_USER_FREE; the code to unset this only runs
+	 * on kctx termination (after all syscalls on the kctx have finished), so it is safe to
+	 * assume that gpu_va_reg is still present.
+	 */
+	vmap_ptr = kbase_vmap_reg(kctx, gpu_va_reg, heap->gpu_va, NEXT_CHUNK_ADDR_SIZE,
+				  (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &heap->gpu_va_map,
+				  KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
+	kbase_gpu_vm_unlock(kctx);
+	if (unlikely(!vmap_ptr)) {
+		dev_dbg(kctx->kbdev->dev, "Failed to vmap the correct heap GPU VA address\n");
+		err = -ENOMEM;
+		goto heap_context_vmap_failed;
+	}
+
+	err = create_initial_chunks(heap, initial_chunks);
+	if (unlikely(err)) {
+		dev_dbg(kctx->kbdev->dev, "Failed to create the initial tiler heap chunks\n");
+		goto create_chunks_failed;
+	}
+	chunk = list_first_entry(&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link);
+
+	*heap_gpu_va = heap->gpu_va;
+	*first_chunk_va = chunk->gpu_va;
+
+	mutex_lock(&kctx->csf.tiler_heaps.lock);
+	kctx->csf.tiler_heaps.nr_of_heaps++;
+	heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps;
+	list_add(&heap->link, &kctx->csf.tiler_heaps.list);
+
+	KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id,
+					    PFN_UP(heap->chunk_size * heap->max_chunks),
+					    PFN_UP(heap->chunk_size * heap->chunk_count),
+					    heap->max_chunks, heap->chunk_size, heap->chunk_count,
+					    heap->target_in_flight, 0);
+
+#if defined(CONFIG_MALI_VECTOR_DUMP)
+	list_for_each_entry(chunk, &heap->chunks_list, link) {
+		KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(kctx->kbdev, kctx->id, heap->heap_id,
+							 chunk->gpu_va);
+	}
+#endif
+	kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
+	kctx->running_total_tiler_heap_memory += (u64)heap->chunk_size * heap->chunk_count;
+	if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
+		kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
+
+	dev_dbg(kctx->kbdev->dev,
+		"Created tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", heap->gpu_va,
+		buf_desc_va, kctx->tgid, kctx->id);
 	mutex_unlock(&kctx->csf.tiler_heaps.lock);
 
+	return 0;
+
+create_chunks_failed:
+	kbase_vunmap(kctx, &heap->gpu_va_map);
+heap_context_vmap_failed:
+	kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va);
+heap_context_alloc_failed:
+	if (heap->buf_desc_reg)
+		kbase_vunmap(kctx, &heap->buf_desc_map);
+buf_desc_vmap_failed:
+	if (heap->buf_desc_reg) {
+		kbase_gpu_vm_lock(kctx);
+		kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
+		kbase_gpu_vm_unlock(kctx);
+	}
+buf_desc_not_suitable:
+	kfree(heap);
 	return err;
 }
 
@@ -508,47 +848,73 @@
 {
 	int err = 0;
 	struct kbase_csf_tiler_heap *heap = NULL;
+	u32 chunk_count = 0;
+	u64 heap_size = 0;
 
 	mutex_lock(&kctx->csf.tiler_heaps.lock);
-
 	heap = find_tiler_heap(kctx, heap_gpu_va);
+	if (likely(heap)) {
+		chunk_count = heap->chunk_count;
+		heap_size = heap->chunk_size * chunk_count;
+
+		list_del_init(&heap->link);
+	} else {
+		err = -EINVAL;
+	}
+
+	/* Update stats whilst still holding the lock so they are in sync with the tiler_heaps.list
+	 * at all times
+	 */
+	if (likely(kctx->running_total_tiler_heap_memory >= heap_size))
+		kctx->running_total_tiler_heap_memory -= heap_size;
+	else
+		dev_warn(kctx->kbdev->dev,
+			 "Running total tiler heap memory lower than expected!");
+	if (likely(kctx->running_total_tiler_heap_nr_chunks >= chunk_count))
+		kctx->running_total_tiler_heap_nr_chunks -= chunk_count;
+	else
+		dev_warn(kctx->kbdev->dev,
+			 "Running total tiler chunk count lower than expected!");
+	if (!err)
+		dev_dbg(kctx->kbdev->dev,
+			"Terminated tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n",
+			heap->gpu_va, heap->buf_desc_va, kctx->tgid, kctx->id);
+	mutex_unlock(&kctx->csf.tiler_heaps.lock);
+
+	/* Deletion requires the kctx->reg_lock, so must only operate on it whilst unlinked from
+	 * the kctx's csf.tiler_heaps.list, and without holding the csf.tiler_heaps.lock
+	 */
 	if (likely(heap))
 		delete_heap(heap);
-	else
-		err = -EINVAL;
-
-	mutex_unlock(&kctx->csf.tiler_heaps.lock);
 
 	return err;
 }
 
 /**
- * alloc_new_chunk - Allocate a new chunk for the tiler heap.
+ * validate_allocation_request - Check whether the chunk allocation request
+ *                               received on tiler OOM should be handled at
+ *                               current time.
  *
- * This function will allocate a new chunk for the chunked tiler heap depending
- * on the settings provided by userspace when the heap was created and the
- * heap's statistics (like number of render passes in-flight).
+ * @heap:               The tiler heap the OOM is associated with
+ * @nr_in_flight:       Number of fragment jobs in flight
+ * @pending_frag_count: Number of pending fragment jobs
  *
- * @heap:               Pointer to the tiler heap.
- * @nr_in_flight:       Number of render passes that are in-flight, must not be zero.
- * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage.
- *                      The minimum value is zero but it must be less or equal to
- *                      the total number of render passes in flight
- * @new_chunk_ptr:      Where to store the GPU virtual address & size of the new
- *                      chunk allocated for the heap.
+ * Context: must hold the tiler heap lock to guarantee its lifetime
  *
- * Return: 0 if a new chunk was allocated otherwise an appropriate negative
- *         error code.
+ * Return:
+ * * 0       - allowed to allocate an additional chunk
+ * * -EINVAL - invalid
+ * * -EBUSY  - there are fragment jobs still in flight, which may free chunks
+ *             after completing
+ * * -ENOMEM - the targeted number of in-flight chunks has been reached and
+ *             no new ones will be allocated
  */
-static int alloc_new_chunk(struct kbase_csf_tiler_heap *heap,
-		u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
+static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight,
+				       u32 pending_frag_count)
 {
-	int err = -ENOMEM;
-
 	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
 
-	if (WARN_ON(!nr_in_flight) ||
-		WARN_ON(pending_frag_count > nr_in_flight))
+	if (WARN_ON(!nr_in_flight) || WARN_ON(pending_frag_count > nr_in_flight))
 		return -EINVAL;
 
 	if (nr_in_flight <= heap->target_in_flight) {
@@ -556,56 +922,451 @@
 			/* Not exceeded the target number of render passes yet so be
 			 * generous with memory.
 			 */
-			err = create_chunk(heap, false);
-
-			if (likely(!err)) {
-				struct kbase_csf_tiler_heap_chunk *new_chunk =
-								get_last_chunk(heap);
-				if (!WARN_ON(!new_chunk)) {
-					*new_chunk_ptr =
-						encode_chunk_ptr(heap->chunk_size,
-								 new_chunk->gpu_va);
-					return 0;
-				}
-			}
+			return 0;
 		} else if (pending_frag_count > 0) {
-			err = -EBUSY;
+			return -EBUSY;
 		} else {
-			err = -ENOMEM;
+			return -ENOMEM;
 		}
 	} else {
 		/* Reached target number of render passes in flight.
 		 * Wait for some of them to finish
 		 */
-		err = -EBUSY;
+		return -EBUSY;
 	}
-
-	return err;
+	return -ENOMEM;
 }
 
 int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
 	u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
 {
 	struct kbase_csf_tiler_heap *heap;
+	struct kbase_csf_tiler_heap_chunk *chunk;
 	int err = -EINVAL;
+	u64 chunk_size = 0;
+	u64 heap_id = 0;
+
+	/* To avoid potential locking issues during allocation, this is handled
+	 * in three phases:
+	 * 1. Take the lock, find the corresponding heap, and find its chunk size
+	 * (this is always 2 MB, but may change down the line).
+	 * 2. Allocate memory for the chunk and its region.
+	 * 3. If the heap still exists, link the new chunk to the end of its
+	 * chunk list. If it doesn't, roll back the allocation.
+	 */
 
 	mutex_lock(&kctx->csf.tiler_heaps.lock);
+	heap = find_tiler_heap(kctx, gpu_heap_va);
+	if (likely(heap)) {
+		chunk_size = heap->chunk_size;
+		heap_id = heap->heap_id;
+	} else {
+		dev_err(kctx->kbdev->dev, "Heap 0x%llX does not exist", gpu_heap_va);
+		mutex_unlock(&kctx->csf.tiler_heaps.lock);
+		goto prelink_failure;
+	}
 
+	err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
+	if (unlikely(err)) {
+		/* The allocation request may be legitimate but arrive for a heap
+		 * that has already reached its maximum pre-configured capacity. This
+		 * is useful debug information, but should not be treated as an error,
+		 * since the request will be re-sent at a later point.
+		 */
+		dev_dbg(kctx->kbdev->dev,
+			"Not allocating new chunk for heap 0x%llX due to current heap state (err %d)",
+			gpu_heap_va, err);
+		mutex_unlock(&kctx->csf.tiler_heaps.lock);
+		goto prelink_failure;
+	}
+	mutex_unlock(&kctx->csf.tiler_heaps.lock);
+	/* this heap must not be used whilst we have dropped the lock */
+	heap = NULL;
+
+	chunk = alloc_new_chunk(kctx, chunk_size);
+	if (unlikely(!chunk)) {
+		dev_err(kctx->kbdev->dev, "Could not allocate chunk of size %lld for ctx %d_%d",
+			chunk_size, kctx->tgid, kctx->id);
+		goto prelink_failure;
+	}
+
+	/* After this point, the heap that we were targeting could already have had the needed
+	 * chunks allocated, if we were handling multiple OoM events on multiple threads, so
+	 * we need to revalidate the need for the allocation.
+	 */
+	mutex_lock(&kctx->csf.tiler_heaps.lock);
 	heap = find_tiler_heap(kctx, gpu_heap_va);
 
-	if (likely(heap)) {
-		err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count,
-			new_chunk_ptr);
-
-		KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
-			kctx->kbdev, kctx->id, heap->heap_id,
-			PFN_UP(heap->chunk_size * heap->max_chunks),
-			PFN_UP(heap->chunk_size * heap->chunk_count),
-			heap->max_chunks, heap->chunk_size, heap->chunk_count,
-			heap->target_in_flight, nr_in_flight);
+	if (unlikely(!heap)) {
+		dev_err(kctx->kbdev->dev, "Tiler heap 0x%llX no longer exists!\n", gpu_heap_va);
+		mutex_unlock(&kctx->csf.tiler_heaps.lock);
+		goto unroll_chunk;
 	}
+
+	if (heap_id != heap->heap_id) {
+		dev_err(kctx->kbdev->dev,
+			"Tiler heap 0x%llX was removed from ctx %d_%d while allocating chunk of size %lld!",
+			gpu_heap_va, kctx->tgid, kctx->id, chunk_size);
+		mutex_unlock(&kctx->csf.tiler_heaps.lock);
+		goto unroll_chunk;
+	}
+
+	if (WARN_ON(chunk_size != heap->chunk_size)) {
+		mutex_unlock(&kctx->csf.tiler_heaps.lock);
+		goto unroll_chunk;
+	}
+
+	err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
+	if (unlikely(err)) {
+		dev_warn(
+			kctx->kbdev->dev,
+			"Aborting linking chunk to heap 0x%llX: heap state changed during allocation (err %d)",
+			gpu_heap_va, err);
+		mutex_unlock(&kctx->csf.tiler_heaps.lock);
+		goto unroll_chunk;
+	}
+
+	err = init_chunk(heap, chunk, false);
+
+	/* On error, the chunk would not be linked, so we can still treat it as an unlinked
+	 * chunk for error handling.
+	 */
+	if (unlikely(err)) {
+		dev_err(kctx->kbdev->dev,
+			"Could not link chunk(0x%llX) with tiler heap 0%llX in ctx %d_%d due to error %d",
+			chunk->gpu_va, gpu_heap_va, kctx->tgid, kctx->id, err);
+		mutex_unlock(&kctx->csf.tiler_heaps.lock);
+		goto unroll_chunk;
+	}
+
+	*new_chunk_ptr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);
+
+	/* update total and peak tiler heap memory record */
+	kctx->running_total_tiler_heap_nr_chunks++;
+	kctx->running_total_tiler_heap_memory += heap->chunk_size;
+
+	if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
+		kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
+
+	KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id,
+					    PFN_UP(heap->chunk_size * heap->max_chunks),
+					    PFN_UP(heap->chunk_size * heap->chunk_count),
+					    heap->max_chunks, heap->chunk_size, heap->chunk_count,
+					    heap->target_in_flight, nr_in_flight);
 
 	mutex_unlock(&kctx->csf.tiler_heaps.lock);
 
 	return err;
+unroll_chunk:
+	remove_unlinked_chunk(kctx, chunk);
+prelink_failure:
+	return err;
+}
+
+static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 chunk_gpu_va,
+					u64 *hdr_val)
+{
+	int err;
+	u64 *chunk_hdr;
+	struct kbase_context *kctx = heap->kctx;
+	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
+
+	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
+
+	chunk = find_chunk(heap, chunk_gpu_va);
+	if (unlikely(!chunk)) {
+		dev_warn(kctx->kbdev->dev,
+			 "Failed to find tiler heap(0x%llX) chunk(0x%llX) for reclaim-delete\n",
+			 heap->gpu_va, chunk_gpu_va);
+		return false;
+	}
+
+	WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
+	     "Cannot support CPU cached chunks without sync operations");
+	chunk_hdr = chunk->map.addr;
+	*hdr_val = *chunk_hdr;
+
+	dev_dbg(kctx->kbdev->dev,
+		"Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n",
+		chunk_gpu_va, heap->gpu_va, *hdr_val);
+
+	err = kbase_mem_shrink_gpu_mapping(kctx, chunk->region, 0, chunk->region->gpu_alloc->nents);
+	if (unlikely(err)) {
+		dev_warn(
+			kctx->kbdev->dev,
+			"Reclaim: shrinking GPU mapping failed on chunk(0x%llx) in heap(0x%llx) (err %d)\n",
+			chunk_gpu_va, heap->gpu_va, err);
+
+		/* Cannot free the pages whilst references on the GPU remain, so keep the chunk on
+		 * the heap's chunk list and try a different heap.
+		 */
+
+		return false;
+	}
+	/* Destroy the mapping before the physical pages which are mapped are destroyed. */
+	kbase_vunmap(kctx, &chunk->map);
+
+	err = kbase_free_phy_pages_helper(chunk->region->gpu_alloc,
+					  chunk->region->gpu_alloc->nents);
+	if (unlikely(err)) {
+		dev_warn(
+			kctx->kbdev->dev,
+			"Reclaim: remove physical backing failed on chunk(0x%llx) in heap(0x%llx) (err %d), continuing with deferred removal\n",
+			chunk_gpu_va, heap->gpu_va, err);
+
+		/* kbase_free_phy_pages_helper() should only fail on invalid input, and WARNs
+		 * anyway, so continue instead of returning early.
+		 *
+		 * Indeed, we don't want to leave the chunk on the heap's chunk list whilst it has
+		 * its mapping removed, as that could lead to problems. It's safest to instead
+		 * continue with deferred destruction of the chunk.
+		 */
+	}
+
+	dev_dbg(kctx->kbdev->dev,
+		"Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n",
+		chunk_gpu_va, heap->gpu_va, *hdr_val);
+
+	mutex_lock(&heap->kctx->jit_evict_lock);
+	list_move(&chunk->region->jit_node, &kctx->jit_destroy_head);
+	mutex_unlock(&heap->kctx->jit_evict_lock);
+
+	list_del(&chunk->link);
+	heap->chunk_count--;
+	kfree(chunk);
+
+	return true;
+}
+
+static void sanity_check_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap,
+					 struct kbase_csf_gpu_buffer_heap *desc)
+{
+	u64 first_hoarded_chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK;
+
+	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
+
+	if (first_hoarded_chunk_gpu_va) {
+		struct kbase_csf_tiler_heap_chunk *chunk =
+			find_chunk(heap, first_hoarded_chunk_gpu_va);
+
+		if (likely(chunk)) {
+			dev_dbg(heap->kctx->kbdev->dev,
+				"Buffer descriptor 0x%llX sanity check ok, HW reclaim allowed\n",
+				heap->buf_desc_va);
+
+			heap->buf_desc_checked = true;
+			return;
+		}
+	}
+	/* If there is no match, defer the check to next time */
+	dev_dbg(heap->kctx->kbdev->dev, "Buffer descriptor 0x%llX runtime sanity check deferred\n",
+		heap->buf_desc_va);
+}
+
+static bool can_read_hw_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, u64 *chunk_gpu_va_ptr)
+{
+	struct kbase_context *kctx = heap->kctx;
+
+	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
+
+	/* Initialize the descriptor pointer value to 0 */
+	*chunk_gpu_va_ptr = 0;
+
+	/* The buffer descriptor supplied at heap creation is only a hint, so sanity-check it at runtime */
+	if (heap->buf_desc_reg && !heap->buf_desc_checked) {
+		struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;
+
+		/* BufferDescriptor is supplied by userspace, so could be CPU-cached */
+		if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
+			kbase_sync_mem_regions(kctx, &heap->buf_desc_map, KBASE_SYNC_TO_CPU);
+
+		sanity_check_gpu_buffer_heap(heap, desc);
+		if (heap->buf_desc_checked)
+			*chunk_gpu_va_ptr = desc->pointer & CHUNK_ADDR_MASK;
+	}
+
+	return heap->buf_desc_checked;
+}
+
+static u32 delete_hoarded_chunks(struct kbase_csf_tiler_heap *heap)
+{
+	u32 freed = 0;
+	u64 chunk_gpu_va = 0;
+	struct kbase_context *kctx = heap->kctx;
+	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
+
+	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
+
+	if (can_read_hw_gpu_buffer_heap(heap, &chunk_gpu_va)) {
+		u64 chunk_hdr_val;
+		u64 *hw_hdr;
+
+		if (!chunk_gpu_va) {
+			struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;
+
+			/* BufferDescriptor is supplied by userspace, so could be CPU-cached */
+			if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
+				kbase_sync_mem_regions(kctx, &heap->buf_desc_map,
+						       KBASE_SYNC_TO_CPU);
+			chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK;
+
+			if (!chunk_gpu_va) {
+				dev_dbg(kctx->kbdev->dev,
+					"Buffer descriptor 0x%llX has no chunks (NULL) for reclaim scan\n",
+					heap->buf_desc_va);
+				goto out;
+			}
+		}
+
+		chunk = find_chunk(heap, chunk_gpu_va);
+		if (unlikely(!chunk))
+			goto out;
+
+		WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
+		     "Cannot support CPU cached chunks without sync operations");
+		hw_hdr = chunk->map.addr;
+
+		/* Read this chunk's header to get the next chunk's address */
+		chunk_hdr_val = *hw_hdr;
+		chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
+
+		while (chunk_gpu_va && heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
+			bool success =
+				delete_chunk_physical_pages(heap, chunk_gpu_va, &chunk_hdr_val);
+
+			if (!success)
+				break;
+
+			freed++;
+			/* On success, chunk_hdr_val is updated, extract the next chunk address */
+			chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
+		}
+
+		/* Update the existing hardware chunk header, after reclaim deletion of chunks */
+		*hw_hdr = chunk_hdr_val;
+
+		dev_dbg(heap->kctx->kbdev->dev,
+			"HW reclaim scan freed chunks: %u, set hw_hdr[0]: 0x%llX\n", freed,
+			chunk_hdr_val);
+	} else {
+		dev_dbg(kctx->kbdev->dev,
+			"Skip HW reclaim scan, (disabled: buffer descriptor 0x%llX)\n",
+			heap->buf_desc_va);
+	}
+out:
+	return freed;
+}
+
+static u64 delete_unused_chunk_pages(struct kbase_csf_tiler_heap *heap)
+{
+	u32 freed_chunks = 0;
+	u64 freed_pages = 0;
+	u64 chunk_gpu_va;
+	u64 chunk_hdr_val;
+	struct kbase_context *kctx = heap->kctx;
+	u64 *ctx_ptr;
+
+	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
+
+	WARN(heap->gpu_va_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED,
+	     "Cannot support CPU cached heap context without sync operations");
+
+	ctx_ptr = heap->gpu_va_map.addr;
+
+	/* Extract the first chunk address from the context's free_list_head */
+	chunk_hdr_val = *ctx_ptr;
+	chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
+
+	while (chunk_gpu_va) {
+		u64 hdr_val;
+		bool success = delete_chunk_physical_pages(heap, chunk_gpu_va, &hdr_val);
+
+		if (!success)
+			break;
+
+		freed_chunks++;
+		chunk_hdr_val = hdr_val;
+		/* extract the next chunk address */
+		chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
+	}
+
+	/* Write the updated free-list head back to the heap context after the scan deletions */
+	*ctx_ptr = chunk_hdr_val;
+
+	/* Try to scan the HW hoarded list of unused chunks */
+	freed_chunks += delete_hoarded_chunks(heap);
+	freed_pages = freed_chunks * PFN_UP(heap->chunk_size);
+	dev_dbg(heap->kctx->kbdev->dev,
+		"Scan reclaim freed chunks/pages %u/%llu, set heap-ctx_u64[0]: 0x%llX\n",
+		freed_chunks, freed_pages, chunk_hdr_val);
+
+	/* Update context tiler heaps memory usage */
+	kctx->running_total_tiler_heap_memory -= freed_pages << PAGE_SHIFT;
+	kctx->running_total_tiler_heap_nr_chunks -= freed_chunks;
+	return freed_pages;
+}
+
+u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free)
+{
+	u64 freed = 0;
+	struct kbase_csf_tiler_heap *heap;
+
+	mutex_lock(&kctx->csf.tiler_heaps.lock);
+
+	list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) {
+		freed += delete_unused_chunk_pages(heap);
+
+		/* If freed enough, then stop here */
+		if (freed >= to_free)
+			break;
+	}
+
+	mutex_unlock(&kctx->csf.tiler_heaps.lock);
+	/* The scan should never free more than U32_MAX pages, but clamp it defensively */
+	if (WARN_ON(unlikely(freed > U32_MAX)))
+		return U32_MAX;
+	else
+		return (u32)freed;
+}
+
+static u64 count_unused_heap_pages(struct kbase_csf_tiler_heap *heap)
+{
+	u32 chunk_cnt = 0;
+	u64 page_cnt = 0;
+
+	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
+
+	/* The count here is an informed estimate that avoids the costly mapping/unmapping
+	 * of a chunk list walk. The downside is that it is a less reliable guide for how
+	 * much a later scan (free) call on this heap will actually be able to free.
+	 */
+	if (heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
+		chunk_cnt = heap->chunk_count - HEAP_SHRINK_STOP_LIMIT;
+		page_cnt = chunk_cnt * PFN_UP(heap->chunk_size);
+	}
+
+	dev_dbg(heap->kctx->kbdev->dev,
+		"Reclaim count chunks/pages %u/%llu (estimated), heap_va: 0x%llX\n", chunk_cnt,
+		page_cnt, heap->gpu_va);
+
+	return page_cnt;
+}
+
+u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx)
+{
+	u64 page_cnt = 0;
+	struct kbase_csf_tiler_heap *heap;
+
+	mutex_lock(&kctx->csf.tiler_heaps.lock);
+
+	list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link)
+		page_cnt += count_unused_heap_pages(heap);
+
+	mutex_unlock(&kctx->csf.tiler_heaps.lock);
+
+	/* The count should never exceed U32_MAX pages, but clamp it defensively */
+	if (WARN_ON(unlikely(page_cnt > U32_MAX)))
+		return U32_MAX;
+	else
+		return (u32)page_cnt;
 }

--
Gitblit v1.6.2