// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

#include <tl/mali_kbase_tracepoints.h>

#include "mali_kbase_csf_tiler_heap.h"
#include "mali_kbase_csf_tiler_heap_def.h"
#include "mali_kbase_csf_heap_context_alloc.h"

/* Tiler heap shrink stop limit for maintaining a minimum number of chunks */
#define HEAP_SHRINK_STOP_LIMIT (1)

/**
 * struct kbase_csf_gpu_buffer_heap - A gpu buffer object specific to tiler heap
 *
 * @cdsbp_0: Descriptor_type and buffer_type
 * @size: The size of the current heap chunk
 * @pointer: Pointer to the current heap chunk
 * @low_pointer: Pointer to low end of current heap chunk
 * @high_pointer: Pointer to high end of current heap chunk
 */
struct kbase_csf_gpu_buffer_heap {
	u32 cdsbp_0;
	u32 size;
	u64 pointer;
	u64 low_pointer;
	u64 high_pointer;
} __packed;
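
/*
 * Note on usage within this file: the driver only ever reads this structure
 * through the CPU mapping set up at heap initialisation (heap->buf_desc_map),
 * and only the @pointer field is consumed (masked with CHUNK_ADDR_MASK) to
 * locate chunks retained on the GPU side. The other fields describe the
 * expected GPU-side layout and are not interpreted here.
 */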

/**
 * encode_chunk_ptr - Encode the address and size of a chunk as an integer.
 *
 * @chunk_size: Size of a tiler heap chunk, in bytes.
 * @chunk_addr: GPU virtual address of the same tiler heap chunk.
 *
 * The size and address of the next chunk in a list are packed into a single
 * 64-bit value for storage in a chunk's header. This function returns that
 * value.
 *
 * Return: Next chunk pointer suitable for writing into a chunk header.
 */
static u64 encode_chunk_ptr(u32 const chunk_size, u64 const chunk_addr)
{
	u64 encoded_size, encoded_addr;

	WARN_ON(chunk_size & ~CHUNK_SIZE_MASK);
	WARN_ON(chunk_addr & ~CHUNK_ADDR_MASK);

	encoded_size =
		(u64)(chunk_size >> CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT) <<
		CHUNK_HDR_NEXT_SIZE_POS;

	encoded_addr =
		(chunk_addr >> CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) <<
		CHUNK_HDR_NEXT_ADDR_POS;

	return (encoded_size & CHUNK_HDR_NEXT_SIZE_MASK) |
		(encoded_addr & CHUNK_HDR_NEXT_ADDR_MASK);
}
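
/*
 * Decoding sketch (informational only): the shift and mask values above are
 * defined in mali_kbase_csf_tiler_heap_def.h. The reclaim paths later in this
 * file recover the next chunk's GPU VA from a header value with
 *
 *   next_chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
 *
 * which assumes the address bits are stored at their natural bit positions
 * within the header word.
 */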

/**
 * get_last_chunk - Get the last chunk of a tiler heap
 *
 * @heap: Pointer to the tiler heap.
 *
 * Return: The address of the most recently-linked chunk, or NULL if none.
 */
static struct kbase_csf_tiler_heap_chunk *get_last_chunk(
	struct kbase_csf_tiler_heap *const heap)
{
	if (list_empty(&heap->chunks_list))
		return NULL;

	return list_last_entry(&heap->chunks_list,
		struct kbase_csf_tiler_heap_chunk, link);
}

/**
 * remove_external_chunk_mappings - Remove external mappings from a chunk that
 *                                  is being transitioned to the tiler heap
 *                                  memory system.
 *
 * @kctx: kbase context the chunk belongs to.
 * @chunk: The chunk whose external mappings are going to be removed.
 *
 * This function marks the region as DONT NEED. Along with NO_USER_FREE, this indicates
 * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other
 * parts of kbase outside of tiler heap management should not take references on its physical
 * pages, and should not modify them.
 */
static void remove_external_chunk_mappings(struct kbase_context *const kctx,
					   struct kbase_csf_tiler_heap_chunk *chunk)
{
	lockdep_assert_held(&kctx->reg_lock);

	if (chunk->region->cpu_alloc != NULL) {
		kbase_mem_shrink_cpu_mapping(kctx, chunk->region, 0,
					     chunk->region->cpu_alloc->nents);
	}
#if !defined(CONFIG_MALI_VECTOR_DUMP)
	chunk->region->flags |= KBASE_REG_DONT_NEED;
#endif

	dev_dbg(kctx->kbdev->dev, "Removed external mappings from chunk 0x%llX", chunk->gpu_va);
}

/**
 * link_chunk - Link a chunk into a tiler heap
 *
 * @heap: Pointer to the tiler heap.
 * @chunk: Pointer to the heap chunk to be linked.
 *
 * Unless the @chunk is the first in the kernel's list of chunks belonging to
 * a given tiler heap, this function stores the size and address of the @chunk
 * in the header of the preceding chunk. The header is written through the
 * preceding chunk's permanent kernel mapping.
 *
 * Return: 0 if successful or a negative error code on failure.
 */
static int link_chunk(struct kbase_csf_tiler_heap *const heap,
		      struct kbase_csf_tiler_heap_chunk *const chunk)
{
	struct kbase_csf_tiler_heap_chunk *const prev = get_last_chunk(heap);

	if (prev) {
		struct kbase_context *const kctx = heap->kctx;
		u64 *prev_hdr = prev->map.addr;

		WARN((prev->region->flags & KBASE_REG_CPU_CACHED),
		     "Cannot support CPU cached chunks without sync operations");

		*prev_hdr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);

		dev_dbg(kctx->kbdev->dev,
			"Linked tiler heap chunks, 0x%llX -> 0x%llX\n",
			prev->gpu_va, chunk->gpu_va);
	}

	return 0;
}
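
/*
 * Chunks are therefore tracked twice: in the kernel via heap->chunks_list (a
 * host-side list of struct kbase_csf_tiler_heap_chunk), and in GPU memory via
 * the packed "next" value that link_chunk() writes into the preceding chunk's
 * header. Note that delete_all_chunks() below only unlinks the host-side view,
 * which is why it is reserved for whole-heap deletion.
 */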

/**
 * init_chunk - Initialize and link a tiler heap chunk
 *
 * @heap: Pointer to the tiler heap.
 * @chunk: Pointer to the heap chunk to be initialized and linked.
 * @link_with_prev: Flag to indicate if the new chunk needs to be linked with
 *                  the previously allocated chunk.
 *
 * Zero-initialize a new chunk's header (including its pointer to the next
 * chunk, which doesn't exist yet) and then update the previous chunk's
 * header to link the new chunk into the chunk list.
 *
 * Return: 0 if successful or a negative error code on failure.
 */
static int init_chunk(struct kbase_csf_tiler_heap *const heap,
		      struct kbase_csf_tiler_heap_chunk *const chunk, bool link_with_prev)
{
	int err = 0;
	u64 *chunk_hdr;
	struct kbase_context *const kctx = heap->kctx;

	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);

	if (unlikely(chunk->gpu_va & ~CHUNK_ADDR_MASK)) {
		dev_err(kctx->kbdev->dev,
			"Tiler heap chunk address is unusable\n");
		return -EINVAL;
	}

	WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
	     "Cannot support CPU cached chunks without sync operations");
	chunk_hdr = chunk->map.addr;
	if (WARN(chunk->map.size < CHUNK_HDR_SIZE,
		 "Tiler chunk kernel mapping was not large enough for zero-init")) {
		return -EINVAL;
	}

	memset(chunk_hdr, 0, CHUNK_HDR_SIZE);
	INIT_LIST_HEAD(&chunk->link);

	if (link_with_prev)
		err = link_chunk(heap, chunk);

	if (unlikely(err)) {
		dev_err(kctx->kbdev->dev, "Failed to link a chunk to a tiler heap\n");
		return -EINVAL;
	}

	list_add_tail(&chunk->link, &heap->chunks_list);
	heap->chunk_count++;

	return err;
}

/**
 * remove_unlinked_chunk - Remove a chunk that is not currently linked into a
 *                         heap.
 *
 * @kctx: Kbase context that was used to allocate the memory.
 * @chunk: Chunk that has been allocated, but not linked into a heap.
 */
static void remove_unlinked_chunk(struct kbase_context *kctx,
				  struct kbase_csf_tiler_heap_chunk *chunk)
{
	if (WARN_ON(!list_empty(&chunk->link)))
		return;

	kbase_gpu_vm_lock(kctx);
	kbase_vunmap(kctx, &chunk->map);
	/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
	 * regions), and so we must clear that flag too before freeing.
	 * For "no user free count", we check that the count is 1 as it is a shrinkable region;
	 * no other code part within kbase can take a reference to it.
	 */
	WARN_ON(atomic_read(&chunk->region->no_user_free_count) > 1);
	kbase_va_region_no_user_free_dec(chunk->region);
#if !defined(CONFIG_MALI_VECTOR_DUMP)
	chunk->region->flags &= ~KBASE_REG_DONT_NEED;
#endif
	kbase_mem_free_region(kctx, chunk->region);
	kbase_gpu_vm_unlock(kctx);

	kfree(chunk);
}

/**
 * alloc_new_chunk - Allocate new chunk metadata for the tiler heap, reserve a fully backed VA
 *                   region for the chunk, and provide a kernel mapping.
 * @kctx: kbase context with which the chunk will be linked
 * @chunk_size: the size of the chunk from the corresponding heap
 *
 * Allocate the chunk tracking metadata and a corresponding fully backed VA region for the
 * chunk. The kernel may need to invoke the reclaim path while trying to fulfill the allocation, so
 * we cannot hold any lock that would be held in the shrinker paths (JIT evict lock or tiler heap
 * lock).
 *
 * Since the chunk may have its physical backing removed, to prevent use-after-free scenarios we
 * ensure that it is protected from being mapped by other parts of kbase.
 *
 * The chunk's GPU memory can be accessed via its 'map' member, but this should only be done by
 * the shrinker path, as the chunk may otherwise be shrunk at any time.
 *
 * Return: pointer to kbase_csf_tiler_heap_chunk on success or a NULL pointer
 *         on failure
 */
static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *kctx,
							   u64 chunk_size)
{
	u64 nr_pages = PFN_UP(chunk_size);
	u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
		    BASEP_MEM_NO_USER_FREE | BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD;
	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
	/* The chunk kernel mapping needs to be large enough to:
	 * - initially zero the CHUNK_HDR_SIZE area
	 * - on shrinking, access the NEXT_CHUNK_ADDR_SIZE area
	 */
	const size_t chunk_kernel_map_size = max(CHUNK_HDR_SIZE, NEXT_CHUNK_ADDR_SIZE);

	/* Calls to this function are inherently synchronous, with respect to
	 * MMU operations.
	 */
	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
	flags |= kbase_mem_group_id_set(kctx->jit_group_id);

	chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
	if (unlikely(!chunk)) {
		dev_err(kctx->kbdev->dev,
			"No kernel memory for a new tiler heap chunk\n");
		return NULL;
	}

	/* Allocate GPU memory for the new chunk. */
	chunk->region =
		kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info);

	if (unlikely(!chunk->region)) {
		dev_err(kctx->kbdev->dev, "Failed to allocate a tiler heap chunk!\n");
		goto unroll_chunk;
	}

	kbase_gpu_vm_lock(kctx);

	/* Some checks are done here because NO_USER_FREE still allows such changes to be
	 * made whilst the region lock was dropped
	 */
	if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) {
		dev_err(kctx->kbdev->dev, "Chunk region has active kernel mappings!\n");
		goto unroll_region;
	}

	/* There is a race condition with regard to KBASE_REG_DONT_NEED, where another
	 * thread can have the "no user free" refcount increased between kbase_mem_alloc
	 * and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by
	 * remove_external_chunk_mappings (below).
	 *
	 * It should be fine and not a security risk if we let the region leak till
	 * region tracker termination in such a case.
	 */
	if (unlikely(atomic_read(&chunk->region->no_user_free_count) > 1)) {
		dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_count > 1!\n");
		goto unroll_region;
	}

	/* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE
	 * being requested, it's useful to document in code what those restrictions are, and ensure
	 * they remain in place in future.
	 */
	if (WARN(!chunk->region->gpu_alloc,
		 "NO_USER_FREE chunks should not have had their alloc freed")) {
		goto unroll_region;
	}

	if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE,
		 "NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) {
		goto unroll_region;
	}

	if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC),
		 "NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) {
		goto unroll_region;
	}

	if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED),
		 "NO_USER_FREE chunks should not have been made ephemeral")) {
		goto unroll_region;
	}

	if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1,
		 "NO_USER_FREE chunks should not have been aliased")) {
		goto unroll_region;
	}

	if (unlikely(!kbase_vmap_reg(kctx, chunk->region, chunk->gpu_va, chunk_kernel_map_size,
				     (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &chunk->map,
				     KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING))) {
		dev_err(kctx->kbdev->dev, "Failed to map chunk header for shrinking!\n");
		goto unroll_region;
	}

	remove_external_chunk_mappings(kctx, chunk);
	kbase_gpu_vm_unlock(kctx);

	/* If page migration is enabled, we don't want to migrate tiler heap pages.
	 * This does not change if the constituent pages are already marked as isolated.
	 */
	if (kbase_page_migration_enabled)
		kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE);

	return chunk;

unroll_region:
	/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
	 * regions), and so we must clear that flag too before freeing.
	 */
	kbase_va_region_no_user_free_dec(chunk->region);
#if !defined(CONFIG_MALI_VECTOR_DUMP)
	chunk->region->flags &= ~KBASE_REG_DONT_NEED;
#endif
	kbase_mem_free_region(kctx, chunk->region);
	kbase_gpu_vm_unlock(kctx);
unroll_chunk:
	kfree(chunk);
	return NULL;
}

/**
 * create_chunk - Create a tiler heap chunk
 *
 * @heap: Pointer to the tiler heap for which to allocate memory.
 *
 * This function allocates a chunk of memory for a tiler heap and adds it to
 * the list of chunks associated with that heap, both on the host side and in
 * GPU memory.
 *
 * Return: 0 if successful or a negative error code on failure.
 */
static int create_chunk(struct kbase_csf_tiler_heap *const heap)
{
	int err = 0;
	struct kbase_csf_tiler_heap_chunk *chunk = NULL;

	chunk = alloc_new_chunk(heap->kctx, heap->chunk_size);
	if (unlikely(!chunk)) {
		err = -ENOMEM;
		goto allocation_failure;
	}

	mutex_lock(&heap->kctx->csf.tiler_heaps.lock);
	err = init_chunk(heap, chunk, true);
	mutex_unlock(&heap->kctx->csf.tiler_heaps.lock);

	if (unlikely(err))
		goto initialization_failure;

	dev_dbg(heap->kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n", chunk->gpu_va);

	return 0;
initialization_failure:
	remove_unlinked_chunk(heap->kctx, chunk);
allocation_failure:
	return err;
}

/**
 * delete_all_chunks - Delete all chunks belonging to an unlinked tiler heap
 *
 * @heap: Pointer to a tiler heap.
 *
 * This function empties the list of chunks associated with a tiler heap by freeing all chunks
 * previously allocated by @create_chunk.
 *
 * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the
 * tiler_heaps lock cannot be held whilst deleting its chunks due to also needing the &struct
 * kbase_context.region_lock.
 *
 * WARNING: Whilst the deleted chunks are unlinked from host memory, they are not unlinked from the
 *          list of chunks used by the GPU, therefore it is only safe to use this function when
 *          deleting a heap.
 */
static void delete_all_chunks(struct kbase_csf_tiler_heap *heap)
{
	struct kbase_context *const kctx = heap->kctx;
	struct list_head *entry = NULL, *tmp = NULL;

	WARN(!list_empty(&heap->link),
	     "Deleting a heap's chunks when that heap is still linked requires the tiler_heaps lock, which cannot be held by the caller");

	list_for_each_safe(entry, tmp, &heap->chunks_list) {
		struct kbase_csf_tiler_heap_chunk *chunk = list_entry(
			entry, struct kbase_csf_tiler_heap_chunk, link);

		list_del_init(&chunk->link);
		heap->chunk_count--;

		remove_unlinked_chunk(kctx, chunk);
	}
}

/**
 * create_initial_chunks - Create the initial list of chunks for a tiler heap
 *
 * @heap: Pointer to the tiler heap for which to allocate memory.
 * @nchunks: Number of chunks to create.
 *
 * This function allocates a given number of chunks for a tiler heap and
 * adds them to the list of chunks associated with that heap.
 *
 * Return: 0 if successful or a negative error code on failure.
 */
static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap,
				 u32 const nchunks)
{
	int err = 0;
	u32 i;

	for (i = 0; (i < nchunks) && likely(!err); i++)
		err = create_chunk(heap);

	if (unlikely(err))
		delete_all_chunks(heap);

	return err;
}

/**
 * delete_heap - Delete an unlinked tiler heap
 *
 * @heap: Pointer to a tiler heap to be deleted.
 *
 * This function frees any chunks allocated for a tiler heap previously
 * initialized by @kbase_csf_tiler_heap_init. The heap context structure used by
 * the firmware is also freed.
 *
 * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the
 * tiler_heaps lock cannot be held whilst deleting it due to also needing the &struct
 * kbase_context.region_lock.
 */
static void delete_heap(struct kbase_csf_tiler_heap *heap)
{
	struct kbase_context *const kctx = heap->kctx;

	dev_dbg(kctx->kbdev->dev, "Deleting tiler heap 0x%llX\n", heap->gpu_va);

	WARN(!list_empty(&heap->link),
	     "Deleting a heap that is still linked requires the tiler_heaps lock, which cannot be held by the caller");

	/* Make sure that all of the VA regions corresponding to the chunks are
	 * freed at this time and that the work queue is not trying to access freed
	 * memory.
	 *
	 * Note: since the heap is unlinked and no references are made to chunks other
	 * than from their heap, there is no need to separately move the chunks out of the
	 * heap->chunks_list to delete them.
	 */
	delete_all_chunks(heap);

	kbase_vunmap(kctx, &heap->gpu_va_map);
	/* We could optimize context destruction by not freeing leaked heap
	 * contexts but it doesn't seem worth the extra complexity. After this
	 * point, the suballocation is returned to the heap context allocator and
	 * may be overwritten with new data, meaning heap->gpu_va should not
	 * be used past this point.
	 */
	kbase_csf_heap_context_allocator_free(&kctx->csf.tiler_heaps.ctx_alloc,
					      heap->gpu_va);

	WARN_ON(heap->chunk_count);
	KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id,
		heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0,
		heap->target_in_flight, 0);

	if (heap->buf_desc_reg) {
		kbase_vunmap(kctx, &heap->buf_desc_map);
		kbase_gpu_vm_lock(kctx);
		kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
		kbase_gpu_vm_unlock(kctx);
	}

	kfree(heap);
}

/**
 * find_tiler_heap - Find a tiler heap from the address of its heap context
 *
 * @kctx: Pointer to the kbase context to search for a tiler heap.
 * @heap_gpu_va: GPU virtual address of a heap context structure.
 *
 * Each tiler heap managed by the kernel has an associated heap context
 * structure used by the firmware. This function finds a tiler heap object from
 * the GPU virtual address of its associated heap context. The heap context
 * should have been allocated by @kbase_csf_heap_context_allocator_alloc in the
 * same @kctx.
 *
 * Return: pointer to the tiler heap object, or NULL if not found.
 */
static struct kbase_csf_tiler_heap *find_tiler_heap(
	struct kbase_context *const kctx, u64 const heap_gpu_va)
{
	struct kbase_csf_tiler_heap *heap = NULL;

	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);

	list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) {
		if (heap_gpu_va == heap->gpu_va)
			return heap;
	}

	dev_dbg(kctx->kbdev->dev, "Tiler heap 0x%llX was not found\n",
		heap_gpu_va);

	return NULL;
}

static struct kbase_csf_tiler_heap_chunk *find_chunk(struct kbase_csf_tiler_heap *heap,
						     u64 const chunk_gpu_va)
{
	struct kbase_csf_tiler_heap_chunk *chunk = NULL;

	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);

	list_for_each_entry(chunk, &heap->chunks_list, link) {
		if (chunk->gpu_va == chunk_gpu_va)
			return chunk;
	}

	dev_dbg(heap->kctx->kbdev->dev, "Tiler heap chunk 0x%llX was not found\n", chunk_gpu_va);

	return NULL;
}

int kbase_csf_tiler_heap_context_init(struct kbase_context *const kctx)
{
	int err = kbase_csf_heap_context_allocator_init(
		&kctx->csf.tiler_heaps.ctx_alloc, kctx);

	if (unlikely(err))
		return err;

	INIT_LIST_HEAD(&kctx->csf.tiler_heaps.list);
	mutex_init(&kctx->csf.tiler_heaps.lock);

	dev_dbg(kctx->kbdev->dev, "Initialized a context for tiler heaps\n");

	return 0;
}

void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx)
{
	LIST_HEAD(local_heaps_list);
	struct list_head *entry = NULL, *tmp = NULL;

	dev_dbg(kctx->kbdev->dev, "Terminating a context for tiler heaps\n");

	mutex_lock(&kctx->csf.tiler_heaps.lock);
	list_splice_init(&kctx->csf.tiler_heaps.list, &local_heaps_list);
	mutex_unlock(&kctx->csf.tiler_heaps.lock);

	list_for_each_safe(entry, tmp, &local_heaps_list) {
		struct kbase_csf_tiler_heap *heap = list_entry(
			entry, struct kbase_csf_tiler_heap, link);

		list_del_init(&heap->link);
		delete_heap(heap);
	}

	mutex_destroy(&kctx->csf.tiler_heaps.lock);

	kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc);
}

/**
 * kbasep_is_buffer_descriptor_region_suitable - Check if a VA region chosen to house
 *                                               the tiler heap buffer descriptor
 *                                               is suitable for the purpose.
 * @kctx: kbase context of the tiler heap
 * @reg: VA region being checked for suitability
 *
 * By design, the tiler heap buffer descriptor memory cannot take page faults, so it
 * must be fully backed at the time of allocation. It also has to remain alive for as
 * long as the tiler heap is alive, so it cannot be allocated from JIT/ephemeral or
 * user-freeable memory.
 *
 * Return: true on suitability, false otherwise.
 */
static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *const kctx,
							struct kbase_va_region *const reg)
{
	if (kbase_is_region_invalid_or_free(reg)) {
		dev_err(kctx->kbdev->dev, "Region is either invalid or free!\n");
		return false;
	}

	if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) ||
	    (reg->flags & KBASE_REG_PF_GROW)) {
		dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags);
		return false;
	}

	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
		dev_err(kctx->kbdev->dev, "Region has invalid type!\n");
		return false;
	}

	if ((reg->nr_pages != kbase_reg_current_backed_size(reg)) ||
	    (reg->nr_pages < PFN_UP(sizeof(struct kbase_csf_gpu_buffer_heap)))) {
		dev_err(kctx->kbdev->dev, "Region has invalid backing!\n");
		return false;
	}

	return true;
}

#define TILER_BUF_DESC_SIZE (sizeof(struct kbase_csf_gpu_buffer_heap))

int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_size,
			      u32 const initial_chunks, u32 const max_chunks,
			      u16 const target_in_flight, u64 const buf_desc_va,
			      u64 *const heap_gpu_va, u64 *const first_chunk_va)
{
	int err = 0;
	struct kbase_csf_tiler_heap *heap = NULL;
	struct kbase_csf_heap_context_allocator *const ctx_alloc =
		&kctx->csf.tiler_heaps.ctx_alloc;
	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
	struct kbase_va_region *gpu_va_reg = NULL;
	void *vmap_ptr = NULL;

	dev_dbg(kctx->kbdev->dev,
		"Creating a tiler heap with %u chunks (limit: %u) of size %u, buf_desc_va: 0x%llx\n",
		initial_chunks, max_chunks, chunk_size, buf_desc_va);

	if (!kbase_mem_allow_alloc(kctx))
		return -EINVAL;

	if (chunk_size == 0)
		return -EINVAL;

	if (chunk_size & ~CHUNK_SIZE_MASK)
		return -EINVAL;

	if (initial_chunks == 0)
		return -EINVAL;

	if (initial_chunks > max_chunks)
		return -EINVAL;

	if (target_in_flight == 0)
		return -EINVAL;

	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
	if (unlikely(!heap)) {
		dev_err(kctx->kbdev->dev, "No kernel memory for a new tiler heap");
		return -ENOMEM;
	}

	heap->kctx = kctx;
	heap->chunk_size = chunk_size;
	heap->max_chunks = max_chunks;
	heap->target_in_flight = target_in_flight;
	heap->buf_desc_checked = false;
	INIT_LIST_HEAD(&heap->chunks_list);
	INIT_LIST_HEAD(&heap->link);

	/* Check the buffer descriptor virtual address */
	if (buf_desc_va) {
		struct kbase_va_region *buf_desc_reg;

		kbase_gpu_vm_lock(kctx);
		buf_desc_reg =
			kbase_region_tracker_find_region_enclosing_address(kctx, buf_desc_va);

		if (!kbasep_is_buffer_descriptor_region_suitable(kctx, buf_desc_reg)) {
			kbase_gpu_vm_unlock(kctx);
			dev_err(kctx->kbdev->dev,
				"Could not find a suitable VA region for the tiler heap buf desc!\n");
			err = -EINVAL;
			goto buf_desc_not_suitable;
		}

		/* If we don't prevent userspace from unmapping this, we may run into
		 * use-after-free, as we don't check for the existence of the region throughout.
		 */

		heap->buf_desc_va = buf_desc_va;
		heap->buf_desc_reg = buf_desc_reg;
		kbase_va_region_no_user_free_inc(buf_desc_reg);

		vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE,
					  KBASE_REG_CPU_RD, &heap->buf_desc_map,
					  KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);

		if (kbase_page_migration_enabled)
			kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE);

		kbase_gpu_vm_unlock(kctx);

		if (unlikely(!vmap_ptr)) {
			dev_err(kctx->kbdev->dev,
				"Could not vmap buffer descriptor into kernel memory\n");
			err = -ENOMEM;
			goto buf_desc_vmap_failed;
		}
	}

	heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc);
	if (unlikely(!heap->gpu_va)) {
		dev_dbg(kctx->kbdev->dev, "Failed to allocate a tiler heap context\n");
		err = -ENOMEM;
		goto heap_context_alloc_failed;
	}

	gpu_va_reg = ctx_alloc->region;

	kbase_gpu_vm_lock(kctx);
	/* gpu_va_reg was created with BASEP_MEM_NO_USER_FREE, the code to unset this only happens
	 * on kctx termination (after all syscalls on kctx have finished), and so it is safe to
	 * assume that gpu_va_reg is still present.
	 */
	vmap_ptr = kbase_vmap_reg(kctx, gpu_va_reg, heap->gpu_va, NEXT_CHUNK_ADDR_SIZE,
				  (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &heap->gpu_va_map,
				  KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
	kbase_gpu_vm_unlock(kctx);
	if (unlikely(!vmap_ptr)) {
		dev_dbg(kctx->kbdev->dev, "Failed to vmap the correct heap GPU VA address\n");
		err = -ENOMEM;
		goto heap_context_vmap_failed;
	}

	err = create_initial_chunks(heap, initial_chunks);
	if (unlikely(err)) {
		dev_dbg(kctx->kbdev->dev, "Failed to create the initial tiler heap chunks\n");
		goto create_chunks_failed;
	}
	chunk = list_first_entry(&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link);

	*heap_gpu_va = heap->gpu_va;
	*first_chunk_va = chunk->gpu_va;

	mutex_lock(&kctx->csf.tiler_heaps.lock);
	kctx->csf.tiler_heaps.nr_of_heaps++;
	heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps;
	list_add(&heap->link, &kctx->csf.tiler_heaps.list);

	KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id,
					    PFN_UP(heap->chunk_size * heap->max_chunks),
					    PFN_UP(heap->chunk_size * heap->chunk_count),
					    heap->max_chunks, heap->chunk_size, heap->chunk_count,
					    heap->target_in_flight, 0);

#if defined(CONFIG_MALI_VECTOR_DUMP)
	list_for_each_entry(chunk, &heap->chunks_list, link) {
		KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(kctx->kbdev, kctx->id, heap->heap_id,
							 chunk->gpu_va);
	}
#endif
	kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
	kctx->running_total_tiler_heap_memory += (u64)heap->chunk_size * heap->chunk_count;
	if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
		kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;

	dev_dbg(kctx->kbdev->dev,
		"Created tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", heap->gpu_va,
		buf_desc_va, kctx->tgid, kctx->id);
	mutex_unlock(&kctx->csf.tiler_heaps.lock);

	return 0;

create_chunks_failed:
	kbase_vunmap(kctx, &heap->gpu_va_map);
heap_context_vmap_failed:
	kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va);
heap_context_alloc_failed:
	if (heap->buf_desc_reg)
		kbase_vunmap(kctx, &heap->buf_desc_map);
buf_desc_vmap_failed:
	if (heap->buf_desc_reg) {
		kbase_gpu_vm_lock(kctx);
		kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
		kbase_gpu_vm_unlock(kctx);
	}
buf_desc_not_suitable:
	kfree(heap);
	return err;
}

int kbase_csf_tiler_heap_term(struct kbase_context *const kctx,
			      u64 const heap_gpu_va)
{
	int err = 0;
	struct kbase_csf_tiler_heap *heap = NULL;
	u32 chunk_count = 0;
	u64 heap_size = 0;

	mutex_lock(&kctx->csf.tiler_heaps.lock);
	heap = find_tiler_heap(kctx, heap_gpu_va);
	if (likely(heap)) {
		chunk_count = heap->chunk_count;
		heap_size = heap->chunk_size * chunk_count;

		list_del_init(&heap->link);
	} else {
		err = -EINVAL;
	}

	/* Update stats whilst still holding the lock so they are in sync with the tiler_heaps.list
	 * at all times
	 */
	if (likely(kctx->running_total_tiler_heap_memory >= heap_size))
		kctx->running_total_tiler_heap_memory -= heap_size;
	else
		dev_warn(kctx->kbdev->dev,
			 "Running total tiler heap memory lower than expected!");
	if (likely(kctx->running_total_tiler_heap_nr_chunks >= chunk_count))
		kctx->running_total_tiler_heap_nr_chunks -= chunk_count;
	else
		dev_warn(kctx->kbdev->dev,
			 "Running total tiler chunk count lower than expected!");
	if (!err)
		dev_dbg(kctx->kbdev->dev,
			"Terminated tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n",
			heap->gpu_va, heap->buf_desc_va, kctx->tgid, kctx->id);
	mutex_unlock(&kctx->csf.tiler_heaps.lock);

	/* Deletion requires the kctx->reg_lock, so must only operate on it whilst unlinked from
	 * the kctx's csf.tiler_heaps.list, and without holding the csf.tiler_heaps.lock
	 */
	if (likely(heap))
		delete_heap(heap);

	return err;
}

/**
 * validate_allocation_request - Check whether the chunk allocation request
 *                               received on tiler OOM should be handled at
 *                               current time.
 *
 * @heap: The tiler heap the OOM is associated with
 * @nr_in_flight: Number of fragment jobs in flight
 * @pending_frag_count: Number of pending fragment jobs
 *
 * Context: must hold the tiler heap lock to guarantee its lifetime
 *
 * Return:
 * * 0 - allowed to allocate an additional chunk
 * * -EINVAL - invalid
 * * -EBUSY - there are fragment jobs still in flight, which may free chunks
 *            after completing
 * * -ENOMEM - the targeted number of in-flight chunks has been reached and
 *             no new ones will be allocated
 */
static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight,
				       u32 pending_frag_count)
{
	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);

	if (WARN_ON(!nr_in_flight) || WARN_ON(pending_frag_count > nr_in_flight))
		return -EINVAL;

	if (nr_in_flight <= heap->target_in_flight) {
		if (heap->chunk_count < heap->max_chunks) {
			/* Not exceeded the target number of render passes yet so be
			 * generous with memory.
			 */
			return 0;
		} else if (pending_frag_count > 0) {
			return -EBUSY;
		} else {
			return -ENOMEM;
		}
	} else {
		/* Reached target number of render passes in flight.
		 * Wait for some of them to finish
		 */
		return -EBUSY;
	}
	return -ENOMEM;
}

int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
	u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
{
	struct kbase_csf_tiler_heap *heap;
	struct kbase_csf_tiler_heap_chunk *chunk;
	int err = -EINVAL;
	u64 chunk_size = 0;
	u64 heap_id = 0;

	/* To avoid potential locking issues during allocation, this is handled
	 * in three phases:
	 * 1. Take the lock, find the corresponding heap, and find its chunk size
	 *    (this is always 2 MB, but may change down the line).
	 * 2. Allocate memory for the chunk and its region.
	 * 3. If the heap still exists, link it to the end of the list. If it
	 *    doesn't, roll back the allocation.
	 */

	mutex_lock(&kctx->csf.tiler_heaps.lock);
	heap = find_tiler_heap(kctx, gpu_heap_va);
	if (likely(heap)) {
		chunk_size = heap->chunk_size;
		heap_id = heap->heap_id;
	} else {
		dev_err(kctx->kbdev->dev, "Heap 0x%llX does not exist", gpu_heap_va);
		mutex_unlock(&kctx->csf.tiler_heaps.lock);
		goto prelink_failure;
	}

	err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
	if (unlikely(err)) {
		/* The allocation request can be legitimate, but be invoked on a heap
		 * that has already reached the maximum pre-configured capacity. This
		 * is useful debug information, but should not be treated as an error,
		 * since the request will be re-sent at a later point.
		 */
		dev_dbg(kctx->kbdev->dev,
			"Not allocating new chunk for heap 0x%llX due to current heap state (err %d)",
			gpu_heap_va, err);
		mutex_unlock(&kctx->csf.tiler_heaps.lock);
		goto prelink_failure;
	}
	mutex_unlock(&kctx->csf.tiler_heaps.lock);
	/* this heap must not be used whilst we have dropped the lock */
	heap = NULL;

	chunk = alloc_new_chunk(kctx, chunk_size);
	if (unlikely(!chunk)) {
		dev_err(kctx->kbdev->dev, "Could not allocate chunk of size %lld for ctx %d_%d",
			chunk_size, kctx->tgid, kctx->id);
		goto prelink_failure;
	}

	/* After this point, the heap that we were targeting could already have had the needed
	 * chunks allocated, if we were handling multiple OoM events on multiple threads, so
	 * we need to revalidate the need for the allocation.
	 */
	mutex_lock(&kctx->csf.tiler_heaps.lock);
	heap = find_tiler_heap(kctx, gpu_heap_va);

	if (unlikely(!heap)) {
		dev_err(kctx->kbdev->dev, "Tiler heap 0x%llX no longer exists!\n", gpu_heap_va);
		mutex_unlock(&kctx->csf.tiler_heaps.lock);
		goto unroll_chunk;
	}

	if (heap_id != heap->heap_id) {
		dev_err(kctx->kbdev->dev,
			"Tiler heap 0x%llX was removed from ctx %d_%d while allocating chunk of size %lld!",
			gpu_heap_va, kctx->tgid, kctx->id, chunk_size);
		mutex_unlock(&kctx->csf.tiler_heaps.lock);
		goto unroll_chunk;
	}

	if (WARN_ON(chunk_size != heap->chunk_size)) {
		mutex_unlock(&kctx->csf.tiler_heaps.lock);
		goto unroll_chunk;
	}

	err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
	if (unlikely(err)) {
		dev_warn(
			kctx->kbdev->dev,
			"Aborting linking chunk to heap 0x%llX: heap state changed during allocation (err %d)",
			gpu_heap_va, err);
		mutex_unlock(&kctx->csf.tiler_heaps.lock);
		goto unroll_chunk;
	}

	err = init_chunk(heap, chunk, false);

	/* On error, the chunk would not be linked, so we can still treat it as an unlinked
	 * chunk for error handling.
	 */
	if (unlikely(err)) {
		dev_err(kctx->kbdev->dev,
			"Could not link chunk(0x%llX) with tiler heap 0x%llX in ctx %d_%d due to error %d",
			chunk->gpu_va, gpu_heap_va, kctx->tgid, kctx->id, err);
		mutex_unlock(&kctx->csf.tiler_heaps.lock);
		goto unroll_chunk;
	}

	*new_chunk_ptr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);

	/* update total and peak tiler heap memory record */
	kctx->running_total_tiler_heap_nr_chunks++;
	kctx->running_total_tiler_heap_memory += heap->chunk_size;

	if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
		kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;

	KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id,
					    PFN_UP(heap->chunk_size * heap->max_chunks),
					    PFN_UP(heap->chunk_size * heap->chunk_count),
					    heap->max_chunks, heap->chunk_size, heap->chunk_count,
					    heap->target_in_flight, nr_in_flight);

	mutex_unlock(&kctx->csf.tiler_heaps.lock);

	return err;
unroll_chunk:
	remove_unlinked_chunk(kctx, chunk);
prelink_failure:
	return err;
}

static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 chunk_gpu_va,
					u64 *hdr_val)
{
	int err;
	u64 *chunk_hdr;
	struct kbase_context *kctx = heap->kctx;
	struct kbase_csf_tiler_heap_chunk *chunk = NULL;

	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);

	chunk = find_chunk(heap, chunk_gpu_va);
	if (unlikely(!chunk)) {
		dev_warn(kctx->kbdev->dev,
			 "Failed to find tiler heap(0x%llX) chunk(0x%llX) for reclaim-delete\n",
			 heap->gpu_va, chunk_gpu_va);
		return false;
	}

	WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
	     "Cannot support CPU cached chunks without sync operations");
	chunk_hdr = chunk->map.addr;
	*hdr_val = *chunk_hdr;

	dev_dbg(kctx->kbdev->dev,
		"Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n",
		chunk_gpu_va, heap->gpu_va, *hdr_val);

	err = kbase_mem_shrink_gpu_mapping(kctx, chunk->region, 0, chunk->region->gpu_alloc->nents);
	if (unlikely(err)) {
		dev_warn(
			kctx->kbdev->dev,
			"Reclaim: shrinking GPU mapping failed on chunk(0x%llx) in heap(0x%llx) (err %d)\n",
			chunk_gpu_va, heap->gpu_va, err);

		/* Cannot free the pages whilst references on the GPU remain, so keep the chunk on
		 * the heap's chunk list and try a different heap.
		 */

		return false;
	}
	/* Destroy the mapping before the physical pages which are mapped are destroyed. */
	kbase_vunmap(kctx, &chunk->map);

	err = kbase_free_phy_pages_helper(chunk->region->gpu_alloc,
					  chunk->region->gpu_alloc->nents);
	if (unlikely(err)) {
		dev_warn(
			kctx->kbdev->dev,
			"Reclaim: remove physical backing failed on chunk(0x%llx) in heap(0x%llx) (err %d), continuing with deferred removal\n",
			chunk_gpu_va, heap->gpu_va, err);

		/* kbase_free_phy_pages_helper() should only fail on invalid input, and WARNs
		 * anyway, so continue instead of returning early.
		 *
		 * Indeed, we don't want to leave the chunk on the heap's chunk list whilst it has
		 * its mapping removed, as that could lead to problems. It's safest to instead
		 * continue with deferred destruction of the chunk.
		 */
	}

	dev_dbg(kctx->kbdev->dev,
		"Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n",
		chunk_gpu_va, heap->gpu_va, *hdr_val);

	mutex_lock(&heap->kctx->jit_evict_lock);
	list_move(&chunk->region->jit_node, &kctx->jit_destroy_head);
	mutex_unlock(&heap->kctx->jit_evict_lock);

	list_del(&chunk->link);
	heap->chunk_count--;
	kfree(chunk);

	return true;
}

static void sanity_check_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap,
					 struct kbase_csf_gpu_buffer_heap *desc)
{
	u64 first_hoarded_chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK;

	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);

	if (first_hoarded_chunk_gpu_va) {
		struct kbase_csf_tiler_heap_chunk *chunk =
			find_chunk(heap, first_hoarded_chunk_gpu_va);

		if (likely(chunk)) {
			dev_dbg(heap->kctx->kbdev->dev,
				"Buffer descriptor 0x%llX sanity check ok, HW reclaim allowed\n",
				heap->buf_desc_va);

			heap->buf_desc_checked = true;
			return;
		}
	}
	/* If there is no match, defer the check to next time */
	dev_dbg(heap->kctx->kbdev->dev, "Buffer descriptor 0x%llX runtime sanity check deferred\n",
		heap->buf_desc_va);
}

static bool can_read_hw_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, u64 *chunk_gpu_va_ptr)
{
	struct kbase_context *kctx = heap->kctx;

	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);

	/* Initialize the returned chunk GPU VA to 0 */
	*chunk_gpu_va_ptr = 0;

	/* The buffer descriptor supplied at heap creation is only a hint, so sanity check it at runtime */
	if (heap->buf_desc_reg && !heap->buf_desc_checked) {
		struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;

		/* BufferDescriptor is supplied by userspace, so could be CPU-cached */
		if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
			kbase_sync_mem_regions(kctx, &heap->buf_desc_map, KBASE_SYNC_TO_CPU);

		sanity_check_gpu_buffer_heap(heap, desc);
		if (heap->buf_desc_checked)
			*chunk_gpu_va_ptr = desc->pointer & CHUNK_ADDR_MASK;
	}

	return heap->buf_desc_checked;
}

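/*
 * Overview of the function below: delete_hoarded_chunks() walks the chunk list
 * that the GPU has retained ("hoarded") through the user-supplied buffer
 * descriptor. It locates the first retained chunk from the descriptor's
 * pointer (validated by can_read_hw_gpu_buffer_heap() above), then follows
 * the next-chunk links in the chunk headers and frees the physical backing of
 * the chunks behind it, stopping if a chunk cannot be reclaimed or when only
 * HEAP_SHRINK_STOP_LIMIT chunk(s) would remain.
 */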
static u32 delete_hoarded_chunks(struct kbase_csf_tiler_heap *heap)
{
	u32 freed = 0;
	u64 chunk_gpu_va = 0;
	struct kbase_context *kctx = heap->kctx;
	struct kbase_csf_tiler_heap_chunk *chunk = NULL;

	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);

	if (can_read_hw_gpu_buffer_heap(heap, &chunk_gpu_va)) {
		u64 chunk_hdr_val;
		u64 *hw_hdr;

		if (!chunk_gpu_va) {
			struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;

			/* BufferDescriptor is supplied by userspace, so could be CPU-cached */
			if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
				kbase_sync_mem_regions(kctx, &heap->buf_desc_map,
						       KBASE_SYNC_TO_CPU);
			chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK;

			if (!chunk_gpu_va) {
				dev_dbg(kctx->kbdev->dev,
					"Buffer descriptor 0x%llX has no chunks (NULL) for reclaim scan\n",
					heap->buf_desc_va);
				goto out;
			}
		}

		chunk = find_chunk(heap, chunk_gpu_va);
		if (unlikely(!chunk))
			goto out;

		WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
		     "Cannot support CPU cached chunks without sync operations");
		hw_hdr = chunk->map.addr;

		/* Read the next-chunk information from this chunk's header */
		chunk_hdr_val = *hw_hdr;
		chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;

		while (chunk_gpu_va && heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
			bool success =
				delete_chunk_physical_pages(heap, chunk_gpu_va, &chunk_hdr_val);

			if (!success)
				break;

			freed++;
			/* On success, chunk_hdr_val is updated, extract the next chunk address */
			chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
		}

		/* Update the existing hardware chunk header, after reclaim deletion of chunks */
		*hw_hdr = chunk_hdr_val;

		dev_dbg(heap->kctx->kbdev->dev,
			"HW reclaim scan freed chunks: %u, set hw_hdr[0]: 0x%llX\n", freed,
			chunk_hdr_val);
	} else {
		dev_dbg(kctx->kbdev->dev,
			"Skip HW reclaim scan, (disabled: buffer descriptor 0x%llX)\n",
			heap->buf_desc_va);
	}
out:
	return freed;
}

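/*
 * Overview of the function below: delete_unused_chunk_pages() is the per-heap
 * scan step used by the tiler heap reclaim (see
 * kbase_csf_tiler_heap_scan_kctx_unused_pages() further down). It first walks
 * the free list held in the heap context (read through heap->gpu_va_map), then
 * lets delete_hoarded_chunks() scan the buffer-descriptor list, and finally
 * subtracts the freed pages and chunks from the context-wide usage counters.
 */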
static u64 delete_unused_chunk_pages(struct kbase_csf_tiler_heap *heap)
{
	u32 freed_chunks = 0;
	u64 freed_pages = 0;
	u64 chunk_gpu_va;
	u64 chunk_hdr_val;
	struct kbase_context *kctx = heap->kctx;
	u64 *ctx_ptr;

	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);

	WARN(heap->gpu_va_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED,
	     "Cannot support CPU cached heap context without sync operations");

	ctx_ptr = heap->gpu_va_map.addr;

	/* Extract the first chunk address from the context's free_list_head */
	chunk_hdr_val = *ctx_ptr;
	chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;

	while (chunk_gpu_va) {
		u64 hdr_val;
		bool success = delete_chunk_physical_pages(heap, chunk_gpu_va, &hdr_val);

		if (!success)
			break;

		freed_chunks++;
		chunk_hdr_val = hdr_val;
		/* extract the next chunk address */
		chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
	}

	/* Write the post-scan-deletion list head back to the heap context header */
	*ctx_ptr = chunk_hdr_val;

	/* Try to scan the HW hoarded list of unused chunks */
	freed_chunks += delete_hoarded_chunks(heap);
	freed_pages = freed_chunks * PFN_UP(heap->chunk_size);
	dev_dbg(heap->kctx->kbdev->dev,
		"Scan reclaim freed chunks/pages %u/%llu, set heap-ctx_u64[0]: 0x%llX\n",
		freed_chunks, freed_pages, chunk_hdr_val);

	/* Update context tiler heaps memory usage */
	kctx->running_total_tiler_heap_memory -= freed_pages << PAGE_SHIFT;
	kctx->running_total_tiler_heap_nr_chunks -= freed_chunks;
	return freed_pages;
}

u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free)
{
	u64 freed = 0;
	struct kbase_csf_tiler_heap *heap;

	mutex_lock(&kctx->csf.tiler_heaps.lock);

	list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) {
		freed += delete_unused_chunk_pages(heap);

		/* If freed enough, then stop here */
		if (freed >= to_free)
			break;
	}

	mutex_unlock(&kctx->csf.tiler_heaps.lock);
	/* The scan cannot realistically free more than 4G pages, but clamp it for the u32 return type */
	if (WARN_ON(unlikely(freed > U32_MAX)))
		return U32_MAX;
	else
		return (u32)freed;
}

static u64 count_unused_heap_pages(struct kbase_csf_tiler_heap *heap)
{
	u32 chunk_cnt = 0;
	u64 page_cnt = 0;

	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);

	/* Here the count is basically an informed estimate, avoiding the costly mapping/unmapping
	 * in the chunk list walk. The downside is that the number is a less reliable guide for
	 * later scan (free) calls on this heap as to what is actually freeable.
	 */
	if (heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
		chunk_cnt = heap->chunk_count - HEAP_SHRINK_STOP_LIMIT;
		page_cnt = chunk_cnt * PFN_UP(heap->chunk_size);
	}

	dev_dbg(heap->kctx->kbdev->dev,
		"Reclaim count chunks/pages %u/%llu (estimated), heap_va: 0x%llX\n", chunk_cnt,
		page_cnt, heap->gpu_va);

	return page_cnt;
}

u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx)
{
	u64 page_cnt = 0;
	struct kbase_csf_tiler_heap *heap;

	mutex_lock(&kctx->csf.tiler_heaps.lock);

	list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link)
		page_cnt += count_unused_heap_pages(heap);

	mutex_unlock(&kctx->csf.tiler_heaps.lock);

	/* The count cannot realistically exceed 4G pages, but clamp it for the u32 return type */
	if (WARN_ON(unlikely(page_cnt > U32_MAX)))
		return U32_MAX;
	else
		return (u32)page_cnt;
}