| .. | .. |
| 1 | 1 | // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note |
| 2 | 2 | /* |
| 3 | 3 | * |
| 4 | | - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. |
| | 4 | + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. |
| 5 | 5 | * |
| 6 | 6 | * This program is free software and is provided to you under the terms of the |
| 7 | 7 | * GNU General Public License version 2 as published by the Free Software |
| .. | .. |
| 23 | 23 | #include "mali_kbase_csf_heap_context_alloc.h" |
| 24 | 24 | |
| 25 | 25 | /* Size of one heap context structure, in bytes. */ |
| 26 | | -#define HEAP_CTX_SIZE ((size_t)32) |
| 27 | | - |
| 28 | | -/* Total size of the GPU memory region allocated for heap contexts, in bytes. */ |
| 29 | | -#define HEAP_CTX_REGION_SIZE (MAX_TILER_HEAPS * HEAP_CTX_SIZE) |
| | 26 | +#define HEAP_CTX_SIZE ((u32)32) |
| 30 | 27 | |
| 31 | 28 | /** |
| 32 | 29 | * sub_alloc - Sub-allocate a heap context from a GPU memory region |
| .. | .. |
| 38 | 35 | static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc) |
| 39 | 36 | { |
| 40 | 37 | struct kbase_context *const kctx = ctx_alloc->kctx; |
| 41 | | - int heap_nr = 0; |
| 42 | | - size_t ctx_offset = 0; |
| | 38 | + unsigned long heap_nr = 0; |
| | 39 | + u32 ctx_offset = 0; |
| 43 | 40 | u64 heap_gpu_va = 0; |
| 44 | 41 | struct kbase_vmap_struct mapping; |
| 45 | 42 | void *ctx_ptr = NULL; |
| .. | .. |
| 50 | 47 | MAX_TILER_HEAPS); |
| 51 | 48 | |
| 52 | 49 | if (unlikely(heap_nr >= MAX_TILER_HEAPS)) { |
| 53 | | - dev_err(kctx->kbdev->dev, |
| 54 | | - "No free tiler heap contexts in the pool\n"); |
| | 50 | + dev_dbg(kctx->kbdev->dev, |
| | 51 | + "No free tiler heap contexts in the pool"); |
| 55 | 52 | return 0; |
| 56 | 53 | } |
| 57 | 54 | |
| 58 | | - ctx_offset = heap_nr * HEAP_CTX_SIZE; |
| | 55 | + ctx_offset = heap_nr * ctx_alloc->heap_context_size_aligned; |
| 59 | 56 | heap_gpu_va = ctx_alloc->gpu_va + ctx_offset; |
| 60 | 57 | ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va, |
| 61 | | - HEAP_CTX_SIZE, KBASE_REG_CPU_WR, &mapping); |
| | 58 | + ctx_alloc->heap_context_size_aligned, KBASE_REG_CPU_WR, &mapping); |
| 62 | 59 | |
| 63 | 60 | if (unlikely(!ctx_ptr)) { |
| 64 | 61 | dev_err(kctx->kbdev->dev, |
| 65 | | - "Failed to map tiler heap context %d (0x%llX)\n", |
| | 62 | + "Failed to map tiler heap context %lu (0x%llX)\n", |
| 66 | 63 | heap_nr, heap_gpu_va); |
| 67 | 64 | return 0; |
| 68 | 65 | } |
| 69 | 66 | |
| 70 | | - memset(ctx_ptr, 0, HEAP_CTX_SIZE); |
| | 67 | + memset(ctx_ptr, 0, ctx_alloc->heap_context_size_aligned); |
| 71 | 68 | kbase_vunmap(ctx_ptr, &mapping); |
| 72 | 69 | |
| 73 | 70 | bitmap_set(ctx_alloc->in_use, heap_nr, 1); |
| 74 | 71 | |
| 75 | | - dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %d (0x%llX)\n", |
| | 72 | + dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n", |
| 76 | 73 | heap_nr, heap_gpu_va); |
| 77 | 74 | |
| 78 | 75 | return heap_gpu_va; |
| | 76 | +} |
| | 77 | + |
| | 78 | +/** |
| | 79 | + * evict_heap_context - Evict a heap context's data from the GPU's L2 cache. |
| | 80 | + * |
| | 81 | + * @ctx_alloc: Pointer to the heap context allocator. |
| | 82 | + * @heap_gpu_va: The GPU virtual address of a heap context structure to free. |
| | 83 | + * |
| | 84 | + * This function is called when the memory for a heap context is freed. It uses |
| | 85 | + * the FLUSH_PA_RANGE command to evict the heap context's data, so nothing is |
| | 86 | + * done on older CSF GPUs that do not support that command. On those GPUs the |
| | 87 | + * whole GPU cache is expected to be flushed anyway when the heap's initial |
| | 88 | + * chunks are freed, just before the memory for the heap context is freed. |
| | 89 | + */ |
| | 90 | +static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ctx_alloc, |
| | 91 | + u64 const heap_gpu_va) |
| | 92 | +{ |
| | 93 | + struct kbase_context *const kctx = ctx_alloc->kctx; |
| | 94 | + u32 offset_in_bytes = (u32)(heap_gpu_va - ctx_alloc->gpu_va); |
| | 95 | + u32 offset_within_page = offset_in_bytes & ~PAGE_MASK; |
| | 96 | + u32 page_index = offset_in_bytes >> PAGE_SHIFT; |
| | 97 | + struct tagged_addr page = |
| | 98 | + kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index]; |
| | 99 | + phys_addr_t heap_context_pa = as_phys_addr_t(page) + offset_within_page; |
| | 100 | + |
| | 101 | + lockdep_assert_held(&ctx_alloc->lock); |
| | 102 | + |
| | 103 | + /* There is no need to take vm_lock here as the ctx_alloc region is protected |
| | 104 | + * via a nonzero no_user_free_count. The region and the backing page can't |
| | 105 | + * disappear whilst this function is executing. Flush type is passed as FLUSH_PT |
| | 106 | + * to CLN+INV L2 only. |
| | 107 | + */ |
| | 108 | + kbase_mmu_flush_pa_range(kctx->kbdev, kctx, |
| | 109 | + heap_context_pa, ctx_alloc->heap_context_size_aligned, |
| | 110 | + KBASE_MMU_OP_FLUSH_PT); |
| 79 | 111 | } |
| 80 | 112 | |
| 81 | 113 | /** |
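The address arithmetic in evict_heap_context() above can be illustrated with a small standalone sketch. This is not driver code: the 4 KiB page size, the pool base address and the backing page addresses are assumptions chosen only to show how a heap context's offset is split into a page index and an offset within that page before the physical range is flushed.

```c
/* Standalone sketch of the offset-to-physical-address arithmetic used by
 * evict_heap_context(). All values are hypothetical; the driver looks the
 * backing page up via kbase_get_gpu_phy_pages() and uses the kernel's
 * PAGE_SHIFT/PAGE_MASK.
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SHIFT 12u                       /* assume 4 KiB pages */
#define SKETCH_PAGE_SIZE  (1u << SKETCH_PAGE_SHIFT)

int main(void)
{
	const uint64_t pool_gpu_va = 0x800000000ull;    /* hypothetical pool base */
	const uint64_t backing_pa[] = { 0x40000000ull, 0x40005000ull }; /* backing pages */
	const uint64_t heap_gpu_va = pool_gpu_va + 0x1040; /* some sub-allocated context */

	uint32_t offset_in_bytes = (uint32_t)(heap_gpu_va - pool_gpu_va);
	uint32_t offset_within_page = offset_in_bytes & (SKETCH_PAGE_SIZE - 1);
	uint32_t page_index = offset_in_bytes >> SKETCH_PAGE_SHIFT;
	uint64_t heap_context_pa = backing_pa[page_index] + offset_within_page;

	/* 0x1040 -> page index 1, offset 0x40 -> physical address 0x40005040,
	 * which is the start of the range handed to the physical-range flush.
	 */
	printf("heap context PA: 0x%llx\n", (unsigned long long)heap_context_pa);
	return 0;
}
```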
| .. | .. |
| 88 | 120 | u64 const heap_gpu_va) |
| 89 | 121 | { |
| 90 | 122 | struct kbase_context *const kctx = ctx_alloc->kctx; |
| 91 | | - u64 ctx_offset = 0; |
| | 123 | + u32 ctx_offset = 0; |
| 92 | 124 | unsigned int heap_nr = 0; |
| 93 | 125 | |
| 94 | 126 | lockdep_assert_held(&ctx_alloc->lock); |
| .. | .. |
| 99 | 131 | if (WARN_ON(heap_gpu_va < ctx_alloc->gpu_va)) |
| 100 | 132 | return; |
| 101 | 133 | |
| 102 | | - ctx_offset = heap_gpu_va - ctx_alloc->gpu_va; |
| | 134 | + ctx_offset = (u32)(heap_gpu_va - ctx_alloc->gpu_va); |
| 103 | 135 | |
| 104 | | - if (WARN_ON(ctx_offset >= HEAP_CTX_REGION_SIZE) || |
| 105 | | - WARN_ON(ctx_offset % HEAP_CTX_SIZE)) |
| | 136 | + if (WARN_ON(ctx_offset >= (ctx_alloc->region->nr_pages << PAGE_SHIFT)) || |
| | 137 | + WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned)) |
| 106 | 138 | return; |
| 107 | 139 | |
| 108 | | - heap_nr = ctx_offset / HEAP_CTX_SIZE; |
| | 140 | + evict_heap_context(ctx_alloc, heap_gpu_va); |
| | 141 | + |
| | 142 | + heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned; |
| 109 | 143 | dev_dbg(kctx->kbdev->dev, |
| 110 | 144 | "Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va); |
| 111 | 145 | |
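For illustration, the free-path checks above can be traced with hypothetical numbers. A 64-byte aligned context size and a one-page pool are assumptions here (the 64-byte figure would follow from a 64-byte L2 line, see the init function further below); this is not driver code.

```c
/* Sketch (not driver code) of the free-path sanity checks: the context's
 * offset must lie inside the pool, be a multiple of the aligned context size,
 * and then directly yields the bitmap slot to clear.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint64_t pool_gpu_va = 0x800000000ull;    /* hypothetical pool base */
	const uint32_t ctx_size_aligned = 64;           /* assumed aligned size */
	const uint32_t pool_size = 4096;                /* assumed one backing page */
	const uint64_t heap_gpu_va = pool_gpu_va + 128; /* context being freed */

	uint32_t ctx_offset = (uint32_t)(heap_gpu_va - pool_gpu_va);

	assert(ctx_offset < pool_size);                 /* inside the pool region */
	assert(ctx_offset % ctx_size_aligned == 0);     /* on a context boundary */

	/* 128 / 64 == 2: heap context number 2 is released back to the bitmap. */
	return (ctx_offset / ctx_size_aligned) == 2 ? 0 : 1;
}
```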
| .. | .. |
| 116 | 150 | struct kbase_csf_heap_context_allocator *const ctx_alloc, |
| 117 | 151 | struct kbase_context *const kctx) |
| 118 | 152 | { |
| | 153 | + const u32 gpu_cache_line_size = |
| | 154 | + (1U << kctx->kbdev->gpu_props.props.l2_props.log2_line_size); |
| | 155 | + |
| 119 | 156 | /* We cannot pre-allocate GPU memory here because the |
| 120 | 157 | * custom VA zone may not have been created yet. |
| 121 | 158 | */ |
| 122 | 159 | ctx_alloc->kctx = kctx; |
| 123 | 160 | ctx_alloc->region = NULL; |
| 124 | 161 | ctx_alloc->gpu_va = 0; |
| | 162 | + ctx_alloc->heap_context_size_aligned = |
| | 163 | + (HEAP_CTX_SIZE + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1); |
| 125 | 164 | |
| 126 | 165 | mutex_init(&ctx_alloc->lock); |
| 127 | 166 | bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS); |
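The round-up of HEAP_CTX_SIZE to the GPU cache line size can be checked with a quick sketch. The 64-byte L2 line (log2_line_size of 6) is an assumption for illustration only; the driver reads the real value from the GPU properties as shown above.

```c
/* Standalone check of the alignment expression used for
 * heap_context_size_aligned: round size up to the next multiple of the
 * (power-of-two) cache line size. With an assumed 64-byte line, the 32-byte
 * heap context rounds up to 64, so each context starts on its own line.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t round_up_pow2(uint32_t size, uint32_t line)
{
	return (size + line - 1) & ~(line - 1);
}

int main(void)
{
	const uint32_t heap_ctx_size = 32;              /* HEAP_CTX_SIZE */
	const uint32_t line = 1u << 6;                  /* assumed 64-byte L2 line */

	/* (32 + 63) & ~63 == 64 */
	printf("aligned heap context size: %u\n", round_up_pow2(heap_ctx_size, line));
	return 0;
}
```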
| .. | .. |
| 142 | 181 | |
| 143 | 182 | if (ctx_alloc->region) { |
| 144 | 183 | kbase_gpu_vm_lock(kctx); |
| 145 | | - ctx_alloc->region->flags &= ~KBASE_REG_NO_USER_FREE; |
| | 184 | + WARN_ON(!kbase_va_region_is_no_user_free(ctx_alloc->region)); |
| | 185 | + |
| | 186 | + kbase_va_region_no_user_free_dec(ctx_alloc->region); |
| 146 | 187 | kbase_mem_free_region(kctx, ctx_alloc->region); |
| 147 | 188 | kbase_gpu_vm_unlock(kctx); |
| 148 | 189 | } |
| .. | .. |
| 154 | 195 | struct kbase_csf_heap_context_allocator *const ctx_alloc) |
| 155 | 196 | { |
| 156 | 197 | struct kbase_context *const kctx = ctx_alloc->kctx; |
| 157 | | - u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | |
| 158 | | - BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE; |
| 159 | | - u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE); |
| | 198 | + u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | |
| | 199 | + BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD; |
| | 200 | + u64 nr_pages = PFN_UP(MAX_TILER_HEAPS * ctx_alloc->heap_context_size_aligned); |
| 160 | 201 | u64 heap_gpu_va = 0; |
| 161 | 202 | |
| 162 | | -#ifdef CONFIG_MALI_VECTOR_DUMP |
| 163 | | - flags |= BASE_MEM_PROT_CPU_RD; |
| 164 | | -#endif |
| | 203 | + /* Calls to this function are inherently asynchronous with respect to |
| | 204 | + * MMU operations. |
| | 205 | + */ |
| | 206 | + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; |
| 165 | 207 | |
| 166 | 208 | mutex_lock(&ctx_alloc->lock); |
| 167 | 209 | |
| .. | .. |
| 169 | 211 | * allocate it. |
| 170 | 212 | */ |
| 171 | 213 | if (!ctx_alloc->region) { |
| 172 | | - ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, |
| 173 | | - 0, &flags, &ctx_alloc->gpu_va); |
| | 214 | + ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, |
| | 215 | + &ctx_alloc->gpu_va, mmu_sync_info); |
| 174 | 216 | } |
| 175 | 217 | |
| 176 | 218 | /* If the pool still isn't allocated then an error occurred. */ |
| 177 | | - if (unlikely(!ctx_alloc->region)) { |
| 178 | | - dev_err(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts\n"); |
| 179 | | - } else { |
| | 219 | + if (unlikely(!ctx_alloc->region)) |
| | 220 | + dev_dbg(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts"); |
| | 221 | + else |
| 180 | 222 | heap_gpu_va = sub_alloc(ctx_alloc); |
| 181 | | - } |
| 182 | 223 | |
| 183 | 224 | mutex_unlock(&ctx_alloc->lock); |
| 184 | 225 | |
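Taken together, the allocation path above follows a lazy pool-then-sub-allocate pattern: the backing region is created on first use under the allocator lock, and individual contexts are then handed out from a fixed number of equally sized slots tracked in a bitmap. A generic sketch of that pattern is shown below; all names are hypothetical, and plain heap memory with a byte array stands in for the driver's GPU memory region and bitmap helpers.

```c
/* Generic sketch of the lazy pool-then-sub-allocate pattern used by
 * kbase_csf_heap_context_allocator_alloc()/sub_alloc(). Hypothetical names;
 * calloc() stands in for the GPU memory region allocation.
 */
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#define NUM_SLOTS 8           /* stands in for MAX_TILER_HEAPS */
#define SLOT_SIZE 64          /* stands in for the aligned context size */

struct slot_pool {
	pthread_mutex_t lock;
	unsigned char *base;                 /* created lazily on first alloc */
	unsigned char in_use[NUM_SLOTS];     /* stands in for the in_use bitmap */
};

static void *slot_pool_alloc(struct slot_pool *pool)
{
	void *slot = NULL;
	size_t i;

	pthread_mutex_lock(&pool->lock);

	/* Allocate the backing region only when the first slot is requested. */
	if (!pool->base)
		pool->base = calloc(NUM_SLOTS, SLOT_SIZE);

	if (pool->base) {
		for (i = 0; i < NUM_SLOTS; i++) {
			if (!pool->in_use[i]) {
				pool->in_use[i] = 1;
				slot = pool->base + i * SLOT_SIZE;
				memset(slot, 0, SLOT_SIZE);  /* hand out a zeroed context */
				break;
			}
		}
	}

	pthread_mutex_unlock(&pool->lock);
	return slot;   /* NULL when the pool is exhausted or allocation failed */
}

int main(void)
{
	struct slot_pool pool = { .lock = PTHREAD_MUTEX_INITIALIZER };
	int ok = slot_pool_alloc(&pool) != NULL;

	free(pool.base);
	return ok ? 0 : 1;
}
```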