.. | ..
1 | 1 | // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 | 2 | /*
3 | 3 |  *
4 | | - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
| 4 | + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
5 | 5 |  *
6 | 6 |  * This program is free software and is provided to you under the terms of the
7 | 7 |  * GNU General Public License version 2 as published by the Free Software
.. | ..
23 | 23 | #include "mali_kbase_csf_heap_context_alloc.h"
24 | 24 |
25 | 25 | /* Size of one heap context structure, in bytes. */
26 | | -#define HEAP_CTX_SIZE ((size_t)32)
27 | | -
28 | | -/* Total size of the GPU memory region allocated for heap contexts, in bytes. */
29 | | -#define HEAP_CTX_REGION_SIZE (MAX_TILER_HEAPS * HEAP_CTX_SIZE)
| 26 | +#define HEAP_CTX_SIZE ((u32)32)
30 | 27 |
31 | 28 | /**
32 | 29 |  * sub_alloc - Sub-allocate a heap context from a GPU memory region
.. | ..
38 | 35 | static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
39 | 36 | {
40 | 37 |         struct kbase_context *const kctx = ctx_alloc->kctx;
41 | | -        int heap_nr = 0;
42 | | -        size_t ctx_offset = 0;
| 38 | +        unsigned long heap_nr = 0;
| 39 | +        u32 ctx_offset = 0;
43 | 40 |         u64 heap_gpu_va = 0;
44 | 41 |         struct kbase_vmap_struct mapping;
45 | 42 |         void *ctx_ptr = NULL;
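For orientation, the allocator hands out fixed-size slots from a single pool by scanning the `in_use` bitmap; `find_first_zero_bit()` returns an `unsigned long`, which is why `heap_nr` changes type above. The following standalone sketch (plain C, not kbase code; the pool geometry and the `first_zero_bit()` helper are invented for the example) models that pattern:

```c
/*
 * Standalone model (userspace C, not kbase code) of the slot allocation
 * pattern used by sub_alloc(): scan a bitmap for the first free slot,
 * mark it busy, and turn the slot index into a byte offset.
 * MAX_HEAPS and ALIGNED_CTX_SIZE are illustrative values only.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_HEAPS        128
#define ALIGNED_CTX_SIZE 64u   /* HEAP_CTX_SIZE rounded up to a cache line */

static uint64_t in_use[MAX_HEAPS / 64];

/* Simplified stand-in for the kernel's find_first_zero_bit(). */
static unsigned long first_zero_bit(const uint64_t *map, unsigned long nbits)
{
	unsigned long i;

	for (i = 0; i < nbits; i++)
		if (!(map[i / 64] & (1ULL << (i % 64))))
			return i;
	return nbits; /* "not found", mirrors the kernel convention */
}

int main(void)
{
	unsigned long heap_nr = first_zero_bit(in_use, MAX_HEAPS);
	uint32_t ctx_offset;

	if (heap_nr >= MAX_HEAPS) {
		fprintf(stderr, "No free heap context slots\n");
		return 1;
	}

	in_use[heap_nr / 64] |= 1ULL << (heap_nr % 64); /* bitmap_set(map, heap_nr, 1) */
	ctx_offset = (uint32_t)(heap_nr * ALIGNED_CTX_SIZE);
	printf("slot %lu -> offset 0x%x\n", heap_nr, ctx_offset);
	return 0;
}
```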
.. | ..
50 | 47 |                                        MAX_TILER_HEAPS);
51 | 48 |
52 | 49 |         if (unlikely(heap_nr >= MAX_TILER_HEAPS)) {
53 | | -                dev_err(kctx->kbdev->dev,
54 | | -                        "No free tiler heap contexts in the pool\n");
| 50 | +                dev_dbg(kctx->kbdev->dev,
| 51 | +                        "No free tiler heap contexts in the pool");
55 | 52 |                 return 0;
56 | 53 |         }
57 | 54 |
58 | | -        ctx_offset = heap_nr * HEAP_CTX_SIZE;
| 55 | +        ctx_offset = heap_nr * ctx_alloc->heap_context_size_aligned;
59 | 56 |         heap_gpu_va = ctx_alloc->gpu_va + ctx_offset;
60 | 57 |         ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va,
61 | | -                HEAP_CTX_SIZE, KBASE_REG_CPU_WR, &mapping);
| 58 | +                ctx_alloc->heap_context_size_aligned, KBASE_REG_CPU_WR, &mapping);
62 | 59 |
63 | 60 |         if (unlikely(!ctx_ptr)) {
64 | 61 |                 dev_err(kctx->kbdev->dev,
65 | | -                        "Failed to map tiler heap context %d (0x%llX)\n",
| 62 | +                        "Failed to map tiler heap context %lu (0x%llX)\n",
66 | 63 |                         heap_nr, heap_gpu_va);
67 | 64 |                 return 0;
68 | 65 |         }
69 | 66 |
70 | | -        memset(ctx_ptr, 0, HEAP_CTX_SIZE);
| 67 | +        memset(ctx_ptr, 0, ctx_alloc->heap_context_size_aligned);
71 | 68 |         kbase_vunmap(ctx_ptr, &mapping);
72 | 69 |
73 | 70 |         bitmap_set(ctx_alloc->in_use, heap_nr, 1);
74 | 71 |
75 | | -        dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %d (0x%llX)\n",
| 72 | +        dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n",
76 | 73 |                 heap_nr, heap_gpu_va);
77 | 74 |
78 | 75 |         return heap_gpu_va;
| 76 | +}
| 77 | +
| 78 | +/**
| 79 | + * evict_heap_context - Evict the data of a heap context from the GPU's L2 cache.
| 80 | + *
| 81 | + * @ctx_alloc:   Pointer to the heap context allocator.
| 82 | + * @heap_gpu_va: The GPU virtual address of a heap context structure to free.
| 83 | + *
| 84 | + * This function is called when the memory for a heap context is freed. It uses
| 85 | + * the FLUSH_PA_RANGE command to evict the heap context data, so nothing is done
| 86 | + * on older CSF GPUs. On those GPUs the whole GPU cache is expected to be flushed
| 87 | + * anyway when the initial chunks of the heap are freed, just before the memory
| 88 | + * for the heap context is freed.
| 89 | + */
| 90 | +static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ctx_alloc,
| 91 | +                               u64 const heap_gpu_va)
| 92 | +{
| 93 | +        struct kbase_context *const kctx = ctx_alloc->kctx;
| 94 | +        u32 offset_in_bytes = (u32)(heap_gpu_va - ctx_alloc->gpu_va);
| 95 | +        u32 offset_within_page = offset_in_bytes & ~PAGE_MASK;
| 96 | +        u32 page_index = offset_in_bytes >> PAGE_SHIFT;
| 97 | +        struct tagged_addr page =
| 98 | +                kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index];
| 99 | +        phys_addr_t heap_context_pa = as_phys_addr_t(page) + offset_within_page;
| 100 | +
| 101 | +        lockdep_assert_held(&ctx_alloc->lock);
| 102 | +
| 103 | +        /* There is no need to take the vm_lock here as the ctx_alloc region is
| 104 | +         * protected via a nonzero no_user_free_count. The region and the backing
| 105 | +         * page can't disappear whilst this function is executing. The flush type
| 106 | +         * is passed as FLUSH_PT to clean and invalidate the L2 cache only.
| 107 | +         */
| 108 | +        kbase_mmu_flush_pa_range(kctx->kbdev, kctx,
| 109 | +                                 heap_context_pa, ctx_alloc->heap_context_size_aligned,
| 110 | +                                 KBASE_MMU_OP_FLUSH_PT);
79 | 111 | }
80 | 112 |
81 | 113 | /**
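The new `evict_heap_context()` helper only needs a physical address range to hand to `kbase_mmu_flush_pa_range()`. The sketch below (standalone C, with fabricated addresses and a made-up `phys_of_page()` lookup standing in for `kbase_get_gpu_phy_pages()` / `as_phys_addr_t()`) walks through the same offset-to-physical-address arithmetic:

```c
/*
 * Standalone illustration (plain C, not kbase code) of how evict_heap_context()
 * derives the physical address to flush: the byte offset of the context within
 * the pool region selects a backing page, and the offset within that page is
 * added to that page's physical address. PAGE_SIZE, the example addresses and
 * phys_of_page() are invented for this example.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE  4096u
#define PAGE_MASK  (~(uint64_t)(PAGE_SIZE - 1))
#define PAGE_SHIFT 12

/* Stand-in for the driver's per-region physical page array (fabricated values). */
static uint64_t phys_of_page(uint32_t page_index)
{
	static const uint64_t backing[] = { 0x82340000ull, 0x9abc1000ull };
	return backing[page_index];
}

int main(void)
{
	const uint64_t pool_gpu_va = 0x7f0000000000ull;    /* ctx_alloc->gpu_va   */
	const uint64_t heap_gpu_va = pool_gpu_va + 0x1040; /* one context slot    */
	const uint32_t ctx_size_aligned = 64;              /* aligned context size */

	uint32_t offset_in_bytes = (uint32_t)(heap_gpu_va - pool_gpu_va);
	uint32_t offset_within_page = (uint32_t)(offset_in_bytes & ~PAGE_MASK);
	uint32_t page_index = offset_in_bytes >> PAGE_SHIFT;
	uint64_t heap_context_pa = phys_of_page(page_index) + offset_within_page;

	printf("flush %u bytes at PA 0x%llx (page %u, offset 0x%x)\n",
	       ctx_size_aligned, (unsigned long long)heap_context_pa,
	       page_index, offset_within_page);
	return 0;
}
```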
.. | ..
88 | 120 |                                              u64 const heap_gpu_va)
89 | 121 | {
90 | 122 |         struct kbase_context *const kctx = ctx_alloc->kctx;
91 | | -        u64 ctx_offset = 0;
| 123 | +        u32 ctx_offset = 0;
92 | 124 |         unsigned int heap_nr = 0;
93 | 125 |
94 | 126 |         lockdep_assert_held(&ctx_alloc->lock);
.. | ..
99 | 131 |         if (WARN_ON(heap_gpu_va < ctx_alloc->gpu_va))
100 | 132 |                 return;
101 | 133 |
102 | | -        ctx_offset = heap_gpu_va - ctx_alloc->gpu_va;
| 134 | +        ctx_offset = (u32)(heap_gpu_va - ctx_alloc->gpu_va);
103 | 135 |
104 | | -        if (WARN_ON(ctx_offset >= HEAP_CTX_REGION_SIZE) ||
105 | | -            WARN_ON(ctx_offset % HEAP_CTX_SIZE))
| 136 | +        if (WARN_ON(ctx_offset >= (ctx_alloc->region->nr_pages << PAGE_SHIFT)) ||
| 137 | +            WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned))
106 | 138 |                 return;
107 | 139 |
108 | | -        heap_nr = ctx_offset / HEAP_CTX_SIZE;
| 140 | +        evict_heap_context(ctx_alloc, heap_gpu_va);
| 141 | +
| 142 | +        heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned;
109 | 143 |         dev_dbg(kctx->kbdev->dev,
110 | 144 |                 "Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va);
111 | 145 |
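Before a slot is released, the offset is checked against the pool bounds and the aligned context size, and only then is the slot index recovered. A minimal standalone model of those checks (plain C; `assert()` stands in for the driver's WARN_ON-and-return, and the sizes are invented):

```c
/*
 * Minimal model (plain C, not kbase code) of the sanity checks on the free
 * path: the offset must lie inside the pool region and must be an exact
 * multiple of the aligned context size before the slot index is recovered.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint32_t ctx_size_aligned = 64;   /* heap_context_size_aligned */
	const uint32_t region_bytes = 1u << 12; /* nr_pages << PAGE_SHIFT    */
	const uint32_t ctx_offset = 3 * ctx_size_aligned;

	/* Equivalent of the two WARN_ON() checks before freeing a slot. */
	assert(ctx_offset < region_bytes);
	assert(ctx_offset % ctx_size_aligned == 0);

	printf("freeing heap context slot %u\n", ctx_offset / ctx_size_aligned);
	return 0;
}
```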
.. | ..
116 | 150 |                 struct kbase_csf_heap_context_allocator *const ctx_alloc,
117 | 151 |                 struct kbase_context *const kctx)
118 | 152 | {
| 153 | +        const u32 gpu_cache_line_size =
| 154 | +                (1U << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
| 155 | +
119 | 156 |         /* We cannot pre-allocate GPU memory here because the
120 | 157 |          * custom VA zone may not have been created yet.
121 | 158 |          */
122 | 159 |         ctx_alloc->kctx = kctx;
123 | 160 |         ctx_alloc->region = NULL;
124 | 161 |         ctx_alloc->gpu_va = 0;
| 162 | +        ctx_alloc->heap_context_size_aligned =
| 163 | +                (HEAP_CTX_SIZE + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1);
125 | 164 |
126 | 165 |         mutex_init(&ctx_alloc->lock);
127 | 166 |         bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS);
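The aligned context size is computed with the usual power-of-two round-up, `(x + a - 1) & ~(a - 1)`, so each context occupies whole L2 cache lines; presumably this keeps a per-context flush from disturbing a neighbouring context's data. A quick standalone check (plain C), assuming a 64-byte line size rather than reading the GPU's `log2_line_size`:

```c
/*
 * Standalone check (plain C) of the cache-line round-up used above.
 * The 64-byte line size is an assumption for the example; the driver
 * derives it from the GPU's log2_line_size property.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t align_up(uint32_t x, uint32_t a) /* 'a' must be a power of two */
{
	return (x + a - 1) & ~(a - 1);
}

int main(void)
{
	const uint32_t heap_ctx_size = 32;   /* HEAP_CTX_SIZE                */
	const uint32_t cache_line = 1u << 6; /* assumed log2_line_size == 6  */

	/* 32 bytes rounded up to a 64-byte line -> 64 bytes per context,
	 * so no two contexts share a cache line. */
	printf("aligned context size = %u\n", align_up(heap_ctx_size, cache_line));
	return 0;
}
```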
.. | ..
142 | 181 |
143 | 182 |         if (ctx_alloc->region) {
144 | 183 |                 kbase_gpu_vm_lock(kctx);
145 | | -                ctx_alloc->region->flags &= ~KBASE_REG_NO_USER_FREE;
| 184 | +                WARN_ON(!kbase_va_region_is_no_user_free(ctx_alloc->region));
| 185 | +
| 186 | +                kbase_va_region_no_user_free_dec(ctx_alloc->region);
146 | 187 |                 kbase_mem_free_region(kctx, ctx_alloc->region);
147 | 188 |                 kbase_gpu_vm_unlock(kctx);
148 | 189 |         }
.. | ..
154 | 195 |                 struct kbase_csf_heap_context_allocator *const ctx_alloc)
155 | 196 | {
156 | 197 |         struct kbase_context *const kctx = ctx_alloc->kctx;
157 | | -        u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
158 | | -                BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE;
159 | | -        u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE);
| 198 | +        u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
| 199 | +                BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD;
| 200 | +        u64 nr_pages = PFN_UP(MAX_TILER_HEAPS * ctx_alloc->heap_context_size_aligned);
160 | 201 |         u64 heap_gpu_va = 0;
161 | 202 |
162 | | -#ifdef CONFIG_MALI_VECTOR_DUMP
163 | | -        flags |= BASE_MEM_PROT_CPU_RD;
164 | | -#endif
| 203 | +        /* Calls to this function are inherently asynchronous, with respect to
| 204 | +         * MMU operations.
| 205 | +         */
| 206 | +        const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
165 | 207 |
166 | 208 |         mutex_lock(&ctx_alloc->lock);
167 | 209 |
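The pool size is now derived at runtime from `PFN_UP(MAX_TILER_HEAPS * heap_context_size_aligned)` instead of the removed `HEAP_CTX_REGION_SIZE` macro. A back-of-the-envelope check with assumed values (128 heaps, 64-byte aligned contexts, 4 KiB pages; none of these are taken from the driver headers):

```c
/*
 * Back-of-the-envelope check (plain C) of the pool sizing above. PFN_UP(x)
 * is the number of whole pages needed to hold x bytes. All constants here
 * are example assumptions, not the driver's real configuration.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u
#define PFN_UP(x) (((x) + PAGE_SIZE - 1) / PAGE_SIZE)

int main(void)
{
	const uint64_t max_tiler_heaps = 128;
	const uint64_t ctx_size_aligned = 64; /* 32 bytes rounded to a 64-byte line */
	const uint64_t nr_pages = PFN_UP(max_tiler_heaps * ctx_size_aligned);

	/* 128 * 64 = 8192 bytes -> 2 pages of 4 KiB for the whole pool. */
	printf("heap context pool: %llu page(s)\n", (unsigned long long)nr_pages);
	return 0;
}
```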
.. | ..
169 | 211 |          * allocate it.
170 | 212 |          */
171 | 213 |         if (!ctx_alloc->region) {
172 | | -                ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages,
173 | | -                        0, &flags, &ctx_alloc->gpu_va);
| 214 | +                ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
| 215 | +                                                    &ctx_alloc->gpu_va, mmu_sync_info);
174 | 216 |         }
175 | 217 |
176 | 218 |         /* If the pool still isn't allocated then an error occurred. */
177 | | -        if (unlikely(!ctx_alloc->region)) {
178 | | -                dev_err(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts\n");
179 | | -        } else {
| 219 | +        if (unlikely(!ctx_alloc->region))
| 220 | +                dev_dbg(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts");
| 221 | +        else
180 | 222 |                 heap_gpu_va = sub_alloc(ctx_alloc);
181 | | -        }
182 | 223 |
183 | 224 |         mutex_unlock(&ctx_alloc->lock);
184 | 225 |
---|