.. | .. |
---|
1 | 1 | // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note |
---|
2 | 2 | /* |
---|
3 | 3 | * |
---|
4 | | - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. |
---|
| 4 | + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. |
---|
5 | 5 | * |
---|
6 | 6 | * This program is free software and is provided to you under the terms of the |
---|
7 | 7 | * GNU General Public License version 2 as published by the Free Software |
---|
.. | .. |
---|
25 | 25 | #include "mali_kbase_csf_tiler_heap_def.h" |
---|
26 | 26 | #include "mali_kbase_csf_heap_context_alloc.h" |
---|
27 | 27 | |
---|
| 28 | +/* Tiler heap shrink stop limit for maintaining a minimum number of chunks */ |
---|
| 29 | +#define HEAP_SHRINK_STOP_LIMIT (1) |
---|
| 30 | + |
---|
| 31 | +/** |
---|
| 32 | + * struct kbase_csf_gpu_buffer_heap - A gpu buffer object specific to tiler heap |
---|
| 33 | + * |
---|
| 34 | + * @cdsbp_0: Descriptor_type and buffer_type |
---|
| 35 | + * @size: The size of the current heap chunk |
---|
| 36 | + * @pointer: Pointer to the current heap chunk |
---|
| 37 | + * @low_pointer: Pointer to low end of current heap chunk |
---|
| 38 | + * @high_pointer: Pointer to high end of current heap chunk |
---|
| 39 | + */ |
---|
| 40 | +struct kbase_csf_gpu_buffer_heap { |
---|
| 41 | + u32 cdsbp_0; |
---|
| 42 | + u32 size; |
---|
| 43 | + u64 pointer; |
---|
| 44 | + u64 low_pointer; |
---|
| 45 | + u64 high_pointer; |
---|
| 46 | +} __packed; |
---|
| 47 | + |
---|
28 | 48 | /** |
---|
29 | 49 | * encode_chunk_ptr - Encode the address and size of a chunk as an integer. |
---|
| 50 | + * |
---|
| 51 | + * @chunk_size: Size of a tiler heap chunk, in bytes. |
---|
| 52 | + * @chunk_addr: GPU virtual address of the same tiler heap chunk. |
---|
30 | 53 | * |
---|
31 | 54 | * The size and address of the next chunk in a list are packed into a single |
---|
32 | 55 | * 64-bit value for storage in a chunk's header. This function returns that |
---|
33 | 56 | * value. |
---|
34 | | - * |
---|
35 | | - * @chunk_size: Size of a tiler heap chunk, in bytes. |
---|
36 | | - * @chunk_addr: GPU virtual address of the same tiler heap chunk. |
---|
37 | 57 | * |
---|
38 | 58 | * Return: Next chunk pointer suitable for writing into a chunk header. |
---|
39 | 59 | */ |
---|
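As the comment above describes, the next-chunk header is a single 64-bit value that carries both the chunk size and its GPU address. The sketch below only illustrates that packing: the real shift and mask constants live in mali_kbase_csf_tiler_heap_def.h, which is not part of this hunk, so the values used here are placeholders.

```c
/* Illustrative sketch only: the real field layout is defined by the CHUNK_*
 * constants in mali_kbase_csf_tiler_heap_def.h (not shown in this hunk), so
 * the shift values below are placeholders.
 */
#include <linux/types.h>

#define SKETCH_ALIGN_SHIFT 12 /* placeholder: chunks are at least page aligned */
#define SKETCH_ADDR_POS 12    /* placeholder: bit position of the address field */

static u64 sketch_encode_chunk_ptr(u32 chunk_size, u64 chunk_addr)
{
	/* Strip the alignment bits of each field, then pack size and address
	 * side by side into the single u64 the GPU reads from a chunk header.
	 */
	u64 size_field = (u64)(chunk_size >> SKETCH_ALIGN_SHIFT);
	u64 addr_field = (chunk_addr >> SKETCH_ALIGN_SHIFT) << SKETCH_ADDR_POS;

	return size_field | addr_field;
}
```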
.. | .. |
---|
66 | 86 | static struct kbase_csf_tiler_heap_chunk *get_last_chunk( |
---|
67 | 87 | struct kbase_csf_tiler_heap *const heap) |
---|
68 | 88 | { |
---|
69 | | - lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); |
---|
70 | | - |
---|
71 | 89 | if (list_empty(&heap->chunks_list)) |
---|
72 | 90 | return NULL; |
---|
73 | 91 | |
---|
.. | .. |
---|
76 | 94 | } |
---|
77 | 95 | |
---|
78 | 96 | /** |
---|
| 97 | + * remove_external_chunk_mappings - Remove external mappings from a chunk that |
---|
| 98 | + * is being transitioned to the tiler heap |
---|
| 99 | + * memory system. |
---|
| 100 | + * |
---|
| 101 | + * @kctx: kbase context the chunk belongs to. |
---|
| 102 | + * @chunk: The chunk whose external mappings are going to be removed. |
---|
| 103 | + * |
---|
| 104 | + * This function marks the region as DONT NEED. Along with NO_USER_FREE, this indicates |
---|
| 105 | + * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other |
---|
| 106 | + * parts of kbase outside of tiler heap management should not take references on its physical |
---|
| 107 | + * pages, and should not modify them. |
---|
| 108 | + */ |
---|
| 109 | +static void remove_external_chunk_mappings(struct kbase_context *const kctx, |
---|
| 110 | + struct kbase_csf_tiler_heap_chunk *chunk) |
---|
| 111 | +{ |
---|
| 112 | + lockdep_assert_held(&kctx->reg_lock); |
---|
| 113 | + |
---|
| 114 | + if (chunk->region->cpu_alloc != NULL) { |
---|
| 115 | + kbase_mem_shrink_cpu_mapping(kctx, chunk->region, 0, |
---|
| 116 | + chunk->region->cpu_alloc->nents); |
---|
| 117 | + } |
---|
| 118 | +#if !defined(CONFIG_MALI_VECTOR_DUMP) |
---|
| 119 | + chunk->region->flags |= KBASE_REG_DONT_NEED; |
---|
| 120 | +#endif |
---|
| 121 | + |
---|
| 122 | + dev_dbg(kctx->kbdev->dev, "Removed external mappings from chunk 0x%llX", chunk->gpu_va); |
---|
| 123 | +} |
---|
| 124 | + |
---|
| 125 | +/** |
---|
79 | 126 | * link_chunk - Link a chunk into a tiler heap |
---|
| 127 | + * |
---|
| 128 | + * @heap: Pointer to the tiler heap. |
---|
| 129 | + * @chunk: Pointer to the heap chunk to be linked. |
---|
80 | 130 | * |
---|
81 | 131 | * Unless the @chunk is the first in the kernel's list of chunks belonging to |
---|
82 | 132 | * a given tiler heap, this function stores the size and address of the @chunk |
---|
83 | 133 | * in the header of the preceding chunk. This requires the GPU memory region |
---|
84 | | - * containing the header to be be mapped temporarily, which can fail. |
---|
85 | | - * |
---|
86 | | - * @heap: Pointer to the tiler heap. |
---|
87 | | - * @chunk: Pointer to the heap chunk to be linked. |
---|
| 134 | + * containing the header to be mapped temporarily, which can fail. |
---|
88 | 135 | * |
---|
89 | 136 | * Return: 0 if successful or a negative error code on failure. |
---|
90 | 137 | */ |
---|
.. | .. |
---|
95 | 142 | |
---|
96 | 143 | if (prev) { |
---|
97 | 144 | struct kbase_context *const kctx = heap->kctx; |
---|
98 | | - struct kbase_vmap_struct map; |
---|
99 | | - u64 *const prev_hdr = kbase_vmap_prot(kctx, prev->gpu_va, |
---|
100 | | - sizeof(*prev_hdr), KBASE_REG_CPU_WR, &map); |
---|
| 145 | + u64 *prev_hdr = prev->map.addr; |
---|
101 | 146 | |
---|
102 | | - if (unlikely(!prev_hdr)) { |
---|
103 | | - dev_err(kctx->kbdev->dev, |
---|
104 | | - "Failed to map tiler heap chunk 0x%llX\n", |
---|
105 | | - prev->gpu_va); |
---|
106 | | - return -ENOMEM; |
---|
107 | | - } |
---|
| 147 | + WARN((prev->region->flags & KBASE_REG_CPU_CACHED), |
---|
| 148 | + "Cannot support CPU cached chunks without sync operations"); |
---|
108 | 149 | |
---|
109 | 150 | *prev_hdr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va); |
---|
110 | | - kbase_vunmap(kctx, &map); |
---|
111 | 151 | |
---|
112 | 152 | dev_dbg(kctx->kbdev->dev, |
---|
113 | 153 | "Linked tiler heap chunks, 0x%llX -> 0x%llX\n", |
---|
.. | .. |
---|
120 | 160 | /** |
---|
121 | 161 | * init_chunk - Initialize and link a tiler heap chunk |
---|
122 | 162 | * |
---|
123 | | - * Zero-initialize a new chunk's header (including its pointer to the next |
---|
124 | | - * chunk, which doesn't exist yet) and then update the previous chunk's |
---|
125 | | - * header to link the new chunk into the chunk list. |
---|
126 | | - * |
---|
127 | 163 | * @heap: Pointer to the tiler heap. |
---|
128 | 164 | * @chunk: Pointer to the heap chunk to be initialized and linked. |
---|
129 | 165 | * @link_with_prev: Flag to indicate if the new chunk needs to be linked with |
---|
130 | 166 | * the previously allocated chunk. |
---|
| 167 | + * |
---|
| 168 | + * Zero-initialize a new chunk's header (including its pointer to the next |
---|
| 169 | + * chunk, which doesn't exist yet) and then update the previous chunk's |
---|
| 170 | + * header to link the new chunk into the chunk list. |
---|
131 | 171 | * |
---|
132 | 172 | * Return: 0 if successful or a negative error code on failure. |
---|
133 | 173 | */ |
---|
134 | 174 | static int init_chunk(struct kbase_csf_tiler_heap *const heap, |
---|
135 | 175 | struct kbase_csf_tiler_heap_chunk *const chunk, bool link_with_prev) |
---|
136 | 176 | { |
---|
137 | | - struct kbase_vmap_struct map; |
---|
138 | | - struct u64 *chunk_hdr = NULL; |
---|
| 177 | + int err = 0; |
---|
| 178 | + u64 *chunk_hdr; |
---|
139 | 179 | struct kbase_context *const kctx = heap->kctx; |
---|
| 180 | + |
---|
| 181 | + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); |
---|
140 | 182 | |
---|
141 | 183 | if (unlikely(chunk->gpu_va & ~CHUNK_ADDR_MASK)) { |
---|
142 | 184 | dev_err(kctx->kbdev->dev, |
---|
.. | .. |
---|
144 | 186 | return -EINVAL; |
---|
145 | 187 | } |
---|
146 | 188 | |
---|
147 | | - chunk_hdr = kbase_vmap_prot(kctx, |
---|
148 | | - chunk->gpu_va, CHUNK_HDR_SIZE, KBASE_REG_CPU_WR, &map); |
---|
149 | | - |
---|
150 | | - if (unlikely(!chunk_hdr)) { |
---|
151 | | - dev_err(kctx->kbdev->dev, |
---|
152 | | - "Failed to map a tiler heap chunk header\n"); |
---|
153 | | - return -ENOMEM; |
---|
| 189 | + WARN((chunk->region->flags & KBASE_REG_CPU_CACHED), |
---|
| 190 | + "Cannot support CPU cached chunks without sync operations"); |
---|
| 191 | + chunk_hdr = chunk->map.addr; |
---|
| 192 | + if (WARN(chunk->map.size < CHUNK_HDR_SIZE, |
---|
| 193 | + "Tiler chunk kernel mapping was not large enough for zero-init")) { |
---|
| 194 | + return -EINVAL; |
---|
154 | 195 | } |
---|
155 | 196 | |
---|
156 | 197 | memset(chunk_hdr, 0, CHUNK_HDR_SIZE); |
---|
157 | | - kbase_vunmap(kctx, &map); |
---|
| 198 | + INIT_LIST_HEAD(&chunk->link); |
---|
158 | 199 | |
---|
159 | 200 | if (link_with_prev) |
---|
160 | | - return link_chunk(heap, chunk); |
---|
161 | | - else |
---|
162 | | - return 0; |
---|
163 | | -} |
---|
164 | | - |
---|
165 | | -/** |
---|
166 | | - * create_chunk - Create a tiler heap chunk |
---|
167 | | - * |
---|
168 | | - * This function allocates a chunk of memory for a tiler heap and adds it to |
---|
169 | | - * the end of the list of chunks associated with that heap. The size of the |
---|
170 | | - * chunk is not a parameter because it is configured per-heap not per-chunk. |
---|
171 | | - * |
---|
172 | | - * @heap: Pointer to the tiler heap for which to allocate memory. |
---|
173 | | - * @link_with_prev: Flag to indicate if the chunk to be allocated needs to be |
---|
174 | | - * linked with the previously allocated chunk. |
---|
175 | | - * |
---|
176 | | - * Return: 0 if successful or a negative error code on failure. |
---|
177 | | - */ |
---|
178 | | -static int create_chunk(struct kbase_csf_tiler_heap *const heap, |
---|
179 | | - bool link_with_prev) |
---|
180 | | -{ |
---|
181 | | - int err = 0; |
---|
182 | | - struct kbase_context *const kctx = heap->kctx; |
---|
183 | | - u64 nr_pages = PFN_UP(heap->chunk_size); |
---|
184 | | - u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | |
---|
185 | | - BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE | |
---|
186 | | - BASE_MEM_COHERENT_LOCAL; |
---|
187 | | - struct kbase_csf_tiler_heap_chunk *chunk = NULL; |
---|
188 | | - |
---|
189 | | - flags |= base_mem_group_id_set(kctx->jit_group_id); |
---|
190 | | - |
---|
191 | | -#if defined(CONFIG_MALI_BIFROST_DEBUG) || defined(CONFIG_MALI_VECTOR_DUMP) |
---|
192 | | - flags |= BASE_MEM_PROT_CPU_RD; |
---|
193 | | -#endif |
---|
194 | | - |
---|
195 | | - lockdep_assert_held(&kctx->csf.tiler_heaps.lock); |
---|
196 | | - |
---|
197 | | - chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); |
---|
198 | | - if (unlikely(!chunk)) { |
---|
199 | | - dev_err(kctx->kbdev->dev, |
---|
200 | | - "No kernel memory for a new tiler heap chunk\n"); |
---|
201 | | - return -ENOMEM; |
---|
202 | | - } |
---|
203 | | - |
---|
204 | | - /* Allocate GPU memory for the new chunk. */ |
---|
205 | | - INIT_LIST_HEAD(&chunk->link); |
---|
206 | | - chunk->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, |
---|
207 | | - &flags, &chunk->gpu_va); |
---|
208 | | - |
---|
209 | | - if (unlikely(!chunk->region)) { |
---|
210 | | - dev_err(kctx->kbdev->dev, |
---|
211 | | - "Failed to allocate a tiler heap chunk\n"); |
---|
212 | | - err = -ENOMEM; |
---|
213 | | - } else { |
---|
214 | | - err = init_chunk(heap, chunk, link_with_prev); |
---|
215 | | - if (unlikely(err)) { |
---|
216 | | - kbase_gpu_vm_lock(kctx); |
---|
217 | | - chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; |
---|
218 | | - kbase_mem_free_region(kctx, chunk->region); |
---|
219 | | - kbase_gpu_vm_unlock(kctx); |
---|
220 | | - } |
---|
221 | | - } |
---|
| 201 | + err = link_chunk(heap, chunk); |
---|
222 | 202 | |
---|
223 | 203 | if (unlikely(err)) { |
---|
224 | | - kfree(chunk); |
---|
225 | | - } else { |
---|
226 | | - list_add_tail(&chunk->link, &heap->chunks_list); |
---|
227 | | - heap->chunk_count++; |
---|
228 | | - |
---|
229 | | - dev_dbg(kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n", |
---|
230 | | - chunk->gpu_va); |
---|
| 204 | + dev_err(kctx->kbdev->dev, "Failed to link a chunk to a tiler heap\n"); |
---|
| 205 | + return -EINVAL; |
---|
231 | 206 | } |
---|
| 207 | + |
---|
| 208 | + list_add_tail(&chunk->link, &heap->chunks_list); |
---|
| 209 | + heap->chunk_count++; |
---|
232 | 210 | |
---|
233 | 211 | return err; |
---|
234 | 212 | } |
---|
235 | 213 | |
---|
236 | 214 | /** |
---|
237 | | - * delete_chunk - Delete a tiler heap chunk |
---|
| 215 | + * remove_unlinked_chunk - Remove a chunk that is not currently linked into a |
---|
| 216 | + * heap. |
---|
238 | 217 | * |
---|
239 | | - * This function frees a tiler heap chunk previously allocated by @create_chunk |
---|
240 | | - * and removes it from the list of chunks associated with the heap. |
---|
241 | | - * |
---|
242 | | - * WARNING: The deleted chunk is not unlinked from the list of chunks used by |
---|
243 | | - * the GPU, therefore it is only safe to use this function when |
---|
244 | | - * deleting a heap. |
---|
245 | | - * |
---|
246 | | - * @heap: Pointer to the tiler heap for which @chunk was allocated. |
---|
247 | | - * @chunk: Pointer to a chunk to be deleted. |
---|
| 218 | + * @kctx: Kbase context that was used to allocate the memory. |
---|
| 219 | + * @chunk: Chunk that has been allocated, but not linked into a heap. |
---|
248 | 220 | */ |
---|
249 | | -static void delete_chunk(struct kbase_csf_tiler_heap *const heap, |
---|
250 | | - struct kbase_csf_tiler_heap_chunk *const chunk) |
---|
| 221 | +static void remove_unlinked_chunk(struct kbase_context *kctx, |
---|
| 222 | + struct kbase_csf_tiler_heap_chunk *chunk) |
---|
251 | 223 | { |
---|
252 | | - struct kbase_context *const kctx = heap->kctx; |
---|
253 | | - |
---|
254 | | - lockdep_assert_held(&kctx->csf.tiler_heaps.lock); |
---|
| 224 | + if (WARN_ON(!list_empty(&chunk->link))) |
---|
| 225 | + return; |
---|
255 | 226 | |
---|
256 | 227 | kbase_gpu_vm_lock(kctx); |
---|
257 | | - chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; |
---|
| 228 | + kbase_vunmap(kctx, &chunk->map); |
---|
| 229 | + /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT |
---|
| 230 | + * regions), and so we must clear that flag too before freeing. |
---|
| 231 | + * For "no user free count", we check that the count is 1 as it is a shrinkable region; |
---|
| 232 | + * no other code part within kbase can take a reference to it. |
---|
| 233 | + */ |
---|
| 234 | + WARN_ON(atomic_read(&chunk->region->no_user_free_count) > 1); |
---|
| 235 | + kbase_va_region_no_user_free_dec(chunk->region); |
---|
| 236 | +#if !defined(CONFIG_MALI_VECTOR_DUMP) |
---|
| 237 | + chunk->region->flags &= ~KBASE_REG_DONT_NEED; |
---|
| 238 | +#endif |
---|
258 | 239 | kbase_mem_free_region(kctx, chunk->region); |
---|
259 | 240 | kbase_gpu_vm_unlock(kctx); |
---|
260 | | - list_del(&chunk->link); |
---|
261 | | - heap->chunk_count--; |
---|
| 241 | + |
---|
262 | 242 | kfree(chunk); |
---|
263 | 243 | } |
---|
264 | 244 | |
---|
265 | 245 | /** |
---|
266 | | - * delete_all_chunks - Delete all chunks belonging to a tiler heap |
---|
| 246 | + * alloc_new_chunk - Allocate new chunk metadata for the tiler heap, reserve a fully backed VA |
---|
| 247 | + * region for the chunk, and provide a kernel mapping. |
---|
| 248 | + * @kctx: kbase context with which the chunk will be linked |
---|
| 249 | + * @chunk_size: the size of the chunk from the corresponding heap |
---|
267 | 250 | * |
---|
268 | | - * This function empties the list of chunks associated with a tiler heap by |
---|
269 | | - * freeing all chunks previously allocated by @create_chunk. |
---|
| 251 | + * Allocate the chunk tracking metadata and a corresponding fully backed VA region for the |
---|
| 252 | + * chunk. The kernel may need to invoke the reclaim path while trying to fulfill the allocation, so |
---|
| 253 | + * we cannot hold any lock that would be held in the shrinker paths (JIT evict lock or tiler heap |
---|
| 254 | + * lock). |
---|
| 255 | + * |
---|
| 256 | + * Since the chunk may have its physical backing removed, to prevent use-after-free scenarios we |
---|
| 257 | + * ensure that it is protected from being mapped by other parts of kbase. |
---|
| 258 | + * |
---|
| 259 | + * The chunk's GPU memory can be accessed via its 'map' member, but this should only be done by the |
---|
| 260 | + * shrinker path, as it may be otherwise shrunk at any time. |
---|
| 261 | + * |
---|
| 262 | + * Return: pointer to kbase_csf_tiler_heap_chunk on success or a NULL pointer |
---|
| 263 | + * on failure |
---|
| 264 | + */ |
---|
| 265 | +static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *kctx, |
---|
| 266 | + u64 chunk_size) |
---|
| 267 | +{ |
---|
| 268 | + u64 nr_pages = PFN_UP(chunk_size); |
---|
| 269 | + u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | |
---|
| 270 | + BASEP_MEM_NO_USER_FREE | BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD; |
---|
| 271 | + struct kbase_csf_tiler_heap_chunk *chunk = NULL; |
---|
| 272 | + /* The chunk kernel mapping needs to be large enough to: |
---|
| 273 | + * - initially zero the CHUNK_HDR_SIZE area |
---|
| 274 | + * - on shrinking, access the NEXT_CHUNK_ADDR_SIZE area |
---|
| 275 | + */ |
---|
| 276 | + const size_t chunk_kernel_map_size = max(CHUNK_HDR_SIZE, NEXT_CHUNK_ADDR_SIZE); |
---|
| 277 | + |
---|
| 278 | + /* Calls to this function are inherently synchronous, with respect to |
---|
| 279 | + * MMU operations. |
---|
| 280 | + */ |
---|
| 281 | + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; |
---|
| 282 | + flags |= kbase_mem_group_id_set(kctx->jit_group_id); |
---|
| 283 | + |
---|
| 284 | + chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); |
---|
| 285 | + if (unlikely(!chunk)) { |
---|
| 286 | + dev_err(kctx->kbdev->dev, |
---|
| 287 | + "No kernel memory for a new tiler heap chunk\n"); |
---|
| 288 | + return NULL; |
---|
| 289 | + } |
---|
| 290 | + |
---|
| 291 | + /* Allocate GPU memory for the new chunk. */ |
---|
| 292 | + chunk->region = |
---|
| 293 | + kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info); |
---|
| 294 | + |
---|
| 295 | + if (unlikely(!chunk->region)) { |
---|
| 296 | + dev_err(kctx->kbdev->dev, "Failed to allocate a tiler heap chunk!\n"); |
---|
| 297 | + goto unroll_chunk; |
---|
| 298 | + } |
---|
| 299 | + |
---|
| 300 | + kbase_gpu_vm_lock(kctx); |
---|
| 301 | + |
---|
| 302 | + /* Some checks are done here because NO_USER_FREE still allows such changes to |
---|
| 303 | + * be made while the region lock was dropped |
---|
| 304 | + */ |
---|
| 305 | + if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) { |
---|
| 306 | + dev_err(kctx->kbdev->dev, "Chunk region has active kernel mappings!\n"); |
---|
| 307 | + goto unroll_region; |
---|
| 308 | + } |
---|
| 309 | + |
---|
| 310 | + /* There is a race condition with regard to KBASE_REG_DONT_NEED, where another |
---|
| 311 | + * thread can have the "no user free" refcount increased between kbase_mem_alloc |
---|
| 312 | + * and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by |
---|
| 313 | + * remove_external_chunk_mappings (below). |
---|
| 314 | + * |
---|
| 315 | + * It should be fine and not a security risk if we let the region leak till |
---|
| 316 | + * region tracker termination in such a case. |
---|
| 317 | + */ |
---|
| 318 | + if (unlikely(atomic_read(&chunk->region->no_user_free_count) > 1)) { |
---|
| 319 | + dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_count > 1!\n"); |
---|
| 320 | + goto unroll_region; |
---|
| 321 | + } |
---|
| 322 | + |
---|
| 323 | + /* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE |
---|
| 324 | + * being requested, it's useful to document in code what those restrictions are, and ensure |
---|
| 325 | + * they remain in place in future. |
---|
| 326 | + */ |
---|
| 327 | + if (WARN(!chunk->region->gpu_alloc, |
---|
| 328 | + "NO_USER_FREE chunks should not have had their alloc freed")) { |
---|
| 329 | + goto unroll_region; |
---|
| 330 | + } |
---|
| 331 | + |
---|
| 332 | + if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE, |
---|
| 333 | + "NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) { |
---|
| 334 | + goto unroll_region; |
---|
| 335 | + } |
---|
| 336 | + |
---|
| 337 | + if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC), |
---|
| 338 | + "NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) { |
---|
| 339 | + goto unroll_region; |
---|
| 340 | + } |
---|
| 341 | + |
---|
| 342 | + if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED), |
---|
| 343 | + "NO_USER_FREE chunks should not have been made ephemeral")) { |
---|
| 344 | + goto unroll_region; |
---|
| 345 | + } |
---|
| 346 | + |
---|
| 347 | + if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1, |
---|
| 348 | + "NO_USER_FREE chunks should not have been aliased")) { |
---|
| 349 | + goto unroll_region; |
---|
| 350 | + } |
---|
| 351 | + |
---|
| 352 | + if (unlikely(!kbase_vmap_reg(kctx, chunk->region, chunk->gpu_va, chunk_kernel_map_size, |
---|
| 353 | + (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &chunk->map, |
---|
| 354 | + KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING))) { |
---|
| 355 | + dev_err(kctx->kbdev->dev, "Failed to map chunk header for shrinking!\n"); |
---|
| 356 | + goto unroll_region; |
---|
| 357 | + } |
---|
| 358 | + |
---|
| 359 | + remove_external_chunk_mappings(kctx, chunk); |
---|
| 360 | + kbase_gpu_vm_unlock(kctx); |
---|
| 361 | + |
---|
| 362 | + /* If page migration is enabled, we don't want to migrate tiler heap pages. |
---|
| 363 | + * This does not change if the constituent pages are already marked as isolated. |
---|
| 364 | + */ |
---|
| 365 | + if (kbase_page_migration_enabled) |
---|
| 366 | + kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE); |
---|
| 367 | + |
---|
| 368 | + return chunk; |
---|
| 369 | + |
---|
| 370 | +unroll_region: |
---|
| 371 | + /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT |
---|
| 372 | + * regions), and so we must clear that flag too before freeing. |
---|
| 373 | + */ |
---|
| 374 | + kbase_va_region_no_user_free_dec(chunk->region); |
---|
| 375 | +#if !defined(CONFIG_MALI_VECTOR_DUMP) |
---|
| 376 | + chunk->region->flags &= ~KBASE_REG_DONT_NEED; |
---|
| 377 | +#endif |
---|
| 378 | + kbase_mem_free_region(kctx, chunk->region); |
---|
| 379 | + kbase_gpu_vm_unlock(kctx); |
---|
| 380 | +unroll_chunk: |
---|
| 381 | + kfree(chunk); |
---|
| 382 | + return NULL; |
---|
| 383 | +} |
---|
| 384 | + |
---|
| 385 | +/** |
---|
| 386 | + * create_chunk - Create a tiler heap chunk |
---|
| 387 | + * |
---|
| 388 | + * @heap: Pointer to the tiler heap for which to allocate memory. |
---|
| 389 | + * |
---|
| 390 | + * This function allocates a chunk of memory for a tiler heap and adds it to the |
---|
| 391 | + * list of chunks associated with that heap, both on the host side and in GPU |
---|
| 392 | + * memory. |
---|
| 393 | + * |
---|
| 394 | + * Return: 0 if successful or a negative error code on failure. |
---|
| 395 | + */ |
---|
| 396 | +static int create_chunk(struct kbase_csf_tiler_heap *const heap) |
---|
| 397 | +{ |
---|
| 398 | + int err = 0; |
---|
| 399 | + struct kbase_csf_tiler_heap_chunk *chunk = NULL; |
---|
| 400 | + |
---|
| 401 | + chunk = alloc_new_chunk(heap->kctx, heap->chunk_size); |
---|
| 402 | + if (unlikely(!chunk)) { |
---|
| 403 | + err = -ENOMEM; |
---|
| 404 | + goto allocation_failure; |
---|
| 405 | + } |
---|
| 406 | + |
---|
| 407 | + mutex_lock(&heap->kctx->csf.tiler_heaps.lock); |
---|
| 408 | + err = init_chunk(heap, chunk, true); |
---|
| 409 | + mutex_unlock(&heap->kctx->csf.tiler_heaps.lock); |
---|
| 410 | + |
---|
| 411 | + if (unlikely(err)) |
---|
| 412 | + goto initialization_failure; |
---|
| 413 | + |
---|
| 414 | + dev_dbg(heap->kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n", chunk->gpu_va); |
---|
| 415 | + |
---|
| 416 | + return 0; |
---|
| 417 | +initialization_failure: |
---|
| 418 | + remove_unlinked_chunk(heap->kctx, chunk); |
---|
| 419 | +allocation_failure: |
---|
| 420 | + return err; |
---|
| 421 | +} |
---|
| 422 | + |
---|
| 423 | +/** |
---|
| 424 | + * delete_all_chunks - Delete all chunks belonging to an unlinked tiler heap |
---|
270 | 425 | * |
---|
271 | 426 | * @heap: Pointer to a tiler heap. |
---|
| 427 | + * |
---|
| 428 | + * This function empties the list of chunks associated with a tiler heap by freeing all chunks |
---|
| 429 | + * previously allocated by @create_chunk. |
---|
| 430 | + * |
---|
| 431 | + * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the |
---|
| 432 | + * tiler_heaps lock cannot be held whilst deleting its chunks due to also needing the &struct |
---|
| 433 | + * kbase_context.region_lock. |
---|
| 434 | + * |
---|
| 435 | + * WARNING: Whilst the deleted chunks are unlinked from host memory, they are not unlinked from the |
---|
| 436 | + * list of chunks used by the GPU, therefore it is only safe to use this function when |
---|
| 437 | + * deleting a heap. |
---|
272 | 438 | */ |
---|
273 | 439 | static void delete_all_chunks(struct kbase_csf_tiler_heap *heap) |
---|
274 | 440 | { |
---|
275 | | - struct list_head *entry = NULL, *tmp = NULL; |
---|
276 | 441 | struct kbase_context *const kctx = heap->kctx; |
---|
| 442 | + struct list_head *entry = NULL, *tmp = NULL; |
---|
277 | 443 | |
---|
278 | | - lockdep_assert_held(&kctx->csf.tiler_heaps.lock); |
---|
| 444 | + WARN(!list_empty(&heap->link), |
---|
| 445 | + "Deleting a heap's chunks when that heap is still linked requires the tiler_heaps lock, which cannot be held by the caller"); |
---|
279 | 446 | |
---|
280 | 447 | list_for_each_safe(entry, tmp, &heap->chunks_list) { |
---|
281 | 448 | struct kbase_csf_tiler_heap_chunk *chunk = list_entry( |
---|
282 | 449 | entry, struct kbase_csf_tiler_heap_chunk, link); |
---|
283 | 450 | |
---|
284 | | - delete_chunk(heap, chunk); |
---|
| 451 | + list_del_init(&chunk->link); |
---|
| 452 | + heap->chunk_count--; |
---|
| 453 | + |
---|
| 454 | + remove_unlinked_chunk(kctx, chunk); |
---|
285 | 455 | } |
---|
286 | 456 | } |
---|
287 | 457 | |
---|
288 | 458 | /** |
---|
289 | 459 | * create_initial_chunks - Create the initial list of chunks for a tiler heap |
---|
290 | 460 | * |
---|
291 | | - * This function allocates a given number of chunks for a tiler heap and |
---|
292 | | - * adds them to the list of chunks associated with that heap. |
---|
293 | | - * |
---|
294 | 461 | * @heap: Pointer to the tiler heap for which to allocate memory. |
---|
295 | 462 | * @nchunks: Number of chunks to create. |
---|
| 463 | + * |
---|
| 464 | + * This function allocates a given number of chunks for a tiler heap and |
---|
| 465 | + * adds them to the list of chunks associated with that heap. |
---|
296 | 466 | * |
---|
297 | 467 | * Return: 0 if successful or a negative error code on failure. |
---|
298 | 468 | */ |
---|
.. | .. |
---|
303 | 473 | u32 i; |
---|
304 | 474 | |
---|
305 | 475 | for (i = 0; (i < nchunks) && likely(!err); i++) |
---|
306 | | - err = create_chunk(heap, true); |
---|
| 476 | + err = create_chunk(heap); |
---|
307 | 477 | |
---|
308 | 478 | if (unlikely(err)) |
---|
309 | 479 | delete_all_chunks(heap); |
---|
.. | .. |
---|
312 | 482 | } |
---|
313 | 483 | |
---|
314 | 484 | /** |
---|
315 | | - * delete_heap - Delete a tiler heap |
---|
316 | | - * |
---|
317 | | - * This function frees any chunks allocated for a tiler heap previously |
---|
318 | | - * initialized by @kbase_csf_tiler_heap_init and removes it from the list of |
---|
319 | | - * heaps associated with the kbase context. The heap context structure used by |
---|
320 | | - * the firmware is also freed. |
---|
| 485 | + * delete_heap - Delete an unlinked tiler heap |
---|
321 | 486 | * |
---|
322 | 487 | * @heap: Pointer to a tiler heap to be deleted. |
---|
| 488 | + * |
---|
| 489 | + * This function frees any chunks allocated for a tiler heap previously |
---|
| 490 | + * initialized by @kbase_csf_tiler_heap_init. The heap context structure used by |
---|
| 491 | + * the firmware is also freed. |
---|
| 492 | + * |
---|
| 493 | + * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the |
---|
| 494 | + * tiler_heaps lock cannot be held whilst deleting it due to also needing the &struct |
---|
| 495 | + * kbase_context.region_lock. |
---|
323 | 496 | */ |
---|
324 | 497 | static void delete_heap(struct kbase_csf_tiler_heap *heap) |
---|
325 | 498 | { |
---|
.. | .. |
---|
327 | 500 | |
---|
328 | 501 | dev_dbg(kctx->kbdev->dev, "Deleting tiler heap 0x%llX\n", heap->gpu_va); |
---|
329 | 502 | |
---|
330 | | - lockdep_assert_held(&kctx->csf.tiler_heaps.lock); |
---|
| 503 | + WARN(!list_empty(&heap->link), |
---|
| 504 | + "Deleting a heap that is still linked requires the tiler_heaps lock, which cannot be held by the caller"); |
---|
331 | 505 | |
---|
| 506 | + /* Make sure that all of the VA regions corresponding to the chunks are |
---|
| 507 | + * freed at this time and that the work queue is not trying to access freed |
---|
| 508 | + * memory. |
---|
| 509 | + * |
---|
| 510 | + * Note: since the heap is unlinked, and that no references are made to chunks other |
---|
| 511 | + * than from their heap, there is no need to separately move the chunks out of the |
---|
| 512 | + * heap->chunks_list to delete them. |
---|
| 513 | + */ |
---|
332 | 514 | delete_all_chunks(heap); |
---|
333 | 515 | |
---|
| 516 | + kbase_vunmap(kctx, &heap->gpu_va_map); |
---|
334 | 517 | /* We could optimize context destruction by not freeing leaked heap |
---|
335 | | - * contexts but it doesn't seem worth the extra complexity. |
---|
| 518 | + * contexts but it doesn't seem worth the extra complexity. After this |
---|
| 519 | + * point, the suballocation is returned to the heap context allocator and |
---|
| 520 | + * may be overwritten with new data, meaning heap->gpu_va should not |
---|
| 521 | + * be used past this point. |
---|
336 | 522 | */ |
---|
337 | 523 | kbase_csf_heap_context_allocator_free(&kctx->csf.tiler_heaps.ctx_alloc, |
---|
338 | 524 | heap->gpu_va); |
---|
339 | | - |
---|
340 | | - list_del(&heap->link); |
---|
341 | 525 | |
---|
342 | 526 | WARN_ON(heap->chunk_count); |
---|
343 | 527 | KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, |
---|
344 | 528 | heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0, |
---|
345 | 529 | heap->target_in_flight, 0); |
---|
| 530 | + |
---|
| 531 | + if (heap->buf_desc_reg) { |
---|
| 532 | + kbase_vunmap(kctx, &heap->buf_desc_map); |
---|
| 533 | + kbase_gpu_vm_lock(kctx); |
---|
| 534 | + kbase_va_region_no_user_free_dec(heap->buf_desc_reg); |
---|
| 535 | + kbase_gpu_vm_unlock(kctx); |
---|
| 536 | + } |
---|
346 | 537 | |
---|
347 | 538 | kfree(heap); |
---|
348 | 539 | } |
---|
.. | .. |
---|
350 | 541 | /** |
---|
351 | 542 | * find_tiler_heap - Find a tiler heap from the address of its heap context |
---|
352 | 543 | * |
---|
| 544 | + * @kctx: Pointer to the kbase context to search for a tiler heap. |
---|
| 545 | + * @heap_gpu_va: GPU virtual address of a heap context structure. |
---|
| 546 | + * |
---|
353 | 547 | * Each tiler heap managed by the kernel has an associated heap context |
---|
354 | 548 | * structure used by the firmware. This function finds a tiler heap object from |
---|
355 | 549 | * the GPU virtual address of its associated heap context. The heap context |
---|
356 | 550 | * should have been allocated by @kbase_csf_heap_context_allocator_alloc in the |
---|
357 | 551 | * same @kctx. |
---|
358 | | - * |
---|
359 | | - * @kctx: Pointer to the kbase context to search for a tiler heap. |
---|
360 | | - * @heap_gpu_va: GPU virtual address of a heap context structure. |
---|
361 | 552 | * |
---|
362 | 553 | * Return: pointer to the tiler heap object, or NULL if not found. |
---|
363 | 554 | */ |
---|
.. | .. |
---|
375 | 566 | |
---|
376 | 567 | dev_dbg(kctx->kbdev->dev, "Tiler heap 0x%llX was not found\n", |
---|
377 | 568 | heap_gpu_va); |
---|
| 569 | + |
---|
| 570 | + return NULL; |
---|
| 571 | +} |
---|
| 572 | + |
---|
| 573 | +static struct kbase_csf_tiler_heap_chunk *find_chunk(struct kbase_csf_tiler_heap *heap, |
---|
| 574 | + u64 const chunk_gpu_va) |
---|
| 575 | +{ |
---|
| 576 | + struct kbase_csf_tiler_heap_chunk *chunk = NULL; |
---|
| 577 | + |
---|
| 578 | + lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); |
---|
| 579 | + |
---|
| 580 | + list_for_each_entry(chunk, &heap->chunks_list, link) { |
---|
| 581 | + if (chunk->gpu_va == chunk_gpu_va) |
---|
| 582 | + return chunk; |
---|
| 583 | + } |
---|
| 584 | + |
---|
| 585 | + dev_dbg(heap->kctx->kbdev->dev, "Tiler heap chunk 0x%llX was not found\n", chunk_gpu_va); |
---|
378 | 586 | |
---|
379 | 587 | return NULL; |
---|
380 | 588 | } |
---|
.. | .. |
---|
397 | 605 | |
---|
398 | 606 | void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx) |
---|
399 | 607 | { |
---|
| 608 | + LIST_HEAD(local_heaps_list); |
---|
400 | 609 | struct list_head *entry = NULL, *tmp = NULL; |
---|
401 | 610 | |
---|
402 | 611 | dev_dbg(kctx->kbdev->dev, "Terminating a context for tiler heaps\n"); |
---|
403 | 612 | |
---|
404 | 613 | mutex_lock(&kctx->csf.tiler_heaps.lock); |
---|
| 614 | + list_splice_init(&kctx->csf.tiler_heaps.list, &local_heaps_list); |
---|
| 615 | + mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
405 | 616 | |
---|
406 | | - list_for_each_safe(entry, tmp, &kctx->csf.tiler_heaps.list) { |
---|
| 617 | + list_for_each_safe(entry, tmp, &local_heaps_list) { |
---|
407 | 618 | struct kbase_csf_tiler_heap *heap = list_entry( |
---|
408 | 619 | entry, struct kbase_csf_tiler_heap, link); |
---|
| 620 | + |
---|
| 621 | + list_del_init(&heap->link); |
---|
409 | 622 | delete_heap(heap); |
---|
410 | 623 | } |
---|
411 | 624 | |
---|
412 | | - mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
413 | 625 | mutex_destroy(&kctx->csf.tiler_heaps.lock); |
---|
414 | 626 | |
---|
415 | 627 | kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc); |
---|
416 | 628 | } |
---|
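The reworked kbase_csf_tiler_heap_context_term() above first splices the per-context heap list onto a local list head under csf.tiler_heaps.lock, and only then deletes each heap with that lock dropped, because delete_heap() needs kctx->reg_lock, which must not be taken inside the tiler heaps lock. A minimal, generic sketch of that idiom follows; the my_* names are hypothetical, not kbase API.

```c
/* Minimal sketch of the "splice under lock, tear down outside the lock" idiom.
 * Only the locking shape matters: the list lock is dropped before each object
 * is destroyed, because destruction needs other locks (here, the region lock)
 * that must not nest inside it.
 */
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct my_obj {
	struct list_head link;
};

struct my_ctx {
	struct mutex list_lock;
	struct list_head obj_list;
};

static void my_ctx_term(struct my_ctx *ctx)
{
	LIST_HEAD(local_list);
	struct my_obj *obj, *tmp;

	/* Unlink everything while holding the list lock... */
	mutex_lock(&ctx->list_lock);
	list_splice_init(&ctx->obj_list, &local_list);
	mutex_unlock(&ctx->list_lock);

	/* ...then destroy each object with the lock dropped. */
	list_for_each_entry_safe(obj, tmp, &local_list, link) {
		list_del_init(&obj->link);
		kfree(obj);
	}
}
```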
417 | 629 | |
---|
418 | | -int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, |
---|
419 | | - u32 const chunk_size, u32 const initial_chunks, u32 const max_chunks, |
---|
420 | | - u16 const target_in_flight, u64 *const heap_gpu_va, |
---|
421 | | - u64 *const first_chunk_va) |
---|
| 630 | +/** |
---|
| 631 | + * kbasep_is_buffer_descriptor_region_suitable - Check if a VA region chosen to house |
---|
| 632 | + * the tiler heap buffer descriptor |
---|
| 633 | + * is suitable for the purpose. |
---|
| 634 | + * @kctx: kbase context of the tiler heap |
---|
| 635 | + * @reg: VA region being checked for suitability |
---|
| 636 | + * |
---|
| 637 | + * By design, the tiler heap buffer descriptor memory must not take page faults, |
---|
| 638 | + * so it must be fully backed at allocation time and must remain alive for as |
---|
| 639 | + * long as the tiler heap is alive, meaning it cannot be allocated from |
---|
| 640 | + * JIT/ephemeral or user-freeable memory. |
---|
| 641 | + * |
---|
| 642 | + * Return: true on suitability, false otherwise. |
---|
| 643 | + */ |
---|
| 644 | +static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *const kctx, |
---|
| 645 | + struct kbase_va_region *const reg) |
---|
| 646 | +{ |
---|
| 647 | + if (kbase_is_region_invalid_or_free(reg)) { |
---|
| 648 | + dev_err(kctx->kbdev->dev, "Region is either invalid or free!\n"); |
---|
| 649 | + return false; |
---|
| 650 | + } |
---|
| 651 | + |
---|
| 652 | + if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) || |
---|
| 653 | + (reg->flags & KBASE_REG_PF_GROW)) { |
---|
| 654 | + dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags); |
---|
| 655 | + return false; |
---|
| 656 | + } |
---|
| 657 | + |
---|
| 658 | + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { |
---|
| 659 | + dev_err(kctx->kbdev->dev, "Region has invalid type!\n"); |
---|
| 660 | + return false; |
---|
| 661 | + } |
---|
| 662 | + |
---|
| 663 | + if ((reg->nr_pages != kbase_reg_current_backed_size(reg)) || |
---|
| 664 | + (reg->nr_pages < PFN_UP(sizeof(struct kbase_csf_gpu_buffer_heap)))) { |
---|
| 665 | + dev_err(kctx->kbdev->dev, "Region has invalid backing!\n"); |
---|
| 666 | + return false; |
---|
| 667 | + } |
---|
| 668 | + |
---|
| 669 | + return true; |
---|
| 670 | +} |
---|
| 671 | + |
---|
| 672 | +#define TILER_BUF_DESC_SIZE (sizeof(struct kbase_csf_gpu_buffer_heap)) |
---|
| 673 | + |
---|
| 674 | +int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_size, |
---|
| 675 | + u32 const initial_chunks, u32 const max_chunks, |
---|
| 676 | + u16 const target_in_flight, u64 const buf_desc_va, |
---|
| 677 | + u64 *const heap_gpu_va, u64 *const first_chunk_va) |
---|
422 | 678 | { |
---|
423 | 679 | int err = 0; |
---|
424 | 680 | struct kbase_csf_tiler_heap *heap = NULL; |
---|
425 | 681 | struct kbase_csf_heap_context_allocator *const ctx_alloc = |
---|
426 | 682 | &kctx->csf.tiler_heaps.ctx_alloc; |
---|
| 683 | + struct kbase_csf_tiler_heap_chunk *chunk = NULL; |
---|
| 684 | + struct kbase_va_region *gpu_va_reg = NULL; |
---|
| 685 | + void *vmap_ptr = NULL; |
---|
427 | 686 | |
---|
428 | 687 | dev_dbg(kctx->kbdev->dev, |
---|
429 | | - "Creating a tiler heap with %u chunks (limit: %u) of size %u\n", |
---|
430 | | - initial_chunks, max_chunks, chunk_size); |
---|
| 688 | + "Creating a tiler heap with %u chunks (limit: %u) of size %u, buf_desc_va: 0x%llx\n", |
---|
| 689 | + initial_chunks, max_chunks, chunk_size, buf_desc_va); |
---|
| 690 | + |
---|
| 691 | + if (!kbase_mem_allow_alloc(kctx)) |
---|
| 692 | + return -EINVAL; |
---|
431 | 693 | |
---|
432 | 694 | if (chunk_size == 0) |
---|
433 | 695 | return -EINVAL; |
---|
.. | .. |
---|
446 | 708 | |
---|
447 | 709 | heap = kzalloc(sizeof(*heap), GFP_KERNEL); |
---|
448 | 710 | if (unlikely(!heap)) { |
---|
449 | | - dev_err(kctx->kbdev->dev, |
---|
450 | | - "No kernel memory for a new tiler heap\n"); |
---|
| 711 | + dev_err(kctx->kbdev->dev, "No kernel memory for a new tiler heap"); |
---|
451 | 712 | return -ENOMEM; |
---|
452 | 713 | } |
---|
453 | 714 | |
---|
.. | .. |
---|
455 | 716 | heap->chunk_size = chunk_size; |
---|
456 | 717 | heap->max_chunks = max_chunks; |
---|
457 | 718 | heap->target_in_flight = target_in_flight; |
---|
| 719 | + heap->buf_desc_checked = false; |
---|
458 | 720 | INIT_LIST_HEAD(&heap->chunks_list); |
---|
| 721 | + INIT_LIST_HEAD(&heap->link); |
---|
459 | 722 | |
---|
460 | | - heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc); |
---|
| 723 | + /* Check the buffer descriptor virtual address */ |
---|
| 724 | + if (buf_desc_va) { |
---|
| 725 | + struct kbase_va_region *buf_desc_reg; |
---|
461 | 726 | |
---|
462 | | - mutex_lock(&kctx->csf.tiler_heaps.lock); |
---|
| 727 | + kbase_gpu_vm_lock(kctx); |
---|
| 728 | + buf_desc_reg = |
---|
| 729 | + kbase_region_tracker_find_region_enclosing_address(kctx, buf_desc_va); |
---|
463 | 730 | |
---|
464 | | - if (unlikely(!heap->gpu_va)) { |
---|
465 | | - dev_err(kctx->kbdev->dev, |
---|
466 | | - "Failed to allocate a tiler heap context\n"); |
---|
467 | | - err = -ENOMEM; |
---|
468 | | - } else { |
---|
469 | | - err = create_initial_chunks(heap, initial_chunks); |
---|
470 | | - if (unlikely(err)) { |
---|
471 | | - kbase_csf_heap_context_allocator_free(ctx_alloc, |
---|
472 | | - heap->gpu_va); |
---|
| 731 | + if (!kbasep_is_buffer_descriptor_region_suitable(kctx, buf_desc_reg)) { |
---|
| 732 | + kbase_gpu_vm_unlock(kctx); |
---|
| 733 | + dev_err(kctx->kbdev->dev, |
---|
| 734 | + "Could not find a suitable VA region for the tiler heap buf desc!\n"); |
---|
| 735 | + err = -EINVAL; |
---|
| 736 | + goto buf_desc_not_suitable; |
---|
| 737 | + } |
---|
| 738 | + |
---|
| 739 | + /* If we don't prevent userspace from unmapping this, we may run into |
---|
| 740 | + * use-after-free, as we don't check for the existence of the region throughout. |
---|
| 741 | + */ |
---|
| 742 | + |
---|
| 743 | + heap->buf_desc_va = buf_desc_va; |
---|
| 744 | + heap->buf_desc_reg = buf_desc_reg; |
---|
| 745 | + kbase_va_region_no_user_free_inc(buf_desc_reg); |
---|
| 746 | + |
---|
| 747 | + vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE, |
---|
| 748 | + KBASE_REG_CPU_RD, &heap->buf_desc_map, |
---|
| 749 | + KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); |
---|
| 750 | + |
---|
| 751 | + if (kbase_page_migration_enabled) |
---|
| 752 | + kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE); |
---|
| 753 | + |
---|
| 754 | + kbase_gpu_vm_unlock(kctx); |
---|
| 755 | + |
---|
| 756 | + if (unlikely(!vmap_ptr)) { |
---|
| 757 | + err = -ENOMEM; |
---|
| 758 | + dev_err(kctx->kbdev->dev, |
---|
| 759 | + "Could not vmap buffer descriptor into kernel memory (err %d)\n", |
---|
| 760 | + err); |
---|
| 761 | + goto buf_desc_vmap_failed; |
---|
473 | 762 | } |
---|
474 | 763 | } |
---|
475 | 764 | |
---|
476 | | - if (unlikely(err)) { |
---|
477 | | - kfree(heap); |
---|
478 | | - } else { |
---|
479 | | - struct kbase_csf_tiler_heap_chunk const *first_chunk = |
---|
480 | | - list_first_entry(&heap->chunks_list, |
---|
481 | | - struct kbase_csf_tiler_heap_chunk, link); |
---|
482 | | - |
---|
483 | | - kctx->csf.tiler_heaps.nr_of_heaps++; |
---|
484 | | - heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps; |
---|
485 | | - list_add(&heap->link, &kctx->csf.tiler_heaps.list); |
---|
486 | | - |
---|
487 | | - *heap_gpu_va = heap->gpu_va; |
---|
488 | | - *first_chunk_va = first_chunk->gpu_va; |
---|
489 | | - |
---|
490 | | - KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( |
---|
491 | | - kctx->kbdev, kctx->id, heap->heap_id, |
---|
492 | | - PFN_UP(heap->chunk_size * heap->max_chunks), |
---|
493 | | - PFN_UP(heap->chunk_size * heap->chunk_count), |
---|
494 | | - heap->max_chunks, heap->chunk_size, heap->chunk_count, |
---|
495 | | - heap->target_in_flight, 0); |
---|
496 | | - |
---|
497 | | - dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n", |
---|
498 | | - heap->gpu_va); |
---|
| 765 | + heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc); |
---|
| 766 | + if (unlikely(!heap->gpu_va)) { |
---|
| 767 | + dev_dbg(kctx->kbdev->dev, "Failed to allocate a tiler heap context\n"); |
---|
| 768 | + err = -ENOMEM; |
---|
| 769 | + goto heap_context_alloc_failed; |
---|
499 | 770 | } |
---|
500 | 771 | |
---|
| 772 | + gpu_va_reg = ctx_alloc->region; |
---|
| 773 | + |
---|
| 774 | + kbase_gpu_vm_lock(kctx); |
---|
| 775 | + /* gpu_va_reg was created with BASEP_MEM_NO_USER_FREE, the code to unset this only happens |
---|
| 776 | + * on kctx termination (after all syscalls on kctx have finished), and so it is safe to |
---|
| 777 | + * assume that gpu_va_reg is still present. |
---|
| 778 | + */ |
---|
| 779 | + vmap_ptr = kbase_vmap_reg(kctx, gpu_va_reg, heap->gpu_va, NEXT_CHUNK_ADDR_SIZE, |
---|
| 780 | + (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &heap->gpu_va_map, |
---|
| 781 | + KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); |
---|
| 782 | + kbase_gpu_vm_unlock(kctx); |
---|
| 783 | + if (unlikely(!vmap_ptr)) { |
---|
| 784 | + dev_dbg(kctx->kbdev->dev, "Failed to vmap the correct heap GPU VA address\n"); |
---|
| 785 | + err = -ENOMEM; |
---|
| 786 | + goto heap_context_vmap_failed; |
---|
| 787 | + } |
---|
| 788 | + |
---|
| 789 | + err = create_initial_chunks(heap, initial_chunks); |
---|
| 790 | + if (unlikely(err)) { |
---|
| 791 | + dev_dbg(kctx->kbdev->dev, "Failed to create the initial tiler heap chunks\n"); |
---|
| 792 | + goto create_chunks_failed; |
---|
| 793 | + } |
---|
| 794 | + chunk = list_first_entry(&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link); |
---|
| 795 | + |
---|
| 796 | + *heap_gpu_va = heap->gpu_va; |
---|
| 797 | + *first_chunk_va = chunk->gpu_va; |
---|
| 798 | + |
---|
| 799 | + mutex_lock(&kctx->csf.tiler_heaps.lock); |
---|
| 800 | + kctx->csf.tiler_heaps.nr_of_heaps++; |
---|
| 801 | + heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps; |
---|
| 802 | + list_add(&heap->link, &kctx->csf.tiler_heaps.list); |
---|
| 803 | + |
---|
| 804 | + KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id, |
---|
| 805 | + PFN_UP(heap->chunk_size * heap->max_chunks), |
---|
| 806 | + PFN_UP(heap->chunk_size * heap->chunk_count), |
---|
| 807 | + heap->max_chunks, heap->chunk_size, heap->chunk_count, |
---|
| 808 | + heap->target_in_flight, 0); |
---|
| 809 | + |
---|
| 810 | +#if defined(CONFIG_MALI_VECTOR_DUMP) |
---|
| 811 | + list_for_each_entry(chunk, &heap->chunks_list, link) { |
---|
| 812 | + KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(kctx->kbdev, kctx->id, heap->heap_id, |
---|
| 813 | + chunk->gpu_va); |
---|
| 814 | + } |
---|
| 815 | +#endif |
---|
| 816 | + kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count; |
---|
| 817 | + kctx->running_total_tiler_heap_memory += (u64)heap->chunk_size * heap->chunk_count; |
---|
| 818 | + if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory) |
---|
| 819 | + kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory; |
---|
| 820 | + |
---|
| 821 | + dev_dbg(kctx->kbdev->dev, |
---|
| 822 | + "Created tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", heap->gpu_va, |
---|
| 823 | + buf_desc_va, kctx->tgid, kctx->id); |
---|
501 | 824 | mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
502 | 825 | |
---|
| 826 | + return 0; |
---|
| 827 | + |
---|
| 828 | +create_chunks_failed: |
---|
| 829 | + kbase_vunmap(kctx, &heap->gpu_va_map); |
---|
| 830 | +heap_context_vmap_failed: |
---|
| 831 | + kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va); |
---|
| 832 | +heap_context_alloc_failed: |
---|
| 833 | + if (heap->buf_desc_reg) |
---|
| 834 | + kbase_vunmap(kctx, &heap->buf_desc_map); |
---|
| 835 | +buf_desc_vmap_failed: |
---|
| 836 | + if (heap->buf_desc_reg) { |
---|
| 837 | + kbase_gpu_vm_lock(kctx); |
---|
| 838 | + kbase_va_region_no_user_free_dec(heap->buf_desc_reg); |
---|
| 839 | + kbase_gpu_vm_unlock(kctx); |
---|
| 840 | + } |
---|
| 841 | +buf_desc_not_suitable: |
---|
| 842 | + kfree(heap); |
---|
503 | 843 | return err; |
---|
504 | 844 | } |
---|
505 | 845 | |
---|
.. | .. |
---|
508 | 848 | { |
---|
509 | 849 | int err = 0; |
---|
510 | 850 | struct kbase_csf_tiler_heap *heap = NULL; |
---|
| 851 | + u32 chunk_count = 0; |
---|
| 852 | + u64 heap_size = 0; |
---|
511 | 853 | |
---|
512 | 854 | mutex_lock(&kctx->csf.tiler_heaps.lock); |
---|
513 | | - |
---|
514 | 855 | heap = find_tiler_heap(kctx, heap_gpu_va); |
---|
| 856 | + if (likely(heap)) { |
---|
| 857 | + chunk_count = heap->chunk_count; |
---|
| 858 | + heap_size = heap->chunk_size * chunk_count; |
---|
| 859 | + |
---|
| 860 | + list_del_init(&heap->link); |
---|
| 861 | + } else { |
---|
| 862 | + err = -EINVAL; |
---|
| 863 | + } |
---|
| 864 | + |
---|
| 865 | + /* Update stats whilst still holding the lock so they are in sync with the tiler_heaps.list |
---|
| 866 | + * at all times |
---|
| 867 | + */ |
---|
| 868 | + if (likely(kctx->running_total_tiler_heap_memory >= heap_size)) |
---|
| 869 | + kctx->running_total_tiler_heap_memory -= heap_size; |
---|
| 870 | + else |
---|
| 871 | + dev_warn(kctx->kbdev->dev, |
---|
| 872 | + "Running total tiler heap memory lower than expected!"); |
---|
| 873 | + if (likely(kctx->running_total_tiler_heap_nr_chunks >= chunk_count)) |
---|
| 874 | + kctx->running_total_tiler_heap_nr_chunks -= chunk_count; |
---|
| 875 | + else |
---|
| 876 | + dev_warn(kctx->kbdev->dev, |
---|
| 877 | + "Running total tiler chunk count lower than expected!"); |
---|
| 878 | + if (!err) |
---|
| 879 | + dev_dbg(kctx->kbdev->dev, |
---|
| 880 | + "Terminated tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", |
---|
| 881 | + heap->gpu_va, heap->buf_desc_va, kctx->tgid, kctx->id); |
---|
| 882 | + mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
| 883 | + |
---|
| 884 | + /* Deletion requires the kctx->reg_lock, so the heap must only be operated on whilst unlinked from |
---|
| 885 | + * the kctx's csf.tiler_heaps.list, and without holding the csf.tiler_heaps.lock |
---|
| 886 | + */ |
---|
515 | 887 | if (likely(heap)) |
---|
516 | 888 | delete_heap(heap); |
---|
517 | | - else |
---|
518 | | - err = -EINVAL; |
---|
519 | | - |
---|
520 | | - mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
521 | 889 | |
---|
522 | 890 | return err; |
---|
523 | 891 | } |
---|
524 | 892 | |
---|
525 | 893 | /** |
---|
526 | | - * alloc_new_chunk - Allocate a new chunk for the tiler heap. |
---|
| 894 | + * validate_allocation_request - Check whether the chunk allocation request |
---|
| 895 | + * received on tiler OOM should be handled at |
---|
| 896 | + * current time. |
---|
527 | 897 | * |
---|
528 | | - * This function will allocate a new chunk for the chunked tiler heap depending |
---|
529 | | - * on the settings provided by userspace when the heap was created and the |
---|
530 | | - * heap's statistics (like number of render passes in-flight). |
---|
| 898 | + * @heap: The tiler heap the OOM is associated with |
---|
| 899 | + * @nr_in_flight: Number of fragment jobs in flight |
---|
| 900 | + * @pending_frag_count: Number of pending fragment jobs |
---|
531 | 901 | * |
---|
532 | | - * @heap: Pointer to the tiler heap. |
---|
533 | | - * @nr_in_flight: Number of render passes that are in-flight, must not be zero. |
---|
534 | | - * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage. |
---|
535 | | - * The minimum value is zero but it must be less or equal to |
---|
536 | | - * the total number of render passes in flight |
---|
537 | | - * @new_chunk_ptr: Where to store the GPU virtual address & size of the new |
---|
538 | | - * chunk allocated for the heap. |
---|
| 902 | + * Context: must hold the tiler heap lock to guarantee its lifetime |
---|
539 | 903 | * |
---|
540 | | - * Return: 0 if a new chunk was allocated otherwise an appropriate negative |
---|
541 | | - * error code. |
---|
| 904 | + * Return: |
---|
| 905 | + * * 0 - allowed to allocate an additional chunk |
---|
| 906 | + * * -EINVAL - invalid |
---|
| 907 | + * * -EBUSY - there are fragment jobs still in flight, which may free chunks |
---|
| 908 | + * after completing |
---|
| 909 | + * * -ENOMEM - the targeted number of in-flight chunks has been reached and |
---|
| 910 | + * no new ones will be allocated |
---|
542 | 911 | */ |
---|
543 | | -static int alloc_new_chunk(struct kbase_csf_tiler_heap *heap, |
---|
544 | | - u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr) |
---|
| 912 | +static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight, |
---|
| 913 | + u32 pending_frag_count) |
---|
545 | 914 | { |
---|
546 | | - int err = -ENOMEM; |
---|
547 | | - |
---|
548 | 915 | lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); |
---|
549 | 916 | |
---|
550 | | - if (WARN_ON(!nr_in_flight) || |
---|
551 | | - WARN_ON(pending_frag_count > nr_in_flight)) |
---|
| 917 | + if (WARN_ON(!nr_in_flight) || WARN_ON(pending_frag_count > nr_in_flight)) |
---|
552 | 918 | return -EINVAL; |
---|
553 | 919 | |
---|
554 | 920 | if (nr_in_flight <= heap->target_in_flight) { |
---|
.. | .. |
---|
556 | 922 | /* Not exceeded the target number of render passes yet so be |
---|
557 | 923 | * generous with memory. |
---|
558 | 924 | */ |
---|
559 | | - err = create_chunk(heap, false); |
---|
560 | | - |
---|
561 | | - if (likely(!err)) { |
---|
562 | | - struct kbase_csf_tiler_heap_chunk *new_chunk = |
---|
563 | | - get_last_chunk(heap); |
---|
564 | | - if (!WARN_ON(!new_chunk)) { |
---|
565 | | - *new_chunk_ptr = |
---|
566 | | - encode_chunk_ptr(heap->chunk_size, |
---|
567 | | - new_chunk->gpu_va); |
---|
568 | | - return 0; |
---|
569 | | - } |
---|
570 | | - } |
---|
| 925 | + return 0; |
---|
571 | 926 | } else if (pending_frag_count > 0) { |
---|
572 | | - err = -EBUSY; |
---|
| 927 | + return -EBUSY; |
---|
573 | 928 | } else { |
---|
574 | | - err = -ENOMEM; |
---|
| 929 | + return -ENOMEM; |
---|
575 | 930 | } |
---|
576 | 931 | } else { |
---|
577 | 932 | /* Reached target number of render passes in flight. |
---|
578 | 933 | * Wait for some of them to finish |
---|
579 | 934 | */ |
---|
580 | | - err = -EBUSY; |
---|
| 935 | + return -EBUSY; |
---|
581 | 936 | } |
---|
582 | | - |
---|
583 | | - return err; |
---|
| 937 | + return -ENOMEM; |
---|
584 | 938 | } |
---|
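To make the return codes of validate_allocation_request() concrete, here is a sketch of the policy with hypothetical numbers. It assumes the condition elided from the hunk above only permits the "be generous" case while the heap still has spare chunk capacity; it is not the driver's code, just the shape of the decision.

```c
/* Policy sketch only, not the driver's code. Assumes the elided branch gates
 * the "be generous" case on chunk_count < max_chunks.
 */
#include <linux/errno.h>
#include <linux/types.h>

static int sketch_validate(u32 nr_in_flight, u32 pending_frag_count, u32 chunk_count,
			   u32 max_chunks, u32 target_in_flight)
{
	if (nr_in_flight <= target_in_flight) {
		if (chunk_count < max_chunks)
			return 0; /* under target: allocate another chunk */
		if (pending_frag_count > 0)
			return -EBUSY; /* completing fragment jobs may free chunks */
		return -ENOMEM; /* capacity reached and nothing left to wait on */
	}
	return -EBUSY; /* over target: wait for render passes to finish */
}
```

For example, with target_in_flight = 8 and max_chunks = 16 (hypothetical values): nr_in_flight = 5 with chunk_count = 10 allocates; nr_in_flight = 5 with chunk_count = 16 and pending_frag_count = 2 returns -EBUSY; the same with pending_frag_count = 0 returns -ENOMEM; and nr_in_flight = 12 returns -EBUSY regardless.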
585 | 939 | |
---|
586 | 940 | int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, |
---|
587 | 941 | u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr) |
---|
588 | 942 | { |
---|
589 | 943 | struct kbase_csf_tiler_heap *heap; |
---|
| 944 | + struct kbase_csf_tiler_heap_chunk *chunk; |
---|
590 | 945 | int err = -EINVAL; |
---|
| 946 | + u64 chunk_size = 0; |
---|
| 947 | + u64 heap_id = 0; |
---|
| 948 | + |
---|
| 949 | + /* To avoid potential locking issues during allocation, this is handled |
---|
| 950 | + * in three phases: |
---|
| 951 | + * 1. Take the lock, find the corresponding heap, and find its chunk size |
---|
| 952 | + * (this is always 2 MB, but may change down the line). |
---|
| 953 | + * 2. Allocate memory for the chunk and its region. |
---|
| 954 | + * 3. If the heap still exists, link the new chunk to the end of its list. If it |
---|
| 955 | + * doesn't, roll back the allocation. |
---|
| 956 | + */ |
---|
591 | 957 | |
---|
592 | 958 | mutex_lock(&kctx->csf.tiler_heaps.lock); |
---|
| 959 | + heap = find_tiler_heap(kctx, gpu_heap_va); |
---|
| 960 | + if (likely(heap)) { |
---|
| 961 | + chunk_size = heap->chunk_size; |
---|
| 962 | + heap_id = heap->heap_id; |
---|
| 963 | + } else { |
---|
| 964 | + dev_err(kctx->kbdev->dev, "Heap 0x%llX does not exist", gpu_heap_va); |
---|
| 965 | + mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
| 966 | + goto prelink_failure; |
---|
| 967 | + } |
---|
593 | 968 | |
---|
| 969 | + err = validate_allocation_request(heap, nr_in_flight, pending_frag_count); |
---|
| 970 | + if (unlikely(err)) { |
---|
| 971 | + /* The allocation request can be legitimate, but be invoked on a heap |
---|
| 972 | + * that has already reached the maximum pre-configured capacity. This |
---|
| 973 | + * is useful debug information, but should not be treated as an error, |
---|
| 974 | + * since the request will be re-sent at a later point. |
---|
| 975 | + */ |
---|
| 976 | + dev_dbg(kctx->kbdev->dev, |
---|
| 977 | + "Not allocating new chunk for heap 0x%llX due to current heap state (err %d)", |
---|
| 978 | + gpu_heap_va, err); |
---|
| 979 | + mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
| 980 | + goto prelink_failure; |
---|
| 981 | + } |
---|
| 982 | + mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
| 983 | + /* this heap must not be used whilst we have dropped the lock */ |
---|
| 984 | + heap = NULL; |
---|
| 985 | + |
---|
| 986 | + chunk = alloc_new_chunk(kctx, chunk_size); |
---|
| 987 | + if (unlikely(!chunk)) { |
---|
| 988 | + dev_err(kctx->kbdev->dev, "Could not allocate chunk of size %lld for ctx %d_%d", |
---|
| 989 | + chunk_size, kctx->tgid, kctx->id); |
---|
| 990 | + goto prelink_failure; |
---|
| 991 | + } |
---|
| 992 | + |
---|
| 993 | + /* After this point, the heap that we were targeting could already have had the needed |
---|
| 994 | + * chunks allocated, if we were handling multiple OoM events on multiple threads, so |
---|
| 995 | + * we need to revalidate the need for the allocation. |
---|
| 996 | + */ |
---|
| 997 | + mutex_lock(&kctx->csf.tiler_heaps.lock); |
---|
594 | 998 | heap = find_tiler_heap(kctx, gpu_heap_va); |
---|
595 | 999 | |
---|
596 | | - if (likely(heap)) { |
---|
597 | | - err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count, |
---|
598 | | - new_chunk_ptr); |
---|
599 | | - |
---|
600 | | - KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( |
---|
601 | | - kctx->kbdev, kctx->id, heap->heap_id, |
---|
602 | | - PFN_UP(heap->chunk_size * heap->max_chunks), |
---|
603 | | - PFN_UP(heap->chunk_size * heap->chunk_count), |
---|
604 | | - heap->max_chunks, heap->chunk_size, heap->chunk_count, |
---|
605 | | - heap->target_in_flight, nr_in_flight); |
---|
| 1000 | + if (unlikely(!heap)) { |
---|
| 1001 | + dev_err(kctx->kbdev->dev, "Tiler heap 0x%llX no longer exists!\n", gpu_heap_va); |
---|
| 1002 | + mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
| 1003 | + goto unroll_chunk; |
---|
606 | 1004 | } |
---|
| 1005 | + |
---|
| 1006 | + if (heap_id != heap->heap_id) { |
---|
| 1007 | + dev_err(kctx->kbdev->dev, |
---|
| 1008 | + "Tiler heap 0x%llX was removed from ctx %d_%d while allocating chunk of size %lld!", |
---|
| 1009 | + gpu_heap_va, kctx->tgid, kctx->id, chunk_size); |
---|
| 1010 | + mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
| 1011 | + goto unroll_chunk; |
---|
| 1012 | + } |
---|
| 1013 | + |
---|
| 1014 | + if (WARN_ON(chunk_size != heap->chunk_size)) { |
---|
| 1015 | + mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
| 1016 | + goto unroll_chunk; |
---|
| 1017 | + } |
---|
| 1018 | + |
---|
| 1019 | + err = validate_allocation_request(heap, nr_in_flight, pending_frag_count); |
---|
| 1020 | + if (unlikely(err)) { |
---|
| 1021 | + dev_warn( |
---|
| 1022 | + kctx->kbdev->dev, |
---|
| 1023 | + "Aborting linking chunk to heap 0x%llX: heap state changed during allocation (err %d)", |
---|
| 1024 | + gpu_heap_va, err); |
---|
| 1025 | + mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
| 1026 | + goto unroll_chunk; |
---|
| 1027 | + } |
---|
| 1028 | + |
---|
| 1029 | + err = init_chunk(heap, chunk, false); |
---|
| 1030 | + |
---|
| 1031 | + /* On error, the chunk would not be linked, so we can still treat it as an unlinked |
---|
| 1032 | + * chunk for error handling. |
---|
| 1033 | + */ |
---|
| 1034 | + if (unlikely(err)) { |
---|
| 1035 | + dev_err(kctx->kbdev->dev, |
---|
| 1036 | + "Could not link chunk(0x%llX) with tiler heap 0%llX in ctx %d_%d due to error %d", |
---|
| 1037 | + chunk->gpu_va, gpu_heap_va, kctx->tgid, kctx->id, err); |
---|
| 1038 | + mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
| 1039 | + goto unroll_chunk; |
---|
| 1040 | + } |
---|
| 1041 | + |
---|
| 1042 | + *new_chunk_ptr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va); |
---|
| 1043 | + |
---|
| 1044 | + /* Update the total and peak tiler heap memory records */
---|
| 1045 | + kctx->running_total_tiler_heap_nr_chunks++; |
---|
| 1046 | + kctx->running_total_tiler_heap_memory += heap->chunk_size; |
---|
| 1047 | + |
---|
| 1048 | + if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory) |
---|
| 1049 | + kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory; |
---|
| 1050 | + |
---|
| 1051 | + KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id, |
---|
| 1052 | + PFN_UP(heap->chunk_size * heap->max_chunks), |
---|
| 1053 | + PFN_UP(heap->chunk_size * heap->chunk_count), |
---|
| 1054 | + heap->max_chunks, heap->chunk_size, heap->chunk_count, |
---|
| 1055 | + heap->target_in_flight, nr_in_flight); |
---|
607 | 1056 | |
---|
608 | 1057 | mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
609 | 1058 | |
---|
610 | 1059 | return err; |
---|
| 1060 | +unroll_chunk: |
---|
| 1061 | + remove_unlinked_chunk(kctx, chunk); |
---|
| 1062 | +prelink_failure: |
---|
| 1063 | + return err; |
---|
| 1064 | +} |
---|
| 1065 | + |
---|
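| | +/**
---|
| | + * delete_chunk_physical_pages - Free the physical backing of a heap chunk on reclaim
---|
| | + *
---|
| | + * @heap: Pointer to the tiler heap that the chunk belongs to.
---|
| | + * @chunk_gpu_va: GPU virtual address of the chunk whose pages are to be freed.
---|
| | + * @hdr_val: Location where the chunk's header value is returned, so the caller
---|
| | + * can follow the list to the next chunk.
---|
| | + *
---|
| | + * The chunk's GPU mapping is shrunk to zero and its physical pages are freed,
---|
| | + * then the chunk is unlinked from the heap and its region is queued for
---|
| | + * deferred destruction.
---|
| | + *
---|
| | + * Return: true if the chunk's pages were freed, false if the chunk could not be
---|
| | + * found or its GPU mapping could not be shrunk.
---|
| | + */
---|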
| 1066 | +static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 chunk_gpu_va, |
---|
| 1067 | + u64 *hdr_val) |
---|
| 1068 | +{ |
---|
| 1069 | + int err; |
---|
| 1070 | + u64 *chunk_hdr; |
---|
| 1071 | + struct kbase_context *kctx = heap->kctx; |
---|
| 1072 | + struct kbase_csf_tiler_heap_chunk *chunk = NULL; |
---|
| 1073 | + |
---|
| 1074 | + lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); |
---|
| 1075 | + |
---|
| 1076 | + chunk = find_chunk(heap, chunk_gpu_va); |
---|
| 1077 | + if (unlikely(!chunk)) { |
---|
| 1078 | + dev_warn(kctx->kbdev->dev, |
---|
| 1079 | + "Failed to find tiler heap(0x%llX) chunk(0x%llX) for reclaim-delete\n", |
---|
| 1080 | + heap->gpu_va, chunk_gpu_va); |
---|
| 1081 | + return false; |
---|
| 1082 | + } |
---|
| 1083 | + |
---|
| 1084 | + WARN((chunk->region->flags & KBASE_REG_CPU_CACHED), |
---|
| 1085 | + "Cannot support CPU cached chunks without sync operations"); |
---|
| 1086 | + chunk_hdr = chunk->map.addr; |
---|
| 1087 | + *hdr_val = *chunk_hdr; |
---|
| 1088 | + |
---|
| 1089 | + dev_dbg(kctx->kbdev->dev, |
---|
| 1090 | + "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n", |
---|
| 1091 | + chunk_gpu_va, heap->gpu_va, *hdr_val); |
---|
| 1092 | + |
---|
| 1093 | + err = kbase_mem_shrink_gpu_mapping(kctx, chunk->region, 0, chunk->region->gpu_alloc->nents); |
---|
| 1094 | + if (unlikely(err)) { |
---|
| 1095 | + dev_warn( |
---|
| 1096 | + kctx->kbdev->dev, |
---|
| 1097 | + "Reclaim: shrinking GPU mapping failed on chunk(0x%llx) in heap(0x%llx) (err %d)\n", |
---|
| 1098 | + chunk_gpu_va, heap->gpu_va, err); |
---|
| 1099 | + |
---|
| 1100 | + /* Cannot free the pages whilst references on the GPU remain, so keep the chunk on |
---|
| 1101 | + * the heap's chunk list and try a different heap. |
---|
| 1102 | + */ |
---|
| 1103 | + |
---|
| 1104 | + return false; |
---|
| 1105 | + } |
---|
| 1106 | + /* Destroy the mapping before the physical pages which are mapped are destroyed. */ |
---|
| 1107 | + kbase_vunmap(kctx, &chunk->map); |
---|
| 1108 | + |
---|
| 1109 | + err = kbase_free_phy_pages_helper(chunk->region->gpu_alloc, |
---|
| 1110 | + chunk->region->gpu_alloc->nents); |
---|
| 1111 | + if (unlikely(err)) { |
---|
| 1112 | + dev_warn( |
---|
| 1113 | + kctx->kbdev->dev, |
---|
| 1114 | + "Reclaim: remove physical backing failed on chunk(0x%llx) in heap(0x%llx) (err %d), continuing with deferred removal\n", |
---|
| 1115 | + chunk_gpu_va, heap->gpu_va, err); |
---|
| 1116 | + |
---|
| 1117 | + /* kbase_free_phy_pages_helper() should only fail on invalid input, and WARNs |
---|
| 1118 | + * anyway, so continue instead of returning early. |
---|
| 1119 | + * |
---|
| 1120 | + * Indeed, we don't want to leave the chunk on the heap's chunk list whilst it has |
---|
| 1121 | + * its mapping removed, as that could lead to problems. It's safest to instead |
---|
| 1122 | + * continue with deferred destruction of the chunk. |
---|
| 1123 | + */ |
---|
| 1124 | + } |
---|
| 1125 | + |
---|
| 1126 | + dev_dbg(kctx->kbdev->dev, |
---|
| 1127 | + "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n", |
---|
| 1128 | + chunk_gpu_va, heap->gpu_va, *hdr_val); |
---|
| 1129 | + |
---|
| 1130 | + mutex_lock(&heap->kctx->jit_evict_lock); |
---|
| 1131 | + list_move(&chunk->region->jit_node, &kctx->jit_destroy_head); |
---|
| 1132 | + mutex_unlock(&heap->kctx->jit_evict_lock); |
---|
| 1133 | + |
---|
| 1134 | + list_del(&chunk->link); |
---|
| 1135 | + heap->chunk_count--; |
---|
| 1136 | + kfree(chunk); |
---|
| 1137 | + |
---|
| 1138 | + return true; |
---|
| 1139 | +} |
---|
| 1140 | + |
---|
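| | +/**
---|
| | + * sanity_check_gpu_buffer_heap - Check a heap's userspace buffer descriptor
---|
| | + *
---|
| | + * @heap: Pointer to the tiler heap.
---|
| | + * @desc: Pointer to the CPU mapping of the heap's buffer descriptor.
---|
| | + *
---|
| | + * If the chunk currently referenced by the descriptor can be found on the heap's
---|
| | + * chunk list, the descriptor is marked as checked, allowing HW reclaim. Otherwise
---|
| | + * the check is deferred to a later attempt.
---|
| | + */
---|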
| 1141 | +static void sanity_check_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, |
---|
| 1142 | + struct kbase_csf_gpu_buffer_heap *desc) |
---|
| 1143 | +{ |
---|
| 1144 | + u64 first_hoarded_chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK; |
---|
| 1145 | + |
---|
| 1146 | + lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); |
---|
| 1147 | + |
---|
| 1148 | + if (first_hoarded_chunk_gpu_va) { |
---|
| 1149 | + struct kbase_csf_tiler_heap_chunk *chunk = |
---|
| 1150 | + find_chunk(heap, first_hoarded_chunk_gpu_va); |
---|
| 1151 | + |
---|
| 1152 | + if (likely(chunk)) { |
---|
| 1153 | + dev_dbg(heap->kctx->kbdev->dev, |
---|
| 1154 | + "Buffer descriptor 0x%llX sanity check ok, HW reclaim allowed\n", |
---|
| 1155 | + heap->buf_desc_va); |
---|
| 1156 | + |
---|
| 1157 | + heap->buf_desc_checked = true; |
---|
| 1158 | + return; |
---|
| 1159 | + } |
---|
| 1160 | + } |
---|
| 1161 | + /* If there is no match, defer the check to next time */ |
---|
| 1162 | + dev_dbg(heap->kctx->kbdev->dev, "Buffer descriptor 0x%llX runtime sanity check deferred\n", |
---|
| 1163 | + heap->buf_desc_va); |
---|
| 1164 | +} |
---|
| 1165 | + |
---|
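| | +/**
---|
| | + * can_read_hw_gpu_buffer_heap - Check whether the heap's buffer descriptor is usable
---|
| | + *
---|
| | + * @heap: Pointer to the tiler heap.
---|
| | + * @chunk_gpu_va_ptr: Returns the chunk address read from the buffer descriptor,
---|
| | + * or 0 if the descriptor was not read on this call.
---|
| | + *
---|
| | + * Return: true if the buffer descriptor has passed its runtime sanity check and
---|
| | + * can be used for HW reclaim, otherwise false.
---|
| | + */
---|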
| 1166 | +static bool can_read_hw_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, u64 *chunk_gpu_va_ptr) |
---|
| 1167 | +{ |
---|
| 1168 | + struct kbase_context *kctx = heap->kctx; |
---|
| 1169 | + |
---|
| 1170 | + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); |
---|
| 1171 | + |
---|
| 1172 | + /* Initialize the descriptor pointer value to 0 */ |
---|
| 1173 | + *chunk_gpu_va_ptr = 0; |
---|
| 1174 | + |
---|
| 1175 | + /* The buffer descriptor attached at heap creation is only a hint, so sanity check it at runtime */
---|
| 1176 | + if (heap->buf_desc_reg && !heap->buf_desc_checked) { |
---|
| 1177 | + struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr; |
---|
| 1178 | + |
---|
| 1179 | + /* BufferDescriptor is supplied by userspace, so could be CPU-cached */ |
---|
| 1180 | + if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED) |
---|
| 1181 | + kbase_sync_mem_regions(kctx, &heap->buf_desc_map, KBASE_SYNC_TO_CPU); |
---|
| 1182 | + |
---|
| 1183 | + sanity_check_gpu_buffer_heap(heap, desc); |
---|
| 1184 | + if (heap->buf_desc_checked) |
---|
| 1185 | + *chunk_gpu_va_ptr = desc->pointer & CHUNK_ADDR_MASK; |
---|
| 1186 | + } |
---|
| 1187 | + |
---|
| 1188 | + return heap->buf_desc_checked; |
---|
| 1189 | +} |
---|
| 1190 | + |
---|
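| | +/**
---|
| | + * delete_hoarded_chunks - Free chunks hoarded on the hardware side of a heap
---|
| | + *
---|
| | + * @heap: Pointer to the tiler heap.
---|
| | + *
---|
| | + * Walks the chunk list referenced by the heap's buffer descriptor and frees the
---|
| | + * physical pages of each chunk after the first, stopping at the shrink stop limit
---|
| | + * or on the first failure. The retained chunk's header is updated afterwards.
---|
| | + *
---|
| | + * Return: Number of chunks freed.
---|
| | + */
---|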
| 1191 | +static u32 delete_hoarded_chunks(struct kbase_csf_tiler_heap *heap) |
---|
| 1192 | +{ |
---|
| 1193 | + u32 freed = 0; |
---|
| 1194 | + u64 chunk_gpu_va = 0; |
---|
| 1195 | + struct kbase_context *kctx = heap->kctx; |
---|
| 1196 | + struct kbase_csf_tiler_heap_chunk *chunk = NULL; |
---|
| 1197 | + |
---|
| 1198 | + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); |
---|
| 1199 | + |
---|
| 1200 | + if (can_read_hw_gpu_buffer_heap(heap, &chunk_gpu_va)) { |
---|
| 1201 | + u64 chunk_hdr_val; |
---|
| 1202 | + u64 *hw_hdr; |
---|
| 1203 | + |
---|
| 1204 | + if (!chunk_gpu_va) { |
---|
| 1205 | + struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr; |
---|
| 1206 | + |
---|
| 1207 | + /* BufferDescriptor is supplied by userspace, so could be CPU-cached */ |
---|
| 1208 | + if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED) |
---|
| 1209 | + kbase_sync_mem_regions(kctx, &heap->buf_desc_map, |
---|
| 1210 | + KBASE_SYNC_TO_CPU); |
---|
| 1211 | + chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK; |
---|
| 1212 | + |
---|
| 1213 | + if (!chunk_gpu_va) { |
---|
| 1214 | + dev_dbg(kctx->kbdev->dev, |
---|
| 1215 | + "Buffer descriptor 0x%llX has no chunks (NULL) for reclaim scan\n", |
---|
| 1216 | + heap->buf_desc_va); |
---|
| 1217 | + goto out; |
---|
| 1218 | + } |
---|
| 1219 | + } |
---|
| 1220 | + |
---|
| 1221 | + chunk = find_chunk(heap, chunk_gpu_va); |
---|
| 1222 | + if (unlikely(!chunk)) |
---|
| 1223 | + goto out; |
---|
| 1224 | + |
---|
| 1225 | + WARN((chunk->region->flags & KBASE_REG_CPU_CACHED), |
---|
| 1226 | + "Cannot support CPU cached chunks without sync operations"); |
---|
| 1227 | + hw_hdr = chunk->map.addr; |
---|
| 1228 | + |
---|
| 1229 | + /* Read this chunk's header to obtain the address of the next chunk */
---|
| 1230 | + chunk_hdr_val = *hw_hdr; |
---|
| 1231 | + chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; |
---|
| 1232 | + |
---|
| 1233 | + while (chunk_gpu_va && heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) { |
---|
| 1234 | + bool success = |
---|
| 1235 | + delete_chunk_physical_pages(heap, chunk_gpu_va, &chunk_hdr_val); |
---|
| 1236 | + |
---|
| 1237 | + if (!success) |
---|
| 1238 | + break; |
---|
| 1239 | + |
---|
| 1240 | + freed++; |
---|
| 1241 | + /* On success, chunk_hdr_val is updated, extract the next chunk address */ |
---|
| 1242 | + chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; |
---|
| 1243 | + } |
---|
| 1244 | + |
---|
| 1245 | + /* Update the retained hardware chunk's header after reclaim has deleted chunks */
---|
| 1246 | + *hw_hdr = chunk_hdr_val; |
---|
| 1247 | + |
---|
| 1248 | + dev_dbg(heap->kctx->kbdev->dev, |
---|
| 1249 | + "HW reclaim scan freed chunks: %u, set hw_hdr[0]: 0x%llX\n", freed, |
---|
| 1250 | + chunk_hdr_val); |
---|
| 1251 | + } else { |
---|
| 1252 | + dev_dbg(kctx->kbdev->dev, |
---|
| 1253 | + "Skip HW reclaim scan, (disabled: buffer descriptor 0x%llX)\n", |
---|
| 1254 | + heap->buf_desc_va); |
---|
| 1255 | + } |
---|
| 1256 | +out: |
---|
| 1257 | + return freed; |
---|
| 1258 | +} |
---|
| 1259 | + |
---|
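| | +/**
---|
| | + * delete_unused_chunk_pages - Free the physical pages of a heap's unused chunks
---|
| | + *
---|
| | + * @heap: Pointer to the tiler heap.
---|
| | + *
---|
| | + * Walks the free list held in the heap context, freeing the physical pages of each
---|
| | + * unused chunk, then also scans the chunks hoarded by the hardware. The context's
---|
| | + * tiler heap memory usage counters are updated to account for the freed pages.
---|
| | + *
---|
| | + * Return: Number of pages freed.
---|
| | + */
---|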
| 1260 | +static u64 delete_unused_chunk_pages(struct kbase_csf_tiler_heap *heap) |
---|
| 1261 | +{ |
---|
| 1262 | + u32 freed_chunks = 0; |
---|
| 1263 | + u64 freed_pages = 0; |
---|
| 1264 | + u64 chunk_gpu_va; |
---|
| 1265 | + u64 chunk_hdr_val; |
---|
| 1266 | + struct kbase_context *kctx = heap->kctx; |
---|
| 1267 | + u64 *ctx_ptr; |
---|
| 1268 | + |
---|
| 1269 | + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); |
---|
| 1270 | + |
---|
| 1271 | + WARN(heap->gpu_va_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED, |
---|
| 1272 | + "Cannot support CPU cached heap context without sync operations"); |
---|
| 1273 | + |
---|
| 1274 | + ctx_ptr = heap->gpu_va_map.addr; |
---|
| 1275 | + |
---|
| 1276 | + /* Extract the first chunk address from the context's free_list_head */ |
---|
| 1277 | + chunk_hdr_val = *ctx_ptr; |
---|
| 1278 | + chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; |
---|
| 1279 | + |
---|
| 1280 | + while (chunk_gpu_va) { |
---|
| 1281 | + u64 hdr_val; |
---|
| 1282 | + bool success = delete_chunk_physical_pages(heap, chunk_gpu_va, &hdr_val); |
---|
| 1283 | + |
---|
| 1284 | + if (!success) |
---|
| 1285 | + break; |
---|
| 1286 | + |
---|
| 1287 | + freed_chunks++; |
---|
| 1288 | + chunk_hdr_val = hdr_val; |
---|
| 1289 | + /* extract the next chunk address */ |
---|
| 1290 | + chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; |
---|
| 1291 | + } |
---|
| 1292 | + |
---|
| 1293 | + /* Write the post-scan free list head back to the heap context header */
---|
| 1294 | + *ctx_ptr = chunk_hdr_val; |
---|
| 1295 | + |
---|
| 1296 | + /* Also scan the list of unused chunks hoarded on the hardware side */
---|
| 1297 | + freed_chunks += delete_hoarded_chunks(heap); |
---|
| 1298 | + freed_pages = freed_chunks * PFN_UP(heap->chunk_size); |
---|
| 1299 | + dev_dbg(heap->kctx->kbdev->dev, |
---|
| 1300 | + "Scan reclaim freed chunks/pages %u/%llu, set heap-ctx_u64[0]: 0x%llX\n", |
---|
| 1301 | + freed_chunks, freed_pages, chunk_hdr_val); |
---|
| 1302 | + |
---|
| 1303 | + /* Update context tiler heaps memory usage */ |
---|
| 1304 | + kctx->running_total_tiler_heap_memory -= freed_pages << PAGE_SHIFT; |
---|
| 1305 | + kctx->running_total_tiler_heap_nr_chunks -= freed_chunks; |
---|
| 1306 | + return freed_pages; |
---|
| 1307 | +} |
---|
| 1308 | + |
---|
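| | +/* Scan all tiler heaps of the given context, freeing unused chunk pages until at
---|
| | + * least to_free pages have been reclaimed or every heap has been visited. Returns
---|
| | + * the number of pages actually freed, clamped to U32_MAX.
---|
| | + */
---|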
| 1309 | +u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free) |
---|
| 1310 | +{ |
---|
| 1311 | + u64 freed = 0; |
---|
| 1312 | + struct kbase_csf_tiler_heap *heap; |
---|
| 1313 | + |
---|
| 1314 | + mutex_lock(&kctx->csf.tiler_heaps.lock); |
---|
| 1315 | + |
---|
| 1316 | + list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) { |
---|
| 1317 | + freed += delete_unused_chunk_pages(heap); |
---|
| 1318 | + |
---|
| 1319 | + /* If freed enough, then stop here */ |
---|
| 1320 | + if (freed >= to_free) |
---|
| 1321 | + break; |
---|
| 1322 | + } |
---|
| 1323 | + |
---|
| 1324 | + mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
| 1325 | + /* The scan should never free more than 4G pages, but clamp the value for safety */
---|
| 1326 | + if (WARN_ON(unlikely(freed > U32_MAX))) |
---|
| 1327 | + return U32_MAX; |
---|
| 1328 | + else |
---|
| 1329 | + return (u32)freed; |
---|
| 1330 | +} |
---|
| 1331 | + |
---|
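| | +/**
---|
| | + * count_unused_heap_pages - Estimate the number of freeable pages in a heap
---|
| | + *
---|
| | + * @heap: Pointer to the tiler heap.
---|
| | + *
---|
| | + * The estimate is based purely on the heap's chunk count relative to the shrink
---|
| | + * stop limit, avoiding a walk of the chunk list, so the actual number freed by a
---|
| | + * later scan may differ.
---|
| | + *
---|
| | + * Return: Estimated number of pages that a reclaim scan could free from the heap.
---|
| | + */
---|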
| 1332 | +static u64 count_unused_heap_pages(struct kbase_csf_tiler_heap *heap) |
---|
| 1333 | +{ |
---|
| 1334 | + u32 chunk_cnt = 0; |
---|
| 1335 | + u64 page_cnt = 0; |
---|
| 1336 | + |
---|
| 1337 | + lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); |
---|
| 1338 | + |
---|
| 1339 | + /* The count here is an informed estimate that avoids the costly mapping/unmapping
---|
| 1340 | + * of a full chunk-list walk. The downside is that the number is a less reliable
---|
| 1341 | + * guide for subsequent scan (free) calls on this heap as to what is actually freeable.
---|
| 1342 | + */ |
---|
| 1343 | + if (heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) { |
---|
| 1344 | + chunk_cnt = heap->chunk_count - HEAP_SHRINK_STOP_LIMIT; |
---|
| 1345 | + page_cnt = chunk_cnt * PFN_UP(heap->chunk_size); |
---|
| 1346 | + } |
---|
| 1347 | + |
---|
| 1348 | + dev_dbg(heap->kctx->kbdev->dev, |
---|
| 1349 | + "Reclaim count chunks/pages %u/%llu (estimated), heap_va: 0x%llX\n", chunk_cnt, |
---|
| 1350 | + page_cnt, heap->gpu_va); |
---|
| 1351 | + |
---|
| 1352 | + return page_cnt; |
---|
| 1353 | +} |
---|
| 1354 | + |
---|
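| | +/* Count (as an estimate) the unused tiler heap pages across all heaps of the given
---|
| | + * context that a reclaim scan could free, clamped to U32_MAX.
---|
| | + */
---|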
| 1355 | +u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx) |
---|
| 1356 | +{ |
---|
| 1357 | + u64 page_cnt = 0; |
---|
| 1358 | + struct kbase_csf_tiler_heap *heap; |
---|
| 1359 | + |
---|
| 1360 | + mutex_lock(&kctx->csf.tiler_heaps.lock); |
---|
| 1361 | + |
---|
| 1362 | + list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) |
---|
| 1363 | + page_cnt += count_unused_heap_pages(heap); |
---|
| 1364 | + |
---|
| 1365 | + mutex_unlock(&kctx->csf.tiler_heaps.lock); |
---|
| 1366 | + |
---|
| 1367 | + /* The count should never exceed 4G pages, but clamp the value for safety */
---|
| 1368 | + if (WARN_ON(unlikely(page_cnt > U32_MAX))) |
---|
| 1369 | + return U32_MAX; |
---|
| 1370 | + else |
---|
| 1371 | + return (u32)page_cnt; |
---|
611 | 1372 | } |
---|