2023-12-06 08f87f769b595151be1afeff53e144f543faa614
kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -25,15 +25,35 @@
 #include "mali_kbase_csf_tiler_heap_def.h"
 #include "mali_kbase_csf_heap_context_alloc.h"
 
+/* Tiler heap shrink stop limit for maintaining a minimum number of chunks */
+#define HEAP_SHRINK_STOP_LIMIT (1)
+
+/**
+ * struct kbase_csf_gpu_buffer_heap - A gpu buffer object specific to tiler heap
+ *
+ * @cdsbp_0: Descriptor_type and buffer_type
+ * @size: The size of the current heap chunk
+ * @pointer: Pointer to the current heap chunk
+ * @low_pointer: Pointer to low end of current heap chunk
+ * @high_pointer: Pointer to high end of current heap chunk
+ */
+struct kbase_csf_gpu_buffer_heap {
+ u32 cdsbp_0;
+ u32 size;
+ u64 pointer;
+ u64 low_pointer;
+ u64 high_pointer;
+} __packed;
+
 /**
 * encode_chunk_ptr - Encode the address and size of a chunk as an integer.
+ *
+ * @chunk_size: Size of a tiler heap chunk, in bytes.
+ * @chunk_addr: GPU virtual address of the same tiler heap chunk.
 *
 * The size and address of the next chunk in a list are packed into a single
 * 64-bit value for storage in a chunk's header. This function returns that
 * value.
- *
- * @chunk_size: Size of a tiler heap chunk, in bytes.
- * @chunk_addr: GPU virtual address of the same tiler heap chunk.
 *
 * Return: Next chunk pointer suitable for writing into a chunk header.
 */
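The encode_chunk_ptr() helper documented above packs the next chunk's size and GPU address into a single 64-bit header word. As a rough illustration of that kind of packing, the standalone C sketch below encodes a page-aligned address together with a size-in-pages field; the field widths, shifts and names here are assumptions for illustration only and do not reproduce the driver's actual CHUNK_* encoding from mali_kbase_csf_tiler_heap_def.h.

/* Illustrative sketch only: pack a chunk size (in 4 KiB pages) into the low
 * bits of a 64-bit word and a page-aligned address into the remaining bits.
 * The real layout is defined by the CHUNK_* macros in
 * mali_kbase_csf_tiler_heap_def.h and may use different widths.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_SIZE_BITS  12u  /* assumed width of the encoded size field */
#define EXAMPLE_SIZE_MASK  ((1ull << EXAMPLE_SIZE_BITS) - 1)
#define EXAMPLE_PAGE_SHIFT 12u  /* 4 KiB pages */

static uint64_t example_encode_chunk_ptr(uint64_t chunk_size, uint64_t chunk_addr)
{
	uint64_t encoded_size = (chunk_size >> EXAMPLE_PAGE_SHIFT) & EXAMPLE_SIZE_MASK;
	uint64_t encoded_addr = chunk_addr >> EXAMPLE_PAGE_SHIFT;

	return (encoded_addr << EXAMPLE_SIZE_BITS) | encoded_size;
}

int main(void)
{
	uint64_t hdr = example_encode_chunk_ptr(2ull << 20, 0x80004000ull);
	uint64_t size = (hdr & EXAMPLE_SIZE_MASK) << EXAMPLE_PAGE_SHIFT;
	uint64_t addr = (hdr >> EXAMPLE_SIZE_BITS) << EXAMPLE_PAGE_SHIFT;

	/* Round-trip check: decoding recovers the original size and address. */
	assert(size == (2ull << 20) && addr == 0x80004000ull);
	printf("header 0x%016llx -> size 0x%llx, addr 0x%llx\n",
	       (unsigned long long)hdr, (unsigned long long)size,
	       (unsigned long long)addr);
	return 0;
}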
@@ -66,8 +86,6 @@
 static struct kbase_csf_tiler_heap_chunk *get_last_chunk(
 struct kbase_csf_tiler_heap *const heap)
 {
- lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
-
 if (list_empty(&heap->chunks_list))
 return NULL;
 
@@ -76,15 +94,44 @@
7694 }
7795
7896 /**
97
+ * remove_external_chunk_mappings - Remove external mappings from a chunk that
98
+ * is being transitioned to the tiler heap
99
+ * memory system.
100
+ *
101
+ * @kctx: kbase context the chunk belongs to.
102
+ * @chunk: The chunk whose external mappings are going to be removed.
103
+ *
104
+ * This function marks the region as DONT NEED. Along with NO_USER_FREE, this indicates
105
+ * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other
106
+ * parts of kbase outside of tiler heap management should not take references on its physical
107
+ * pages, and should not modify them.
108
+ */
109
+static void remove_external_chunk_mappings(struct kbase_context *const kctx,
110
+ struct kbase_csf_tiler_heap_chunk *chunk)
111
+{
112
+ lockdep_assert_held(&kctx->reg_lock);
113
+
114
+ if (chunk->region->cpu_alloc != NULL) {
115
+ kbase_mem_shrink_cpu_mapping(kctx, chunk->region, 0,
116
+ chunk->region->cpu_alloc->nents);
117
+ }
118
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
119
+ chunk->region->flags |= KBASE_REG_DONT_NEED;
120
+#endif
121
+
122
+ dev_dbg(kctx->kbdev->dev, "Removed external mappings from chunk 0x%llX", chunk->gpu_va);
123
+}
124
+
125
+/**
79126 * link_chunk - Link a chunk into a tiler heap
127
+ *
128
+ * @heap: Pointer to the tiler heap.
129
+ * @chunk: Pointer to the heap chunk to be linked.
80130 *
81131 * Unless the @chunk is the first in the kernel's list of chunks belonging to
82132 * a given tiler heap, this function stores the size and address of the @chunk
83133 * in the header of the preceding chunk. This requires the GPU memory region
84
- * containing the header to be be mapped temporarily, which can fail.
85
- *
86
- * @heap: Pointer to the tiler heap.
87
- * @chunk: Pointer to the heap chunk to be linked.
134
+ * containing the header to be mapped temporarily, which can fail.
88135 *
89136 * Return: 0 if successful or a negative error code on failure.
90137 */
@@ -95,19 +142,12 @@
95142
96143 if (prev) {
97144 struct kbase_context *const kctx = heap->kctx;
98
- struct kbase_vmap_struct map;
99
- u64 *const prev_hdr = kbase_vmap_prot(kctx, prev->gpu_va,
100
- sizeof(*prev_hdr), KBASE_REG_CPU_WR, &map);
145
+ u64 *prev_hdr = prev->map.addr;
101146
102
- if (unlikely(!prev_hdr)) {
103
- dev_err(kctx->kbdev->dev,
104
- "Failed to map tiler heap chunk 0x%llX\n",
105
- prev->gpu_va);
106
- return -ENOMEM;
107
- }
147
+ WARN((prev->region->flags & KBASE_REG_CPU_CACHED),
148
+ "Cannot support CPU cached chunks without sync operations");
108149
109150 *prev_hdr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);
110
- kbase_vunmap(kctx, &map);
111151
112152 dev_dbg(kctx->kbdev->dev,
113153 "Linked tiler heap chunks, 0x%llX -> 0x%llX\n",
@@ -120,23 +160,25 @@
120160 /**
121161 * init_chunk - Initialize and link a tiler heap chunk
122162 *
123
- * Zero-initialize a new chunk's header (including its pointer to the next
124
- * chunk, which doesn't exist yet) and then update the previous chunk's
125
- * header to link the new chunk into the chunk list.
126
- *
127163 * @heap: Pointer to the tiler heap.
128164 * @chunk: Pointer to the heap chunk to be initialized and linked.
129165 * @link_with_prev: Flag to indicate if the new chunk needs to be linked with
130166 * the previously allocated chunk.
167
+ *
168
+ * Zero-initialize a new chunk's header (including its pointer to the next
169
+ * chunk, which doesn't exist yet) and then update the previous chunk's
170
+ * header to link the new chunk into the chunk list.
131171 *
132172 * Return: 0 if successful or a negative error code on failure.
133173 */
134174 static int init_chunk(struct kbase_csf_tiler_heap *const heap,
135175 struct kbase_csf_tiler_heap_chunk *const chunk, bool link_with_prev)
136176 {
137
- struct kbase_vmap_struct map;
138
- struct u64 *chunk_hdr = NULL;
177
+ int err = 0;
178
+ u64 *chunk_hdr;
139179 struct kbase_context *const kctx = heap->kctx;
180
+
181
+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
140182
141183 if (unlikely(chunk->gpu_va & ~CHUNK_ADDR_MASK)) {
142184 dev_err(kctx->kbdev->dev,
@@ -144,155 +186,283 @@
144186 return -EINVAL;
145187 }
146188
147
- chunk_hdr = kbase_vmap_prot(kctx,
148
- chunk->gpu_va, CHUNK_HDR_SIZE, KBASE_REG_CPU_WR, &map);
149
-
150
- if (unlikely(!chunk_hdr)) {
151
- dev_err(kctx->kbdev->dev,
152
- "Failed to map a tiler heap chunk header\n");
153
- return -ENOMEM;
189
+ WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
190
+ "Cannot support CPU cached chunks without sync operations");
191
+ chunk_hdr = chunk->map.addr;
192
+ if (WARN(chunk->map.size < CHUNK_HDR_SIZE,
193
+ "Tiler chunk kernel mapping was not large enough for zero-init")) {
194
+ return -EINVAL;
154195 }
155196
156197 memset(chunk_hdr, 0, CHUNK_HDR_SIZE);
157
- kbase_vunmap(kctx, &map);
198
+ INIT_LIST_HEAD(&chunk->link);
158199
159200 if (link_with_prev)
160
- return link_chunk(heap, chunk);
161
- else
162
- return 0;
163
-}
164
-
165
-/**
166
- * create_chunk - Create a tiler heap chunk
167
- *
168
- * This function allocates a chunk of memory for a tiler heap and adds it to
169
- * the end of the list of chunks associated with that heap. The size of the
170
- * chunk is not a parameter because it is configured per-heap not per-chunk.
171
- *
172
- * @heap: Pointer to the tiler heap for which to allocate memory.
173
- * @link_with_prev: Flag to indicate if the chunk to be allocated needs to be
174
- * linked with the previously allocated chunk.
175
- *
176
- * Return: 0 if successful or a negative error code on failure.
177
- */
178
-static int create_chunk(struct kbase_csf_tiler_heap *const heap,
179
- bool link_with_prev)
180
-{
181
- int err = 0;
182
- struct kbase_context *const kctx = heap->kctx;
183
- u64 nr_pages = PFN_UP(heap->chunk_size);
184
- u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
185
- BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE |
186
- BASE_MEM_COHERENT_LOCAL;
187
- struct kbase_csf_tiler_heap_chunk *chunk = NULL;
188
-
189
- flags |= base_mem_group_id_set(kctx->jit_group_id);
190
-
191
-#if defined(CONFIG_MALI_BIFROST_DEBUG) || defined(CONFIG_MALI_VECTOR_DUMP)
192
- flags |= BASE_MEM_PROT_CPU_RD;
193
-#endif
194
-
195
- lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
196
-
197
- chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
198
- if (unlikely(!chunk)) {
199
- dev_err(kctx->kbdev->dev,
200
- "No kernel memory for a new tiler heap chunk\n");
201
- return -ENOMEM;
202
- }
203
-
204
- /* Allocate GPU memory for the new chunk. */
205
- INIT_LIST_HEAD(&chunk->link);
206
- chunk->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0,
207
- &flags, &chunk->gpu_va);
208
-
209
- if (unlikely(!chunk->region)) {
210
- dev_err(kctx->kbdev->dev,
211
- "Failed to allocate a tiler heap chunk\n");
212
- err = -ENOMEM;
213
- } else {
214
- err = init_chunk(heap, chunk, link_with_prev);
215
- if (unlikely(err)) {
216
- kbase_gpu_vm_lock(kctx);
217
- chunk->region->flags &= ~KBASE_REG_NO_USER_FREE;
218
- kbase_mem_free_region(kctx, chunk->region);
219
- kbase_gpu_vm_unlock(kctx);
220
- }
221
- }
201
+ err = link_chunk(heap, chunk);
222202
223203 if (unlikely(err)) {
224
- kfree(chunk);
225
- } else {
226
- list_add_tail(&chunk->link, &heap->chunks_list);
227
- heap->chunk_count++;
228
-
229
- dev_dbg(kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n",
230
- chunk->gpu_va);
204
+ dev_err(kctx->kbdev->dev, "Failed to link a chunk to a tiler heap\n");
205
+ return -EINVAL;
231206 }
207
+
208
+ list_add_tail(&chunk->link, &heap->chunks_list);
209
+ heap->chunk_count++;
232210
233211 return err;
234212 }
235213
236214 /**
237
- * delete_chunk - Delete a tiler heap chunk
215
+ * remove_unlinked_chunk - Remove a chunk that is not currently linked into a
216
+ * heap.
238217 *
239
- * This function frees a tiler heap chunk previously allocated by @create_chunk
240
- * and removes it from the list of chunks associated with the heap.
241
- *
242
- * WARNING: The deleted chunk is not unlinked from the list of chunks used by
243
- * the GPU, therefore it is only safe to use this function when
244
- * deleting a heap.
245
- *
246
- * @heap: Pointer to the tiler heap for which @chunk was allocated.
247
- * @chunk: Pointer to a chunk to be deleted.
218
+ * @kctx: Kbase context that was used to allocate the memory.
219
+ * @chunk: Chunk that has been allocated, but not linked into a heap.
248220 */
249
-static void delete_chunk(struct kbase_csf_tiler_heap *const heap,
250
- struct kbase_csf_tiler_heap_chunk *const chunk)
221
+static void remove_unlinked_chunk(struct kbase_context *kctx,
222
+ struct kbase_csf_tiler_heap_chunk *chunk)
251223 {
252
- struct kbase_context *const kctx = heap->kctx;
253
-
254
- lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
224
+ if (WARN_ON(!list_empty(&chunk->link)))
225
+ return;
255226
256227 kbase_gpu_vm_lock(kctx);
257
- chunk->region->flags &= ~KBASE_REG_NO_USER_FREE;
228
+ kbase_vunmap(kctx, &chunk->map);
229
+ /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
230
+ * regions), and so we must clear that flag too before freeing.
231
+ * For "no user free count", we check that the count is 1 as it is a shrinkable region;
232
+ * no other code part within kbase can take a reference to it.
233
+ */
234
+ WARN_ON(atomic_read(&chunk->region->no_user_free_count) > 1);
235
+ kbase_va_region_no_user_free_dec(chunk->region);
236
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
237
+ chunk->region->flags &= ~KBASE_REG_DONT_NEED;
238
+#endif
258239 kbase_mem_free_region(kctx, chunk->region);
259240 kbase_gpu_vm_unlock(kctx);
260
- list_del(&chunk->link);
261
- heap->chunk_count--;
241
+
262242 kfree(chunk);
263243 }
264244
265245 /**
266
- * delete_all_chunks - Delete all chunks belonging to a tiler heap
246
+ * alloc_new_chunk - Allocate new chunk metadata for the tiler heap, reserve a fully backed VA
247
+ * region for the chunk, and provide a kernel mapping.
248
+ * @kctx: kbase context with which the chunk will be linked
249
+ * @chunk_size: the size of the chunk from the corresponding heap
267250 *
268
- * This function empties the list of chunks associated with a tiler heap by
269
- * freeing all chunks previously allocated by @create_chunk.
251
+ * Allocate the chunk tracking metadata and a corresponding fully backed VA region for the
252
+ * chunk. The kernel may need to invoke the reclaim path while trying to fulfill the allocation, so
253
+ * we cannot hold any lock that would be held in the shrinker paths (JIT evict lock or tiler heap
254
+ * lock).
255
+ *
256
+ * Since the chunk may have its physical backing removed, to prevent use-after-free scenarios we
257
+ * ensure that it is protected from being mapped by other parts of kbase.
258
+ *
259
+ * The chunk's GPU memory can be accessed via its 'map' member, but should only be done so by the
260
+ * shrinker path, as it may be otherwise shrunk at any time.
261
+ *
262
+ * Return: pointer to kbase_csf_tiler_heap_chunk on success or a NULL pointer
263
+ * on failure
264
+ */
265
+static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *kctx,
266
+ u64 chunk_size)
267
+{
268
+ u64 nr_pages = PFN_UP(chunk_size);
269
+ u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
270
+ BASEP_MEM_NO_USER_FREE | BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD;
271
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
272
+ /* The chunk kernel mapping needs to be large enough to:
273
+ * - initially zero the CHUNK_HDR_SIZE area
274
+ * - on shrinking, access the NEXT_CHUNK_ADDR_SIZE area
275
+ */
276
+ const size_t chunk_kernel_map_size = max(CHUNK_HDR_SIZE, NEXT_CHUNK_ADDR_SIZE);
277
+
278
+ /* Calls to this function are inherently synchronous, with respect to
279
+ * MMU operations.
280
+ */
281
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
282
+ flags |= kbase_mem_group_id_set(kctx->jit_group_id);
283
+
284
+ chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
285
+ if (unlikely(!chunk)) {
286
+ dev_err(kctx->kbdev->dev,
287
+ "No kernel memory for a new tiler heap chunk\n");
288
+ return NULL;
289
+ }
290
+
291
+ /* Allocate GPU memory for the new chunk. */
292
+ chunk->region =
293
+ kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info);
294
+
295
+ if (unlikely(!chunk->region)) {
296
+ dev_err(kctx->kbdev->dev, "Failed to allocate a tiler heap chunk!\n");
297
+ goto unroll_chunk;
298
+ }
299
+
300
+ kbase_gpu_vm_lock(kctx);
301
+
302
+ /* Some checks done here as NO_USER_FREE still allows such things to be made
303
+ * whilst we had dropped the region lock
304
+ */
305
+ if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) {
306
+ dev_err(kctx->kbdev->dev, "Chunk region has active kernel mappings!\n");
307
+ goto unroll_region;
308
+ }
309
+
310
+ /* There is a race condition with regard to KBASE_REG_DONT_NEED, where another
311
+ * thread can have the "no user free" refcount increased between kbase_mem_alloc
312
+ * and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by
313
+ * remove_external_chunk_mappings (below).
314
+ *
315
+ * It should be fine and not a security risk if we let the region leak till
316
+ * region tracker termination in such a case.
317
+ */
318
+ if (unlikely(atomic_read(&chunk->region->no_user_free_count) > 1)) {
319
+ dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_count > 1!\n");
320
+ goto unroll_region;
321
+ }
322
+
323
+ /* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE
324
+ * being requested, it's useful to document in code what those restrictions are, and ensure
325
+ * they remain in place in future.
326
+ */
327
+ if (WARN(!chunk->region->gpu_alloc,
328
+ "NO_USER_FREE chunks should not have had their alloc freed")) {
329
+ goto unroll_region;
330
+ }
331
+
332
+ if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE,
333
+ "NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) {
334
+ goto unroll_region;
335
+ }
336
+
337
+ if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC),
338
+ "NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) {
339
+ goto unroll_region;
340
+ }
341
+
342
+ if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED),
343
+ "NO_USER_FREE chunks should not have been made ephemeral")) {
344
+ goto unroll_region;
345
+ }
346
+
347
+ if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1,
348
+ "NO_USER_FREE chunks should not have been aliased")) {
349
+ goto unroll_region;
350
+ }
351
+
352
+ if (unlikely(!kbase_vmap_reg(kctx, chunk->region, chunk->gpu_va, chunk_kernel_map_size,
353
+ (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &chunk->map,
354
+ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING))) {
355
+ dev_err(kctx->kbdev->dev, "Failed to map chunk header for shrinking!\n");
356
+ goto unroll_region;
357
+ }
358
+
359
+ remove_external_chunk_mappings(kctx, chunk);
360
+ kbase_gpu_vm_unlock(kctx);
361
+
362
+ /* If page migration is enabled, we don't want to migrate tiler heap pages.
363
+ * This does not change if the constituent pages are already marked as isolated.
364
+ */
365
+ if (kbase_page_migration_enabled)
366
+ kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE);
367
+
368
+ return chunk;
369
+
370
+unroll_region:
371
+ /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
372
+ * regions), and so we must clear that flag too before freeing.
373
+ */
374
+ kbase_va_region_no_user_free_dec(chunk->region);
375
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
376
+ chunk->region->flags &= ~KBASE_REG_DONT_NEED;
377
+#endif
378
+ kbase_mem_free_region(kctx, chunk->region);
379
+ kbase_gpu_vm_unlock(kctx);
380
+unroll_chunk:
381
+ kfree(chunk);
382
+ return NULL;
383
+}
384
+
385
+/**
386
+ * create_chunk - Create a tiler heap chunk
387
+ *
388
+ * @heap: Pointer to the tiler heap for which to allocate memory.
389
+ *
390
+ * This function allocates a chunk of memory for a tiler heap, adds it to the
391
+ * list of chunks associated with that heap both on the host side and in GPU
392
+ * memory.
393
+ *
394
+ * Return: 0 if successful or a negative error code on failure.
395
+ */
396
+static int create_chunk(struct kbase_csf_tiler_heap *const heap)
397
+{
398
+ int err = 0;
399
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
400
+
401
+ chunk = alloc_new_chunk(heap->kctx, heap->chunk_size);
402
+ if (unlikely(!chunk)) {
403
+ err = -ENOMEM;
404
+ goto allocation_failure;
405
+ }
406
+
407
+ mutex_lock(&heap->kctx->csf.tiler_heaps.lock);
408
+ err = init_chunk(heap, chunk, true);
409
+ mutex_unlock(&heap->kctx->csf.tiler_heaps.lock);
410
+
411
+ if (unlikely(err))
412
+ goto initialization_failure;
413
+
414
+ dev_dbg(heap->kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n", chunk->gpu_va);
415
+
416
+ return 0;
417
+initialization_failure:
418
+ remove_unlinked_chunk(heap->kctx, chunk);
419
+allocation_failure:
420
+ return err;
421
+}
422
+
423
+/**
424
+ * delete_all_chunks - Delete all chunks belonging to an unlinked tiler heap
270425 *
271426 * @heap: Pointer to a tiler heap.
427
+ *
428
+ * This function empties the list of chunks associated with a tiler heap by freeing all chunks
429
+ * previously allocated by @create_chunk.
430
+ *
431
+ * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the
432
+ * tiler_heaps lock cannot be held whilst deleting its chunks due to also needing the &struct
433
+ * kbase_context.region_lock.
434
+ *
435
+ * WARNING: Whilst the deleted chunks are unlinked from host memory, they are not unlinked from the
436
+ * list of chunks used by the GPU, therefore it is only safe to use this function when
437
+ * deleting a heap.
272438 */
273439 static void delete_all_chunks(struct kbase_csf_tiler_heap *heap)
274440 {
275
- struct list_head *entry = NULL, *tmp = NULL;
276441 struct kbase_context *const kctx = heap->kctx;
442
+ struct list_head *entry = NULL, *tmp = NULL;
277443
278
- lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
444
+ WARN(!list_empty(&heap->link),
445
+ "Deleting a heap's chunks when that heap is still linked requires the tiler_heaps lock, which cannot be held by the caller");
279446
280447 list_for_each_safe(entry, tmp, &heap->chunks_list) {
281448 struct kbase_csf_tiler_heap_chunk *chunk = list_entry(
282449 entry, struct kbase_csf_tiler_heap_chunk, link);
283450
284
- delete_chunk(heap, chunk);
451
+ list_del_init(&chunk->link);
452
+ heap->chunk_count--;
453
+
454
+ remove_unlinked_chunk(kctx, chunk);
285455 }
286456 }
287457
288458 /**
289459 * create_initial_chunks - Create the initial list of chunks for a tiler heap
290460 *
291
- * This function allocates a given number of chunks for a tiler heap and
292
- * adds them to the list of chunks associated with that heap.
293
- *
294461 * @heap: Pointer to the tiler heap for which to allocate memory.
295462 * @nchunks: Number of chunks to create.
463
+ *
464
+ * This function allocates a given number of chunks for a tiler heap and
465
+ * adds them to the list of chunks associated with that heap.
296466 *
297467 * Return: 0 if successful or a negative error code on failure.
298468 */
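create_chunk() above separates allocation from linking: alloc_new_chunk() runs with no tiler_heaps lock held, because the allocation may enter the reclaim/shrinker path that takes the same lock, and only the linking step (init_chunk()) happens under the lock, with the unlinked chunk rolled back if linking fails. The sketch below is a simplified userspace analogue of that pattern using pthreads; the names and the shutting_down revalidation check are assumptions for illustration, not driver code.

/* Simplified userspace analogue of the allocate-outside-the-lock,
 * link-under-the-lock pattern used by create_chunk()/alloc_new_chunk().
 * Names and the validity check are illustrative assumptions.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct example_chunk {
	struct example_chunk *next;
	char payload[4096];
};

struct example_heap {
	pthread_mutex_t lock;        /* stands in for kctx->csf.tiler_heaps.lock */
	struct example_chunk *head;
	bool shutting_down;          /* state that must be revalidated under the lock */
};

static int example_add_chunk(struct example_heap *heap)
{
	/* Phase 1: allocate without the heap lock, so a reclaim path that
	 * needs the same lock cannot deadlock against this allocation.
	 */
	struct example_chunk *chunk = calloc(1, sizeof(*chunk));

	if (!chunk)
		return -1;

	/* Phase 2: take the lock and revalidate before linking, because the
	 * heap state may have changed while no lock was held.
	 */
	pthread_mutex_lock(&heap->lock);
	if (heap->shutting_down) {
		pthread_mutex_unlock(&heap->lock);
		free(chunk);         /* roll back the still-unlinked allocation */
		return -1;
	}
	chunk->next = heap->head;
	heap->head = chunk;
	pthread_mutex_unlock(&heap->lock);
	return 0;
}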
@@ -303,7 +473,7 @@
 u32 i;
 
 for (i = 0; (i < nchunks) && likely(!err); i++)
- err = create_chunk(heap, true);
+ err = create_chunk(heap);
 
 if (unlikely(err))
 delete_all_chunks(heap);
@@ -312,14 +482,17 @@
312482 }
313483
314484 /**
315
- * delete_heap - Delete a tiler heap
316
- *
317
- * This function frees any chunks allocated for a tiler heap previously
318
- * initialized by @kbase_csf_tiler_heap_init and removes it from the list of
319
- * heaps associated with the kbase context. The heap context structure used by
320
- * the firmware is also freed.
485
+ * delete_heap - Delete an unlinked tiler heap
321486 *
322487 * @heap: Pointer to a tiler heap to be deleted.
488
+ *
489
+ * This function frees any chunks allocated for a tiler heap previously
490
+ * initialized by @kbase_csf_tiler_heap_init. The heap context structure used by
491
+ * the firmware is also freed.
492
+ *
493
+ * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the
494
+ * tiler_heaps lock cannot be held whilst deleting it due to also needing the &struct
495
+ * kbase_context.region_lock.
323496 */
324497 static void delete_heap(struct kbase_csf_tiler_heap *heap)
325498 {
@@ -327,22 +500,40 @@
327500
328501 dev_dbg(kctx->kbdev->dev, "Deleting tiler heap 0x%llX\n", heap->gpu_va);
329502
330
- lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
503
+ WARN(!list_empty(&heap->link),
504
+ "Deleting a heap that is still linked requires the tiler_heaps lock, which cannot be held by the caller");
331505
506
+ /* Make sure that all of the VA regions corresponding to the chunks are
507
+ * freed at this time and that the work queue is not trying to access freed
508
+ * memory.
509
+ *
510
+ * Note: since the heap is unlinked, and that no references are made to chunks other
511
+ * than from their heap, there is no need to separately move the chunks out of the
512
+ * heap->chunks_list to delete them.
513
+ */
332514 delete_all_chunks(heap);
333515
516
+ kbase_vunmap(kctx, &heap->gpu_va_map);
334517 /* We could optimize context destruction by not freeing leaked heap
335
- * contexts but it doesn't seem worth the extra complexity.
518
+ * contexts but it doesn't seem worth the extra complexity. After this
519
+ * point, the suballocation is returned to the heap context allocator and
520
+ * may be overwritten with new data, meaning heap->gpu_va should not
521
+ * be used past this point.
336522 */
337523 kbase_csf_heap_context_allocator_free(&kctx->csf.tiler_heaps.ctx_alloc,
338524 heap->gpu_va);
339
-
340
- list_del(&heap->link);
341525
342526 WARN_ON(heap->chunk_count);
343527 KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id,
344528 heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0,
345529 heap->target_in_flight, 0);
530
+
531
+ if (heap->buf_desc_reg) {
532
+ kbase_vunmap(kctx, &heap->buf_desc_map);
533
+ kbase_gpu_vm_lock(kctx);
534
+ kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
535
+ kbase_gpu_vm_unlock(kctx);
536
+ }
346537
347538 kfree(heap);
348539 }
@@ -350,14 +541,14 @@
350541 /**
351542 * find_tiler_heap - Find a tiler heap from the address of its heap context
352543 *
544
+ * @kctx: Pointer to the kbase context to search for a tiler heap.
545
+ * @heap_gpu_va: GPU virtual address of a heap context structure.
546
+ *
353547 * Each tiler heap managed by the kernel has an associated heap context
354548 * structure used by the firmware. This function finds a tiler heap object from
355549 * the GPU virtual address of its associated heap context. The heap context
356550 * should have been allocated by @kbase_csf_heap_context_allocator_alloc in the
357551 * same @kctx.
358
- *
359
- * @kctx: Pointer to the kbase context to search for a tiler heap.
360
- * @heap_gpu_va: GPU virtual address of a heap context structure.
361552 *
362553 * Return: pointer to the tiler heap object, or NULL if not found.
363554 */
@@ -375,6 +566,23 @@
375566
376567 dev_dbg(kctx->kbdev->dev, "Tiler heap 0x%llX was not found\n",
377568 heap_gpu_va);
569
+
570
+ return NULL;
571
+}
572
+
573
+static struct kbase_csf_tiler_heap_chunk *find_chunk(struct kbase_csf_tiler_heap *heap,
574
+ u64 const chunk_gpu_va)
575
+{
576
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
577
+
578
+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
579
+
580
+ list_for_each_entry(chunk, &heap->chunks_list, link) {
581
+ if (chunk->gpu_va == chunk_gpu_va)
582
+ return chunk;
583
+ }
584
+
585
+ dev_dbg(heap->kctx->kbdev->dev, "Tiler heap chunk 0x%llX was not found\n", chunk_gpu_va);
378586
379587 return NULL;
380588 }
@@ -397,37 +605,91 @@
397605
398606 void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx)
399607 {
608
+ LIST_HEAD(local_heaps_list);
400609 struct list_head *entry = NULL, *tmp = NULL;
401610
402611 dev_dbg(kctx->kbdev->dev, "Terminating a context for tiler heaps\n");
403612
404613 mutex_lock(&kctx->csf.tiler_heaps.lock);
614
+ list_splice_init(&kctx->csf.tiler_heaps.list, &local_heaps_list);
615
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
405616
406
- list_for_each_safe(entry, tmp, &kctx->csf.tiler_heaps.list) {
617
+ list_for_each_safe(entry, tmp, &local_heaps_list) {
407618 struct kbase_csf_tiler_heap *heap = list_entry(
408619 entry, struct kbase_csf_tiler_heap, link);
620
+
621
+ list_del_init(&heap->link);
409622 delete_heap(heap);
410623 }
411624
412
- mutex_unlock(&kctx->csf.tiler_heaps.lock);
413625 mutex_destroy(&kctx->csf.tiler_heaps.lock);
414626
415627 kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc);
416628 }
417629
418
-int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
419
- u32 const chunk_size, u32 const initial_chunks, u32 const max_chunks,
420
- u16 const target_in_flight, u64 *const heap_gpu_va,
421
- u64 *const first_chunk_va)
630
+/**
631
+ * kbasep_is_buffer_descriptor_region_suitable - Check if a VA region chosen to house
632
+ * the tiler heap buffer descriptor
633
+ * is suitable for the purpose.
634
+ * @kctx: kbase context of the tiler heap
635
+ * @reg: VA region being checked for suitability
636
+ *
637
+ * The tiler heap buffer descriptor memory does not admit page faults according
638
+ * to its design, so it must have the entirety of the backing upon allocation,
639
+ * and it has to remain alive as long as the tiler heap is alive, meaning it
640
+ * cannot be allocated from JIT/Ephemeral, or user freeable memory.
641
+ *
642
+ * Return: true on suitability, false otherwise.
643
+ */
644
+static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *const kctx,
645
+ struct kbase_va_region *const reg)
646
+{
647
+ if (kbase_is_region_invalid_or_free(reg)) {
648
+ dev_err(kctx->kbdev->dev, "Region is either invalid or free!\n");
649
+ return false;
650
+ }
651
+
652
+ if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) ||
653
+ (reg->flags & KBASE_REG_PF_GROW)) {
654
+ dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags);
655
+ return false;
656
+ }
657
+
658
+ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
659
+ dev_err(kctx->kbdev->dev, "Region has invalid type!\n");
660
+ return false;
661
+ }
662
+
663
+ if ((reg->nr_pages != kbase_reg_current_backed_size(reg)) ||
664
+ (reg->nr_pages < PFN_UP(sizeof(struct kbase_csf_gpu_buffer_heap)))) {
665
+ dev_err(kctx->kbdev->dev, "Region has invalid backing!\n");
666
+ return false;
667
+ }
668
+
669
+ return true;
670
+}
671
+
672
+#define TILER_BUF_DESC_SIZE (sizeof(struct kbase_csf_gpu_buffer_heap))
673
+
674
+int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_size,
675
+ u32 const initial_chunks, u32 const max_chunks,
676
+ u16 const target_in_flight, u64 const buf_desc_va,
677
+ u64 *const heap_gpu_va, u64 *const first_chunk_va)
422678 {
423679 int err = 0;
424680 struct kbase_csf_tiler_heap *heap = NULL;
425681 struct kbase_csf_heap_context_allocator *const ctx_alloc =
426682 &kctx->csf.tiler_heaps.ctx_alloc;
683
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
684
+ struct kbase_va_region *gpu_va_reg = NULL;
685
+ void *vmap_ptr = NULL;
427686
428687 dev_dbg(kctx->kbdev->dev,
429
- "Creating a tiler heap with %u chunks (limit: %u) of size %u\n",
430
- initial_chunks, max_chunks, chunk_size);
688
+ "Creating a tiler heap with %u chunks (limit: %u) of size %u, buf_desc_va: 0x%llx\n",
689
+ initial_chunks, max_chunks, chunk_size, buf_desc_va);
690
+
691
+ if (!kbase_mem_allow_alloc(kctx))
692
+ return -EINVAL;
431693
432694 if (chunk_size == 0)
433695 return -EINVAL;
@@ -446,8 +708,7 @@
 
 heap = kzalloc(sizeof(*heap), GFP_KERNEL);
 if (unlikely(!heap)) {
- dev_err(kctx->kbdev->dev,
- "No kernel memory for a new tiler heap\n");
+ dev_err(kctx->kbdev->dev, "No kernel memory for a new tiler heap");
 return -ENOMEM;
 }
 
@@ -455,51 +716,130 @@
455716 heap->chunk_size = chunk_size;
456717 heap->max_chunks = max_chunks;
457718 heap->target_in_flight = target_in_flight;
719
+ heap->buf_desc_checked = false;
458720 INIT_LIST_HEAD(&heap->chunks_list);
721
+ INIT_LIST_HEAD(&heap->link);
459722
460
- heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc);
723
+ /* Check on the buffer descriptor virtual Address */
724
+ if (buf_desc_va) {
725
+ struct kbase_va_region *buf_desc_reg;
461726
462
- mutex_lock(&kctx->csf.tiler_heaps.lock);
727
+ kbase_gpu_vm_lock(kctx);
728
+ buf_desc_reg =
729
+ kbase_region_tracker_find_region_enclosing_address(kctx, buf_desc_va);
463730
464
- if (unlikely(!heap->gpu_va)) {
465
- dev_err(kctx->kbdev->dev,
466
- "Failed to allocate a tiler heap context\n");
467
- err = -ENOMEM;
468
- } else {
469
- err = create_initial_chunks(heap, initial_chunks);
470
- if (unlikely(err)) {
471
- kbase_csf_heap_context_allocator_free(ctx_alloc,
472
- heap->gpu_va);
731
+ if (!kbasep_is_buffer_descriptor_region_suitable(kctx, buf_desc_reg)) {
732
+ kbase_gpu_vm_unlock(kctx);
733
+ dev_err(kctx->kbdev->dev,
734
+ "Could not find a suitable VA region for the tiler heap buf desc!\n");
735
+ err = -EINVAL;
736
+ goto buf_desc_not_suitable;
737
+ }
738
+
739
+ /* If we don't prevent userspace from unmapping this, we may run into
740
+ * use-after-free, as we don't check for the existence of the region throughout.
741
+ */
742
+
743
+ heap->buf_desc_va = buf_desc_va;
744
+ heap->buf_desc_reg = buf_desc_reg;
745
+ kbase_va_region_no_user_free_inc(buf_desc_reg);
746
+
747
+ vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE,
748
+ KBASE_REG_CPU_RD, &heap->buf_desc_map,
749
+ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
750
+
751
+ if (kbase_page_migration_enabled)
752
+ kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE);
753
+
754
+ kbase_gpu_vm_unlock(kctx);
755
+
756
+ if (unlikely(!vmap_ptr)) {
757
+ dev_err(kctx->kbdev->dev,
758
+ "Could not vmap buffer descriptor into kernel memory (err %d)\n",
759
+ err);
760
+ err = -ENOMEM;
761
+ goto buf_desc_vmap_failed;
473762 }
474763 }
475764
476
- if (unlikely(err)) {
477
- kfree(heap);
478
- } else {
479
- struct kbase_csf_tiler_heap_chunk const *first_chunk =
480
- list_first_entry(&heap->chunks_list,
481
- struct kbase_csf_tiler_heap_chunk, link);
482
-
483
- kctx->csf.tiler_heaps.nr_of_heaps++;
484
- heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps;
485
- list_add(&heap->link, &kctx->csf.tiler_heaps.list);
486
-
487
- *heap_gpu_va = heap->gpu_va;
488
- *first_chunk_va = first_chunk->gpu_va;
489
-
490
- KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
491
- kctx->kbdev, kctx->id, heap->heap_id,
492
- PFN_UP(heap->chunk_size * heap->max_chunks),
493
- PFN_UP(heap->chunk_size * heap->chunk_count),
494
- heap->max_chunks, heap->chunk_size, heap->chunk_count,
495
- heap->target_in_flight, 0);
496
-
497
- dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n",
498
- heap->gpu_va);
765
+ heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc);
766
+ if (unlikely(!heap->gpu_va)) {
767
+ dev_dbg(kctx->kbdev->dev, "Failed to allocate a tiler heap context\n");
768
+ err = -ENOMEM;
769
+ goto heap_context_alloc_failed;
499770 }
500771
772
+ gpu_va_reg = ctx_alloc->region;
773
+
774
+ kbase_gpu_vm_lock(kctx);
775
+ /* gpu_va_reg was created with BASEP_MEM_NO_USER_FREE, the code to unset this only happens
776
+ * on kctx termination (after all syscalls on kctx have finished), and so it is safe to
777
+ * assume that gpu_va_reg is still present.
778
+ */
779
+ vmap_ptr = kbase_vmap_reg(kctx, gpu_va_reg, heap->gpu_va, NEXT_CHUNK_ADDR_SIZE,
780
+ (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &heap->gpu_va_map,
781
+ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
782
+ kbase_gpu_vm_unlock(kctx);
783
+ if (unlikely(!vmap_ptr)) {
784
+ dev_dbg(kctx->kbdev->dev, "Failed to vmap the correct heap GPU VA address\n");
785
+ err = -ENOMEM;
786
+ goto heap_context_vmap_failed;
787
+ }
788
+
789
+ err = create_initial_chunks(heap, initial_chunks);
790
+ if (unlikely(err)) {
791
+ dev_dbg(kctx->kbdev->dev, "Failed to create the initial tiler heap chunks\n");
792
+ goto create_chunks_failed;
793
+ }
794
+ chunk = list_first_entry(&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link);
795
+
796
+ *heap_gpu_va = heap->gpu_va;
797
+ *first_chunk_va = chunk->gpu_va;
798
+
799
+ mutex_lock(&kctx->csf.tiler_heaps.lock);
800
+ kctx->csf.tiler_heaps.nr_of_heaps++;
801
+ heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps;
802
+ list_add(&heap->link, &kctx->csf.tiler_heaps.list);
803
+
804
+ KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id,
805
+ PFN_UP(heap->chunk_size * heap->max_chunks),
806
+ PFN_UP(heap->chunk_size * heap->chunk_count),
807
+ heap->max_chunks, heap->chunk_size, heap->chunk_count,
808
+ heap->target_in_flight, 0);
809
+
810
+#if defined(CONFIG_MALI_VECTOR_DUMP)
811
+ list_for_each_entry(chunk, &heap->chunks_list, link) {
812
+ KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(kctx->kbdev, kctx->id, heap->heap_id,
813
+ chunk->gpu_va);
814
+ }
815
+#endif
816
+ kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
817
+ kctx->running_total_tiler_heap_memory += (u64)heap->chunk_size * heap->chunk_count;
818
+ if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
819
+ kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
820
+
821
+ dev_dbg(kctx->kbdev->dev,
822
+ "Created tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", heap->gpu_va,
823
+ buf_desc_va, kctx->tgid, kctx->id);
501824 mutex_unlock(&kctx->csf.tiler_heaps.lock);
502825
826
+ return 0;
827
+
828
+create_chunks_failed:
829
+ kbase_vunmap(kctx, &heap->gpu_va_map);
830
+heap_context_vmap_failed:
831
+ kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va);
832
+heap_context_alloc_failed:
833
+ if (heap->buf_desc_reg)
834
+ kbase_vunmap(kctx, &heap->buf_desc_map);
835
+buf_desc_vmap_failed:
836
+ if (heap->buf_desc_reg) {
837
+ kbase_gpu_vm_lock(kctx);
838
+ kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
839
+ kbase_gpu_vm_unlock(kctx);
840
+ }
841
+buf_desc_not_suitable:
842
+ kfree(heap);
503843 return err;
504844 }
505845
@@ -508,47 +848,73 @@
508848 {
509849 int err = 0;
510850 struct kbase_csf_tiler_heap *heap = NULL;
851
+ u32 chunk_count = 0;
852
+ u64 heap_size = 0;
511853
512854 mutex_lock(&kctx->csf.tiler_heaps.lock);
513
-
514855 heap = find_tiler_heap(kctx, heap_gpu_va);
856
+ if (likely(heap)) {
857
+ chunk_count = heap->chunk_count;
858
+ heap_size = heap->chunk_size * chunk_count;
859
+
860
+ list_del_init(&heap->link);
861
+ } else {
862
+ err = -EINVAL;
863
+ }
864
+
865
+ /* Update stats whilst still holding the lock so they are in sync with the tiler_heaps.list
866
+ * at all times
867
+ */
868
+ if (likely(kctx->running_total_tiler_heap_memory >= heap_size))
869
+ kctx->running_total_tiler_heap_memory -= heap_size;
870
+ else
871
+ dev_warn(kctx->kbdev->dev,
872
+ "Running total tiler heap memory lower than expected!");
873
+ if (likely(kctx->running_total_tiler_heap_nr_chunks >= chunk_count))
874
+ kctx->running_total_tiler_heap_nr_chunks -= chunk_count;
875
+ else
876
+ dev_warn(kctx->kbdev->dev,
877
+ "Running total tiler chunk count lower than expected!");
878
+ if (!err)
879
+ dev_dbg(kctx->kbdev->dev,
880
+ "Terminated tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n",
881
+ heap->gpu_va, heap->buf_desc_va, kctx->tgid, kctx->id);
882
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
883
+
884
+ /* Deletion requires the kctx->reg_lock, so must only operate on it whilst unlinked from
885
+ * the kctx's csf.tiler_heaps.list, and without holding the csf.tiler_heaps.lock
886
+ */
515887 if (likely(heap))
516888 delete_heap(heap);
517
- else
518
- err = -EINVAL;
519
-
520
- mutex_unlock(&kctx->csf.tiler_heaps.lock);
521889
522890 return err;
523891 }
524892
525893 /**
526
- * alloc_new_chunk - Allocate a new chunk for the tiler heap.
894
+ * validate_allocation_request - Check whether the chunk allocation request
895
+ * received on tiler OOM should be handled at
896
+ * current time.
527897 *
528
- * This function will allocate a new chunk for the chunked tiler heap depending
529
- * on the settings provided by userspace when the heap was created and the
530
- * heap's statistics (like number of render passes in-flight).
898
+ * @heap: The tiler heap the OOM is associated with
899
+ * @nr_in_flight: Number of fragment jobs in flight
900
+ * @pending_frag_count: Number of pending fragment jobs
531901 *
532
- * @heap: Pointer to the tiler heap.
533
- * @nr_in_flight: Number of render passes that are in-flight, must not be zero.
534
- * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage.
535
- * The minimum value is zero but it must be less or equal to
536
- * the total number of render passes in flight
537
- * @new_chunk_ptr: Where to store the GPU virtual address & size of the new
538
- * chunk allocated for the heap.
902
+ * Context: must hold the tiler heap lock to guarantee its lifetime
539903 *
540
- * Return: 0 if a new chunk was allocated otherwise an appropriate negative
541
- * error code.
904
+ * Return:
905
+ * * 0 - allowed to allocate an additional chunk
906
+ * * -EINVAL - invalid
907
+ * * -EBUSY - there are fragment jobs still in flight, which may free chunks
908
+ * after completing
909
+ * * -ENOMEM - the targeted number of in-flight chunks has been reached and
910
+ * no new ones will be allocated
542911 */
543
-static int alloc_new_chunk(struct kbase_csf_tiler_heap *heap,
544
- u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
912
+static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight,
913
+ u32 pending_frag_count)
545914 {
546
- int err = -ENOMEM;
547
-
548915 lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
549916
550
- if (WARN_ON(!nr_in_flight) ||
551
- WARN_ON(pending_frag_count > nr_in_flight))
917
+ if (WARN_ON(!nr_in_flight) || WARN_ON(pending_frag_count > nr_in_flight))
552918 return -EINVAL;
553919
554920 if (nr_in_flight <= heap->target_in_flight) {
@@ -556,56 +922,451 @@
556922 /* Not exceeded the target number of render passes yet so be
557923 * generous with memory.
558924 */
559
- err = create_chunk(heap, false);
560
-
561
- if (likely(!err)) {
562
- struct kbase_csf_tiler_heap_chunk *new_chunk =
563
- get_last_chunk(heap);
564
- if (!WARN_ON(!new_chunk)) {
565
- *new_chunk_ptr =
566
- encode_chunk_ptr(heap->chunk_size,
567
- new_chunk->gpu_va);
568
- return 0;
569
- }
570
- }
925
+ return 0;
571926 } else if (pending_frag_count > 0) {
572
- err = -EBUSY;
927
+ return -EBUSY;
573928 } else {
574
- err = -ENOMEM;
929
+ return -ENOMEM;
575930 }
576931 } else {
577932 /* Reached target number of render passes in flight.
578933 * Wait for some of them to finish
579934 */
580
- err = -EBUSY;
935
+ return -EBUSY;
581936 }
582
-
583
- return err;
937
+ return -ENOMEM;
584938 }
585939
586940 int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
587941 u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
588942 {
589943 struct kbase_csf_tiler_heap *heap;
944
+ struct kbase_csf_tiler_heap_chunk *chunk;
590945 int err = -EINVAL;
946
+ u64 chunk_size = 0;
947
+ u64 heap_id = 0;
948
+
949
+ /* To avoid potential locking issues during allocation, this is handled
950
+ * in three phases:
951
+ * 1. Take the lock, find the corresponding heap, and find its chunk size
952
+ * (this is always 2 MB, but may change down the line).
953
+ * 2. Allocate memory for the chunk and its region.
954
+ * 3. If the heap still exists, link it to the end of the list. If it
955
+ * doesn't, roll back the allocation.
956
+ */
591957
592958 mutex_lock(&kctx->csf.tiler_heaps.lock);
959
+ heap = find_tiler_heap(kctx, gpu_heap_va);
960
+ if (likely(heap)) {
961
+ chunk_size = heap->chunk_size;
962
+ heap_id = heap->heap_id;
963
+ } else {
964
+ dev_err(kctx->kbdev->dev, "Heap 0x%llX does not exist", gpu_heap_va);
965
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
966
+ goto prelink_failure;
967
+ }
593968
969
+ err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
970
+ if (unlikely(err)) {
971
+ /* The allocation request can be legitimate, but be invoked on a heap
972
+ * that has already reached the maximum pre-configured capacity. This
973
+ * is useful debug information, but should not be treated as an error,
974
+ * since the request will be re-sent at a later point.
975
+ */
976
+ dev_dbg(kctx->kbdev->dev,
977
+ "Not allocating new chunk for heap 0x%llX due to current heap state (err %d)",
978
+ gpu_heap_va, err);
979
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
980
+ goto prelink_failure;
981
+ }
982
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
983
+ /* this heap must not be used whilst we have dropped the lock */
984
+ heap = NULL;
985
+
986
+ chunk = alloc_new_chunk(kctx, chunk_size);
987
+ if (unlikely(!chunk)) {
988
+ dev_err(kctx->kbdev->dev, "Could not allocate chunk of size %lld for ctx %d_%d",
989
+ chunk_size, kctx->tgid, kctx->id);
990
+ goto prelink_failure;
991
+ }
992
+
993
+ /* After this point, the heap that we were targeting could already have had the needed
994
+ * chunks allocated, if we were handling multiple OoM events on multiple threads, so
995
+ * we need to revalidate the need for the allocation.
996
+ */
997
+ mutex_lock(&kctx->csf.tiler_heaps.lock);
594998 heap = find_tiler_heap(kctx, gpu_heap_va);
595999
596
- if (likely(heap)) {
597
- err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count,
598
- new_chunk_ptr);
599
-
600
- KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
601
- kctx->kbdev, kctx->id, heap->heap_id,
602
- PFN_UP(heap->chunk_size * heap->max_chunks),
603
- PFN_UP(heap->chunk_size * heap->chunk_count),
604
- heap->max_chunks, heap->chunk_size, heap->chunk_count,
605
- heap->target_in_flight, nr_in_flight);
1000
+ if (unlikely(!heap)) {
1001
+ dev_err(kctx->kbdev->dev, "Tiler heap 0x%llX no longer exists!\n", gpu_heap_va);
1002
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
1003
+ goto unroll_chunk;
6061004 }
1005
+
1006
+ if (heap_id != heap->heap_id) {
1007
+ dev_err(kctx->kbdev->dev,
1008
+ "Tiler heap 0x%llX was removed from ctx %d_%d while allocating chunk of size %lld!",
1009
+ gpu_heap_va, kctx->tgid, kctx->id, chunk_size);
1010
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
1011
+ goto unroll_chunk;
1012
+ }
1013
+
1014
+ if (WARN_ON(chunk_size != heap->chunk_size)) {
1015
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
1016
+ goto unroll_chunk;
1017
+ }
1018
+
1019
+ err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
1020
+ if (unlikely(err)) {
1021
+ dev_warn(
1022
+ kctx->kbdev->dev,
1023
+ "Aborting linking chunk to heap 0x%llX: heap state changed during allocation (err %d)",
1024
+ gpu_heap_va, err);
1025
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
1026
+ goto unroll_chunk;
1027
+ }
1028
+
1029
+ err = init_chunk(heap, chunk, false);
1030
+
1031
+ /* On error, the chunk would not be linked, so we can still treat it as an unlinked
1032
+ * chunk for error handling.
1033
+ */
1034
+ if (unlikely(err)) {
1035
+ dev_err(kctx->kbdev->dev,
1036
+ "Could not link chunk(0x%llX) with tiler heap 0%llX in ctx %d_%d due to error %d",
1037
+ chunk->gpu_va, gpu_heap_va, kctx->tgid, kctx->id, err);
1038
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
1039
+ goto unroll_chunk;
1040
+ }
1041
+
1042
+ *new_chunk_ptr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);
1043
+
1044
+ /* update total and peak tiler heap memory record */
1045
+ kctx->running_total_tiler_heap_nr_chunks++;
1046
+ kctx->running_total_tiler_heap_memory += heap->chunk_size;
1047
+
1048
+ if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
1049
+ kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
1050
+
1051
+ KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id,
1052
+ PFN_UP(heap->chunk_size * heap->max_chunks),
1053
+ PFN_UP(heap->chunk_size * heap->chunk_count),
1054
+ heap->max_chunks, heap->chunk_size, heap->chunk_count,
1055
+ heap->target_in_flight, nr_in_flight);
6071056
6081057 mutex_unlock(&kctx->csf.tiler_heaps.lock);
6091058
6101059 return err;
1060
+unroll_chunk:
1061
+ remove_unlinked_chunk(kctx, chunk);
1062
+prelink_failure:
1063
+ return err;
1064
+}
1065
+
1066
+static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 chunk_gpu_va,
1067
+ u64 *hdr_val)
1068
+{
1069
+ int err;
1070
+ u64 *chunk_hdr;
1071
+ struct kbase_context *kctx = heap->kctx;
1072
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
1073
+
1074
+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
1075
+
1076
+ chunk = find_chunk(heap, chunk_gpu_va);
1077
+ if (unlikely(!chunk)) {
1078
+ dev_warn(kctx->kbdev->dev,
1079
+ "Failed to find tiler heap(0x%llX) chunk(0x%llX) for reclaim-delete\n",
1080
+ heap->gpu_va, chunk_gpu_va);
1081
+ return false;
1082
+ }
1083
+
1084
+ WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
1085
+ "Cannot support CPU cached chunks without sync operations");
1086
+ chunk_hdr = chunk->map.addr;
1087
+ *hdr_val = *chunk_hdr;
1088
+
1089
+ dev_dbg(kctx->kbdev->dev,
1090
+ "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n",
1091
+ chunk_gpu_va, heap->gpu_va, *hdr_val);
1092
+
1093
+ err = kbase_mem_shrink_gpu_mapping(kctx, chunk->region, 0, chunk->region->gpu_alloc->nents);
1094
+ if (unlikely(err)) {
1095
+ dev_warn(
1096
+ kctx->kbdev->dev,
1097
+ "Reclaim: shrinking GPU mapping failed on chunk(0x%llx) in heap(0x%llx) (err %d)\n",
1098
+ chunk_gpu_va, heap->gpu_va, err);
1099
+
1100
+ /* Cannot free the pages whilst references on the GPU remain, so keep the chunk on
1101
+ * the heap's chunk list and try a different heap.
1102
+ */
1103
+
1104
+ return false;
1105
+ }
1106
+ /* Destroy the mapping before the physical pages which are mapped are destroyed. */
1107
+ kbase_vunmap(kctx, &chunk->map);
1108
+
1109
+ err = kbase_free_phy_pages_helper(chunk->region->gpu_alloc,
1110
+ chunk->region->gpu_alloc->nents);
1111
+ if (unlikely(err)) {
1112
+ dev_warn(
1113
+ kctx->kbdev->dev,
1114
+ "Reclaim: remove physical backing failed on chunk(0x%llx) in heap(0x%llx) (err %d), continuing with deferred removal\n",
1115
+ chunk_gpu_va, heap->gpu_va, err);
1116
+
1117
+ /* kbase_free_phy_pages_helper() should only fail on invalid input, and WARNs
1118
+ * anyway, so continue instead of returning early.
1119
+ *
1120
+ * Indeed, we don't want to leave the chunk on the heap's chunk list whilst it has
1121
+ * its mapping removed, as that could lead to problems. It's safest to instead
1122
+ * continue with deferred destruction of the chunk.
1123
+ */
1124
+ }
1125
+
1126
+ dev_dbg(kctx->kbdev->dev,
1127
+ "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n",
1128
+ chunk_gpu_va, heap->gpu_va, *hdr_val);
1129
+
1130
+ mutex_lock(&heap->kctx->jit_evict_lock);
1131
+ list_move(&chunk->region->jit_node, &kctx->jit_destroy_head);
1132
+ mutex_unlock(&heap->kctx->jit_evict_lock);
1133
+
1134
+ list_del(&chunk->link);
1135
+ heap->chunk_count--;
1136
+ kfree(chunk);
1137
+
1138
+ return true;
1139
+}
1140
+
1141
+static void sanity_check_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap,
1142
+ struct kbase_csf_gpu_buffer_heap *desc)
1143
+{
1144
+ u64 first_hoarded_chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK;
1145
+
1146
+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
1147
+
1148
+ if (first_hoarded_chunk_gpu_va) {
1149
+ struct kbase_csf_tiler_heap_chunk *chunk =
1150
+ find_chunk(heap, first_hoarded_chunk_gpu_va);
1151
+
1152
+ if (likely(chunk)) {
1153
+ dev_dbg(heap->kctx->kbdev->dev,
1154
+ "Buffer descriptor 0x%llX sanity check ok, HW reclaim allowed\n",
1155
+ heap->buf_desc_va);
1156
+
1157
+ heap->buf_desc_checked = true;
1158
+ return;
1159
+ }
1160
+ }
1161
+ /* If there is no match, defer the check to next time */
1162
+ dev_dbg(heap->kctx->kbdev->dev, "Buffer descriptor 0x%llX runtime sanity check deferred\n",
1163
+ heap->buf_desc_va);
1164
+}
1165
+
1166
+static bool can_read_hw_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, u64 *chunk_gpu_va_ptr)
1167
+{
1168
+ struct kbase_context *kctx = heap->kctx;
1169
+
1170
+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
1171
+
1172
+ /* Initialize the descriptor pointer value to 0 */
1173
+ *chunk_gpu_va_ptr = 0;
1174
+
1175
+ /* The BufferDescriptor on heap is a hint on creation, do a sanity check at runtime */
1176
+ if (heap->buf_desc_reg && !heap->buf_desc_checked) {
1177
+ struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;
1178
+
1179
+ /* BufferDescriptor is supplied by userspace, so could be CPU-cached */
1180
+ if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
1181
+ kbase_sync_mem_regions(kctx, &heap->buf_desc_map, KBASE_SYNC_TO_CPU);
1182
+
1183
+ sanity_check_gpu_buffer_heap(heap, desc);
1184
+ if (heap->buf_desc_checked)
1185
+ *chunk_gpu_va_ptr = desc->pointer & CHUNK_ADDR_MASK;
1186
+ }
1187
+
1188
+ return heap->buf_desc_checked;
1189
+}
1190
+
1191
+static u32 delete_hoarded_chunks(struct kbase_csf_tiler_heap *heap)
1192
+{
1193
+ u32 freed = 0;
1194
+ u64 chunk_gpu_va = 0;
1195
+ struct kbase_context *kctx = heap->kctx;
1196
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
1197
+
1198
+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
1199
+
1200
+ if (can_read_hw_gpu_buffer_heap(heap, &chunk_gpu_va)) {
1201
+ u64 chunk_hdr_val;
1202
+ u64 *hw_hdr;
1203
+
1204
+ if (!chunk_gpu_va) {
1205
+ struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;
1206
+
1207
+ /* BufferDescriptor is supplied by userspace, so could be CPU-cached */
1208
+ if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
1209
+ kbase_sync_mem_regions(kctx, &heap->buf_desc_map,
1210
+ KBASE_SYNC_TO_CPU);
1211
+ chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK;
1212
+
1213
+ if (!chunk_gpu_va) {
1214
+ dev_dbg(kctx->kbdev->dev,
1215
+ "Buffer descriptor 0x%llX has no chunks (NULL) for reclaim scan\n",
1216
+ heap->buf_desc_va);
1217
+ goto out;
1218
+ }
1219
+ }
1220
+
1221
+ chunk = find_chunk(heap, chunk_gpu_va);
1222
+ if (unlikely(!chunk))
1223
+ goto out;
1224
+
1225
+ WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
1226
+ "Cannot support CPU cached chunks without sync operations");
1227
+ hw_hdr = chunk->map.addr;
1228
+
1229
+ /* Move onto the next chunk relevant information */
1230
+ chunk_hdr_val = *hw_hdr;
1231
+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1232
+
1233
+ while (chunk_gpu_va && heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
1234
+ bool success =
1235
+ delete_chunk_physical_pages(heap, chunk_gpu_va, &chunk_hdr_val);
1236
+
1237
+ if (!success)
1238
+ break;
1239
+
1240
+ freed++;
1241
+ /* On success, chunk_hdr_val is updated, extract the next chunk address */
1242
+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1243
+ }
1244
+
1245
+ /* Update the existing hardware chunk header, after reclaim deletion of chunks */
1246
+ *hw_hdr = chunk_hdr_val;
1247
+
1248
+ dev_dbg(heap->kctx->kbdev->dev,
1249
+ "HW reclaim scan freed chunks: %u, set hw_hdr[0]: 0x%llX\n", freed,
1250
+ chunk_hdr_val);
1251
+ } else {
1252
+ dev_dbg(kctx->kbdev->dev,
1253
+ "Skip HW reclaim scan, (disabled: buffer descriptor 0x%llX)\n",
1254
+ heap->buf_desc_va);
1255
+ }
1256
+out:
1257
+ return freed;
1258
+}
1259
+
1260
+static u64 delete_unused_chunk_pages(struct kbase_csf_tiler_heap *heap)
1261
+{
1262
+ u32 freed_chunks = 0;
1263
+ u64 freed_pages = 0;
1264
+ u64 chunk_gpu_va;
1265
+ u64 chunk_hdr_val;
1266
+ struct kbase_context *kctx = heap->kctx;
1267
+ u64 *ctx_ptr;
1268
+
1269
+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
1270
+
1271
+ WARN(heap->gpu_va_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED,
1272
+ "Cannot support CPU cached heap context without sync operations");
1273
+
1274
+ ctx_ptr = heap->gpu_va_map.addr;
1275
+
1276
+ /* Extract the first chunk address from the context's free_list_head */
1277
+ chunk_hdr_val = *ctx_ptr;
1278
+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1279
+
1280
+ while (chunk_gpu_va) {
1281
+ u64 hdr_val;
1282
+ bool success = delete_chunk_physical_pages(heap, chunk_gpu_va, &hdr_val);
1283
+
1284
+ if (!success)
1285
+ break;
1286
+
1287
+ freed_chunks++;
1288
+ chunk_hdr_val = hdr_val;
1289
+ /* extract the next chunk address */
1290
+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1291
+ }
1292
+
1293
+ /* Update the post-scan deletion to context header */
1294
+ *ctx_ptr = chunk_hdr_val;
1295
+
1296
+ /* Try to scan the HW hoarded list of unused chunks */
1297
+ freed_chunks += delete_hoarded_chunks(heap);
1298
+ freed_pages = freed_chunks * PFN_UP(heap->chunk_size);
1299
+ dev_dbg(heap->kctx->kbdev->dev,
1300
+ "Scan reclaim freed chunks/pages %u/%llu, set heap-ctx_u64[0]: 0x%llX\n",
1301
+ freed_chunks, freed_pages, chunk_hdr_val);
1302
+
1303
+ /* Update context tiler heaps memory usage */
1304
+ kctx->running_total_tiler_heap_memory -= freed_pages << PAGE_SHIFT;
1305
+ kctx->running_total_tiler_heap_nr_chunks -= freed_chunks;
1306
+ return freed_pages;
1307
+}
1308
+
1309
+u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free)
1310
+{
1311
+ u64 freed = 0;
1312
+ struct kbase_csf_tiler_heap *heap;
1313
+
1314
+ mutex_lock(&kctx->csf.tiler_heaps.lock);
1315
+
1316
+ list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) {
1317
+ freed += delete_unused_chunk_pages(heap);
1318
+
1319
+ /* If freed enough, then stop here */
1320
+ if (freed >= to_free)
1321
+ break;
1322
+ }
1323
+
1324
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
1325
+ /* The scan is surely not more than 4-G pages, but for logic flow limit it */
1326
+ if (WARN_ON(unlikely(freed > U32_MAX)))
1327
+ return U32_MAX;
1328
+ else
1329
+ return (u32)freed;
1330
+}
1331
+
1332
+static u64 count_unused_heap_pages(struct kbase_csf_tiler_heap *heap)
1333
+{
1334
+ u32 chunk_cnt = 0;
1335
+ u64 page_cnt = 0;
1336
+
1337
+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
1338
+
1339
+ /* Here the count is basically an informed estimate, avoiding the costly mapping/unmapping
1340
+ * in the chunk list walk. The downside is that the number is a less reliable guide for
1341
+ * later on scan (free) calls on this heap for what actually is freeable.
1342
+ */
1343
+ if (heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
1344
+ chunk_cnt = heap->chunk_count - HEAP_SHRINK_STOP_LIMIT;
1345
+ page_cnt = chunk_cnt * PFN_UP(heap->chunk_size);
1346
+ }
1347
+
1348
+ dev_dbg(heap->kctx->kbdev->dev,
1349
+ "Reclaim count chunks/pages %u/%llu (estimated), heap_va: 0x%llX\n", chunk_cnt,
1350
+ page_cnt, heap->gpu_va);
1351
+
1352
+ return page_cnt;
1353
+}
1354
+
1355
+u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx)
1356
+{
1357
+ u64 page_cnt = 0;
1358
+ struct kbase_csf_tiler_heap *heap;
1359
+
1360
+ mutex_lock(&kctx->csf.tiler_heaps.lock);
1361
+
1362
+ list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link)
1363
+ page_cnt += count_unused_heap_pages(heap);
1364
+
1365
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
1366
+
1367
+ /* The count is surely not more than 4-G pages, but for logic flow limit it */
1368
+ if (WARN_ON(unlikely(page_cnt > U32_MAX)))
1369
+ return U32_MAX;
1370
+ else
1371
+ return (u32)page_cnt;
6111372 }