2023-12-06 08f87f769b595151be1afeff53e144f543faa614
kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,10 +23,7 @@
 #include "mali_kbase_csf_heap_context_alloc.h"
 
 /* Size of one heap context structure, in bytes. */
-#define HEAP_CTX_SIZE ((size_t)32)
-
-/* Total size of the GPU memory region allocated for heap contexts, in bytes. */
-#define HEAP_CTX_REGION_SIZE (MAX_TILER_HEAPS * HEAP_CTX_SIZE)
+#define HEAP_CTX_SIZE ((u32)32)
 
 /**
  * sub_alloc - Sub-allocate a heap context from a GPU memory region
....@@ -38,8 +35,8 @@
3835 static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
3936 {
4037 struct kbase_context *const kctx = ctx_alloc->kctx;
41
- int heap_nr = 0;
42
- size_t ctx_offset = 0;
38
+ unsigned long heap_nr = 0;
39
+ u32 ctx_offset = 0;
4340 u64 heap_gpu_va = 0;
4441 struct kbase_vmap_struct mapping;
4542 void *ctx_ptr = NULL;
@@ -50,32 +47,67 @@
                                 MAX_TILER_HEAPS);
 
         if (unlikely(heap_nr >= MAX_TILER_HEAPS)) {
-                dev_err(kctx->kbdev->dev,
-                        "No free tiler heap contexts in the pool\n");
+                dev_dbg(kctx->kbdev->dev,
+                        "No free tiler heap contexts in the pool");
                 return 0;
         }
 
-        ctx_offset = heap_nr * HEAP_CTX_SIZE;
+        ctx_offset = heap_nr * ctx_alloc->heap_context_size_aligned;
         heap_gpu_va = ctx_alloc->gpu_va + ctx_offset;
         ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va,
-                HEAP_CTX_SIZE, KBASE_REG_CPU_WR, &mapping);
+                ctx_alloc->heap_context_size_aligned, KBASE_REG_CPU_WR, &mapping);
 
         if (unlikely(!ctx_ptr)) {
                 dev_err(kctx->kbdev->dev,
-                        "Failed to map tiler heap context %d (0x%llX)\n",
+                        "Failed to map tiler heap context %lu (0x%llX)\n",
                         heap_nr, heap_gpu_va);
                 return 0;
         }
 
-        memset(ctx_ptr, 0, HEAP_CTX_SIZE);
+        memset(ctx_ptr, 0, ctx_alloc->heap_context_size_aligned);
         kbase_vunmap(ctx_ptr, &mapping);
 
         bitmap_set(ctx_alloc->in_use, heap_nr, 1);
 
-        dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %d (0x%llX)\n",
+        dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n",
                 heap_nr, heap_gpu_va);
 
         return heap_gpu_va;
+}
+
+/**
+ * evict_heap_context - Evict the data of a heap context from the GPU's L2 cache.
+ *
+ * @ctx_alloc:   Pointer to the heap context allocator.
+ * @heap_gpu_va: The GPU virtual address of the heap context structure to free.
+ *
+ * This function is called when the memory for a heap context is freed. It uses
+ * the FLUSH_PA_RANGE command to evict the heap context data, so nothing is done
+ * on older CSF GPUs. On those GPUs the whole GPU cache is expected to be flushed
+ * anyway when the initial chunks of the heap are freed, just before the memory
+ * for the heap context is freed.
+ */
+static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ctx_alloc,
+                               u64 const heap_gpu_va)
+{
+        struct kbase_context *const kctx = ctx_alloc->kctx;
+        u32 offset_in_bytes = (u32)(heap_gpu_va - ctx_alloc->gpu_va);
+        u32 offset_within_page = offset_in_bytes & ~PAGE_MASK;
+        u32 page_index = offset_in_bytes >> PAGE_SHIFT;
+        struct tagged_addr page =
+                kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index];
+        phys_addr_t heap_context_pa = as_phys_addr_t(page) + offset_within_page;
+
+        lockdep_assert_held(&ctx_alloc->lock);
+
+        /* There is no need to take the vm_lock here as the ctx_alloc region is
+         * protected via a nonzero no_user_free_count. The region and the backing
+         * page can't disappear whilst this function is executing. The flush type
+         * is passed as FLUSH_PT so that the L2 cache is cleaned and invalidated
+         * (CLN+INV) only.
+         */
+        kbase_mmu_flush_pa_range(kctx->kbdev, kctx,
+                                 heap_context_pa, ctx_alloc->heap_context_size_aligned,
+                                 KBASE_MMU_OP_FLUSH_PT);
 }
 
 /**
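Note: the physical-address lookup in evict_heap_context() above reduces to splitting the byte offset of a context within the pool into a backing-page index and an offset within that page. The following is a minimal standalone C sketch of that arithmetic, not driver code; the 4 KiB page geometry and the example addresses are assumptions chosen purely for illustration.

    #include <stdint.h>
    #include <stdio.h>

    #define SKETCH_PAGE_SHIFT 12u                        /* assumed 4 KiB pages */
    #define SKETCH_PAGE_SIZE  (1u << SKETCH_PAGE_SHIFT)
    #define SKETCH_PAGE_MASK  (~(SKETCH_PAGE_SIZE - 1u)) /* same shape as the kernel's PAGE_MASK */

    int main(void)
    {
            /* Hypothetical pool base and heap context GPU VA, chosen for the example. */
            uint64_t gpu_va_base = 0x8000000000ull;
            uint64_t heap_gpu_va = gpu_va_base + 0x1040ull;

            uint32_t offset_in_bytes    = (uint32_t)(heap_gpu_va - gpu_va_base);
            uint32_t offset_within_page = offset_in_bytes & ~SKETCH_PAGE_MASK; /* low 12 bits */
            uint32_t page_index         = offset_in_bytes >> SKETCH_PAGE_SHIFT;

            /* Prints "page_index=1 offset_within_page=0x40": the context lives 0x40
             * bytes into the second backing page of the pool.
             */
            printf("page_index=%u offset_within_page=0x%x\n", page_index, offset_within_page);
            return 0;
    }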
@@ -88,7 +120,7 @@
                         u64 const heap_gpu_va)
 {
         struct kbase_context *const kctx = ctx_alloc->kctx;
-        u64 ctx_offset = 0;
+        u32 ctx_offset = 0;
         unsigned int heap_nr = 0;
 
         lockdep_assert_held(&ctx_alloc->lock);
@@ -99,13 +131,15 @@
         if (WARN_ON(heap_gpu_va < ctx_alloc->gpu_va))
                 return;
 
-        ctx_offset = heap_gpu_va - ctx_alloc->gpu_va;
+        ctx_offset = (u32)(heap_gpu_va - ctx_alloc->gpu_va);
 
-        if (WARN_ON(ctx_offset >= HEAP_CTX_REGION_SIZE) ||
-            WARN_ON(ctx_offset % HEAP_CTX_SIZE))
+        if (WARN_ON(ctx_offset >= (ctx_alloc->region->nr_pages << PAGE_SHIFT)) ||
+            WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned))
                 return;
 
-        heap_nr = ctx_offset / HEAP_CTX_SIZE;
+        evict_heap_context(ctx_alloc, heap_gpu_va);
+
+        heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned;
         dev_dbg(kctx->kbdev->dev,
                 "Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va);
 
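Note: with heap_context_size_aligned replacing the fixed HEAP_CTX_SIZE, allocation and free are inverse index computations over the pool, which is what the divisibility WARN_ON() above relies on. A standalone C sketch of that round trip, assuming a hypothetical 64-byte aligned context size and pool base (the driver derives the real size from the GPU cache line):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            const uint32_t ctx_size_aligned = 64;   /* assumed heap_context_size_aligned */
            const uint64_t pool_gpu_va = 0x100000;  /* hypothetical pool base */

            /* Allocation direction: heap index -> GPU VA (as in sub_alloc()). */
            unsigned long heap_nr = 3;
            uint64_t heap_gpu_va = pool_gpu_va + heap_nr * ctx_size_aligned;

            /* Free direction: GPU VA -> heap index (as in the updated sub_free path). */
            uint32_t ctx_offset = (uint32_t)(heap_gpu_va - pool_gpu_va);
            assert(ctx_offset % ctx_size_aligned == 0); /* mirrors the WARN_ON() check */
            unsigned int recovered_nr = ctx_offset / ctx_size_aligned;

            printf("heap_nr=%lu recovered=%u\n", heap_nr, recovered_nr); /* both are 3 */
            return 0;
    }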
@@ -116,12 +150,17 @@
         struct kbase_csf_heap_context_allocator *const ctx_alloc,
         struct kbase_context *const kctx)
 {
+        const u32 gpu_cache_line_size =
+                (1U << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
+
         /* We cannot pre-allocate GPU memory here because the
          * custom VA zone may not have been created yet.
          */
         ctx_alloc->kctx = kctx;
         ctx_alloc->region = NULL;
         ctx_alloc->gpu_va = 0;
+        ctx_alloc->heap_context_size_aligned =
+                (HEAP_CTX_SIZE + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1);
 
         mutex_init(&ctx_alloc->lock);
         bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS);
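Note: heap_context_size_aligned is HEAP_CTX_SIZE rounded up to the GPU cache line size, presumably so that a FLUSH_PA_RANGE of one context never touches a cache line shared with a neighbouring context. A standalone C sketch of the power-of-two round-up, with a 64-byte cache line assumed for illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* Round size up to the next multiple of a power-of-two alignment;
     * the same mask trick as the heap_context_size_aligned computation above.
     */
    static uint32_t align_up_pow2(uint32_t size, uint32_t alignment)
    {
            return (size + alignment - 1u) & ~(alignment - 1u);
    }

    int main(void)
    {
            const uint32_t heap_ctx_size = 32;       /* HEAP_CTX_SIZE */
            const uint32_t gpu_cache_line_size = 64; /* assumed: 1 << log2_line_size */

            /* Prints 64: the 32-byte context is padded out to one full cache line. */
            printf("heap_context_size_aligned = %u\n",
                   align_up_pow2(heap_ctx_size, gpu_cache_line_size));
            return 0;
    }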
@@ -142,7 +181,9 @@
 
         if (ctx_alloc->region) {
                 kbase_gpu_vm_lock(kctx);
-                ctx_alloc->region->flags &= ~KBASE_REG_NO_USER_FREE;
+                WARN_ON(!kbase_va_region_is_no_user_free(ctx_alloc->region));
+
+                kbase_va_region_no_user_free_dec(ctx_alloc->region);
                 kbase_mem_free_region(kctx, ctx_alloc->region);
                 kbase_gpu_vm_unlock(kctx);
         }
@@ -154,14 +195,15 @@
         struct kbase_csf_heap_context_allocator *const ctx_alloc)
 {
         struct kbase_context *const kctx = ctx_alloc->kctx;
-        u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
-                BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE;
-        u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE);
+        u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
+                    BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD;
+        u64 nr_pages = PFN_UP(MAX_TILER_HEAPS * ctx_alloc->heap_context_size_aligned);
         u64 heap_gpu_va = 0;
 
-#ifdef CONFIG_MALI_VECTOR_DUMP
-        flags |= BASE_MEM_PROT_CPU_RD;
-#endif
+        /* Calls to this function are inherently asynchronous with respect to
+         * MMU operations.
+         */
+        const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
 
         mutex_lock(&ctx_alloc->lock);
 
@@ -169,16 +211,15 @@
          * allocate it.
          */
         if (!ctx_alloc->region) {
-                ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages,
-                        0, &flags, &ctx_alloc->gpu_va);
+                ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+                                                    &ctx_alloc->gpu_va, mmu_sync_info);
         }
 
         /* If the pool still isn't allocated then an error occurred. */
-        if (unlikely(!ctx_alloc->region)) {
-                dev_err(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts\n");
-        } else {
+        if (unlikely(!ctx_alloc->region))
+                dev_dbg(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts");
+        else
                 heap_gpu_va = sub_alloc(ctx_alloc);
-        }
 
         mutex_unlock(&ctx_alloc->lock);
 