.. | ..
1 | 1 | // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 | 2 | /*
3 | 3 |  *
4 | | - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
| 4 | + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
5 | 5 |  *
6 | 6 |  * This program is free software and is provided to you under the terms of the
7 | 7 |  * GNU General Public License version 2 as published by the Free Software
.. | ..
23 | 23 | #include "mali_kbase_csf_heap_context_alloc.h"
24 | 24 |
25 | 25 | /* Size of one heap context structure, in bytes. */
26 | | -#define HEAP_CTX_SIZE ((size_t)32)
27 | | -
28 | | -/* Total size of the GPU memory region allocated for heap contexts, in bytes. */
29 | | -#define HEAP_CTX_REGION_SIZE (MAX_TILER_HEAPS * HEAP_CTX_SIZE)
| 26 | +#define HEAP_CTX_SIZE ((u32)32)
30 | 27 |
31 | 28 | /**
32 | 29 |  * sub_alloc - Sub-allocate a heap context from a GPU memory region
.. | ..
38 | 35 | static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
39 | 36 | {
40 | 37 |         struct kbase_context *const kctx = ctx_alloc->kctx;
41 | | -        int heap_nr = 0;
42 | | -        size_t ctx_offset = 0;
| 38 | +        unsigned long heap_nr = 0;
| 39 | +        u32 ctx_offset = 0;
43 | 40 |         u64 heap_gpu_va = 0;
44 | 41 |         struct kbase_vmap_struct mapping;
45 | 42 |         void *ctx_ptr = NULL;
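For orientation, the allocator hands out fixed-size slots from a single pool by scanning the `in_use` bitmap; `find_first_zero_bit()` returns an `unsigned long`, which is why `heap_nr` changes type above. The following standalone sketch (plain C, not kbase code; the pool geometry and the `first_zero_bit()` helper are invented for the example) models that pattern:

```c
/*
 * Standalone model (userspace C, not kbase code) of the slot allocation
 * pattern used by sub_alloc(): scan a bitmap for the first free slot,
 * mark it busy, and turn the slot index into a byte offset.
 * MAX_HEAPS and ALIGNED_CTX_SIZE are illustrative values only.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_HEAPS        128
#define ALIGNED_CTX_SIZE 64u   /* HEAP_CTX_SIZE rounded up to a cache line */

static uint64_t in_use[MAX_HEAPS / 64];

/* Simplified stand-in for the kernel's find_first_zero_bit(). */
static unsigned long first_zero_bit(const uint64_t *map, unsigned long nbits)
{
	unsigned long i;

	for (i = 0; i < nbits; i++)
		if (!(map[i / 64] & (1ULL << (i % 64))))
			return i;
	return nbits; /* "not found", mirrors the kernel convention */
}

int main(void)
{
	unsigned long heap_nr = first_zero_bit(in_use, MAX_HEAPS);
	uint32_t ctx_offset;

	if (heap_nr >= MAX_HEAPS) {
		fprintf(stderr, "No free heap context slots\n");
		return 1;
	}

	in_use[heap_nr / 64] |= 1ULL << (heap_nr % 64); /* bitmap_set(map, heap_nr, 1) */
	ctx_offset = (uint32_t)(heap_nr * ALIGNED_CTX_SIZE);
	printf("slot %lu -> offset 0x%x\n", heap_nr, ctx_offset);
	return 0;
}
```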
.. | ..
50 | 47 |                                        MAX_TILER_HEAPS);
51 | 48 |
52 | 49 |         if (unlikely(heap_nr >= MAX_TILER_HEAPS)) {
53 | | -                dev_err(kctx->kbdev->dev,
54 | | -                        "No free tiler heap contexts in the pool\n");
| 50 | +                dev_dbg(kctx->kbdev->dev,
| 51 | +                        "No free tiler heap contexts in the pool");
55 | 52 |                 return 0;
56 | 53 |         }
57 | 54 |
58 | | -        ctx_offset = heap_nr * HEAP_CTX_SIZE;
| 55 | +        ctx_offset = heap_nr * ctx_alloc->heap_context_size_aligned;
59 | 56 |         heap_gpu_va = ctx_alloc->gpu_va + ctx_offset;
60 | 57 |         ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va,
61 | | -                HEAP_CTX_SIZE, KBASE_REG_CPU_WR, &mapping);
| 58 | +                ctx_alloc->heap_context_size_aligned, KBASE_REG_CPU_WR, &mapping);
62 | 59 |
63 | 60 |         if (unlikely(!ctx_ptr)) {
64 | 61 |                 dev_err(kctx->kbdev->dev,
65 | | -                        "Failed to map tiler heap context %d (0x%llX)\n",
| 62 | +                        "Failed to map tiler heap context %lu (0x%llX)\n",
66 | 63 |                         heap_nr, heap_gpu_va);
67 | 64 |                 return 0;
68 | 65 |         }
69 | 66 |
70 | | -        memset(ctx_ptr, 0, HEAP_CTX_SIZE);
| 67 | +        memset(ctx_ptr, 0, ctx_alloc->heap_context_size_aligned);
71 | 68 |         kbase_vunmap(ctx_ptr, &mapping);
72 | 69 |
73 | 70 |         bitmap_set(ctx_alloc->in_use, heap_nr, 1);
74 | 71 |
75 | | -        dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %d (0x%llX)\n",
| 72 | +        dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n",
76 | 73 |                 heap_nr, heap_gpu_va);
77 | 74 |
78 | 75 |         return heap_gpu_va;
| 76 | +}
| 77 | +
| 78 | +/**
| 79 | + * evict_heap_context - Evict the data of a heap context from the GPU's L2 cache.
| 80 | + *
| 81 | + * @ctx_alloc:   Pointer to the heap context allocator.
| 82 | + * @heap_gpu_va: The GPU virtual address of a heap context structure to free.
| 83 | + *
| 84 | + * This function is called when the memory for a heap context is freed. It uses
| 85 | + * the FLUSH_PA_RANGE command to evict the heap context data, so nothing is done
| 86 | + * on older CSF GPUs. On those GPUs the whole GPU cache is expected to be flushed
| 87 | + * anyway when the initial chunks of the heap are freed, just before the memory
| 88 | + * for the heap context is freed.
| 89 | + */
| 90 | +static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ctx_alloc,
| 91 | +                               u64 const heap_gpu_va)
| 92 | +{
| 93 | +        struct kbase_context *const kctx = ctx_alloc->kctx;
| 94 | +        u32 offset_in_bytes = (u32)(heap_gpu_va - ctx_alloc->gpu_va);
| 95 | +        u32 offset_within_page = offset_in_bytes & ~PAGE_MASK;
| 96 | +        u32 page_index = offset_in_bytes >> PAGE_SHIFT;
| 97 | +        struct tagged_addr page =
| 98 | +                kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index];
| 99 | +        phys_addr_t heap_context_pa = as_phys_addr_t(page) + offset_within_page;
| 100 | +
| 101 | +        lockdep_assert_held(&ctx_alloc->lock);
| 102 | +
| 103 | +        /* There is no need to take the vm_lock here as the ctx_alloc region is
| 104 | +         * protected via a nonzero no_user_free_count. The region and the backing
| 105 | +         * page can't disappear whilst this function is executing. The flush type
| 106 | +         * is passed as FLUSH_PT to clean and invalidate the L2 cache only.
| 107 | +         */
| 108 | +        kbase_mmu_flush_pa_range(kctx->kbdev, kctx,
| 109 | +                                 heap_context_pa, ctx_alloc->heap_context_size_aligned,
| 110 | +                                 KBASE_MMU_OP_FLUSH_PT);
79 | 111 | }
80 | 112 |
81 | 113 | /**
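The new `evict_heap_context()` helper only needs a physical address range to hand to `kbase_mmu_flush_pa_range()`. The sketch below (standalone C, with fabricated addresses and a made-up `phys_of_page()` lookup standing in for `kbase_get_gpu_phy_pages()` / `as_phys_addr_t()`) walks through the same offset-to-physical-address arithmetic:

```c
/*
 * Standalone illustration (plain C, not kbase code) of how evict_heap_context()
 * derives the physical address to flush: the byte offset of the context within
 * the pool region selects a backing page, and the offset within that page is
 * added to that page's physical address. PAGE_SIZE, the example addresses and
 * phys_of_page() are invented for this example.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE  4096u
#define PAGE_MASK  (~(uint64_t)(PAGE_SIZE - 1))
#define PAGE_SHIFT 12

/* Stand-in for the driver's per-region physical page array (fabricated values). */
static uint64_t phys_of_page(uint32_t page_index)
{
	static const uint64_t backing[] = { 0x82340000ull, 0x9abc1000ull };
	return backing[page_index];
}

int main(void)
{
	const uint64_t pool_gpu_va = 0x7f0000000000ull;    /* ctx_alloc->gpu_va   */
	const uint64_t heap_gpu_va = pool_gpu_va + 0x1040; /* one context slot    */
	const uint32_t ctx_size_aligned = 64;              /* aligned context size */

	uint32_t offset_in_bytes = (uint32_t)(heap_gpu_va - pool_gpu_va);
	uint32_t offset_within_page = (uint32_t)(offset_in_bytes & ~PAGE_MASK);
	uint32_t page_index = offset_in_bytes >> PAGE_SHIFT;
	uint64_t heap_context_pa = phys_of_page(page_index) + offset_within_page;

	printf("flush %u bytes at PA 0x%llx (page %u, offset 0x%x)\n",
	       ctx_size_aligned, (unsigned long long)heap_context_pa,
	       page_index, offset_within_page);
	return 0;
}
```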
.. | ..
88 | 120 |                                              u64 const heap_gpu_va)
89 | 121 | {
90 | 122 |         struct kbase_context *const kctx = ctx_alloc->kctx;
91 | | -        u64 ctx_offset = 0;
| 123 | +        u32 ctx_offset = 0;
92 | 124 |         unsigned int heap_nr = 0;
93 | 125 |
94 | 126 |         lockdep_assert_held(&ctx_alloc->lock);
.. | ..
99 | 131 |         if (WARN_ON(heap_gpu_va < ctx_alloc->gpu_va))
100 | 132 |                 return;
101 | 133 |
102 | | -        ctx_offset = heap_gpu_va - ctx_alloc->gpu_va;
| 134 | +        ctx_offset = (u32)(heap_gpu_va - ctx_alloc->gpu_va);
103 | 135 |
104 | | -        if (WARN_ON(ctx_offset >= HEAP_CTX_REGION_SIZE) ||
105 | | -            WARN_ON(ctx_offset % HEAP_CTX_SIZE))
| 136 | +        if (WARN_ON(ctx_offset >= (ctx_alloc->region->nr_pages << PAGE_SHIFT)) ||
| 137 | +            WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned))
106 | 138 |                 return;
107 | 139 |
108 | | -        heap_nr = ctx_offset / HEAP_CTX_SIZE;
| 140 | +        evict_heap_context(ctx_alloc, heap_gpu_va);
| 141 | +
| 142 | +        heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned;
109 | 143 |         dev_dbg(kctx->kbdev->dev,
110 | 144 |                 "Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va);
111 | 145 |
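Before a slot is released, the offset is checked against the pool bounds and the aligned context size, and only then is the slot index recovered. A minimal standalone model of those checks (plain C; `assert()` stands in for the driver's WARN_ON-and-return, and the sizes are invented):

```c
/*
 * Minimal model (plain C, not kbase code) of the sanity checks on the free
 * path: the offset must lie inside the pool region and must be an exact
 * multiple of the aligned context size before the slot index is recovered.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint32_t ctx_size_aligned = 64;   /* heap_context_size_aligned */
	const uint32_t region_bytes = 1u << 12; /* nr_pages << PAGE_SHIFT    */
	const uint32_t ctx_offset = 3 * ctx_size_aligned;

	/* Equivalent of the two WARN_ON() checks before freeing a slot. */
	assert(ctx_offset < region_bytes);
	assert(ctx_offset % ctx_size_aligned == 0);

	printf("freeing heap context slot %u\n", ctx_offset / ctx_size_aligned);
	return 0;
}
```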
.. | ..
116 | 150 |                 struct kbase_csf_heap_context_allocator *const ctx_alloc,
117 | 151 |                 struct kbase_context *const kctx)
118 | 152 | {
| 153 | +        const u32 gpu_cache_line_size =
| 154 | +                (1U << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
| 155 | +
119 | 156 |         /* We cannot pre-allocate GPU memory here because the
120 | 157 |          * custom VA zone may not have been created yet.
121 | 158 |          */
122 | 159 |         ctx_alloc->kctx = kctx;
123 | 160 |         ctx_alloc->region = NULL;
124 | 161 |         ctx_alloc->gpu_va = 0;
| 162 | +        ctx_alloc->heap_context_size_aligned =
| 163 | +                (HEAP_CTX_SIZE + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1);
125 | 164 |
126 | 165 |         mutex_init(&ctx_alloc->lock);
127 | 166 |         bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS);
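The aligned context size is computed with the usual power-of-two round-up, `(x + a - 1) & ~(a - 1)`, so each context occupies whole L2 cache lines; presumably this keeps a per-context flush from disturbing a neighbouring context's data. A quick standalone check (plain C), assuming a 64-byte line size rather than reading the GPU's `log2_line_size`:

```c
/*
 * Standalone check (plain C) of the cache-line round-up used above.
 * The 64-byte line size is an assumption for the example; the driver
 * derives it from the GPU's log2_line_size property.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t align_up(uint32_t x, uint32_t a) /* 'a' must be a power of two */
{
	return (x + a - 1) & ~(a - 1);
}

int main(void)
{
	const uint32_t heap_ctx_size = 32;   /* HEAP_CTX_SIZE                */
	const uint32_t cache_line = 1u << 6; /* assumed log2_line_size == 6  */

	/* 32 bytes rounded up to a 64-byte line -> 64 bytes per context,
	 * so no two contexts share a cache line. */
	printf("aligned context size = %u\n", align_up(heap_ctx_size, cache_line));
	return 0;
}
```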
.. | ..
142 | 181 |
143 | 182 |         if (ctx_alloc->region) {
144 | 183 |                 kbase_gpu_vm_lock(kctx);
145 | | -                ctx_alloc->region->flags &= ~KBASE_REG_NO_USER_FREE;
| 184 | +                WARN_ON(!kbase_va_region_is_no_user_free(ctx_alloc->region));
| 185 | +
| 186 | +                kbase_va_region_no_user_free_dec(ctx_alloc->region);
146 | 187 |                 kbase_mem_free_region(kctx, ctx_alloc->region);
147 | 188 |                 kbase_gpu_vm_unlock(kctx);
148 | 189 |         }
.. | ..
154 | 195 |                 struct kbase_csf_heap_context_allocator *const ctx_alloc)
155 | 196 | {
156 | 197 |         struct kbase_context *const kctx = ctx_alloc->kctx;
157 | | -        u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
158 | | -                BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE;
159 | | -        u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE);
| 198 | +        u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
| 199 | +                BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD;
| 200 | +        u64 nr_pages = PFN_UP(MAX_TILER_HEAPS * ctx_alloc->heap_context_size_aligned);
160 | 201 |         u64 heap_gpu_va = 0;
161 | 202 |
162 | | -#ifdef CONFIG_MALI_VECTOR_DUMP
163 | | -        flags |= BASE_MEM_PROT_CPU_RD;
164 | | -#endif
| 203 | +        /* Calls to this function are inherently asynchronous, with respect to
| 204 | +         * MMU operations.
| 205 | +         */
| 206 | +        const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
165 | 207 |
166 | 208 |         mutex_lock(&ctx_alloc->lock);
167 | 209 |
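The pool size is now derived at runtime from `PFN_UP(MAX_TILER_HEAPS * heap_context_size_aligned)` instead of the removed `HEAP_CTX_REGION_SIZE` macro. A back-of-the-envelope check with assumed values (128 heaps, 64-byte aligned contexts, 4 KiB pages; none of these are taken from the driver headers):

```c
/*
 * Back-of-the-envelope check (plain C) of the pool sizing above. PFN_UP(x)
 * is the number of whole pages needed to hold x bytes. All constants here
 * are example assumptions, not the driver's real configuration.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u
#define PFN_UP(x) (((x) + PAGE_SIZE - 1) / PAGE_SIZE)

int main(void)
{
	const uint64_t max_tiler_heaps = 128;
	const uint64_t ctx_size_aligned = 64; /* 32 bytes rounded to a 64-byte line */
	const uint64_t nr_pages = PFN_UP(max_tiler_heaps * ctx_size_aligned);

	/* 128 * 64 = 8192 bytes -> 2 pages of 4 KiB for the whole pool. */
	printf("heap context pool: %llu page(s)\n", (unsigned long long)nr_pages);
	return 0;
}
```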
.. | ..
169 | 211 |          * allocate it.
170 | 212 |          */
171 | 213 |         if (!ctx_alloc->region) {
172 | | -                ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages,
173 | | -                        0, &flags, &ctx_alloc->gpu_va);
| 214 | +                ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
| 215 | +                                                    &ctx_alloc->gpu_va, mmu_sync_info);
174 | 216 |         }
175 | 217 |
176 | 218 |         /* If the pool still isn't allocated then an error occurred. */
177 | | -        if (unlikely(!ctx_alloc->region)) {
178 | | -                dev_err(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts\n");
179 | | -        } else {
| 219 | +        if (unlikely(!ctx_alloc->region))
| 220 | +                dev_dbg(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts");
| 221 | +        else
180 | 222 |                 heap_gpu_va = sub_alloc(ctx_alloc);
181 | | -        }
182 | 223 |
183 | 224 |         mutex_unlock(&ctx_alloc->lock);
184 | 225 |
---|