forked from ~ljy/RK356X_SDK_RELEASE

hc, 2023-12-11, commit 6778948f9de86c3cfaf36725a7c87dcff9ba247f
kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,34 +27,23 @@
 #include <linux/export.h>
 #include <linux/priority_control_manager.h>
 #include <linux/shmem_fs.h>
-#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>
 #include "mali_kbase_csf_tiler_heap.h"
 #include <mmu/mali_kbase_mmu.h>
 #include "mali_kbase_csf_timeout.h"
 #include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
+#include <mali_kbase_hwaccess_time.h>
+#include "mali_kbase_csf_event.h"
+#include <tl/mali_kbase_tracepoints.h>
+#include "mali_kbase_csf_mcu_shared_reg.h"
 
 #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
 #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
-#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
 
-/**
- * struct kbase_csf_event - CSF event callback.
- *
- * This structure belongs to the list of events which is part of a Kbase
- * context, and describes a callback function with a custom parameter to pass
- * to it when a CSF event is signalled.
- *
- * @link: Link to the rest of the list.
- * @kctx: Pointer to the Kbase context this event belongs to.
- * @callback: Callback function to call when a CSF event is signalled.
- * @param: Parameter to pass to the callback function.
- */
-struct kbase_csf_event {
-	struct list_head link;
-	struct kbase_context *kctx;
-	kbase_csf_event_callback *callback;
-	void *param;
-};
+#define CS_RING_BUFFER_MAX_SIZE ((uint32_t)(1 << 31)) /* 2GiB */
+#define CS_RING_BUFFER_MIN_SIZE ((uint32_t)4096)
+
+#define PROTM_ALLOC_MAX_RETRIES ((u8)5)
 
 const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = {
	KBASE_QUEUE_GROUP_PRIORITY_HIGH,
@@ -68,6 +57,55 @@
	BASE_QUEUE_GROUP_PRIORITY_MEDIUM,
	BASE_QUEUE_GROUP_PRIORITY_LOW
 };
+
+/*
+ * struct irq_idle_and_protm_track - Object that tracks the idle and protected mode
+ *                                   request information in an interrupt case across
+ *                                   groups.
+ *
+ * @protm_grp: Possibly schedulable group that requested protected mode in the interrupt.
+ *             If NULL, no such case observed in the tracked interrupt case.
+ * @idle_seq:  The highest priority group that notified idle. If no such instance in the
+ *             interrupt case, marked with the largest field value: U32_MAX.
+ * @idle_slot: The slot number if @p idle_seq is valid in the given tracking case.
+ */
+struct irq_idle_and_protm_track {
+	struct kbase_queue_group *protm_grp;
+	u32 idle_seq;
+	s8 idle_slot;
+};
+
+/**
+ * kbasep_ctx_user_reg_page_mapping_term() - Terminate resources for USER Register Page.
+ *
+ * @kctx: Pointer to the kbase context
+ */
+static void kbasep_ctx_user_reg_page_mapping_term(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	if (unlikely(kctx->csf.user_reg.vma))
+		dev_err(kbdev->dev, "VMA for USER Register page exist on termination of ctx %d_%d",
+			kctx->tgid, kctx->id);
+	if (WARN_ON_ONCE(!list_empty(&kctx->csf.user_reg.link)))
+		list_del_init(&kctx->csf.user_reg.link);
+}
+
+/**
+ * kbasep_ctx_user_reg_page_mapping_init() - Initialize resources for USER Register Page.
+ *
+ * @kctx: Pointer to the kbase context
+ *
+ * @return: 0 on success.
+ */
+static int kbasep_ctx_user_reg_page_mapping_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->csf.user_reg.link);
+	kctx->csf.user_reg.vma = NULL;
+	kctx->csf.user_reg.file_offset = 0;
+
+	return 0;
+}
 
 static void put_user_pages_mmap_handle(struct kbase_context *kctx,
				       struct kbase_queue *queue)
@@ -129,21 +167,6 @@
	return 0;
 }
 
-static void gpu_munmap_user_io_pages(struct kbase_context *kctx,
-		struct kbase_va_region *reg)
-{
-	size_t num_pages = 2;
-
-	kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
-				 reg->start_pfn, num_pages, MCU_AS_NR);
-
-	WARN_ON(reg->flags & KBASE_REG_FREE);
-
-	mutex_lock(&kctx->kbdev->csf.reg_lock);
-	kbase_remove_va_region(reg);
-	mutex_unlock(&kctx->kbdev->csf.reg_lock);
-}
-
 static void init_user_io_pages(struct kbase_queue *queue)
 {
	u32 *input_addr = (u32 *)(queue->user_io_addr);
@@ -161,80 +184,15 @@
	output_addr[CS_ACTIVE/4] = 0;
 }
 
-/* Map the input/output pages in the shared interface segment of MCU firmware
- * address space.
- */
-static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
-	struct tagged_addr *phys, struct kbase_va_region *reg)
-{
-	unsigned long mem_flags = KBASE_REG_GPU_RD;
-	const size_t num_pages = 2;
-	int ret;
-
-#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
-	((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
-	(KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
-	mem_flags |=
-		KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
-#else
-	if (kbdev->system_coherency == COHERENCY_NONE) {
-		mem_flags |=
-			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
-	} else {
-		mem_flags |= KBASE_REG_SHARE_BOTH |
-			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED);
-	}
-#endif
-
-	mutex_lock(&kbdev->csf.reg_lock);
-	ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1);
-	reg->flags &= ~KBASE_REG_FREE;
-	mutex_unlock(&kbdev->csf.reg_lock);
-
-	if (ret)
-		return ret;
-
-	/* Map input page */
-	ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
-				     reg->start_pfn, &phys[0],
-				     1, mem_flags, MCU_AS_NR,
-				     KBASE_MEM_GROUP_CSF_IO);
-	if (ret)
-		goto bad_insert;
-
-	/* Map output page, it needs rw access */
-	mem_flags |= KBASE_REG_GPU_WR;
-	ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
-				     reg->start_pfn + 1, &phys[1],
-				     1, mem_flags, MCU_AS_NR,
-				     KBASE_MEM_GROUP_CSF_IO);
-	if (ret)
-		goto bad_insert_output_page;
-
-	return 0;
-
-bad_insert_output_page:
-	kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu,
-				 reg->start_pfn, 1, MCU_AS_NR);
-bad_insert:
-	mutex_lock(&kbdev->csf.reg_lock);
-	kbase_remove_va_region(reg);
-	mutex_unlock(&kbdev->csf.reg_lock);
-
-	return ret;
-}
-
 static void kernel_unmap_user_io_pages(struct kbase_context *kctx,
				       struct kbase_queue *queue)
 {
-	const size_t num_pages = 2;
-
	kbase_gpu_vm_lock(kctx);
 
	vunmap(queue->user_io_addr);
 
-	WARN_ON(num_pages > atomic_read(&kctx->permanent_mapped_pages));
-	atomic_sub(num_pages, &kctx->permanent_mapped_pages);
+	WARN_ON(atomic_read(&kctx->permanent_mapped_pages) < KBASEP_NUM_CS_USER_IO_PAGES);
+	atomic_sub(KBASEP_NUM_CS_USER_IO_PAGES, &kctx->permanent_mapped_pages);
 
	kbase_gpu_vm_unlock(kctx);
 }
@@ -244,6 +202,8 @@
 {
	struct page *page_list[2];
	pgprot_t cpu_map_prot;
+	unsigned long flags;
+	char *user_io_addr;
	int ret = 0;
	size_t i;
 
@@ -258,26 +218,29 @@
	/* The pages are mapped to Userspace also, so use the same mapping
	 * attributes as used inside the CPU page fault handler.
	 */
-#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
-	((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
-	(KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
-	cpu_map_prot = pgprot_device(PAGE_KERNEL);
-#else
	if (kctx->kbdev->system_coherency == COHERENCY_NONE)
		cpu_map_prot = pgprot_writecombine(PAGE_KERNEL);
	else
		cpu_map_prot = PAGE_KERNEL;
-#endif
 
	for (i = 0; i < ARRAY_SIZE(page_list); i++)
		page_list[i] = as_page(queue->phys[i]);
 
-	queue->user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot);
+	user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot);
 
-	if (!queue->user_io_addr)
+	if (!user_io_addr) {
+		dev_err(kctx->kbdev->dev,
+			"%s(): user_io_addr is NULL, queue: %p",
+			__func__,
+			queue);
		ret = -ENOMEM;
-	else
+	} else {
		atomic_add(ARRAY_SIZE(page_list), &kctx->permanent_mapped_pages);
+	}
+
+	kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags);
+	queue->user_io_addr = user_io_addr;
+	kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
 
 unlock:
	kbase_gpu_vm_unlock(kctx);
@@ -310,70 +273,62 @@
310273 * If an explicit or implicit unbind was missed by the userspace then the
311274 * mapping will persist. On process exit kernel itself will remove the mapping.
312275 */
313
-static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx,
314
- struct kbase_queue *queue)
276
+void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue)
315277 {
316
- const size_t num_pages = 2;
317
-
318
- gpu_munmap_user_io_pages(kctx, queue->reg);
319278 kernel_unmap_user_io_pages(kctx, queue);
320279
321280 kbase_mem_pool_free_pages(
322281 &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
323
- num_pages, queue->phys, true, false);
282
+ KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, true, false);
283
+ kbase_process_page_usage_dec(kctx, KBASEP_NUM_CS_USER_IO_PAGES);
324284
325
- kfree(queue->reg);
326
- queue->reg = NULL;
285
+ /* The user_io_gpu_va should have been unmapped inside the scheduler */
286
+ WARN_ONCE(queue->user_io_gpu_va, "Userio pages appears still have mapping");
327287
328288 /* If the queue has already been terminated by userspace
329289 * then the ref count for queue object will drop to 0 here.
330290 */
331291 release_queue(queue);
332292 }
293
+KBASE_EXPORT_TEST_API(kbase_csf_free_command_stream_user_pages);
333294
334
-int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
335
- struct kbase_queue *queue)
295
+int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue)
336296 {
337297 struct kbase_device *kbdev = kctx->kbdev;
338
- struct kbase_va_region *reg;
339
- const size_t num_pages = 2;
340298 int ret;
341299
342300 lockdep_assert_held(&kctx->csf.lock);
343301
344
- reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0,
345
- num_pages, KBASE_REG_ZONE_MCU_SHARED);
346
- if (!reg)
302
+ ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
303
+ KBASEP_NUM_CS_USER_IO_PAGES,
304
+ queue->phys, false, kctx->task);
305
+ if (ret != KBASEP_NUM_CS_USER_IO_PAGES) {
306
+ /* Marking both the phys to zero for indicating there is no phys allocated */
307
+ queue->phys[0].tagged_addr = 0;
308
+ queue->phys[1].tagged_addr = 0;
347309 return -ENOMEM;
348
-
349
- ret = kbase_mem_pool_alloc_pages(
350
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
351
- num_pages, queue->phys, false);
352
-
353
- if (ret != num_pages)
354
- goto phys_alloc_failed;
310
+ }
355311
356312 ret = kernel_map_user_io_pages(kctx, queue);
357313 if (ret)
358314 goto kernel_map_failed;
359315
316
+ kbase_process_page_usage_inc(kctx, KBASEP_NUM_CS_USER_IO_PAGES);
360317 init_user_io_pages(queue);
361318
362
- ret = gpu_mmap_user_io_pages(kctx->kbdev, queue->phys, reg);
363
- if (ret)
364
- goto gpu_mmap_failed;
365
-
366
- queue->reg = reg;
319
+ /* user_io_gpu_va is only mapped when scheduler decides to put the queue
320
+ * on slot at runtime. Initialize it to 0, signalling no mapping.
321
+ */
322
+ queue->user_io_gpu_va = 0;
367323
368324 mutex_lock(&kbdev->csf.reg_lock);
369
- if (kbdev->csf.db_file_offsets >
370
- (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1))
325
+ if (kbdev->csf.db_file_offsets > (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1))
371326 kbdev->csf.db_file_offsets = 0;
372327
373328 queue->db_file_offset = kbdev->csf.db_file_offsets;
374329 kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES;
375
-
376
- WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
330
+ WARN(kbase_refcount_read(&queue->refcount) != 1,
331
+ "Incorrect refcounting for queue object\n");
377332 /* This is the second reference taken on the queue object and
378333 * would be dropped only when the IO mapping is removed either
379334 * explicitly by userspace or implicitly by kernel on process exit.
@@ -384,19 +339,16 @@
384339
385340 return 0;
386341
387
-gpu_mmap_failed:
388
- kernel_unmap_user_io_pages(kctx, queue);
389
-
390342 kernel_map_failed:
391
- kbase_mem_pool_free_pages(
392
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
393
- num_pages, queue->phys, false, false);
343
+ kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
344
+ KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false, false);
345
+ /* Marking both the phys to zero for indicating there is no phys allocated */
346
+ queue->phys[0].tagged_addr = 0;
347
+ queue->phys[1].tagged_addr = 0;
394348
395
-phys_alloc_failed:
396
- kfree(reg);
397
-
398
- return -ENOMEM;
349
+ return ret;
399350 }
351
+KBASE_EXPORT_TEST_API(kbase_csf_alloc_command_stream_user_pages);
400352
401353 static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx,
402354 u8 group_handle)
@@ -413,6 +365,12 @@
413365
414366 return NULL;
415367 }
368
+
369
+struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle)
370
+{
371
+ return find_queue_group(kctx, group_handle);
372
+}
373
+KBASE_EXPORT_TEST_API(kbase_csf_find_queue_group);
416374
417375 int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx,
418376 u8 group_handle)
@@ -442,25 +400,37 @@
442400
443401 static void get_queue(struct kbase_queue *queue)
444402 {
445
- WARN_ON(!atomic_inc_not_zero(&queue->refcount));
403
+ WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount));
446404 }
447405
448406 static void release_queue(struct kbase_queue *queue)
449407 {
450408 lockdep_assert_held(&queue->kctx->csf.lock);
451
-
452
- WARN_ON(atomic_read(&queue->refcount) <= 0);
453
-
454
- if (atomic_dec_and_test(&queue->refcount)) {
409
+ if (kbase_refcount_dec_and_test(&queue->refcount)) {
455410 /* The queue can't still be on the per context list. */
456411 WARN_ON(!list_empty(&queue->link));
457412 WARN_ON(queue->group);
413
+ dev_dbg(queue->kctx->kbdev->dev,
414
+ "Remove any pending command queue fatal from ctx %d_%d",
415
+ queue->kctx->tgid, queue->kctx->id);
416
+ kbase_csf_event_remove_error(queue->kctx, &queue->error);
417
+
418
+ /* After this the Userspace would be able to free the
419
+ * memory for GPU queue. In case the Userspace missed
420
+ * terminating the queue, the cleanup will happen on
421
+ * context termination where tear down of region tracker
422
+ * would free up the GPU queue memory.
423
+ */
424
+ kbase_gpu_vm_lock(queue->kctx);
425
+ kbase_va_region_no_user_free_dec(queue->queue_reg);
426
+ kbase_gpu_vm_unlock(queue->kctx);
427
+
458428 kfree(queue);
459429 }
460430 }
461431
462432 static void oom_event_worker(struct work_struct *data);
463
-static void fatal_event_worker(struct work_struct *data);
433
+static void cs_error_worker(struct work_struct *data);
464434
465435 /* Between reg and reg_ex, one and only one must be null */
466436 static int csf_queue_register_internal(struct kbase_context *kctx,
@@ -475,7 +445,7 @@
475445
476446 /* Only one pointer expected, otherwise coding error */
477447 if ((reg == NULL && reg_ex == NULL) || (reg && reg_ex)) {
478
- dev_err(kctx->kbdev->dev,
448
+ dev_dbg(kctx->kbdev->dev,
479449 "Error, one and only one param-ptr expected!");
480450 return -EINVAL;
481451 }
@@ -508,7 +478,8 @@
508478 region = kbase_region_tracker_find_region_enclosing_address(kctx,
509479 queue_addr);
510480
511
- if (kbase_is_region_invalid_or_free(region)) {
481
+ if (kbase_is_region_invalid_or_free(region) || kbase_is_region_shrinkable(region) ||
482
+ region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
512483 ret = -ENOENT;
513484 goto out_unlock_vm;
514485 }
@@ -525,24 +496,24 @@
525496 if (reg_ex && reg_ex->ex_buffer_size) {
526497 int buf_pages = (reg_ex->ex_buffer_size +
527498 (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
499
+ struct kbase_va_region *region_ex =
500
+ kbase_region_tracker_find_region_enclosing_address(kctx,
501
+ reg_ex->ex_buffer_base);
528502
529
- region = kbase_region_tracker_find_region_enclosing_address(
530
- kctx, reg_ex->ex_buffer_base);
531
- if (kbase_is_region_invalid_or_free(region)) {
503
+ if (kbase_is_region_invalid_or_free(region_ex)) {
532504 ret = -ENOENT;
533505 goto out_unlock_vm;
534506 }
535507
536
- if (buf_pages > (region->nr_pages -
537
- ((reg_ex->ex_buffer_base >> PAGE_SHIFT) -
538
- region->start_pfn))) {
508
+ if (buf_pages > (region_ex->nr_pages -
509
+ ((reg_ex->ex_buffer_base >> PAGE_SHIFT) - region_ex->start_pfn))) {
539510 ret = -EINVAL;
540511 goto out_unlock_vm;
541512 }
542513
543
- region = kbase_region_tracker_find_region_enclosing_address(
544
- kctx, reg_ex->ex_offset_var_addr);
545
- if (kbase_is_region_invalid_or_free(region)) {
514
+ region_ex = kbase_region_tracker_find_region_enclosing_address(
515
+ kctx, reg_ex->ex_offset_var_addr);
516
+ if (kbase_is_region_invalid_or_free(region_ex)) {
546517 ret = -ENOENT;
547518 goto out_unlock_vm;
548519 }
@@ -557,13 +528,16 @@
557528
558529 queue->kctx = kctx;
559530 queue->base_addr = queue_addr;
531
+
560532 queue->queue_reg = region;
533
+ kbase_va_region_no_user_free_inc(region);
534
+
561535 queue->size = (queue_size << PAGE_SHIFT);
562536 queue->csi_index = KBASEP_IF_NR_INVALID;
563537 queue->enabled = false;
564538
565539 queue->priority = reg->priority;
566
- atomic_set(&queue->refcount, 1);
540
+ kbase_refcount_set(&queue->refcount, 1);
567541
568542 queue->group = NULL;
569543 queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
@@ -574,16 +548,24 @@
574548 queue->sync_ptr = 0;
575549 queue->sync_value = 0;
576550
551
+#if IS_ENABLED(CONFIG_DEBUG_FS)
552
+ queue->saved_cmd_ptr = 0;
553
+#endif
554
+
577555 queue->sb_status = 0;
578556 queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;
557
+
558
+ atomic_set(&queue->pending, 0);
579559
580560 INIT_LIST_HEAD(&queue->link);
581561 INIT_LIST_HEAD(&queue->error.link);
582562 INIT_WORK(&queue->oom_event_work, oom_event_worker);
583
- INIT_WORK(&queue->fatal_event_work, fatal_event_worker);
563
+ INIT_WORK(&queue->cs_error_work, cs_error_worker);
584564 list_add(&queue->link, &kctx->csf.queue_list);
585565
586
- region->flags |= KBASE_REG_NO_USER_FREE;
566
+ queue->extract_ofs = 0;
567
+
568
+ region->user_data = queue;
587569
588570 /* Initialize the cs_trace configuration parameters, When buffer_size
589571 * is 0, trace is disabled. Here we only update the fields when
@@ -612,6 +594,13 @@
612594 int kbase_csf_queue_register(struct kbase_context *kctx,
613595 struct kbase_ioctl_cs_queue_register *reg)
614596 {
597
+ /* Validate the ring buffer configuration parameters */
598
+ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
599
+ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
600
+ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
601
+ reg->buffer_gpu_addr & ~PAGE_MASK)
602
+ return -EINVAL;
603
+
615604 return csf_queue_register_internal(kctx, reg, NULL);
616605 }
617606
@@ -630,14 +619,21 @@
630619 if (glb_version < kbase_csf_interface_version(1, 1, 0))
631620 return -EINVAL;
632621
633
- /* Validate the cs_trace configuration parameters */
634
- if (reg->ex_buffer_size &&
635
- ((reg->ex_event_size > max_size) ||
636
- (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
637
- (reg->ex_buffer_size < min_buf_size)))
638
- return -EINVAL;
622
+ /* Validate the ring buffer configuration parameters */
623
+ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
624
+ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
625
+ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
626
+ reg->buffer_gpu_addr & ~PAGE_MASK)
627
+ return -EINVAL;
639628
640
- return csf_queue_register_internal(kctx, NULL, reg);
629
+ /* Validate the cs_trace configuration parameters */
630
+ if (reg->ex_buffer_size &&
631
+ ((reg->ex_event_size > max_size) ||
632
+ (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
633
+ (reg->ex_buffer_size < min_buf_size)))
634
+ return -EINVAL;
635
+
636
+ return csf_queue_register_internal(kctx, NULL, reg);
641637 }
642638
643639 static void unbind_queue(struct kbase_context *kctx,
....@@ -664,8 +660,6 @@
664660 queue = find_queue(kctx, term->buffer_gpu_addr);
665661
666662 if (queue) {
667
- unsigned long flags;
668
-
669663 /* As the GPU queue has been terminated by the
670664 * user space, undo the actions that were performed when the
671665 * queue was registered i.e. remove the queue from the per
@@ -678,23 +672,9 @@
678672 unbind_queue(kctx, queue);
679673
680674 kbase_gpu_vm_lock(kctx);
681
- if (!WARN_ON(!queue->queue_reg)) {
682
- /* After this the Userspace would be able to free the
683
- * memory for GPU queue. In case the Userspace missed
684
- * terminating the queue, the cleanup will happen on
685
- * context termination where teardown of region tracker
686
- * would free up the GPU queue memory.
687
- */
688
- queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE;
689
- }
675
+ if (!WARN_ON(!queue->queue_reg))
676
+ queue->queue_reg->user_data = NULL;
690677 kbase_gpu_vm_unlock(kctx);
691
-
692
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
693
- dev_dbg(kctx->kbdev->dev,
694
- "Remove any pending command queue fatal from context %pK\n",
695
- (void *)kctx);
696
- list_del_init(&queue->error.link);
697
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
698678
699679 release_queue(queue);
700680 }
@@ -776,10 +756,69 @@
776756 return group;
777757 }
778758
759
+static void enqueue_gpu_submission_work(struct kbase_context *const kctx)
760
+{
761
+ queue_work(system_highpri_wq, &kctx->csf.pending_submission_work);
762
+}
763
+
764
+/**
765
+ * pending_submission_worker() - Work item to process pending kicked GPU command queues.
766
+ *
767
+ * @work: Pointer to pending_submission_work.
768
+ *
769
+ * This function starts all pending queues, for which the work
770
+ * was previously submitted via ioctl call from application thread.
771
+ * If the queue is already scheduled and resident, it will be started
772
+ * right away, otherwise once the group is made resident.
773
+ */
774
+static void pending_submission_worker(struct work_struct *work)
775
+{
776
+ struct kbase_context *kctx =
777
+ container_of(work, struct kbase_context, csf.pending_submission_work);
778
+ struct kbase_device *kbdev = kctx->kbdev;
779
+ struct kbase_queue *queue;
780
+ int err = kbase_reset_gpu_prevent_and_wait(kbdev);
781
+
782
+ if (err) {
783
+ dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue ");
784
+ return;
785
+ }
786
+
787
+ mutex_lock(&kctx->csf.lock);
788
+
789
+ /* Iterate through the queue list and schedule the pending ones for submission. */
790
+ list_for_each_entry(queue, &kctx->csf.queue_list, link) {
791
+ if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) {
792
+ struct kbase_queue_group *group = get_bound_queue_group(queue);
793
+ int ret;
794
+
795
+ if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) {
796
+ dev_dbg(kbdev->dev, "queue is not bound to a group");
797
+ continue;
798
+ }
799
+
800
+ ret = kbase_csf_scheduler_queue_start(queue);
801
+ if (unlikely(ret)) {
802
+ dev_dbg(kbdev->dev, "Failed to start queue");
803
+ if (ret == -EBUSY) {
804
+ atomic_cmpxchg(&queue->pending, 0, 1);
805
+ enqueue_gpu_submission_work(kctx);
806
+ }
807
+ }
808
+ }
809
+ }
810
+
811
+ mutex_unlock(&kctx->csf.lock);
812
+
813
+ kbase_reset_gpu_allow(kbdev);
814
+}
815
+
779816 void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot)
780817 {
781818 if (WARN_ON(slot < 0))
782819 return;
820
+
821
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
783822
784823 kbase_csf_ring_csg_slots_doorbell(kbdev, (u32) (1 << slot));
785824 }
@@ -793,8 +832,19 @@
793832 (u32) ((1U << kbdev->csf.global_iface.group_num) - 1);
794833 u32 value;
795834
835
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
836
+
796837 if (WARN_ON(slot_bitmap > allowed_bitmap))
797838 return;
839
+
840
+ /* The access to GLB_DB_REQ/ACK needs to be ordered with respect to CSG_REQ/ACK and
841
+ * CSG_DB_REQ/ACK to avoid a scenario where a CSI request overlaps with a CSG request
842
+ * or 2 CSI requests overlap and FW ends up missing the 2nd request.
843
+ * Memory barrier is required, both on Host and FW side, to guarantee the ordering.
844
+ *
845
+ * 'osh' is used as CPU and GPU would be in the same Outer shareable domain.
846
+ */
847
+ dmb(osh);
798848
799849 value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK);
800850 value ^= slot_bitmap;
@@ -822,6 +872,8 @@
822872 struct kbase_csf_cmd_stream_group_info *ginfo;
823873 u32 value;
824874
875
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
876
+
825877 if (WARN_ON(csg_nr < 0) ||
826878 WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
827879 return;
@@ -831,6 +883,14 @@
831883 if (WARN_ON(csi_index < 0) ||
832884 WARN_ON(csi_index >= ginfo->stream_num))
833885 return;
886
+
887
+ /* The access to CSG_DB_REQ/ACK needs to be ordered with respect to
888
+ * CS_REQ/ACK to avoid a scenario where CSG_DB_REQ/ACK becomes visibile to
889
+ * FW before CS_REQ/ACK is set.
890
+ *
891
+ * 'osh' is used as CPU and GPU would be in the same outer shareable domain.
892
+ */
893
+ dmb(osh);
834894
835895 value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK);
836896 value ^= (1 << csi_index);
@@ -845,36 +905,37 @@
845905 struct kbase_ioctl_cs_queue_kick *kick)
846906 {
847907 struct kbase_device *kbdev = kctx->kbdev;
848
- struct kbase_queue_group *group;
849
- struct kbase_queue *queue;
908
+ bool trigger_submission = false;
909
+ struct kbase_va_region *region;
850910 int err = 0;
851911
852
- err = kbase_reset_gpu_prevent_and_wait(kbdev);
853
- if (err) {
854
- dev_warn(
855
- kbdev->dev,
856
- "Unsuccessful GPU reset detected when kicking queue (buffer_addr=0x%.16llx)",
857
- kick->buffer_gpu_addr);
858
- return err;
859
- }
912
+ KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK(kbdev, kctx->id, kick->buffer_gpu_addr);
860913
861
- mutex_lock(&kctx->csf.lock);
862
- queue = find_queue(kctx, kick->buffer_gpu_addr);
863
- if (!queue)
864
- err = -EINVAL;
914
+ /* GPU work submission happening asynchronously to prevent the contention with
915
+ * scheduler lock and as the result blocking application thread. For this reason,
916
+ * the vm_lock is used here to get the reference to the queue based on its buffer_gpu_addr
917
+ * from the context list of active va_regions.
918
+ * Once the target queue is found the pending flag is set to one atomically avoiding
919
+ * a race between submission ioctl thread and the work item.
920
+ */
921
+ kbase_gpu_vm_lock(kctx);
922
+ region = kbase_region_tracker_find_region_enclosing_address(kctx, kick->buffer_gpu_addr);
923
+ if (!kbase_is_region_invalid_or_free(region)) {
924
+ struct kbase_queue *queue = region->user_data;
865925
866
- if (!err) {
867
- group = get_bound_queue_group(queue);
868
- if (!group) {
869
- dev_err(kctx->kbdev->dev, "queue not bound\n");
870
- err = -EINVAL;
926
+ if (queue) {
927
+ atomic_cmpxchg(&queue->pending, 0, 1);
928
+ trigger_submission = true;
871929 }
930
+ } else {
931
+ dev_dbg(kbdev->dev,
932
+ "Attempt to kick GPU queue without a valid command buffer region");
933
+ err = -EFAULT;
872934 }
935
+ kbase_gpu_vm_unlock(kctx);
873936
874
- if (!err)
875
- err = kbase_csf_scheduler_queue_start(queue);
876
- mutex_unlock(&kctx->csf.lock);
877
- kbase_reset_gpu_allow(kbdev);
937
+ if (likely(trigger_submission))
938
+ enqueue_gpu_submission_work(kctx);
878939
879940 return err;
880941 }
@@ -884,19 +945,23 @@
884945 {
885946 lockdep_assert_held(&kctx->csf.lock);
886947
948
+ if (WARN_ON(queue->csi_index < 0))
949
+ return;
950
+
887951 if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) {
888952 unsigned long flags;
889953
890954 kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags);
891955 bitmap_clear(queue->group->protm_pending_bitmap,
892956 queue->csi_index, 1);
893
- KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, PROTM_PENDING_CLEAR,
957
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, CSI_PROTM_PEND_CLEAR,
894958 queue->group, queue, queue->group->protm_pending_bitmap[0]);
895959 queue->group->bound_queues[queue->csi_index] = NULL;
896960 queue->group = NULL;
897961 kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
898962
899963 put_user_pages_mmap_handle(kctx, queue);
964
+ WARN_ON_ONCE(queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID);
900965 queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
901966 }
902967 }
@@ -938,7 +1003,16 @@
9381003 }
9391004 }
9401005
941
-void kbase_csf_queue_unbind(struct kbase_queue *queue)
1006
+static bool kbase_csf_queue_phys_allocated(struct kbase_queue *queue)
1007
+{
1008
+ /* The queue's phys are zeroed when allocation fails. Both of them being
1009
+ * zero is an impossible condition for a successful allocated set of phy pages.
1010
+ */
1011
+
1012
+ return (queue->phys[0].tagged_addr | queue->phys[1].tagged_addr);
1013
+}
1014
+
1015
+void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit)
9421016 {
9431017 struct kbase_context *kctx = queue->kctx;
9441018
@@ -952,7 +1026,7 @@
9521026 * whereas CSG TERM request would result in an immediate abort or
9531027 * cancellation of the pending work.
9541028 */
955
- if (current->flags & PF_EXITING) {
1029
+ if (process_exit) {
9561030 struct kbase_queue_group *group = get_bound_queue_group(queue);
9571031
9581032 if (group)
....@@ -963,8 +1037,8 @@
9631037 unbind_queue(kctx, queue);
9641038 }
9651039
966
- /* Free the resources, if allocated for this queue. */
967
- if (queue->reg)
1040
+ /* Free the resources, if allocated phys for this queue */
1041
+ if (kbase_csf_queue_phys_allocated(queue))
9681042 kbase_csf_free_command_stream_user_pages(kctx, queue);
9691043 }
9701044
@@ -977,8 +1051,8 @@
9771051 WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND);
9781052 unbind_stopped_queue(kctx, queue);
9791053
980
- /* Free the resources, if allocated for this queue. */
981
- if (queue->reg)
1054
+ /* Free the resources, if allocated phys for this queue */
1055
+ if (kbase_csf_queue_phys_allocated(queue))
9821056 kbase_csf_free_command_stream_user_pages(kctx, queue);
9831057 }
9841058
@@ -1041,159 +1115,39 @@
10411115 * @kctx: Pointer to kbase context where the queue group is created at
10421116 * @s_buf: Pointer to suspend buffer that is attached to queue group
10431117 *
1044
- * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
1045
- * MMU page table. Otherwise -ENOMEM.
1118
+ * Return: 0 if phy-pages for the suspend buffer is successfully allocated.
1119
+ * Otherwise -ENOMEM or error code.
10461120 */
10471121 static int create_normal_suspend_buffer(struct kbase_context *const kctx,
10481122 struct kbase_normal_suspend_buffer *s_buf)
10491123 {
1050
- struct kbase_va_region *reg = NULL;
1051
- const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
10521124 const size_t nr_pages =
10531125 PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
1054
- int err = 0;
1126
+ int err;
10551127
10561128 lockdep_assert_held(&kctx->csf.lock);
10571129
1058
- /* Allocate and initialize Region Object */
1059
- reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0,
1060
- nr_pages, KBASE_REG_ZONE_MCU_SHARED);
1061
-
1062
- if (!reg)
1063
- return -ENOMEM;
1130
+ /* The suspend buffer's mapping address is valid only when the CSG is to
1131
+ * run on slot, initializing it 0, signalling the buffer is not mapped.
1132
+ */
1133
+ s_buf->gpu_va = 0;
10641134
10651135 s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL);
10661136
1067
- if (!s_buf->phy) {
1068
- err = -ENOMEM;
1069
- goto phy_alloc_failed;
1070
- }
1071
-
1072
- /* Get physical page for a normal suspend buffer */
1073
- err = kbase_mem_pool_alloc_pages(
1074
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
1075
- nr_pages, &s_buf->phy[0], false);
1076
-
1077
- if (err < 0)
1078
- goto phy_pages_alloc_failed;
1079
-
1080
- /* Insert Region Object into rbtree and make virtual address available
1081
- * to map it to physical page
1082
- */
1083
- mutex_lock(&kctx->kbdev->csf.reg_lock);
1084
- err = kbase_add_va_region_rbtree(kctx->kbdev, reg, 0, nr_pages, 1);
1085
- reg->flags &= ~KBASE_REG_FREE;
1086
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
1087
-
1088
- if (err)
1089
- goto add_va_region_failed;
1090
-
1091
- /* Update MMU table */
1092
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
1093
- reg->start_pfn, &s_buf->phy[0],
1094
- nr_pages, mem_flags,
1095
- MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW);
1096
- if (err)
1097
- goto mmu_insert_failed;
1098
-
1099
- s_buf->reg = reg;
1100
-
1101
- return 0;
1102
-
1103
-mmu_insert_failed:
1104
- mutex_lock(&kctx->kbdev->csf.reg_lock);
1105
- WARN_ON(kbase_remove_va_region(reg));
1106
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
1107
-
1108
-add_va_region_failed:
1109
- kbase_mem_pool_free_pages(
1110
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
1111
- &s_buf->phy[0], false, false);
1112
-
1113
-phy_pages_alloc_failed:
1114
- kfree(s_buf->phy);
1115
-phy_alloc_failed:
1116
- kfree(reg);
1117
-
1118
- return err;
1119
-}
1120
-
1121
-/**
1122
- * create_protected_suspend_buffer() - Create protected-mode suspend buffer
1123
- * per queue group
1124
- *
1125
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
1126
- * @s_buf: Pointer to suspend buffer that is attached to queue group
1127
- *
1128
- * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
1129
- * MMU page table. Otherwise -ENOMEM.
1130
- */
1131
-static int create_protected_suspend_buffer(struct kbase_device *const kbdev,
1132
- struct kbase_protected_suspend_buffer *s_buf)
1133
-{
1134
- struct kbase_va_region *reg = NULL;
1135
- struct tagged_addr *phys = NULL;
1136
- const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
1137
- const size_t nr_pages =
1138
- PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
1139
- int err = 0;
1140
-
1141
- /* Allocate and initialize Region Object */
1142
- reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
1143
- nr_pages, KBASE_REG_ZONE_MCU_SHARED);
1144
-
1145
- if (!reg)
1137
+ if (!s_buf->phy)
11461138 return -ENOMEM;
11471139
1148
- phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL);
1149
- if (!phys) {
1150
- err = -ENOMEM;
1151
- goto phy_alloc_failed;
1140
+ /* Get physical page for a normal suspend buffer */
1141
+ err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
1142
+ &s_buf->phy[0], false, kctx->task);
1143
+
1144
+ if (err < 0) {
1145
+ kfree(s_buf->phy);
1146
+ return err;
11521147 }
11531148
1154
- s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys,
1155
- nr_pages);
1156
- if (s_buf->pma == NULL) {
1157
- err = -ENOMEM;
1158
- goto pma_alloc_failed;
1159
- }
1160
-
1161
- /* Insert Region Object into rbtree and make virtual address available
1162
- * to map it to physical page
1163
- */
1164
- mutex_lock(&kbdev->csf.reg_lock);
1165
- err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_pages, 1);
1166
- reg->flags &= ~KBASE_REG_FREE;
1167
- mutex_unlock(&kbdev->csf.reg_lock);
1168
-
1169
- if (err)
1170
- goto add_va_region_failed;
1171
-
1172
- /* Update MMU table */
1173
- err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
1174
- reg->start_pfn, phys,
1175
- nr_pages, mem_flags, MCU_AS_NR,
1176
- KBASE_MEM_GROUP_CSF_FW);
1177
- if (err)
1178
- goto mmu_insert_failed;
1179
-
1180
- s_buf->reg = reg;
1181
- kfree(phys);
1149
+ kbase_process_page_usage_inc(kctx, nr_pages);
11821150 return 0;
1183
-
1184
-mmu_insert_failed:
1185
- mutex_lock(&kbdev->csf.reg_lock);
1186
- WARN_ON(kbase_remove_va_region(reg));
1187
- mutex_unlock(&kbdev->csf.reg_lock);
1188
-
1189
-add_va_region_failed:
1190
- kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
1191
-pma_alloc_failed:
1192
- kfree(phys);
1193
-phy_alloc_failed:
1194
- kfree(reg);
1195
-
1196
- return err;
11971151 }
11981152
11991153 static void timer_event_worker(struct work_struct *data);
@@ -1214,26 +1168,17 @@
12141168 static int create_suspend_buffers(struct kbase_context *const kctx,
12151169 struct kbase_queue_group * const group)
12161170 {
1217
- int err = 0;
1218
-
12191171 if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) {
12201172 dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n");
12211173 return -ENOMEM;
12221174 }
12231175
1224
- if (kctx->kbdev->csf.pma_dev) {
1225
- err = create_protected_suspend_buffer(kctx->kbdev,
1226
- &group->protected_suspend_buf);
1227
- if (err) {
1228
- term_normal_suspend_buffer(kctx,
1229
- &group->normal_suspend_buf);
1230
- dev_err(kctx->kbdev->dev, "Failed to create protected suspend buffer\n");
1231
- }
1232
- } else {
1233
- group->protected_suspend_buf.reg = NULL;
1234
- }
1176
+ /* Protected suspend buffer, runtime binding so just initialize it */
1177
+ group->protected_suspend_buf.gpu_va = 0;
1178
+ group->protected_suspend_buf.pma = NULL;
1179
+ group->protected_suspend_buf.alloc_retries = 0;
12351180
1236
- return err;
1181
+ return 0;
12371182 }
12381183
12391184 /**
@@ -1244,16 +1189,9 @@
12441189 */
12451190 static u32 generate_group_uid(void)
12461191 {
1247
- /* use first KBase device to store max UID */
1248
- struct kbase_device *kbdev = kbase_find_device(-1);
1249
- u32 uid = 1;
1192
+ static atomic_t global_csg_uid = ATOMIC_INIT(0);
12501193
1251
- if (kbdev)
1252
- uid = (u32) atomic_inc_return(&kbdev->group_max_uid_in_devices);
1253
- else
1254
- WARN(1, "NULL kbase device pointer in group UID generation");
1255
-
1256
- return uid;
1194
+ return (u32)atomic_inc_return(&global_csg_uid);
12571195 }
12581196
12591197 /**
@@ -1272,8 +1210,8 @@
12721210 int group_handle = find_free_group_handle(kctx);
12731211
12741212 if (group_handle < 0) {
1275
- dev_err(kctx->kbdev->dev,
1276
- "All queue group handles are already in use\n");
1213
+ dev_dbg(kctx->kbdev->dev,
1214
+ "All queue group handles are already in use");
12771215 } else {
12781216 struct kbase_queue_group * const group =
12791217 kmalloc(sizeof(struct kbase_queue_group),
@@ -1298,10 +1236,22 @@
12981236 group->tiler_max = create->in.tiler_max;
12991237 group->fragment_max = create->in.fragment_max;
13001238 group->compute_max = create->in.compute_max;
1239
+ group->csi_handlers = create->in.csi_handlers;
13011240 group->priority = kbase_csf_priority_queue_group_priority_to_relative(
13021241 kbase_csf_priority_check(kctx->kbdev, create->in.priority));
13031242 group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
13041243 group->faulted = false;
1244
+ group->cs_unrecoverable = false;
1245
+ group->reevaluate_idle_status = false;
1246
+
1247
+ group->csg_reg = NULL;
1248
+ group->csg_reg_bind_retries = 0;
1249
+
1250
+ group->dvs_buf = create->in.dvs_buf;
1251
+
1252
+#if IS_ENABLED(CONFIG_DEBUG_FS)
1253
+ group->deschedule_deferred_cnt = 0;
1254
+#endif
13051255
13061256 group->group_uid = generate_group_uid();
13071257 create->out.group_uid = group->group_uid;
@@ -1317,6 +1267,9 @@
13171267 MAX_SUPPORTED_STREAMS_PER_GROUP);
13181268
13191269 group->run_state = KBASE_CSF_GROUP_INACTIVE;
1270
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group,
1271
+ group->run_state);
1272
+
13201273 err = create_suspend_buffers(kctx, group);
13211274
13221275 if (err < 0) {
@@ -1336,6 +1289,18 @@
13361289 return group_handle;
13371290 }
13381291
1292
+static bool dvs_supported(u32 csf_version)
1293
+{
1294
+ if (GLB_VERSION_MAJOR_GET(csf_version) < 3)
1295
+ return false;
1296
+
1297
+ if (GLB_VERSION_MAJOR_GET(csf_version) == 3)
1298
+ if (GLB_VERSION_MINOR_GET(csf_version) < 2)
1299
+ return false;
1300
+
1301
+ return true;
1302
+}
1303
+
13391304 int kbase_csf_queue_group_create(struct kbase_context *const kctx,
13401305 union kbase_ioctl_cs_queue_group_create *const create)
13411306 {
@@ -1343,23 +1308,47 @@
13431308 const u32 tiler_count = hweight64(create->in.tiler_mask);
13441309 const u32 fragment_count = hweight64(create->in.fragment_mask);
13451310 const u32 compute_count = hweight64(create->in.compute_mask);
1311
+ size_t i;
1312
+
1313
+ for (i = 0; i < sizeof(create->in.padding); i++) {
1314
+ if (create->in.padding[i] != 0) {
1315
+ dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n");
1316
+ return -EINVAL;
1317
+ }
1318
+ }
13461319
13471320 mutex_lock(&kctx->csf.lock);
13481321
13491322 if ((create->in.tiler_max > tiler_count) ||
13501323 (create->in.fragment_max > fragment_count) ||
13511324 (create->in.compute_max > compute_count)) {
1352
- dev_err(kctx->kbdev->dev,
1353
- "Invalid maximum number of endpoints for a queue group\n");
1325
+ dev_dbg(kctx->kbdev->dev,
1326
+ "Invalid maximum number of endpoints for a queue group");
13541327 err = -EINVAL;
13551328 } else if (create->in.priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) {
1356
- dev_err(kctx->kbdev->dev, "Invalid queue group priority %u\n",
1329
+ dev_dbg(kctx->kbdev->dev, "Invalid queue group priority %u",
13571330 (unsigned int)create->in.priority);
13581331 err = -EINVAL;
13591332 } else if (!iface_has_enough_streams(kctx->kbdev, create->in.cs_min)) {
1360
- dev_err(kctx->kbdev->dev,
1361
- "No CSG has at least %d CSs\n",
1333
+ dev_dbg(kctx->kbdev->dev,
1334
+ "No CSG has at least %d CSs",
13621335 create->in.cs_min);
1336
+ err = -EINVAL;
1337
+ } else if (create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK) {
1338
+ dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u",
1339
+ create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK);
1340
+ err = -EINVAL;
1341
+ } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) &&
1342
+ create->in.dvs_buf) {
1343
+ dev_warn(
1344
+ kctx->kbdev->dev,
1345
+ "GPU does not support DVS but userspace is trying to use it");
1346
+ err = -EINVAL;
1347
+ } else if (dvs_supported(kctx->kbdev->csf.global_iface.version) &&
1348
+ !CSG_DVS_BUF_BUFFER_POINTER_GET(create->in.dvs_buf) &&
1349
+ CSG_DVS_BUF_BUFFER_SIZE_GET(create->in.dvs_buf)) {
1350
+ dev_warn(kctx->kbdev->dev,
1351
+ "DVS buffer pointer is null but size is not 0");
13631352 err = -EINVAL;
13641353 } else {
13651354 /* For the CSG which satisfies the condition for having
@@ -1389,60 +1378,39 @@
13891378 * @s_buf: Pointer to queue group suspend buffer to be freed
13901379 */
13911380 static void term_normal_suspend_buffer(struct kbase_context *const kctx,
1392
- struct kbase_normal_suspend_buffer *s_buf)
1381
+ struct kbase_normal_suspend_buffer *s_buf)
13931382 {
1394
- const size_t nr_pages =
1395
- PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
1383
+ const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
13961384
13971385 lockdep_assert_held(&kctx->csf.lock);
13981386
1399
- WARN_ON(kbase_mmu_teardown_pages(
1400
- kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
1401
- s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
1387
+ /* The group should not have a bind remaining on any suspend buf region */
1388
+ WARN_ONCE(s_buf->gpu_va, "Suspend buffer address should be 0 at termination");
14021389
1403
- WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
1404
-
1405
- mutex_lock(&kctx->kbdev->csf.reg_lock);
1406
- WARN_ON(kbase_remove_va_region(s_buf->reg));
1407
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
1408
-
1409
- kbase_mem_pool_free_pages(
1410
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
1411
- nr_pages, &s_buf->phy[0], false, false);
1390
+ kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
1391
+ &s_buf->phy[0], false, false);
1392
+ kbase_process_page_usage_dec(kctx, nr_pages);
14121393
14131394 kfree(s_buf->phy);
14141395 s_buf->phy = NULL;
1415
- kfree(s_buf->reg);
1416
- s_buf->reg = NULL;
14171396 }
14181397
14191398 /**
1420
- * term_protected_suspend_buffer() - Free normal-mode suspend buffer of
1399
+ * term_protected_suspend_buffer() - Free protected-mode suspend buffer of
14211400 * queue group
14221401 *
14231402 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
1424
- * @s_buf: Pointer to queue group suspend buffer to be freed
1403
+ * @sbuf: Pointer to queue group suspend buffer to be freed
14251404 */
14261405 static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
1427
- struct kbase_protected_suspend_buffer *s_buf)
1406
+ struct kbase_protected_suspend_buffer *sbuf)
14281407 {
1429
- const size_t nr_pages =
1430
- PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
1431
-
1432
- WARN_ON(kbase_mmu_teardown_pages(
1433
- kbdev, &kbdev->csf.mcu_mmu,
1434
- s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
1435
-
1436
- WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
1437
-
1438
- mutex_lock(&kbdev->csf.reg_lock);
1439
- WARN_ON(kbase_remove_va_region(s_buf->reg));
1440
- mutex_unlock(&kbdev->csf.reg_lock);
1441
-
1442
- kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
1443
- s_buf->pma = NULL;
1444
- kfree(s_buf->reg);
1445
- s_buf->reg = NULL;
1408
+ WARN_ONCE(sbuf->gpu_va, "Suspend buf should have been unmapped inside scheduler!");
1409
+ if (sbuf->pma) {
1410
+ const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
1411
+ kbase_csf_protected_memory_free(kbdev, sbuf->pma, nr_pages, true);
1412
+ sbuf->pma = NULL;
1413
+ }
14461414 }
14471415
14481416 void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
@@ -1474,6 +1442,7 @@
14741442 &group->protected_suspend_buf);
14751443
14761444 group->run_state = KBASE_CSF_GROUP_TERMINATED;
1445
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group, group->run_state);
14771446 }
14781447
14791448 /**
....@@ -1504,10 +1473,51 @@
15041473 kbase_csf_term_descheduled_queue_group(group);
15051474 }
15061475
1476
+/**
1477
+ * wait_group_deferred_deschedule_completion - Wait for refcount of the group to
1478
+ * become 0 that was taken when the group deschedule had to be deferred.
1479
+ *
1480
+ * @group: Pointer to GPU command queue group that is being deleted.
1481
+ *
1482
+ * This function is called when Userspace deletes the group and after the group
1483
+ * has been descheduled. The function synchronizes with the other threads that were
1484
+ * also trying to deschedule the group whilst the dumping was going on for a fault.
1485
+ * Please refer the documentation of wait_for_dump_complete_on_group_deschedule()
1486
+ * for more details.
1487
+ */
1488
+static void wait_group_deferred_deschedule_completion(struct kbase_queue_group *group)
1489
+{
1490
+#if IS_ENABLED(CONFIG_DEBUG_FS)
1491
+ struct kbase_context *kctx = group->kctx;
1492
+
1493
+ lockdep_assert_held(&kctx->csf.lock);
1494
+
1495
+ if (likely(!group->deschedule_deferred_cnt))
1496
+ return;
1497
+
1498
+ mutex_unlock(&kctx->csf.lock);
1499
+ wait_event(kctx->kbdev->csf.event_wait, !group->deschedule_deferred_cnt);
1500
+ mutex_lock(&kctx->csf.lock);
1501
+#endif
1502
+}
1503
+
15071504 static void cancel_queue_group_events(struct kbase_queue_group *group)
15081505 {
15091506 cancel_work_sync(&group->timer_event_work);
15101507 cancel_work_sync(&group->protm_event_work);
1508
+}
1509
+
1510
+static void remove_pending_group_fatal_error(struct kbase_queue_group *group)
1511
+{
1512
+ struct kbase_context *kctx = group->kctx;
1513
+
1514
+ dev_dbg(kctx->kbdev->dev,
1515
+ "Remove any pending group fatal error from context %pK\n",
1516
+ (void *)group->kctx);
1517
+
1518
+ kbase_csf_event_remove_error(kctx, &group->error_tiler_oom);
1519
+ kbase_csf_event_remove_error(kctx, &group->error_timeout);
1520
+ kbase_csf_event_remove_error(kctx, &group->error_fatal);
15111521 }
15121522
15131523 void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
@@ -1532,39 +1542,44 @@
15321542 group = find_queue_group(kctx, group_handle);
15331543
15341544 if (group) {
1535
- unsigned long flags;
1536
-
1537
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
1538
-
1539
- dev_dbg(kbdev->dev,
1540
- "Remove any pending group fatal error from context %pK\n",
1541
- (void *)group->kctx);
1542
-
1543
- list_del_init(&group->error_tiler_oom.link);
1544
- list_del_init(&group->error_timeout.link);
1545
- list_del_init(&group->error_fatal.link);
1546
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
1547
-
1548
- term_queue_group(group);
15491545 kctx->csf.queue_groups[group_handle] = NULL;
1546
+ /* Stop the running of the given group */
1547
+ term_queue_group(group);
1548
+ mutex_unlock(&kctx->csf.lock);
1549
+
1550
+ if (reset_prevented) {
1551
+ /* Allow GPU reset before cancelling the group specific
1552
+ * work item to avoid potential deadlock.
1553
+ * Reset prevention isn't needed after group termination.
1554
+ */
1555
+ kbase_reset_gpu_allow(kbdev);
1556
+ reset_prevented = false;
1557
+ }
1558
+
1559
+ /* Cancel any pending event callbacks. If one is in progress
1560
+ * then this thread waits synchronously for it to complete (which
1561
+ * is why we must unlock the context first). We already ensured
1562
+ * that no more callbacks can be enqueued by terminating the group.
1563
+ */
1564
+ cancel_queue_group_events(group);
1565
+
1566
+ mutex_lock(&kctx->csf.lock);
1567
+
1568
+ /* Clean up after the termination */
1569
+ remove_pending_group_fatal_error(group);
1570
+
1571
+ wait_group_deferred_deschedule_completion(group);
15501572 }
15511573
15521574 mutex_unlock(&kctx->csf.lock);
15531575 if (reset_prevented)
15541576 kbase_reset_gpu_allow(kbdev);
15551577
1556
- if (!group)
1557
- return;
1558
-
1559
- /* Cancel any pending event callbacks. If one is in progress
1560
- * then this thread waits synchronously for it to complete (which
1561
- * is why we must unlock the context first). We already ensured
1562
- * that no more callbacks can be enqueued by terminating the group.
1563
- */
1564
- cancel_queue_group_events(group);
15651578 kfree(group);
15661579 }
1580
+KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate);
15671581
1582
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
15681583 int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
15691584 struct kbase_suspend_copy_buffer *sus_buf,
15701585 u8 group_handle)
@@ -1595,48 +1610,7 @@
15951610
15961611 return err;
15971612 }
1598
-
1599
-/**
1600
- * add_error() - Add an error to the list of errors to report to user space
1601
- *
1602
- * @kctx: Address of a base context associated with a GPU address space.
1603
- * @error: Address of the item to be added to the context's pending error list.
1604
- * @data: Error data to be returned to userspace.
1605
- *
1606
- * Does not wake up the event queue blocking a user thread in kbase_poll. This
1607
- * is to make it more efficient to add multiple errors.
1608
- *
1609
- * The added error must not already be on the context's list of errors waiting
1610
- * to be reported (e.g. because a previous error concerning the same object has
1611
- * not yet been reported).
1612
- */
1613
-static void add_error(struct kbase_context *const kctx,
1614
- struct kbase_csf_notification *const error,
1615
- struct base_csf_notification const *const data)
1616
-{
1617
- unsigned long flags;
1618
-
1619
- if (WARN_ON(!kctx))
1620
- return;
1621
-
1622
- if (WARN_ON(!error))
1623
- return;
1624
-
1625
- if (WARN_ON(!data))
1626
- return;
1627
-
1628
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
1629
-
1630
- if (!WARN_ON(!list_empty(&error->link))) {
1631
- error->data = *data;
1632
- list_add_tail(&error->link, &kctx->csf.error_list);
1633
- dev_dbg(kctx->kbdev->dev,
1634
- "Added error %pK of type %d in context %pK\n",
1635
- (void *)error, data->type, (void *)kctx);
1636
- }
1637
-
1638
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
1639
-}
1613
+#endif
16401614
16411615 void kbase_csf_add_group_fatal_error(
16421616 struct kbase_queue_group *const group,
....@@ -1660,7 +1634,7 @@
16601634 }
16611635 };
16621636
1663
- add_error(group->kctx, &group->error_fatal, &error);
1637
+ kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error);
16641638 }
16651639
16661640 void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
@@ -1698,29 +1672,12 @@
16981672
16991673 int kbase_csf_ctx_init(struct kbase_context *kctx)
17001674 {
1701
- struct kbase_device *kbdev = kctx->kbdev;
17021675 int err = -ENOMEM;
17031676
1704
- INIT_LIST_HEAD(&kctx->csf.event_callback_list);
17051677 INIT_LIST_HEAD(&kctx->csf.queue_list);
17061678 INIT_LIST_HEAD(&kctx->csf.link);
1707
- INIT_LIST_HEAD(&kctx->csf.error_list);
17081679
1709
- spin_lock_init(&kctx->csf.event_lock);
1710
- kctx->csf.user_reg_vma = NULL;
1711
- mutex_lock(&kbdev->pm.lock);
1712
- /* The inode information for /dev/malixx file is not available at the
1713
- * time of device probe as the inode is created when the device node
1714
- * is created by udevd (through mknod).
1715
- */
1716
- if (kctx->filp) {
1717
- if (!kbdev->csf.mali_file_inode)
1718
- kbdev->csf.mali_file_inode = kctx->filp->f_inode;
1719
-
1720
- /* inode is unique for a file */
1721
- WARN_ON(kbdev->csf.mali_file_inode != kctx->filp->f_inode);
1722
- }
1723
- mutex_unlock(&kbdev->pm.lock);
1680
+ kbase_csf_event_init(kctx);
17241681
17251682 /* Mark all the cookies as 'free' */
17261683 bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
@@ -1737,9 +1694,18 @@
17371694 if (likely(!err)) {
17381695 err = kbase_csf_tiler_heap_context_init(kctx);
17391696
1740
- if (likely(!err))
1697
+ if (likely(!err)) {
17411698 mutex_init(&kctx->csf.lock);
1742
- else
1699
+ INIT_WORK(&kctx->csf.pending_submission_work,
1700
+ pending_submission_worker);
1701
+
1702
+ err = kbasep_ctx_user_reg_page_mapping_init(kctx);
1703
+
1704
+ if (unlikely(err))
1705
+ kbase_csf_tiler_heap_context_term(kctx);
1706
+ }
1707
+
1708
+ if (unlikely(err))
17431709 kbase_csf_kcpu_queue_context_term(kctx);
17441710 }
17451711
@@ -1822,7 +1788,6 @@
18221788 * for queue groups & kcpu queues, hence no need to explicitly remove
18231789 * those debugfs files.
18241790 */
1825
- kbase_csf_event_wait_remove_all(kctx);
18261791
18271792 /* Wait for a GPU reset if it is happening, prevent it if not happening */
18281793 err = kbase_reset_gpu_prevent_and_wait(kbdev);
@@ -1835,17 +1800,24 @@
18351800 reset_prevented = true;
18361801
18371802 mutex_lock(&kctx->csf.lock);
1803
+
18381804 /* Iterate through the queue groups that were not terminated by
18391805 * userspace and issue the term request to firmware for them.
18401806 */
18411807 for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
1842
- if (kctx->csf.queue_groups[i])
1843
- term_queue_group(kctx->csf.queue_groups[i]);
1808
+ struct kbase_queue_group *group = kctx->csf.queue_groups[i];
1809
+
1810
+ if (group) {
1811
+ remove_pending_group_fatal_error(group);
1812
+ term_queue_group(group);
1813
+ }
18441814 }
18451815 mutex_unlock(&kctx->csf.lock);
18461816
18471817 if (reset_prevented)
18481818 kbase_reset_gpu_allow(kbdev);
1819
+
1820
+ cancel_work_sync(&kctx->csf.pending_submission_work);
18491821
18501822 /* Now that all queue groups have been terminated, there can be no
18511823 * more OoM or timer event interrupts but there can be inflight work
@@ -1891,200 +1863,45 @@
18911863 * only one reference left that was taken when queue was
18921864 * registered.
18931865 */
1894
- if (atomic_read(&queue->refcount) != 1)
1895
- dev_warn(kctx->kbdev->dev,
1896
- "Releasing queue with incorrect refcounting!\n");
1866
+ WARN_ON(kbase_refcount_read(&queue->refcount) != 1);
18971867 list_del_init(&queue->link);
18981868 release_queue(queue);
18991869 }
19001870
19011871 mutex_unlock(&kctx->csf.lock);
19021872
1873
+ kbasep_ctx_user_reg_page_mapping_term(kctx);
19031874 kbase_csf_tiler_heap_context_term(kctx);
19041875 kbase_csf_kcpu_queue_context_term(kctx);
19051876 kbase_csf_scheduler_context_term(kctx);
1877
+ kbase_csf_event_term(kctx);
19061878
19071879 mutex_destroy(&kctx->csf.lock);
1908
-}
1909
-
1910
-int kbase_csf_event_wait_add(struct kbase_context *kctx,
1911
- kbase_csf_event_callback *callback, void *param)
1912
-{
1913
- int err = -ENOMEM;
1914
- struct kbase_csf_event *event =
1915
- kzalloc(sizeof(struct kbase_csf_event), GFP_KERNEL);
1916
-
1917
- if (event) {
1918
- unsigned long flags;
1919
-
1920
- event->kctx = kctx;
1921
- event->callback = callback;
1922
- event->param = param;
1923
-
1924
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
1925
- list_add_tail(&event->link, &kctx->csf.event_callback_list);
1926
- dev_dbg(kctx->kbdev->dev,
1927
- "Added event handler %pK with param %pK\n", event,
1928
- event->param);
1929
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
1930
-
1931
- err = 0;
1932
- }
1933
-
1934
- return err;
1935
-}
1936
-
1937
-void kbase_csf_event_wait_remove(struct kbase_context *kctx,
1938
- kbase_csf_event_callback *callback, void *param)
1939
-{
1940
- struct kbase_csf_event *event;
1941
- unsigned long flags;
1942
-
1943
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
1944
-
1945
- list_for_each_entry(event, &kctx->csf.event_callback_list, link) {
1946
- if ((event->callback == callback) && (event->param == param)) {
1947
- list_del(&event->link);
1948
- dev_dbg(kctx->kbdev->dev,
1949
- "Removed event handler %pK with param %pK\n",
1950
- event, event->param);
1951
- kfree(event);
1952
- break;
1953
- }
1954
- }
1955
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
1956
-}
1957
-
1958
-bool kbase_csf_read_error(struct kbase_context *kctx,
1959
- struct base_csf_notification *event_data)
1960
-{
1961
- bool got_event = true;
1962
- struct kbase_csf_notification *error_data = NULL;
1963
- unsigned long flags;
1964
-
1965
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
1966
-
1967
- if (likely(!list_empty(&kctx->csf.error_list))) {
1968
- error_data = list_first_entry(&kctx->csf.error_list,
1969
- struct kbase_csf_notification, link);
1970
- list_del_init(&error_data->link);
1971
- *event_data = error_data->data;
1972
- dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n",
1973
- (void *)error_data, (void *)kctx);
1974
- } else {
1975
- got_event = false;
1976
- }
1977
-
1978
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
1979
-
1980
- return got_event;
1981
-}
1982
-
1983
-bool kbase_csf_error_pending(struct kbase_context *kctx)
1984
-{
1985
- bool event_pended = false;
1986
- unsigned long flags;
1987
-
1988
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
1989
- event_pended = !list_empty(&kctx->csf.error_list);
1990
- dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n",
1991
- event_pended ? "An" : "No", (void *)kctx);
1992
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
1993
-
1994
- return event_pended;
1995
-}
1996
-
1997
-void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
1998
-{
1999
- struct kbase_csf_event *event, *next_event;
2000
- unsigned long flags;
2001
-
2002
- dev_dbg(kctx->kbdev->dev,
2003
- "Signal event (%s GPU notify) for context %pK\n",
2004
- notify_gpu ? "with" : "without", (void *)kctx);
2005
-
2006
- /* First increment the signal count and wake up event thread.
2007
- */
2008
- atomic_set(&kctx->event_count, 1);
2009
- kbase_event_wakeup(kctx);
2010
-
2011
- /* Signal the CSF firmware. This is to ensure that pending command
2012
- * stream synch object wait operations are re-evaluated.
2013
- * Write to GLB_DOORBELL would suffice as spec says that all pending
2014
- * synch object wait operations are re-evaluated on a write to any
2015
- * CS_DOORBELL/GLB_DOORBELL register.
2016
- */
2017
- if (notify_gpu) {
2018
- spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
2019
- if (kctx->kbdev->pm.backend.gpu_powered)
2020
- kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR);
2021
- KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u);
2022
- spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
2023
- }
2024
-
2025
- /* Now invoke the callbacks registered on backend side.
2026
- * Allow item removal inside the loop, if requested by the callback.
2027
- */
2028
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
2029
-
2030
- list_for_each_entry_safe(
2031
- event, next_event, &kctx->csf.event_callback_list, link) {
2032
- enum kbase_csf_event_callback_action action;
2033
-
2034
- dev_dbg(kctx->kbdev->dev,
2035
- "Calling event handler %pK with param %pK\n",
2036
- (void *)event, event->param);
2037
- action = event->callback(event->param);
2038
- if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) {
2039
- list_del(&event->link);
2040
- kfree(event);
2041
- }
2042
- }
2043
-
2044
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
2045
-}
2046
-
2047
-void kbase_csf_event_wait_remove_all(struct kbase_context *kctx)
2048
-{
2049
- struct kbase_csf_event *event, *next_event;
2050
- unsigned long flags;
2051
-
2052
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
2053
-
2054
- list_for_each_entry_safe(
2055
- event, next_event, &kctx->csf.event_callback_list, link) {
2056
- list_del(&event->link);
2057
- dev_dbg(kctx->kbdev->dev,
2058
- "Removed event handler %pK with param %pK\n",
2059
- (void *)event, event->param);
2060
- kfree(event);
2061
- }
2062
-
2063
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
20641880 }
20651881
20661882 /**
20671883 * handle_oom_event - Handle the OoM event generated by the firmware for the
20681884 * CSI.
20691885 *
1886
+ * @group: Pointer to the CSG group the oom-event belongs to.
1887
+ * @stream: Pointer to the structure containing info provided by the firmware
1888
+ * about the CSI.
1889
+ *
20701890 * This function will handle the OoM event request from the firmware for the
20711891 * CS. It will retrieve the address of heap context and heap's
20721892 * statistics (like number of render passes in-flight) from the CS's kernel
2073
- * kernel output page and pass them to the tiler heap function to allocate a
1893
+ * output page and pass them to the tiler heap function to allocate a
20741894 * new chunk.
20751895 * It will also update the CS's kernel input page with the address
20761896 * of a new chunk that was allocated.
20771897 *
2078
- * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
2079
- * @stream: Pointer to the structure containing info provided by the firmware
2080
- * about the CSI.
2081
- *
20821898 * Return: 0 if successfully handled the request, otherwise a negative error
20831899 * code on failure.
20841900 */
2085
-static int handle_oom_event(struct kbase_context *const kctx,
2086
- struct kbase_csf_cmd_stream_info const *const stream)
1901
+static int handle_oom_event(struct kbase_queue_group *const group,
1902
+ struct kbase_csf_cmd_stream_info const *const stream)
20871903 {
1904
+ struct kbase_context *const kctx = group->kctx;
20881905 u64 gpu_heap_va =
20891906 kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) |
20901907 ((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32);
....@@ -2098,25 +1915,36 @@
20981915 u32 pending_frag_count;
20991916 u64 new_chunk_ptr;
21001917 int err;
1918
+ bool frag_end_err = false;
21011919
21021920 if ((frag_end > vt_end) || (vt_end >= vt_start)) {
2103
- dev_warn(kctx->kbdev->dev, "Invalid Heap statistics provided by firmware: vt_start %d, vt_end %d, frag_end %d\n",
1921
+ frag_end_err = true;
1922
+ dev_dbg(kctx->kbdev->dev, "Invalid Heap statistics provided by firmware: vt_start %d, vt_end %d, frag_end %d\n",
21041923 vt_start, vt_end, frag_end);
2105
- return -EINVAL;
21061924 }
2107
-
2108
- renderpasses_in_flight = vt_start - frag_end;
2109
- pending_frag_count = vt_end - frag_end;
1925
+ if (frag_end_err) {
1926
+ renderpasses_in_flight = 1;
1927
+ pending_frag_count = 1;
1928
+ } else {
1929
+ renderpasses_in_flight = vt_start - frag_end;
1930
+ pending_frag_count = vt_end - frag_end;
1931
+ }
21101932
21111933 err = kbase_csf_tiler_heap_alloc_new_chunk(kctx,
21121934 gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr);
21131935
2114
- /* It is okay to acknowledge with a NULL chunk (firmware will then wait
2115
- * for the fragment jobs to complete and release chunks)
2116
- */
2117
- if (err == -EBUSY)
1936
+ if ((group->csi_handlers & BASE_CSF_TILER_OOM_EXCEPTION_FLAG) &&
1937
+ (pending_frag_count == 0) && (err == -ENOMEM || err == -EBUSY)) {
1938
+ /* The group allows incremental rendering, trigger it */
21181939 new_chunk_ptr = 0;
2119
- else if (err)
1940
+ dev_dbg(kctx->kbdev->dev, "Group-%d (slot-%d) enter incremental render\n",
1941
+ group->handle, group->csg_nr);
1942
+ } else if (err == -EBUSY) {
1943
+ /* Acknowledge with a NULL chunk (firmware will then wait for
1944
+ * the fragment jobs to complete and release chunks)
1945
+ */
1946
+ new_chunk_ptr = 0;
1947
+ } else if (err)
21201948 return err;
21211949
21221950 kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO,
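The OoM path above derives its tiler-heap chunk request from three CS output counters. A small worked sketch of that arithmetic follows, using made-up counter values (purely illustrative, not real firmware output), including the sanity check and the conservative 1/1 fallback.

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

int main(void)
{
	/* Assumed example values for the vertex/tiling start, vertex/tiling
	 * end and fragment end event counters.
	 */
	const uint32_t vt_start = 12;
	const uint32_t vt_end = 10;
	const uint32_t frag_end = 9;
	uint32_t renderpasses_in_flight, pending_frag_count;

	/* Statistics are rejected when fragment work appears to have finished
	 * ahead of vertex/tiling work, or vertex/tiling "end" is not behind
	 * "start"; the handler then falls back to a conservative 1/1.
	 */
	const bool invalid = (frag_end > vt_end) || (vt_end >= vt_start);

	if (invalid) {
		renderpasses_in_flight = 1;
		pending_frag_count = 1;
	} else {
		renderpasses_in_flight = vt_start - frag_end; /* 12 - 9 = 3 */
		pending_frag_count = vt_end - frag_end;       /* 10 - 9 = 1 */
	}

	/* With the values above, 3 render passes still own heap chunks and 1
	 * has finished vertex/tiling but not fragment shading. Incremental
	 * rendering is only attempted when pending_frag_count is 0, i.e. no
	 * completed pass is still waiting to release memory.
	 */
	printf("in_flight=%u pending_frag=%u\n",
	       (unsigned)renderpasses_in_flight, (unsigned)pending_frag_count);
	return 0;
}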
....@@ -2149,8 +1977,40 @@
21491977 BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
21501978 } } } };
21511979
2152
- add_error(group->kctx, &group->error_tiler_oom, &error);
1980
+ kbase_csf_event_add_error(group->kctx,
1981
+ &group->error_tiler_oom,
1982
+ &error);
21531983 kbase_event_wakeup(group->kctx);
1984
+}
1985
+
1986
+static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev)
1987
+{
1988
+ int err;
1989
+ const unsigned int cache_flush_wait_timeout_ms = 2000;
1990
+
1991
+ kbase_pm_lock(kbdev);
1992
+ /* With the advent of partial cache flush, dirty cache lines could
1993
+ * be left in the GPU L2 caches by terminating the queue group here
1994
+ * without waiting for proper cache maintenance. A full cache flush
1995
+ * here will prevent these dirty cache lines from being arbitrarily
1996
+ * evicted later and possibly causing memory corruption.
1997
+ */
1998
+ if (kbdev->pm.backend.gpu_powered) {
1999
+ kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
2000
+ err = kbase_gpu_wait_cache_clean_timeout(kbdev, cache_flush_wait_timeout_ms);
2001
+
2002
+ if (err) {
2003
+ dev_warn(
2004
+ kbdev->dev,
2005
+ "[%llu] Timeout waiting for cache clean to complete after fatal error",
2006
+ kbase_backend_get_cycle_cnt(kbdev));
2007
+
2008
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
2009
+ kbase_reset_gpu(kbdev);
2010
+ }
2011
+ }
2012
+
2013
+ kbase_pm_unlock(kbdev);
21542014 }
21552015
21562016 /**
....@@ -2165,8 +2025,8 @@
21652025 * notification to allow the firmware to report out-of-memory again in future.
21662026 * If the out-of-memory condition was successfully handled then this function
21672027 * rings the relevant doorbell to notify the firmware; otherwise, it terminates
2168
- * the GPU command queue group to which the queue is bound. See
2169
- * term_queue_group() for details.
2028
+ * the GPU command queue group to which the queue is bound and notifies a waiting
2029
+ * user space client of the failure.
21702030 */
21712031 static void kbase_queue_oom_event(struct kbase_queue *const queue)
21722032 {
....@@ -2178,6 +2038,7 @@
21782038 struct kbase_csf_cmd_stream_info const *stream;
21792039 int csi_index = queue->csi_index;
21802040 u32 cs_oom_ack, cs_oom_req;
2041
+ unsigned long flags;
21812042
21822043 lockdep_assert_held(&kctx->csf.lock);
21832044
....@@ -2221,22 +2082,25 @@
22212082 if (cs_oom_ack == cs_oom_req)
22222083 goto unlock;
22232084
2224
- err = handle_oom_event(kctx, stream);
2085
+ err = handle_oom_event(group, stream);
22252086
2087
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
22262088 kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack,
22272089 CS_REQ_TILER_OOM_MASK);
2090
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
2091
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
22282092
2229
- if (err) {
2093
+ if (unlikely(err)) {
22302094 dev_warn(
22312095 kbdev->dev,
22322096 "Queue group to be terminated, couldn't handle the OoM event\n");
2097
+ kbase_debug_csf_fault_notify(kbdev, kctx, DF_TILER_OOM);
22332098 kbase_csf_scheduler_unlock(kbdev);
22342099 term_queue_group(group);
2100
+ flush_gpu_cache_on_fatal_error(kbdev);
22352101 report_tiler_oom_error(group);
22362102 return;
22372103 }
2238
-
2239
- kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
22402104 unlock:
22412105 kbase_csf_scheduler_unlock(kbdev);
22422106 }
....@@ -2258,6 +2122,7 @@
22582122 struct kbase_device *const kbdev = kctx->kbdev;
22592123
22602124 int err = kbase_reset_gpu_try_prevent(kbdev);
2125
+
22612126 /* Regardless of whether reset failed or is currently happening, exit
22622127 * early
22632128 */
....@@ -2294,7 +2159,7 @@
22942159 "Notify the event notification thread, forward progress timeout (%llu cycles)\n",
22952160 kbase_csf_timeout_get(group->kctx->kbdev));
22962161
2297
- add_error(group->kctx, &group->error_timeout, &error);
2162
+ kbase_csf_event_add_error(group->kctx, &group->error_timeout, &error);
22982163 kbase_event_wakeup(group->kctx);
22992164 }
23002165
....@@ -2310,12 +2175,13 @@
23102175 struct kbase_queue_group *const group =
23112176 container_of(data, struct kbase_queue_group, timer_event_work);
23122177 struct kbase_context *const kctx = group->kctx;
2178
+ struct kbase_device *const kbdev = kctx->kbdev;
23132179 bool reset_prevented = false;
2314
- int err = kbase_reset_gpu_prevent_and_wait(kctx->kbdev);
2180
+ int err = kbase_reset_gpu_prevent_and_wait(kbdev);
23152181
23162182 if (err)
23172183 dev_warn(
2318
- kctx->kbdev->dev,
2184
+ kbdev->dev,
23192185 "Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless",
23202186 group->handle);
23212187 else
....@@ -2324,11 +2190,12 @@
23242190 mutex_lock(&kctx->csf.lock);
23252191
23262192 term_queue_group(group);
2193
+ flush_gpu_cache_on_fatal_error(kbdev);
23272194 report_group_timeout_error(group);
23282195
23292196 mutex_unlock(&kctx->csf.lock);
23302197 if (reset_prevented)
2331
- kbase_reset_gpu_allow(kctx->kbdev);
2198
+ kbase_reset_gpu_allow(kbdev);
23322199 }
23332200
23342201 /**
....@@ -2336,12 +2203,91 @@
23362203 *
23372204 * @group: Pointer to GPU queue group for which the timeout event is received.
23382205 *
2206
+ * Notify a waiting user space client of the timeout.
23392207 * Enqueue a work item to terminate the group and notify the event notification
23402208 * thread of progress timeout fault for the GPU command queue group.
23412209 */
23422210 static void handle_progress_timer_event(struct kbase_queue_group *const group)
23432211 {
2212
+ kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx,
2213
+ DF_PROGRESS_TIMER_TIMEOUT);
2214
+
23442215 queue_work(group->kctx->csf.wq, &group->timer_event_work);
2216
+}
2217
+
2218
+/**
2219
+ * alloc_grp_protected_suspend_buffer_pages() - Allocate physical pages from the protected
2220
+ * memory for the protected mode suspend buffer.
2221
+ * @group: Pointer to the GPU queue group.
2222
+ *
2223
+ * Return: 0 if suspend buffer allocation is successful or if its already allocated, otherwise
2224
+ * negative error value.
2225
+ */
2226
+static int alloc_grp_protected_suspend_buffer_pages(struct kbase_queue_group *const group)
2227
+{
2228
+ struct kbase_device *const kbdev = group->kctx->kbdev;
2229
+ struct kbase_context *kctx = group->kctx;
2230
+ struct tagged_addr *phys = NULL;
2231
+ struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf;
2232
+ size_t nr_pages;
2233
+ int err = 0;
2234
+
2235
+ if (likely(sbuf->pma))
2236
+ return 0;
2237
+
2238
+ nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
2239
+ phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL);
2240
+ if (unlikely(!phys)) {
2241
+ err = -ENOMEM;
2242
+ goto phys_free;
2243
+ }
2244
+
2245
+ mutex_lock(&kctx->csf.lock);
2246
+ kbase_csf_scheduler_lock(kbdev);
2247
+
2248
+ if (unlikely(!group->csg_reg)) {
2249
+ /* The only way the bound csg_reg could have been removed from the group is
2250
+ * that it has been put off slot by the scheduler and the csg_reg resource
2251
+ * is contended by other groups. In this case, it needs another occasion for
2252
+ * mapping the pma, which needs a bound csg_reg. Since the group is already
2253
+ * off-slot, returning no error is harmless as the scheduler, when placing the
2254
+ * group back on-slot again, would do the required MMU map operation on the
2255
+ * allocated and retained pma.
2256
+ */
2257
+ WARN_ON(group->csg_nr >= 0);
2258
+ dev_dbg(kbdev->dev, "No bound csg_reg for group_%d_%d_%d to enter protected mode",
2259
+ group->kctx->tgid, group->kctx->id, group->handle);
2260
+ goto unlock;
2261
+ }
2262
+
2263
+ /* Allocate the protected mode pages */
2264
+ sbuf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, nr_pages, true);
2265
+ if (unlikely(!sbuf->pma)) {
2266
+ err = -ENOMEM;
2267
+ goto unlock;
2268
+ }
2269
+
2270
+ /* Map the bound susp_reg to the just allocated pma pages */
2271
+ err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group);
2272
+
2273
+unlock:
2274
+ kbase_csf_scheduler_unlock(kbdev);
2275
+ mutex_unlock(&kctx->csf.lock);
2276
+phys_free:
2277
+ kfree(phys);
2278
+ return err;
2279
+}
2280
+
2281
+static void report_group_fatal_error(struct kbase_queue_group *const group)
2282
+{
2283
+ struct base_gpu_queue_group_error const
2284
+ err_payload = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
2285
+ .payload = { .fatal_group = {
2286
+ .status = GPU_EXCEPTION_TYPE_SW_FAULT_0,
2287
+ } } };
2288
+
2289
+ kbase_csf_add_group_fatal_error(group, &err_payload);
2290
+ kbase_event_wakeup(group->kctx);
23452291 }
23462292
23472293 /**
....@@ -2356,53 +2302,48 @@
23562302 {
23572303 struct kbase_queue_group *const group =
23582304 container_of(data, struct kbase_queue_group, protm_event_work);
2305
+ struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf;
2306
+ int err = 0;
23592307
2360
- KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_BEGIN,
2308
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START,
23612309 group, 0u);
2362
- kbase_csf_scheduler_group_protm_enter(group);
2310
+
2311
+ err = alloc_grp_protected_suspend_buffer_pages(group);
2312
+ if (!err) {
2313
+ kbase_csf_scheduler_group_protm_enter(group);
2314
+ } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) {
2315
+ sbuf->alloc_retries++;
2316
+ /* try again to allocate pages */
2317
+ queue_work(group->kctx->csf.wq, &group->protm_event_work);
2318
+ } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) {
2319
+ dev_err(group->kctx->kbdev->dev,
2320
+ "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d",
2321
+ group->handle, group->kctx->tgid, group->kctx->id);
2322
+ report_group_fatal_error(group);
2323
+ }
2324
+
23632325 KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END,
23642326 group, 0u);
2365
-}
2366
-
2367
-static void report_queue_fatal_error(struct kbase_queue *const queue,
2368
- u32 cs_fatal, u64 cs_fatal_info,
2369
- u8 group_handle)
2370
-{
2371
- struct base_csf_notification error =
2372
- { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
2373
- .payload = {
2374
- .csg_error = {
2375
- .handle = group_handle,
2376
- .error = {
2377
- .error_type =
2378
- BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
2379
- .payload = {
2380
- .fatal_queue = {
2381
- .sideband =
2382
- cs_fatal_info,
2383
- .status = cs_fatal,
2384
- .csi_index =
2385
- queue->csi_index,
2386
- } } } } } };
2387
-
2388
- add_error(queue->kctx, &queue->error, &error);
2389
- kbase_event_wakeup(queue->kctx);
23902327 }
23912328
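The protected-mode worker above retries the suspend-buffer allocation by re-queuing itself, bounded by PROTM_ALLOC_MAX_RETRIES, before raising a fatal group error. Below is a stand-alone sketch of that bounded retry-by-requeue shape; try_alloc, report_fatal and the loop standing in for the workqueue are illustrative only and use -12 as a stand-in for -ENOMEM.

#include <stdio.h>
#include <stdbool.h>

#define ALLOC_MAX_RETRIES 5

/* Simulated allocator: fails transiently a few times, then succeeds. */
static int try_alloc(int attempt)
{
	return (attempt < 3) ? -12 /* stand-in for -ENOMEM */ : 0;
}

static void report_fatal(void)
{
	puts("fatal: giving up on allocation");
}

/* One invocation of the "worker": returns true if it re-queued itself. */
static bool worker_once(int *retries, int attempt)
{
	int err = try_alloc(attempt);

	if (!err) {
		puts("allocation succeeded, continuing protected mode entry");
		return false;
	}

	if (err == -12 && *retries <= ALLOC_MAX_RETRIES) {
		(*retries)++;
		puts("transient failure, re-queueing worker");
		return true;
	}

	report_fatal();
	return false;
}

int main(void)
{
	int retries = 0;
	int attempt = 0;

	/* The workqueue is modelled by a loop: each re-queue becomes another
	 * iteration, bounded by ALLOC_MAX_RETRIES.
	 */
	while (worker_once(&retries, attempt++))
		;
	return 0;
}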
23922329 /**
23932330 * handle_fault_event - Handler for CS fault.
23942331 *
23952332 * @queue: Pointer to queue for which fault event was received.
2396
- * @stream: Pointer to the structure containing info provided by the
2397
- * firmware about the CSI.
2333
+ * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for
2334
+ * the queue.
23982335 *
2399
- * Prints meaningful CS fault information.
2400
- *
2336
+ * Print required information about the CS fault and notify the user space client
2337
+ * about the fault.
24012338 */
24022339 static void
2403
-handle_fault_event(struct kbase_queue *const queue,
2404
- struct kbase_csf_cmd_stream_info const *const stream)
2340
+handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack)
24052341 {
2342
+ struct kbase_device *const kbdev = queue->kctx->kbdev;
2343
+ struct kbase_csf_cmd_stream_group_info const *ginfo =
2344
+ &kbdev->csf.global_iface.groups[queue->group->csg_nr];
2345
+ struct kbase_csf_cmd_stream_info const *stream =
2346
+ &ginfo->streams[queue->csi_index];
24062347 const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT);
24072348 const u64 cs_fault_info =
24082349 kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) |
....@@ -2414,7 +2355,6 @@
24142355 CS_FAULT_EXCEPTION_DATA_GET(cs_fault);
24152356 const u64 cs_fault_info_exception_data =
24162357 CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info);
2417
- struct kbase_device *const kbdev = queue->kctx->kbdev;
24182358
24192359 kbase_csf_scheduler_spin_lock_assert_held(kbdev);
24202360
....@@ -2429,29 +2369,86 @@
24292369 kbase_gpu_exception_name(cs_fault_exception_type),
24302370 cs_fault_exception_data, cs_fault_info_exception_data);
24312371
2432
- if (cs_fault_exception_type ==
2433
- CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT)
2434
- report_queue_fatal_error(queue, GPU_EXCEPTION_TYPE_SW_FAULT_2,
2435
- 0, queue->group->handle);
2372
+
2373
+#if IS_ENABLED(CONFIG_DEBUG_FS)
2374
+ /* CS_RESOURCE_TERMINATED type fault event can be ignored from the
2375
+ * standpoint of dump on error. It is used to report fault for the CSIs
2376
+ * that are associated with the same CSG as the CSI for which the actual
2377
+ * fault was reported by the Iterator.
2378
+ * Dumping would be triggered when the actual fault is reported.
2379
+ *
2380
+ * CS_INHERIT_FAULT can also be ignored. It could happen due to the error
2381
+ * in other types of queues (cpu/kcpu). If a fault had occurred in some
2382
+ * other GPU queue then the dump would have been performed anyway when
2383
+ * that fault was reported.
2384
+ */
2385
+ if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) &&
2386
+ (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) {
2387
+ if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) {
2388
+ get_queue(queue);
2389
+ queue->cs_error = cs_fault;
2390
+ queue->cs_error_info = cs_fault_info;
2391
+ queue->cs_error_fatal = false;
2392
+ if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
2393
+ release_queue(queue);
2394
+ return;
2395
+ }
2396
+ }
2397
+#endif
2398
+
2399
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
2400
+ CS_REQ_FAULT_MASK);
2401
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true);
2402
+}
2403
+
2404
+static void report_queue_fatal_error(struct kbase_queue *const queue,
2405
+ u32 cs_fatal, u64 cs_fatal_info,
2406
+ u8 group_handle)
2407
+{
2408
+ struct base_csf_notification error = {
2409
+ .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
2410
+ .payload = {
2411
+ .csg_error = {
2412
+ .handle = group_handle,
2413
+ .error = {
2414
+ .error_type =
2415
+ BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
2416
+ .payload = {
2417
+ .fatal_queue = {
2418
+ .sideband = cs_fatal_info,
2419
+ .status = cs_fatal,
2420
+ .csi_index = queue->csi_index,
2421
+ }
2422
+ }
2423
+ }
2424
+ }
2425
+ }
2426
+ };
2427
+
2428
+ kbase_csf_event_add_error(queue->kctx, &queue->error, &error);
2429
+ kbase_event_wakeup(queue->kctx);
24362430 }
24372431
24382432 /**
2439
- * fatal_event_worker - Handle the fatal error for the GPU queue
2433
+ * fatal_event_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue
24402434 *
24412435 * @data: Pointer to a work_struct embedded in GPU command queue.
24422436 *
24432437 * Terminate the CSG and report the error to userspace.
24442438 */
2445
-static void fatal_event_worker(struct work_struct *const data)
2439
+static void cs_error_worker(struct work_struct *const data)
24462440 {
24472441 struct kbase_queue *const queue =
2448
- container_of(data, struct kbase_queue, fatal_event_work);
2442
+ container_of(data, struct kbase_queue, cs_error_work);
24492443 struct kbase_context *const kctx = queue->kctx;
24502444 struct kbase_device *const kbdev = kctx->kbdev;
24512445 struct kbase_queue_group *group;
24522446 u8 group_handle;
24532447 bool reset_prevented = false;
2454
- int err = kbase_reset_gpu_prevent_and_wait(kbdev);
2448
+ int err;
2449
+
2450
+ kbase_debug_csf_fault_wait_completion(kbdev);
2451
+ err = kbase_reset_gpu_prevent_and_wait(kbdev);
24552452
24562453 if (err)
24572454 dev_warn(
....@@ -2468,9 +2465,35 @@
24682465 goto unlock;
24692466 }
24702467
2468
+#if IS_ENABLED(CONFIG_DEBUG_FS)
2469
+ if (!queue->cs_error_fatal) {
2470
+ unsigned long flags;
2471
+ int slot_num;
2472
+
2473
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
2474
+ slot_num = kbase_csf_scheduler_group_get_slot_locked(group);
2475
+ if (slot_num >= 0) {
2476
+ struct kbase_csf_cmd_stream_group_info const *ginfo =
2477
+ &kbdev->csf.global_iface.groups[slot_num];
2478
+ struct kbase_csf_cmd_stream_info const *stream =
2479
+ &ginfo->streams[queue->csi_index];
2480
+ u32 const cs_ack =
2481
+ kbase_csf_firmware_cs_output(stream, CS_ACK);
2482
+
2483
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
2484
+ CS_REQ_FAULT_MASK);
2485
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index,
2486
+ slot_num, true);
2487
+ }
2488
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
2489
+ goto unlock;
2490
+ }
2491
+#endif
2492
+
24712493 group_handle = group->handle;
24722494 term_queue_group(group);
2473
- report_queue_fatal_error(queue, queue->cs_fatal, queue->cs_fatal_info,
2495
+ flush_gpu_cache_on_fatal_error(kbdev);
2496
+ report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info,
24742497 group_handle);
24752498
24762499 unlock:
....@@ -2486,14 +2509,18 @@
24862509 * @queue: Pointer to queue for which fatal event was received.
24872510 * @stream: Pointer to the structure containing info provided by the
24882511 * firmware about the CSI.
2512
+ * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for
2513
+ * the queue.
24892514 *
2490
- * Prints meaningful CS fatal information.
2515
+ * Notify a waiting user space client of the CS fatal and prints meaningful
2516
+ * information.
24912517 * Enqueue a work item to terminate the group and report the fatal error
24922518 * to user space.
24932519 */
24942520 static void
24952521 handle_fatal_event(struct kbase_queue *const queue,
2496
- struct kbase_csf_cmd_stream_info const *const stream)
2522
+ struct kbase_csf_cmd_stream_info const *const stream,
2523
+ u32 cs_ack)
24972524 {
24982525 const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL);
24992526 const u64 cs_fatal_info =
....@@ -2523,51 +2550,26 @@
25232550
25242551 if (cs_fatal_exception_type ==
25252552 CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) {
2553
+ kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_FW_INTERNAL_ERROR);
25262554 queue_work(system_wq, &kbdev->csf.fw_error_work);
25272555 } else {
2556
+ kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FATAL);
2557
+ if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE) {
2558
+ queue->group->cs_unrecoverable = true;
2559
+ if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE))
2560
+ kbase_reset_gpu(queue->kctx->kbdev);
2561
+ }
25282562 get_queue(queue);
2529
- queue->cs_fatal = cs_fatal;
2530
- queue->cs_fatal_info = cs_fatal_info;
2531
- if (!queue_work(queue->kctx->csf.wq, &queue->fatal_event_work))
2563
+ queue->cs_error = cs_fatal;
2564
+ queue->cs_error_info = cs_fatal_info;
2565
+ queue->cs_error_fatal = true;
2566
+ if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
25322567 release_queue(queue);
25332568 }
2534
-}
25352569
2536
-/**
2537
- * handle_queue_exception_event - Handler for CS fatal/fault exception events.
2538
- *
2539
- * @queue: Pointer to queue for which fatal/fault event was received.
2540
- * @cs_req: Value of the CS_REQ register from the CS's input page.
2541
- * @cs_ack: Value of the CS_ACK register from the CS's output page.
2542
- */
2543
-static void handle_queue_exception_event(struct kbase_queue *const queue,
2544
- const u32 cs_req, const u32 cs_ack)
2545
-{
2546
- struct kbase_csf_cmd_stream_group_info const *ginfo;
2547
- struct kbase_csf_cmd_stream_info const *stream;
2548
- struct kbase_context *const kctx = queue->kctx;
2549
- struct kbase_device *const kbdev = kctx->kbdev;
2550
- struct kbase_queue_group *group = queue->group;
2551
- int csi_index = queue->csi_index;
2552
- int slot_num = group->csg_nr;
2570
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
2571
+ CS_REQ_FATAL_MASK);
25532572
2554
- kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2555
-
2556
- ginfo = &kbdev->csf.global_iface.groups[slot_num];
2557
- stream = &ginfo->streams[csi_index];
2558
-
2559
- if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
2560
- handle_fatal_event(queue, stream);
2561
- kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
2562
- CS_REQ_FATAL_MASK);
2563
- }
2564
-
2565
- if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
2566
- handle_fault_event(queue, stream);
2567
- kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
2568
- CS_REQ_FAULT_MASK);
2569
- kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
2570
- }
25712573 }
25722574
25732575 /**
....@@ -2577,6 +2579,9 @@
25772579 * @ginfo: The CSG interface provided by the firmware.
25782580 * @irqreq: CSG's IRQ request bitmask (one bit per CS).
25792581 * @irqack: CSG's IRQ acknowledge bitmask (one bit per CS).
2582
+ * @track: Pointer that tracks the highest scanout priority idle CSG
2583
+ * and any newly potentially viable protected mode requesting
2584
+ * CSG in current IRQ context.
25802585 *
25812586 * If the interrupt request bitmask differs from the acknowledge bitmask
25822587 * then the firmware is notifying the host of an event concerning those
....@@ -2585,8 +2590,9 @@
25852590 * the request and acknowledge registers for the individual CS(s).
25862591 */
25872592 static void process_cs_interrupts(struct kbase_queue_group *const group,
2588
- struct kbase_csf_cmd_stream_group_info const *const ginfo,
2589
- u32 const irqreq, u32 const irqack)
2593
+ struct kbase_csf_cmd_stream_group_info const *const ginfo,
2594
+ u32 const irqreq, u32 const irqack,
2595
+ struct irq_idle_and_protm_track *track)
25902596 {
25912597 struct kbase_device *const kbdev = group->kctx->kbdev;
25922598 u32 remaining = irqreq ^ irqack;
....@@ -2616,10 +2622,16 @@
26162622 kbase_csf_firmware_cs_output(stream, CS_ACK);
26172623 struct workqueue_struct *wq = group->kctx->csf.wq;
26182624
2619
- if ((cs_req & CS_REQ_EXCEPTION_MASK) ^
2620
- (cs_ack & CS_ACK_EXCEPTION_MASK)) {
2621
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_FAULT_INTERRUPT, group, queue, cs_req ^ cs_ack);
2622
- handle_queue_exception_event(queue, cs_req, cs_ack);
2625
+ if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
2626
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
2627
+ group, queue, cs_req ^ cs_ack);
2628
+ handle_fatal_event(queue, stream, cs_ack);
2629
+ }
2630
+
2631
+ if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
2632
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
2633
+ group, queue, cs_req ^ cs_ack);
2634
+ handle_fault_event(queue, cs_ack);
26232635 }
26242636
26252637 /* PROTM_PEND and TILER_OOM can be safely ignored
....@@ -2630,30 +2642,37 @@
26302642 u32 const cs_req_remain = cs_req & ~CS_REQ_EXCEPTION_MASK;
26312643 u32 const cs_ack_remain = cs_ack & ~CS_ACK_EXCEPTION_MASK;
26322644
2633
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND,
2634
- group, queue, cs_req_remain ^ cs_ack_remain);
2645
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev,
2646
+ CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED,
2647
+ group, queue,
2648
+ cs_req_remain ^ cs_ack_remain);
26352649 continue;
26362650 }
26372651
26382652 if (((cs_req & CS_REQ_TILER_OOM_MASK) ^
26392653 (cs_ack & CS_ACK_TILER_OOM_MASK))) {
26402654 get_queue(queue);
2641
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_TILER_OOM_INTERRUPT, group, queue,
2642
- cs_req ^ cs_ack);
2643
- if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) {
2655
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM,
2656
+ group, queue, cs_req ^ cs_ack);
2657
+ if (!queue_work(wq, &queue->oom_event_work)) {
26442658 /* The work item shall not have been
26452659 * already queued, there can be only
26462660 * one pending OoM event for a
26472661 * queue.
26482662 */
2663
+ dev_warn(
2664
+ kbdev->dev,
2665
+ "Tiler OOM work pending: queue %d group %d (ctx %d_%d)",
2666
+ queue->csi_index, group->handle, queue->kctx->tgid,
2667
+ queue->kctx->id);
26492668 release_queue(queue);
26502669 }
26512670 }
26522671
26532672 if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^
26542673 (cs_ack & CS_ACK_PROTM_PEND_MASK)) {
2655
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_INTERRUPT, group, queue,
2656
- cs_req ^ cs_ack);
2674
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_PROTM_PEND,
2675
+ group, queue, cs_req ^ cs_ack);
26572676
26582677 dev_dbg(kbdev->dev,
26592678 "Protected mode entry request for queue on csi %d bound to group-%d on slot %d",
....@@ -2661,15 +2680,34 @@
26612680 group->csg_nr);
26622681
26632682 bitmap_set(group->protm_pending_bitmap, i, 1);
2664
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_SET, group, queue,
2683
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_SET, group, queue,
26652684 group->protm_pending_bitmap[0]);
26662685 protm_pend = true;
26672686 }
26682687 }
26692688 }
26702689
2671
- if (protm_pend)
2672
- queue_work(group->kctx->csf.wq, &group->protm_event_work);
2690
+ if (protm_pend) {
2691
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2692
+
2693
+ if (scheduler->tick_protm_pending_seq > group->scan_seq_num) {
2694
+ scheduler->tick_protm_pending_seq = group->scan_seq_num;
2695
+ track->protm_grp = group;
2696
+ }
2697
+
2698
+ if (!group->protected_suspend_buf.pma)
2699
+ queue_work(group->kctx->csf.wq, &group->protm_event_work);
2700
+
2701
+ if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) {
2702
+ clear_bit(group->csg_nr,
2703
+ scheduler->csg_slots_idle_mask);
2704
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
2705
+ scheduler->csg_slots_idle_mask[0]);
2706
+ dev_dbg(kbdev->dev,
2707
+ "Group-%d on slot %d de-idled by protm request",
2708
+ group->handle, group->csg_nr);
2709
+ }
2710
+ }
26732711 }
26742712
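The fault, fatal, tiler-OOM and PROTM_PEND checks in the interrupt path all rely on the same toggle handshake: an event is pending when a REQ field differs from the matching ACK field, and it is acknowledged by copying the ACK bits back into REQ under that field's mask. The sketch below shows the pattern with made-up masks; it is an illustration of the handshake only, not the firmware interface itself.

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

#define EVT_A_MASK 0x1u /* e.g. a FAULT-style bit */
#define EVT_B_MASK 0x2u /* e.g. a TILER_OOM-style bit */

/* Firmware raises an event by toggling a bit in ack; the host sees it as a
 * mismatch against req, handles it, then writes ack's value back into req
 * (only within the mask) to acknowledge.
 */
static bool event_pending(uint32_t req, uint32_t ack, uint32_t mask)
{
	return ((req ^ ack) & mask) != 0;
}

static uint32_t acknowledge(uint32_t req, uint32_t ack, uint32_t mask)
{
	return (req & ~mask) | (ack & mask);
}

int main(void)
{
	uint32_t req = 0x0, ack = 0x0;

	ack ^= EVT_A_MASK; /* "firmware" signals event A */

	if (event_pending(req, ack, EVT_A_MASK)) {
		puts("event A pending, handling it");
		req = acknowledge(req, ack, EVT_A_MASK);
	}

	printf("A pending now: %d, B pending: %d\n",
	       event_pending(req, ack, EVT_A_MASK),
	       event_pending(req, ack, EVT_B_MASK));
	return 0;
}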
26752713 /**
....@@ -2677,6 +2715,8 @@
26772715 *
26782716 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
26792717 * @csg_nr: CSG number.
2718
+ * @track: Pointer that tracks the highest priority idle CSG and any newly viable
2719
+ * protected mode requesting group, in the current IRQ context.
26802720 *
26812721 * Handles interrupts for a CSG and for CSs within it.
26822722 *
....@@ -2687,8 +2727,8 @@
26872727 *
26882728 * See process_cs_interrupts() for details of per-stream interrupt handling.
26892729 */
2690
-static void process_csg_interrupts(struct kbase_device *const kbdev,
2691
- int const csg_nr)
2730
+static void process_csg_interrupts(struct kbase_device *const kbdev, int const csg_nr,
2731
+ struct irq_idle_and_protm_track *track)
26922732 {
26932733 struct kbase_csf_cmd_stream_group_info *ginfo;
26942734 struct kbase_queue_group *group = NULL;
....@@ -2699,8 +2739,6 @@
26992739 if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
27002740 return;
27012741
2702
- KBASE_KTRACE_ADD(kbdev, CSG_INTERRUPT_PROCESS, NULL, csg_nr);
2703
-
27042742 ginfo = &kbdev->csf.global_iface.groups[csg_nr];
27052743 req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
27062744 ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
....@@ -2709,7 +2747,7 @@
27092747
27102748 /* There may not be any pending CSG/CS interrupts to process */
27112749 if ((req == ack) && (irqreq == irqack))
2712
- goto out;
2750
+ return;
27132751
27142752 /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before
27152753 * examining the CS_ACK & CS_REQ bits. This would ensure that Host
....@@ -2730,21 +2768,30 @@
27302768 * slot scheduler spinlock is required.
27312769 */
27322770 if (!group)
2733
- goto out;
2771
+ return;
27342772
27352773 if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr))
2736
- goto out;
2774
+ return;
2775
+
2776
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr);
27372777
27382778 if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) {
27392779 kbase_csf_firmware_csg_input_mask(ginfo,
27402780 CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);
27412781
2742
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack);
2782
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack);
2783
+
2784
+ /* SYNC_UPDATE events shall invalidate GPU idle event */
2785
+ atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true);
2786
+
27432787 kbase_csf_event_signal_cpu_only(group->kctx);
27442788 }
27452789
27462790 if ((req ^ ack) & CSG_REQ_IDLE_MASK) {
27472791 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2792
+
2793
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE(
2794
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, csg_nr);
27482795
27492796 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
27502797 CSG_REQ_IDLE_MASK);
....@@ -2752,34 +2799,45 @@
27522799 set_bit(csg_nr, scheduler->csg_slots_idle_mask);
27532800 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group,
27542801 scheduler->csg_slots_idle_mask[0]);
2755
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_IDLE_INTERRUPT, group, req ^ ack);
2802
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_IDLE, group, req ^ ack);
27562803 dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n",
27572804 group->handle, csg_nr);
27582805
2759
- /* Check if the scheduling tick can be advanced */
2760
- if (kbase_csf_scheduler_all_csgs_idle(kbdev) &&
2761
- !scheduler->gpu_idle_fw_timer_enabled) {
2762
- kbase_csf_scheduler_advance_tick_nolock(kbdev);
2806
+ if (atomic_read(&scheduler->non_idle_offslot_grps)) {
2807
+ /* If there are non-idle CSGs waiting for a slot, fire
2808
+ * a tock for a replacement.
2809
+ */
2810
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NON_IDLE_GROUPS,
2811
+ group, req ^ ack);
2812
+ kbase_csf_scheduler_invoke_tock(kbdev);
2813
+ } else {
2814
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NO_NON_IDLE_GROUPS,
2815
+ group, req ^ ack);
2816
+ }
2817
+
2818
+ if (group->scan_seq_num < track->idle_seq) {
2819
+ track->idle_seq = group->scan_seq_num;
2820
+ track->idle_slot = csg_nr;
27632821 }
27642822 }
27652823
27662824 if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) {
27672825 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
2768
- CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
2826
+ CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
27692827
2770
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PROGRESS_TIMER_INTERRUPT,
2771
- group, req ^ ack);
2772
- dev_info(kbdev->dev,
2773
- "Timeout notification received for group %u of ctx %d_%d on slot %d\n",
2774
- group->handle, group->kctx->tgid, group->kctx->id, csg_nr);
2828
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT, group,
2829
+ req ^ ack);
2830
+ dev_info(
2831
+ kbdev->dev,
2832
+ "[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n",
2833
+ kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid,
2834
+ group->kctx->id, csg_nr);
27752835
27762836 handle_progress_timer_event(group);
27772837 }
27782838
2779
- process_cs_interrupts(group, ginfo, irqreq, irqack);
2839
+ process_cs_interrupts(group, ginfo, irqreq, irqack, track);
27802840
2781
-out:
2782
- /* group may still be NULL here */
27832841 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group,
27842842 ((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32));
27852843 }
....@@ -2868,105 +2926,264 @@
28682926 }
28692927 }
28702928
2929
+/**
2930
+ * check_protm_enter_req_complete - Check if PROTM_ENTER request completed
2931
+ *
2932
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
2933
+ * @glb_req: Global request register value.
2934
+ * @glb_ack: Global acknowledge register value.
2935
+ *
2936
+ * This function checks if the PROTM_ENTER Global request had completed and
2937
+ * appropriately sends notification about the protected mode entry to components
2938
+ * like IPA, HWC, IPA_CONTROL.
2939
+ */
2940
+static inline void check_protm_enter_req_complete(struct kbase_device *kbdev,
2941
+ u32 glb_req, u32 glb_ack)
2942
+{
2943
+ lockdep_assert_held(&kbdev->hwaccess_lock);
2944
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2945
+
2946
+ if (likely(!kbdev->csf.scheduler.active_protm_grp))
2947
+ return;
2948
+
2949
+ if (kbdev->protected_mode)
2950
+ return;
2951
+
2952
+ if ((glb_req & GLB_REQ_PROTM_ENTER_MASK) !=
2953
+ (glb_ack & GLB_REQ_PROTM_ENTER_MASK))
2954
+ return;
2955
+
2956
+ dev_dbg(kbdev->dev, "Protected mode entry interrupt received");
2957
+
2958
+ kbdev->protected_mode = true;
2959
+ kbase_ipa_protection_mode_switch_event(kbdev);
2960
+ kbase_ipa_control_protm_entered(kbdev);
2961
+ kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface);
2962
+}
2963
+
2964
+/**
2965
+ * process_protm_exit - Handle the protected mode exit interrupt
2966
+ *
2967
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
2968
+ * @glb_ack: Global acknowledge register value.
2969
+ *
2970
+ * This function handles the PROTM_EXIT interrupt and sends notification
2971
+ * about the protected mode exit to components like HWC, IPA_CONTROL.
2972
+ */
2973
+static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack)
2974
+{
2975
+ const struct kbase_csf_global_iface *const global_iface =
2976
+ &kbdev->csf.global_iface;
2977
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2978
+
2979
+ lockdep_assert_held(&kbdev->hwaccess_lock);
2980
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2981
+
2982
+ dev_dbg(kbdev->dev, "Protected mode exit interrupt received");
2983
+
2984
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_ack,
2985
+ GLB_REQ_PROTM_EXIT_MASK);
2986
+
2987
+ if (likely(scheduler->active_protm_grp)) {
2988
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT,
2989
+ scheduler->active_protm_grp, 0u);
2990
+ scheduler->active_protm_grp = NULL;
2991
+ } else {
2992
+ dev_warn(kbdev->dev, "PROTM_EXIT interrupt after no pmode group");
2993
+ }
2994
+
2995
+ if (!WARN_ON(!kbdev->protected_mode)) {
2996
+ kbdev->protected_mode = false;
2997
+ kbase_ipa_control_protm_exited(kbdev);
2998
+ kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface);
2999
+ }
3000
+
3001
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
3002
+ kbase_debug_coresight_csf_enable_pmode_exit(kbdev);
3003
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
3004
+}
3005
+
3006
+static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
3007
+ struct irq_idle_and_protm_track *track)
3008
+{
3009
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3010
+ struct kbase_queue_group *group = track->protm_grp;
3011
+ u32 current_protm_pending_seq = scheduler->tick_protm_pending_seq;
3012
+
3013
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
3014
+
3015
+ if (likely(current_protm_pending_seq == KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID))
3016
+ return;
3017
+
3018
+ /* Handle protm from the tracked information */
3019
+ if (track->idle_seq < current_protm_pending_seq) {
3020
+ /* If the protm enter was prevented due to groups priority, then fire a tock
3021
+ * for the scheduler to re-examine the case.
3022
+ */
3023
+ dev_dbg(kbdev->dev, "Attempt pending protm from idle slot %d\n", track->idle_slot);
3024
+ kbase_csf_scheduler_invoke_tock(kbdev);
3025
+ } else if (group) {
3026
+ u32 i, num_groups = kbdev->csf.global_iface.group_num;
3027
+ struct kbase_queue_group *grp;
3028
+ bool tock_triggered = false;
3029
+
3030
+ /* A new protm request, and track->idle_seq is not sufficient, check across
3031
+ * previously notified idle CSGs in the current tick/tock cycle.
3032
+ */
3033
+ for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
3034
+ if (i == track->idle_slot)
3035
+ continue;
3036
+ grp = kbase_csf_scheduler_get_group_on_slot(kbdev, i);
3037
+ /* If not NULL then the group pointer cannot disappear as the
3038
+ * scheduler spinlock is held.
3039
+ */
3040
+ if (grp == NULL)
3041
+ continue;
3042
+
3043
+ if (grp->scan_seq_num < current_protm_pending_seq) {
3044
+ tock_triggered = true;
3045
+ dev_dbg(kbdev->dev,
3046
+ "Attempt new protm from tick/tock idle slot %d\n", i);
3047
+ kbase_csf_scheduler_invoke_tock(kbdev);
3048
+ break;
3049
+ }
3050
+ }
3051
+
3052
+ if (!tock_triggered) {
3053
+ dev_dbg(kbdev->dev, "Group-%d on slot-%d start protm work\n",
3054
+ group->handle, group->csg_nr);
3055
+ queue_work(group->kctx->csf.wq, &group->protm_event_work);
3056
+ }
3057
+ }
3058
+}
3059
+
3060
+static void order_job_irq_clear_with_iface_mem_read(void)
3061
+{
3062
+ /* Ensure that write to the JOB_IRQ_CLEAR is ordered with regards to the
3063
+ * read from interface memory. The ordering is needed considering the way
3064
+ * FW & Kbase writes to the JOB_IRQ_RAWSTAT and JOB_IRQ_CLEAR registers
3065
+ * without any synchronization. Without the barrier there is no guarantee
3066
+ * about the ordering, the write to IRQ_CLEAR can take effect after the read
3067
+ * from interface memory and that could cause a problem for the scenario where
3068
+ * FW sends back to back notifications for the same CSG for events like
3069
+ * SYNC_UPDATE and IDLE, but Kbase gets a single IRQ and observes only the
3070
+ * first event. Similar thing can happen with glb events like CFG_ALLOC_EN
3071
+ * acknowledgment and GPU idle notification.
3072
+ *
3073
+ * MCU CPU
3074
+ * --------------- ----------------
3075
+ * Update interface memory Write to IRQ_CLEAR to clear current IRQ
3076
+ * <barrier> <barrier>
3077
+ * Write to IRQ_RAWSTAT to raise new IRQ Read interface memory
3078
+ */
3079
+
3080
+ /* CPU and GPU would be in the same Outer shareable domain */
3081
+ dmb(osh);
3082
+}
3083
+
28713084 void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
28723085 {
2873
- unsigned long flags;
2874
- u32 remaining = val;
3086
+ bool deferred_handling_glb_idle_irq = false;
28753087
28763088 lockdep_assert_held(&kbdev->hwaccess_lock);
28773089
2878
- KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val);
2879
- kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
3090
+ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_START, NULL, val);
28803091
2881
- if (val & JOB_IRQ_GLOBAL_IF) {
2882
- const struct kbase_csf_global_iface *const global_iface =
2883
- &kbdev->csf.global_iface;
2884
- struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3092
+ do {
3093
+ unsigned long flags;
3094
+ u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;
3095
+ struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX };
3096
+ bool glb_idle_irq_received = false;
28853097
2886
- kbdev->csf.interrupt_received = true;
2887
- remaining &= ~JOB_IRQ_GLOBAL_IF;
3098
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
3099
+ order_job_irq_clear_with_iface_mem_read();
28883100
2889
- if (!kbdev->csf.firmware_reloaded)
2890
- kbase_csf_firmware_reload_completed(kbdev);
2891
- else if (global_iface->output) {
2892
- u32 glb_req, glb_ack;
2893
-
3101
+ if (csg_interrupts != 0) {
28943102 kbase_csf_scheduler_spin_lock(kbdev, &flags);
2895
- glb_req = kbase_csf_firmware_global_input_read(
2896
- global_iface, GLB_REQ);
2897
- glb_ack = kbase_csf_firmware_global_output(
2898
- global_iface, GLB_ACK);
2899
- KBASE_KTRACE_ADD(kbdev, GLB_REQ_ACQ, NULL, glb_req ^ glb_ack);
3103
+ /* Looping through and track the highest idle and protm groups */
3104
+ while (csg_interrupts != 0) {
3105
+ int const csg_nr = ffs(csg_interrupts) - 1;
29003106
2901
- if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK) {
2902
- dev_dbg(kbdev->dev, "Protected mode exit interrupt received");
2903
- kbase_csf_firmware_global_input_mask(
2904
- global_iface, GLB_REQ, glb_ack,
2905
- GLB_REQ_PROTM_EXIT_MASK);
2906
- WARN_ON(!kbase_csf_scheduler_protected_mode_in_use(kbdev));
2907
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM, scheduler->active_protm_grp, 0u);
2908
- scheduler->active_protm_grp = NULL;
2909
- kbdev->protected_mode = false;
2910
- kbase_ipa_control_protm_exited(kbdev);
2911
- kbase_hwcnt_backend_csf_protm_exited(
2912
- &kbdev->hwcnt_gpu_iface);
3107
+ process_csg_interrupts(kbdev, csg_nr, &track);
3108
+ csg_interrupts &= ~(1 << csg_nr);
29133109 }
29143110
2915
- /* Handle IDLE Hysteresis notification event */
2916
- if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
2917
- int non_idle_offslot_grps;
2918
- bool can_suspend_on_idle;
2919
- dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
2920
- kbase_csf_firmware_global_input_mask(
3111
+ /* Handle protm from the tracked information */
3112
+ process_tracked_info_for_protm(kbdev, &track);
3113
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
3114
+ }
3115
+
3116
+ if (val & JOB_IRQ_GLOBAL_IF) {
3117
+ const struct kbase_csf_global_iface *const global_iface =
3118
+ &kbdev->csf.global_iface;
3119
+
3120
+ kbdev->csf.interrupt_received = true;
3121
+
3122
+ if (!kbdev->csf.firmware_reloaded)
3123
+ kbase_csf_firmware_reload_completed(kbdev);
3124
+ else if (global_iface->output) {
3125
+ u32 glb_req, glb_ack;
3126
+
3127
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
3128
+ glb_req =
3129
+ kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
3130
+ glb_ack = kbase_csf_firmware_global_output(global_iface, GLB_ACK);
3131
+ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL,
3132
+ glb_req ^ glb_ack);
3133
+
3134
+ check_protm_enter_req_complete(kbdev, glb_req, glb_ack);
3135
+
3136
+ if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK)
3137
+ process_protm_exit(kbdev, glb_ack);
3138
+
3139
+ /* Handle IDLE Hysteresis notification event */
3140
+ if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
3141
+ dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
3142
+ kbase_csf_firmware_global_input_mask(
29213143 global_iface, GLB_REQ, glb_ack,
29223144 GLB_REQ_IDLE_EVENT_MASK);
29233145
2924
- non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
2925
- can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
2926
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL,
2927
- ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
2928
-
2929
- if (!non_idle_offslot_grps) {
2930
- if (can_suspend_on_idle)
2931
- queue_work(system_highpri_wq,
2932
- &scheduler->gpu_idle_work);
2933
- } else {
2934
- /* Advance the scheduling tick to get
2935
- * the non-idle suspended groups loaded
2936
- * soon.
3146
+ glb_idle_irq_received = true;
3147
+ /* Defer handling this IRQ to account for a race condition
3148
+ * where the idle worker could be executed before we have
3149
+ * finished handling all pending IRQs (including CSG IDLE
3150
+ * IRQs).
29373151 */
2938
- kbase_csf_scheduler_advance_tick_nolock(
2939
- kbdev);
3152
+ deferred_handling_glb_idle_irq = true;
29403153 }
3154
+
3155
+ process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
3156
+
3157
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
3158
+
3159
+ /* Invoke the MCU state machine as a state transition
3160
+ * might have completed.
3161
+ */
3162
+ kbase_pm_update_state(kbdev);
29413163 }
2942
-
2943
- process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
2944
-
2945
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
2946
-
2947
- /* Invoke the MCU state machine as a state transition
2948
- * might have completed.
2949
- */
2950
- kbase_pm_update_state(kbdev);
29513164 }
29523165
2953
- if (!remaining) {
2954
- wake_up_all(&kbdev->csf.event_wait);
2955
- KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
2956
- return;
2957
- }
2958
- }
3166
+ if (!glb_idle_irq_received)
3167
+ break;
3168
+ /* Attempt to serve potential IRQs that might have occurred
3169
+ * whilst handling the previous IRQ. In case we have observed
3170
+ * the GLB IDLE IRQ without all CSGs having been marked as
3171
+ * idle, the GPU would be treated as no longer idle and left
3172
+ * powered on.
3173
+ */
3174
+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
3175
+ } while (val);
29593176
2960
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
2961
- while (remaining != 0) {
2962
- int const csg_nr = ffs(remaining) - 1;
3177
+ if (deferred_handling_glb_idle_irq) {
3178
+ unsigned long flags;
29633179
2964
- process_csg_interrupts(kbdev, csg_nr);
2965
- remaining &= ~(1 << csg_nr);
3180
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
3181
+ kbase_csf_scheduler_process_gpu_idle_event(kbdev);
3182
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
29663183 }
2967
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
29683184
29693185 wake_up_all(&kbdev->csf.event_wait);
3186
+
29703187 KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
29713188 }
29723189
....@@ -2989,13 +3206,12 @@
29893206 struct file *filp;
29903207 int ret;
29913208
2992
- filp = shmem_file_setup("mali csf", MAX_LFS_FILESIZE, VM_NORESERVE);
3209
+ filp = shmem_file_setup("mali csf db", MAX_LFS_FILESIZE, VM_NORESERVE);
29933210 if (IS_ERR(filp))
29943211 return PTR_ERR(filp);
29953212
2996
- ret = kbase_mem_pool_alloc_pages(
2997
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
2998
- 1, &phys, false);
3213
+ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
3214
+ false, NULL);
29993215
30003216 if (ret <= 0) {
30013217 fput(filp);
....@@ -3011,30 +3227,34 @@
30113227
30123228 void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
30133229 {
3014
- if (as_phys_addr_t(kbdev->csf.dummy_user_reg_page)) {
3015
- struct page *page = as_page(kbdev->csf.dummy_user_reg_page);
3230
+ if (kbdev->csf.user_reg.filp) {
3231
+ struct page *page = as_page(kbdev->csf.user_reg.dummy_page);
30163232
3017
- kbase_mem_pool_free(
3018
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page,
3019
- false);
3233
+ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false);
3234
+ fput(kbdev->csf.user_reg.filp);
30203235 }
30213236 }
30223237
30233238 int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
30243239 {
30253240 struct tagged_addr phys;
3241
+ struct file *filp;
30263242 struct page *page;
30273243 u32 *addr;
3028
- int ret;
30293244
3030
- kbdev->csf.dummy_user_reg_page = as_tagged(0);
3245
+ kbdev->csf.user_reg.filp = NULL;
30313246
3032
- ret = kbase_mem_pool_alloc_pages(
3033
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
3034
- false);
3247
+ filp = shmem_file_setup("mali csf user_reg", MAX_LFS_FILESIZE, VM_NORESERVE);
3248
+ if (IS_ERR(filp)) {
3249
+ dev_err(kbdev->dev, "failed to get an unlinked file for user_reg");
3250
+ return PTR_ERR(filp);
3251
+ }
30353252
3036
- if (ret <= 0)
3037
- return ret;
3253
+ if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
3254
+ false, NULL) <= 0) {
3255
+ fput(filp);
3256
+ return -ENOMEM;
3257
+ }
30383258
30393259 page = as_page(phys);
30403260 addr = kmap_atomic(page);
....@@ -3044,12 +3264,13 @@
30443264 */
30453265 addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE;
30463266
3047
- kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), sizeof(u32),
3267
+ kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32),
30483268 DMA_BIDIRECTIONAL);
30493269 kunmap_atomic(addr);
30503270
3051
- kbdev->csf.dummy_user_reg_page = phys;
3052
-
3271
+ kbdev->csf.user_reg.filp = filp;
3272
+ kbdev->csf.user_reg.dummy_page = phys;
3273
+ kbdev->csf.user_reg.file_offset = 0;
30533274 return 0;
30543275 }
30553276
....@@ -3066,4 +3287,3 @@
30663287
30673288 return out_priority;
30683289 }
3069
-