forked from ~ljy/RK356X_SDK_RELEASE

hc, 2023-12-11, commit 6778948f9de86c3cfaf36725a7c87dcff9ba247f
kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,34 +27,23 @@
 #include <linux/export.h>
 #include <linux/priority_control_manager.h>
 #include <linux/shmem_fs.h>
-#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>
 #include "mali_kbase_csf_tiler_heap.h"
 #include <mmu/mali_kbase_mmu.h>
 #include "mali_kbase_csf_timeout.h"
 #include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
+#include <mali_kbase_hwaccess_time.h>
+#include "mali_kbase_csf_event.h"
+#include <tl/mali_kbase_tracepoints.h>
+#include "mali_kbase_csf_mcu_shared_reg.h"
 
 #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
 #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
-#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
 
-/**
- * struct kbase_csf_event - CSF event callback.
- *
- * This structure belongs to the list of events which is part of a Kbase
- * context, and describes a callback function with a custom parameter to pass
- * to it when a CSF event is signalled.
- *
- * @link: Link to the rest of the list.
- * @kctx: Pointer to the Kbase context this event belongs to.
- * @callback: Callback function to call when a CSF event is signalled.
- * @param: Parameter to pass to the callback function.
- */
-struct kbase_csf_event {
-	struct list_head link;
-	struct kbase_context *kctx;
-	kbase_csf_event_callback *callback;
-	void *param;
-};
+#define CS_RING_BUFFER_MAX_SIZE ((uint32_t)(1 << 31)) /* 2GiB */
+#define CS_RING_BUFFER_MIN_SIZE ((uint32_t)4096)
+
+#define PROTM_ALLOC_MAX_RETRIES ((u8)5)
 
 const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = {
	KBASE_QUEUE_GROUP_PRIORITY_HIGH,
@@ -68,6 +57,55 @@
	BASE_QUEUE_GROUP_PRIORITY_MEDIUM,
	BASE_QUEUE_GROUP_PRIORITY_LOW
 };
+
+/*
+ * struct irq_idle_and_protm_track - Object that tracks the idle and protected mode
+ *                                   request information in an interrupt case across
+ *                                   groups.
+ *
+ * @protm_grp: Possibly schedulable group that requested protected mode in the interrupt.
+ *             If NULL, no such case observed in the tracked interrupt case.
+ * @idle_seq:  The highest priority group that notified idle. If no such instance in the
+ *             interrupt case, marked with the largest field value: U32_MAX.
+ * @idle_slot: The slot number if @p idle_seq is valid in the given tracking case.
+ */
+struct irq_idle_and_protm_track {
+	struct kbase_queue_group *protm_grp;
+	u32 idle_seq;
+	s8 idle_slot;
+};
+
+/**
+ * kbasep_ctx_user_reg_page_mapping_term() - Terminate resources for USER Register Page.
+ *
+ * @kctx: Pointer to the kbase context
+ */
+static void kbasep_ctx_user_reg_page_mapping_term(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	if (unlikely(kctx->csf.user_reg.vma))
+		dev_err(kbdev->dev, "VMA for USER Register page exist on termination of ctx %d_%d",
+			kctx->tgid, kctx->id);
+	if (WARN_ON_ONCE(!list_empty(&kctx->csf.user_reg.link)))
+		list_del_init(&kctx->csf.user_reg.link);
+}
+
+/**
+ * kbasep_ctx_user_reg_page_mapping_init() - Initialize resources for USER Register Page.
+ *
+ * @kctx: Pointer to the kbase context
+ *
+ * @return: 0 on success.
+ */
+static int kbasep_ctx_user_reg_page_mapping_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->csf.user_reg.link);
+	kctx->csf.user_reg.vma = NULL;
+	kctx->csf.user_reg.file_offset = 0;
+
+	return 0;
+}
 
 static void put_user_pages_mmap_handle(struct kbase_context *kctx,
				       struct kbase_queue *queue)
@@ -129,21 +167,6 @@
	return 0;
 }
 
-static void gpu_munmap_user_io_pages(struct kbase_context *kctx,
-		struct kbase_va_region *reg)
-{
-	size_t num_pages = 2;
-
-	kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
-				 reg->start_pfn, num_pages, MCU_AS_NR);
-
-	WARN_ON(reg->flags & KBASE_REG_FREE);
-
-	mutex_lock(&kctx->kbdev->csf.reg_lock);
-	kbase_remove_va_region(reg);
-	mutex_unlock(&kctx->kbdev->csf.reg_lock);
-}
-
 static void init_user_io_pages(struct kbase_queue *queue)
 {
	u32 *input_addr = (u32 *)(queue->user_io_addr);
@@ -161,80 +184,15 @@
	output_addr[CS_ACTIVE/4] = 0;
 }
 
-/* Map the input/output pages in the shared interface segment of MCU firmware
- * address space.
- */
-static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
-	struct tagged_addr *phys, struct kbase_va_region *reg)
-{
-	unsigned long mem_flags = KBASE_REG_GPU_RD;
-	const size_t num_pages = 2;
-	int ret;
-
-#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
-	((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
-	(KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
-	mem_flags |=
-		KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
-#else
-	if (kbdev->system_coherency == COHERENCY_NONE) {
-		mem_flags |=
-			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
-	} else {
-		mem_flags |= KBASE_REG_SHARE_BOTH |
-			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED);
-	}
-#endif
-
-	mutex_lock(&kbdev->csf.reg_lock);
-	ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1);
-	reg->flags &= ~KBASE_REG_FREE;
-	mutex_unlock(&kbdev->csf.reg_lock);
-
-	if (ret)
-		return ret;
-
-	/* Map input page */
-	ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
-				     reg->start_pfn, &phys[0],
-				     1, mem_flags, MCU_AS_NR,
-				     KBASE_MEM_GROUP_CSF_IO);
-	if (ret)
-		goto bad_insert;
-
-	/* Map output page, it needs rw access */
-	mem_flags |= KBASE_REG_GPU_WR;
-	ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
-				     reg->start_pfn + 1, &phys[1],
-				     1, mem_flags, MCU_AS_NR,
-				     KBASE_MEM_GROUP_CSF_IO);
-	if (ret)
-		goto bad_insert_output_page;
-
-	return 0;
-
-bad_insert_output_page:
-	kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu,
-				 reg->start_pfn, 1, MCU_AS_NR);
-bad_insert:
-	mutex_lock(&kbdev->csf.reg_lock);
-	kbase_remove_va_region(reg);
-	mutex_unlock(&kbdev->csf.reg_lock);
-
-	return ret;
-}
-
 static void kernel_unmap_user_io_pages(struct kbase_context *kctx,
				       struct kbase_queue *queue)
 {
-	const size_t num_pages = 2;
-
	kbase_gpu_vm_lock(kctx);
 
	vunmap(queue->user_io_addr);
 
-	WARN_ON(num_pages > atomic_read(&kctx->permanent_mapped_pages));
-	atomic_sub(num_pages, &kctx->permanent_mapped_pages);
+	WARN_ON(atomic_read(&kctx->permanent_mapped_pages) < KBASEP_NUM_CS_USER_IO_PAGES);
+	atomic_sub(KBASEP_NUM_CS_USER_IO_PAGES, &kctx->permanent_mapped_pages);
 
	kbase_gpu_vm_unlock(kctx);
 }
@@ -244,6 +202,8 @@
 {
	struct page *page_list[2];
	pgprot_t cpu_map_prot;
+	unsigned long flags;
+	char *user_io_addr;
	int ret = 0;
	size_t i;
 
@@ -258,26 +218,29 @@
	/* The pages are mapped to Userspace also, so use the same mapping
	 * attributes as used inside the CPU page fault handler.
	 */
-#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
-	((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
-	(KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
-	cpu_map_prot = pgprot_device(PAGE_KERNEL);
-#else
	if (kctx->kbdev->system_coherency == COHERENCY_NONE)
		cpu_map_prot = pgprot_writecombine(PAGE_KERNEL);
	else
		cpu_map_prot = PAGE_KERNEL;
-#endif
 
	for (i = 0; i < ARRAY_SIZE(page_list); i++)
		page_list[i] = as_page(queue->phys[i]);
 
-	queue->user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot);
+	user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot);
 
-	if (!queue->user_io_addr)
+	if (!user_io_addr) {
+		dev_err(kctx->kbdev->dev,
+			"%s(): user_io_addr is NULL, queue: %p",
+			__func__,
+			queue);
		ret = -ENOMEM;
-	else
+	} else {
		atomic_add(ARRAY_SIZE(page_list), &kctx->permanent_mapped_pages);
+	}
+
+	kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags);
+	queue->user_io_addr = user_io_addr;
+	kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
 
 unlock:
	kbase_gpu_vm_unlock(kctx);
@@ -310,70 +273,62 @@
310273 * If an explicit or implicit unbind was missed by the userspace then the
311274 * mapping will persist. On process exit kernel itself will remove the mapping.
312275 */
313
-static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx,
314
- struct kbase_queue *queue)
276
+void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue)
315277 {
316
- const size_t num_pages = 2;
317
-
318
- gpu_munmap_user_io_pages(kctx, queue->reg);
319278 kernel_unmap_user_io_pages(kctx, queue);
320279
321280 kbase_mem_pool_free_pages(
322281 &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
323
- num_pages, queue->phys, true, false);
282
+ KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, true, false);
283
+ kbase_process_page_usage_dec(kctx, KBASEP_NUM_CS_USER_IO_PAGES);
324284
325
- kfree(queue->reg);
326
- queue->reg = NULL;
285
+ /* The user_io_gpu_va should have been unmapped inside the scheduler */
286
+ WARN_ONCE(queue->user_io_gpu_va, "Userio pages appears still have mapping");
327287
328288 /* If the queue has already been terminated by userspace
329289 * then the ref count for queue object will drop to 0 here.
330290 */
331291 release_queue(queue);
332292 }
293
+KBASE_EXPORT_TEST_API(kbase_csf_free_command_stream_user_pages);
333294
334
-int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
335
- struct kbase_queue *queue)
295
+int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue)
336296 {
337297 struct kbase_device *kbdev = kctx->kbdev;
338
- struct kbase_va_region *reg;
339
- const size_t num_pages = 2;
340298 int ret;
341299
342300 lockdep_assert_held(&kctx->csf.lock);
343301
344
- reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0,
345
- num_pages, KBASE_REG_ZONE_MCU_SHARED);
346
- if (!reg)
302
+ ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
303
+ KBASEP_NUM_CS_USER_IO_PAGES,
304
+ queue->phys, false, kctx->task);
305
+ if (ret != KBASEP_NUM_CS_USER_IO_PAGES) {
306
+ /* Marking both the phys to zero for indicating there is no phys allocated */
307
+ queue->phys[0].tagged_addr = 0;
308
+ queue->phys[1].tagged_addr = 0;
347309 return -ENOMEM;
348
-
349
- ret = kbase_mem_pool_alloc_pages(
350
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
351
- num_pages, queue->phys, false);
352
-
353
- if (ret != num_pages)
354
- goto phys_alloc_failed;
310
+ }
355311
356312 ret = kernel_map_user_io_pages(kctx, queue);
357313 if (ret)
358314 goto kernel_map_failed;
359315
316
+ kbase_process_page_usage_inc(kctx, KBASEP_NUM_CS_USER_IO_PAGES);
360317 init_user_io_pages(queue);
361318
362
- ret = gpu_mmap_user_io_pages(kctx->kbdev, queue->phys, reg);
363
- if (ret)
364
- goto gpu_mmap_failed;
365
-
366
- queue->reg = reg;
319
+ /* user_io_gpu_va is only mapped when scheduler decides to put the queue
320
+ * on slot at runtime. Initialize it to 0, signalling no mapping.
321
+ */
322
+ queue->user_io_gpu_va = 0;
367323
368324 mutex_lock(&kbdev->csf.reg_lock);
369
- if (kbdev->csf.db_file_offsets >
370
- (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1))
325
+ if (kbdev->csf.db_file_offsets > (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1))
371326 kbdev->csf.db_file_offsets = 0;
372327
373328 queue->db_file_offset = kbdev->csf.db_file_offsets;
374329 kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES;
375
-
376
- WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
330
+ WARN(kbase_refcount_read(&queue->refcount) != 1,
331
+ "Incorrect refcounting for queue object\n");
377332 /* This is the second reference taken on the queue object and
378333 * would be dropped only when the IO mapping is removed either
379334 * explicitly by userspace or implicitly by kernel on process exit.
@@ -384,19 +339,16 @@
384339
385340 return 0;
386341
387
-gpu_mmap_failed:
388
- kernel_unmap_user_io_pages(kctx, queue);
389
-
390342 kernel_map_failed:
391
- kbase_mem_pool_free_pages(
392
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
393
- num_pages, queue->phys, false, false);
343
+ kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
344
+ KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false, false);
345
+ /* Marking both the phys to zero for indicating there is no phys allocated */
346
+ queue->phys[0].tagged_addr = 0;
347
+ queue->phys[1].tagged_addr = 0;
394348
395
-phys_alloc_failed:
396
- kfree(reg);
397
-
398
- return -ENOMEM;
349
+ return ret;
399350 }
351
+KBASE_EXPORT_TEST_API(kbase_csf_alloc_command_stream_user_pages);
400352
401353 static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx,
402354 u8 group_handle)
@@ -413,6 +365,12 @@
413365
414366 return NULL;
415367 }
368
+
369
+struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle)
370
+{
371
+ return find_queue_group(kctx, group_handle);
372
+}
373
+KBASE_EXPORT_TEST_API(kbase_csf_find_queue_group);
416374
417375 int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx,
418376 u8 group_handle)
@@ -442,25 +400,37 @@
442400
443401 static void get_queue(struct kbase_queue *queue)
444402 {
445
- WARN_ON(!atomic_inc_not_zero(&queue->refcount));
403
+ WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount));
446404 }
447405
448406 static void release_queue(struct kbase_queue *queue)
449407 {
450408 lockdep_assert_held(&queue->kctx->csf.lock);
451
-
452
- WARN_ON(atomic_read(&queue->refcount) <= 0);
453
-
454
- if (atomic_dec_and_test(&queue->refcount)) {
409
+ if (kbase_refcount_dec_and_test(&queue->refcount)) {
455410 /* The queue can't still be on the per context list. */
456411 WARN_ON(!list_empty(&queue->link));
457412 WARN_ON(queue->group);
413
+ dev_dbg(queue->kctx->kbdev->dev,
414
+ "Remove any pending command queue fatal from ctx %d_%d",
415
+ queue->kctx->tgid, queue->kctx->id);
416
+ kbase_csf_event_remove_error(queue->kctx, &queue->error);
417
+
418
+ /* After this the Userspace would be able to free the
419
+ * memory for GPU queue. In case the Userspace missed
420
+ * terminating the queue, the cleanup will happen on
421
+ * context termination where tear down of region tracker
422
+ * would free up the GPU queue memory.
423
+ */
424
+ kbase_gpu_vm_lock(queue->kctx);
425
+ kbase_va_region_no_user_free_dec(queue->queue_reg);
426
+ kbase_gpu_vm_unlock(queue->kctx);
427
+
458428 kfree(queue);
459429 }
460430 }
461431
462432 static void oom_event_worker(struct work_struct *data);
463
-static void fatal_event_worker(struct work_struct *data);
433
+static void cs_error_worker(struct work_struct *data);
464434
465435 /* Between reg and reg_ex, one and only one must be null */
466436 static int csf_queue_register_internal(struct kbase_context *kctx,
@@ -475,7 +445,7 @@
475445
476446 /* Only one pointer expected, otherwise coding error */
477447 if ((reg == NULL && reg_ex == NULL) || (reg && reg_ex)) {
478
- dev_err(kctx->kbdev->dev,
448
+ dev_dbg(kctx->kbdev->dev,
479449 "Error, one and only one param-ptr expected!");
480450 return -EINVAL;
481451 }
@@ -508,7 +478,8 @@
508478 region = kbase_region_tracker_find_region_enclosing_address(kctx,
509479 queue_addr);
510480
511
- if (kbase_is_region_invalid_or_free(region)) {
481
+ if (kbase_is_region_invalid_or_free(region) || kbase_is_region_shrinkable(region) ||
482
+ region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
512483 ret = -ENOENT;
513484 goto out_unlock_vm;
514485 }
@@ -525,24 +496,24 @@
525496 if (reg_ex && reg_ex->ex_buffer_size) {
526497 int buf_pages = (reg_ex->ex_buffer_size +
527498 (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
499
+ struct kbase_va_region *region_ex =
500
+ kbase_region_tracker_find_region_enclosing_address(kctx,
501
+ reg_ex->ex_buffer_base);
528502
529
- region = kbase_region_tracker_find_region_enclosing_address(
530
- kctx, reg_ex->ex_buffer_base);
531
- if (kbase_is_region_invalid_or_free(region)) {
503
+ if (kbase_is_region_invalid_or_free(region_ex)) {
532504 ret = -ENOENT;
533505 goto out_unlock_vm;
534506 }
535507
536
- if (buf_pages > (region->nr_pages -
537
- ((reg_ex->ex_buffer_base >> PAGE_SHIFT) -
538
- region->start_pfn))) {
508
+ if (buf_pages > (region_ex->nr_pages -
509
+ ((reg_ex->ex_buffer_base >> PAGE_SHIFT) - region_ex->start_pfn))) {
539510 ret = -EINVAL;
540511 goto out_unlock_vm;
541512 }
542513
543
- region = kbase_region_tracker_find_region_enclosing_address(
544
- kctx, reg_ex->ex_offset_var_addr);
545
- if (kbase_is_region_invalid_or_free(region)) {
514
+ region_ex = kbase_region_tracker_find_region_enclosing_address(
515
+ kctx, reg_ex->ex_offset_var_addr);
516
+ if (kbase_is_region_invalid_or_free(region_ex)) {
546517 ret = -ENOENT;
547518 goto out_unlock_vm;
548519 }
@@ -557,13 +528,16 @@
557528
558529 queue->kctx = kctx;
559530 queue->base_addr = queue_addr;
531
+
560532 queue->queue_reg = region;
533
+ kbase_va_region_no_user_free_inc(region);
534
+
561535 queue->size = (queue_size << PAGE_SHIFT);
562536 queue->csi_index = KBASEP_IF_NR_INVALID;
563537 queue->enabled = false;
564538
565539 queue->priority = reg->priority;
566
- atomic_set(&queue->refcount, 1);
540
+ kbase_refcount_set(&queue->refcount, 1);
567541
568542 queue->group = NULL;
569543 queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
@@ -574,16 +548,24 @@
574548 queue->sync_ptr = 0;
575549 queue->sync_value = 0;
576550
551
+#if IS_ENABLED(CONFIG_DEBUG_FS)
552
+ queue->saved_cmd_ptr = 0;
553
+#endif
554
+
577555 queue->sb_status = 0;
578556 queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;
557
+
558
+ atomic_set(&queue->pending, 0);
579559
580560 INIT_LIST_HEAD(&queue->link);
581561 INIT_LIST_HEAD(&queue->error.link);
582562 INIT_WORK(&queue->oom_event_work, oom_event_worker);
583
- INIT_WORK(&queue->fatal_event_work, fatal_event_worker);
563
+ INIT_WORK(&queue->cs_error_work, cs_error_worker);
584564 list_add(&queue->link, &kctx->csf.queue_list);
585565
586
- region->flags |= KBASE_REG_NO_USER_FREE;
566
+ queue->extract_ofs = 0;
567
+
568
+ region->user_data = queue;
587569
588570 /* Initialize the cs_trace configuration parameters, When buffer_size
589571 * is 0, trace is disabled. Here we only update the fields when
@@ -612,6 +594,13 @@
612594 int kbase_csf_queue_register(struct kbase_context *kctx,
613595 struct kbase_ioctl_cs_queue_register *reg)
614596 {
597
+ /* Validate the ring buffer configuration parameters */
598
+ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
599
+ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
600
+ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
601
+ reg->buffer_gpu_addr & ~PAGE_MASK)
602
+ return -EINVAL;
603
+
615604 return csf_queue_register_internal(kctx, reg, NULL);
616605 }
617606
@@ -630,14 +619,21 @@
630619 if (glb_version < kbase_csf_interface_version(1, 1, 0))
631620 return -EINVAL;
632621
633
- /* Validate the cs_trace configuration parameters */
634
- if (reg->ex_buffer_size &&
635
- ((reg->ex_event_size > max_size) ||
636
- (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
637
- (reg->ex_buffer_size < min_buf_size)))
638
- return -EINVAL;
622
+ /* Validate the ring buffer configuration parameters */
623
+ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
624
+ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
625
+ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
626
+ reg->buffer_gpu_addr & ~PAGE_MASK)
627
+ return -EINVAL;
639628
640
- return csf_queue_register_internal(kctx, NULL, reg);
629
+ /* Validate the cs_trace configuration parameters */
630
+ if (reg->ex_buffer_size &&
631
+ ((reg->ex_event_size > max_size) ||
632
+ (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
633
+ (reg->ex_buffer_size < min_buf_size)))
634
+ return -EINVAL;
635
+
636
+ return csf_queue_register_internal(kctx, NULL, reg);
641637 }
642638
643639 static void unbind_queue(struct kbase_context *kctx,
....@@ -664,8 +660,6 @@
664660 queue = find_queue(kctx, term->buffer_gpu_addr);
665661
666662 if (queue) {
667
- unsigned long flags;
668
-
669663 /* As the GPU queue has been terminated by the
670664 * user space, undo the actions that were performed when the
671665 * queue was registered i.e. remove the queue from the per
@@ -678,23 +672,9 @@
678672 unbind_queue(kctx, queue);
679673
680674 kbase_gpu_vm_lock(kctx);
681
- if (!WARN_ON(!queue->queue_reg)) {
682
- /* After this the Userspace would be able to free the
683
- * memory for GPU queue. In case the Userspace missed
684
- * terminating the queue, the cleanup will happen on
685
- * context termination where teardown of region tracker
686
- * would free up the GPU queue memory.
687
- */
688
- queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE;
689
- }
675
+ if (!WARN_ON(!queue->queue_reg))
676
+ queue->queue_reg->user_data = NULL;
690677 kbase_gpu_vm_unlock(kctx);
691
-
692
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
693
- dev_dbg(kctx->kbdev->dev,
694
- "Remove any pending command queue fatal from context %pK\n",
695
- (void *)kctx);
696
- list_del_init(&queue->error.link);
697
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
698678
699679 release_queue(queue);
700680 }
@@ -776,10 +756,69 @@
776756 return group;
777757 }
778758
759
+static void enqueue_gpu_submission_work(struct kbase_context *const kctx)
760
+{
761
+ queue_work(system_highpri_wq, &kctx->csf.pending_submission_work);
762
+}
763
+
764
+/**
765
+ * pending_submission_worker() - Work item to process pending kicked GPU command queues.
766
+ *
767
+ * @work: Pointer to pending_submission_work.
768
+ *
769
+ * This function starts all pending queues, for which the work
770
+ * was previously submitted via ioctl call from application thread.
771
+ * If the queue is already scheduled and resident, it will be started
772
+ * right away, otherwise once the group is made resident.
773
+ */
774
+static void pending_submission_worker(struct work_struct *work)
775
+{
776
+ struct kbase_context *kctx =
777
+ container_of(work, struct kbase_context, csf.pending_submission_work);
778
+ struct kbase_device *kbdev = kctx->kbdev;
779
+ struct kbase_queue *queue;
780
+ int err = kbase_reset_gpu_prevent_and_wait(kbdev);
781
+
782
+ if (err) {
783
+ dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue ");
784
+ return;
785
+ }
786
+
787
+ mutex_lock(&kctx->csf.lock);
788
+
789
+ /* Iterate through the queue list and schedule the pending ones for submission. */
790
+ list_for_each_entry(queue, &kctx->csf.queue_list, link) {
791
+ if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) {
792
+ struct kbase_queue_group *group = get_bound_queue_group(queue);
793
+ int ret;
794
+
795
+ if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) {
796
+ dev_dbg(kbdev->dev, "queue is not bound to a group");
797
+ continue;
798
+ }
799
+
800
+ ret = kbase_csf_scheduler_queue_start(queue);
801
+ if (unlikely(ret)) {
802
+ dev_dbg(kbdev->dev, "Failed to start queue");
803
+ if (ret == -EBUSY) {
804
+ atomic_cmpxchg(&queue->pending, 0, 1);
805
+ enqueue_gpu_submission_work(kctx);
806
+ }
807
+ }
808
+ }
809
+ }
810
+
811
+ mutex_unlock(&kctx->csf.lock);
812
+
813
+ kbase_reset_gpu_allow(kbdev);
814
+}
815
+
779816 void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot)
780817 {
781818 if (WARN_ON(slot < 0))
782819 return;
820
+
821
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
783822
784823 kbase_csf_ring_csg_slots_doorbell(kbdev, (u32) (1 << slot));
785824 }
@@ -793,8 +832,19 @@
793832 (u32) ((1U << kbdev->csf.global_iface.group_num) - 1);
794833 u32 value;
795834
835
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
836
+
796837 if (WARN_ON(slot_bitmap > allowed_bitmap))
797838 return;
839
+
840
+ /* The access to GLB_DB_REQ/ACK needs to be ordered with respect to CSG_REQ/ACK and
841
+ * CSG_DB_REQ/ACK to avoid a scenario where a CSI request overlaps with a CSG request
842
+ * or 2 CSI requests overlap and FW ends up missing the 2nd request.
843
+ * Memory barrier is required, both on Host and FW side, to guarantee the ordering.
844
+ *
845
+ * 'osh' is used as CPU and GPU would be in the same Outer shareable domain.
846
+ */
847
+ dmb(osh);
798848
799849 value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK);
800850 value ^= slot_bitmap;
@@ -822,6 +872,8 @@
822872 struct kbase_csf_cmd_stream_group_info *ginfo;
823873 u32 value;
824874
875
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
876
+
825877 if (WARN_ON(csg_nr < 0) ||
826878 WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
827879 return;
@@ -831,6 +883,14 @@
831883 if (WARN_ON(csi_index < 0) ||
832884 WARN_ON(csi_index >= ginfo->stream_num))
833885 return;
886
+
887
+ /* The access to CSG_DB_REQ/ACK needs to be ordered with respect to
888
+ * CS_REQ/ACK to avoid a scenario where CSG_DB_REQ/ACK becomes visibile to
889
+ * FW before CS_REQ/ACK is set.
890
+ *
891
+ * 'osh' is used as CPU and GPU would be in the same outer shareable domain.
892
+ */
893
+ dmb(osh);
834894
835895 value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK);
836896 value ^= (1 << csi_index);
@@ -845,36 +905,37 @@
845905 struct kbase_ioctl_cs_queue_kick *kick)
846906 {
847907 struct kbase_device *kbdev = kctx->kbdev;
848
- struct kbase_queue_group *group;
849
- struct kbase_queue *queue;
908
+ bool trigger_submission = false;
909
+ struct kbase_va_region *region;
850910 int err = 0;
851911
852
- err = kbase_reset_gpu_prevent_and_wait(kbdev);
853
- if (err) {
854
- dev_warn(
855
- kbdev->dev,
856
- "Unsuccessful GPU reset detected when kicking queue (buffer_addr=0x%.16llx)",
857
- kick->buffer_gpu_addr);
858
- return err;
859
- }
912
+ KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK(kbdev, kctx->id, kick->buffer_gpu_addr);
860913
861
- mutex_lock(&kctx->csf.lock);
862
- queue = find_queue(kctx, kick->buffer_gpu_addr);
863
- if (!queue)
864
- err = -EINVAL;
914
+ /* GPU work submission happening asynchronously to prevent the contention with
915
+ * scheduler lock and as the result blocking application thread. For this reason,
916
+ * the vm_lock is used here to get the reference to the queue based on its buffer_gpu_addr
917
+ * from the context list of active va_regions.
918
+ * Once the target queue is found the pending flag is set to one atomically avoiding
919
+ * a race between submission ioctl thread and the work item.
920
+ */
921
+ kbase_gpu_vm_lock(kctx);
922
+ region = kbase_region_tracker_find_region_enclosing_address(kctx, kick->buffer_gpu_addr);
923
+ if (!kbase_is_region_invalid_or_free(region)) {
924
+ struct kbase_queue *queue = region->user_data;
865925
866
- if (!err) {
867
- group = get_bound_queue_group(queue);
868
- if (!group) {
869
- dev_err(kctx->kbdev->dev, "queue not bound\n");
870
- err = -EINVAL;
926
+ if (queue) {
927
+ atomic_cmpxchg(&queue->pending, 0, 1);
928
+ trigger_submission = true;
871929 }
930
+ } else {
931
+ dev_dbg(kbdev->dev,
932
+ "Attempt to kick GPU queue without a valid command buffer region");
933
+ err = -EFAULT;
872934 }
935
+ kbase_gpu_vm_unlock(kctx);
873936
874
- if (!err)
875
- err = kbase_csf_scheduler_queue_start(queue);
876
- mutex_unlock(&kctx->csf.lock);
877
- kbase_reset_gpu_allow(kbdev);
937
+ if (likely(trigger_submission))
938
+ enqueue_gpu_submission_work(kctx);
878939
879940 return err;
880941 }
@@ -884,19 +945,23 @@
884945 {
885946 lockdep_assert_held(&kctx->csf.lock);
886947
948
+ if (WARN_ON(queue->csi_index < 0))
949
+ return;
950
+
887951 if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) {
888952 unsigned long flags;
889953
890954 kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags);
891955 bitmap_clear(queue->group->protm_pending_bitmap,
892956 queue->csi_index, 1);
893
- KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, PROTM_PENDING_CLEAR,
957
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, CSI_PROTM_PEND_CLEAR,
894958 queue->group, queue, queue->group->protm_pending_bitmap[0]);
895959 queue->group->bound_queues[queue->csi_index] = NULL;
896960 queue->group = NULL;
897961 kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
898962
899963 put_user_pages_mmap_handle(kctx, queue);
964
+ WARN_ON_ONCE(queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID);
900965 queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
901966 }
902967 }
@@ -938,7 +1003,16 @@
9381003 }
9391004 }
9401005
941
-void kbase_csf_queue_unbind(struct kbase_queue *queue)
1006
+static bool kbase_csf_queue_phys_allocated(struct kbase_queue *queue)
1007
+{
1008
+ /* The queue's phys are zeroed when allocation fails. Both of them being
1009
+ * zero is an impossible condition for a successful allocated set of phy pages.
1010
+ */
1011
+
1012
+ return (queue->phys[0].tagged_addr | queue->phys[1].tagged_addr);
1013
+}
1014
+
1015
+void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit)
9421016 {
9431017 struct kbase_context *kctx = queue->kctx;
9441018
@@ -952,7 +1026,7 @@
9521026 * whereas CSG TERM request would result in an immediate abort or
9531027 * cancellation of the pending work.
9541028 */
955
- if (current->flags & PF_EXITING) {
1029
+ if (process_exit) {
9561030 struct kbase_queue_group *group = get_bound_queue_group(queue);
9571031
9581032 if (group)
....@@ -963,8 +1037,8 @@
9631037 unbind_queue(kctx, queue);
9641038 }
9651039
966
- /* Free the resources, if allocated for this queue. */
967
- if (queue->reg)
1040
+ /* Free the resources, if allocated phys for this queue */
1041
+ if (kbase_csf_queue_phys_allocated(queue))
9681042 kbase_csf_free_command_stream_user_pages(kctx, queue);
9691043 }
9701044
@@ -977,8 +1051,8 @@
9771051 WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND);
9781052 unbind_stopped_queue(kctx, queue);
9791053
980
- /* Free the resources, if allocated for this queue. */
981
- if (queue->reg)
1054
+ /* Free the resources, if allocated phys for this queue */
1055
+ if (kbase_csf_queue_phys_allocated(queue))
9821056 kbase_csf_free_command_stream_user_pages(kctx, queue);
9831057 }
9841058
@@ -1041,159 +1115,39 @@
10411115 * @kctx: Pointer to kbase context where the queue group is created at
10421116 * @s_buf: Pointer to suspend buffer that is attached to queue group
10431117 *
1044
- * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
1045
- * MMU page table. Otherwise -ENOMEM.
1118
+ * Return: 0 if phy-pages for the suspend buffer is successfully allocated.
1119
+ * Otherwise -ENOMEM or error code.
10461120 */
10471121 static int create_normal_suspend_buffer(struct kbase_context *const kctx,
10481122 struct kbase_normal_suspend_buffer *s_buf)
10491123 {
1050
- struct kbase_va_region *reg = NULL;
1051
- const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
10521124 const size_t nr_pages =
10531125 PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
1054
- int err = 0;
1126
+ int err;
10551127
10561128 lockdep_assert_held(&kctx->csf.lock);
10571129
1058
- /* Allocate and initialize Region Object */
1059
- reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0,
1060
- nr_pages, KBASE_REG_ZONE_MCU_SHARED);
1061
-
1062
- if (!reg)
1063
- return -ENOMEM;
1130
+ /* The suspend buffer's mapping address is valid only when the CSG is to
1131
+ * run on slot, initializing it 0, signalling the buffer is not mapped.
1132
+ */
1133
+ s_buf->gpu_va = 0;
10641134
10651135 s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL);
10661136
1067
- if (!s_buf->phy) {
1068
- err = -ENOMEM;
1069
- goto phy_alloc_failed;
1070
- }
1071
-
1072
- /* Get physical page for a normal suspend buffer */
1073
- err = kbase_mem_pool_alloc_pages(
1074
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
1075
- nr_pages, &s_buf->phy[0], false);
1076
-
1077
- if (err < 0)
1078
- goto phy_pages_alloc_failed;
1079
-
1080
- /* Insert Region Object into rbtree and make virtual address available
1081
- * to map it to physical page
1082
- */
1083
- mutex_lock(&kctx->kbdev->csf.reg_lock);
1084
- err = kbase_add_va_region_rbtree(kctx->kbdev, reg, 0, nr_pages, 1);
1085
- reg->flags &= ~KBASE_REG_FREE;
1086
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
1087
-
1088
- if (err)
1089
- goto add_va_region_failed;
1090
-
1091
- /* Update MMU table */
1092
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
1093
- reg->start_pfn, &s_buf->phy[0],
1094
- nr_pages, mem_flags,
1095
- MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW);
1096
- if (err)
1097
- goto mmu_insert_failed;
1098
-
1099
- s_buf->reg = reg;
1100
-
1101
- return 0;
1102
-
1103
-mmu_insert_failed:
1104
- mutex_lock(&kctx->kbdev->csf.reg_lock);
1105
- WARN_ON(kbase_remove_va_region(reg));
1106
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
1107
-
1108
-add_va_region_failed:
1109
- kbase_mem_pool_free_pages(
1110
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
1111
- &s_buf->phy[0], false, false);
1112
-
1113
-phy_pages_alloc_failed:
1114
- kfree(s_buf->phy);
1115
-phy_alloc_failed:
1116
- kfree(reg);
1117
-
1118
- return err;
1119
-}
1120
-
1121
-/**
1122
- * create_protected_suspend_buffer() - Create protected-mode suspend buffer
1123
- * per queue group
1124
- *
1125
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
1126
- * @s_buf: Pointer to suspend buffer that is attached to queue group
1127
- *
1128
- * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
1129
- * MMU page table. Otherwise -ENOMEM.
1130
- */
1131
-static int create_protected_suspend_buffer(struct kbase_device *const kbdev,
1132
- struct kbase_protected_suspend_buffer *s_buf)
1133
-{
1134
- struct kbase_va_region *reg = NULL;
1135
- struct tagged_addr *phys = NULL;
1136
- const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
1137
- const size_t nr_pages =
1138
- PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
1139
- int err = 0;
1140
-
1141
- /* Allocate and initialize Region Object */
1142
- reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
1143
- nr_pages, KBASE_REG_ZONE_MCU_SHARED);
1144
-
1145
- if (!reg)
1137
+ if (!s_buf->phy)
11461138 return -ENOMEM;
11471139
1148
- phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL);
1149
- if (!phys) {
1150
- err = -ENOMEM;
1151
- goto phy_alloc_failed;
1140
+ /* Get physical page for a normal suspend buffer */
1141
+ err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
1142
+ &s_buf->phy[0], false, kctx->task);
1143
+
1144
+ if (err < 0) {
1145
+ kfree(s_buf->phy);
1146
+ return err;
11521147 }
11531148
1154
- s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys,
1155
- nr_pages);
1156
- if (s_buf->pma == NULL) {
1157
- err = -ENOMEM;
1158
- goto pma_alloc_failed;
1159
- }
1160
-
1161
- /* Insert Region Object into rbtree and make virtual address available
1162
- * to map it to physical page
1163
- */
1164
- mutex_lock(&kbdev->csf.reg_lock);
1165
- err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_pages, 1);
1166
- reg->flags &= ~KBASE_REG_FREE;
1167
- mutex_unlock(&kbdev->csf.reg_lock);
1168
-
1169
- if (err)
1170
- goto add_va_region_failed;
1171
-
1172
- /* Update MMU table */
1173
- err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
1174
- reg->start_pfn, phys,
1175
- nr_pages, mem_flags, MCU_AS_NR,
1176
- KBASE_MEM_GROUP_CSF_FW);
1177
- if (err)
1178
- goto mmu_insert_failed;
1179
-
1180
- s_buf->reg = reg;
1181
- kfree(phys);
1149
+ kbase_process_page_usage_inc(kctx, nr_pages);
11821150 return 0;
1183
-
1184
-mmu_insert_failed:
1185
- mutex_lock(&kbdev->csf.reg_lock);
1186
- WARN_ON(kbase_remove_va_region(reg));
1187
- mutex_unlock(&kbdev->csf.reg_lock);
1188
-
1189
-add_va_region_failed:
1190
- kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
1191
-pma_alloc_failed:
1192
- kfree(phys);
1193
-phy_alloc_failed:
1194
- kfree(reg);
1195
-
1196
- return err;
11971151 }
11981152
11991153 static void timer_event_worker(struct work_struct *data);
@@ -1214,26 +1168,17 @@
12141168 static int create_suspend_buffers(struct kbase_context *const kctx,
12151169 struct kbase_queue_group * const group)
12161170 {
1217
- int err = 0;
1218
-
12191171 if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) {
12201172 dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n");
12211173 return -ENOMEM;
12221174 }
12231175
1224
- if (kctx->kbdev->csf.pma_dev) {
1225
- err = create_protected_suspend_buffer(kctx->kbdev,
1226
- &group->protected_suspend_buf);
1227
- if (err) {
1228
- term_normal_suspend_buffer(kctx,
1229
- &group->normal_suspend_buf);
1230
- dev_err(kctx->kbdev->dev, "Failed to create protected suspend buffer\n");
1231
- }
1232
- } else {
1233
- group->protected_suspend_buf.reg = NULL;
1234
- }
1176
+ /* Protected suspend buffer, runtime binding so just initialize it */
1177
+ group->protected_suspend_buf.gpu_va = 0;
1178
+ group->protected_suspend_buf.pma = NULL;
1179
+ group->protected_suspend_buf.alloc_retries = 0;
12351180
1236
- return err;
1181
+ return 0;
12371182 }
12381183
12391184 /**
@@ -1244,16 +1189,9 @@
12441189 */
12451190 static u32 generate_group_uid(void)
12461191 {
1247
- /* use first KBase device to store max UID */
1248
- struct kbase_device *kbdev = kbase_find_device(-1);
1249
- u32 uid = 1;
1192
+ static atomic_t global_csg_uid = ATOMIC_INIT(0);
12501193
1251
- if (kbdev)
1252
- uid = (u32) atomic_inc_return(&kbdev->group_max_uid_in_devices);
1253
- else
1254
- WARN(1, "NULL kbase device pointer in group UID generation");
1255
-
1256
- return uid;
1194
+ return (u32)atomic_inc_return(&global_csg_uid);
12571195 }
12581196
12591197 /**
@@ -1272,8 +1210,8 @@
12721210 int group_handle = find_free_group_handle(kctx);
12731211
12741212 if (group_handle < 0) {
1275
- dev_err(kctx->kbdev->dev,
1276
- "All queue group handles are already in use\n");
1213
+ dev_dbg(kctx->kbdev->dev,
1214
+ "All queue group handles are already in use");
12771215 } else {
12781216 struct kbase_queue_group * const group =
12791217 kmalloc(sizeof(struct kbase_queue_group),
@@ -1298,10 +1236,22 @@
12981236 group->tiler_max = create->in.tiler_max;
12991237 group->fragment_max = create->in.fragment_max;
13001238 group->compute_max = create->in.compute_max;
1239
+ group->csi_handlers = create->in.csi_handlers;
13011240 group->priority = kbase_csf_priority_queue_group_priority_to_relative(
13021241 kbase_csf_priority_check(kctx->kbdev, create->in.priority));
13031242 group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
13041243 group->faulted = false;
1244
+ group->cs_unrecoverable = false;
1245
+ group->reevaluate_idle_status = false;
1246
+
1247
+ group->csg_reg = NULL;
1248
+ group->csg_reg_bind_retries = 0;
1249
+
1250
+ group->dvs_buf = create->in.dvs_buf;
1251
+
1252
+#if IS_ENABLED(CONFIG_DEBUG_FS)
1253
+ group->deschedule_deferred_cnt = 0;
1254
+#endif
13051255
13061256 group->group_uid = generate_group_uid();
13071257 create->out.group_uid = group->group_uid;
@@ -1317,6 +1267,9 @@
13171267 MAX_SUPPORTED_STREAMS_PER_GROUP);
13181268
13191269 group->run_state = KBASE_CSF_GROUP_INACTIVE;
1270
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group,
1271
+ group->run_state);
1272
+
13201273 err = create_suspend_buffers(kctx, group);
13211274
13221275 if (err < 0) {
@@ -1336,6 +1289,18 @@
13361289 return group_handle;
13371290 }
13381291
1292
+static bool dvs_supported(u32 csf_version)
1293
+{
1294
+ if (GLB_VERSION_MAJOR_GET(csf_version) < 3)
1295
+ return false;
1296
+
1297
+ if (GLB_VERSION_MAJOR_GET(csf_version) == 3)
1298
+ if (GLB_VERSION_MINOR_GET(csf_version) < 2)
1299
+ return false;
1300
+
1301
+ return true;
1302
+}
1303
+
13391304 int kbase_csf_queue_group_create(struct kbase_context *const kctx,
13401305 union kbase_ioctl_cs_queue_group_create *const create)
13411306 {
@@ -1343,23 +1308,47 @@
13431308 const u32 tiler_count = hweight64(create->in.tiler_mask);
13441309 const u32 fragment_count = hweight64(create->in.fragment_mask);
13451310 const u32 compute_count = hweight64(create->in.compute_mask);
1311
+ size_t i;
1312
+
1313
+ for (i = 0; i < sizeof(create->in.padding); i++) {
1314
+ if (create->in.padding[i] != 0) {
1315
+ dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n");
1316
+ return -EINVAL;
1317
+ }
1318
+ }
13461319
13471320 mutex_lock(&kctx->csf.lock);
13481321
13491322 if ((create->in.tiler_max > tiler_count) ||
13501323 (create->in.fragment_max > fragment_count) ||
13511324 (create->in.compute_max > compute_count)) {
1352
- dev_err(kctx->kbdev->dev,
1353
- "Invalid maximum number of endpoints for a queue group\n");
1325
+ dev_dbg(kctx->kbdev->dev,
1326
+ "Invalid maximum number of endpoints for a queue group");
13541327 err = -EINVAL;
13551328 } else if (create->in.priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) {
1356
- dev_err(kctx->kbdev->dev, "Invalid queue group priority %u\n",
1329
+ dev_dbg(kctx->kbdev->dev, "Invalid queue group priority %u",
13571330 (unsigned int)create->in.priority);
13581331 err = -EINVAL;
13591332 } else if (!iface_has_enough_streams(kctx->kbdev, create->in.cs_min)) {
1360
- dev_err(kctx->kbdev->dev,
1361
- "No CSG has at least %d CSs\n",
1333
+ dev_dbg(kctx->kbdev->dev,
1334
+ "No CSG has at least %d CSs",
13621335 create->in.cs_min);
1336
+ err = -EINVAL;
1337
+ } else if (create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK) {
1338
+ dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u",
1339
+ create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK);
1340
+ err = -EINVAL;
1341
+ } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) &&
1342
+ create->in.dvs_buf) {
1343
+ dev_warn(
1344
+ kctx->kbdev->dev,
1345
+ "GPU does not support DVS but userspace is trying to use it");
1346
+ err = -EINVAL;
1347
+ } else if (dvs_supported(kctx->kbdev->csf.global_iface.version) &&
1348
+ !CSG_DVS_BUF_BUFFER_POINTER_GET(create->in.dvs_buf) &&
1349
+ CSG_DVS_BUF_BUFFER_SIZE_GET(create->in.dvs_buf)) {
1350
+ dev_warn(kctx->kbdev->dev,
1351
+ "DVS buffer pointer is null but size is not 0");
13631352 err = -EINVAL;
13641353 } else {
13651354 /* For the CSG which satisfies the condition for having
@@ -1389,60 +1378,39 @@
13891378 * @s_buf: Pointer to queue group suspend buffer to be freed
13901379 */
13911380 static void term_normal_suspend_buffer(struct kbase_context *const kctx,
1392
- struct kbase_normal_suspend_buffer *s_buf)
1381
+ struct kbase_normal_suspend_buffer *s_buf)
13931382 {
1394
- const size_t nr_pages =
1395
- PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
1383
+ const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
13961384
13971385 lockdep_assert_held(&kctx->csf.lock);
13981386
1399
- WARN_ON(kbase_mmu_teardown_pages(
1400
- kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
1401
- s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
1387
+ /* The group should not have a bind remaining on any suspend buf region */
1388
+ WARN_ONCE(s_buf->gpu_va, "Suspend buffer address should be 0 at termination");
14021389
1403
- WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
1404
-
1405
- mutex_lock(&kctx->kbdev->csf.reg_lock);
1406
- WARN_ON(kbase_remove_va_region(s_buf->reg));
1407
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
1408
-
1409
- kbase_mem_pool_free_pages(
1410
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
1411
- nr_pages, &s_buf->phy[0], false, false);
1390
+ kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
1391
+ &s_buf->phy[0], false, false);
1392
+ kbase_process_page_usage_dec(kctx, nr_pages);
14121393
14131394 kfree(s_buf->phy);
14141395 s_buf->phy = NULL;
1415
- kfree(s_buf->reg);
1416
- s_buf->reg = NULL;
14171396 }
14181397
14191398 /**
1420
- * term_protected_suspend_buffer() - Free normal-mode suspend buffer of
1399
+ * term_protected_suspend_buffer() - Free protected-mode suspend buffer of
14211400 * queue group
14221401 *
14231402 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
1424
- * @s_buf: Pointer to queue group suspend buffer to be freed
1403
+ * @sbuf: Pointer to queue group suspend buffer to be freed
14251404 */
14261405 static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
1427
- struct kbase_protected_suspend_buffer *s_buf)
1406
+ struct kbase_protected_suspend_buffer *sbuf)
14281407 {
1429
- const size_t nr_pages =
1430
- PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
1431
-
1432
- WARN_ON(kbase_mmu_teardown_pages(
1433
- kbdev, &kbdev->csf.mcu_mmu,
1434
- s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
1435
-
1436
- WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
1437
-
1438
- mutex_lock(&kbdev->csf.reg_lock);
1439
- WARN_ON(kbase_remove_va_region(s_buf->reg));
1440
- mutex_unlock(&kbdev->csf.reg_lock);
1441
-
1442
- kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
1443
- s_buf->pma = NULL;
1444
- kfree(s_buf->reg);
1445
- s_buf->reg = NULL;
1408
+ WARN_ONCE(sbuf->gpu_va, "Suspend buf should have been unmapped inside scheduler!");
1409
+ if (sbuf->pma) {
1410
+ const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
1411
+ kbase_csf_protected_memory_free(kbdev, sbuf->pma, nr_pages, true);
1412
+ sbuf->pma = NULL;
1413
+ }
14461414 }
14471415
14481416 void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
@@ -1474,6 +1442,7 @@
14741442 &group->protected_suspend_buf);
14751443
14761444 group->run_state = KBASE_CSF_GROUP_TERMINATED;
1445
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group, group->run_state);
14771446 }
14781447
14791448 /**
....@@ -1504,10 +1473,51 @@
15041473 kbase_csf_term_descheduled_queue_group(group);
15051474 }
15061475
1476
+/**
1477
+ * wait_group_deferred_deschedule_completion - Wait for refcount of the group to
1478
+ * become 0 that was taken when the group deschedule had to be deferred.
1479
+ *
1480
+ * @group: Pointer to GPU command queue group that is being deleted.
1481
+ *
1482
+ * This function is called when Userspace deletes the group and after the group
1483
+ * has been descheduled. The function synchronizes with the other threads that were
1484
+ * also trying to deschedule the group whilst the dumping was going on for a fault.
1485
+ * Please refer the documentation of wait_for_dump_complete_on_group_deschedule()
1486
+ * for more details.
1487
+ */
1488
+static void wait_group_deferred_deschedule_completion(struct kbase_queue_group *group)
1489
+{
1490
+#if IS_ENABLED(CONFIG_DEBUG_FS)
1491
+ struct kbase_context *kctx = group->kctx;
1492
+
1493
+ lockdep_assert_held(&kctx->csf.lock);
1494
+
1495
+ if (likely(!group->deschedule_deferred_cnt))
1496
+ return;
1497
+
1498
+ mutex_unlock(&kctx->csf.lock);
1499
+ wait_event(kctx->kbdev->csf.event_wait, !group->deschedule_deferred_cnt);
1500
+ mutex_lock(&kctx->csf.lock);
1501
+#endif
1502
+}
1503
+
15071504 static void cancel_queue_group_events(struct kbase_queue_group *group)
15081505 {
15091506 cancel_work_sync(&group->timer_event_work);
15101507 cancel_work_sync(&group->protm_event_work);
1508
+}
1509
+
1510
+static void remove_pending_group_fatal_error(struct kbase_queue_group *group)
1511
+{
1512
+ struct kbase_context *kctx = group->kctx;
1513
+
1514
+ dev_dbg(kctx->kbdev->dev,
1515
+ "Remove any pending group fatal error from context %pK\n",
1516
+ (void *)group->kctx);
1517
+
1518
+ kbase_csf_event_remove_error(kctx, &group->error_tiler_oom);
1519
+ kbase_csf_event_remove_error(kctx, &group->error_timeout);
1520
+ kbase_csf_event_remove_error(kctx, &group->error_fatal);
15111521 }
15121522
15131523 void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
@@ -1532,39 +1542,44 @@
15321542 group = find_queue_group(kctx, group_handle);
15331543
15341544 if (group) {
1535
- unsigned long flags;
1536
-
1537
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
1538
-
1539
- dev_dbg(kbdev->dev,
1540
- "Remove any pending group fatal error from context %pK\n",
1541
- (void *)group->kctx);
1542
-
1543
- list_del_init(&group->error_tiler_oom.link);
1544
- list_del_init(&group->error_timeout.link);
1545
- list_del_init(&group->error_fatal.link);
1546
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
1547
-
1548
- term_queue_group(group);
15491545 kctx->csf.queue_groups[group_handle] = NULL;
1546
+ /* Stop the running of the given group */
1547
+ term_queue_group(group);
1548
+ mutex_unlock(&kctx->csf.lock);
1549
+
1550
+ if (reset_prevented) {
1551
+ /* Allow GPU reset before cancelling the group specific
1552
+ * work item to avoid potential deadlock.
1553
+ * Reset prevention isn't needed after group termination.
1554
+ */
1555
+ kbase_reset_gpu_allow(kbdev);
1556
+ reset_prevented = false;
1557
+ }
1558
+
1559
+ /* Cancel any pending event callbacks. If one is in progress
1560
+ * then this thread waits synchronously for it to complete (which
1561
+ * is why we must unlock the context first). We already ensured
1562
+ * that no more callbacks can be enqueued by terminating the group.
1563
+ */
1564
+ cancel_queue_group_events(group);
1565
+
1566
+ mutex_lock(&kctx->csf.lock);
1567
+
1568
+ /* Clean up after the termination */
1569
+ remove_pending_group_fatal_error(group);
1570
+
1571
+ wait_group_deferred_deschedule_completion(group);
15501572 }
15511573
15521574 mutex_unlock(&kctx->csf.lock);
15531575 if (reset_prevented)
15541576 kbase_reset_gpu_allow(kbdev);
15551577
1556
- if (!group)
1557
- return;
1558
-
1559
- /* Cancel any pending event callbacks. If one is in progress
1560
- * then this thread waits synchronously for it to complete (which
1561
- * is why we must unlock the context first). We already ensured
1562
- * that no more callbacks can be enqueued by terminating the group.
1563
- */
1564
- cancel_queue_group_events(group);
15651578 kfree(group);
15661579 }
1580
+KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate);
15671581
1582
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
15681583 int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
15691584 struct kbase_suspend_copy_buffer *sus_buf,
15701585 u8 group_handle)
@@ -1595,48 +1610,7 @@
15951610
15961611 return err;
15971612 }
1598
-
1599
-/**
1600
- * add_error() - Add an error to the list of errors to report to user space
1601
- *
1602
- * @kctx: Address of a base context associated with a GPU address space.
1603
- * @error: Address of the item to be added to the context's pending error list.
1604
- * @data: Error data to be returned to userspace.
1605
- *
1606
- * Does not wake up the event queue blocking a user thread in kbase_poll. This
1607
- * is to make it more efficient to add multiple errors.
1608
- *
1609
- * The added error must not already be on the context's list of errors waiting
1610
- * to be reported (e.g. because a previous error concerning the same object has
1611
- * not yet been reported).
1612
- */
1613
-static void add_error(struct kbase_context *const kctx,
1614
- struct kbase_csf_notification *const error,
1615
- struct base_csf_notification const *const data)
1616
-{
1617
- unsigned long flags;
1618
-
1619
- if (WARN_ON(!kctx))
1620
- return;
1621
-
1622
- if (WARN_ON(!error))
1623
- return;
1624
-
1625
- if (WARN_ON(!data))
1626
- return;
1627
-
1628
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
1629
-
1630
- if (!WARN_ON(!list_empty(&error->link))) {
1631
- error->data = *data;
1632
- list_add_tail(&error->link, &kctx->csf.error_list);
1633
- dev_dbg(kctx->kbdev->dev,
1634
- "Added error %pK of type %d in context %pK\n",
1635
- (void *)error, data->type, (void *)kctx);
1636
- }
1637
-
1638
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
1639
-}
1613
+#endif
16401614
16411615 void kbase_csf_add_group_fatal_error(
16421616 struct kbase_queue_group *const group,
....@@ -1660,7 +1634,7 @@
16601634 }
16611635 };
16621636
1663
- add_error(group->kctx, &group->error_fatal, &error);
1637
+ kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error);
16641638 }
16651639
16661640 void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
@@ -1698,29 +1672,12 @@
16981672
16991673 int kbase_csf_ctx_init(struct kbase_context *kctx)
17001674 {
1701
- struct kbase_device *kbdev = kctx->kbdev;
17021675 int err = -ENOMEM;
17031676
1704
- INIT_LIST_HEAD(&kctx->csf.event_callback_list);
17051677 INIT_LIST_HEAD(&kctx->csf.queue_list);
17061678 INIT_LIST_HEAD(&kctx->csf.link);
1707
- INIT_LIST_HEAD(&kctx->csf.error_list);
17081679
1709
- spin_lock_init(&kctx->csf.event_lock);
1710
- kctx->csf.user_reg_vma = NULL;
1711
- mutex_lock(&kbdev->pm.lock);
1712
- /* The inode information for /dev/malixx file is not available at the
1713
- * time of device probe as the inode is created when the device node
1714
- * is created by udevd (through mknod).
1715
- */
1716
- if (kctx->filp) {
1717
- if (!kbdev->csf.mali_file_inode)
1718
- kbdev->csf.mali_file_inode = kctx->filp->f_inode;
1719
-
1720
- /* inode is unique for a file */
1721
- WARN_ON(kbdev->csf.mali_file_inode != kctx->filp->f_inode);
1722
- }
1723
- mutex_unlock(&kbdev->pm.lock);
1680
+ kbase_csf_event_init(kctx);
17241681
17251682 /* Mark all the cookies as 'free' */
17261683 bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
@@ -1737,9 +1694,18 @@
17371694 if (likely(!err)) {
17381695 err = kbase_csf_tiler_heap_context_init(kctx);
17391696
1740
- if (likely(!err))
1697
+ if (likely(!err)) {
17411698 mutex_init(&kctx->csf.lock);
1742
- else
1699
+ INIT_WORK(&kctx->csf.pending_submission_work,
1700
+ pending_submission_worker);
1701
+
1702
+ err = kbasep_ctx_user_reg_page_mapping_init(kctx);
1703
+
1704
+ if (unlikely(err))
1705
+ kbase_csf_tiler_heap_context_term(kctx);
1706
+ }
1707
+
1708
+ if (unlikely(err))
17431709 kbase_csf_kcpu_queue_context_term(kctx);
17441710 }
17451711
@@ -1822,7 +1788,6 @@
18221788 * for queue groups & kcpu queues, hence no need to explicitly remove
18231789 * those debugfs files.
18241790 */
1825
- kbase_csf_event_wait_remove_all(kctx);
18261791
18271792 /* Wait for a GPU reset if it is happening, prevent it if not happening */
18281793 err = kbase_reset_gpu_prevent_and_wait(kbdev);
@@ -1835,17 +1800,24 @@
18351800 reset_prevented = true;
18361801
18371802 mutex_lock(&kctx->csf.lock);
1803
+
18381804 /* Iterate through the queue groups that were not terminated by
18391805 * userspace and issue the term request to firmware for them.
18401806 */
18411807 for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
1842
- if (kctx->csf.queue_groups[i])
1843
- term_queue_group(kctx->csf.queue_groups[i]);
1808
+ struct kbase_queue_group *group = kctx->csf.queue_groups[i];
1809
+
1810
+ if (group) {
1811
+ remove_pending_group_fatal_error(group);
1812
+ term_queue_group(group);
1813
+ }
18441814 }
18451815 mutex_unlock(&kctx->csf.lock);
18461816
18471817 if (reset_prevented)
18481818 kbase_reset_gpu_allow(kbdev);
1819
+
1820
+ cancel_work_sync(&kctx->csf.pending_submission_work);
18491821
18501822 /* Now that all queue groups have been terminated, there can be no
18511823 * more OoM or timer event interrupts but there can be inflight work
@@ -1891,200 +1863,45 @@
18911863 * only one reference left that was taken when queue was
18921864 * registered.
18931865 */
1894
- if (atomic_read(&queue->refcount) != 1)
1895
- dev_warn(kctx->kbdev->dev,
1896
- "Releasing queue with incorrect refcounting!\n");
1866
+ WARN_ON(kbase_refcount_read(&queue->refcount) != 1);
18971867 list_del_init(&queue->link);
18981868 release_queue(queue);
18991869 }
19001870
19011871 mutex_unlock(&kctx->csf.lock);
19021872
1873
+ kbasep_ctx_user_reg_page_mapping_term(kctx);
19031874 kbase_csf_tiler_heap_context_term(kctx);
19041875 kbase_csf_kcpu_queue_context_term(kctx);
19051876 kbase_csf_scheduler_context_term(kctx);
1877
+ kbase_csf_event_term(kctx);
19061878
19071879 mutex_destroy(&kctx->csf.lock);
1908
-}
1909
-
1910
-int kbase_csf_event_wait_add(struct kbase_context *kctx,
1911
- kbase_csf_event_callback *callback, void *param)
1912
-{
1913
- int err = -ENOMEM;
1914
- struct kbase_csf_event *event =
1915
- kzalloc(sizeof(struct kbase_csf_event), GFP_KERNEL);
1916
-
1917
- if (event) {
1918
- unsigned long flags;
1919
-
1920
- event->kctx = kctx;
1921
- event->callback = callback;
1922
- event->param = param;
1923
-
1924
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
1925
- list_add_tail(&event->link, &kctx->csf.event_callback_list);
1926
- dev_dbg(kctx->kbdev->dev,
1927
- "Added event handler %pK with param %pK\n", event,
1928
- event->param);
1929
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
1930
-
1931
- err = 0;
1932
- }
1933
-
1934
- return err;
1935
-}
1936
-
1937
-void kbase_csf_event_wait_remove(struct kbase_context *kctx,
1938
- kbase_csf_event_callback *callback, void *param)
1939
-{
1940
- struct kbase_csf_event *event;
1941
- unsigned long flags;
1942
-
1943
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
1944
-
1945
- list_for_each_entry(event, &kctx->csf.event_callback_list, link) {
1946
- if ((event->callback == callback) && (event->param == param)) {
1947
- list_del(&event->link);
1948
- dev_dbg(kctx->kbdev->dev,
1949
- "Removed event handler %pK with param %pK\n",
1950
- event, event->param);
1951
- kfree(event);
1952
- break;
1953
- }
1954
- }
1955
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
1956
-}
1957
-
1958
-bool kbase_csf_read_error(struct kbase_context *kctx,
1959
- struct base_csf_notification *event_data)
1960
-{
1961
- bool got_event = true;
1962
- struct kbase_csf_notification *error_data = NULL;
1963
- unsigned long flags;
1964
-
1965
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
1966
-
1967
- if (likely(!list_empty(&kctx->csf.error_list))) {
1968
- error_data = list_first_entry(&kctx->csf.error_list,
1969
- struct kbase_csf_notification, link);
1970
- list_del_init(&error_data->link);
1971
- *event_data = error_data->data;
1972
- dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n",
1973
- (void *)error_data, (void *)kctx);
1974
- } else {
1975
- got_event = false;
1976
- }
1977
-
1978
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
1979
-
1980
- return got_event;
1981
-}
1982
-
1983
-bool kbase_csf_error_pending(struct kbase_context *kctx)
1984
-{
1985
- bool event_pended = false;
1986
- unsigned long flags;
1987
-
1988
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
1989
- event_pended = !list_empty(&kctx->csf.error_list);
1990
- dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n",
1991
- event_pended ? "An" : "No", (void *)kctx);
1992
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
1993
-
1994
- return event_pended;
1995
-}
1996
-
1997
-void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
1998
-{
1999
- struct kbase_csf_event *event, *next_event;
2000
- unsigned long flags;
2001
-
2002
- dev_dbg(kctx->kbdev->dev,
2003
- "Signal event (%s GPU notify) for context %pK\n",
2004
- notify_gpu ? "with" : "without", (void *)kctx);
2005
-
2006
- /* First increment the signal count and wake up event thread.
2007
- */
2008
- atomic_set(&kctx->event_count, 1);
2009
- kbase_event_wakeup(kctx);
2010
-
2011
- /* Signal the CSF firmware. This is to ensure that pending command
2012
- * stream synch object wait operations are re-evaluated.
2013
- * Write to GLB_DOORBELL would suffice as spec says that all pending
2014
- * synch object wait operations are re-evaluated on a write to any
2015
- * CS_DOORBELL/GLB_DOORBELL register.
2016
- */
2017
- if (notify_gpu) {
2018
- spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
2019
- if (kctx->kbdev->pm.backend.gpu_powered)
2020
- kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR);
2021
- KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u);
2022
- spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
2023
- }
2024
-
2025
- /* Now invoke the callbacks registered on backend side.
2026
- * Allow item removal inside the loop, if requested by the callback.
2027
- */
2028
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
2029
-
2030
- list_for_each_entry_safe(
2031
- event, next_event, &kctx->csf.event_callback_list, link) {
2032
- enum kbase_csf_event_callback_action action;
2033
-
2034
- dev_dbg(kctx->kbdev->dev,
2035
- "Calling event handler %pK with param %pK\n",
2036
- (void *)event, event->param);
2037
- action = event->callback(event->param);
2038
- if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) {
2039
- list_del(&event->link);
2040
- kfree(event);
2041
- }
2042
- }
2043
-
2044
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
2045
-}
2046
-
2047
-void kbase_csf_event_wait_remove_all(struct kbase_context *kctx)
2048
-{
2049
- struct kbase_csf_event *event, *next_event;
2050
- unsigned long flags;
2051
-
2052
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
2053
-
2054
- list_for_each_entry_safe(
2055
- event, next_event, &kctx->csf.event_callback_list, link) {
2056
- list_del(&event->link);
2057
- dev_dbg(kctx->kbdev->dev,
2058
- "Removed event handler %pK with param %pK\n",
2059
- (void *)event, event->param);
2060
- kfree(event);
2061
- }
2062
-
2063
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
20641880 }
20651881
20661882 /**
20671883 * handle_oom_event - Handle the OoM event generated by the firmware for the
20681884 * CSI.
20691885 *
1886
+ * @group: Pointer to the CSG group the oom-event belongs to.
1887
+ * @stream: Pointer to the structure containing info provided by the firmware
1888
+ * about the CSI.
1889
+ *
20701890 * This function will handle the OoM event request from the firmware for the
20711891 * CS. It will retrieve the address of heap context and heap's
20721892 * statistics (like number of render passes in-flight) from the CS's kernel
2073
- * kernel output page and pass them to the tiler heap function to allocate a
1893
+ * output page and pass them to the tiler heap function to allocate a
20741894 * new chunk.
20751895 * It will also update the CS's kernel input page with the address
20761896 * of a new chunk that was allocated.
20771897 *
2078
- * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
2079
- * @stream: Pointer to the structure containing info provided by the firmware
2080
- * about the CSI.
2081
- *
20821898 * Return: 0 if successfully handled the request, otherwise a negative error
20831899 * code on failure.
20841900 */
2085
-static int handle_oom_event(struct kbase_context *const kctx,
2086
- struct kbase_csf_cmd_stream_info const *const stream)
1901
+static int handle_oom_event(struct kbase_queue_group *const group,
1902
+ struct kbase_csf_cmd_stream_info const *const stream)
20871903 {
1904
+ struct kbase_context *const kctx = group->kctx;
20881905 u64 gpu_heap_va =
20891906 kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) |
20901907 ((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32);
....@@ -2098,25 +1915,36 @@
20981915 u32 pending_frag_count;
20991916 u64 new_chunk_ptr;
21001917 int err;
1918
+ bool frag_end_err = false;
21011919
21021920 if ((frag_end > vt_end) || (vt_end >= vt_start)) {
2103
- dev_warn(kctx->kbdev->dev, "Invalid Heap statistics provided by firmware: vt_start %d, vt_end %d, frag_end %d\n",
1921
+ frag_end_err = true;
1922
+ dev_dbg(kctx->kbdev->dev, "Invalid Heap statistics provided by firmware: vt_start %d, vt_end %d, frag_end %d\n",
21041923 vt_start, vt_end, frag_end);
2105
- return -EINVAL;
21061924 }
2107
-
2108
- renderpasses_in_flight = vt_start - frag_end;
2109
- pending_frag_count = vt_end - frag_end;
1925
+ if (frag_end_err) {
1926
+ renderpasses_in_flight = 1;
1927
+ pending_frag_count = 1;
1928
+ } else {
1929
+ renderpasses_in_flight = vt_start - frag_end;
1930
+ pending_frag_count = vt_end - frag_end;
1931
+ }
21101932
21111933 err = kbase_csf_tiler_heap_alloc_new_chunk(kctx,
21121934 gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr);
21131935
2114
- /* It is okay to acknowledge with a NULL chunk (firmware will then wait
2115
- * for the fragment jobs to complete and release chunks)
2116
- */
2117
- if (err == -EBUSY)
1936
+ if ((group->csi_handlers & BASE_CSF_TILER_OOM_EXCEPTION_FLAG) &&
1937
+ (pending_frag_count == 0) && (err == -ENOMEM || err == -EBUSY)) {
1938
+ /* The group allows incremental rendering, trigger it */
21181939 new_chunk_ptr = 0;
2119
- else if (err)
1940
+ dev_dbg(kctx->kbdev->dev, "Group-%d (slot-%d) enter incremental render\n",
1941
+ group->handle, group->csg_nr);
1942
+ } else if (err == -EBUSY) {
1943
+ /* Acknowledge with a NULL chunk (firmware will then wait for
1944
+ * the fragment jobs to complete and release chunks)
1945
+ */
1946
+ new_chunk_ptr = 0;
1947
+ } else if (err)
21201948 return err;
21211949
21221950 kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO,
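The OoM path above derives its tiler-heap chunk request from three CS output counters. A small worked sketch of that arithmetic follows, using made-up counter values (purely illustrative, not real firmware output), including the sanity check and the conservative 1/1 fallback.

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

int main(void)
{
	/* Assumed example values for the vertex/tiling start, vertex/tiling
	 * end and fragment end event counters.
	 */
	const uint32_t vt_start = 12;
	const uint32_t vt_end = 10;
	const uint32_t frag_end = 9;
	uint32_t renderpasses_in_flight, pending_frag_count;

	/* Statistics are rejected when fragment work appears to have finished
	 * ahead of vertex/tiling work, or vertex/tiling "end" is not behind
	 * "start"; the handler then falls back to a conservative 1/1.
	 */
	const bool invalid = (frag_end > vt_end) || (vt_end >= vt_start);

	if (invalid) {
		renderpasses_in_flight = 1;
		pending_frag_count = 1;
	} else {
		renderpasses_in_flight = vt_start - frag_end; /* 12 - 9 = 3 */
		pending_frag_count = vt_end - frag_end;       /* 10 - 9 = 1 */
	}

	/* With the values above, 3 render passes still own heap chunks and 1
	 * has finished vertex/tiling but not fragment shading. Incremental
	 * rendering is only attempted when pending_frag_count is 0, i.e. no
	 * completed pass is still waiting to release memory.
	 */
	printf("in_flight=%u pending_frag=%u\n",
	       (unsigned)renderpasses_in_flight, (unsigned)pending_frag_count);
	return 0;
}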
....@@ -2149,8 +1977,40 @@
21491977 BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
21501978 } } } };
21511979
2152
- add_error(group->kctx, &group->error_tiler_oom, &error);
1980
+ kbase_csf_event_add_error(group->kctx,
1981
+ &group->error_tiler_oom,
1982
+ &error);
21531983 kbase_event_wakeup(group->kctx);
1984
+}
1985
+
1986
+static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev)
1987
+{
1988
+ int err;
1989
+ const unsigned int cache_flush_wait_timeout_ms = 2000;
1990
+
1991
+ kbase_pm_lock(kbdev);
1992
+ /* With the advent of partial cache flush, dirty cache lines could
1993
+ * be left in the GPU L2 caches by terminating the queue group here
1994
+ * without waiting for proper cache maintenance. A full cache flush
1995
+ * here will prevent these dirty cache lines from being arbitrarily
1996
+ * evicted later and possibly causing memory corruption.
1997
+ */
1998
+ if (kbdev->pm.backend.gpu_powered) {
1999
+ kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
2000
+ err = kbase_gpu_wait_cache_clean_timeout(kbdev, cache_flush_wait_timeout_ms);
2001
+
2002
+ if (err) {
2003
+ dev_warn(
2004
+ kbdev->dev,
2005
+ "[%llu] Timeout waiting for cache clean to complete after fatal error",
2006
+ kbase_backend_get_cycle_cnt(kbdev));
2007
+
2008
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
2009
+ kbase_reset_gpu(kbdev);
2010
+ }
2011
+ }
2012
+
2013
+ kbase_pm_unlock(kbdev);
21542014 }
21552015
21562016 /**
....@@ -2165,8 +2025,8 @@
21652025 * notification to allow the firmware to report out-of-memory again in future.
21662026 * If the out-of-memory condition was successfully handled then this function
21672027 * rings the relevant doorbell to notify the firmware; otherwise, it terminates
2168
- * the GPU command queue group to which the queue is bound. See
2169
- * term_queue_group() for details.
2028
+ * the GPU command queue group to which the queue is bound and notifies a waiting
2029
+ * user space client of the failure.
21702030 */
21712031 static void kbase_queue_oom_event(struct kbase_queue *const queue)
21722032 {
....@@ -2178,6 +2038,7 @@
21782038 struct kbase_csf_cmd_stream_info const *stream;
21792039 int csi_index = queue->csi_index;
21802040 u32 cs_oom_ack, cs_oom_req;
2041
+ unsigned long flags;
21812042
21822043 lockdep_assert_held(&kctx->csf.lock);
21832044
....@@ -2221,22 +2082,25 @@
22212082 if (cs_oom_ack == cs_oom_req)
22222083 goto unlock;
22232084
2224
- err = handle_oom_event(kctx, stream);
2085
+ err = handle_oom_event(group, stream);
22252086
2087
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
22262088 kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack,
22272089 CS_REQ_TILER_OOM_MASK);
2090
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
2091
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
22282092
2229
- if (err) {
2093
+ if (unlikely(err)) {
22302094 dev_warn(
22312095 kbdev->dev,
22322096 "Queue group to be terminated, couldn't handle the OoM event\n");
2097
+ kbase_debug_csf_fault_notify(kbdev, kctx, DF_TILER_OOM);
22332098 kbase_csf_scheduler_unlock(kbdev);
22342099 term_queue_group(group);
2100
+ flush_gpu_cache_on_fatal_error(kbdev);
22352101 report_tiler_oom_error(group);
22362102 return;
22372103 }
2238
-
2239
- kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
22402104 unlock:
22412105 kbase_csf_scheduler_unlock(kbdev);
22422106 }
....@@ -2258,6 +2122,7 @@
22582122 struct kbase_device *const kbdev = kctx->kbdev;
22592123
22602124 int err = kbase_reset_gpu_try_prevent(kbdev);
2125
+
22612126 /* Regardless of whether reset failed or is currently happening, exit
22622127 * early
22632128 */
....@@ -2294,7 +2159,7 @@
22942159 "Notify the event notification thread, forward progress timeout (%llu cycles)\n",
22952160 kbase_csf_timeout_get(group->kctx->kbdev));
22962161
2297
- add_error(group->kctx, &group->error_timeout, &error);
2162
+ kbase_csf_event_add_error(group->kctx, &group->error_timeout, &error);
22982163 kbase_event_wakeup(group->kctx);
22992164 }
23002165
....@@ -2310,12 +2175,13 @@
23102175 struct kbase_queue_group *const group =
23112176 container_of(data, struct kbase_queue_group, timer_event_work);
23122177 struct kbase_context *const kctx = group->kctx;
2178
+ struct kbase_device *const kbdev = kctx->kbdev;
23132179 bool reset_prevented = false;
2314
- int err = kbase_reset_gpu_prevent_and_wait(kctx->kbdev);
2180
+ int err = kbase_reset_gpu_prevent_and_wait(kbdev);
23152181
23162182 if (err)
23172183 dev_warn(
2318
- kctx->kbdev->dev,
2184
+ kbdev->dev,
23192185 "Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless",
23202186 group->handle);
23212187 else
....@@ -2324,11 +2190,12 @@
23242190 mutex_lock(&kctx->csf.lock);
23252191
23262192 term_queue_group(group);
2193
+ flush_gpu_cache_on_fatal_error(kbdev);
23272194 report_group_timeout_error(group);
23282195
23292196 mutex_unlock(&kctx->csf.lock);
23302197 if (reset_prevented)
2331
- kbase_reset_gpu_allow(kctx->kbdev);
2198
+ kbase_reset_gpu_allow(kbdev);
23322199 }
23332200
23342201 /**
....@@ -2336,12 +2203,91 @@
23362203 *
23372204 * @group: Pointer to GPU queue group for which the timeout event is received.
23382205 *
2206
+ * Notify a waiting user space client of the timeout.
23392207 * Enqueue a work item to terminate the group and notify the event notification
23402208 * thread of progress timeout fault for the GPU command queue group.
23412209 */
23422210 static void handle_progress_timer_event(struct kbase_queue_group *const group)
23432211 {
2212
+ kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx,
2213
+ DF_PROGRESS_TIMER_TIMEOUT);
2214
+
23442215 queue_work(group->kctx->csf.wq, &group->timer_event_work);
2216
+}
2217
+
2218
+/**
2219
+ * alloc_grp_protected_suspend_buffer_pages() - Allocate physical pages from the protected
2220
+ * memory for the protected mode suspend buffer.
2221
+ * @group: Pointer to the GPU queue group.
2222
+ *
2223
+ * Return: 0 if suspend buffer allocation is successful or if its already allocated, otherwise
2224
+ * negative error value.
2225
+ */
2226
+static int alloc_grp_protected_suspend_buffer_pages(struct kbase_queue_group *const group)
2227
+{
2228
+ struct kbase_device *const kbdev = group->kctx->kbdev;
2229
+ struct kbase_context *kctx = group->kctx;
2230
+ struct tagged_addr *phys = NULL;
2231
+ struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf;
2232
+ size_t nr_pages;
2233
+ int err = 0;
2234
+
2235
+ if (likely(sbuf->pma))
2236
+ return 0;
2237
+
2238
+ nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
2239
+ phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL);
2240
+ if (unlikely(!phys)) {
2241
+ err = -ENOMEM;
2242
+ goto phys_free;
2243
+ }
2244
+
2245
+ mutex_lock(&kctx->csf.lock);
2246
+ kbase_csf_scheduler_lock(kbdev);
2247
+
2248
+ if (unlikely(!group->csg_reg)) {
2249
+ /* The only way the bound csg_reg could have been removed from the group is
2250
+ * that it has been put off slot by the scheduler and the csg_reg resource
2251
+ * is contended by other groups. In this case, it needs another occasion for
2252
+ * mapping the pma, which needs a bound csg_reg. Since the group is already
2253
+ * off-slot, returning no error is harmless as the scheduler, when placing the
2254
+ * group back on-slot again, would do the required MMU map operation on the
2255
+ * allocated and retained pma.
2256
+ */
2257
+ WARN_ON(group->csg_nr >= 0);
2258
+ dev_dbg(kbdev->dev, "No bound csg_reg for group_%d_%d_%d to enter protected mode",
2259
+ group->kctx->tgid, group->kctx->id, group->handle);
2260
+ goto unlock;
2261
+ }
2262
+
2263
+ /* Allocate the protected mode pages */
2264
+ sbuf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, nr_pages, true);
2265
+ if (unlikely(!sbuf->pma)) {
2266
+ err = -ENOMEM;
2267
+ goto unlock;
2268
+ }
2269
+
2270
+ /* Map the bound susp_reg to the just allocated pma pages */
2271
+ err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group);
2272
+
2273
+unlock:
2274
+ kbase_csf_scheduler_unlock(kbdev);
2275
+ mutex_unlock(&kctx->csf.lock);
2276
+phys_free:
2277
+ kfree(phys);
2278
+ return err;
2279
+}
2280
+
2281
+static void report_group_fatal_error(struct kbase_queue_group *const group)
2282
+{
2283
+ struct base_gpu_queue_group_error const
2284
+ err_payload = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
2285
+ .payload = { .fatal_group = {
2286
+ .status = GPU_EXCEPTION_TYPE_SW_FAULT_0,
2287
+ } } };
2288
+
2289
+ kbase_csf_add_group_fatal_error(group, &err_payload);
2290
+ kbase_event_wakeup(group->kctx);
23452291 }
23462292
23472293 /**
....@@ -2356,53 +2302,48 @@
23562302 {
23572303 struct kbase_queue_group *const group =
23582304 container_of(data, struct kbase_queue_group, protm_event_work);
2305
+ struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf;
2306
+ int err = 0;
23592307
2360
- KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_BEGIN,
2308
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START,
23612309 group, 0u);
2362
- kbase_csf_scheduler_group_protm_enter(group);
2310
+
2311
+ err = alloc_grp_protected_suspend_buffer_pages(group);
2312
+ if (!err) {
2313
+ kbase_csf_scheduler_group_protm_enter(group);
2314
+ } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) {
2315
+ sbuf->alloc_retries++;
2316
+ /* try again to allocate pages */
2317
+ queue_work(group->kctx->csf.wq, &group->protm_event_work);
2318
+ } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) {
2319
+ dev_err(group->kctx->kbdev->dev,
2320
+ "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d",
2321
+ group->handle, group->kctx->tgid, group->kctx->id);
2322
+ report_group_fatal_error(group);
2323
+ }
2324
+
23632325 KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END,
23642326 group, 0u);
2365
-}
2366
-
2367
-static void report_queue_fatal_error(struct kbase_queue *const queue,
2368
- u32 cs_fatal, u64 cs_fatal_info,
2369
- u8 group_handle)
2370
-{
2371
- struct base_csf_notification error =
2372
- { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
2373
- .payload = {
2374
- .csg_error = {
2375
- .handle = group_handle,
2376
- .error = {
2377
- .error_type =
2378
- BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
2379
- .payload = {
2380
- .fatal_queue = {
2381
- .sideband =
2382
- cs_fatal_info,
2383
- .status = cs_fatal,
2384
- .csi_index =
2385
- queue->csi_index,
2386
- } } } } } };
2387
-
2388
- add_error(queue->kctx, &queue->error, &error);
2389
- kbase_event_wakeup(queue->kctx);
23902327 }
23912328
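The protected-mode worker above retries the suspend-buffer allocation by re-queuing itself, bounded by PROTM_ALLOC_MAX_RETRIES, before raising a fatal group error. Below is a stand-alone sketch of that bounded retry-by-requeue shape; try_alloc, report_fatal and the loop standing in for the workqueue are illustrative only and use -12 as a stand-in for -ENOMEM.

#include <stdio.h>
#include <stdbool.h>

#define ALLOC_MAX_RETRIES 5

/* Simulated allocator: fails transiently a few times, then succeeds. */
static int try_alloc(int attempt)
{
	return (attempt < 3) ? -12 /* stand-in for -ENOMEM */ : 0;
}

static void report_fatal(void)
{
	puts("fatal: giving up on allocation");
}

/* One invocation of the "worker": returns true if it re-queued itself. */
static bool worker_once(int *retries, int attempt)
{
	int err = try_alloc(attempt);

	if (!err) {
		puts("allocation succeeded, continuing protected mode entry");
		return false;
	}

	if (err == -12 && *retries <= ALLOC_MAX_RETRIES) {
		(*retries)++;
		puts("transient failure, re-queueing worker");
		return true;
	}

	report_fatal();
	return false;
}

int main(void)
{
	int retries = 0;
	int attempt = 0;

	/* The workqueue is modelled by a loop: each re-queue becomes another
	 * iteration, bounded by ALLOC_MAX_RETRIES.
	 */
	while (worker_once(&retries, attempt++))
		;
	return 0;
}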
23922329 /**
23932330 * handle_fault_event - Handler for CS fault.
23942331 *
23952332 * @queue: Pointer to queue for which fault event was received.
2396
- * @stream: Pointer to the structure containing info provided by the
2397
- * firmware about the CSI.
2333
+ * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for
2334
+ * the queue.
23982335 *
2399
- * Prints meaningful CS fault information.
2400
- *
2336
+ * Print required information about the CS fault and notify the user space client
2337
+ * about the fault.
24012338 */
24022339 static void
2403
-handle_fault_event(struct kbase_queue *const queue,
2404
- struct kbase_csf_cmd_stream_info const *const stream)
2340
+handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack)
24052341 {
2342
+ struct kbase_device *const kbdev = queue->kctx->kbdev;
2343
+ struct kbase_csf_cmd_stream_group_info const *ginfo =
2344
+ &kbdev->csf.global_iface.groups[queue->group->csg_nr];
2345
+ struct kbase_csf_cmd_stream_info const *stream =
2346
+ &ginfo->streams[queue->csi_index];
24062347 const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT);
24072348 const u64 cs_fault_info =
24082349 kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) |
....@@ -2414,7 +2355,6 @@
24142355 CS_FAULT_EXCEPTION_DATA_GET(cs_fault);
24152356 const u64 cs_fault_info_exception_data =
24162357 CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info);
2417
- struct kbase_device *const kbdev = queue->kctx->kbdev;
24182358
24192359 kbase_csf_scheduler_spin_lock_assert_held(kbdev);
24202360
....@@ -2429,29 +2369,86 @@
24292369 kbase_gpu_exception_name(cs_fault_exception_type),
24302370 cs_fault_exception_data, cs_fault_info_exception_data);
24312371
2432
- if (cs_fault_exception_type ==
2433
- CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT)
2434
- report_queue_fatal_error(queue, GPU_EXCEPTION_TYPE_SW_FAULT_2,
2435
- 0, queue->group->handle);
2372
+
2373
+#if IS_ENABLED(CONFIG_DEBUG_FS)
2374
+ /* CS_RESOURCE_TERMINATED type fault event can be ignored from the
2375
+ * standpoint of dump on error. It is used to report fault for the CSIs
2376
+ * that are associated with the same CSG as the CSI for which the actual
2377
+ * fault was reported by the Iterator.
2378
+ * Dumping would be triggered when the actual fault is reported.
2379
+ *
2380
+ * CS_INHERIT_FAULT can also be ignored. It could happen due to the error
2381
+ * in other types of queues (cpu/kcpu). If a fault had occurred in some
2382
+ * other GPU queue then the dump would have been performed anyway when
2383
+ * that fault was reported.
2384
+ */
2385
+ if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) &&
2386
+ (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) {
2387
+ if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) {
2388
+ get_queue(queue);
2389
+ queue->cs_error = cs_fault;
2390
+ queue->cs_error_info = cs_fault_info;
2391
+ queue->cs_error_fatal = false;
2392
+ if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
2393
+ release_queue(queue);
2394
+ return;
2395
+ }
2396
+ }
2397
+#endif
2398
+
2399
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
2400
+ CS_REQ_FAULT_MASK);
2401
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true);
2402
+}
2403
+
2404
+static void report_queue_fatal_error(struct kbase_queue *const queue,
2405
+ u32 cs_fatal, u64 cs_fatal_info,
2406
+ u8 group_handle)
2407
+{
2408
+ struct base_csf_notification error = {
2409
+ .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
2410
+ .payload = {
2411
+ .csg_error = {
2412
+ .handle = group_handle,
2413
+ .error = {
2414
+ .error_type =
2415
+ BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
2416
+ .payload = {
2417
+ .fatal_queue = {
2418
+ .sideband = cs_fatal_info,
2419
+ .status = cs_fatal,
2420
+ .csi_index = queue->csi_index,
2421
+ }
2422
+ }
2423
+ }
2424
+ }
2425
+ }
2426
+ };
2427
+
2428
+ kbase_csf_event_add_error(queue->kctx, &queue->error, &error);
2429
+ kbase_event_wakeup(queue->kctx);
24362430 }
24372431
24382432 /**
2439
- * fatal_event_worker - Handle the fatal error for the GPU queue
2433
+ * fatal_event_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue
24402434 *
24412435 * @data: Pointer to a work_struct embedded in GPU command queue.
24422436 *
24432437 * Terminate the CSG and report the error to userspace.
24442438 */
2445
-static void fatal_event_worker(struct work_struct *const data)
2439
+static void cs_error_worker(struct work_struct *const data)
24462440 {
24472441 struct kbase_queue *const queue =
2448
- container_of(data, struct kbase_queue, fatal_event_work);
2442
+ container_of(data, struct kbase_queue, cs_error_work);
24492443 struct kbase_context *const kctx = queue->kctx;
24502444 struct kbase_device *const kbdev = kctx->kbdev;
24512445 struct kbase_queue_group *group;
24522446 u8 group_handle;
24532447 bool reset_prevented = false;
2454
- int err = kbase_reset_gpu_prevent_and_wait(kbdev);
2448
+ int err;
2449
+
2450
+ kbase_debug_csf_fault_wait_completion(kbdev);
2451
+ err = kbase_reset_gpu_prevent_and_wait(kbdev);
24552452
24562453 if (err)
24572454 dev_warn(
....@@ -2468,9 +2465,35 @@
24682465 goto unlock;
24692466 }
24702467
2468
+#if IS_ENABLED(CONFIG_DEBUG_FS)
2469
+ if (!queue->cs_error_fatal) {
2470
+ unsigned long flags;
2471
+ int slot_num;
2472
+
2473
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
2474
+ slot_num = kbase_csf_scheduler_group_get_slot_locked(group);
2475
+ if (slot_num >= 0) {
2476
+ struct kbase_csf_cmd_stream_group_info const *ginfo =
2477
+ &kbdev->csf.global_iface.groups[slot_num];
2478
+ struct kbase_csf_cmd_stream_info const *stream =
2479
+ &ginfo->streams[queue->csi_index];
2480
+ u32 const cs_ack =
2481
+ kbase_csf_firmware_cs_output(stream, CS_ACK);
2482
+
2483
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
2484
+ CS_REQ_FAULT_MASK);
2485
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index,
2486
+ slot_num, true);
2487
+ }
2488
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
2489
+ goto unlock;
2490
+ }
2491
+#endif
2492
+
24712493 group_handle = group->handle;
24722494 term_queue_group(group);
2473
- report_queue_fatal_error(queue, queue->cs_fatal, queue->cs_fatal_info,
2495
+ flush_gpu_cache_on_fatal_error(kbdev);
2496
+ report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info,
24742497 group_handle);
24752498
24762499 unlock:
....@@ -2486,14 +2509,18 @@
24862509 * @queue: Pointer to queue for which fatal event was received.
24872510 * @stream: Pointer to the structure containing info provided by the
24882511 * firmware about the CSI.
2512
+ * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for
2513
+ * the queue.
24892514 *
2490
- * Prints meaningful CS fatal information.
2515
+ * Notify a waiting user space client of the CS fatal and prints meaningful
2516
+ * information.
24912517 * Enqueue a work item to terminate the group and report the fatal error
24922518 * to user space.
24932519 */
24942520 static void
24952521 handle_fatal_event(struct kbase_queue *const queue,
2496
- struct kbase_csf_cmd_stream_info const *const stream)
2522
+ struct kbase_csf_cmd_stream_info const *const stream,
2523
+ u32 cs_ack)
24972524 {
24982525 const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL);
24992526 const u64 cs_fatal_info =
....@@ -2523,51 +2550,26 @@
25232550
25242551 if (cs_fatal_exception_type ==
25252552 CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) {
2553
+ kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_FW_INTERNAL_ERROR);
25262554 queue_work(system_wq, &kbdev->csf.fw_error_work);
25272555 } else {
2556
+ kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FATAL);
2557
+ if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE) {
2558
+ queue->group->cs_unrecoverable = true;
2559
+ if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE))
2560
+ kbase_reset_gpu(queue->kctx->kbdev);
2561
+ }
25282562 get_queue(queue);
2529
- queue->cs_fatal = cs_fatal;
2530
- queue->cs_fatal_info = cs_fatal_info;
2531
- if (!queue_work(queue->kctx->csf.wq, &queue->fatal_event_work))
2563
+ queue->cs_error = cs_fatal;
2564
+ queue->cs_error_info = cs_fatal_info;
2565
+ queue->cs_error_fatal = true;
2566
+ if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
25322567 release_queue(queue);
25332568 }
2534
-}
25352569
2536
-/**
2537
- * handle_queue_exception_event - Handler for CS fatal/fault exception events.
2538
- *
2539
- * @queue: Pointer to queue for which fatal/fault event was received.
2540
- * @cs_req: Value of the CS_REQ register from the CS's input page.
2541
- * @cs_ack: Value of the CS_ACK register from the CS's output page.
2542
- */
2543
-static void handle_queue_exception_event(struct kbase_queue *const queue,
2544
- const u32 cs_req, const u32 cs_ack)
2545
-{
2546
- struct kbase_csf_cmd_stream_group_info const *ginfo;
2547
- struct kbase_csf_cmd_stream_info const *stream;
2548
- struct kbase_context *const kctx = queue->kctx;
2549
- struct kbase_device *const kbdev = kctx->kbdev;
2550
- struct kbase_queue_group *group = queue->group;
2551
- int csi_index = queue->csi_index;
2552
- int slot_num = group->csg_nr;
2570
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
2571
+ CS_REQ_FATAL_MASK);
25532572
2554
- kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2555
-
2556
- ginfo = &kbdev->csf.global_iface.groups[slot_num];
2557
- stream = &ginfo->streams[csi_index];
2558
-
2559
- if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
2560
- handle_fatal_event(queue, stream);
2561
- kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
2562
- CS_REQ_FATAL_MASK);
2563
- }
2564
-
2565
- if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
2566
- handle_fault_event(queue, stream);
2567
- kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
2568
- CS_REQ_FAULT_MASK);
2569
- kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
2570
- }
25712573 }
25722574
25732575 /**
....@@ -2577,6 +2579,9 @@
25772579 * @ginfo: The CSG interface provided by the firmware.
25782580 * @irqreq: CSG's IRQ request bitmask (one bit per CS).
25792581 * @irqack: CSG's IRQ acknowledge bitmask (one bit per CS).
2582
+ * @track: Pointer that tracks the highest scanout priority idle CSG
2583
+ * and any newly potentially viable protected mode requesting
2584
+ * CSG in current IRQ context.
25802585 *
25812586 * If the interrupt request bitmask differs from the acknowledge bitmask
25822587 * then the firmware is notifying the host of an event concerning those
....@@ -2585,8 +2590,9 @@
25852590 * the request and acknowledge registers for the individual CS(s).
25862591 */
25872592 static void process_cs_interrupts(struct kbase_queue_group *const group,
2588
- struct kbase_csf_cmd_stream_group_info const *const ginfo,
2589
- u32 const irqreq, u32 const irqack)
2593
+ struct kbase_csf_cmd_stream_group_info const *const ginfo,
2594
+ u32 const irqreq, u32 const irqack,
2595
+ struct irq_idle_and_protm_track *track)
25902596 {
25912597 struct kbase_device *const kbdev = group->kctx->kbdev;
25922598 u32 remaining = irqreq ^ irqack;
....@@ -2616,10 +2622,16 @@
26162622 kbase_csf_firmware_cs_output(stream, CS_ACK);
26172623 struct workqueue_struct *wq = group->kctx->csf.wq;
26182624
2619
- if ((cs_req & CS_REQ_EXCEPTION_MASK) ^
2620
- (cs_ack & CS_ACK_EXCEPTION_MASK)) {
2621
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_FAULT_INTERRUPT, group, queue, cs_req ^ cs_ack);
2622
- handle_queue_exception_event(queue, cs_req, cs_ack);
2625
+ if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
2626
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
2627
+ group, queue, cs_req ^ cs_ack);
2628
+ handle_fatal_event(queue, stream, cs_ack);
2629
+ }
2630
+
2631
+ if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
2632
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
2633
+ group, queue, cs_req ^ cs_ack);
2634
+ handle_fault_event(queue, cs_ack);
26232635 }
26242636
26252637 /* PROTM_PEND and TILER_OOM can be safely ignored
....@@ -2630,30 +2642,37 @@
26302642 u32 const cs_req_remain = cs_req & ~CS_REQ_EXCEPTION_MASK;
26312643 u32 const cs_ack_remain = cs_ack & ~CS_ACK_EXCEPTION_MASK;
26322644
2633
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND,
2634
- group, queue, cs_req_remain ^ cs_ack_remain);
2645
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev,
2646
+ CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED,
2647
+ group, queue,
2648
+ cs_req_remain ^ cs_ack_remain);
26352649 continue;
26362650 }
26372651
26382652 if (((cs_req & CS_REQ_TILER_OOM_MASK) ^
26392653 (cs_ack & CS_ACK_TILER_OOM_MASK))) {
26402654 get_queue(queue);
2641
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_TILER_OOM_INTERRUPT, group, queue,
2642
- cs_req ^ cs_ack);
2643
- if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) {
2655
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM,
2656
+ group, queue, cs_req ^ cs_ack);
2657
+ if (!queue_work(wq, &queue->oom_event_work)) {
26442658 /* The work item shall not have been
26452659 * already queued, there can be only
26462660 * one pending OoM event for a
26472661 * queue.
26482662 */
2663
+ dev_warn(
2664
+ kbdev->dev,
2665
+ "Tiler OOM work pending: queue %d group %d (ctx %d_%d)",
2666
+ queue->csi_index, group->handle, queue->kctx->tgid,
2667
+ queue->kctx->id);
26492668 release_queue(queue);
26502669 }
26512670 }
26522671
26532672 if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^
26542673 (cs_ack & CS_ACK_PROTM_PEND_MASK)) {
2655
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_INTERRUPT, group, queue,
2656
- cs_req ^ cs_ack);
2674
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_PROTM_PEND,
2675
+ group, queue, cs_req ^ cs_ack);
26572676
26582677 dev_dbg(kbdev->dev,
26592678 "Protected mode entry request for queue on csi %d bound to group-%d on slot %d",
....@@ -2661,15 +2680,34 @@
26612680 group->csg_nr);
26622681
26632682 bitmap_set(group->protm_pending_bitmap, i, 1);
2664
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_SET, group, queue,
2683
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_SET, group, queue,
26652684 group->protm_pending_bitmap[0]);
26662685 protm_pend = true;
26672686 }
26682687 }
26692688 }
26702689
2671
- if (protm_pend)
2672
- queue_work(group->kctx->csf.wq, &group->protm_event_work);
2690
+ if (protm_pend) {
2691
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2692
+
2693
+ if (scheduler->tick_protm_pending_seq > group->scan_seq_num) {
2694
+ scheduler->tick_protm_pending_seq = group->scan_seq_num;
2695
+ track->protm_grp = group;
2696
+ }
2697
+
2698
+ if (!group->protected_suspend_buf.pma)
2699
+ queue_work(group->kctx->csf.wq, &group->protm_event_work);
2700
+
2701
+ if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) {
2702
+ clear_bit(group->csg_nr,
2703
+ scheduler->csg_slots_idle_mask);
2704
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
2705
+ scheduler->csg_slots_idle_mask[0]);
2706
+ dev_dbg(kbdev->dev,
2707
+ "Group-%d on slot %d de-idled by protm request",
2708
+ group->handle, group->csg_nr);
2709
+ }
2710
+ }
26732711 }
26742712
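The fault, fatal, tiler-OOM and PROTM_PEND checks in the interrupt path all rely on the same toggle handshake: an event is pending when a REQ field differs from the matching ACK field, and it is acknowledged by copying the ACK bits back into REQ under that field's mask. The sketch below shows the pattern with made-up masks; it is an illustration of the handshake only, not the firmware interface itself.

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

#define EVT_A_MASK 0x1u /* e.g. a FAULT-style bit */
#define EVT_B_MASK 0x2u /* e.g. a TILER_OOM-style bit */

/* Firmware raises an event by toggling a bit in ack; the host sees it as a
 * mismatch against req, handles it, then writes ack's value back into req
 * (only within the mask) to acknowledge.
 */
static bool event_pending(uint32_t req, uint32_t ack, uint32_t mask)
{
	return ((req ^ ack) & mask) != 0;
}

static uint32_t acknowledge(uint32_t req, uint32_t ack, uint32_t mask)
{
	return (req & ~mask) | (ack & mask);
}

int main(void)
{
	uint32_t req = 0x0, ack = 0x0;

	ack ^= EVT_A_MASK; /* "firmware" signals event A */

	if (event_pending(req, ack, EVT_A_MASK)) {
		puts("event A pending, handling it");
		req = acknowledge(req, ack, EVT_A_MASK);
	}

	printf("A pending now: %d, B pending: %d\n",
	       event_pending(req, ack, EVT_A_MASK),
	       event_pending(req, ack, EVT_B_MASK));
	return 0;
}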
26752713 /**
....@@ -2677,6 +2715,8 @@
26772715 *
26782716 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
26792717 * @csg_nr: CSG number.
2718
+ * @track: Pointer that tracks the highest priority idle CSG and any newly viable
2719
+ * protected mode requesting group, in the current IRQ context.
26802720 *
26812721 * Handles interrupts for a CSG and for CSs within it.
26822722 *
....@@ -2687,8 +2727,8 @@
26872727 *
26882728 * See process_cs_interrupts() for details of per-stream interrupt handling.
26892729 */
2690
-static void process_csg_interrupts(struct kbase_device *const kbdev,
2691
- int const csg_nr)
2730
+static void process_csg_interrupts(struct kbase_device *const kbdev, int const csg_nr,
2731
+ struct irq_idle_and_protm_track *track)
26922732 {
26932733 struct kbase_csf_cmd_stream_group_info *ginfo;
26942734 struct kbase_queue_group *group = NULL;
....@@ -2699,8 +2739,6 @@
26992739 if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
27002740 return;
27012741
2702
- KBASE_KTRACE_ADD(kbdev, CSG_INTERRUPT_PROCESS, NULL, csg_nr);
2703
-
27042742 ginfo = &kbdev->csf.global_iface.groups[csg_nr];
27052743 req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
27062744 ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
....@@ -2709,7 +2747,7 @@
27092747
27102748 /* There may not be any pending CSG/CS interrupts to process */
27112749 if ((req == ack) && (irqreq == irqack))
2712
- goto out;
2750
+ return;
27132751
27142752 /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before
27152753 * examining the CS_ACK & CS_REQ bits. This would ensure that Host
....@@ -2730,21 +2768,30 @@
27302768 * slot scheduler spinlock is required.
27312769 */
27322770 if (!group)
2733
- goto out;
2771
+ return;
27342772
27352773 if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr))
2736
- goto out;
2774
+ return;
2775
+
2776
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr);
27372777
27382778 if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) {
27392779 kbase_csf_firmware_csg_input_mask(ginfo,
27402780 CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);
27412781
2742
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack);
2782
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack);
2783
+
2784
+ /* SYNC_UPDATE events shall invalidate GPU idle event */
2785
+ atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true);
2786
+
27432787 kbase_csf_event_signal_cpu_only(group->kctx);
27442788 }
27452789
27462790 if ((req ^ ack) & CSG_REQ_IDLE_MASK) {
27472791 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2792
+
2793
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE(
2794
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, csg_nr);
27482795
27492796 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
27502797 CSG_REQ_IDLE_MASK);
....@@ -2752,34 +2799,45 @@
27522799 set_bit(csg_nr, scheduler->csg_slots_idle_mask);
27532800 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group,
27542801 scheduler->csg_slots_idle_mask[0]);
2755
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_IDLE_INTERRUPT, group, req ^ ack);
2802
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_IDLE, group, req ^ ack);
27562803 dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n",
27572804 group->handle, csg_nr);
27582805
2759
- /* Check if the scheduling tick can be advanced */
2760
- if (kbase_csf_scheduler_all_csgs_idle(kbdev) &&
2761
- !scheduler->gpu_idle_fw_timer_enabled) {
2762
- kbase_csf_scheduler_advance_tick_nolock(kbdev);
2806
+ if (atomic_read(&scheduler->non_idle_offslot_grps)) {
2807
+ /* If there are non-idle CSGs waiting for a slot, fire
2808
+ * a tock for a replacement.
2809
+ */
2810
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NON_IDLE_GROUPS,
2811
+ group, req ^ ack);
2812
+ kbase_csf_scheduler_invoke_tock(kbdev);
2813
+ } else {
2814
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NO_NON_IDLE_GROUPS,
2815
+ group, req ^ ack);
2816
+ }
2817
+
2818
+ if (group->scan_seq_num < track->idle_seq) {
2819
+ track->idle_seq = group->scan_seq_num;
2820
+ track->idle_slot = csg_nr;
27632821 }
27642822 }
27652823
27662824 if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) {
27672825 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
2768
- CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
2826
+ CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
27692827
2770
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PROGRESS_TIMER_INTERRUPT,
2771
- group, req ^ ack);
2772
- dev_info(kbdev->dev,
2773
- "Timeout notification received for group %u of ctx %d_%d on slot %d\n",
2774
- group->handle, group->kctx->tgid, group->kctx->id, csg_nr);
2828
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT, group,
2829
+ req ^ ack);
2830
+ dev_info(
2831
+ kbdev->dev,
2832
+ "[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n",
2833
+ kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid,
2834
+ group->kctx->id, csg_nr);
27752835
27762836 handle_progress_timer_event(group);
27772837 }
27782838
2779
- process_cs_interrupts(group, ginfo, irqreq, irqack);
2839
+ process_cs_interrupts(group, ginfo, irqreq, irqack, track);
27802840
2781
-out:
2782
- /* group may still be NULL here */
27832841 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group,
27842842 ((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32));
27852843 }
....@@ -2868,105 +2926,264 @@
28682926 }
28692927 }
28702928
2929
+/**
2930
+ * check_protm_enter_req_complete - Check if PROTM_ENTER request completed
2931
+ *
2932
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
2933
+ * @glb_req: Global request register value.
2934
+ * @glb_ack: Global acknowledge register value.
2935
+ *
2936
+ * This function checks if the PROTM_ENTER Global request had completed and
2937
+ * appropriately sends notification about the protected mode entry to components
2938
+ * like IPA, HWC, IPA_CONTROL.
2939
+ */
2940
+static inline void check_protm_enter_req_complete(struct kbase_device *kbdev,
2941
+ u32 glb_req, u32 glb_ack)
2942
+{
2943
+ lockdep_assert_held(&kbdev->hwaccess_lock);
2944
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2945
+
2946
+ if (likely(!kbdev->csf.scheduler.active_protm_grp))
2947
+ return;
2948
+
2949
+ if (kbdev->protected_mode)
2950
+ return;
2951
+
2952
+ if ((glb_req & GLB_REQ_PROTM_ENTER_MASK) !=
2953
+ (glb_ack & GLB_REQ_PROTM_ENTER_MASK))
2954
+ return;
2955
+
2956
+ dev_dbg(kbdev->dev, "Protected mode entry interrupt received");
2957
+
2958
+ kbdev->protected_mode = true;
2959
+ kbase_ipa_protection_mode_switch_event(kbdev);
2960
+ kbase_ipa_control_protm_entered(kbdev);
2961
+ kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface);
2962
+}
2963
+
2964
+/**
2965
+ * process_protm_exit - Handle the protected mode exit interrupt
2966
+ *
2967
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
2968
+ * @glb_ack: Global acknowledge register value.
2969
+ *
2970
+ * This function handles the PROTM_EXIT interrupt and sends notification
2971
+ * about the protected mode exit to components like HWC, IPA_CONTROL.
2972
+ */
2973
+static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack)
2974
+{
2975
+ const struct kbase_csf_global_iface *const global_iface =
2976
+ &kbdev->csf.global_iface;
2977
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2978
+
2979
+ lockdep_assert_held(&kbdev->hwaccess_lock);
2980
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2981
+
2982
+ dev_dbg(kbdev->dev, "Protected mode exit interrupt received");
2983
+
2984
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_ack,
2985
+ GLB_REQ_PROTM_EXIT_MASK);
2986
+
2987
+ if (likely(scheduler->active_protm_grp)) {
2988
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT,
2989
+ scheduler->active_protm_grp, 0u);
2990
+ scheduler->active_protm_grp = NULL;
2991
+ } else {
2992
+ dev_warn(kbdev->dev, "PROTM_EXIT interrupt after no pmode group");
2993
+ }
2994
+
2995
+ if (!WARN_ON(!kbdev->protected_mode)) {
2996
+ kbdev->protected_mode = false;
2997
+ kbase_ipa_control_protm_exited(kbdev);
2998
+ kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface);
2999
+ }
3000
+
3001
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
3002
+ kbase_debug_coresight_csf_enable_pmode_exit(kbdev);
3003
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
3004
+}
3005
+
3006
+static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
3007
+ struct irq_idle_and_protm_track *track)
3008
+{
3009
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3010
+ struct kbase_queue_group *group = track->protm_grp;
3011
+ u32 current_protm_pending_seq = scheduler->tick_protm_pending_seq;
3012
+
3013
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
3014
+
3015
+ if (likely(current_protm_pending_seq == KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID))
3016
+ return;
3017
+
3018
+ /* Handle protm from the tracked information */
3019
+ if (track->idle_seq < current_protm_pending_seq) {
3020
+ /* If the protm enter was prevented due to groups priority, then fire a tock
3021
+ * for the scheduler to re-examine the case.
3022
+ */
3023
+ dev_dbg(kbdev->dev, "Attempt pending protm from idle slot %d\n", track->idle_slot);
3024
+ kbase_csf_scheduler_invoke_tock(kbdev);
3025
+ } else if (group) {
3026
+ u32 i, num_groups = kbdev->csf.global_iface.group_num;
3027
+ struct kbase_queue_group *grp;
3028
+ bool tock_triggered = false;
3029
+
3030
+ /* A new protm request, and track->idle_seq is not sufficient, check across
3031
+ * previously notified idle CSGs in the current tick/tock cycle.
3032
+ */
3033
+ for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
3034
+ if (i == track->idle_slot)
3035
+ continue;
3036
+ grp = kbase_csf_scheduler_get_group_on_slot(kbdev, i);
3037
+ /* If not NULL then the group pointer cannot disappear as the
3038
+ * scheduler spinlock is held.
3039
+ */
3040
+ if (grp == NULL)
3041
+ continue;
3042
+
3043
+ if (grp->scan_seq_num < current_protm_pending_seq) {
3044
+ tock_triggered = true;
3045
+ dev_dbg(kbdev->dev,
3046
+ "Attempt new protm from tick/tock idle slot %d\n", i);
3047
+ kbase_csf_scheduler_invoke_tock(kbdev);
3048
+ break;
3049
+ }
3050
+ }
3051
+
3052
+ if (!tock_triggered) {
3053
+ dev_dbg(kbdev->dev, "Group-%d on slot-%d start protm work\n",
3054
+ group->handle, group->csg_nr);
3055
+ queue_work(group->kctx->csf.wq, &group->protm_event_work);
3056
+ }
3057
+ }
3058
+}
3059
+
3060
+static void order_job_irq_clear_with_iface_mem_read(void)
3061
+{
3062
+ /* Ensure that write to the JOB_IRQ_CLEAR is ordered with regards to the
3063
+ * read from interface memory. The ordering is needed considering the way
3064
+ * FW & Kbase writes to the JOB_IRQ_RAWSTAT and JOB_IRQ_CLEAR registers
3065
+ * without any synchronization. Without the barrier there is no guarantee
3066
+ * about the ordering, the write to IRQ_CLEAR can take effect after the read
3067
+ * from interface memory and that could cause a problem for the scenario where
3068
+ * FW sends back to back notifications for the same CSG for events like
3069
+ * SYNC_UPDATE and IDLE, but Kbase gets a single IRQ and observes only the
3070
+ * first event. Similar thing can happen with glb events like CFG_ALLOC_EN
3071
+ * acknowledgment and GPU idle notification.
3072
+ *
3073
+ * MCU CPU
3074
+ * --------------- ----------------
3075
+ * Update interface memory Write to IRQ_CLEAR to clear current IRQ
3076
+ * <barrier> <barrier>
3077
+ * Write to IRQ_RAWSTAT to raise new IRQ Read interface memory
3078
+ */
3079
+
3080
+ /* CPU and GPU would be in the same Outer shareable domain */
3081
+ dmb(osh);
3082
+}
3083
+
28713084 void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
28723085 {
2873
- unsigned long flags;
2874
- u32 remaining = val;
3086
+ bool deferred_handling_glb_idle_irq = false;
28753087
28763088 lockdep_assert_held(&kbdev->hwaccess_lock);
28773089
2878
- KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val);
2879
- kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
3090
+ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_START, NULL, val);
28803091
2881
- if (val & JOB_IRQ_GLOBAL_IF) {
2882
- const struct kbase_csf_global_iface *const global_iface =
2883
- &kbdev->csf.global_iface;
2884
- struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3092
+ do {
3093
+ unsigned long flags;
3094
+ u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;
3095
+ struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX };
3096
+ bool glb_idle_irq_received = false;
28853097
2886
- kbdev->csf.interrupt_received = true;
2887
- remaining &= ~JOB_IRQ_GLOBAL_IF;
3098
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
3099
+ order_job_irq_clear_with_iface_mem_read();
28883100
2889
- if (!kbdev->csf.firmware_reloaded)
2890
- kbase_csf_firmware_reload_completed(kbdev);
2891
- else if (global_iface->output) {
2892
- u32 glb_req, glb_ack;
2893
-
3101
+ if (csg_interrupts != 0) {
28943102 kbase_csf_scheduler_spin_lock(kbdev, &flags);
2895
- glb_req = kbase_csf_firmware_global_input_read(
2896
- global_iface, GLB_REQ);
2897
- glb_ack = kbase_csf_firmware_global_output(
2898
- global_iface, GLB_ACK);
2899
- KBASE_KTRACE_ADD(kbdev, GLB_REQ_ACQ, NULL, glb_req ^ glb_ack);
3103
+ /* Looping through and track the highest idle and protm groups */
3104
+ while (csg_interrupts != 0) {
3105
+ int const csg_nr = ffs(csg_interrupts) - 1;
29003106
2901
- if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK) {
2902
- dev_dbg(kbdev->dev, "Protected mode exit interrupt received");
2903
- kbase_csf_firmware_global_input_mask(
2904
- global_iface, GLB_REQ, glb_ack,
2905
- GLB_REQ_PROTM_EXIT_MASK);
2906
- WARN_ON(!kbase_csf_scheduler_protected_mode_in_use(kbdev));
2907
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM, scheduler->active_protm_grp, 0u);
2908
- scheduler->active_protm_grp = NULL;
2909
- kbdev->protected_mode = false;
2910
- kbase_ipa_control_protm_exited(kbdev);
2911
- kbase_hwcnt_backend_csf_protm_exited(
2912
- &kbdev->hwcnt_gpu_iface);
3107
+ process_csg_interrupts(kbdev, csg_nr, &track);
3108
+ csg_interrupts &= ~(1 << csg_nr);
29133109 }
29143110
2915
- /* Handle IDLE Hysteresis notification event */
2916
- if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
2917
- int non_idle_offslot_grps;
2918
- bool can_suspend_on_idle;
2919
- dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
2920
- kbase_csf_firmware_global_input_mask(
3111
+ /* Handle protm from the tracked information */
3112
+ process_tracked_info_for_protm(kbdev, &track);
3113
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
3114
+ }
3115
+
3116
+ if (val & JOB_IRQ_GLOBAL_IF) {
3117
+ const struct kbase_csf_global_iface *const global_iface =
3118
+ &kbdev->csf.global_iface;
3119
+
3120
+ kbdev->csf.interrupt_received = true;
3121
+
3122
+ if (!kbdev->csf.firmware_reloaded)
3123
+ kbase_csf_firmware_reload_completed(kbdev);
3124
+ else if (global_iface->output) {
3125
+ u32 glb_req, glb_ack;
3126
+
3127
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
3128
+ glb_req =
3129
+ kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
3130
+ glb_ack = kbase_csf_firmware_global_output(global_iface, GLB_ACK);
3131
+ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL,
3132
+ glb_req ^ glb_ack);
3133
+
3134
+ check_protm_enter_req_complete(kbdev, glb_req, glb_ack);
3135
+
3136
+ if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK)
3137
+ process_protm_exit(kbdev, glb_ack);
3138
+
3139
+ /* Handle IDLE Hysteresis notification event */
3140
+ if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
3141
+ dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
3142
+ kbase_csf_firmware_global_input_mask(
29213143 global_iface, GLB_REQ, glb_ack,
29223144 GLB_REQ_IDLE_EVENT_MASK);
29233145
2924
- non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
2925
- can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
2926
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL,
2927
- ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
2928
-
2929
- if (!non_idle_offslot_grps) {
2930
- if (can_suspend_on_idle)
2931
- queue_work(system_highpri_wq,
2932
- &scheduler->gpu_idle_work);
2933
- } else {
2934
- /* Advance the scheduling tick to get
2935
- * the non-idle suspended groups loaded
2936
- * soon.
3146
+ glb_idle_irq_received = true;
3147
+ /* Defer handling this IRQ to account for a race condition
3148
+ * where the idle worker could be executed before we have
3149
+ * finished handling all pending IRQs (including CSG IDLE
3150
+ * IRQs).
29373151 */
2938
- kbase_csf_scheduler_advance_tick_nolock(
2939
- kbdev);
3152
+ deferred_handling_glb_idle_irq = true;
29403153 }
3154
+
3155
+ process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
3156
+
3157
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
3158
+
3159
+ /* Invoke the MCU state machine as a state transition
3160
+ * might have completed.
3161
+ */
3162
+ kbase_pm_update_state(kbdev);
29413163 }
2942
-
2943
- process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
2944
-
2945
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
2946
-
2947
- /* Invoke the MCU state machine as a state transition
2948
- * might have completed.
2949
- */
2950
- kbase_pm_update_state(kbdev);
29513164 }
29523165
2953
- if (!remaining) {
2954
- wake_up_all(&kbdev->csf.event_wait);
2955
- KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
2956
- return;
2957
- }
2958
- }
3166
+ if (!glb_idle_irq_received)
3167
+ break;
3168
+ /* Attempt to serve potential IRQs that might have occurred
3169
+ * whilst handling the previous IRQ. In case we have observed
3170
+ * the GLB IDLE IRQ without all CSGs having been marked as
3171
+ * idle, the GPU would be treated as no longer idle and left
3172
+ * powered on.
3173
+ */
3174
+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
3175
+ } while (val);
29593176
2960
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
2961
- while (remaining != 0) {
2962
- int const csg_nr = ffs(remaining) - 1;
3177
+ if (deferred_handling_glb_idle_irq) {
3178
+ unsigned long flags;
29633179
2964
- process_csg_interrupts(kbdev, csg_nr);
2965
- remaining &= ~(1 << csg_nr);
3180
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
3181
+ kbase_csf_scheduler_process_gpu_idle_event(kbdev);
3182
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
29663183 }
2967
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
29683184
29693185 wake_up_all(&kbdev->csf.event_wait);
3186
+
29703187 KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
29713188 }
29723189
....@@ -2989,13 +3206,12 @@
29893206 struct file *filp;
29903207 int ret;
29913208
2992
- filp = shmem_file_setup("mali csf", MAX_LFS_FILESIZE, VM_NORESERVE);
3209
+ filp = shmem_file_setup("mali csf db", MAX_LFS_FILESIZE, VM_NORESERVE);
29933210 if (IS_ERR(filp))
29943211 return PTR_ERR(filp);
29953212
2996
- ret = kbase_mem_pool_alloc_pages(
2997
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
2998
- 1, &phys, false);
3213
+ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
3214
+ false, NULL);
29993215
30003216 if (ret <= 0) {
30013217 fput(filp);
....@@ -3011,30 +3227,34 @@
30113227
30123228 void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
30133229 {
3014
- if (as_phys_addr_t(kbdev->csf.dummy_user_reg_page)) {
3015
- struct page *page = as_page(kbdev->csf.dummy_user_reg_page);
3230
+ if (kbdev->csf.user_reg.filp) {
3231
+ struct page *page = as_page(kbdev->csf.user_reg.dummy_page);
30163232
3017
- kbase_mem_pool_free(
3018
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page,
3019
- false);
3233
+ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false);
3234
+ fput(kbdev->csf.user_reg.filp);
30203235 }
30213236 }
30223237
30233238 int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
30243239 {
30253240 struct tagged_addr phys;
3241
+ struct file *filp;
30263242 struct page *page;
30273243 u32 *addr;
3028
- int ret;
30293244
3030
- kbdev->csf.dummy_user_reg_page = as_tagged(0);
3245
+ kbdev->csf.user_reg.filp = NULL;
30313246
3032
- ret = kbase_mem_pool_alloc_pages(
3033
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
3034
- false);
3247
+ filp = shmem_file_setup("mali csf user_reg", MAX_LFS_FILESIZE, VM_NORESERVE);
3248
+ if (IS_ERR(filp)) {
3249
+ dev_err(kbdev->dev, "failed to get an unlinked file for user_reg");
3250
+ return PTR_ERR(filp);
3251
+ }
30353252
3036
- if (ret <= 0)
3037
- return ret;
3253
+ if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
3254
+ false, NULL) <= 0) {
3255
+ fput(filp);
3256
+ return -ENOMEM;
3257
+ }
30383258
30393259 page = as_page(phys);
30403260 addr = kmap_atomic(page);
....@@ -3044,12 +3264,13 @@
30443264 */
30453265 addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE;
30463266
3047
- kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), sizeof(u32),
3267
+ kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32),
30483268 DMA_BIDIRECTIONAL);
30493269 kunmap_atomic(addr);
30503270
3051
- kbdev->csf.dummy_user_reg_page = phys;
3052
-
3271
+ kbdev->csf.user_reg.filp = filp;
3272
+ kbdev->csf.user_reg.dummy_page = phys;
3273
+ kbdev->csf.user_reg.file_offset = 0;
30533274 return 0;
30543275 }
30553276
....@@ -3066,4 +3287,3 @@
30663287
30673288 return out_priority;
30683289 }
3069
-