hc
2024-05-16 8d2a02b24d66aa359e83eebc1ed3c0f85367a1cb
kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h
....@@ -1,7 +1,7 @@
11 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
22 /*
33 *
4
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
4
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
55 *
66 * This program is free software and is provided to you under the terms of the
77 * GNU General Public License version 2 as published by the Free Software
....@@ -30,6 +30,13 @@
3030 #include <linux/wait.h>
3131
3232 #include "mali_kbase_csf_firmware.h"
33
+#include "mali_kbase_refcount_defs.h"
34
+#include "mali_kbase_csf_event.h"
35
+#include <uapi/gpu/arm/bifrost/csf/mali_kbase_csf_errors_dumpfault.h>
36
+
37
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
38
+#include <debug/backend/mali_kbase_debug_coresight_internal_csf.h>
39
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
3340
3441 /* Maximum number of KCPU command queues to be created per GPU address space.
3542 */
....@@ -54,7 +61,7 @@
5461 #define CSF_FIRMWARE_ENTRY_ZERO (1ul << 31)
5562
5663 /**
57
- * enum kbase_csf_bind_state - bind state of the queue
64
+ * enum kbase_csf_queue_bind_state - bind state of the queue
5865 *
5966 * @KBASE_CSF_QUEUE_UNBOUND: Set when the queue is registered or when the link
6067 * between queue and the group to which it was bound or being bound is removed.
....@@ -219,11 +226,19 @@
219226 * management reference. This can happen if the GPU
220227 * becomes idle for a duration exceeding a threshold,
221228 * or due to a system triggered suspend action.
229
+ * @SCHED_SLEEPING: The scheduler is in low-power mode with scheduling
230
+ * operations suspended and is not holding the power
231
+ * management reference. This state is set, only for the
232
+ * GPUs that support the sleep feature, when GPU idle
233
+ * notification is received. The state is changed to
234
+ * @SCHED_SUSPENDED from the runtime suspend callback
235
+ * function after the suspend of CSGs.
222236 */
223237 enum kbase_csf_scheduler_state {
224238 SCHED_BUSY,
225239 SCHED_INACTIVE,
226240 SCHED_SUSPENDED,
241
+ SCHED_SLEEPING,
227242 };
228243
229244 /**
....@@ -243,6 +258,36 @@
243258 KBASE_QUEUE_GROUP_PRIORITY_COUNT
244259 };
245260
261
+/**
262
+ * enum kbase_timeout_selector - The choice of which timeout to get scaled
263
+ * using the lowest GPU frequency.
264
+ * @CSF_FIRMWARE_TIMEOUT: Response timeout from CSF firmware.
265
+ * @CSF_PM_TIMEOUT: Timeout for GPU Power Management to reach the desired
266
+ * Shader, L2 and MCU state.
267
+ * @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete.
268
+ * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for all active CSGs to be suspended.
269
+ * @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot.
270
+ * @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond
271
+ * to a ping from KBase.
272
+ * @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang.
273
+ * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion
274
+ * of a MMU operation
275
+ * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
276
+ * the enum.
277
+ */
278
+enum kbase_timeout_selector {
279
+ CSF_FIRMWARE_TIMEOUT,
280
+ CSF_PM_TIMEOUT,
281
+ CSF_GPU_RESET_TIMEOUT,
282
+ CSF_CSG_SUSPEND_TIMEOUT,
283
+ CSF_FIRMWARE_BOOT_TIMEOUT,
284
+ CSF_FIRMWARE_PING_TIMEOUT,
285
+ CSF_SCHED_PROTM_PROGRESS_TIMEOUT,
286
+ MMU_AS_INACTIVE_WAIT_TIMEOUT,
287
+
288
+ /* Must be the last in the enum */
289
+ KBASE_TIMEOUT_SELECTOR_COUNT
290
+};
246291
247292 /**
248293 * struct kbase_csf_notification - Event or error generated as part of command
....@@ -261,9 +306,9 @@
261306 *
262307 * @kctx: Pointer to the base context with which this GPU command queue
263308 * is associated.
264
- * @reg: Pointer to the region allocated from the shared
265
- * interface segment for mapping the User mode
266
- * input/output pages in MCU firmware address space.
309
+ * @user_io_gpu_va: The start GPU VA address of this queue's userio pages. Only
310
+ * valid (i.e. not 0) when the queue is enabled and its owner
311
+ * group has a runtime bound csg_reg (group region).
267312 * @phys: Pointer to the physical pages allocated for the
268313 * pair of User mode input/output page
269314 * @user_io_addr: Pointer to the permanent kernel mapping of User mode
....@@ -319,21 +364,34 @@
319364 * @trace_buffer_size: CS trace buffer size for the queue.
320365 * @trace_cfg: CS trace configuration parameters.
321366 * @error: GPU command queue fatal information to pass to user space.
322
- * @fatal_event_work: Work item to handle the CS fatal event reported for this
323
- * queue.
324
- * @cs_fatal_info: Records additional information about the CS fatal event.
325
- * @cs_fatal: Records information about the CS fatal event.
367
+ * @cs_error_work: Work item to handle the CS fatal event reported for this
368
+ * queue or the CS fault event if dump on fault is enabled
369
+ * and acknowledgment for CS fault event needs to be done
370
+ * after dumping is complete.
371
+ * @cs_error_info: Records additional information about the CS fatal event or
372
+ * about CS fault event if dump on fault is enabled.
373
+ * @cs_error: Records information about the CS fatal event or
374
+ * about CS fault event if dump on fault is enabled.
375
+ * @cs_error_fatal: Flag to track if the CS fault or CS fatal event occurred.
376
+ * @pending: Indicating whether the queue has new submitted work.
377
+ * @extract_ofs: The current EXTRACT offset, this is only updated when handling
378
+ * the GLB IDLE IRQ if the idle timeout value is non-0 in order
379
+ * to help detect a queue's true idle status.
380
+ * @saved_cmd_ptr: The command pointer value for the GPU queue, saved when the
381
+ * group to which queue is bound is suspended.
382
+ * This can be useful in certain cases to know up to which
383
+ * point the execution reached in the Linear command buffer.
326384 */
327385 struct kbase_queue {
328386 struct kbase_context *kctx;
329
- struct kbase_va_region *reg;
387
+ u64 user_io_gpu_va;
330388 struct tagged_addr phys[2];
331389 char *user_io_addr;
332390 u64 handle;
333391 int doorbell_nr;
334392 unsigned long db_file_offset;
335393 struct list_head link;
336
- atomic_t refcount;
394
+ kbase_refcount_t refcount;
337395 struct kbase_queue_group *group;
338396 struct kbase_va_region *queue_reg;
339397 struct work_struct oom_event_work;
....@@ -353,34 +411,47 @@
353411 u32 trace_buffer_size;
354412 u32 trace_cfg;
355413 struct kbase_csf_notification error;
356
- struct work_struct fatal_event_work;
357
- u64 cs_fatal_info;
358
- u32 cs_fatal;
414
+ struct work_struct cs_error_work;
415
+ u64 cs_error_info;
416
+ u32 cs_error;
417
+ bool cs_error_fatal;
418
+ atomic_t pending;
419
+ u64 extract_ofs;
420
+#if IS_ENABLED(CONFIG_DEBUG_FS)
421
+ u64 saved_cmd_ptr;
422
+#endif /* CONFIG_DEBUG_FS */
359423 };
360424
361425 /**
362426 * struct kbase_normal_suspend_buffer - Object representing a normal
363427 * suspend buffer for queue group.
364
- * @reg: Memory region allocated for the normal-mode suspend buffer.
428
+ * @gpu_va: The start GPU VA address of the bound suspend buffer. Note, this
429
+ * field is only valid when the owner group has a region bound at
430
+ * runtime.
365431 * @phy: Array of physical memory pages allocated for the normal-
366432 * mode suspend buffer.
367433 */
368434 struct kbase_normal_suspend_buffer {
369
- struct kbase_va_region *reg;
435
+ u64 gpu_va;
370436 struct tagged_addr *phy;
371437 };
372438
373439 /**
374440 * struct kbase_protected_suspend_buffer - Object representing a protected
375441 * suspend buffer for queue group.
376
- * @reg: Memory region allocated for the protected-mode suspend buffer.
442
+ * @gpu_va: The start GPU VA address of the bound protected mode suspend buffer.
443
+ * Note, this field is only valid when the owner group has a region
444
+ * bound at runtime.
377445 * @pma: Array of pointer to protected mode allocations containing
378446 * information about memory pages allocated for protected mode
379447 * suspend buffer.
448
+ * @alloc_retries: Number of times we retried allocating physical pages
449
+ * for protected suspend buffers.
380450 */
381451 struct kbase_protected_suspend_buffer {
382
- struct kbase_va_region *reg;
452
+ u64 gpu_va;
383453 struct protected_memory_allocation **pma;
454
+ u8 alloc_retries;
384455 };
385456
386457 /**
....@@ -406,6 +477,7 @@
406477 * allowed to use.
407478 * @compute_max: Maximum number of compute endpoints the group is
408479 * allowed to use.
480
+ * @csi_handlers: Requested CSI exception handler flags for the group.
409481 * @tiler_mask: Mask of tiler endpoints the group is allowed to use.
410482 * @fragment_mask: Mask of fragment endpoints the group is allowed to use.
411483 * @compute_mask: Mask of compute endpoints the group is allowed to use.
....@@ -427,6 +499,12 @@
427499 * @faulted: Indicates that a GPU fault occurred for the queue group.
428500 * This flag persists until the fault has been queued to be
429501 * reported to userspace.
502
+ * @cs_unrecoverable: Flag to unblock the thread waiting for CSG termination in
503
+ * case of CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE
504
+ * @reevaluate_idle_status: Flag set when work is submitted for the normal group
505
+ * or it becomes unblocked during protected mode. The
506
+ * flag helps Scheduler confirm if the group actually
507
+ * became non idle or not.
430508 * @bound_queues: Array of registered queues bound to this queue group.
431509 * @doorbell_nr: Index of the hardware doorbell page assigned to the
432510 * group.
....@@ -442,6 +520,16 @@
442520 * to be returned to userspace if such an error has occurred.
443521 * @timer_event_work: Work item to handle the progress timeout fatal event
444522 * for the group.
523
+ * @deschedule_deferred_cnt: Counter keeping a track of the number of threads
524
+ * that tried to deschedule the group and had to defer
525
+ * the descheduling due to the dump on fault.
526
+ * @csg_reg: An opaque pointer to the runtime bound shared regions. It is
527
+ * dynamically managed by the scheduler and can be NULL if the
528
+ * group is off-slot.
529
+ * @csg_reg_bind_retries: Runtime MCU shared region map operation attempted counts.
530
+ * It is accumulated on consecutive mapping attempt failures. On
531
+ * reaching a preset limit, the group is regarded as having suffered
532
+ * a fatal error and triggers a fatal error notification.
445533 */
446534 struct kbase_queue_group {
447535 struct kbase_context *kctx;
....@@ -454,6 +542,7 @@
454542 u8 tiler_max;
455543 u8 fragment_max;
456544 u8 compute_max;
545
+ u8 csi_handlers;
457546
458547 u64 tiler_mask;
459548 u64 fragment_mask;
....@@ -467,6 +556,8 @@
467556 u32 prepared_seq_num;
468557 u32 scan_seq_num;
469558 bool faulted;
559
+ bool cs_unrecoverable;
560
+ bool reevaluate_idle_status;
470561
471562 struct kbase_queue *bound_queues[MAX_SUPPORTED_STREAMS_PER_GROUP];
472563
....@@ -479,6 +570,18 @@
479570 struct kbase_csf_notification error_tiler_oom;
480571
481572 struct work_struct timer_event_work;
573
+
574
+ /**
575
+ * @dvs_buf: Address and size of scratch memory.
576
+ *
577
+ * Used to store intermediate DVS data by the GPU.
578
+ */
579
+ u64 dvs_buf;
580
+#if IS_ENABLED(CONFIG_DEBUG_FS)
581
+ u32 deschedule_deferred_cnt;
582
+#endif
583
+ void *csg_reg;
584
+ u8 csg_reg_bind_retries;
482585 };
483586
484587 /**
....@@ -488,10 +591,10 @@
488591 * @lock: Lock preventing concurrent access to @array and the @in_use bitmap.
489592 * @array: Array of pointers to kernel CPU command queues.
490593 * @in_use: Bitmap which indicates which kernel CPU command queues are in use.
491
- * @wq: Dedicated workqueue for processing kernel CPU command queues.
492
- * @num_cmds: The number of commands that have been enqueued across
493
- * all the KCPU command queues. This could be used as a
494
- * timestamp to determine the command's enqueueing time.
594
+ * @cmd_seq_num: The sequence number assigned to an enqueued command,
595
+ * in incrementing order (older commands shall have a
596
+ * smaller number).
597
+ * @jit_lock: Lock to serialise JIT operations.
495598 * @jit_cmds_head: A list of the just-in-time memory commands, both
496599 * allocate & free, in submission order, protected
497600 * by kbase_csf_kcpu_queue_context.lock.
....@@ -504,9 +607,9 @@
504607 struct mutex lock;
505608 struct kbase_kcpu_command_queue *array[KBASEP_MAX_KCPU_QUEUES];
506609 DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES);
507
- struct workqueue_struct *wq;
508
- u64 num_cmds;
610
+ atomic64_t cmd_seq_num;
509611
612
+ struct mutex jit_lock;
510613 struct list_head jit_cmds_head;
511614 struct list_head jit_blocked_queues;
512615 };
....@@ -530,10 +633,6 @@
530633 /**
531634 * struct kbase_csf_heap_context_allocator - Allocator of heap contexts
532635 *
533
- * Heap context structures are allocated by the kernel for use by the firmware.
534
- * The current implementation subdivides a single GPU memory region for use as
535
- * a sparse array.
536
- *
537636 * @kctx: Pointer to the kbase context with which this allocator is
538637 * associated.
539638 * @region: Pointer to a GPU memory region from which heap context structures
....@@ -544,6 +643,12 @@
544643 * @lock: Lock preventing concurrent access to the @in_use bitmap.
545644 * @in_use: Bitmap that indicates which heap context structures are currently
546645 * allocated (in @region).
646
+ * @heap_context_size_aligned: Size of a heap context structure, in bytes,
647
+ * aligned to GPU cacheline size.
648
+ *
649
+ * Heap context structures are allocated by the kernel for use by the firmware.
650
+ * The current implementation subdivides a single GPU memory region for use as
651
+ * a sparse array.
547652 */
548653 struct kbase_csf_heap_context_allocator {
549654 struct kbase_context *kctx;
....@@ -551,27 +656,52 @@
551656 u64 gpu_va;
552657 struct mutex lock;
553658 DECLARE_BITMAP(in_use, MAX_TILER_HEAPS);
659
+ u32 heap_context_size_aligned;
554660 };
555661
556662 /**
557663 * struct kbase_csf_tiler_heap_context - Object representing the tiler heaps
558664 * context for a GPU address space.
559665 *
560
- * This contains all of the CSF state relating to chunked tiler heaps for one
561
- * @kbase_context. It is not the same as a heap context structure allocated by
562
- * the kernel for use by the firmware.
563
- *
564
- * @lock: Lock preventing concurrent access to the tiler heaps.
666
+ * @lock: Lock to prevent the concurrent access to tiler heaps (after the
667
+ * initialization), a tiler heap can be terminated whilst an OoM
668
+ * event is being handled for it.
565669 * @list: List of tiler heaps.
566670 * @ctx_alloc: Allocator for heap context structures.
567671 * @nr_of_heaps: Total number of tiler heaps that were added during the
568672 * life time of the context.
673
+ *
674
+ * This contains all of the CSF state relating to chunked tiler heaps for one
675
+ * @kbase_context. It is not the same as a heap context structure allocated by
676
+ * the kernel for use by the firmware.
569677 */
570678 struct kbase_csf_tiler_heap_context {
571679 struct mutex lock;
572680 struct list_head list;
573681 struct kbase_csf_heap_context_allocator ctx_alloc;
574682 u64 nr_of_heaps;
683
+};
684
+
685
+/**
686
+ * struct kbase_csf_ctx_heap_reclaim_info - Object representing the data section of
687
+ * a kctx for tiler heap reclaim manager
688
+ * @mgr_link: Link for hooking up to the heap reclaim manager's kctx lists
689
+ * @nr_freed_pages: Number of freed pages from the kctx, after its attachment
690
+ * to the reclaim manager. This is used for tracking reclaim's
691
+ * free operation progress.
692
+ * @nr_est_unused_pages: Estimated number of pages that could be freed for the kctx
693
+ * when all its CSGs are off-slot, on attaching to the reclaim
694
+ * manager.
695
+ * @on_slot_grps: Number of on-slot groups from this kctx. In principle, if a
696
+ * kctx has groups on-slot, the scheduler will detach it from
697
+ * the tiler heap reclaim manager, i.e. no tiler heap memory
698
+ * reclaiming operations on the kctx.
699
+ */
700
+struct kbase_csf_ctx_heap_reclaim_info {
701
+ struct list_head mgr_link;
702
+ u32 nr_freed_pages;
703
+ u32 nr_est_unused_pages;
704
+ u8 on_slot_grps;
575705 };
576706
577707 /**
....@@ -595,6 +725,10 @@
595725 * streams bound to groups of @idle_wait_groups list.
596726 * @ngrp_to_schedule: Number of groups added for the context to the
597727 * 'groups_to_schedule' list of scheduler instance.
728
+ * @heap_info: Heap reclaim information data of the kctx. As the
729
+ * reclaim action needs to be coordinated with the scheduler
730
+ * operations, any manipulations on the data needs holding
731
+ * the scheduler's mutex lock.
598732 */
599733 struct kbase_csf_scheduler_context {
600734 struct list_head runnable_groups[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
....@@ -604,6 +738,61 @@
604738 struct workqueue_struct *sync_update_wq;
605739 struct work_struct sync_update_work;
606740 u32 ngrp_to_schedule;
741
+ struct kbase_csf_ctx_heap_reclaim_info heap_info;
742
+};
743
+
744
+/**
745
+ * enum kbase_csf_event_callback_action - return type for CSF event callbacks.
746
+ *
747
+ * @KBASE_CSF_EVENT_CALLBACK_FIRST: Never set explicitly.
748
+ * It doesn't correspond to any action or type of event callback.
749
+ *
750
+ * @KBASE_CSF_EVENT_CALLBACK_KEEP: The callback will remain registered.
751
+ *
752
+ * @KBASE_CSF_EVENT_CALLBACK_REMOVE: The callback will be removed
753
+ * immediately upon return.
754
+ *
755
+ * @KBASE_CSF_EVENT_CALLBACK_LAST: Never set explicitly.
756
+ * It doesn't correspond to any action or type of event callback.
757
+ */
758
+enum kbase_csf_event_callback_action {
759
+ KBASE_CSF_EVENT_CALLBACK_FIRST = 0,
760
+ KBASE_CSF_EVENT_CALLBACK_KEEP,
761
+ KBASE_CSF_EVENT_CALLBACK_REMOVE,
762
+ KBASE_CSF_EVENT_CALLBACK_LAST,
763
+};
764
+
765
+/**
766
+ * struct kbase_csf_event - Object representing CSF event and error
767
+ *
768
+ * @callback_list: List of callbacks which are registered to serve CSF
769
+ * events.
770
+ * @error_list: List for CS fatal errors in CSF context.
771
+ * Link of fatal error is &struct_kbase_csf_notification.link.
772
+ * @lock: Lock protecting access to @callback_list and
773
+ * @error_list.
774
+ */
775
+struct kbase_csf_event {
776
+ struct list_head callback_list;
777
+ struct list_head error_list;
778
+ spinlock_t lock;
779
+};
780
+
781
+/**
782
+ * struct kbase_csf_user_reg_context - Object containing members to manage the mapping
783
+ * of USER Register page for a context.
784
+ *
785
+ * @vma: Pointer to the VMA corresponding to the virtual mapping
786
+ * of the USER register page.
787
+ * @file_offset: File offset value that is assigned to userspace mapping
788
+ * of the USER Register page. It is in page units.
789
+ * @link: Links the context to the device list when mapping is pointing to
790
+ * either the dummy or the real Register page.
791
+ */
792
+struct kbase_csf_user_reg_context {
793
+ struct vm_area_struct *vma;
794
+ u32 file_offset;
795
+ struct list_head link;
607796 };
608797
609798 /**
....@@ -637,26 +826,18 @@
637826 * userspace mapping created for them on bind operation
638827 * hasn't been removed.
639828 * @kcpu_queues: Kernel CPU command queues.
640
- * @event_lock: Lock protecting access to @event_callback_list and
641
- * @error_list.
642
- * @event_callback_list: List of callbacks which are registered to serve CSF
643
- * events.
829
+ * @event: CSF event object.
644830 * @tiler_heaps: Chunked tiler memory heaps.
645831 * @wq: Dedicated workqueue to process work items corresponding
646832 * to the OoM events raised for chunked tiler heaps being
647833 * used by GPU command queues, and progress timeout events.
648834 * @link: Link to this csf context in the 'runnable_kctxs' list of
649835 * the scheduler instance
650
- * @user_reg_vma: Pointer to the vma corresponding to the virtual mapping
651
- * of the USER register page. Currently used only for sanity
652
- * checking.
653836 * @sched: Object representing the scheduler's context
654
- * @error_list: List for CS fatal errors in this context.
655
- * Link of fatal error is
656
- * &struct_kbase_csf_notification.link.
657
- * @event_lock needs to be held to access this list.
837
+ * @pending_submission_work: Work item to process pending kicked GPU command queues.
658838 * @cpu_queue: CPU queue information. Only be available when DEBUG_FS
659839 * is enabled.
840
+ * @user_reg: Collective information to support mapping to USER Register page.
660841 */
661842 struct kbase_csf_context {
662843 struct list_head event_pages_head;
....@@ -667,17 +848,16 @@
667848 struct kbase_queue_group *queue_groups[MAX_QUEUE_GROUP_NUM];
668849 struct list_head queue_list;
669850 struct kbase_csf_kcpu_queue_context kcpu_queues;
670
- spinlock_t event_lock;
671
- struct list_head event_callback_list;
851
+ struct kbase_csf_event event;
672852 struct kbase_csf_tiler_heap_context tiler_heaps;
673853 struct workqueue_struct *wq;
674854 struct list_head link;
675
- struct vm_area_struct *user_reg_vma;
676855 struct kbase_csf_scheduler_context sched;
677
- struct list_head error_list;
856
+ struct work_struct pending_submission_work;
678857 #if IS_ENABLED(CONFIG_DEBUG_FS)
679858 struct kbase_csf_cpu_queue_context cpu_queue;
680859 #endif
860
+ struct kbase_csf_user_reg_context user_reg;
681861 };
682862
683863 /**
....@@ -714,6 +894,49 @@
714894 atomic_t state;
715895 unsigned long trigger_jiffies;
716896 u8 priority;
897
+};
898
+
899
+/**
900
+ * struct kbase_csf_sched_heap_reclaim_mgr - Object for managing tiler heap reclaim
901
+ * kctx lists inside the CSF device's scheduler.
902
+ *
903
+ * @heap_reclaim: Tiler heap reclaim shrinker object.
904
+ * @ctx_lists: Array of kctx lists, size matching CSG defined priorities. The
905
+ * lists track the kctxs attached to the reclaim manager.
906
+ * @unused_pages: Estimated number of unused pages from the @ctx_lists array. The
907
+ * number is indicative for use with reclaim shrinker's count method.
908
+ */
909
+struct kbase_csf_sched_heap_reclaim_mgr {
910
+ struct shrinker heap_reclaim;
911
+ struct list_head ctx_lists[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
912
+ atomic_t unused_pages;
913
+};
914
+
915
+/**
916
+ * struct kbase_csf_mcu_shared_regions - Control data for managing the MCU shared
917
+ * interface segment regions for scheduler
918
+ * operations
919
+ *
920
+ * @array_csg_regs: Base pointer of an internally created array_csg_regs[].
921
+ * @unused_csg_regs: List contains unused csg_regs items. When an item is bound to a
922
+ * group that is placed onto on-slot by the scheduler, it is dropped
923
+ * from the list (i.e busy active). The Scheduler will put an active
924
+ * item back when it's becoming off-slot (not in use).
925
+ * @dummy_phys: An array of dummy phys[nr_susp_pages] pages for use with normal
926
+ * and pmode suspend buffers, as a default replacement of a CSG's pages
927
+ * for the MMU mapping when the csg_reg is not bound to a group.
928
+ * @pma_phys: Pre-allocated array phy[nr_susp_pages] for transitional use with
929
+ * protected suspend buffer MMU map operations.
930
+ * @userio_mem_rd_flags: Userio input page's read access mapping configuration flags.
931
+ * @dummy_phys_allocated: Indicates that the @p dummy_phys pages are allocated when true.
932
+ */
933
+struct kbase_csf_mcu_shared_regions {
934
+ void *array_csg_regs;
935
+ struct list_head unused_csg_regs;
936
+ struct tagged_addr *dummy_phys;
937
+ struct tagged_addr *pma_phys;
938
+ unsigned long userio_mem_rd_flags;
939
+ bool dummy_phys_allocated;
717940 };
718941
719942 /**
....@@ -788,6 +1011,8 @@
7881011 * operation to implement timeslice-based scheduling.
7891012 * @tock_work: Work item that would perform the schedule on tock
7901013 * operation to implement the asynchronous scheduling.
1014
+ * @pending_tock_work: Indicates that the tock work item should re-execute
1015
+ * once it's finished instead of going back to sleep.
7911016 * @ping_work: Work item that would ping the firmware at regular
7921017 * intervals, only if there is a single active CSG
7931018 * slot, to check if firmware is alive and would
....@@ -797,16 +1022,29 @@
7971022 * @top_grp.
7981023 * @top_grp: Pointer to queue group inside @groups_to_schedule
7991024 * list that was assigned the highest slot priority.
800
- * @tock_pending_request: A "tock" request is pending: a group that is not
801
- * currently on the GPU demands to be scheduled.
8021025 * @active_protm_grp: Indicates if firmware has been permitted to let GPU
8031026 * enter protected mode with the given group. On exit
8041027 * from protected mode the pointer is reset to NULL.
805
- * @gpu_idle_fw_timer_enabled: Whether the CSF scheduler has activiated the
806
- * firmware idle hysteresis timer for preparing a
807
- * GPU suspend on idle.
1028
+ * This pointer is set and PROTM_ENTER request is sent
1029
+ * atomically with @interrupt_lock held.
1030
+ * This pointer being set doesn't necessarily indicate
1031
+ * that GPU is in protected mode, kbdev->protected_mode
1032
+ * needs to be checked for that.
1033
+ * @idle_wq: Workqueue for executing GPU idle notification
1034
+ * handler.
8081035 * @gpu_idle_work: Work item for facilitating the scheduler to bring
8091036 * the GPU to a low-power mode on becoming idle.
1037
+ * @fast_gpu_idle_handling: Indicates whether to relax many of the checks
1038
+ * normally done in the GPU idle worker. This is
1039
+ * set to true when handling the GLB IDLE IRQ if the
1040
+ * idle hysteresis timeout is 0, since it makes it
1041
+ * possible to receive this IRQ before the extract
1042
+ * offset is published (which would cause more
1043
+ * extensive GPU idle checks to fail).
1044
+ * @gpu_no_longer_idle: Effective only when the GPU idle worker has been
1045
+ * queued for execution, this indicates whether the
1046
+ * GPU has become non-idle since the last time the
1047
+ * idle notification was received.
8101048 * @non_idle_offslot_grps: Count of off-slot non-idle groups. Reset during
8111049 * the scheduler active phase in a tick. It then
8121050 * tracks the count of non-idle groups across all the
....@@ -827,6 +1065,17 @@
8271065 * when scheduling tick needs to be advanced from
8281066 * interrupt context, without actually deactivating
8291067 * the @tick_timer first and then enqueuing @tick_work.
1068
+ * @tick_protm_pending_seq: Scan out sequence number of the group that has
1069
+ * protected mode execution pending for the queue(s)
1070
+ * bound to it and will be considered first for the
1071
+ * protected mode execution compared to other such
1072
+ * groups. It is updated on every tick/tock.
1073
+ * @interrupt_lock is used to serialize the access.
1074
+ * @protm_enter_time: GPU protected mode enter time.
1075
+ * @reclaim_mgr: CSGs tiler heap manager object.
1076
+ * @mcu_regs_data: Scheduler MCU shared regions data for managing the
1077
+ * shared interface mappings for on-slot queues and
1078
+ * CSG suspend buffers.
8301079 */
8311080 struct kbase_csf_scheduler {
8321081 struct mutex lock;
....@@ -853,26 +1102,32 @@
8531102 struct hrtimer tick_timer;
8541103 struct work_struct tick_work;
8551104 struct delayed_work tock_work;
1105
+ atomic_t pending_tock_work;
8561106 struct delayed_work ping_work;
8571107 struct kbase_context *top_ctx;
8581108 struct kbase_queue_group *top_grp;
859
- bool tock_pending_request;
8601109 struct kbase_queue_group *active_protm_grp;
861
- bool gpu_idle_fw_timer_enabled;
1110
+ struct workqueue_struct *idle_wq;
8621111 struct work_struct gpu_idle_work;
1112
+ bool fast_gpu_idle_handling;
1113
+ atomic_t gpu_no_longer_idle;
8631114 atomic_t non_idle_offslot_grps;
8641115 u32 non_idle_scanout_grps;
8651116 u32 pm_active_count;
8661117 unsigned int csg_scheduling_period_ms;
8671118 bool tick_timer_active;
1119
+ u32 tick_protm_pending_seq;
1120
+ ktime_t protm_enter_time;
1121
+ struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr;
1122
+ struct kbase_csf_mcu_shared_regions mcu_regs_data;
8681123 };
8691124
870
-/**
1125
+/*
8711126 * Number of GPU cycles per unit of the global progress timeout.
8721127 */
8731128 #define GLB_PROGRESS_TIMER_TIMEOUT_SCALE ((u64)1024)
8741129
875
-/**
1130
+/*
8761131 * Maximum value of the global progress timeout.
8771132 */
8781133 #define GLB_PROGRESS_TIMER_TIMEOUT_MAX \
....@@ -880,12 +1135,12 @@
8801135 GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) * \
8811136 GLB_PROGRESS_TIMER_TIMEOUT_SCALE)
8821137
883
-/**
1138
+/*
8841139 * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of micro-seconds.
8851140 */
8861141 #define DEFAULT_GLB_PWROFF_TIMEOUT_US (800)
8871142
888
-/**
1143
+/*
8891144 * In typical operations, the management of the shader core power transitions
8901145 * is delegated to the MCU/firmware. However, if the host driver is configured
8911146 * to take direct control, one needs to disable the MCU firmware GLB_PWROFF
....@@ -896,7 +1151,7 @@
8961151 /* Index of the GPU_ACTIVE counter within the CSHW counter block */
8971152 #define GPU_ACTIVE_CNT_IDX (4)
8981153
899
-/**
1154
+/*
9001155 * Maximum number of sessions that can be managed by the IPA Control component.
9011156 */
9021157 #if MALI_UNIT_TEST
....@@ -922,13 +1177,13 @@
9221177 KBASE_IPA_CORE_TYPE_NUM
9231178 };
9241179
925
-/**
1180
+/*
9261181 * Number of configurable counters per type of block on the IPA Control
9271182 * interface.
9281183 */
9291184 #define KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS ((size_t)8)
9301185
931
-/**
1186
+/*
9321187 * Total number of configurable counters existing on the IPA Control interface.
9331188 */
9341189 #define KBASE_IPA_CONTROL_MAX_COUNTERS \
....@@ -1001,8 +1256,7 @@
10011256 *
10021257 */
10031258 struct kbase_ipa_control_prfcnt_block {
1004
- struct kbase_ipa_control_prfcnt_config
1005
- select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS];
1259
+ struct kbase_ipa_control_prfcnt_config select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS];
10061260 size_t num_available_counters;
10071261 };
10081262
....@@ -1025,8 +1279,7 @@
10251279 */
10261280 struct kbase_ipa_control {
10271281 struct kbase_ipa_control_prfcnt_block blocks[KBASE_IPA_CORE_TYPE_NUM];
1028
- struct kbase_ipa_control_session
1029
- sessions[KBASE_IPA_CONTROL_MAX_SESSIONS];
1282
+ struct kbase_ipa_control_session sessions[KBASE_IPA_CONTROL_MAX_SESSIONS];
10301283 spinlock_t lock;
10311284 void *rtm_listener_data;
10321285 size_t num_active_sessions;
....@@ -1040,12 +1293,20 @@
10401293 * @node: Interface objects are on the kbase_device:csf.firmware_interfaces
10411294 * list using this list_head to link them
10421295 * @phys: Array of the physical (tagged) addresses making up this interface
1296
+ * @reuse_pages: Flag used to identify if the FW interface entry reuses
1297
+ * physical pages allocated for another FW interface entry.
1298
+ * @is_small_page: Flag used to identify if small pages are used for
1299
+ * the FW interface entry.
10431300 * @name: NULL-terminated string naming the interface
10441301 * @num_pages: Number of entries in @phys and @pma (and length of the interface)
1302
+ * @num_pages_aligned: Same as @num_pages except for the case when @is_small_page
1303
+ * is false and @reuse_pages is false and therefore will be
1304
+ * aligned to NUM_4K_PAGES_IN_2MB_PAGE.
10451305 * @virtual: Starting GPU virtual address this interface is mapped at
10461306 * @flags: bitmask of CSF_FIRMWARE_ENTRY_* conveying the interface attributes
10471307 * @data_start: Offset into firmware image at which the interface data starts
10481308 * @data_end: Offset into firmware image at which the interface data ends
1309
+ * @virtual_exe_start: Starting GPU execution virtual address of this interface
10491310 * @kernel_map: A kernel mapping of the memory or NULL if not required to be
10501311 * mapped in the kernel
10511312 * @pma: Array of pointers to protected memory allocations.
....@@ -1053,12 +1314,16 @@
10531314 struct kbase_csf_firmware_interface {
10541315 struct list_head node;
10551316 struct tagged_addr *phys;
1317
+ bool reuse_pages;
1318
+ bool is_small_page;
10561319 char *name;
10571320 u32 num_pages;
1321
+ u32 num_pages_aligned;
10581322 u32 virtual;
10591323 u32 flags;
10601324 u32 data_start;
10611325 u32 data_end;
1326
+ u32 virtual_exe_start;
10621327 void *kernel_map;
10631328 struct protected_memory_allocation **pma;
10641329 };
....@@ -1075,6 +1340,136 @@
10751340 struct kbase_csf_hwcnt {
10761341 bool request_pending;
10771342 bool enable_pending;
1343
+};
1344
+
1345
+/*
1346
+ * struct kbase_csf_mcu_fw - Object containing device loaded MCU firmware data.
1347
+ *
1348
+ * @size: Loaded firmware data size. Meaningful only when the
1349
+ * other field @p data is not NULL.
1350
+ * @data: Pointer to the device retained firmware data. If NULL,
1351
+ * the firmware is not loaded yet or loading failed.
1352
+ */
1353
+struct kbase_csf_mcu_fw {
1354
+ size_t size;
1355
+ u8 *data;
1356
+};
1357
+
1358
+/*
1359
+ * Firmware log polling period.
1360
+ */
1361
+#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS 25
1362
+
1363
+/**
1364
+ * enum kbase_csf_firmware_log_mode - Firmware log operating mode
1365
+ *
1366
+ * @KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL: Manual mode, firmware log can be read
1367
+ * manually by the userspace (and it will also be dumped automatically into
1368
+ * dmesg on GPU reset).
1369
+ *
1370
+ * @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: Automatic printing mode, firmware log
1371
+ * will be periodically emptied into dmesg, manual reading through debugfs is
1372
+ * disabled.
1373
+ */
1374
+enum kbase_csf_firmware_log_mode {
1375
+ KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL,
1376
+ KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT
1377
+};
1378
+
1379
+/**
1380
+ * struct kbase_csf_firmware_log - Object containing members for handling firmware log.
1381
+ *
1382
+ * @mode: Firmware log operating mode.
1383
+ * @busy: Indicating whether a firmware log operation is in progress.
1384
+ * @poll_work: Work item that would poll firmware log buffer
1385
+ * at regular intervals to perform any periodic
1386
+ * activities required by current log mode.
1387
+ * @dump_buf: Buffer used for dumping the log.
1388
+ * @func_call_list_va_start: Virtual address of the start of the call list of FW log functions.
1389
+ * @func_call_list_va_end: Virtual address of the end of the call list of FW log functions.
1390
+ */
1391
+struct kbase_csf_firmware_log {
1392
+ enum kbase_csf_firmware_log_mode mode;
1393
+ atomic_t busy;
1394
+ struct delayed_work poll_work;
1395
+ u8 *dump_buf;
1396
+ u32 func_call_list_va_start;
1397
+ u32 func_call_list_va_end;
1398
+};
1399
+
1400
+/**
1401
+ * struct kbase_csf_firmware_core_dump - Object containing members for handling
1402
+ * firmware core dump.
1403
+ *
1404
+ * @mcu_regs_addr: GPU virtual address of the start of the MCU registers buffer
1405
+ * in Firmware.
1406
+ * @version: Version of the FW image header core dump data format. Bits
1407
+ * 7:0 specify version minor and 15:8 specify version major.
1408
+ * @available: Flag to identify if the FW core dump buffer is available.
1409
+ * True if entry is available in the FW image header and version
1410
+ * is supported, False otherwise.
1411
+ */
1412
+struct kbase_csf_firmware_core_dump {
1413
+ u32 mcu_regs_addr;
1414
+ u16 version;
1415
+ bool available;
1416
+};
1417
+
1418
+#if IS_ENABLED(CONFIG_DEBUG_FS)
1419
+/**
1420
+ * struct kbase_csf_dump_on_fault - Fault information to deliver to the daemon
1421
+ *
1422
+ * @error_code: Error code.
1423
+ * @kctx_tgid: tgid value of the Kbase context for which the fault happened.
1424
+ * @kctx_id: id of the Kbase context for which the fault happened.
1425
+ * @enabled: Flag to indicate that 'csf_fault' debugfs has been opened
1426
+ * so dump on fault is enabled.
1427
+ * @fault_wait_wq: Waitqueue on which user space client is blocked till kbase
1428
+ * reports a fault.
1429
+ * @dump_wait_wq: Waitqueue on which kbase threads are blocked till user space client
1430
+ * completes the dump on fault.
1431
+ * @lock: Lock to protect this struct members from concurrent access.
1432
+ */
1433
+struct kbase_csf_dump_on_fault {
1434
+ enum dumpfault_error_type error_code;
1435
+ u32 kctx_tgid;
1436
+ u32 kctx_id;
1437
+ atomic_t enabled;
1438
+ wait_queue_head_t fault_wait_wq;
1439
+ wait_queue_head_t dump_wait_wq;
1440
+ spinlock_t lock;
1441
+};
1442
+#endif /* CONFIG_DEBUG_FS */
1443
+
1444
+/**
1445
+ * struct kbase_csf_user_reg - Object containing members to manage the mapping
1446
+ * of USER Register page for all contexts
1447
+ *
1448
+ * @dummy_page: Address of a dummy page that is mapped in place
1449
+ * of the real USER Register page just before the GPU
1450
+ * is powered down. The USER Register page is mapped
1451
+ * in the address space of every process, that created
1452
+ * a Base context, to enable the access to LATEST_FLUSH
1453
+ * register from userspace.
1454
+ * @filp: Pointer to a dummy file, that along with @file_offset,
1455
+ * facilitates the use of unique file offset for the userspace mapping
1456
+ * created for USER Register page.
1457
+ * The userspace mapping is made to point to this file
1458
+ * inside the mmap handler.
1459
+ * @file_offset: Counter that is incremented every time Userspace creates a mapping of
1460
+ * USER Register page, to provide a unique file offset range for
1461
+ * @filp file, so that the CPU PTE of the Userspace mapping can be zapped
1462
+ * through the kernel function unmap_mapping_range().
1463
+ * It is incremented in page units.
1464
+ * @list: Linked list to maintain user processes(contexts)
1465
+ * having the mapping to USER Register page.
1466
+ * It's protected by &kbase_csf_device.reg_lock.
1467
+ */
1468
+struct kbase_csf_user_reg {
1469
+ struct tagged_addr dummy_page;
1470
+ struct file *filp;
1471
+ u32 file_offset;
1472
+ struct list_head list;
10781473 };
10791474
10801475 /**
....@@ -1114,21 +1509,10 @@
11141509 * of the real Hw doorbell page for the active GPU
11151510 * command queues after they are stopped or after the
11161511 * GPU is powered down.
1117
- * @dummy_user_reg_page: Address of the dummy page that is mapped in place
1118
- * of the real User register page just before the GPU
1119
- * is powered down. The User register page is mapped
1120
- * in the address space of every process, that created
1121
- * a Base context, to enable the access to LATEST_FLUSH
1122
- * register from userspace.
1123
- * @mali_file_inode: Pointer to the inode corresponding to mali device
1124
- * file. This is needed in order to switch to the
1125
- * @dummy_user_reg_page on GPU power down.
1126
- * All instances of the mali device file will point to
1127
- * the same inode.
11281512 * @reg_lock: Lock to serialize the MCU firmware related actions
11291513 * that affect all contexts such as allocation of
11301514 * regions from shared interface area, assignment of
1131
- * of hardware doorbell pages, assignment of CSGs,
1515
+ * hardware doorbell pages, assignment of CSGs,
11321516 * sending global requests.
11331517 * @event_wait: Wait queue to wait for receiving csf events, i.e.
11341518 * the interrupt from CSF firmware, or scheduler state
....@@ -1151,6 +1535,10 @@
11511535 * in GPU reset has completed.
11521536 * @firmware_reload_needed: Flag for indicating that the firmware needs to be
11531537 * reloaded as part of the GPU reset action.
1538
+ * @firmware_full_reload_needed: Flag for indicating that the firmware needs to
1539
+ * be fully re-loaded. This may be set when the
1540
+ * boot or re-init of MCU fails after a successful
1541
+ * soft reset.
11541542 * @firmware_hctl_core_pwr: Flag for indicating that the host driver is in
11551543 * charge of the shader core's power transitions, and
11561544 * the mcu_core_pwroff timeout feature is disabled
....@@ -1174,9 +1562,9 @@
11741562 * the glb_pwoff register. This is separated from
11751563 * the @p mcu_core_pwroff_dur_count as an update
11761564 * to the latter is asynchronous.
1177
- * @gpu_idle_hysteresis_ms: Sysfs attribute for the idle hysteresis time
1178
- * window in unit of ms. The firmware does not use it
1179
- * directly.
1565
+ * @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time
1566
+ * window in unit of microseconds. The firmware does not
1567
+ * use it directly.
11801568 * @gpu_idle_dur_count: The counterpart of the hysteresis time window in
11811569 * interface required format, ready to be used
11821570 * directly in the firmware.
....@@ -1184,6 +1572,13 @@
11841572 * for any request sent to the firmware.
11851573 * @hwcnt: Contains members required for handling the dump of
11861574 * HW counters.
1575
+ * @fw: Copy of the loaded MCU firmware image.
1576
+ * @fw_log: Contains members required for handling firmware log.
1577
+ * @fw_core_dump: Contains members required for handling the firmware
1578
+ * core dump.
1579
+ * @dof: Structure for dump on fault.
1580
+ * @user_reg: Collective information to support the mapping to
1581
+ * USER Register page for user processes.
11871582 */
11881583 struct kbase_csf_device {
11891584 struct kbase_mmu_table mcu_mmu;
....@@ -1197,8 +1592,6 @@
11971592 struct file *db_filp;
11981593 u32 db_file_offsets;
11991594 struct tagged_addr dummy_db_page;
1200
- struct tagged_addr dummy_user_reg_page;
1201
- struct inode *mali_file_inode;
12021595 struct mutex reg_lock;
12031596 wait_queue_head_t event_wait;
12041597 bool interrupt_received;
....@@ -1210,6 +1603,7 @@
12101603 bool firmware_inited;
12111604 bool firmware_reloaded;
12121605 bool firmware_reload_needed;
1606
+ bool firmware_full_reload_needed;
12131607 bool firmware_hctl_core_pwr;
12141608 struct work_struct firmware_reload_work;
12151609 bool glb_init_request_pending;
....@@ -1218,10 +1612,23 @@
12181612 u32 mcu_core_pwroff_dur_us;
12191613 u32 mcu_core_pwroff_dur_count;
12201614 u32 mcu_core_pwroff_reg_shadow;
1221
- u32 gpu_idle_hysteresis_ms;
1615
+ u32 gpu_idle_hysteresis_us;
12221616 u32 gpu_idle_dur_count;
12231617 unsigned int fw_timeout_ms;
12241618 struct kbase_csf_hwcnt hwcnt;
1619
+ struct kbase_csf_mcu_fw fw;
1620
+ struct kbase_csf_firmware_log fw_log;
1621
+ struct kbase_csf_firmware_core_dump fw_core_dump;
1622
+#if IS_ENABLED(CONFIG_DEBUG_FS)
1623
+ struct kbase_csf_dump_on_fault dof;
1624
+#endif /* CONFIG_DEBUG_FS */
1625
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
1626
+ /**
1627
+ * @coresight: Coresight device structure.
1628
+ */
1629
+ struct kbase_debug_coresight_device coresight;
1630
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
1631
+ struct kbase_csf_user_reg user_reg;
12251632 };
12261633
12271634 /**
....@@ -1238,6 +1645,10 @@
12381645 * @bf_data: Data relating to Bus fault.
12391646 * @gf_data: Data relating to GPU fault.
12401647 * @current_setup: Stores the MMU configuration for this address space.
1648
+ * @is_unresponsive: Flag to indicate MMU is not responding.
1649
+ * Set if a MMU command isn't completed within
1650
+ * &kbase_device:mmu_as_inactive_wait_time_ms.
1651
+ * Cleared by kbase_ctx_sched_restore_all_as() after GPU reset completes.
12411652 */
12421653 struct kbase_as {
12431654 int number;
....@@ -1249,6 +1660,7 @@
12491660 struct kbase_fault bf_data;
12501661 struct kbase_fault gf_data;
12511662 struct kbase_mmu_setup current_setup;
1663
+ bool is_unresponsive;
12521664 };
12531665
12541666 #endif /* _KBASE_CSF_DEFS_H_ */