.. | .. |
---|
1 | 1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ |
---|
2 | 2 | /* |
---|
3 | 3 | * |
---|
4 | | - * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. |
---|
| 4 | + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. |
---|
5 | 5 | * |
---|
6 | 6 | * This program is free software and is provided to you under the terms of the |
---|
7 | 7 | * GNU General Public License version 2 as published by the Free Software |
---|
.. | .. |
---|
35 | 35 | #include <backend/gpu/mali_kbase_instr_defs.h> |
---|
36 | 36 | #include <mali_kbase_pm.h> |
---|
37 | 37 | #include <mali_kbase_gpuprops_types.h> |
---|
| 38 | +#include <hwcnt/mali_kbase_hwcnt_watchdog_if.h> |
---|
| 39 | + |
---|
38 | 40 | #if MALI_USE_CSF |
---|
39 | | -#include <mali_kbase_hwcnt_backend_csf.h> |
---|
| 41 | +#include <hwcnt/backend/mali_kbase_hwcnt_backend_csf.h> |
---|
40 | 42 | #else |
---|
41 | | -#include <mali_kbase_hwcnt_backend_jm.h> |
---|
| 43 | +#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm.h> |
---|
| 44 | +#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h> |
---|
42 | 45 | #endif |
---|
| 46 | + |
---|
43 | 47 | #include <protected_mode_switcher.h> |
---|
44 | 48 | |
---|
45 | 49 | #include <linux/atomic.h> |
---|
.. | .. |
---|
49 | 53 | #include <linux/sizes.h> |
---|
50 | 54 | |
---|
51 | 55 | |
---|
52 | | -#if defined(CONFIG_SYNC) |
---|
53 | | -#include <sync.h> |
---|
54 | | -#else |
---|
55 | 56 | #include "mali_kbase_fence_defs.h" |
---|
56 | | -#endif |
---|
57 | 57 | |
---|
58 | 58 | #if IS_ENABLED(CONFIG_DEBUG_FS) |
---|
59 | 59 | #include <linux/debugfs.h> |
---|
.. | .. |
---|
63 | 63 | #include <linux/devfreq.h> |
---|
64 | 64 | #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ |
---|
65 | 65 | |
---|
| 66 | +#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) |
---|
| 67 | +#include <linux/devfreq_cooling.h> |
---|
| 68 | +#endif |
---|
| 69 | + |
---|
66 | 70 | #ifdef CONFIG_MALI_ARBITER_SUPPORT |
---|
67 | 71 | #include <arbiter/mali_kbase_arbiter_defs.h> |
---|
68 | 72 | #endif /* CONFIG_MALI_ARBITER_SUPPORT */ |
---|
.. | .. |
---|
70 | 74 | #include <linux/clk.h> |
---|
71 | 75 | #include <linux/regulator/consumer.h> |
---|
72 | 76 | #include <linux/memory_group_manager.h> |
---|
73 | | - |
---|
74 | | -#if defined(CONFIG_PM_RUNTIME) || defined(CONFIG_PM) |
---|
75 | | -#define KBASE_PM_RUNTIME 1 |
---|
76 | | -#endif |
---|
| 77 | +#include <soc/rockchip/rockchip_opp_select.h> |
---|
77 | 78 | |
---|
78 | 79 | #include "debug/mali_kbase_debug_ktrace_defs.h" |
---|
79 | 80 | |
---|
.. | .. |
---|
81 | 82 | #define RESET_TIMEOUT 500 |
---|
82 | 83 | |
---|
83 | 84 | /** |
---|
84 | | - * The maximum number of Job Slots to support in the Hardware. |
---|
| 85 | + * BASE_JM_MAX_NR_SLOTS - The maximum number of Job Slots to support in the Hardware. |
---|
85 | 86 | * |
---|
86 | 87 | * You can optimize this down if your target devices will only ever support a |
---|
87 | 88 | * small number of job slots. |
---|
.. | .. |
---|
89 | 90 | #define BASE_JM_MAX_NR_SLOTS 3 |
---|
90 | 91 | |
---|
91 | 92 | /** |
---|
92 | | - * The maximum number of Address Spaces to support in the Hardware. |
---|
| 93 | + * BASE_MAX_NR_AS - The maximum number of Address Spaces to support in the Hardware. |
---|
93 | 94 | * |
---|
94 | 95 | * You can optimize this down if your target devices will only ever support a |
---|
95 | 96 | * small number of Address Spaces |
---|
.. | .. |
---|
109 | 110 | #define KBASEP_AS_NR_INVALID (-1) |
---|
110 | 111 | |
---|
111 | 112 | /** |
---|
112 | | - * Maximum size in bytes of a MMU lock region, as a logarithm |
---|
| 113 | + * KBASE_LOCK_REGION_MAX_SIZE_LOG2 - Maximum size in bytes of a MMU lock region, |
---|
| 114 | + * as a logarithm |
---|
113 | 115 | */ |
---|
114 | | -#define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (64) |
---|
| 116 | +#define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (48) /* 256 TB */ |
---|
115 | 117 | |
---|
116 | 118 | /** |
---|
117 | | - * Minimum size in bytes of a MMU lock region, as a logarithm |
---|
| 119 | + * KBASE_REG_ZONE_MAX - Maximum number of GPU memory region zones |
---|
118 | 120 | */ |
---|
119 | | -#define KBASE_LOCK_REGION_MIN_SIZE_LOG2 (15) |
---|
120 | | - |
---|
121 | | -/** |
---|
122 | | - * Maximum number of GPU memory region zones |
---|
123 | | - */ |
---|
| 121 | +#if MALI_USE_CSF |
---|
| 122 | +#define KBASE_REG_ZONE_MAX 6ul |
---|
| 123 | +#else |
---|
124 | 124 | #define KBASE_REG_ZONE_MAX 4ul |
---|
| 125 | +#endif |
---|
125 | 126 | |
---|
126 | 127 | #include "mali_kbase_hwaccess_defs.h" |
---|
127 | 128 | |
---|
128 | 129 | /* Maximum number of pages of memory that require a permanent mapping, per |
---|
129 | 130 | * kbase_context |
---|
130 | 131 | */ |
---|
131 | | -#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((32 * 1024ul * 1024ul) >> \ |
---|
132 | | - PAGE_SHIFT) |
---|
| 132 | +#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((64 * 1024ul * 1024ul) >> PAGE_SHIFT) |
---|
133 | 133 | /* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer |
---|
134 | 134 | * clients, to reduce undesired system load. |
---|
135 | 135 | * If a virtualizer client requests a dump within this threshold period after |
---|
.. | .. |
---|
152 | 152 | * the device node. |
---|
153 | 153 | * This is dependent on support for of_property_read_u64_array() in the |
---|
154 | 154 | * kernel. |
---|
| 155 | + * Note that the number of clocks may exceed the number of regulators, |
---|
| 156 | + * as mentioned in power_control_init(). |
---|
155 | 157 | */ |
---|
156 | | -#define BASE_MAX_NR_CLOCKS_REGULATORS (2) |
---|
| 158 | +#define BASE_MAX_NR_CLOCKS_REGULATORS (4) |
---|
157 | 159 | |
---|
158 | 160 | /* Forward declarations */ |
---|
159 | 161 | struct kbase_context; |
---|
.. | .. |
---|
243 | 245 | bool protected_mode; |
---|
244 | 246 | }; |
---|
245 | 247 | |
---|
| 248 | +/** Maximum number of memory pages that should be allocated for the array |
---|
| 249 | + * of pointers to free PGDs. |
---|
| 250 | + * |
---|
| 251 | + * This number has been pre-calculated to deal with the maximum allocation |
---|
| 252 | + * size expressed by the default value of KBASE_MEM_ALLOC_MAX_SIZE. |
---|
| 253 | + * This is supposed to be enough for almost the entirety of MMU operations. |
---|
| 254 | + * Any size greater than KBASE_MEM_ALLOC_MAX_SIZE requires being broken down |
---|
| 255 | + * into multiple iterations, each dealing with at most KBASE_MEM_ALLOC_MAX_SIZE |
---|
| 256 | + * bytes. |
---|
| 257 | + * |
---|
| 258 | + * Please update this value if KBASE_MEM_ALLOC_MAX_SIZE changes. |
---|
| 259 | + */ |
---|
| 260 | +#define MAX_PAGES_FOR_FREE_PGDS ((size_t)9) |
---|
| 261 | + |
---|
| 262 | +/* Maximum number of pointers to free PGDs */ |
---|
| 263 | +#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(struct page *)) * MAX_PAGES_FOR_FREE_PGDS) |
---|
| 264 | + |
---|
246 | 265 | /** |
---|
247 | 266 | * struct kbase_mmu_table - object representing a set of GPU page tables |
---|
248 | | - * @mmu_teardown_pages: Buffer of 4 Pages in size, used to cache the entries |
---|
249 | | - * of top & intermediate level page tables to avoid |
---|
250 | | - * repeated calls to kmap_atomic during the MMU teardown. |
---|
251 | 267 | * @mmu_lock: Lock to serialize the accesses made to multi level GPU |
---|
252 | 268 | * page tables |
---|
253 | 269 | * @pgd: Physical address of the page allocated for the top |
---|
.. | .. |
---|
259 | 275 | * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). |
---|
260 | 276 | * @kctx: If this set of MMU tables belongs to a context then |
---|
261 | 277 | * this is a back-reference to the context, otherwise |
---|
262 | | - * it is NULL |
---|
| 278 | + * it is NULL. |
---|
| 279 | + * @scratch_mem: Scratch memory used for MMU operations, which are |
---|
| 280 | + * serialized by the @mmu_lock. |
---|
263 | 281 | */ |
---|
264 | 282 | struct kbase_mmu_table { |
---|
265 | | - u64 *mmu_teardown_pages; |
---|
266 | 283 | struct mutex mmu_lock; |
---|
267 | 284 | phys_addr_t pgd; |
---|
268 | 285 | u8 group_id; |
---|
269 | 286 | struct kbase_context *kctx; |
---|
| 287 | + union { |
---|
| 288 | + /** |
---|
| 289 | + * @teardown_pages: Scratch memory used for backup copies of whole |
---|
| 290 | + * PGD pages when tearing down levels upon |
---|
| 291 | + * termination of the MMU table. |
---|
| 292 | + */ |
---|
| 293 | + struct { |
---|
| 294 | + /** |
---|
| 295 | + * @levels: Array of PGD pages, large enough to copy one PGD |
---|
| 296 | + * for each level of the MMU table. |
---|
| 297 | + */ |
---|
| 298 | + u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)]; |
---|
| 299 | + } teardown_pages; |
---|
| 300 | + /** |
---|
| 301 | + * @free_pgds: Scratch memory used for insertion, update and teardown |
---|
| 302 | + * operations to store a temporary list of PGDs to be freed |
---|
| 303 | + * at the end of the operation. |
---|
| 304 | + */ |
---|
| 305 | + struct { |
---|
| 306 | + /** @pgds: Array of pointers to PGDs to free. */ |
---|
| 307 | + struct page *pgds[MAX_FREE_PGDS]; |
---|
| 308 | + /** @head_index: Index of first free element in the PGDs array. */ |
---|
| 309 | + size_t head_index; |
---|
| 310 | + } free_pgds; |
---|
| 311 | + } scratch_mem; |
---|
| 312 | +}; |
---|
| 313 | + |
---|
| 314 | +/** |
---|
| 315 | + * struct kbase_reg_zone - Information about GPU memory region zones |
---|
| 316 | + * @base_pfn: Page Frame Number in GPU virtual address space for the start of |
---|
| 317 | + * the Zone |
---|
| 318 | + * @va_size_pages: Size of the Zone in pages |
---|
| 319 | + * |
---|
| 320 | + * Track information about a zone KBASE_REG_ZONE() and related macros. |
---|
| 321 | + * In future, this could also store the &rb_root that are currently in |
---|
| 322 | + * &kbase_context and &kbase_csf_device. |
---|
| 323 | + */ |
---|
| 324 | +struct kbase_reg_zone { |
---|
| 325 | + u64 base_pfn; |
---|
| 326 | + u64 va_size_pages; |
---|
270 | 327 | }; |
---|
271 | 328 | |
---|
272 | 329 | #if MALI_USE_CSF |
---|
.. | .. |
---|
274 | 331 | #else |
---|
275 | 332 | #include "jm/mali_kbase_jm_defs.h" |
---|
276 | 333 | #endif |
---|
| 334 | + |
---|
| 335 | +#include "mali_kbase_hwaccess_time.h" |
---|
277 | 336 | |
---|
278 | 337 | static inline int kbase_as_has_bus_fault(struct kbase_as *as, |
---|
279 | 338 | struct kbase_fault *fault) |
---|
.. | .. |
---|
339 | 398 | * enumerated GPU clock. |
---|
340 | 399 | * @clk_rate_trace_ops: Pointer to the platform specific GPU clock rate trace |
---|
341 | 400 | * operations. |
---|
342 | | - * @gpu_clk_rate_trace_write: Pointer to the function that would emit the |
---|
343 | | - * tracepoint for the clock rate change. |
---|
344 | 401 | * @listeners: List of listener attached. |
---|
345 | 402 | * @lock: Lock to serialize the actions of GPU clock rate trace |
---|
346 | 403 | * manager. |
---|
.. | .. |
---|
355 | 412 | |
---|
356 | 413 | /** |
---|
357 | 414 | * struct kbase_pm_device_data - Data stored per device for power management. |
---|
358 | | - * @lock: The lock protecting Power Management structures accessed outside of |
---|
359 | | - * IRQ. |
---|
360 | | - * This lock must also be held whenever the GPU is being powered on or |
---|
361 | | - * off. |
---|
362 | | - * @active_count: The reference count of active contexts on this device. Note |
---|
363 | | - * that some code paths keep shaders/the tiler powered whilst this is 0. |
---|
364 | | - * Use kbase_pm_is_active() instead to check for such cases. |
---|
| 415 | + * @lock: The lock protecting Power Management structures accessed |
---|
| 416 | + * outside of IRQ. |
---|
| 417 | + * This lock must also be held whenever the GPU is being |
---|
| 418 | + * powered on or off. |
---|
| 419 | + * @active_count: The reference count of active contexts on this device. |
---|
| 420 | + * Note that some code paths keep shaders/the tiler |
---|
| 421 | + * powered whilst this is 0. |
---|
| 422 | + * Use kbase_pm_is_active() instead to check for such cases. |
---|
365 | 423 | * @suspending: Flag indicating suspending/suspended |
---|
| 424 | + * @runtime_active: Flag to track if the GPU is in runtime suspended or active |
---|
| 425 | + * state. This ensures that runtime_put and runtime_get |
---|
| 426 | + * functions are called in pairs. For example if runtime_get |
---|
| 427 | + * has already been called from the power_on callback, then |
---|
| 428 | + * the call to it from runtime_gpu_active callback can be |
---|
| 429 | + * skipped. |
---|
366 | 430 | * @gpu_lost: Flag indicating gpu lost |
---|
367 | | - * This structure contains data for the power management framework. There |
---|
368 | | - * is one instance of this structure per device in the system. |
---|
| 431 | + * This structure contains data for the power management framework. |
---|
| 432 | + * There is one instance of this structure per device in the system. |
---|
369 | 433 | * @zero_active_count_wait: Wait queue set when active_count == 0 |
---|
370 | 434 | * @resume_wait: system resume of GPU device. |
---|
371 | 435 | * @debug_core_mask: Bit masks identifying the available shader cores that are |
---|
372 | | - * specified via sysfs. One mask per job slot. |
---|
| 436 | + * specified via sysfs. One mask per job slot. |
---|
373 | 437 | * @debug_core_mask_all: Bit masks identifying the available shader cores that |
---|
374 | | - * are specified via sysfs. |
---|
| 438 | + * are specified via sysfs. |
---|
375 | 439 | * @callback_power_runtime_init: Callback for initializing the runtime power |
---|
376 | | - * management. Return 0 on success, else error code |
---|
| 440 | + * management. Return 0 on success, else error code |
---|
377 | 441 | * @callback_power_runtime_term: Callback for terminating the runtime power |
---|
378 | | - * management. |
---|
| 442 | + * management. |
---|
379 | 443 | * @dvfs_period: Time in milliseconds between each dvfs sample |
---|
380 | 444 | * @backend: KBase PM backend data |
---|
381 | 445 | * @arb_vm_state: The state of the arbiter VM machine |
---|
382 | 446 | * @gpu_users_waiting: Used by virtualization to notify the arbiter that there |
---|
383 | | - * are users waiting for the GPU so that it can request and resume the |
---|
384 | | - * driver. |
---|
| 447 | + * are users waiting for the GPU so that it can request |
---|
| 448 | + * and resume the driver. |
---|
385 | 449 | * @clk_rtm: The state of the GPU clock rate trace manager |
---|
386 | 450 | */ |
---|
387 | 451 | struct kbase_pm_device_data { |
---|
388 | 452 | struct mutex lock; |
---|
389 | 453 | int active_count; |
---|
390 | 454 | bool suspending; |
---|
| 455 | +#if MALI_USE_CSF |
---|
| 456 | + bool runtime_active; |
---|
| 457 | +#endif |
---|
391 | 458 | #ifdef CONFIG_MALI_ARBITER_SUPPORT |
---|
392 | 459 | atomic_t gpu_lost; |
---|
393 | 460 | #endif /* CONFIG_MALI_ARBITER_SUPPORT */ |
---|
.. | .. |
---|
415 | 482 | |
---|
416 | 483 | /** |
---|
417 | 484 | * struct kbase_mem_pool - Page based memory pool for kctx/kbdev |
---|
418 | | - * @kbdev: Kbase device where memory is used |
---|
419 | | - * @cur_size: Number of free pages currently in the pool (may exceed |
---|
420 | | - * @max_size in some corner cases) |
---|
421 | | - * @max_size: Maximum number of free pages in the pool |
---|
422 | | - * @order: order = 0 refers to a pool of 4 KB pages |
---|
423 | | - * order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB) |
---|
424 | | - * @group_id: A memory group ID to be passed to a platform-specific |
---|
425 | | - * memory group manager, if present. Immutable. |
---|
426 | | - * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). |
---|
427 | | - * @pool_lock: Lock protecting the pool - must be held when modifying |
---|
428 | | - * @cur_size and @page_list |
---|
429 | | - * @page_list: List of free pages in the pool |
---|
430 | | - * @reclaim: Shrinker for kernel reclaim of free pages |
---|
431 | | - * @next_pool: Pointer to next pool where pages can be allocated when this |
---|
432 | | - * pool is empty. Pages will spill over to the next pool when |
---|
433 | | - * this pool is full. Can be NULL if there is no next pool. |
---|
434 | | - * @dying: true if the pool is being terminated, and any ongoing |
---|
435 | | - * operations should be abandoned |
---|
436 | | - * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from |
---|
437 | | - * this pool, eg during a grow operation |
---|
| 485 | + * @kbdev: Kbase device where memory is used |
---|
| 486 | + * @cur_size: Number of free pages currently in the pool (may exceed |
---|
| 487 | + * @max_size in some corner cases) |
---|
| 488 | + * @max_size: Maximum number of free pages in the pool |
---|
| 489 | + * @order: order = 0 refers to a pool of 4 KB pages |
---|
| 490 | + * order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB) |
---|
| 491 | + * @group_id: A memory group ID to be passed to a platform-specific |
---|
| 492 | + * memory group manager, if present. Immutable. |
---|
| 493 | + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). |
---|
| 494 | + * @pool_lock: Lock protecting the pool - must be held when modifying |
---|
| 495 | + * @cur_size and @page_list |
---|
| 496 | + * @page_list: List of free pages in the pool |
---|
| 497 | + * @reclaim: Shrinker for kernel reclaim of free pages |
---|
| 498 | + * @isolation_in_progress_cnt: Number of pages in pool undergoing page isolation. |
---|
| 499 | + * This is used to avoid race condition between pool termination |
---|
| 500 | + * and page isolation for page migration. |
---|
| 501 | + * @next_pool: Pointer to next pool where pages can be allocated when this |
---|
| 502 | + * pool is empty. Pages will spill over to the next pool when |
---|
| 503 | + * this pool is full. Can be NULL if there is no next pool. |
---|
| 504 | + * @dying: true if the pool is being terminated, and any ongoing |
---|
| 505 | + * operations should be abandoned |
---|
| 506 | + * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from |
---|
| 507 | + * this pool, eg during a grow operation |
---|
438 | 508 | */ |
---|
439 | 509 | struct kbase_mem_pool { |
---|
440 | 510 | struct kbase_device *kbdev; |
---|
441 | | - size_t cur_size; |
---|
442 | | - size_t max_size; |
---|
443 | | - u8 order; |
---|
444 | | - u8 group_id; |
---|
445 | | - spinlock_t pool_lock; |
---|
446 | | - struct list_head page_list; |
---|
447 | | - struct shrinker reclaim; |
---|
| 511 | + size_t cur_size; |
---|
| 512 | + size_t max_size; |
---|
| 513 | + u8 order; |
---|
| 514 | + u8 group_id; |
---|
| 515 | + spinlock_t pool_lock; |
---|
| 516 | + struct list_head page_list; |
---|
| 517 | + struct shrinker reclaim; |
---|
| 518 | + atomic_t isolation_in_progress_cnt; |
---|
448 | 519 | |
---|
449 | 520 | struct kbase_mem_pool *next_pool; |
---|
450 | 521 | |
---|
.. | .. |
---|
455 | 526 | /** |
---|
456 | 527 | * struct kbase_mem_pool_group - a complete set of physical memory pools. |
---|
457 | 528 | * |
---|
| 529 | + * @small: Array of objects containing the state for pools of 4 KiB size |
---|
| 530 | + * physical pages. |
---|
| 531 | + * @large: Array of objects containing the state for pools of 2 MiB size |
---|
| 532 | + * physical pages. |
---|
| 533 | + * |
---|
458 | 534 | * Memory pools are used to allow efficient reallocation of previously-freed |
---|
459 | 535 | * physical pages. A pair of memory pools is initialized for each physical |
---|
460 | 536 | * memory group: one for 4 KiB pages and one for 2 MiB pages. These arrays |
---|
461 | 537 | * should be indexed by physical memory group ID, the meaning of which is |
---|
462 | 538 | * defined by the systems integrator. |
---|
463 | | - * |
---|
464 | | - * @small: Array of objects containing the state for pools of 4 KiB size |
---|
465 | | - * physical pages. |
---|
466 | | - * @large: Array of objects containing the state for pools of 2 MiB size |
---|
467 | | - * physical pages. |
---|
468 | 539 | */ |
---|
469 | 540 | struct kbase_mem_pool_group { |
---|
470 | 541 | struct kbase_mem_pool small[MEMORY_GROUP_MANAGER_NR_GROUPS]; |
---|
.. | .. |
---|
485 | 556 | * struct kbase_mem_pool_group_config - Initial configuration for a complete |
---|
486 | 557 | * set of physical memory pools |
---|
487 | 558 | * |
---|
488 | | - * This array should be indexed by physical memory group ID, the meaning |
---|
489 | | - * of which is defined by the systems integrator. |
---|
490 | | - * |
---|
491 | 559 | * @small: Array of initial configuration for pools of 4 KiB pages. |
---|
492 | 560 | * @large: Array of initial configuration for pools of 2 MiB pages. |
---|
| 561 | + * |
---|
| 562 | + * This array should be indexed by physical memory group ID, the meaning |
---|
| 563 | + * of which is defined by the systems integrator. |
---|
493 | 564 | */ |
---|
494 | 565 | struct kbase_mem_pool_group_config { |
---|
495 | 566 | struct kbase_mem_pool_config small[MEMORY_GROUP_MANAGER_NR_GROUPS]; |
---|
.. | .. |
---|
529 | 600 | * @entry_set_ate: program the pte to be a valid address translation entry to |
---|
530 | 601 | * encode the physical address of the actual page being mapped. |
---|
531 | 602 | * @entry_set_pte: program the pte to be a valid entry to encode the physical |
---|
532 | | - * address of the next lower level page table. |
---|
533 | | - * @entry_invalidate: clear out or invalidate the pte. |
---|
| 603 | + * address of the next lower level page table and also update |
---|
| 604 | + * the number of valid entries. |
---|
| 605 | + * @entries_invalidate: clear out or invalidate a range of ptes. |
---|
| 606 | + * @get_num_valid_entries: returns the number of valid entries for a specific pgd. |
---|
| 607 | + * @set_num_valid_entries: sets the number of valid entries for a specific pgd |
---|
534 | 608 | * @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants. |
---|
535 | 609 | */ |
---|
536 | 610 | struct kbase_mmu_mode { |
---|
.. | .. |
---|
546 | 620 | void (*entry_set_ate)(u64 *entry, struct tagged_addr phy, |
---|
547 | 621 | unsigned long flags, int level); |
---|
548 | 622 | void (*entry_set_pte)(u64 *entry, phys_addr_t phy); |
---|
549 | | - void (*entry_invalidate)(u64 *entry); |
---|
| 623 | + void (*entries_invalidate)(u64 *entry, u32 count); |
---|
| 624 | + unsigned int (*get_num_valid_entries)(u64 *pgd); |
---|
| 625 | + void (*set_num_valid_entries)(u64 *pgd, |
---|
| 626 | + unsigned int num_of_valid_entries); |
---|
550 | 627 | unsigned long flags; |
---|
551 | 628 | }; |
---|
552 | 629 | |
---|
.. | .. |
---|
611 | 688 | }; |
---|
612 | 689 | |
---|
613 | 690 | /** |
---|
| 691 | + * struct kbase_mem_migrate - Object representing an instance for managing |
---|
| 692 | + * page migration. |
---|
| 693 | + * |
---|
| 694 | + * @free_pages_list: List of deferred pages to free. Mostly used when page migration |
---|
| 695 | + * is enabled. Pages in memory pool that require migrating |
---|
| 696 | + * will be freed instead. However page cannot be freed |
---|
| 697 | + * right away as Linux will need to release the page lock. |
---|
| 698 | + * Therefore page will be added to this list and freed later. |
---|
| 699 | + * @free_pages_lock: This lock should be held when adding or removing pages |
---|
| 700 | + * from @free_pages_list. |
---|
| 701 | + * @free_pages_workq: Work queue to process the work items queued to free |
---|
| 702 | + * pages in @free_pages_list. |
---|
| 703 | + * @free_pages_work: Work item to free pages in @free_pages_list. |
---|
| 704 | + * @inode: Pointer to inode whose address space operations are used |
---|
| 705 | + * for page migration purposes. |
---|
| 706 | + */ |
---|
| 707 | +struct kbase_mem_migrate { |
---|
| 708 | + struct list_head free_pages_list; |
---|
| 709 | + spinlock_t free_pages_lock; |
---|
| 710 | + struct workqueue_struct *free_pages_workq; |
---|
| 711 | + struct work_struct free_pages_work; |
---|
| 712 | +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) |
---|
| 713 | + struct inode *inode; |
---|
| 714 | +#endif |
---|
| 715 | +}; |
---|
| 716 | + |
---|
| 717 | +/** |
---|
614 | 718 | * struct kbase_device - Object representing an instance of GPU platform device, |
---|
615 | 719 | * allocated from the probe method of mali driver. |
---|
616 | 720 | * @hw_quirks_sc: Configuration to be used for the shader cores as per |
---|
.. | .. |
---|
641 | 745 | * @irqs.flags: irq flags |
---|
642 | 746 | * @clocks: Pointer to the input clock resources referenced by |
---|
643 | 747 | * the GPU device node. |
---|
| 748 | + * @scmi_clk: Pointer to the input scmi clock resources |
---|
644 | 749 | * @nr_clocks: Number of clocks set in the clocks array. |
---|
645 | 750 | * @regulators: Pointer to the structs corresponding to the |
---|
646 | 751 | * regulators referenced by the GPU device node. |
---|
.. | .. |
---|
648 | 753 | * @opp_table: Pointer to the device OPP structure maintaining the |
---|
649 | 754 | * link to OPPs attached to a device. This is obtained |
---|
650 | 755 | * after setting regulator names for the device. |
---|
| 756 | + * @token: Integer replacement for opp_table in kernel versions |
---|
| 757 | + * 6 and greater. Value is a token id number when 0 or greater, |
---|
| 758 | + * and a linux errno when negative. Must be initialised |
---|
| 759 | + * to a non-zero value as 0 is a valid token id. |
---|
651 | 760 | * @devname: string containing the name used for GPU device instance, |
---|
652 | 761 | * miscellaneous device is registered using the same name. |
---|
653 | 762 | * @id: Unique identifier for the device, indicates the number of |
---|
.. | .. |
---|
694 | 803 | * GPU adrress spaces assigned to them. |
---|
695 | 804 | * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask |
---|
696 | 805 | * register used in the handling of Bus & Page faults. |
---|
| 806 | + * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are |
---|
| 807 | + * supported and used where possible. |
---|
697 | 808 | * @gpu_props: Object containing complete information about the |
---|
698 | 809 | * configuration/properties of GPU HW device in use. |
---|
699 | 810 | * @hw_issues_mask: List of SW workarounds for HW issues |
---|
.. | .. |
---|
716 | 827 | * @hwcnt.addr: HW counter address |
---|
717 | 828 | * @hwcnt.addr_bytes: HW counter size in bytes |
---|
718 | 829 | * @hwcnt.backend: Kbase instrumentation backend |
---|
| 830 | + * @hwcnt_gpu_jm_backend: Job manager GPU backend interface, used as superclass reference |
---|
| 831 | + * pointer by hwcnt_gpu_iface, which wraps this implementation in |
---|
| 832 | + * order to extend it with periodic dumping functionality. |
---|
719 | 833 | * @hwcnt_gpu_iface: Backend interface for GPU hardware counter access. |
---|
| 834 | + * @hwcnt_watchdog_timer: Watchdog interface, used by the GPU backend hwcnt_gpu_iface to |
---|
| 835 | + * perform periodic dumps in order to prevent hardware counter value |
---|
| 836 | + * overflow or saturation. |
---|
720 | 837 | * @hwcnt_gpu_ctx: Context for GPU hardware counter access. |
---|
721 | 838 | * @hwaccess_lock must be held when calling |
---|
722 | 839 | * kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx. |
---|
723 | 840 | * @hwcnt_gpu_virt: Virtualizer for GPU hardware counters. |
---|
724 | 841 | * @vinstr_ctx: vinstr context created per device. |
---|
| 842 | + * @kinstr_prfcnt_ctx: kinstr_prfcnt context created per device. |
---|
725 | 843 | * @timeline_flags: Bitmask defining which sets of timeline tracepoints |
---|
726 | 844 | * are enabled. If zero, there is no timeline client and |
---|
727 | 845 | * therefore timeline is disabled. |
---|
728 | 846 | * @timeline: Timeline context created per device. |
---|
729 | 847 | * @ktrace: kbase device's ktrace |
---|
730 | | - * @trace_lock: Lock to serialize the access to trace buffer. |
---|
731 | | - * @trace_first_out: Index/offset in the trace buffer at which the first |
---|
732 | | - * unread message is present. |
---|
733 | | - * @trace_next_in: Index/offset in the trace buffer at which the new |
---|
734 | | - * message will be written. |
---|
735 | | - * @trace_rbuf: Pointer to the buffer storing debug messages/prints |
---|
736 | | - * tracing the various events in Driver. |
---|
737 | | - * The buffer is filled in circular fashion. |
---|
738 | 848 | * @reset_timeout_ms: Number of milliseconds to wait for the soft stop to |
---|
739 | 849 | * complete for the GPU jobs before proceeding with the |
---|
740 | 850 | * GPU reset. |
---|
| 851 | + * @lowest_gpu_freq_khz: Lowest frequency in KHz that the GPU can run at. Used |
---|
| 852 | + * to calculate suitable timeouts for wait operations. |
---|
| 853 | + * @backend_time: Kbase backend time related attributes. |
---|
741 | 854 | * @cache_clean_in_progress: Set when a cache clean has been started, and |
---|
742 | 855 | * cleared when it has finished. This prevents multiple |
---|
743 | 856 | * cache cleans being done simultaneously. |
---|
744 | | - * @cache_clean_queued: Set if a cache clean is invoked while another is in |
---|
745 | | - * progress. If this happens, another cache clean needs |
---|
| 857 | + * @cache_clean_queued: Pended cache clean operations invoked while another is |
---|
| 858 | + * in progress. If this is not 0, another cache clean needs |
---|
746 | 859 | * to be triggered immediately after completion of the |
---|
747 | 860 | * current one. |
---|
748 | 861 | * @cache_clean_wait: Signalled when a cache clean has finished. |
---|
.. | .. |
---|
752 | 865 | * including any contexts that might be created for |
---|
753 | 866 | * hardware counters. |
---|
754 | 867 | * @kctx_list_lock: Lock protecting concurrent accesses to @kctx_list. |
---|
755 | | - * @group_max_uid_in_devices: Max value of any queue group UID in any kernel |
---|
756 | | - * context in the kbase device. |
---|
757 | 868 | * @devfreq_profile: Describes devfreq profile for the Mali GPU device, passed |
---|
758 | 869 | * to devfreq_add_device() to add devfreq feature to Mali |
---|
759 | 870 | * GPU device. |
---|
.. | .. |
---|
839 | 950 | * backend specific data for HW access layer. |
---|
840 | 951 | * @faults_pending: Count of page/bus faults waiting for bottom half processing |
---|
841 | 952 | * via workqueues. |
---|
| 953 | + * @mmu_hw_operation_in_progress: Set before sending the MMU command and is |
---|
| 954 | + * cleared after the command is complete. Whilst this |
---|
| 955 | + * flag is set, the write to L2_PWROFF register will be |
---|
| 956 | + * skipped which is needed to workaround the HW issue |
---|
| 957 | + * GPU2019-3878. PM state machine is invoked after |
---|
| 958 | + * clearing this flag and @hwaccess_lock is used to |
---|
| 959 | + * serialize the access. |
---|
| 960 | + * @mmu_page_migrate_in_progress: Set before starting a MMU page migration transaction |
---|
| 961 | + * and cleared after the transaction completes. PM L2 state is |
---|
| 962 | + * prevented from entering powering up/down transitions when the |
---|
| 963 | + * flag is set, @hwaccess_lock is used to serialize the access. |
---|
842 | 964 | * @poweroff_pending: Set when power off operation for GPU is started, reset when |
---|
843 | 965 | * power on for GPU is started. |
---|
844 | 966 | * @infinite_cache_active_default: Set to enable using infinite cache for all the |
---|
.. | .. |
---|
868 | 990 | * enabled. |
---|
869 | 991 | * @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware |
---|
870 | 992 | * counters, used if atomic disable is not possible. |
---|
871 | | - * @buslogger: Pointer to the structure required for interfacing |
---|
872 | | - * with the bus logger module to set the size of buffer |
---|
873 | | - * used by the module for capturing bus logs. |
---|
874 | 993 | * @irq_reset_flush: Flag to indicate that GPU reset is in-flight and flush of |
---|
875 | 994 | * IRQ + bottom half is being done, to prevent the writes |
---|
876 | 995 | * to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers. |
---|
.. | .. |
---|
891 | 1010 | * @l2_hash_override: Used to set L2 cache hash via device tree blob |
---|
892 | 1011 | * @l2_hash_values_override: true if @l2_hash_values is valid. |
---|
893 | 1012 | * @l2_hash_values: Used to set L2 asn_hash via device tree blob |
---|
| 1013 | + * @sysc_alloc: Array containing values to be programmed into |
---|
| 1014 | + * SYSC_ALLOC[0..7] GPU registers on L2 cache |
---|
| 1015 | + * power down. These come from either DTB or |
---|
| 1016 | + * via DebugFS (if it is available in kernel). |
---|
894 | 1017 | * @process_root: rb_tree root node for maintaining a rb_tree of |
---|
895 | 1018 | * kbase_process based on key tgid(thread group ID). |
---|
896 | 1019 | * @dma_buf_root: rb_tree root node for maintaining a rb_tree of |
---|
.. | .. |
---|
917 | 1040 | * @pcm_dev: The priority control manager device. |
---|
918 | 1041 | * @oom_notifier_block: notifier_block containing kernel-registered out-of- |
---|
919 | 1042 | * memory handler. |
---|
| 1043 | + * @mem_migrate: Per device object for managing page migration. |
---|
| 1044 | + * @live_fence_metadata: Count of live fence metadata structures created by |
---|
| 1045 | + * KCPU queue. These structures may outlive kbase module |
---|
| 1046 | + * itself. Therefore, in such a case, a warning should be |
---|
| 1047 | + * be produced. |
---|
| 1048 | + * @mmu_as_inactive_wait_time_ms: Maximum waiting time in ms for the completion of |
---|
| 1049 | + * a MMU operation |
---|
| 1050 | + * @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures. |
---|
920 | 1051 | */ |
---|
921 | 1052 | struct kbase_device { |
---|
922 | 1053 | u32 hw_quirks_sc; |
---|
.. | .. |
---|
941 | 1072 | #if IS_ENABLED(CONFIG_REGULATOR) |
---|
942 | 1073 | struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS]; |
---|
943 | 1074 | unsigned int nr_regulators; |
---|
944 | | -#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) |
---|
| 1075 | +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) |
---|
| 1076 | + int token; |
---|
| 1077 | +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) |
---|
945 | 1078 | struct opp_table *opp_table; |
---|
946 | | -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ |
---|
| 1079 | +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ |
---|
947 | 1080 | #endif /* CONFIG_REGULATOR */ |
---|
948 | 1081 | char devname[DEVNAME_SIZE]; |
---|
949 | 1082 | u32 id; |
---|
950 | 1083 | |
---|
| 1084 | +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) |
---|
| 1085 | + void *model; |
---|
| 1086 | + struct kmem_cache *irq_slab; |
---|
| 1087 | + struct workqueue_struct *irq_workq; |
---|
| 1088 | + atomic_t serving_job_irq; |
---|
| 1089 | + atomic_t serving_gpu_irq; |
---|
| 1090 | + atomic_t serving_mmu_irq; |
---|
| 1091 | + spinlock_t reg_op_lock; |
---|
| 1092 | +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ |
---|
951 | 1093 | struct kbase_pm_device_data pm; |
---|
952 | 1094 | |
---|
953 | 1095 | struct kbase_mem_pool_group mem_pools; |
---|
.. | .. |
---|
957 | 1099 | struct memory_group_manager_device *mgm_dev; |
---|
958 | 1100 | |
---|
959 | 1101 | struct kbase_as as[BASE_MAX_NR_AS]; |
---|
960 | | - u16 as_free; /* Bitpattern of free Address Spaces */ |
---|
| 1102 | + u16 as_free; |
---|
961 | 1103 | struct kbase_context *as_to_kctx[BASE_MAX_NR_AS]; |
---|
962 | 1104 | |
---|
963 | 1105 | spinlock_t mmu_mask_change; |
---|
| 1106 | + |
---|
| 1107 | + bool pagesize_2mb; |
---|
964 | 1108 | |
---|
965 | 1109 | struct kbase_gpu_props gpu_props; |
---|
966 | 1110 | |
---|
.. | .. |
---|
975 | 1119 | s8 nr_hw_address_spaces; |
---|
976 | 1120 | s8 nr_user_address_spaces; |
---|
977 | 1121 | |
---|
| 1122 | + /** |
---|
| 1123 | + * @pbha_propagate_bits: Record of Page-Based Hardware Attribute Propagate bits to |
---|
| 1124 | + * restore to L2_CONFIG upon GPU reset. |
---|
| 1125 | + */ |
---|
| 1126 | + u8 pbha_propagate_bits; |
---|
| 1127 | + |
---|
978 | 1128 | #if MALI_USE_CSF |
---|
979 | 1129 | struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw; |
---|
980 | 1130 | #else |
---|
.. | .. |
---|
987 | 1137 | |
---|
988 | 1138 | struct kbase_instr_backend backend; |
---|
989 | 1139 | } hwcnt; |
---|
| 1140 | + |
---|
| 1141 | + struct kbase_hwcnt_backend_interface hwcnt_gpu_jm_backend; |
---|
990 | 1142 | #endif |
---|
991 | 1143 | |
---|
992 | 1144 | struct kbase_hwcnt_backend_interface hwcnt_gpu_iface; |
---|
| 1145 | + struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer; |
---|
| 1146 | + |
---|
993 | 1147 | struct kbase_hwcnt_context *hwcnt_gpu_ctx; |
---|
994 | 1148 | struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt; |
---|
995 | 1149 | struct kbase_vinstr_context *vinstr_ctx; |
---|
| 1150 | + struct kbase_kinstr_prfcnt_context *kinstr_prfcnt_ctx; |
---|
996 | 1151 | |
---|
997 | 1152 | atomic_t timeline_flags; |
---|
998 | 1153 | struct kbase_timeline *timeline; |
---|
.. | .. |
---|
1002 | 1157 | #endif |
---|
1003 | 1158 | u32 reset_timeout_ms; |
---|
1004 | 1159 | |
---|
| 1160 | + u64 lowest_gpu_freq_khz; |
---|
| 1161 | + |
---|
| 1162 | +#if MALI_USE_CSF |
---|
| 1163 | + struct kbase_backend_time backend_time; |
---|
| 1164 | +#endif |
---|
| 1165 | + |
---|
1005 | 1166 | bool cache_clean_in_progress; |
---|
1006 | | - bool cache_clean_queued; |
---|
| 1167 | + u32 cache_clean_queued; |
---|
1007 | 1168 | wait_queue_head_t cache_clean_wait; |
---|
1008 | 1169 | |
---|
1009 | 1170 | void *platform_context; |
---|
1010 | 1171 | |
---|
1011 | 1172 | struct list_head kctx_list; |
---|
1012 | 1173 | struct mutex kctx_list_lock; |
---|
1013 | | - atomic_t group_max_uid_in_devices; |
---|
1014 | 1174 | |
---|
| 1175 | + struct rockchip_opp_info opp_info; |
---|
| 1176 | + bool is_runtime_resumed; |
---|
| 1177 | + unsigned long current_nominal_freq; |
---|
| 1178 | + struct monitor_dev_info *mdev_info; |
---|
1015 | 1179 | #ifdef CONFIG_MALI_BIFROST_DEVFREQ |
---|
1016 | 1180 | struct devfreq_dev_profile devfreq_profile; |
---|
1017 | 1181 | struct devfreq *devfreq; |
---|
1018 | 1182 | unsigned long current_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; |
---|
1019 | | - unsigned long current_nominal_freq; |
---|
1020 | 1183 | unsigned long current_voltages[BASE_MAX_NR_CLOCKS_REGULATORS]; |
---|
1021 | 1184 | u64 current_core_mask; |
---|
1022 | 1185 | struct kbase_devfreq_opp *devfreq_table; |
---|
1023 | 1186 | int num_opps; |
---|
1024 | 1187 | struct kbasep_pm_metrics last_devfreq_metrics; |
---|
1025 | | - struct monitor_dev_info *mdev_info; |
---|
1026 | 1188 | struct ipa_power_model_data *model_data; |
---|
1027 | 1189 | struct kbase_devfreq_queue_info devfreq_queue; |
---|
1028 | 1190 | |
---|
1029 | 1191 | #if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) |
---|
| 1192 | + struct devfreq_cooling_power dfc_power; |
---|
1030 | 1193 | struct thermal_cooling_device *devfreq_cooling; |
---|
1031 | 1194 | bool ipa_protection_mode_switched; |
---|
1032 | 1195 | struct { |
---|
.. | .. |
---|
1052 | 1215 | #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ |
---|
1053 | 1216 | unsigned long previous_frequency; |
---|
1054 | 1217 | |
---|
| 1218 | +#if !MALI_USE_CSF |
---|
1055 | 1219 | atomic_t job_fault_debug; |
---|
| 1220 | +#endif /* !MALI_USE_CSF */ |
---|
1056 | 1221 | |
---|
1057 | 1222 | #if IS_ENABLED(CONFIG_DEBUG_FS) |
---|
1058 | 1223 | struct dentry *mali_debugfs_directory; |
---|
.. | .. |
---|
1063 | 1228 | u64 debugfs_as_read_bitmap; |
---|
1064 | 1229 | #endif /* CONFIG_MALI_BIFROST_DEBUG */ |
---|
1065 | 1230 | |
---|
| 1231 | +#if !MALI_USE_CSF |
---|
1066 | 1232 | wait_queue_head_t job_fault_wq; |
---|
1067 | 1233 | wait_queue_head_t job_fault_resume_wq; |
---|
1068 | 1234 | struct workqueue_struct *job_fault_resume_workq; |
---|
1069 | 1235 | struct list_head job_fault_event_list; |
---|
1070 | 1236 | spinlock_t job_fault_event_lock; |
---|
| 1237 | +#endif /* !MALI_USE_CSF */ |
---|
1071 | 1238 | |
---|
1072 | 1239 | #if !MALI_CUSTOMER_RELEASE |
---|
1073 | 1240 | struct { |
---|
.. | .. |
---|
1086 | 1253 | |
---|
1087 | 1254 | atomic_t faults_pending; |
---|
1088 | 1255 | |
---|
| 1256 | +#if MALI_USE_CSF |
---|
| 1257 | + bool mmu_hw_operation_in_progress; |
---|
| 1258 | +#endif |
---|
| 1259 | + bool mmu_page_migrate_in_progress; |
---|
1089 | 1260 | bool poweroff_pending; |
---|
1090 | 1261 | |
---|
1091 | | -#if (KERNEL_VERSION(4, 4, 0) <= LINUX_VERSION_CODE) |
---|
1092 | 1262 | bool infinite_cache_active_default; |
---|
1093 | | -#else |
---|
1094 | | - u32 infinite_cache_active_default; |
---|
1095 | | -#endif |
---|
| 1263 | + |
---|
1096 | 1264 | struct kbase_mem_pool_group_config mem_pool_defaults; |
---|
1097 | 1265 | |
---|
1098 | 1266 | u32 current_gpu_coherency_mode; |
---|
.. | .. |
---|
1130 | 1298 | u8 l2_hash_override; |
---|
1131 | 1299 | bool l2_hash_values_override; |
---|
1132 | 1300 | u32 l2_hash_values[ASN_HASH_COUNT]; |
---|
| 1301 | + |
---|
| 1302 | + u32 sysc_alloc[SYSC_ALLOC_COUNT]; |
---|
1133 | 1303 | |
---|
1134 | 1304 | struct mutex fw_load_lock; |
---|
1135 | 1305 | #if MALI_USE_CSF |
---|
.. | .. |
---|
1172 | 1342 | struct priority_control_manager_device *pcm_dev; |
---|
1173 | 1343 | |
---|
1174 | 1344 | struct notifier_block oom_notifier_block; |
---|
| 1345 | + |
---|
| 1346 | +#if !MALI_USE_CSF |
---|
| 1347 | + spinlock_t quick_reset_lock; |
---|
| 1348 | + bool quick_reset_enabled; |
---|
| 1349 | + /*
---|
| 1350 | + * After entering quick_reset_mode (i.e. quick_reset_enabled is true),
---|
| 1351 | + * this counts the atoms that have reached the KBASE_JD_ATOM_STATE_HW_COMPLETED state.
---|
| 1352 | + *
---|
| 1353 | + * Once num_of_atoms_hw_completed reaches a certain value, quick_reset_mode is exited.
---|
| 1354 | + * See the references to num_of_atoms_hw_completed in kbase_js_complete_atom().
---|
| 1355 | + */
---|
| 1356 | + u32 num_of_atoms_hw_completed; |
---|
| 1357 | +#endif |
---|
| 1358 | + |
---|
| 1359 | + struct kbase_mem_migrate mem_migrate; |
---|
| 1360 | + |
---|
| 1361 | +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) |
---|
| 1362 | + atomic_t live_fence_metadata; |
---|
| 1363 | +#endif |
---|
| 1364 | + u32 mmu_as_inactive_wait_time_ms; |
---|
| 1365 | + struct kmem_cache *va_region_slab; |
---|
1175 | 1366 | }; |
---|
1176 | 1367 | |
---|
1177 | 1368 | /** |
---|
.. | .. |
---|
1254 | 1445 | * |
---|
1255 | 1446 | * @KCTX_DYING: Set when the context process is in the process of being evicted. |
---|
1256 | 1447 | * |
---|
1257 | | - * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this |
---|
1258 | | - * context, to disable use of implicit dma-buf fences. This is used to avoid |
---|
1259 | | - * potential synchronization deadlocks. |
---|
1260 | | - * |
---|
1261 | 1448 | * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory |
---|
1262 | 1449 | * allocations. For 64-bit clients it is enabled by default, and disabled by |
---|
1263 | 1450 | * default on 32-bit clients. Being able to clear this flag is only used for |
---|
.. | .. |
---|
1300 | 1487 | KCTX_PRIVILEGED = 1U << 7, |
---|
1301 | 1488 | KCTX_SCHEDULED = 1U << 8, |
---|
1302 | 1489 | KCTX_DYING = 1U << 9, |
---|
1303 | | - KCTX_NO_IMPLICIT_SYNC = 1U << 10, |
---|
1304 | 1490 | KCTX_FORCE_SAME_VA = 1U << 11, |
---|
1305 | 1491 | KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, |
---|
1306 | 1492 | KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, |
---|
.. | .. |
---|
1339 | 1525 | * |
---|
1340 | 1526 | * @KCTX_DYING: Set when the context process is in the process of being evicted. |
---|
1341 | 1527 | * |
---|
1342 | | - * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this |
---|
1343 | | - * context, to disable use of implicit dma-buf fences. This is used to avoid |
---|
1344 | | - * potential synchronization deadlocks. |
---|
1345 | 1528 | * |
---|
1346 | 1529 | * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory |
---|
1347 | 1530 | * allocations. For 64-bit clients it is enabled by default, and disabled by |
---|
.. | .. |
---|
1382 | 1565 | KCTX_PRIVILEGED = 1U << 7, |
---|
1383 | 1566 | KCTX_SCHEDULED = 1U << 8, |
---|
1384 | 1567 | KCTX_DYING = 1U << 9, |
---|
1385 | | - KCTX_NO_IMPLICIT_SYNC = 1U << 10, |
---|
1386 | 1568 | KCTX_FORCE_SAME_VA = 1U << 11, |
---|
1387 | 1569 | KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, |
---|
1388 | 1570 | KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, |
---|
.. | .. |
---|
1395 | 1577 | struct list_head link; |
---|
1396 | 1578 | struct page *page; |
---|
1397 | 1579 | DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K); |
---|
1398 | | -}; |
---|
1399 | | - |
---|
1400 | | -/** |
---|
1401 | | - * struct kbase_reg_zone - Information about GPU memory region zones |
---|
1402 | | - * @base_pfn: Page Frame Number in GPU virtual address space for the start of |
---|
1403 | | - * the Zone |
---|
1404 | | - * @va_size_pages: Size of the Zone in pages |
---|
1405 | | - * |
---|
1406 | | - * Track information about a zone KBASE_REG_ZONE() and related macros. |
---|
1407 | | - * In future, this could also store the &rb_root that are currently in |
---|
1408 | | - * &kbase_context |
---|
1409 | | - */ |
---|
1410 | | -struct kbase_reg_zone { |
---|
1411 | | - u64 base_pfn; |
---|
1412 | | - u64 va_size_pages; |
---|
1413 | 1580 | }; |
---|
1414 | 1581 | |
---|
1415 | 1582 | /** |
---|
.. | .. |
---|
1449 | 1616 | * @mem_partials_lock: Lock for protecting the operations done on the elements |
---|
1450 | 1617 | * added to @mem_partials list. |
---|
1451 | 1618 | * @mem_partials: List head for the list of large pages, 2MB in size, which |
---|
1452 | | - * which have been split into 4 KB pages and are used |
---|
1453 | | - * partially for the allocations >= 2 MB in size. |
---|
| 1619 | + * have been split into 4 KB pages and are used partially |
---|
| 1620 | + * for the allocations >= 2 MB in size. |
---|
1454 | 1621 | * @reg_lock: Lock used for GPU virtual address space management operations, |
---|
1455 | 1622 | * like adding/freeing a memory region in the address space. |
---|
1456 | 1623 | * Can be converted to a rwlock ?. |
---|
.. | .. |
---|
1462 | 1629 | * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA |
---|
1463 | 1630 | * zone of the GPU virtual address space. Used for GPU-executable |
---|
1464 | 1631 | * allocations which don't need the SAME_VA property. |
---|
| 1632 | + * @reg_rbtree_exec_fixed: RB tree of the memory regions allocated from the |
---|
| 1633 | + * EXEC_FIXED_VA zone of the GPU virtual address space. Used for |
---|
| 1634 | + * GPU-executable allocations with FIXED/FIXABLE GPU virtual |
---|
| 1635 | + * addresses. |
---|
| 1636 | + * @reg_rbtree_fixed: RB tree of the memory regions allocated from the FIXED_VA zone |
---|
| 1637 | + * of the GPU virtual address space. Used for allocations with |
---|
| 1638 | + * FIXED/FIXABLE GPU virtual addresses. |
---|
| 1639 | + * @num_fixable_allocs: A count for the number of memory allocations with the |
---|
| 1640 | + * BASE_MEM_FIXABLE property. |
---|
| 1641 | + * @num_fixed_allocs: A count for the number of memory allocations with the |
---|
| 1642 | + * BASE_MEM_FIXED property. |
---|
1465 | 1643 | * @reg_zone: Zone information for the reg_rbtree_<...> members. |
---|
1466 | 1644 | * @cookies: Bitmask containing of BITS_PER_LONG bits, used mainly for |
---|
1467 | 1645 | * SAME_VA allocations to defer the reservation of memory region |
---|
.. | .. |
---|
1538 | 1716 | * is scheduled in and an atom is pulled from the context's per |
---|
1539 | 1717 | * slot runnable tree in JM GPU or GPU command queue |
---|
1540 | 1718 | * group is programmed on CSG slot in CSF GPU. |
---|
1541 | | - * @mm_update_lock: lock used for handling of special tracking page. |
---|
1542 | 1719 | * @process_mm: Pointer to the memory descriptor of the process which |
---|
1543 | 1720 | * created the context. Used for accounting the physical |
---|
1544 | 1721 | * pages used for GPU allocations, done for the context, |
---|
1545 | | - * to the memory consumed by the process. |
---|
| 1722 | + * to the memory consumed by the process. A reference is taken |
---|
| 1723 | + * on this descriptor for the Userspace created contexts so that |
---|
| 1724 | + * Kbase can safely access it to update the memory usage counters. |
---|
| 1725 | + * The reference is dropped on context termination. |
---|
1546 | 1726 | * @gpu_va_end: End address of the GPU va space (in 4KB page units) |
---|
| 1727 | + * @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all |
---|
| 1728 | + * tiler heaps of the kbase context. |
---|
| 1729 | + * @running_total_tiler_heap_memory: Running total of the tiler heap memory in the |
---|
| 1730 | + * kbase context. |
---|
| 1731 | + * @peak_total_tiler_heap_memory: Peak value of the total tiler heap memory in the |
---|
| 1732 | + * kbase context. |
---|
1547 | 1733 | * @jit_va: Indicates if a JIT_VA zone has been created. |
---|
1548 | 1734 | * @mem_profile_data: Buffer containing the profiling information provided by |
---|
1549 | 1735 | * Userspace, can be read through the mem_profile debugfs file. |
---|
.. | .. |
---|
1559 | 1745 | * dumping of its debug info is in progress. |
---|
1560 | 1746 | * @job_fault_resume_event_list: List containing atoms completed after the faulty |
---|
1561 | 1747 | * atom but before the debug data for faulty atom was dumped. |
---|
| 1748 | + * @mem_view_column_width: Controls the number of bytes shown in every column of the |
---|
| 1749 | + * output of "mem_view" debugfs file. |
---|
1562 | 1750 | * @jsctx_queue: Per slot & priority arrays of object containing the root |
---|
1563 | 1751 | * of RB-tree holding currently runnable atoms on the job slot |
---|
1564 | 1752 | * and the head item of the linked list of atoms blocked on |
---|
1565 | 1753 | * cross-slot dependencies. |
---|
1566 | | - * @atoms_pulled: Total number of atoms currently pulled from the context. |
---|
1567 | | - * @atoms_pulled_slot: Per slot count of the number of atoms currently pulled |
---|
1568 | | - * from the context. |
---|
1569 | | - * @atoms_pulled_slot_pri: Per slot & priority count of the number of atoms currently |
---|
1570 | | - * pulled from the context. hwaccess_lock shall be held when |
---|
1571 | | - * accessing it. |
---|
1572 | | - * @blocked_js: Indicates if the context is blocked from submitting atoms |
---|
1573 | | - * on a slot at a given priority. This is set to true, when |
---|
1574 | | - * the atom corresponding to context is soft/hard stopped or |
---|
1575 | | - * removed from the HEAD_NEXT register in response to |
---|
1576 | | - * soft/hard stop. |
---|
| 1754 | + * @slot_tracking: Tracking and control of this context's use of all job |
---|
| 1755 | + * slots |
---|
| 1756 | + * @atoms_pulled_all_slots: Total number of atoms currently pulled from the |
---|
| 1757 | + * context, across all slots. |
---|
1577 | 1758 | * @slots_pullable: Bitmask of slots, indicating the slots for which the |
---|
1578 | 1759 | * context has pullable atoms in the runnable tree. |
---|
1579 | 1760 | * @work: Work structure used for deferred ASID assignment. |
---|
1580 | | - * @legacy_hwcnt_cli: Pointer to the legacy userspace hardware counters |
---|
1581 | | - * client, there can be only such client per kbase |
---|
1582 | | - * context. |
---|
1583 | | - * @legacy_hwcnt_lock: Lock used to prevent concurrent access to |
---|
1584 | | - * @legacy_hwcnt_cli. |
---|
1585 | 1761 | * @completed_jobs: List containing completed atoms for which base_jd_event is |
---|
1586 | 1762 | * to be posted. |
---|
1587 | 1763 | * @work_count: Number of work items, corresponding to atoms, currently |
---|
.. | .. |
---|
1597 | 1773 | * memory allocations. |
---|
1598 | 1774 | * @jit_current_allocations_per_bin: Current number of in-flight just-in-time |
---|
1599 | 1775 | * memory allocations per bin. |
---|
1600 | | - * @jit_version: Version number indicating whether userspace is using |
---|
1601 | | - * old or new version of interface for just-in-time |
---|
1602 | | - * memory allocations. |
---|
1603 | | - * 1 -> client used KBASE_IOCTL_MEM_JIT_INIT_10_2 |
---|
1604 | | - * 2 -> client used KBASE_IOCTL_MEM_JIT_INIT_11_5 |
---|
1605 | | - * 3 -> client used KBASE_IOCTL_MEM_JIT_INIT |
---|
1606 | 1776 | * @jit_group_id: A memory group ID to be passed to a platform-specific |
---|
1607 | 1777 | * memory group manager. |
---|
1608 | 1778 | * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). |
---|
.. | .. |
---|
1674 | 1844 | * @limited_core_mask: The mask that is applied to the affinity in case of atoms |
---|
1675 | 1845 | * marked with BASE_JD_REQ_LIMITED_CORE_MASK. |
---|
1676 | 1846 | * @platform_data: Pointer to platform specific per-context data. |
---|
| 1847 | + * @task: Pointer to the task structure of the main thread of the process |
---|
| 1848 | + * that created the Kbase context. It would be set only for the |
---|
| 1849 | + * contexts created by the Userspace and not for the contexts |
---|
| 1850 | + * created internally by the Kbase. |
---|
1677 | 1851 | * |
---|
1678 | 1852 | * A kernel base context is an entity among which the GPU is scheduled. |
---|
1679 | 1853 | * Each context has its own GPU address space. |
---|
.. | .. |
---|
1711 | 1885 | struct rb_root reg_rbtree_same; |
---|
1712 | 1886 | struct rb_root reg_rbtree_custom; |
---|
1713 | 1887 | struct rb_root reg_rbtree_exec; |
---|
| 1888 | +#if MALI_USE_CSF |
---|
| 1889 | + struct rb_root reg_rbtree_exec_fixed; |
---|
| 1890 | + struct rb_root reg_rbtree_fixed; |
---|
| 1891 | + atomic64_t num_fixable_allocs; |
---|
| 1892 | + atomic64_t num_fixed_allocs; |
---|
| 1893 | +#endif |
---|
1714 | 1894 | struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX]; |
---|
1715 | 1895 | |
---|
1716 | 1896 | #if MALI_USE_CSF |
---|
.. | .. |
---|
1719 | 1899 | struct kbase_jd_context jctx; |
---|
1720 | 1900 | struct jsctx_queue jsctx_queue |
---|
1721 | 1901 | [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; |
---|
| 1902 | + struct kbase_jsctx_slot_tracking slot_tracking[BASE_JM_MAX_NR_SLOTS]; |
---|
| 1903 | + atomic_t atoms_pulled_all_slots; |
---|
1722 | 1904 | |
---|
1723 | 1905 | struct list_head completed_jobs; |
---|
1724 | 1906 | atomic_t work_count; |
---|
1725 | 1907 | struct timer_list soft_job_timeout; |
---|
1726 | 1908 | |
---|
1727 | | - atomic_t atoms_pulled; |
---|
1728 | | - atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS]; |
---|
1729 | | - int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][ |
---|
1730 | | - KBASE_JS_ATOM_SCHED_PRIO_COUNT]; |
---|
1731 | 1909 | int priority; |
---|
1732 | | - bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; |
---|
1733 | 1910 | s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; |
---|
1734 | 1911 | u32 slots_pullable; |
---|
1735 | 1912 | u32 age_count; |
---|
.. | .. |
---|
1753 | 1930 | |
---|
1754 | 1931 | struct list_head waiting_soft_jobs; |
---|
1755 | 1932 | spinlock_t waiting_soft_jobs_lock; |
---|
1756 | | -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE |
---|
1757 | | - struct { |
---|
1758 | | - struct list_head waiting_resource; |
---|
1759 | | - struct workqueue_struct *wq; |
---|
1760 | | - } dma_fence; |
---|
1761 | | -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ |
---|
1762 | 1933 | |
---|
1763 | 1934 | int as_nr; |
---|
1764 | 1935 | |
---|
1765 | 1936 | atomic_t refcount; |
---|
1766 | 1937 | |
---|
1767 | | - spinlock_t mm_update_lock; |
---|
1768 | | - struct mm_struct __rcu *process_mm; |
---|
| 1938 | + struct mm_struct *process_mm; |
---|
1769 | 1939 | u64 gpu_va_end; |
---|
| 1940 | +#if MALI_USE_CSF |
---|
| 1941 | + u32 running_total_tiler_heap_nr_chunks; |
---|
| 1942 | + u64 running_total_tiler_heap_memory; |
---|
| 1943 | + u64 peak_total_tiler_heap_memory; |
---|
| 1944 | +#endif |
---|
1770 | 1945 | bool jit_va; |
---|
1771 | 1946 | |
---|
1772 | 1947 | #if IS_ENABLED(CONFIG_DEBUG_FS) |
---|
.. | .. |
---|
1778 | 1953 | unsigned int *reg_dump; |
---|
1779 | 1954 | atomic_t job_fault_count; |
---|
1780 | 1955 | struct list_head job_fault_resume_event_list; |
---|
| 1956 | + unsigned int mem_view_column_width; |
---|
1781 | 1957 | |
---|
1782 | 1958 | #endif /* CONFIG_DEBUG_FS */ |
---|
1783 | | - |
---|
1784 | | - struct kbase_hwcnt_legacy_client *legacy_hwcnt_cli; |
---|
1785 | | - struct mutex legacy_hwcnt_lock; |
---|
1786 | | - |
---|
1787 | 1959 | struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT]; |
---|
1788 | 1960 | u8 jit_max_allocations; |
---|
1789 | 1961 | u8 jit_current_allocations; |
---|
1790 | 1962 | u8 jit_current_allocations_per_bin[256]; |
---|
1791 | | - u8 jit_version; |
---|
1792 | 1963 | u8 jit_group_id; |
---|
1793 | 1964 | #if MALI_JIT_PRESSURE_LIMIT_BASE |
---|
1794 | 1965 | u64 jit_phys_pages_limit; |
---|
.. | .. |
---|
1827 | 1998 | #if !MALI_USE_CSF |
---|
1828 | 1999 | void *platform_data; |
---|
1829 | 2000 | #endif |
---|
| 2001 | + |
---|
| 2002 | + struct task_struct *task; |
---|
1830 | 2003 | }; |
---|
1831 | 2004 | |
---|
1832 | 2005 | #ifdef CONFIG_MALI_CINSTR_GWT |
---|
.. | .. |
---|
1855 | 2028 | * to a @kbase_context. |
---|
1856 | 2029 | * @ext_res_node: List head for adding the metadata to a |
---|
1857 | 2030 | * @kbase_context. |
---|
1858 | | - * @alloc: The physical memory allocation structure |
---|
1859 | | - * which is mapped. |
---|
1860 | | - * @gpu_addr: The GPU virtual address the resource is |
---|
1861 | | - * mapped to. |
---|
| 2031 | + * @reg: External resource information, containing |
---|
| 2032 | + * the corresponding VA region |
---|
1862 | 2033 | * @ref: Reference count. |
---|
1863 | 2034 | * |
---|
1864 | 2035 | * External resources can be mapped into multiple contexts as well as the same |
---|
1865 | 2036 | * context multiple times. |
---|
1866 | | - * As kbase_va_region itself isn't refcounted we can't attach our extra |
---|
1867 | | - * information to it as it could be removed under our feet leaving external |
---|
1868 | | - * resources pinned. |
---|
| 2037 | + * As kbase_va_region is refcounted, we guarantee that it will be available |
---|
| 2038 | + * for the duration of the external resource, meaning it is sufficient to use |
---|
| 2039 | + * it to rederive any additional data, like the GPU address. |
---|
1869 | 2040 | * This metadata structure binds a single external resource to a single |
---|
1870 | 2041 | * context, ensuring that per context mapping is tracked separately so it can |
---|
1871 | 2042 | * be overridden when needed and abuses by the application (freeing the resource |
---|
.. | .. |
---|
1873 | 2044 | */ |
---|
1874 | 2045 | struct kbase_ctx_ext_res_meta { |
---|
1875 | 2046 | struct list_head ext_res_node; |
---|
1876 | | - struct kbase_mem_phy_alloc *alloc; |
---|
1877 | | - u64 gpu_addr; |
---|
| 2047 | + struct kbase_va_region *reg; |
---|
1878 | 2048 | u32 ref; |
---|
1879 | 2049 | }; |
---|
1880 | 2050 | |
---|
.. | .. |
---|
1904 | 2074 | return false; |
---|
1905 | 2075 | } |
---|
1906 | 2076 | |
---|
| 2077 | +/** |
---|
| 2078 | + * kbase_get_lock_region_min_size_log2 - Returns the minimum size of the MMU lock |
---|
| 2079 | + * region, as a logarithm |
---|
| 2080 | + * |
---|
| 2081 | + * @gpu_props: GPU properties |
---|
| 2082 | + * |
---|
| 2083 | + * Return: the minimum size of the MMU lock region as dictated by the corresponding |
---|
| 2084 | + * arch spec. |
---|
| 2085 | + */ |
---|
| 2086 | +static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props const *gpu_props) |
---|
| 2087 | +{ |
---|
| 2088 | + if (GPU_ID2_MODEL_MATCH_VALUE(gpu_props->props.core_props.product_id) >= |
---|
| 2089 | + GPU_ID2_MODEL_MAKE(12, 0)) |
---|
| 2090 | + return 12; /* 4 kB */ |
---|
| 2091 | + |
---|
| 2092 | + return 15; /* 32 kB */ |
---|
| 2093 | +} |
---|
| 2094 | + |
---|
1907 | 2095 | /* Conversion helpers for setting up high resolution timers */ |
---|
1908 | 2096 | #define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U)) |
---|
1909 | 2097 | #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) |
---|
.. | .. |
---|
1912 | 2100 | #define KBASE_CLEAN_CACHE_MAX_LOOPS 100000 |
---|
1913 | 2101 | /* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */ |
---|
1914 | 2102 | #define KBASE_AS_INACTIVE_MAX_LOOPS 100000000 |
---|
1915 | | - |
---|
1916 | | -#endif /* _KBASE_DEFS_H_ */ |
---|
| 2103 | +/* Maximum number of loops polling the GPU PRFCNT_ACTIVE bit before we assume the GPU has hung */ |
---|
| 2104 | +#define KBASE_PRFCNT_ACTIVE_MAX_LOOPS 100000000 |
---|
| 2105 | +#endif /* _KBASE_DEFS_H_ */ |
---|