From 6778948f9de86c3cfaf36725a7c87dcff9ba247f Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Mon, 11 Dec 2023 08:20:59 +0000
Subject: [PATCH] kernel_5.10 no rt

---
 kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c | 1401 ++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 1104 insertions(+), 297 deletions(-)

diff --git a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
index b51c0ff..42bff1e 100644
--- a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
+++ b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -21,40 +21,46 @@
 #include "mali_kbase.h"
 #include "mali_kbase_csf_firmware_cfg.h"
+#include "mali_kbase_csf_firmware_log.h"
+#include "mali_kbase_csf_firmware_core_dump.h"
 #include "mali_kbase_csf_trace_buffer.h"
 #include "mali_kbase_csf_timeout.h"
 #include "mali_kbase_mem.h"
+#include "mali_kbase_mem_pool_group.h"
 #include "mali_kbase_reset_gpu.h"
 #include "mali_kbase_ctx_sched.h"
 #include "mali_kbase_csf_scheduler.h"
+#include <mali_kbase_hwaccess_time.h>
 #include "device/mali_kbase_device.h"
 #include "backend/gpu/mali_kbase_pm_internal.h"
 #include "tl/mali_kbase_timeline_priv.h"
+#include "tl/mali_kbase_tracepoints.h"
 #include "mali_kbase_csf_tl_reader.h"
 #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
 #include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
-
+#include <csf/mali_kbase_csf_registers.h>
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/firmware.h>
 #include <linux/mman.h>
 #include <linux/string.h>
 #include <linux/mutex.h>
+#include <linux/ctype.h>
 #if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE)
 #include <linux/set_memory.h>
 #endif
 #include <mmu/mali_kbase_mmu.h>
 #include <asm/arch_timer.h>
+#include <linux/delay.h>
 
 #define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20)
-
 static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin";
 module_param_string(fw_name, fw_name, sizeof(fw_name), 0644);
 MODULE_PARM_DESC(fw_name, "firmware image");
 
 /* The waiting time for firmware to boot */
-static unsigned int csf_firmware_boot_timeout_ms = 500;
+static unsigned int csf_firmware_boot_timeout_ms;
 module_param(csf_firmware_boot_timeout_ms, uint, 0444);
 MODULE_PARM_DESC(csf_firmware_boot_timeout_ms,
 		 "Maximum time to wait for firmware to boot.");
@@ -72,9 +78,11 @@
 		 "Enables effective use of a debugger for debugging firmware code.");
 #endif
 
-#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul)
-#define FIRMWARE_HEADER_VERSION (0ul)
-#define FIRMWARE_HEADER_LENGTH (0x14ul)
+
+#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul)
+#define FIRMWARE_HEADER_VERSION_MAJOR (0ul)
+#define FIRMWARE_HEADER_VERSION_MINOR (3ul)
+#define FIRMWARE_HEADER_LENGTH (0x14ul)
 
 #define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \
 	(CSF_FIRMWARE_ENTRY_READ | \
@@ -85,11 +93,13 @@
 	 CSF_FIRMWARE_ENTRY_ZERO | \
 	 CSF_FIRMWARE_ENTRY_CACHE_MODE)
 
-#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0)
-#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1)
-#define CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST (2)
-#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3)
-#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4)
+#define 
CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) +#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) +#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) +#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) +#define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6) +#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) +#define CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP (9) #define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3) #define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3) @@ -100,11 +110,16 @@ #define TL_METADATA_ENTRY_NAME_OFFSET (0x8) +#define BUILD_INFO_METADATA_SIZE_OFFSET (0x4) +#define BUILD_INFO_GIT_SHA_LEN (40U) +#define BUILD_INFO_GIT_DIRTY_LEN (1U) +#define BUILD_INFO_GIT_SHA_PATTERN "git_sha: " + #define CSF_MAX_FW_STOP_LOOPS (100000) -#define CSF_GLB_REQ_CFG_MASK \ - (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ - GLB_REQ_CFG_PWROFF_TIMER_MASK) +#define CSF_GLB_REQ_CFG_MASK \ + (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ + GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) static inline u32 input_page_read(const u32 *const input, const u32 offset) { @@ -155,8 +170,7 @@ } /** - * struct firmware_timeline_metadata - - * Timeline metadata item within the MCU firmware + * struct firmware_timeline_metadata - Timeline metadata item within the MCU firmware * * @node: List head linking all timeline metadata to * kbase_device:csf.firmware_timeline_metadata. @@ -187,11 +201,13 @@ if (!interface) return -EINVAL; - reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, - interface->num_pages, KBASE_REG_ZONE_MCU_SHARED); + reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, + interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED); if (reg) { + mutex_lock(&kbdev->csf.reg_lock); ret = kbase_add_va_region_rbtree(kbdev, reg, - interface->virtual, interface->num_pages, 1); + interface->virtual, interface->num_pages_aligned, 1); + mutex_unlock(&kbdev->csf.reg_lock); if (ret) kfree(reg); else @@ -213,10 +229,11 @@ return (max_loops == 0) ? 
-1 : 0; } -void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) +void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev) { - if (wait_mcu_status_value(kbdev, MCU_CNTRL_DISABLE) < 0) - dev_err(kbdev->dev, "MCU failed to get disabled"); + KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev)); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE); } static void wait_for_firmware_stop(struct kbase_device *kbdev) @@ -225,6 +242,13 @@ /* This error shall go away once MIDJM-2371 is closed */ dev_err(kbdev->dev, "Firmware failed to stop"); } + + KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF(kbdev, kbase_backend_get_cycle_cnt(kbdev)); +} + +void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) +{ + wait_for_firmware_stop(kbdev); } static void stop_csf_firmware(struct kbase_device *kbdev) @@ -237,9 +261,14 @@ static void wait_for_firmware_boot(struct kbase_device *kbdev) { - const long wait_timeout = - kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms); + long wait_timeout; long remaining; + + if (!csf_firmware_boot_timeout_ms) + csf_firmware_boot_timeout_ms = + kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_BOOT_TIMEOUT); + + wait_timeout = kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms); /* Firmware will generate a global interface interrupt once booting * is complete @@ -257,22 +286,51 @@ { kbase_csf_firmware_enable_mcu(kbdev); +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + kbase_debug_coresight_csf_state_request(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); + + if (!kbase_debug_coresight_csf_state_wait(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) + dev_err(kbdev->dev, "Timeout waiting for CoreSight to be enabled"); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + wait_for_firmware_boot(kbdev); } -static void wait_ready(struct kbase_device *kbdev) +/** + * wait_ready() - Wait for previously issued MMU command to complete. + * + * @kbdev: Kbase device to wait for a MMU command to complete. + * + * Reset GPU if the wait for previously issued command times out. + * + * Return: 0 on success, error code otherwise. + */ +static int wait_ready(struct kbase_device *kbdev) { - u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; - u32 val; + const ktime_t wait_loop_start = ktime_get_raw(); + const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms; + s64 diff; - val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)); + do { + unsigned int i; - /* Wait for a while for the update command to take effect */ - while (--max_loops && (val & AS_STATUS_AS_ACTIVE)) - val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)); + for (i = 0; i < 1000; i++) { + /* Wait for the MMU status to indicate there is no active command */ + if (!(kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & + AS_STATUS_AS_ACTIVE)) + return 0; + } - if (max_loops == 0) - dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n"); + diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start)); + } while (diff < mmu_as_inactive_wait_time_ms); + + dev_err(kbdev->dev, + "AS_ACTIVE bit stuck for MCU AS. 
Might be caused by unstable GPU clk/pwr or faulty system"); + + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu_locked(kbdev); + + return -ETIMEDOUT; } static void unload_mmu_tables(struct kbase_device *kbdev) @@ -287,7 +345,7 @@ mutex_unlock(&kbdev->mmu_hw_mutex); } -static void load_mmu_tables(struct kbase_device *kbdev) +static int load_mmu_tables(struct kbase_device *kbdev) { unsigned long irq_flags; @@ -298,7 +356,7 @@ mutex_unlock(&kbdev->mmu_hw_mutex); /* Wait for a while for the update command to take effect */ - wait_ready(kbdev); + return wait_ready(kbdev); } /** @@ -405,74 +463,175 @@ memset(p + copy_len, 0, zi_len); } - kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), - PAGE_SIZE, DMA_TO_DEVICE); + kbase_sync_single_for_device(kbdev, kbase_dma_addr_from_tagged(phys[page_num]), + PAGE_SIZE, DMA_TO_DEVICE); kunmap_atomic(p); } } -static int reload_fw_data_sections(struct kbase_device *kbdev) +static int reload_fw_image(struct kbase_device *kbdev) { const u32 magic = FIRMWARE_HEADER_MAGIC; struct kbase_csf_firmware_interface *interface; - const struct firmware *firmware; + struct kbase_csf_mcu_fw *const mcu_fw = &kbdev->csf.fw; int ret = 0; - if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) { - dev_err(kbdev->dev, - "Failed to reload firmware image '%s'\n", - fw_name); - return -ENOENT; - } - - /* Do couple of basic sanity checks */ - if (firmware->size < FIRMWARE_HEADER_LENGTH) { - dev_err(kbdev->dev, "Firmware image unexpectedly too small\n"); + if (WARN_ON(mcu_fw->data == NULL)) { + dev_err(kbdev->dev, "Firmware image copy not loaded\n"); ret = -EINVAL; goto out; } - if (memcmp(firmware->data, &magic, sizeof(magic)) != 0) { + /* Do a basic sanity check on MAGIC signature */ + if (memcmp(mcu_fw->data, &magic, sizeof(magic)) != 0) { dev_err(kbdev->dev, "Incorrect magic value, firmware image could have been corrupted\n"); ret = -EINVAL; goto out; } list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { - /* Skip reload of text & read only data sections */ - if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) || - !(interface->flags & CSF_FIRMWARE_ENTRY_WRITE)) - continue; + /* Dont skip re-loading any section if full reload was requested */ + if (!kbdev->csf.firmware_full_reload_needed) { + /* Skip reload of text & read only data sections */ + if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) || + !(interface->flags & CSF_FIRMWARE_ENTRY_WRITE)) + continue; + } - load_fw_image_section(kbdev, firmware->data, interface->phys, - interface->num_pages, interface->flags, - interface->data_start, interface->data_end); + load_fw_image_section(kbdev, mcu_fw->data, interface->phys, interface->num_pages, + interface->flags, interface->data_start, interface->data_end); } - kbase_csf_firmware_reload_trace_buffers_data(kbdev); + kbdev->csf.firmware_full_reload_needed = false; + kbase_csf_firmware_reload_trace_buffers_data(kbdev); out: - release_firmware(firmware); return ret; } /** + * entry_find_large_page_to_reuse() - Find if the large page of previously parsed + * FW interface entry can be reused to store + * the contents of new FW interface entry. 
+ * + * @kbdev: Kbase device structure + * @virtual_start: Start of the virtual address range required for an entry allocation + * @virtual_end: End of the virtual address range required for an entry allocation + * @flags: Firmware entry flags for comparison with the reusable pages found + * @phys: Pointer to the array of physical (tagged) addresses making up the new + * FW interface entry. It is an output parameter which would be made to + * point to an already existing array allocated for the previously parsed + * FW interface entry using large page(s). If no appropriate entry is + * found it is set to NULL. + * @pma: Pointer to a protected memory allocation. It is an output parameter + * which would be made to the protected memory allocation of a previously + * parsed FW interface entry using large page(s) from protected memory. + * If no appropriate entry is found it is set to NULL. + * @num_pages: Number of pages requested. + * @num_pages_aligned: This is an output parameter used to carry the number of 4KB pages + * within the 2MB pages aligned allocation. + * @is_small_page: This is an output flag used to select between the small and large page + * to be used for the FW entry allocation. + * + * Go through all the already initialized interfaces and find if a previously + * allocated large page can be used to store contents of new FW interface entry. + * + * Return: true if a large page can be reused, false otherwise. + */ +static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev, + const u32 virtual_start, const u32 virtual_end, + const u32 flags, struct tagged_addr **phys, + struct protected_memory_allocation ***pma, + u32 num_pages, u32 *num_pages_aligned, + bool *is_small_page) +{ + struct kbase_csf_firmware_interface *interface = NULL; + struct kbase_csf_firmware_interface *target_interface = NULL; + u32 virtual_diff_min = U32_MAX; + bool reuse_large_page = false; + + CSTD_UNUSED(interface); + CSTD_UNUSED(target_interface); + CSTD_UNUSED(virtual_diff_min); + + *num_pages_aligned = num_pages; + *is_small_page = true; + *phys = NULL; + *pma = NULL; + + + /* If the section starts at 2MB aligned boundary, + * then use 2MB page(s) for it. + */ + if (!(virtual_start & (SZ_2M - 1))) { + *num_pages_aligned = + round_up(*num_pages_aligned, NUM_4K_PAGES_IN_2MB_PAGE); + *is_small_page = false; + goto out; + } + + /* If the section doesn't lie within the same 2MB aligned boundary, + * then use 4KB pages as it would be complicated to use a 2MB page + * for such section. + */ + if ((virtual_start & ~(SZ_2M - 1)) != (virtual_end & ~(SZ_2M - 1))) + goto out; + + /* Find the nearest 2MB aligned section which comes before the current + * section. 
+ */ + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + const u32 virtual_diff = virtual_start - interface->virtual; + + if (interface->virtual > virtual_end) + continue; + + if (interface->virtual & (SZ_2M - 1)) + continue; + + if ((virtual_diff < virtual_diff_min) && (interface->flags == flags)) { + target_interface = interface; + virtual_diff_min = virtual_diff; + } + } + + if (target_interface) { + const u32 page_index = virtual_diff_min >> PAGE_SHIFT; + + if (page_index >= target_interface->num_pages_aligned) + goto out; + + if (target_interface->phys) + *phys = &target_interface->phys[page_index]; + + if (target_interface->pma) + *pma = &target_interface->pma[page_index / NUM_4K_PAGES_IN_2MB_PAGE]; + + *is_small_page = false; + reuse_large_page = true; + } + +out: + return reuse_large_page; +} + +/** * parse_memory_setup_entry() - Process an "interface memory setup" section + * + * @kbdev: Kbase device structure + * @fw: The firmware image containing the section + * @entry: Pointer to the start of the section + * @size: Size (in bytes) of the section * * Read an "interface memory setup" section from the firmware image and create * the necessary memory region including the MMU page tables. If successful * the interface will be added to the kbase_device:csf.firmware_interfaces list. * * Return: 0 if successful, negative error code on failure - * - * @kbdev: Kbase device structure - * @fw: The firmware image containing the section - * @entry: Pointer to the start of the section - * @size: Size (in bytes) of the section */ static int parse_memory_setup_entry(struct kbase_device *kbdev, - const struct firmware *fw, - const u32 *entry, unsigned int size) + const struct kbase_csf_mcu_fw *const fw, const u32 *entry, + unsigned int size) { int ret = 0; const u32 flags = entry[0]; @@ -481,13 +640,19 @@ const u32 data_start = entry[3]; const u32 data_end = entry[4]; u32 num_pages; + u32 num_pages_aligned; char *name; + void *name_entry; + unsigned int name_len; struct tagged_addr *phys = NULL; struct kbase_csf_firmware_interface *interface = NULL; bool allocated_pages = false, protected_mode = false; unsigned long mem_flags = 0; u32 cache_mode = 0; struct protected_memory_allocation **pma = NULL; + bool reuse_pages = false; + bool is_small_page = true; + bool ignore_page_migration = true; if (data_end < data_start) { dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n", @@ -522,7 +687,7 @@ protected_mode = true; if (protected_mode && kbdev->csf.pma_dev == NULL) { - dev_err(kbdev->dev, + dev_dbg(kbdev->dev, "Protected memory allocator not found, Firmware protected mode entry will not be supported"); return 0; } @@ -530,23 +695,38 @@ num_pages = (virtual_end - virtual_start) >> PAGE_SHIFT; - phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); + reuse_pages = + entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags, &phys, + &pma, num_pages, &num_pages_aligned, &is_small_page); + if (!reuse_pages) + phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL); + if (!phys) return -ENOMEM; if (protected_mode) { - pma = kbase_csf_protected_memory_alloc(kbdev, phys, num_pages); - - if (pma == NULL) { - ret = -ENOMEM; - goto out; + if (!reuse_pages) { + pma = kbase_csf_protected_memory_alloc( + kbdev, phys, num_pages_aligned, is_small_page); } + + if (!pma) + ret = -ENOMEM; } else { - ret = kbase_mem_pool_alloc_pages( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - num_pages, phys, false); - if (ret < 0) - goto out; + 
if (!reuse_pages) { + ret = kbase_mem_pool_alloc_pages( + kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW, + is_small_page), + num_pages_aligned, phys, false, NULL); + ignore_page_migration = false; + } + } + + if (ret < 0) { + dev_err(kbdev->dev, + "Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x\n", + num_pages_aligned, virtual_start); + goto out; } allocated_pages = true; @@ -554,31 +734,42 @@ data_start, data_end); /* Allocate enough memory for the struct kbase_csf_firmware_interface and - * the name of the interface. An extra byte is allocated to place a - * NUL-terminator in. This should already be included according to the - * specification but here we add it anyway to be robust against a - * corrupt firmware image. + * the name of the interface. */ - interface = kmalloc(sizeof(*interface) + - size - INTERFACE_ENTRY_NAME_OFFSET + 1, GFP_KERNEL); + name_entry = (void *)entry + INTERFACE_ENTRY_NAME_OFFSET; + name_len = strnlen(name_entry, size - INTERFACE_ENTRY_NAME_OFFSET); + if (size < (INTERFACE_ENTRY_NAME_OFFSET + name_len + 1 + sizeof(u32))) { + dev_err(kbdev->dev, "Memory setup entry too short to contain virtual_exe_start"); + ret = -EINVAL; + goto out; + } + + interface = kmalloc(sizeof(*interface) + name_len + 1, GFP_KERNEL); if (!interface) { ret = -ENOMEM; goto out; } name = (void *)(interface + 1); - memcpy(name, entry + (INTERFACE_ENTRY_NAME_OFFSET / sizeof(*entry)), - size - INTERFACE_ENTRY_NAME_OFFSET); - name[size - INTERFACE_ENTRY_NAME_OFFSET] = 0; + memcpy(name, name_entry, name_len); + name[name_len] = 0; interface->name = name; interface->phys = phys; + interface->reuse_pages = reuse_pages; + interface->is_small_page = is_small_page; interface->num_pages = num_pages; + interface->num_pages_aligned = num_pages_aligned; interface->virtual = virtual_start; interface->kernel_map = NULL; interface->flags = flags; interface->data_start = data_start; interface->data_end = data_end; interface->pma = pma; + + /* Discover the virtual execution address field after the end of the name + * field taking into account the NULL-termination character. 
+ */ + interface->virtual_exe_start = *((u32 *)(name_entry + name_len + 1)); mem_flags = convert_mem_flags(kbdev, flags, &cache_mode); @@ -633,15 +824,19 @@ list_add(&interface->node, &kbdev->csf.firmware_interfaces); - ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, - virtual_start >> PAGE_SHIFT, phys, num_pages, mem_flags, - KBASE_MEM_GROUP_CSF_FW); + if (!reuse_pages) { + ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, + virtual_start >> PAGE_SHIFT, phys, + num_pages_aligned, mem_flags, + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, + ignore_page_migration); - if (ret != 0) { - dev_err(kbdev->dev, "Failed to insert firmware pages\n"); - /* The interface has been added to the list, so cleanup will - * be handled by firmware unloading - */ + if (ret != 0) { + dev_err(kbdev->dev, "Failed to insert firmware pages\n"); + /* The interface has been added to the list, so cleanup will + * be handled by firmware unloading + */ + } } dev_dbg(kbdev->dev, "Processed section '%s'", name); @@ -650,16 +845,22 @@ out: if (allocated_pages) { - if (protected_mode) { - kbase_csf_protected_memory_free(kbdev, pma, num_pages); - } else { - kbase_mem_pool_free_pages( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - num_pages, phys, false, false); + if (!reuse_pages) { + if (protected_mode) { + kbase_csf_protected_memory_free( + kbdev, pma, num_pages_aligned, is_small_page); + } else { + kbase_mem_pool_free_pages( + kbase_mem_pool_group_select( + kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page), + num_pages_aligned, phys, false, false); + } } } - kfree(phys); + if (!reuse_pages) + kfree(phys); + kfree(interface); return ret; } @@ -675,7 +876,8 @@ * @size: Size (in bytes) of the section */ static int parse_timeline_metadata_entry(struct kbase_device *kbdev, - const struct firmware *fw, const u32 *entry, unsigned int size) + const struct kbase_csf_mcu_fw *const fw, const u32 *entry, + unsigned int size) { const u32 data_start = entry[0]; const u32 data_size = entry[1]; @@ -718,7 +920,63 @@ } /** + * parse_build_info_metadata_entry() - Process a "build info metadata" section + * @kbdev: Kbase device structure + * @fw: Firmware image containing the section + * @entry: Pointer to the section + * @size: Size (in bytes) of the section + * + * This prints the git SHA of the firmware on frimware load. 
+ * + * Return: 0 if successful, negative error code on failure + */ +static int parse_build_info_metadata_entry(struct kbase_device *kbdev, + const struct kbase_csf_mcu_fw *const fw, + const u32 *entry, unsigned int size) +{ + const u32 meta_start_addr = entry[0]; + char *ptr = NULL; + size_t sha_pattern_len = strlen(BUILD_INFO_GIT_SHA_PATTERN); + + /* Only print git SHA to avoid releasing sensitive information */ + ptr = strstr(fw->data + meta_start_addr, BUILD_INFO_GIT_SHA_PATTERN); + /* Check that we won't overrun the found string */ + if (ptr && + strlen(ptr) >= BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN + sha_pattern_len) { + char git_sha[BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN + 1]; + int i = 0; + + /* Move ptr to start of SHA */ + ptr += sha_pattern_len; + for (i = 0; i < BUILD_INFO_GIT_SHA_LEN; i++) { + /* Ensure that the SHA is made up of hex digits */ + if (!isxdigit(ptr[i])) + break; + + git_sha[i] = ptr[i]; + } + + /* Check if the next char indicates git SHA is dirty */ + if (ptr[i] == ' ' || ptr[i] == '+') { + git_sha[i] = ptr[i]; + i++; + } + git_sha[i] = '\0'; + + dev_info(kbdev->dev, "Mali firmware git_sha: %s\n", git_sha); + } else + dev_info(kbdev->dev, "Mali firmware git_sha not found or invalid\n"); + + return 0; +} + +/** * load_firmware_entry() - Process an entry from a firmware image + * + * @kbdev: Kbase device + * @fw: Firmware image containing the entry + * @offset: Byte offset within the image of the entry to load + * @header: Header word of the entry * * Read an entry from a firmware image and do any necessary work (e.g. loading * the data into page accessible to the MCU). @@ -727,15 +985,9 @@ * otherwise the function will fail with -EINVAL * * Return: 0 if successful, negative error code on failure - * - * @kbdev: Kbase device - * @fw: Firmware image containing the entry - * @offset: Byte offset within the image of the entry to load - * @header: Header word of the entry */ -static int load_firmware_entry(struct kbase_device *kbdev, - const struct firmware *fw, - u32 offset, u32 header) +static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_csf_mcu_fw *const fw, + u32 offset, u32 header) { const unsigned int type = entry_type(header); unsigned int size = entry_size(header); @@ -780,18 +1032,6 @@ } return kbase_csf_firmware_cfg_option_entry_parse( kbdev, fw, entry, size, updatable); - case CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST: -#ifndef MALI_KBASE_BUILD - /* FW UTF option */ - if (size < 2*sizeof(*entry)) { - dev_err(kbdev->dev, "FW UTF entry too short (size=%u)\n", - size); - return -EINVAL; - } - return mali_kutf_process_fw_utf_entry(kbdev, fw->data, - fw->size, entry); -#endif - break; case CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER: /* Trace buffer */ if (size < TRACE_BUFFER_ENTRY_NAME_OFFSET + sizeof(*entry)) { @@ -809,13 +1049,35 @@ return -EINVAL; } return parse_timeline_metadata_entry(kbdev, fw, entry, size); - } - - if (!optional) { - dev_err(kbdev->dev, - "Unsupported non-optional entry type %u in firmware\n", - type); - return -EINVAL; + case CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA: + if (size < BUILD_INFO_METADATA_SIZE_OFFSET + sizeof(*entry)) { + dev_err(kbdev->dev, "Build info metadata entry too short (size=%u)\n", + size); + return -EINVAL; + } + return parse_build_info_metadata_entry(kbdev, fw, entry, size); + case CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST: + /* Function call list section */ + if (size < FUNC_CALL_LIST_ENTRY_NAME_OFFSET + sizeof(*entry)) { + dev_err(kbdev->dev, "Function call list entry too short 
(size=%u)\n", + size); + return -EINVAL; + } + kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry); + return 0; + case CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP: + /* Core Dump section */ + if (size < CORE_DUMP_ENTRY_START_ADDR_OFFSET + sizeof(*entry)) { + dev_err(kbdev->dev, "FW Core dump entry too short (size=%u)\n", size); + return -EINVAL; + } + return kbase_csf_firmware_core_dump_entry_parse(kbdev, entry); + default: + if (!optional) { + dev_err(kbdev->dev, "Unsupported non-optional entry type %u in firmware\n", + type); + return -EINVAL; + } } return 0; @@ -994,11 +1256,10 @@ iface->group_stride = shared_info[GLB_GROUP_STRIDE/4]; iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE/4]; - if (iface->version >= kbase_csf_interface_version(1, 1, 0)) { + if (iface->version >= kbase_csf_interface_version(1, 1, 0)) iface->instr_features = shared_info[GLB_INSTR_FEATURES / 4]; - } else { + else iface->instr_features = 0; - } if ((GROUP_CONTROL_0 + (unsigned long)iface->group_num * iface->group_stride) > @@ -1033,40 +1294,80 @@ return 0; } +static inline void access_firmware_memory_common(struct kbase_device *kbdev, + struct kbase_csf_firmware_interface *interface, u32 offset_bytes, + u32 *value, const bool read) +{ + u32 page_num = offset_bytes >> PAGE_SHIFT; + u32 offset_in_page = offset_bytes & ~PAGE_MASK; + struct page *target_page = as_page(interface->phys[page_num]); + uintptr_t cpu_addr = (uintptr_t)kmap_atomic(target_page); + u32 *addr = (u32 *)(cpu_addr + offset_in_page); + + if (read) { + kbase_sync_single_for_device(kbdev, + kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page, + sizeof(u32), DMA_BIDIRECTIONAL); + *value = *addr; + } else { + *addr = *value; + kbase_sync_single_for_device(kbdev, + kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page, + sizeof(u32), DMA_BIDIRECTIONAL); + } + + kunmap_atomic((u32 *)cpu_addr); +} + static inline void access_firmware_memory(struct kbase_device *kbdev, u32 gpu_addr, u32 *value, const bool read) { - struct kbase_csf_firmware_interface *interface; + struct kbase_csf_firmware_interface *interface, *access_interface = NULL; + u32 offset_bytes = 0; list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { if ((gpu_addr >= interface->virtual) && (gpu_addr < interface->virtual + (interface->num_pages << PAGE_SHIFT))) { - u32 offset_bytes = gpu_addr - interface->virtual; - u32 page_num = offset_bytes >> PAGE_SHIFT; - u32 offset_in_page = offset_bytes & ~PAGE_MASK; - struct page *target_page = as_page( - interface->phys[page_num]); - u32 *cpu_addr = kmap_atomic(target_page); - - if (read) { - kbase_sync_single_for_device(kbdev, - kbase_dma_addr(target_page) + offset_in_page, - sizeof(u32), DMA_BIDIRECTIONAL); - - *value = cpu_addr[offset_in_page >> 2]; - } else { - cpu_addr[offset_in_page >> 2] = *value; - - kbase_sync_single_for_device(kbdev, - kbase_dma_addr(target_page) + offset_in_page, - sizeof(u32), DMA_BIDIRECTIONAL); - } - - kunmap_atomic(cpu_addr); - return; + offset_bytes = gpu_addr - interface->virtual; + access_interface = interface; + break; } } - dev_warn(kbdev->dev, "Invalid GPU VA %x passed\n", gpu_addr); + + if (access_interface) + access_firmware_memory_common(kbdev, access_interface, offset_bytes, value, read); + else + dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr); +} + +static inline void access_firmware_memory_exe(struct kbase_device *kbdev, + u32 gpu_addr, u32 *value, const bool read) +{ + struct kbase_csf_firmware_interface *interface, *access_interface = 
NULL; + u32 offset_bytes = 0; + + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + if ((gpu_addr >= interface->virtual_exe_start) && + (gpu_addr < interface->virtual_exe_start + + (interface->num_pages << PAGE_SHIFT))) { + offset_bytes = gpu_addr - interface->virtual_exe_start; + access_interface = interface; + + /* If there's an overlap in execution address range between a moved and a + * non-moved areas, always prefer the moved one. The idea is that FW may + * move sections around during init time, but after the layout is settled, + * any moved sections are going to override non-moved areas at the same + * location. + */ + if (interface->virtual_exe_start != interface->virtual) + break; + } + } + + if (access_interface) + access_firmware_memory_common(kbdev, access_interface, offset_bytes, value, read); + else + dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr); } void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, @@ -1079,6 +1380,18 @@ u32 gpu_addr, u32 value) { access_firmware_memory(kbdev, gpu_addr, &value, false); +} + +void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, + u32 gpu_addr, u32 *value) +{ + access_firmware_memory_exe(kbdev, gpu_addr, value, true); +} + +void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, + u32 gpu_addr, u32 value) +{ + access_firmware_memory_exe(kbdev, gpu_addr, &value, false); } void kbase_csf_firmware_cs_input( @@ -1166,6 +1479,7 @@ dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val); return val; } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output); void kbase_csf_firmware_global_input( const struct kbase_csf_global_iface *const iface, const u32 offset, @@ -1176,6 +1490,7 @@ dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value); input_page_write(iface->input, offset, value); } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input); void kbase_csf_firmware_global_input_mask( const struct kbase_csf_global_iface *const iface, const u32 offset, @@ -1187,6 +1502,7 @@ offset, value, mask); input_page_partial_write(iface->input, offset, value, mask); } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask); u32 kbase_csf_firmware_global_input_read( const struct kbase_csf_global_iface *const iface, const u32 offset) @@ -1207,6 +1523,27 @@ dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val); return val; } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output); + +/** + * csf_doorbell_offset() - Calculate the offset to the CSF host doorbell + * @doorbell_nr: Doorbell number + * + * Return: CSF host register offset for the specified doorbell number. + */ +static u32 csf_doorbell_offset(int doorbell_nr) +{ + WARN_ON(doorbell_nr < 0); + WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); + + return CSF_HW_DOORBELL_PAGE_OFFSET + (doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE); +} + +void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr) +{ + kbase_reg_write(kbdev, csf_doorbell_offset(doorbell_nr), (u32)1); +} +EXPORT_SYMBOL(kbase_csf_ring_doorbell); /** * handle_internal_firmware_fatal - Handler for CS internal firmware fault. 
@@ -1292,11 +1629,10 @@ return complete; } -static int wait_for_global_request(struct kbase_device *const kbdev, - u32 const req_mask) +static int wait_for_global_request_with_timeout(struct kbase_device *const kbdev, + u32 const req_mask, unsigned int timeout_ms) { - const long wait_timeout = - kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + const long wait_timeout = kbase_csf_timeout_in_jiffies(timeout_ms); long remaining; int err = 0; @@ -1305,12 +1641,19 @@ wait_timeout); if (!remaining) { - dev_warn(kbdev->dev, "Timed out waiting for global request %x to complete", - req_mask); + dev_warn(kbdev->dev, + "[%llu] Timeout (%d ms) waiting for global request %x to complete", + kbase_backend_get_cycle_cnt(kbdev), timeout_ms, req_mask); err = -ETIMEDOUT; + } return err; +} + +static int wait_for_global_request(struct kbase_device *const kbdev, u32 const req_mask) +{ + return wait_for_global_request_with_timeout(kbdev, req_mask, kbdev->csf.fw_timeout_ms); } static void set_global_request( @@ -1371,16 +1714,113 @@ set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK); } +static void enable_gpu_idle_timer(struct kbase_device *const kbdev) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, + kbdev->csf.gpu_idle_dur_count); + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, + GLB_REQ_IDLE_ENABLE_MASK); + dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", + kbdev->csf.gpu_idle_dur_count); +} + +static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + bool complete = false; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) + complete = true; + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return complete; +} + +static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) +{ + u32 glb_debug_req; + + kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + + glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_debug_req ^= req_mask; + + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); +} + +static void request_fw_core_dump( + const struct kbase_csf_global_iface *const global_iface) +{ + uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); + + set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); +} + +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + int ret; + + /* Serialize CORE_DUMP requests. */ + mutex_lock(&kbdev->csf.reg_lock); + + /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + request_fw_core_dump(global_iface); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + /* Wait for firmware to acknowledge completion of the CORE_DUMP request. 
*/ + ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + if (!ret) + WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); + + mutex_unlock(&kbdev->csf.reg_lock); + + return ret; +} + +/** + * kbasep_enable_rtu - Enable Ray Tracing Unit on powering up shader core + * + * @kbdev: The kbase device structure of the device + * + * This function needs to be called to enable the Ray Tracing Unit + * by writing SHADER_PWRFEATURES only when host controls shader cores power. + */ +static void kbasep_enable_rtu(struct kbase_device *kbdev) +{ + const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + + if (gpu_id < GPU_ID2_PRODUCT_MAKE(12, 8, 3, 0)) + return; + + if (kbdev->csf.firmware_hctl_core_pwr) + kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_PWRFEATURES), 1); +} + static void global_init(struct kbase_device *const kbdev, u64 core_mask) { - u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | - GLB_ACK_IRQ_MASK_PING_MASK | - GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | - GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | - GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | - GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | - GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | - GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK; + u32 const ack_irq_mask = + GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK | + GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | + GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | + GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | + GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -1388,10 +1828,7 @@ kbase_csf_scheduler_spin_lock(kbdev, &flags); - /* Set the coherency mode for protected mode execution */ - WARN_ON(kbdev->system_coherency == COHERENCY_ACE); - kbase_csf_firmware_global_input(global_iface, GLB_PROTM_COHERENCY, - kbdev->system_coherency); + kbasep_enable_rtu(kbdev); /* Update shader core allocation enable mask */ enable_endpoints_global(global_iface, core_mask); @@ -1399,9 +1836,23 @@ set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev)); + /* The GPU idle timer is always enabled for simplicity. Checks will be + * done before scheduling the GPU idle worker to see if it is + * appropriate for the current power policy. + */ + enable_gpu_idle_timer(kbdev); + /* Unmask the interrupts */ kbase_csf_firmware_global_input(global_iface, GLB_ACK_IRQ_MASK, ack_irq_mask); + +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + /* Enable FW MCU read/write debug interfaces */ + kbase_csf_firmware_global_input_mask( + global_iface, GLB_DEBUG_ACK_IRQ_MASK, + GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK, + GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); @@ -1482,8 +1933,7 @@ } /** - * kbase_csf_firmware_reload_worker() - - * reload the fw image and re-enable the MCU + * kbase_csf_firmware_reload_worker() - reload the fw image and re-enable the MCU * @work: CSF Work item for reloading the firmware. * * This helper function will reload the firmware image and re-enable the MCU. 
@@ -1503,8 +1953,10 @@ dev_info(kbdev->dev, "reloading firmware"); + KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING(kbdev, kbase_backend_get_cycle_cnt(kbdev)); + /* Reload just the data sections from firmware binary image */ - err = reload_fw_data_sections(kbdev); + err = reload_fw_image(kbdev); if (err) return; @@ -1545,19 +1997,19 @@ if (version != kbdev->csf.global_iface.version) dev_err(kbdev->dev, "Version check failed in firmware reboot."); - KBASE_KTRACE_ADD(kbdev, FIRMWARE_REBOOT, NULL, 0u); + KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_REBOOT, NULL, 0u); /* Tell MCU state machine to transit to next state */ kbdev->csf.firmware_reloaded = true; kbase_pm_update_state(kbdev); } -static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms) +static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_us) { #define HYSTERESIS_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); - u64 dur_val = dur_ms; + u64 dur_val = dur_us; u32 cnt_val_u32, reg_val_u32; bool src_system_timestamp = freq > 0; @@ -1570,13 +2022,14 @@ dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!"); spin_unlock(&kbdev->pm.clk_rtm.lock); - dev_info(kbdev->dev, "Can't get the timestamp frequency, " - "use cycle counter format with firmware idle hysteresis!"); + dev_info( + kbdev->dev, + "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); } - /* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */ + /* Formula for dur_val = ((dur_us/1000000) * freq_HZ) >> 10) */ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; - dur_val = div_u64(dur_val, 1000); + dur_val = div_u64(dur_val, 1000000); /* Interface limits the value field to S32_MAX */ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; @@ -1595,7 +2048,14 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) { - return kbdev->csf.gpu_idle_hysteresis_ms; + unsigned long flags; + u32 dur; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + dur = kbdev->csf.gpu_idle_hysteresis_us; + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return dur; } u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) @@ -1603,11 +2063,53 @@ unsigned long flags; const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); - kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; - kbdev->csf.gpu_idle_dur_count = hysteresis_val; - kbase_csf_scheduler_spin_unlock(kbdev, flags); + /* The 'fw_load_lock' is taken to synchronize against the deferred + * loading of FW, where the idle timer will be enabled. + */ + mutex_lock(&kbdev->fw_load_lock); + if (unlikely(!kbdev->csf.firmware_inited)) { + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbase_csf_scheduler_spin_unlock(kbdev, flags); + mutex_unlock(&kbdev->fw_load_lock); + goto end; + } + mutex_unlock(&kbdev->fw_load_lock); + kbase_csf_scheduler_pm_active(kbdev); + if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { + dev_err(kbdev->dev, + "Unable to activate the MCU, the idle hysteresis value shall remain unchanged"); + kbase_csf_scheduler_pm_idle(kbdev); + return kbdev->csf.gpu_idle_dur_count; + } + + /* The 'reg_lock' is also taken and is held till the update is not + * complete, to ensure the update of idle timer value by multiple Users + * gets serialized. 
+ */ + mutex_lock(&kbdev->csf.reg_lock); + /* The firmware only reads the new idle timer value when the timer is + * disabled. + */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_csf_firmware_disable_gpu_idle_timer(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + /* Ensure that the request has taken effect */ + wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbase_csf_firmware_enable_gpu_idle_timer(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); + mutex_unlock(&kbdev->csf.reg_lock); + + kbase_csf_scheduler_pm_idle(kbdev); + +end: dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", hysteresis_val); @@ -1616,7 +2118,6 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) { -#define PWROFF_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); u64 dur_val = dur_us; @@ -1632,8 +2133,9 @@ dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!"); spin_unlock(&kbdev->pm.clk_rtm.lock); - dev_info(kbdev->dev, "Can't get the timestamp frequency, " - "use cycle counter with MCU Core Poweroff timer!"); + dev_info( + kbdev->dev, + "Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!"); } /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */ @@ -1657,7 +2159,14 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) { - return kbdev->csf.mcu_core_pwroff_dur_us; + u32 pwroff; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + pwroff = kbdev->csf.mcu_core_pwroff_dur_us; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return pwroff; } u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) @@ -1670,34 +2179,124 @@ kbdev->csf.mcu_core_pwroff_dur_count = pwroff; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - dev_dbg(kbdev->dev, "MCU Core Poweroff input update: 0x%.8x", pwroff); + dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff); return pwroff; } +/** + * kbase_device_csf_iterator_trace_init - Send request to enable iterator + * trace port. + * @kbdev: Kernel base device pointer + * + * Return: 0 on success (or if enable request is not sent), or error + * code -EINVAL on failure of GPU to acknowledge enable request. + */ +static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev) +{ + /* Enable the iterator trace port if supported by the GPU. + * It requires the GPU to have a nonzero "iter_trace_enable" + * property in the device tree, and the FW must advertise + * this feature in GLB_FEATURES. 
+ */ + if (kbdev->pm.backend.gpu_powered) { + /* check device tree for iterator trace enable property */ + const void *iter_trace_param = of_get_property( + kbdev->dev->of_node, + "iter_trace_enable", NULL); + + const struct kbase_csf_global_iface *iface = + &kbdev->csf.global_iface; + + if (iter_trace_param) { + u32 iter_trace_value = be32_to_cpup(iter_trace_param); + + if ((iface->features & + GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) && + iter_trace_value) { + long ack_timeout; + + ack_timeout = kbase_csf_timeout_in_jiffies( + kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT)); + + /* write enable request to global input */ + kbase_csf_firmware_global_input_mask( + iface, GLB_REQ, + GLB_REQ_ITER_TRACE_ENABLE_MASK, + GLB_REQ_ITER_TRACE_ENABLE_MASK); + /* Ring global doorbell */ + kbase_csf_ring_doorbell(kbdev, + CSF_KERNEL_DOORBELL_NR); + + ack_timeout = wait_event_timeout( + kbdev->csf.event_wait, + !((kbase_csf_firmware_global_input_read( + iface, GLB_REQ) ^ + kbase_csf_firmware_global_output( + iface, GLB_ACK)) & + GLB_REQ_ITER_TRACE_ENABLE_MASK), + ack_timeout); + + return ack_timeout ? 0 : -EINVAL; + + } + } + + } + return 0; +} int kbase_csf_firmware_early_init(struct kbase_device *kbdev) { init_waitqueue_head(&kbdev->csf.event_wait); kbdev->csf.interrupt_received = false; - kbdev->csf.fw_timeout_ms = CSF_FIRMWARE_TIMEOUT_MS; + + kbdev->csf.fw_timeout_ms = + kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); + + kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US; + kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count( + kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US); INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); INIT_LIST_HEAD(&kbdev->csf.firmware_config); INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata); INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); + INIT_LIST_HEAD(&kbdev->csf.user_reg.list); INIT_WORK(&kbdev->csf.firmware_reload_work, kbase_csf_firmware_reload_worker); INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); mutex_init(&kbdev->csf.reg_lock); + kbdev->csf.fw = (struct kbase_csf_mcu_fw){ .data = NULL }; + return 0; } -int kbase_csf_firmware_init(struct kbase_device *kbdev) +void kbase_csf_firmware_early_term(struct kbase_device *kbdev) { - const struct firmware *firmware; + mutex_destroy(&kbdev->csf.reg_lock); +} + +int kbase_csf_firmware_late_init(struct kbase_device *kbdev) +{ + kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; +#ifdef KBASE_PM_RUNTIME + if (kbase_pm_gpu_sleep_allowed(kbdev)) + kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; +#endif + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); + kbdev->csf.gpu_idle_dur_count = + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); + + return 0; +} + +int kbase_csf_firmware_load_init(struct kbase_device *kbdev) +{ + const struct firmware *firmware = NULL; + struct kbase_csf_mcu_fw *const mcu_fw = &kbdev->csf.fw; const u32 magic = FIRMWARE_HEADER_MAGIC; u8 version_major, version_minor; u32 version_hash; @@ -1720,19 +2319,11 @@ return ret; } - kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; - kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count( - kbdev, FIRMWARE_IDLE_HYSTERESIS_TIME_MS); - - kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US; - kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count( - kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US); - ret = kbase_mcu_shared_interface_region_tracker_init(kbdev); if (ret != 0) { dev_err(kbdev->dev, 
"Failed to setup the rb tree for managing shared interface segment\n"); - goto error; + goto err_out; } if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) { @@ -1740,43 +2331,60 @@ "Failed to load firmware image '%s'\n", fw_name); ret = -ENOENT; - goto error; + } else { + /* Try to save a copy and then release the loaded firmware image */ + mcu_fw->size = firmware->size; + mcu_fw->data = vmalloc((unsigned long)mcu_fw->size); + + if (mcu_fw->data == NULL) { + ret = -ENOMEM; + } else { + memcpy(mcu_fw->data, firmware->data, mcu_fw->size); + dev_dbg(kbdev->dev, "Firmware image (%zu-bytes) retained in csf.fw\n", + mcu_fw->size); + } + + release_firmware(firmware); } - if (firmware->size < FIRMWARE_HEADER_LENGTH) { + /* If error in loading or saving the image, branches to error out */ + if (ret) + goto err_out; + + if (mcu_fw->size < FIRMWARE_HEADER_LENGTH) { dev_err(kbdev->dev, "Firmware too small\n"); ret = -EINVAL; - goto error; + goto err_out; } - if (memcmp(firmware->data, &magic, sizeof(magic)) != 0) { + if (memcmp(mcu_fw->data, &magic, sizeof(magic)) != 0) { dev_err(kbdev->dev, "Incorrect firmware magic\n"); ret = -EINVAL; - goto error; + goto err_out; } - version_minor = firmware->data[4]; - version_major = firmware->data[5]; + version_minor = mcu_fw->data[4]; + version_major = mcu_fw->data[5]; - if (version_major != FIRMWARE_HEADER_VERSION) { + if (version_major != FIRMWARE_HEADER_VERSION_MAJOR || + version_minor != FIRMWARE_HEADER_VERSION_MINOR) { dev_err(kbdev->dev, "Firmware header version %d.%d not understood\n", version_major, version_minor); ret = -EINVAL; - goto error; + goto err_out; } - memcpy(&version_hash, &firmware->data[8], sizeof(version_hash)); + memcpy(&version_hash, &mcu_fw->data[8], sizeof(version_hash)); dev_notice(kbdev->dev, "Loading Mali firmware 0x%x", version_hash); - memcpy(&entry_end_offset, &firmware->data[0x10], - sizeof(entry_end_offset)); + memcpy(&entry_end_offset, &mcu_fw->data[0x10], sizeof(entry_end_offset)); - if (entry_end_offset > firmware->size) { + if (entry_end_offset > mcu_fw->size) { dev_err(kbdev->dev, "Firmware image is truncated\n"); ret = -EINVAL; - goto error; + goto err_out; } entry_offset = FIRMWARE_HEADER_LENGTH; @@ -1784,15 +2392,14 @@ u32 header; unsigned int size; - memcpy(&header, &firmware->data[entry_offset], sizeof(header)); + memcpy(&header, &mcu_fw->data[entry_offset], sizeof(header)); size = entry_size(header); - ret = load_firmware_entry(kbdev, firmware, entry_offset, - header); + ret = load_firmware_entry(kbdev, mcu_fw, entry_offset, header); if (ret != 0) { dev_err(kbdev->dev, "Failed to load firmware image\n"); - goto error; + goto err_out; } entry_offset += size; } @@ -1800,72 +2407,84 @@ if (!kbdev->csf.shared_interface) { dev_err(kbdev->dev, "Shared interface region not found\n"); ret = -EINVAL; - goto error; + goto err_out; } else { ret = setup_shared_iface_static_region(kbdev); if (ret != 0) { dev_err(kbdev->dev, "Failed to insert a region for shared iface entry parsed from fw image\n"); - goto error; + goto err_out; } } ret = kbase_csf_firmware_trace_buffers_init(kbdev); if (ret != 0) { dev_err(kbdev->dev, "Failed to initialize trace buffers\n"); - goto error; + goto err_out; } /* Make sure L2 cache is powered up */ kbase_pm_wait_for_l2_powered(kbdev); /* Load the MMU tables into the selected address space */ - load_mmu_tables(kbdev); + ret = load_mmu_tables(kbdev); + if (ret != 0) + goto err_out; boot_csf_firmware(kbdev); ret = parse_capabilities(kbdev); if (ret != 0) - goto error; + goto err_out; ret = 
kbase_csf_doorbell_mapping_init(kbdev); if (ret != 0) - goto error; + goto err_out; ret = kbase_csf_scheduler_init(kbdev); if (ret != 0) - goto error; + goto err_out; ret = kbase_csf_setup_dummy_user_reg_page(kbdev); if (ret != 0) - goto error; + goto err_out; ret = kbase_csf_timeout_init(kbdev); if (ret != 0) - goto error; + goto err_out; ret = global_init_on_boot(kbdev); if (ret != 0) - goto error; + goto err_out; ret = kbase_csf_firmware_cfg_init(kbdev); if (ret != 0) - goto error; + goto err_out; + ret = kbase_device_csf_iterator_trace_init(kbdev); + if (ret != 0) + goto err_out; - /* Firmware loaded successfully */ - release_firmware(firmware); - KBASE_KTRACE_ADD(kbdev, FIRMWARE_BOOT, NULL, + ret = kbase_csf_firmware_log_init(kbdev); + if (ret != 0) { + dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret); + goto err_out; + } + + if (kbdev->csf.fw_core_dump.available) + kbase_csf_firmware_core_dump_init(kbdev); + + /* Firmware loaded successfully, ret = 0 */ + KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL, (((u64)version_hash) << 32) | (((u64)version_major) << 8) | version_minor); return 0; -error: - kbase_csf_firmware_term(kbdev); - release_firmware(firmware); +err_out: + kbase_csf_firmware_unload_term(kbdev); return ret; } -void kbase_csf_firmware_term(struct kbase_device *kbdev) +void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) { unsigned long flags; int ret = 0; @@ -1875,6 +2494,8 @@ ret = kbase_reset_gpu_wait(kbdev); WARN(ret, "failed to wait for GPU reset"); + + kbase_csf_firmware_log_term(kbdev); kbase_csf_firmware_cfg_term(kbdev); @@ -1917,17 +2538,25 @@ list_del(&interface->node); vunmap(interface->kernel_map); - if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) { - kbase_csf_protected_memory_free(kbdev, interface->pma, - interface->num_pages); - } else { - kbase_mem_pool_free_pages( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - interface->num_pages, interface->phys, - true, false); + + if (!interface->reuse_pages) { + if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) { + kbase_csf_protected_memory_free( + kbdev, interface->pma, interface->num_pages_aligned, + interface->is_small_page); + } else { + kbase_mem_pool_free_pages( + kbase_mem_pool_group_select( + kbdev, KBASE_MEM_GROUP_CSF_FW, + interface->is_small_page), + interface->num_pages_aligned, + interface->phys, + true, false); + } + + kfree(interface->phys); } - kfree(interface->phys); kfree(interface); } @@ -1943,16 +2572,17 @@ kfree(metadata); } -#ifndef MALI_KBASE_BUILD - mali_kutf_fw_utf_entry_cleanup(kbdev); -#endif + if (kbdev->csf.fw.data) { + /* Free the copy of the firmware image */ + vfree(kbdev->csf.fw.data); + kbdev->csf.fw.data = NULL; + dev_dbg(kbdev->dev, "Free retained image csf.fw (%zu-bytes)\n", kbdev->csf.fw.size); + } /* This will also free up the region allocated for the shared interface * entry parsed from the firmware image. 
*/ kbase_mcu_shared_interface_region_tracker_term(kbdev); - - mutex_destroy(&kbdev->csf.reg_lock); kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); @@ -1960,32 +2590,135 @@ kbdev->as_free |= MCU_AS_BITMASK; } +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) +int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr, + u32 const reg_val) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; + int err; + u32 glb_req; + + mutex_lock(&kbdev->csf.reg_lock); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Set the address and value to write */ + kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr); + kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN1, reg_val); + + /* Set the Global Debug request for FW MCU write */ + glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_req ^= GLB_DEBUG_REQ_FW_AS_WRITE_MASK; + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req, + GLB_DEBUG_REQ_FW_AS_WRITE_MASK); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); + + /* Notify FW about the Global Debug request */ + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + + mutex_unlock(&kbdev->csf.reg_lock); + + dev_dbg(kbdev->dev, "w: reg %08x val %08x", reg_addr, reg_val); + + return err; +} + +int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr, + u32 *reg_val) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; + int err; + u32 glb_req; + + if (WARN_ON(reg_val == NULL)) + return -EINVAL; + + mutex_lock(&kbdev->csf.reg_lock); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Set the address to read */ + kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr); + + /* Set the Global Debug request for FW MCU read */ + glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_req ^= GLB_DEBUG_REQ_FW_AS_READ_MASK; + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req, + GLB_DEBUG_REQ_FW_AS_READ_MASK); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); + + /* Notify FW about the Global Debug request */ + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + + if (!err) { + kbase_csf_scheduler_spin_lock(kbdev, &flags); + *reg_val = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ARG_OUT0); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + } + + mutex_unlock(&kbdev->csf.reg_lock); + + dev_dbg(kbdev->dev, "r: reg %08x val %08x", reg_addr, *reg_val); + + return err; +} + +int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr, + u32 const val_mask, u32 const reg_val) +{ + unsigned long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms) + jiffies; + u32 read_val; + + dev_dbg(kbdev->dev, "p: reg %08x val %08x mask %08x", reg_addr, reg_val, val_mask); + + while (time_before(jiffies, remaining)) { + int err = kbase_csf_firmware_mcu_register_read(kbdev, reg_addr, &read_val); + + if (err) { + dev_err(kbdev->dev, + "Error reading MCU register value (read_val = %u, expect = %u)\n", + read_val, reg_val); + return err; + } + + if ((read_val & val_mask) == reg_val) + return 0; + } + + 
+		"Timeout waiting for MCU register value to be set (read_val = %u, expect = %u)\n",
+		read_val, reg_val);
+
+	return -ETIMEDOUT;
+}
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+
 void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
 {
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
-	const u32 glb_req =
-		kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
+	const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
 
 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
-
 	/* The scheduler is assumed to only call the enable when its internal
 	 * state indicates that the idle timer has previously been disabled. So
 	 * on entry the expected field values are:
 	 *   1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
 	 *   2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0
 	 */
-
 	if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
 		dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");
 
-	kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
-					kbdev->csf.gpu_idle_dur_count);
-
-	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
-					GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
-
-	dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
-		kbdev->csf.gpu_idle_dur_count);
+	enable_gpu_idle_timer(kbdev);
 
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 }
@@ -2015,10 +2748,11 @@
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
 }
 
-int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev)
+int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms)
 {
 	kbase_csf_firmware_ping(kbdev);
-	return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
+
+	return wait_for_global_request_with_timeout(kbdev, GLB_REQ_PING_MASK, wait_timeout_ms);
 }
 
 int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev,
@@ -2048,31 +2782,63 @@
 void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
 {
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
-	unsigned long flags;
-	int err;
-	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev);
+
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
 	set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK);
 	dev_dbg(kbdev->dev, "Sending request to enter protected mode");
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
-	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
+int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
+{
+	int err;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
 
 	err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
 
 	if (!err) {
-		unsigned long irq_flags;
+#define WAIT_TIMEOUT 5000 /* 50ms timeout */
+#define DELAY_TIME_IN_US 10
+		const int max_iterations = WAIT_TIMEOUT;
+		int loop;
 
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-		kbdev->protected_mode = true;
-		kbase_ipa_protection_mode_switch_event(kbdev);
-		kbase_ipa_control_protm_entered(kbdev);
+		/* Wait for the GPU to actually enter protected mode */
+		for (loop = 0; loop < max_iterations; loop++) {
+			unsigned long flags;
+			bool pmode_exited;
 
-		kbase_csf_scheduler_spin_lock(kbdev, &irq_flags);
-		kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface);
-		kbase_csf_scheduler_spin_unlock(kbdev, irq_flags);
+			if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) &
+			    GPU_STATUS_PROTECTED_MODE_ACTIVE)
+				break;
 
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			/* Check if GPU already exited the protected mode */
+			kbase_csf_scheduler_spin_lock(kbdev, &flags);
+			pmode_exited =
+				!kbase_csf_scheduler_protected_mode_in_use(kbdev);
+			kbase_csf_scheduler_spin_unlock(kbdev, flags);
+			if (pmode_exited)
+				break;
+
+			udelay(DELAY_TIME_IN_US);
+		}
+
+		if (loop == max_iterations) {
+			dev_err(kbdev->dev, "Timeout for actual pmode entry after PROTM_ENTER ack");
+			err = -ETIMEDOUT;
+		}
 	}
+
+	if (unlikely(err)) {
+		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+			kbase_reset_gpu(kbdev);
+	}
+
+	KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev);
+
+	return err;
 }
 
 void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
@@ -2080,12 +2846,53 @@
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
 	unsigned long flags;
 
+	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	/* Validate there are no on-slot groups when sending the
+	 * halt request to firmware.
+	 */
+	WARN_ON(kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev));
 	set_global_request(global_iface, GLB_REQ_HALT_MASK);
 	dev_dbg(kbdev->dev, "Sending request to HALT MCU");
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
 }
+
+void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
+{
+	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
+	/* Trigger the boot of MCU firmware, Use the AUTO mode as
+	 * otherwise on fast reset, to exit protected mode, MCU will
+	 * not reboot by itself to enter normal mode.
+	 */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO);
+}
+
+#ifdef KBASE_PM_RUNTIME
+void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+	unsigned long flags;
+
+	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	set_global_request(global_iface, GLB_REQ_SLEEP_MASK);
+	dev_dbg(kbdev->dev, "Sending sleep request to MCU");
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
+bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	return (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK) &&
+		kbase_csf_firmware_mcu_halted(kbdev));
+}
+#endif
 
 int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev)
 {
@@ -2095,6 +2902,7 @@
 	/* Ensure GPU is powered-up until we complete config update.*/
 	kbase_csf_scheduler_pm_active(kbdev);
+	kbase_csf_scheduler_wait_mcu_active(kbdev);
 
 	/* The 'reg_lock' is also taken and is held till the update is
 	 * complete, to ensure the config update gets serialized.
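Note on the hunks above: both the new kbase_csf_wait_protected_mode_enter() path and kbase_csf_firmware_mcu_register_poll() rely on the same bounded-poll idiom, namely read a status word, compare it against a mask, and give up with -ETIMEDOUT once a fixed budget is spent (here 5000 iterations x 10 us, roughly 50 ms, or a jiffies deadline). The standalone sketch below only illustrates that idiom outside the driver; poll_masked_status(), read_status() and the usleep()-based delay are illustrative stand-ins, not kbase functions.

/* Userspace sketch of the bounded-poll idiom used in the patch above.
 * read_status() stands in for a hardware status-register read.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define POLL_MAX_ITERATIONS 5000 /* 5000 * 10 us = 50 ms budget */
#define POLL_DELAY_US 10

static volatile uint32_t fake_status; /* stands in for a GPU status register */

static uint32_t read_status(void)
{
	return fake_status;
}

/* Return 0 once (status & mask) == want, or -ETIMEDOUT after the budget. */
static int poll_masked_status(uint32_t mask, uint32_t want)
{
	int loop;

	for (loop = 0; loop < POLL_MAX_ITERATIONS; loop++) {
		if ((read_status() & mask) == want)
			return 0;
		usleep(POLL_DELAY_US);
	}
	return -ETIMEDOUT;
}

int main(void)
{
	fake_status = 0x80; /* pretend the "active" bit is already set */
	printf("poll result: %d\n", poll_masked_status(0x80, 0x80));
	return 0;
}

The driver variant additionally re-checks whether protected mode was already exited before each delay, so the loop cannot spin on a condition that can no longer become true.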
@@ -2234,7 +3042,7 @@
 		gpu_map_prot = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
 		cpu_map_prot = pgprot_writecombine(cpu_map_prot);
-	};
+	}
 
 	phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
 	if (!phys)
@@ -2244,9 +3052,8 @@
 	if (!page_list)
 		goto page_list_alloc_error;
 
-	ret = kbase_mem_pool_alloc_pages(
-		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-		num_pages, phys, false);
+	ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
+					 phys, false, NULL);
 	if (ret <= 0)
 		goto phys_mem_pool_alloc_error;
 
@@ -2257,8 +3064,8 @@
 	if (!cpu_addr)
 		goto vmap_error;
 
-	va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
-			num_pages, KBASE_REG_ZONE_MCU_SHARED);
+	va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
+					 KBASE_REG_ZONE_MCU_SHARED);
 	if (!va_reg)
 		goto va_region_alloc_error;
 
@@ -2272,9 +3079,9 @@
 	gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR);
 	gpu_map_properties |= gpu_map_prot;
 
-	ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
-			va_reg->start_pfn, &phys[0], num_pages,
-			gpu_map_properties, KBASE_MEM_GROUP_CSF_FW);
+	ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn,
+					      &phys[0], num_pages, gpu_map_properties,
+					      KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false);
 	if (ret)
 		goto mmu_insert_pages_error;
 
@@ -2288,7 +3095,7 @@
 mmu_insert_pages_error:
 	mutex_lock(&kbdev->csf.reg_lock);
-	kbase_remove_va_region(va_reg);
+	kbase_remove_va_region(kbdev, va_reg);
 va_region_add_error:
 	kbase_free_alloced_region(va_reg);
 	mutex_unlock(&kbdev->csf.reg_lock);
@@ -2320,7 +3127,7 @@
 {
 	if (csf_mapping->va_reg) {
 		mutex_lock(&kbdev->csf.reg_lock);
-		kbase_remove_va_region(csf_mapping->va_reg);
+		kbase_remove_va_region(kbdev, csf_mapping->va_reg);
 		kbase_free_alloced_region(csf_mapping->va_reg);
 		mutex_unlock(&kbdev->csf.reg_lock);
 	}
--
Gitblit v1.6.2
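Note on the final hunks: the shared-memory mapping path keeps the existing goto-based cleanup ladder, where each allocation step has a matching error label and a failure jumps to the label that frees only what has already been set up, in reverse order; the same shape is used by the err_out path added earlier in the patch. A minimal self-contained sketch of that idiom follows; setup_mapping() and struct mapping are made-up names for illustration, not driver code.

/* Minimal sketch of the reverse-order goto cleanup ladder used in the patch.
 * Everything here is an illustrative stand-in for the real allocation steps.
 */
#include <stdio.h>
#include <stdlib.h>

struct mapping {
	void *phys;     /* stands in for the physical page array */
	void *cpu_addr; /* stands in for the vmap()ed CPU mapping */
};

static int setup_mapping(struct mapping *m, size_t num_pages)
{
	m->phys = calloc(num_pages, sizeof(long));
	if (!m->phys)
		goto phys_alloc_error;

	m->cpu_addr = malloc(num_pages * 4096);
	if (!m->cpu_addr)
		goto cpu_map_error;

	/* Further steps (VA region, MMU insert) would add more labels
	 * in the same reverse order.
	 */
	return 0;

cpu_map_error:
	free(m->phys);
	m->phys = NULL;
phys_alloc_error:
	return -1;
}

int main(void)
{
	struct mapping m = { 0 };

	printf("setup_mapping: %d\n", setup_mapping(&m, 4));
	free(m.cpu_addr);
	free(m.phys);
	return 0;
}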