From 6778948f9de86c3cfaf36725a7c87dcff9ba247f Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Mon, 11 Dec 2023 08:20:59 +0000
Subject: [PATCH] kernel_5.10 no rt

---
 kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c | 1401 ++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 1104 insertions(+), 297 deletions(-)

diff --git a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
index b51c0ff..42bff1e 100644
--- a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
+++ b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -21,40 +21,46 @@
 
 #include "mali_kbase.h"
 #include "mali_kbase_csf_firmware_cfg.h"
+#include "mali_kbase_csf_firmware_log.h"
+#include "mali_kbase_csf_firmware_core_dump.h"
 #include "mali_kbase_csf_trace_buffer.h"
 #include "mali_kbase_csf_timeout.h"
 #include "mali_kbase_mem.h"
+#include "mali_kbase_mem_pool_group.h"
 #include "mali_kbase_reset_gpu.h"
 #include "mali_kbase_ctx_sched.h"
 #include "mali_kbase_csf_scheduler.h"
+#include <mali_kbase_hwaccess_time.h>
 #include "device/mali_kbase_device.h"
 #include "backend/gpu/mali_kbase_pm_internal.h"
 #include "tl/mali_kbase_timeline_priv.h"
+#include "tl/mali_kbase_tracepoints.h"
 #include "mali_kbase_csf_tl_reader.h"
 #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
 #include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
-
+#include <csf/mali_kbase_csf_registers.h>
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/firmware.h>
 #include <linux/mman.h>
 #include <linux/string.h>
 #include <linux/mutex.h>
+#include <linux/ctype.h>
 #if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE)
 #include <linux/set_memory.h>
 #endif
 #include <mmu/mali_kbase_mmu.h>
 #include <asm/arch_timer.h>
+#include <linux/delay.h>
 
 #define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20)
-
 
 static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin";
 module_param_string(fw_name, fw_name, sizeof(fw_name), 0644);
 MODULE_PARM_DESC(fw_name, "firmware image");
 
 /* The waiting time for firmware to boot */
-static unsigned int csf_firmware_boot_timeout_ms = 500;
+static unsigned int csf_firmware_boot_timeout_ms;
 module_param(csf_firmware_boot_timeout_ms, uint, 0444);
 MODULE_PARM_DESC(csf_firmware_boot_timeout_ms,
 		 "Maximum time to wait for firmware to boot.");
@@ -72,9 +78,11 @@
 	"Enables effective use of a debugger for debugging firmware code.");
 #endif
 
-#define FIRMWARE_HEADER_MAGIC    (0xC3F13A6Eul)
-#define FIRMWARE_HEADER_VERSION  (0ul)
-#define FIRMWARE_HEADER_LENGTH   (0x14ul)
+
+#define FIRMWARE_HEADER_MAGIC		(0xC3F13A6Eul)
+#define FIRMWARE_HEADER_VERSION_MAJOR	(0ul)
+#define FIRMWARE_HEADER_VERSION_MINOR	(3ul)
+#define FIRMWARE_HEADER_LENGTH		(0x14ul)
 
 #define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \
 	(CSF_FIRMWARE_ENTRY_READ | \
@@ -85,11 +93,13 @@
 	 CSF_FIRMWARE_ENTRY_ZERO | \
 	 CSF_FIRMWARE_ENTRY_CACHE_MODE)
 
-#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE     (0)
-#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1)
-#define CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST     (2)
-#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER  (3)
-#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4)
+#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE           (0)
+#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION       (1)
+#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER        (3)
+#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA   (4)
+#define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6)
+#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST      (7)
+#define CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP           (9)
 
 #define CSF_FIRMWARE_CACHE_MODE_NONE              (0ul << 3)
 #define CSF_FIRMWARE_CACHE_MODE_CACHED            (1ul << 3)
@@ -100,11 +110,16 @@
 
 #define TL_METADATA_ENTRY_NAME_OFFSET (0x8)
 
+#define BUILD_INFO_METADATA_SIZE_OFFSET (0x4)
+#define BUILD_INFO_GIT_SHA_LEN (40U)
+#define BUILD_INFO_GIT_DIRTY_LEN (1U)
+#define BUILD_INFO_GIT_SHA_PATTERN "git_sha: "
+
 #define CSF_MAX_FW_STOP_LOOPS            (100000)
 
-#define CSF_GLB_REQ_CFG_MASK                                                   \
-	(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK |         \
-	 GLB_REQ_CFG_PWROFF_TIMER_MASK)
+#define CSF_GLB_REQ_CFG_MASK                                                                       \
+	(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK |                             \
+	 GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
 
 static inline u32 input_page_read(const u32 *const input, const u32 offset)
 {
@@ -155,8 +170,7 @@
 }
 
 /**
- * struct firmware_timeline_metadata -
- * Timeline metadata item within the MCU firmware
+ * struct firmware_timeline_metadata - Timeline metadata item within the MCU firmware
  *
  * @node: List head linking all timeline metadata to
  *        kbase_device:csf.firmware_timeline_metadata.
@@ -187,11 +201,13 @@
 	if (!interface)
 		return -EINVAL;
 
-	reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
-			interface->num_pages, KBASE_REG_ZONE_MCU_SHARED);
+	reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0,
+				      interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
 	if (reg) {
+		mutex_lock(&kbdev->csf.reg_lock);
 		ret = kbase_add_va_region_rbtree(kbdev, reg,
-				interface->virtual, interface->num_pages, 1);
+				interface->virtual, interface->num_pages_aligned, 1);
+		mutex_unlock(&kbdev->csf.reg_lock);
 		if (ret)
 			kfree(reg);
 		else
@@ -213,10 +229,11 @@
 	return (max_loops == 0) ? -1 : 0;
 }
 
-void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev)
+void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
 {
-	if (wait_mcu_status_value(kbdev, MCU_CNTRL_DISABLE) < 0)
-		dev_err(kbdev->dev, "MCU failed to get disabled");
+	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE);
 }
 
 static void wait_for_firmware_stop(struct kbase_device *kbdev)
@@ -225,6 +242,13 @@
 		/* This error shall go away once MIDJM-2371 is closed */
 		dev_err(kbdev->dev, "Firmware failed to stop");
 	}
+
+	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+}
+
+void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev)
+{
+	wait_for_firmware_stop(kbdev);
 }
 
 static void stop_csf_firmware(struct kbase_device *kbdev)
@@ -237,9 +261,14 @@
 
 static void wait_for_firmware_boot(struct kbase_device *kbdev)
 {
-	const long wait_timeout =
-		kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms);
+	long wait_timeout;
 	long remaining;
+
+	if (!csf_firmware_boot_timeout_ms)
+		csf_firmware_boot_timeout_ms =
+			kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_BOOT_TIMEOUT);
+
+	wait_timeout = kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms);
 
 	/* Firmware will generate a global interface interrupt once booting
 	 * is complete
@@ -257,22 +286,51 @@
 {
 	kbase_csf_firmware_enable_mcu(kbdev);
 
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+	kbase_debug_coresight_csf_state_request(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED);
+
+	if (!kbase_debug_coresight_csf_state_wait(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED))
+		dev_err(kbdev->dev, "Timeout waiting for CoreSight to be enabled");
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+
 	wait_for_firmware_boot(kbdev);
 }
 
-static void wait_ready(struct kbase_device *kbdev)
+/**
+ * wait_ready() - Wait for previously issued MMU command to complete.
+ *
+ * @kbdev:        Kbase device to wait for a MMU command to complete.
+ *
+ * Reset GPU if the wait for previously issued command times out.
+ *
+ * Return:  0 on success, error code otherwise.
+ */
+static int wait_ready(struct kbase_device *kbdev)
 {
-	u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
-	u32 val;
+	const ktime_t wait_loop_start = ktime_get_raw();
+	const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms;
+	s64 diff;
 
-	val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS));
+	do {
+		unsigned int i;
 
-	/* Wait for a while for the update command to take effect */
-	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
-		val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS));
+		for (i = 0; i < 1000; i++) {
+			/* Wait for the MMU status to indicate there is no active command */
+			if (!(kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) &
+			      AS_STATUS_AS_ACTIVE))
+				return 0;
+		}
 
-	if (max_loops == 0)
-		dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n");
+		diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
+	} while (diff < mmu_as_inactive_wait_time_ms);
+
+	dev_err(kbdev->dev,
+		"AS_ACTIVE bit stuck for MCU AS. Might be caused by unstable GPU clk/pwr or faulty system");
+
+	if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+		kbase_reset_gpu_locked(kbdev);
+
+	return -ETIMEDOUT;
 }
 
 static void unload_mmu_tables(struct kbase_device *kbdev)
@@ -287,7 +345,7 @@
 	mutex_unlock(&kbdev->mmu_hw_mutex);
 }
 
-static void load_mmu_tables(struct kbase_device *kbdev)
+static int load_mmu_tables(struct kbase_device *kbdev)
 {
 	unsigned long irq_flags;
 
@@ -298,7 +356,7 @@
 	mutex_unlock(&kbdev->mmu_hw_mutex);
 
 	/* Wait for a while for the update command to take effect */
-	wait_ready(kbdev);
+	return wait_ready(kbdev);
 }
 
 /**
@@ -405,74 +463,175 @@
 			memset(p + copy_len, 0, zi_len);
 		}
 
-		kbase_sync_single_for_device(kbdev, kbase_dma_addr(page),
-				PAGE_SIZE, DMA_TO_DEVICE);
+		kbase_sync_single_for_device(kbdev, kbase_dma_addr_from_tagged(phys[page_num]),
+					     PAGE_SIZE, DMA_TO_DEVICE);
 		kunmap_atomic(p);
 	}
 }
 
-static int reload_fw_data_sections(struct kbase_device *kbdev)
+static int reload_fw_image(struct kbase_device *kbdev)
 {
 	const u32 magic = FIRMWARE_HEADER_MAGIC;
 	struct kbase_csf_firmware_interface *interface;
-	const struct firmware *firmware;
+	struct kbase_csf_mcu_fw *const mcu_fw = &kbdev->csf.fw;
 	int ret = 0;
 
-	if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) {
-		dev_err(kbdev->dev,
-			"Failed to reload firmware image '%s'\n",
-			fw_name);
-		return -ENOENT;
-	}
-
-	/* Do couple of basic sanity checks */
-	if (firmware->size < FIRMWARE_HEADER_LENGTH) {
-		dev_err(kbdev->dev, "Firmware image unexpectedly too small\n");
+	if (WARN_ON(mcu_fw->data == NULL)) {
+		dev_err(kbdev->dev, "Firmware image copy not loaded\n");
 		ret = -EINVAL;
 		goto out;
 	}
 
-	if (memcmp(firmware->data, &magic, sizeof(magic)) != 0) {
+	/* Do a basic sanity check on MAGIC signature */
+	if (memcmp(mcu_fw->data, &magic, sizeof(magic)) != 0) {
 		dev_err(kbdev->dev, "Incorrect magic value, firmware image could have been corrupted\n");
 		ret = -EINVAL;
 		goto out;
 	}
 
 	list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
-		/* Skip reload of text & read only data sections */
-		if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) ||
-		    !(interface->flags & CSF_FIRMWARE_ENTRY_WRITE))
-			continue;
+		/* Don't skip re-loading any section if full reload was requested */
+		if (!kbdev->csf.firmware_full_reload_needed) {
+			/* Skip reload of text & read only data sections */
+			if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) ||
+			    !(interface->flags & CSF_FIRMWARE_ENTRY_WRITE))
+				continue;
+		}
 
-		load_fw_image_section(kbdev, firmware->data, interface->phys,
-			interface->num_pages, interface->flags,
-			interface->data_start, interface->data_end);
+		load_fw_image_section(kbdev, mcu_fw->data, interface->phys, interface->num_pages,
+				      interface->flags, interface->data_start, interface->data_end);
 	}
 
-	kbase_csf_firmware_reload_trace_buffers_data(kbdev);
+	kbdev->csf.firmware_full_reload_needed = false;
 
+	kbase_csf_firmware_reload_trace_buffers_data(kbdev);
 out:
-	release_firmware(firmware);
 	return ret;
 }
 
 /**
+ * entry_find_large_page_to_reuse() - Find if the large page of previously parsed
+ *                                    FW interface entry can be reused to store
+ *                                    the contents of new FW interface entry.
+ *
+ * @kbdev: Kbase device structure
+ * @virtual_start: Start of the virtual address range required for an entry allocation
+ * @virtual_end: End of the virtual address range required for an entry allocation
+ * @flags: Firmware entry flags for comparison with the reusable pages found
+ * @phys: Pointer to the array of physical (tagged) addresses making up the new
+ *        FW interface entry. It is an output parameter which would be made to
+ *        point to an already existing array allocated for the previously parsed
+ *        FW interface entry using large page(s). If no appropriate entry is
+ *        found it is set to NULL.
+ * @pma:  Pointer to a protected memory allocation. It is an output parameter
+ *        which would be made to point to the protected memory allocation of a
+ *        previously parsed FW interface entry using large page(s) from protected memory.
+ *        If no appropriate entry is found it is set to NULL.
+ * @num_pages: Number of pages requested.
+ * @num_pages_aligned: This is an output parameter used to carry the number of 4KB pages
+ *                     within the 2MB pages aligned allocation.
+ * @is_small_page: This is an output flag used to select between the small and large page
+ *                 to be used for the FW entry allocation.
+ *
+ * Go through all the already initialized interfaces and find if a previously
+ * allocated large page can be used to store contents of new FW interface entry.
+ *
+ * Return: true if a large page can be reused, false otherwise.
+ */
+static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev,
+						  const u32 virtual_start, const u32 virtual_end,
+						  const u32 flags, struct tagged_addr **phys,
+						  struct protected_memory_allocation ***pma,
+						  u32 num_pages, u32 *num_pages_aligned,
+						  bool *is_small_page)
+{
+	struct kbase_csf_firmware_interface *interface = NULL;
+	struct kbase_csf_firmware_interface *target_interface = NULL;
+	u32 virtual_diff_min = U32_MAX;
+	bool reuse_large_page = false;
+
+	CSTD_UNUSED(interface);
+	CSTD_UNUSED(target_interface);
+	CSTD_UNUSED(virtual_diff_min);
+
+	*num_pages_aligned = num_pages;
+	*is_small_page = true;
+	*phys = NULL;
+	*pma = NULL;
+
+
+	/* If the section starts at 2MB aligned boundary,
+	 * then use 2MB page(s) for it.
+	 */
+	if (!(virtual_start & (SZ_2M - 1))) {
+		*num_pages_aligned =
+			round_up(*num_pages_aligned, NUM_4K_PAGES_IN_2MB_PAGE);
+		*is_small_page = false;
+		goto out;
+	}
+
+	/* If the section doesn't lie within the same 2MB aligned boundary,
+	 * then use 4KB pages as it would be complicated to use a 2MB page
+	 * for such section.
+	 */
+	if ((virtual_start & ~(SZ_2M - 1)) != (virtual_end & ~(SZ_2M - 1)))
+		goto out;
+
+	/* Find the nearest 2MB aligned section which comes before the current
+	 * section.
+	 */
+	list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
+		const u32 virtual_diff = virtual_start - interface->virtual;
+
+		if (interface->virtual > virtual_end)
+			continue;
+
+		if (interface->virtual & (SZ_2M - 1))
+			continue;
+
+		if ((virtual_diff < virtual_diff_min) && (interface->flags == flags)) {
+			target_interface = interface;
+			virtual_diff_min = virtual_diff;
+		}
+	}
+
+	if (target_interface) {
+		const u32 page_index = virtual_diff_min >> PAGE_SHIFT;
+
+		if (page_index >= target_interface->num_pages_aligned)
+			goto out;
+
+		if (target_interface->phys)
+			*phys = &target_interface->phys[page_index];
+
+		if (target_interface->pma)
+			*pma = &target_interface->pma[page_index / NUM_4K_PAGES_IN_2MB_PAGE];
+
+		*is_small_page = false;
+		reuse_large_page = true;
+	}
+
+out:
+	return reuse_large_page;
+}
+
+/**
  * parse_memory_setup_entry() - Process an "interface memory setup" section
+ *
+ * @kbdev: Kbase device structure
+ * @fw: The firmware image containing the section
+ * @entry: Pointer to the start of the section
+ * @size: Size (in bytes) of the section
  *
  * Read an "interface memory setup" section from the firmware image and create
  * the necessary memory region including the MMU page tables. If successful
  * the interface will be added to the kbase_device:csf.firmware_interfaces list.
  *
  * Return: 0 if successful, negative error code on failure
- *
- * @kbdev: Kbase device structure
- * @fw: The firmware image containing the section
- * @entry: Pointer to the start of the section
- * @size: Size (in bytes) of the section
  */
 static int parse_memory_setup_entry(struct kbase_device *kbdev,
-		const struct firmware *fw,
-		const u32 *entry, unsigned int size)
+				    const struct kbase_csf_mcu_fw *const fw, const u32 *entry,
+				    unsigned int size)
 {
 	int ret = 0;
 	const u32 flags = entry[0];
@@ -481,13 +640,19 @@
 	const u32 data_start = entry[3];
 	const u32 data_end = entry[4];
 	u32 num_pages;
+	u32 num_pages_aligned;
 	char *name;
+	void *name_entry;
+	unsigned int name_len;
 	struct tagged_addr *phys = NULL;
 	struct kbase_csf_firmware_interface *interface = NULL;
 	bool allocated_pages = false, protected_mode = false;
 	unsigned long mem_flags = 0;
 	u32 cache_mode = 0;
 	struct protected_memory_allocation **pma = NULL;
+	bool reuse_pages = false;
+	bool is_small_page = true;
+	bool ignore_page_migration = true;
 
 	if (data_end < data_start) {
 		dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n",
@@ -522,7 +687,7 @@
 		protected_mode = true;
 
 	if (protected_mode && kbdev->csf.pma_dev == NULL) {
-		dev_err(kbdev->dev,
+		dev_dbg(kbdev->dev,
 			"Protected memory allocator not found, Firmware protected mode entry will not be supported");
 		return 0;
 	}
@@ -530,23 +695,38 @@
 	num_pages = (virtual_end - virtual_start)
 		>> PAGE_SHIFT;
 
-	phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
+	reuse_pages =
+		entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags, &phys,
+					       &pma, num_pages, &num_pages_aligned, &is_small_page);
+	if (!reuse_pages)
+		phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL);
+
 	if (!phys)
 		return -ENOMEM;
 
 	if (protected_mode) {
-		pma = kbase_csf_protected_memory_alloc(kbdev, phys, num_pages);
-
-		if (pma == NULL) {
-			ret = -ENOMEM;
-			goto out;
+		if (!reuse_pages) {
+			pma = kbase_csf_protected_memory_alloc(
+				kbdev, phys, num_pages_aligned, is_small_page);
 		}
+
+		if (!pma)
+			ret = -ENOMEM;
 	} else {
-		ret = kbase_mem_pool_alloc_pages(
-			&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-			num_pages, phys, false);
-		if (ret < 0)
-			goto out;
+		if (!reuse_pages) {
+			ret = kbase_mem_pool_alloc_pages(
+				kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW,
+							    is_small_page),
+				num_pages_aligned, phys, false, NULL);
+			ignore_page_migration = false;
+		}
+	}
+
+	if (ret < 0) {
+		dev_err(kbdev->dev,
+			"Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x\n",
+			num_pages_aligned, virtual_start);
+		goto out;
 	}
 
 	allocated_pages = true;
@@ -554,31 +734,42 @@
 			data_start, data_end);
 
 	/* Allocate enough memory for the struct kbase_csf_firmware_interface and
-	 * the name of the interface. An extra byte is allocated to place a
-	 * NUL-terminator in. This should already be included according to the
-	 * specification but here we add it anyway to be robust against a
-	 * corrupt firmware image.
+	 * the name of the interface.
 	 */
-	interface = kmalloc(sizeof(*interface) +
-			size - INTERFACE_ENTRY_NAME_OFFSET + 1, GFP_KERNEL);
+	name_entry = (void *)entry + INTERFACE_ENTRY_NAME_OFFSET;
+	name_len = strnlen(name_entry, size - INTERFACE_ENTRY_NAME_OFFSET);
+	if (size < (INTERFACE_ENTRY_NAME_OFFSET + name_len + 1 + sizeof(u32))) {
+		dev_err(kbdev->dev, "Memory setup entry too short to contain virtual_exe_start");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	interface = kmalloc(sizeof(*interface) + name_len + 1, GFP_KERNEL);
 	if (!interface) {
 		ret = -ENOMEM;
 		goto out;
 	}
 	name = (void *)(interface + 1);
-	memcpy(name, entry + (INTERFACE_ENTRY_NAME_OFFSET / sizeof(*entry)),
-			size - INTERFACE_ENTRY_NAME_OFFSET);
-	name[size - INTERFACE_ENTRY_NAME_OFFSET] = 0;
+	memcpy(name, name_entry, name_len);
+	name[name_len] = 0;
 
 	interface->name = name;
 	interface->phys = phys;
+	interface->reuse_pages = reuse_pages;
+	interface->is_small_page = is_small_page;
 	interface->num_pages = num_pages;
+	interface->num_pages_aligned = num_pages_aligned;
 	interface->virtual = virtual_start;
 	interface->kernel_map = NULL;
 	interface->flags = flags;
 	interface->data_start = data_start;
 	interface->data_end = data_end;
 	interface->pma = pma;
+
+	/* Discover the virtual execution address field after the end of the name
+	 * field taking into account the NULL-termination character.
+	 */
+	interface->virtual_exe_start = *((u32 *)(name_entry + name_len + 1));
 
 	mem_flags = convert_mem_flags(kbdev, flags, &cache_mode);
 
@@ -633,15 +824,19 @@
 
 	list_add(&interface->node, &kbdev->csf.firmware_interfaces);
 
-	ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
-			virtual_start >> PAGE_SHIFT, phys, num_pages, mem_flags,
-			KBASE_MEM_GROUP_CSF_FW);
+	if (!reuse_pages) {
+		ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
+						      virtual_start >> PAGE_SHIFT, phys,
+						      num_pages_aligned, mem_flags,
+						      KBASE_MEM_GROUP_CSF_FW, NULL, NULL,
+						      ignore_page_migration);
 
-	if (ret != 0) {
-		dev_err(kbdev->dev, "Failed to insert firmware pages\n");
-		/* The interface has been added to the list, so cleanup will
-		 * be handled by firmware unloading
-		 */
+		if (ret != 0) {
+			dev_err(kbdev->dev, "Failed to insert firmware pages\n");
+			/* The interface has been added to the list, so cleanup will
+			 * be handled by firmware unloading
+			 */
+		}
 	}
 
 	dev_dbg(kbdev->dev, "Processed section '%s'", name);
@@ -650,16 +845,22 @@
 
 out:
 	if (allocated_pages) {
-		if (protected_mode) {
-			kbase_csf_protected_memory_free(kbdev, pma, num_pages);
-		} else {
-			kbase_mem_pool_free_pages(
-				&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-				num_pages, phys, false, false);
+		if (!reuse_pages) {
+			if (protected_mode) {
+				kbase_csf_protected_memory_free(
+					kbdev, pma, num_pages_aligned, is_small_page);
+			} else {
+				kbase_mem_pool_free_pages(
+					kbase_mem_pool_group_select(
+						kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page),
+					num_pages_aligned, phys, false, false);
+			}
 		}
 	}
 
-	kfree(phys);
+	if (!reuse_pages)
+		kfree(phys);
+
 	kfree(interface);
 	return ret;
 }
@@ -675,7 +876,8 @@
  * @size:  Size (in bytes) of the section
  */
 static int parse_timeline_metadata_entry(struct kbase_device *kbdev,
-	const struct firmware *fw, const u32 *entry, unsigned int size)
+					 const struct kbase_csf_mcu_fw *const fw, const u32 *entry,
+					 unsigned int size)
 {
 	const u32 data_start = entry[0];
 	const u32 data_size = entry[1];
@@ -718,7 +920,63 @@
 }
 
 /**
+ * parse_build_info_metadata_entry() - Process a "build info metadata" section
+ * @kbdev: Kbase device structure
+ * @fw:    Firmware image containing the section
+ * @entry: Pointer to the section
+ * @size:  Size (in bytes) of the section
+ *
+ * This prints the git SHA of the firmware on firmware load.
+ *
+ * Return: 0 if successful, negative error code on failure
+ */
+static int parse_build_info_metadata_entry(struct kbase_device *kbdev,
+					   const struct kbase_csf_mcu_fw *const fw,
+					   const u32 *entry, unsigned int size)
+{
+	const u32 meta_start_addr = entry[0];
+	char *ptr = NULL;
+	size_t sha_pattern_len = strlen(BUILD_INFO_GIT_SHA_PATTERN);
+
+	/* Only print git SHA to avoid releasing sensitive information */
+	ptr = strstr(fw->data + meta_start_addr, BUILD_INFO_GIT_SHA_PATTERN);
+	/* Check that we won't overrun the found string  */
+	if (ptr &&
+	    strlen(ptr) >= BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN + sha_pattern_len) {
+		char git_sha[BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN + 1];
+		int i = 0;
+
+		/* Move ptr to start of SHA */
+		ptr += sha_pattern_len;
+		for (i = 0; i < BUILD_INFO_GIT_SHA_LEN; i++) {
+			/* Ensure that the SHA is made up of hex digits */
+			if (!isxdigit(ptr[i]))
+				break;
+
+			git_sha[i] = ptr[i];
+		}
+
+		/* Check if the next char indicates git SHA is dirty */
+		if (ptr[i] == ' ' || ptr[i] == '+') {
+			git_sha[i] = ptr[i];
+			i++;
+		}
+		git_sha[i] = '\0';
+
+		dev_info(kbdev->dev, "Mali firmware git_sha: %s\n", git_sha);
+	} else
+		dev_info(kbdev->dev, "Mali firmware git_sha not found or invalid\n");
+
+	return 0;
+}
+
+/**
  * load_firmware_entry() - Process an entry from a firmware image
+ *
+ * @kbdev:  Kbase device
+ * @fw:     Firmware image containing the entry
+ * @offset: Byte offset within the image of the entry to load
+ * @header: Header word of the entry
  *
  * Read an entry from a firmware image and do any necessary work (e.g. loading
  * the data into page accessible to the MCU).
@@ -727,15 +985,9 @@
  * otherwise the function will fail with -EINVAL
  *
  * Return: 0 if successful, negative error code on failure
- *
- * @kbdev:  Kbase device
- * @fw:     Firmware image containing the entry
- * @offset: Byte offset within the image of the entry to load
- * @header: Header word of the entry
  */
-static int load_firmware_entry(struct kbase_device *kbdev,
-		const struct firmware *fw,
-		u32 offset, u32 header)
+static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_csf_mcu_fw *const fw,
+			       u32 offset, u32 header)
 {
 	const unsigned int type = entry_type(header);
 	unsigned int size = entry_size(header);
@@ -780,18 +1032,6 @@
 		}
 		return kbase_csf_firmware_cfg_option_entry_parse(
 			kbdev, fw, entry, size, updatable);
-	case CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST:
-#ifndef MALI_KBASE_BUILD
-		/* FW UTF option */
-		if (size < 2*sizeof(*entry)) {
-			dev_err(kbdev->dev, "FW UTF entry too short (size=%u)\n",
-					size);
-			return -EINVAL;
-		}
-		return mali_kutf_process_fw_utf_entry(kbdev, fw->data,
-						      fw->size, entry);
-#endif
-		break;
 	case CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER:
 		/* Trace buffer */
 		if (size < TRACE_BUFFER_ENTRY_NAME_OFFSET + sizeof(*entry)) {
@@ -809,13 +1049,35 @@
 			return -EINVAL;
 		}
 		return parse_timeline_metadata_entry(kbdev, fw, entry, size);
-	}
-
-	if (!optional) {
-		dev_err(kbdev->dev,
-			"Unsupported non-optional entry type %u in firmware\n",
-			type);
-		return -EINVAL;
+	case CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA:
+		if (size < BUILD_INFO_METADATA_SIZE_OFFSET + sizeof(*entry)) {
+			dev_err(kbdev->dev, "Build info metadata entry too short (size=%u)\n",
+				size);
+			return -EINVAL;
+		}
+		return parse_build_info_metadata_entry(kbdev, fw, entry, size);
+	case CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST:
+		/* Function call list section */
+		if (size < FUNC_CALL_LIST_ENTRY_NAME_OFFSET + sizeof(*entry)) {
+			dev_err(kbdev->dev, "Function call list entry too short (size=%u)\n",
+				size);
+			return -EINVAL;
+		}
+		kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry);
+		return 0;
+	case CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP:
+		/* Core Dump section */
+		if (size < CORE_DUMP_ENTRY_START_ADDR_OFFSET + sizeof(*entry)) {
+			dev_err(kbdev->dev, "FW Core dump entry too short (size=%u)\n", size);
+			return -EINVAL;
+		}
+		return kbase_csf_firmware_core_dump_entry_parse(kbdev, entry);
+	default:
+		if (!optional) {
+			dev_err(kbdev->dev, "Unsupported non-optional entry type %u in firmware\n",
+				type);
+			return -EINVAL;
+		}
 	}
 
 	return 0;
@@ -994,11 +1256,10 @@
 	iface->group_stride = shared_info[GLB_GROUP_STRIDE/4];
 	iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE/4];
 
-	if (iface->version >= kbase_csf_interface_version(1, 1, 0)) {
+	if (iface->version >= kbase_csf_interface_version(1, 1, 0))
 		iface->instr_features = shared_info[GLB_INSTR_FEATURES / 4];
-	} else {
+	else
 		iface->instr_features = 0;
-	}
 
 	if ((GROUP_CONTROL_0 +
 		(unsigned long)iface->group_num * iface->group_stride) >
@@ -1033,40 +1294,80 @@
 	return 0;
 }
 
+static inline void access_firmware_memory_common(struct kbase_device *kbdev,
+		struct kbase_csf_firmware_interface *interface, u32 offset_bytes,
+		u32 *value, const bool read)
+{
+	u32 page_num = offset_bytes >> PAGE_SHIFT;
+	u32 offset_in_page = offset_bytes & ~PAGE_MASK;
+	struct page *target_page = as_page(interface->phys[page_num]);
+	uintptr_t cpu_addr = (uintptr_t)kmap_atomic(target_page);
+	u32 *addr = (u32 *)(cpu_addr + offset_in_page);
+
+	if (read) {
+		kbase_sync_single_for_device(kbdev,
+			kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page,
+			sizeof(u32), DMA_BIDIRECTIONAL);
+		*value = *addr;
+	} else {
+		*addr = *value;
+		kbase_sync_single_for_device(kbdev,
+			kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page,
+			sizeof(u32), DMA_BIDIRECTIONAL);
+	}
+
+	kunmap_atomic((u32 *)cpu_addr);
+}
+
 static inline void access_firmware_memory(struct kbase_device *kbdev,
 	u32 gpu_addr, u32 *value, const bool read)
 {
-	struct kbase_csf_firmware_interface *interface;
+	struct kbase_csf_firmware_interface *interface, *access_interface = NULL;
+	u32 offset_bytes = 0;
 
 	list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
 		if ((gpu_addr >= interface->virtual) &&
 			(gpu_addr < interface->virtual + (interface->num_pages << PAGE_SHIFT))) {
-			u32 offset_bytes = gpu_addr - interface->virtual;
-			u32 page_num = offset_bytes >> PAGE_SHIFT;
-			u32 offset_in_page = offset_bytes & ~PAGE_MASK;
-			struct page *target_page = as_page(
-				interface->phys[page_num]);
-			u32 *cpu_addr = kmap_atomic(target_page);
-
-			if (read) {
-				kbase_sync_single_for_device(kbdev,
-					kbase_dma_addr(target_page) + offset_in_page,
-					sizeof(u32), DMA_BIDIRECTIONAL);
-
-				*value = cpu_addr[offset_in_page >> 2];
-			} else {
-				cpu_addr[offset_in_page >> 2] = *value;
-
-				kbase_sync_single_for_device(kbdev,
-					kbase_dma_addr(target_page) + offset_in_page,
-					sizeof(u32), DMA_BIDIRECTIONAL);
-			}
-
-			kunmap_atomic(cpu_addr);
-			return;
+			offset_bytes = gpu_addr - interface->virtual;
+			access_interface = interface;
+			break;
 		}
 	}
-	dev_warn(kbdev->dev, "Invalid GPU VA %x passed\n", gpu_addr);
+
+	if (access_interface)
+		access_firmware_memory_common(kbdev, access_interface, offset_bytes, value, read);
+	else
+		dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr);
+}
+
+static inline void access_firmware_memory_exe(struct kbase_device *kbdev,
+	u32 gpu_addr, u32 *value, const bool read)
+{
+	struct kbase_csf_firmware_interface *interface, *access_interface = NULL;
+	u32 offset_bytes = 0;
+
+	list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
+		if ((gpu_addr >= interface->virtual_exe_start) &&
+			(gpu_addr < interface->virtual_exe_start +
+				(interface->num_pages << PAGE_SHIFT))) {
+			offset_bytes = gpu_addr - interface->virtual_exe_start;
+			access_interface = interface;
+
+			/* If there's an overlap in execution address range between a moved and a
+			 * non-moved areas, always prefer the moved one. The idea is that FW may
+			 * move sections around during init time, but after the layout is settled,
+			 * any moved sections are going to override non-moved areas at the same
+			 * location.
+			 */
+			if (interface->virtual_exe_start != interface->virtual)
+				break;
+		}
+	}
+
+	if (access_interface)
+		access_firmware_memory_common(kbdev, access_interface, offset_bytes, value, read);
+	else
+		dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr);
 }
 
 void kbase_csf_read_firmware_memory(struct kbase_device *kbdev,
@@ -1079,6 +1380,18 @@
 	u32 gpu_addr, u32 value)
 {
 	access_firmware_memory(kbdev, gpu_addr, &value, false);
+}
+
+void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev,
+	u32 gpu_addr, u32 *value)
+{
+	access_firmware_memory_exe(kbdev, gpu_addr, value, true);
+}
+
+void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev,
+	u32 gpu_addr, u32 value)
+{
+	access_firmware_memory_exe(kbdev, gpu_addr, &value, false);
 }
 
 void kbase_csf_firmware_cs_input(
@@ -1166,6 +1479,7 @@
 	dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val);
 	return val;
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output);
 
 void kbase_csf_firmware_global_input(
 	const struct kbase_csf_global_iface *const iface, const u32 offset,
@@ -1176,6 +1490,7 @@
 	dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value);
 	input_page_write(iface->input, offset, value);
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input);
 
 void kbase_csf_firmware_global_input_mask(
 	const struct kbase_csf_global_iface *const iface, const u32 offset,
@@ -1187,6 +1502,7 @@
 			offset, value, mask);
 	input_page_partial_write(iface->input, offset, value, mask);
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask);
 
 u32 kbase_csf_firmware_global_input_read(
 	const struct kbase_csf_global_iface *const iface, const u32 offset)
@@ -1207,6 +1523,27 @@
 	dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val);
 	return val;
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output);
+
+/**
+ * csf_doorbell_offset() - Calculate the offset to the CSF host doorbell
+ * @doorbell_nr: Doorbell number
+ *
+ * Return: CSF host register offset for the specified doorbell number.
+ */
+static u32 csf_doorbell_offset(int doorbell_nr)
+{
+	WARN_ON(doorbell_nr < 0);
+	WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL);
+
+	return CSF_HW_DOORBELL_PAGE_OFFSET + (doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE);
+}
+
+void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr)
+{
+	kbase_reg_write(kbdev, csf_doorbell_offset(doorbell_nr), (u32)1);
+}
+EXPORT_SYMBOL(kbase_csf_ring_doorbell);
 
 /**
  * handle_internal_firmware_fatal - Handler for CS internal firmware fault.
@@ -1292,11 +1629,10 @@
 	return complete;
 }
 
-static int wait_for_global_request(struct kbase_device *const kbdev,
-				   u32 const req_mask)
+static int wait_for_global_request_with_timeout(struct kbase_device *const kbdev,
+						u32 const req_mask, unsigned int timeout_ms)
 {
-	const long wait_timeout =
-		kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
+	const long wait_timeout = kbase_csf_timeout_in_jiffies(timeout_ms);
 	long remaining;
 	int err = 0;
 
@@ -1305,12 +1641,19 @@
 				       wait_timeout);
 
 	if (!remaining) {
-		dev_warn(kbdev->dev, "Timed out waiting for global request %x to complete",
-			 req_mask);
+		dev_warn(kbdev->dev,
+			 "[%llu] Timeout (%d ms) waiting for global request %x to complete",
+			 kbase_backend_get_cycle_cnt(kbdev), timeout_ms, req_mask);
 		err = -ETIMEDOUT;
+
 	}
 
 	return err;
+}
+
+static int wait_for_global_request(struct kbase_device *const kbdev, u32 const req_mask)
+{
+	return wait_for_global_request_with_timeout(kbdev, req_mask, kbdev->csf.fw_timeout_ms);
 }
 
 static void set_global_request(
@@ -1371,16 +1714,113 @@
 	set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
 }
 
+static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+	kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
+					kbdev->csf.gpu_idle_dur_count);
+	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
+					     GLB_REQ_IDLE_ENABLE_MASK);
+	dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
+		kbdev->csf.gpu_idle_dur_count);
+}
+
+static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+	bool complete = false;
+	unsigned long flags;
+
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+	if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) ==
+	    (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask))
+		complete = true;
+
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+	return complete;
+}
+
+static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface,
+				     u32 const req_mask)
+{
+	u32 glb_debug_req;
+
+	kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev);
+
+	glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK);
+	glb_debug_req ^= req_mask;
+
+	kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask);
+}
+
+static void request_fw_core_dump(
+	const struct kbase_csf_global_iface *const global_iface)
+{
+	uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP);
+
+	set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode);
+
+	set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK);
+}
+
+int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev)
+{
+	const struct kbase_csf_global_iface *const global_iface =
+		&kbdev->csf.global_iface;
+	unsigned long flags;
+	int ret;
+
+	/* Serialize CORE_DUMP requests. */
+	mutex_lock(&kbdev->csf.reg_lock);
+
+	/* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	request_fw_core_dump(global_iface);
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+	/* Wait for firmware to acknowledge completion of the CORE_DUMP request. */
+	ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK);
+	if (!ret)
+		WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK));
+
+	mutex_unlock(&kbdev->csf.reg_lock);
+
+	return ret;
+}
+
+/**
+ * kbasep_enable_rtu - Enable Ray Tracing Unit on powering up shader core
+ *
+ * @kbdev:     The kbase device structure of the device
+ *
+ * This function needs to be called to enable the Ray Tracing Unit
+ * by writing SHADER_PWRFEATURES only when host controls shader cores power.
+ */
+static void kbasep_enable_rtu(struct kbase_device *kbdev)
+{
+	const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+
+	if (gpu_id < GPU_ID2_PRODUCT_MAKE(12, 8, 3, 0))
+		return;
+
+	if (kbdev->csf.firmware_hctl_core_pwr)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_PWRFEATURES), 1);
+}
+
 static void global_init(struct kbase_device *const kbdev, u64 core_mask)
 {
-	u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
-				 GLB_ACK_IRQ_MASK_PING_MASK |
-				 GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
-				 GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
-				 GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
-				 GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
-				 GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
-				 GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
+	u32 const ack_irq_mask =
+		GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK |
+		GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
+		GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
+		GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
+		GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;
 
 	const struct kbase_csf_global_iface *const global_iface =
 		&kbdev->csf.global_iface;
@@ -1388,10 +1828,7 @@
 
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
 
-	/* Set the coherency mode for protected mode execution */
-	WARN_ON(kbdev->system_coherency == COHERENCY_ACE);
-	kbase_csf_firmware_global_input(global_iface, GLB_PROTM_COHERENCY,
-					kbdev->system_coherency);
+	kbasep_enable_rtu(kbdev);
 
 	/* Update shader core allocation enable mask */
 	enable_endpoints_global(global_iface, core_mask);
@@ -1399,9 +1836,23 @@
 
 	set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
 
+	/* The GPU idle timer is always enabled for simplicity. Checks will be
+	 * done before scheduling the GPU idle worker to see if it is
+	 * appropriate for the current power policy.
+	 */
+	enable_gpu_idle_timer(kbdev);
+
 	/* Unmask the interrupts */
 	kbase_csf_firmware_global_input(global_iface,
 		GLB_ACK_IRQ_MASK, ack_irq_mask);
+
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+	/* Enable FW MCU read/write debug interfaces */
+	kbase_csf_firmware_global_input_mask(
+		global_iface, GLB_DEBUG_ACK_IRQ_MASK,
+		GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK,
+		GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK);
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
 
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 
@@ -1482,8 +1933,7 @@
 }
 
 /**
- * kbase_csf_firmware_reload_worker() -
- * reload the fw image and re-enable the MCU
+ * kbase_csf_firmware_reload_worker() - reload the fw image and re-enable the MCU
  * @work: CSF Work item for reloading the firmware.
  *
  * This helper function will reload the firmware image and re-enable the MCU.
@@ -1503,8 +1953,10 @@
 
 	dev_info(kbdev->dev, "reloading firmware");
 
+	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
 	/* Reload just the data sections from firmware binary image */
-	err = reload_fw_data_sections(kbdev);
+	err = reload_fw_image(kbdev);
 	if (err)
 		return;
 
@@ -1545,19 +1997,19 @@
 	if (version != kbdev->csf.global_iface.version)
 		dev_err(kbdev->dev, "Version check failed in firmware reboot.");
 
-	KBASE_KTRACE_ADD(kbdev, FIRMWARE_REBOOT, NULL, 0u);
+	KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_REBOOT, NULL, 0u);
 
 	/* Tell MCU state machine to transit to next state */
 	kbdev->csf.firmware_reloaded = true;
 	kbase_pm_update_state(kbdev);
 }
 
-static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms)
+static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_us)
 {
 #define HYSTERESIS_VAL_UNIT_SHIFT (10)
 	/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
 	u64 freq = arch_timer_get_cntfrq();
-	u64 dur_val = dur_ms;
+	u64 dur_val = dur_us;
 	u32 cnt_val_u32, reg_val_u32;
 	bool src_system_timestamp = freq > 0;
 
@@ -1570,13 +2022,14 @@
 			dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!");
 		spin_unlock(&kbdev->pm.clk_rtm.lock);
 
-		dev_info(kbdev->dev, "Can't get the timestamp frequency, "
-			 "use cycle counter format with firmware idle hysteresis!");
+		dev_info(
+			kbdev->dev,
+			"Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!");
 	}
 
-	/* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */
+	/* Formula for dur_val = ((dur_us/1000000) * freq_HZ) >> 10) */
 	dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT;
-	dur_val = div_u64(dur_val, 1000);
+	dur_val = div_u64(dur_val, 1000000);
 
 	/* Interface limits the value field to S32_MAX */
 	cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val;
@@ -1595,7 +2048,14 @@
 
 u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
 {
-	return kbdev->csf.gpu_idle_hysteresis_ms;
+	unsigned long flags;
+	u32 dur;
+
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	dur = kbdev->csf.gpu_idle_hysteresis_us;
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+	return dur;
 }
 
 u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
@@ -1603,11 +2063,53 @@
 	unsigned long flags;
 	const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);
 
-	kbase_csf_scheduler_spin_lock(kbdev, &flags);
-	kbdev->csf.gpu_idle_hysteresis_ms = dur;
-	kbdev->csf.gpu_idle_dur_count = hysteresis_val;
-	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+	/* The 'fw_load_lock' is taken to synchronize against the deferred
+	 * loading of FW, where the idle timer will be enabled.
+	 */
+	mutex_lock(&kbdev->fw_load_lock);
+	if (unlikely(!kbdev->csf.firmware_inited)) {
+		kbase_csf_scheduler_spin_lock(kbdev, &flags);
+		kbdev->csf.gpu_idle_hysteresis_us = dur;
+		kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+		mutex_unlock(&kbdev->fw_load_lock);
+		goto end;
+	}
+	mutex_unlock(&kbdev->fw_load_lock);
 
+	kbase_csf_scheduler_pm_active(kbdev);
+	if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
+		dev_err(kbdev->dev,
+			"Unable to activate the MCU, the idle hysteresis value shall remain unchanged");
+		kbase_csf_scheduler_pm_idle(kbdev);
+		return kbdev->csf.gpu_idle_dur_count;
+	}
+
+	/* The 'reg_lock' is also taken and is held until the update is
+	 * complete, to ensure that updates of the idle timer value by multiple
+	 * users get serialized.
+	 */
+	mutex_lock(&kbdev->csf.reg_lock);
+	/* The firmware only reads the new idle timer value when the timer is
+	 * disabled.
+	 */
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+	/* Ensure that the request has taken effect */
+	wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
+
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	kbdev->csf.gpu_idle_hysteresis_us = dur;
+	kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+	kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+	wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
+	mutex_unlock(&kbdev->csf.reg_lock);
+
+	kbase_csf_scheduler_pm_idle(kbdev);
+
+end:
 	dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
 		hysteresis_val);
 
@@ -1616,7 +2118,6 @@
 
 static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us)
 {
-#define PWROFF_VAL_UNIT_SHIFT (10)
 	/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
 	u64 freq = arch_timer_get_cntfrq();
 	u64 dur_val = dur_us;
@@ -1632,8 +2133,9 @@
 			dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!");
 		spin_unlock(&kbdev->pm.clk_rtm.lock);
 
-		dev_info(kbdev->dev, "Can't get the timestamp frequency, "
-			 "use cycle counter with MCU Core Poweroff timer!");
+		dev_info(
+			kbdev->dev,
+			"Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!");
 	}
 
 	/* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */
@@ -1657,7 +2159,14 @@
 
 u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev)
 {
-	return kbdev->csf.mcu_core_pwroff_dur_us;
+	u32 pwroff;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	pwroff = kbdev->csf.mcu_core_pwroff_dur_us;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return pwroff;
 }
 
 u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur)
@@ -1670,34 +2179,124 @@
 	kbdev->csf.mcu_core_pwroff_dur_count = pwroff;
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
-	dev_dbg(kbdev->dev, "MCU Core Poweroff input update: 0x%.8x", pwroff);
+	dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff);
 
 	return pwroff;
 }
 
+/**
+ * kbase_device_csf_iterator_trace_init - Send request to enable iterator
+ *                                        trace port.
+ * @kbdev: Kernel base device pointer
+ *
+ * Return: 0 on success (or if enable request is not sent), or error
+ *         code -EINVAL on failure of GPU to acknowledge enable request.
+ */
+static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev)
+{
+	/* Enable the iterator trace port if supported by the GPU.
+	 * It requires the GPU to have a nonzero "iter_trace_enable"
+	 * property in the device tree, and the FW must advertise
+	 * this feature in GLB_FEATURES.
+	 */
+	if (kbdev->pm.backend.gpu_powered) {
+		/* check device tree for iterator trace enable property */
+		const void *iter_trace_param = of_get_property(
+					       kbdev->dev->of_node,
+					       "iter_trace_enable", NULL);
+
+		const struct kbase_csf_global_iface *iface =
+						&kbdev->csf.global_iface;
+
+		if (iter_trace_param) {
+			u32 iter_trace_value = be32_to_cpup(iter_trace_param);
+
+			if ((iface->features &
+			     GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) &&
+			    iter_trace_value) {
+				long ack_timeout;
+
+				ack_timeout = kbase_csf_timeout_in_jiffies(
+					kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT));
+
+				/* write enable request to global input */
+				kbase_csf_firmware_global_input_mask(
+					iface, GLB_REQ,
+					GLB_REQ_ITER_TRACE_ENABLE_MASK,
+					GLB_REQ_ITER_TRACE_ENABLE_MASK);
+				/* Ring global doorbell */
+				kbase_csf_ring_doorbell(kbdev,
+						    CSF_KERNEL_DOORBELL_NR);
+
+				ack_timeout = wait_event_timeout(
+					kbdev->csf.event_wait,
+					!((kbase_csf_firmware_global_input_read(
+						   iface, GLB_REQ) ^
+					   kbase_csf_firmware_global_output(
+						   iface, GLB_ACK)) &
+					  GLB_REQ_ITER_TRACE_ENABLE_MASK),
+					ack_timeout);
+
+				return ack_timeout ? 0 : -EINVAL;
+
+			}
+		}
+
+	}
+	return 0;
+}
 
 int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
 {
 	init_waitqueue_head(&kbdev->csf.event_wait);
 	kbdev->csf.interrupt_received = false;
-	kbdev->csf.fw_timeout_ms = CSF_FIRMWARE_TIMEOUT_MS;
+
+	kbdev->csf.fw_timeout_ms =
+		kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
+
+	kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
+	kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
+		kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
 
 	INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
 	INIT_LIST_HEAD(&kbdev->csf.firmware_config);
 	INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata);
 	INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
+	INIT_LIST_HEAD(&kbdev->csf.user_reg.list);
 	INIT_WORK(&kbdev->csf.firmware_reload_work,
 		  kbase_csf_firmware_reload_worker);
 	INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
 
 	mutex_init(&kbdev->csf.reg_lock);
 
+	kbdev->csf.fw = (struct kbase_csf_mcu_fw){ .data = NULL };
+
 	return 0;
 }
 
-int kbase_csf_firmware_init(struct kbase_device *kbdev)
+void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
 {
-	const struct firmware *firmware;
+	mutex_destroy(&kbdev->csf.reg_lock);
+}
+
+int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
+{
+	kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC;
+#ifdef KBASE_PM_RUNTIME
+	if (kbase_pm_gpu_sleep_allowed(kbdev))
+		kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
+#endif
+	WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us);
+	kbdev->csf.gpu_idle_dur_count =
+		convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us);
+
+	return 0;
+}
+
+int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
+{
+	const struct firmware *firmware = NULL;
+	struct kbase_csf_mcu_fw *const mcu_fw = &kbdev->csf.fw;
 	const u32 magic = FIRMWARE_HEADER_MAGIC;
 	u8 version_major, version_minor;
 	u32 version_hash;
@@ -1720,19 +2319,11 @@
 		return ret;
 	}
 
-	kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
-	kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
-		kbdev, FIRMWARE_IDLE_HYSTERESIS_TIME_MS);
-
-	kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
-	kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
-		kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
-
 	ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
 	if (ret != 0) {
 		dev_err(kbdev->dev,
 			"Failed to setup the rb tree for managing shared interface segment\n");
-		goto error;
+		goto err_out;
 	}
 
 	if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) {
@@ -1740,43 +2331,60 @@
 				"Failed to load firmware image '%s'\n",
 				fw_name);
 		ret = -ENOENT;
-		goto error;
+	} else {
+		/* Try to save a copy and then release the loaded firmware image */
+		mcu_fw->size = firmware->size;
+		mcu_fw->data = vmalloc((unsigned long)mcu_fw->size);
+
+		if (mcu_fw->data == NULL) {
+			ret = -ENOMEM;
+		} else {
+			memcpy(mcu_fw->data, firmware->data, mcu_fw->size);
+			dev_dbg(kbdev->dev, "Firmware image (%zu-bytes) retained in csf.fw\n",
+				mcu_fw->size);
+		}
+
+		release_firmware(firmware);
 	}
 
-	if (firmware->size < FIRMWARE_HEADER_LENGTH) {
+	/* If loading or saving the image failed, branch to the error path */
+	if (ret)
+		goto err_out;
+
+	if (mcu_fw->size < FIRMWARE_HEADER_LENGTH) {
 		dev_err(kbdev->dev, "Firmware too small\n");
 		ret = -EINVAL;
-		goto error;
+		goto err_out;
 	}
 
-	if (memcmp(firmware->data, &magic, sizeof(magic)) != 0) {
+	if (memcmp(mcu_fw->data, &magic, sizeof(magic)) != 0) {
 		dev_err(kbdev->dev, "Incorrect firmware magic\n");
 		ret = -EINVAL;
-		goto error;
+		goto err_out;
 	}
 
-	version_minor = firmware->data[4];
-	version_major = firmware->data[5];
+	version_minor = mcu_fw->data[4];
+	version_major = mcu_fw->data[5];
 
-	if (version_major != FIRMWARE_HEADER_VERSION) {
+	if (version_major != FIRMWARE_HEADER_VERSION_MAJOR ||
+			version_minor != FIRMWARE_HEADER_VERSION_MINOR) {
 		dev_err(kbdev->dev,
 				"Firmware header version %d.%d not understood\n",
 				version_major, version_minor);
 		ret = -EINVAL;
-		goto error;
+		goto err_out;
 	}
 
-	memcpy(&version_hash, &firmware->data[8], sizeof(version_hash));
+	memcpy(&version_hash, &mcu_fw->data[8], sizeof(version_hash));
 
 	dev_notice(kbdev->dev, "Loading Mali firmware 0x%x", version_hash);
 
-	memcpy(&entry_end_offset, &firmware->data[0x10],
-			sizeof(entry_end_offset));
+	memcpy(&entry_end_offset, &mcu_fw->data[0x10], sizeof(entry_end_offset));
 
-	if (entry_end_offset > firmware->size) {
+	if (entry_end_offset > mcu_fw->size) {
 		dev_err(kbdev->dev, "Firmware image is truncated\n");
 		ret = -EINVAL;
-		goto error;
+		goto err_out;
 	}
 
 	entry_offset = FIRMWARE_HEADER_LENGTH;
@@ -1784,15 +2392,14 @@
 		u32 header;
 		unsigned int size;
 
-		memcpy(&header, &firmware->data[entry_offset], sizeof(header));
+		memcpy(&header, &mcu_fw->data[entry_offset], sizeof(header));
 
 		size = entry_size(header);
 
-		ret = load_firmware_entry(kbdev, firmware, entry_offset,
-				header);
+		ret = load_firmware_entry(kbdev, mcu_fw, entry_offset, header);
 		if (ret != 0) {
 			dev_err(kbdev->dev, "Failed to load firmware image\n");
-			goto error;
+			goto err_out;
 		}
 		entry_offset += size;
 	}
@@ -1800,72 +2407,84 @@
 	if (!kbdev->csf.shared_interface) {
 		dev_err(kbdev->dev, "Shared interface region not found\n");
 		ret = -EINVAL;
-		goto error;
+		goto err_out;
 	} else {
 		ret = setup_shared_iface_static_region(kbdev);
 		if (ret != 0) {
 			dev_err(kbdev->dev, "Failed to insert a region for shared iface entry parsed from fw image\n");
-			goto error;
+			goto err_out;
 		}
 	}
 
 	ret = kbase_csf_firmware_trace_buffers_init(kbdev);
 	if (ret != 0) {
 		dev_err(kbdev->dev, "Failed to initialize trace buffers\n");
-		goto error;
+		goto err_out;
 	}
 
 	/* Make sure L2 cache is powered up */
 	kbase_pm_wait_for_l2_powered(kbdev);
 
 	/* Load the MMU tables into the selected address space */
-	load_mmu_tables(kbdev);
+	ret = load_mmu_tables(kbdev);
+	if (ret != 0)
+		goto err_out;
 
 	boot_csf_firmware(kbdev);
 
 	ret = parse_capabilities(kbdev);
 	if (ret != 0)
-		goto error;
+		goto err_out;
 
 	ret = kbase_csf_doorbell_mapping_init(kbdev);
 	if (ret != 0)
-		goto error;
+		goto err_out;
 
 	ret = kbase_csf_scheduler_init(kbdev);
 	if (ret != 0)
-		goto error;
+		goto err_out;
 
 	ret = kbase_csf_setup_dummy_user_reg_page(kbdev);
 	if (ret != 0)
-		goto error;
+		goto err_out;
 
 	ret = kbase_csf_timeout_init(kbdev);
 	if (ret != 0)
-		goto error;
+		goto err_out;
 
 	ret = global_init_on_boot(kbdev);
 	if (ret != 0)
-		goto error;
+		goto err_out;
 
 	ret = kbase_csf_firmware_cfg_init(kbdev);
 	if (ret != 0)
-		goto error;
+		goto err_out;
 
+	ret = kbase_device_csf_iterator_trace_init(kbdev);
+	if (ret != 0)
+		goto err_out;
 
-	/* Firmware loaded successfully */
-	release_firmware(firmware);
-	KBASE_KTRACE_ADD(kbdev, FIRMWARE_BOOT, NULL,
+	ret = kbase_csf_firmware_log_init(kbdev);
+	if (ret != 0) {
+		dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret);
+		goto err_out;
+	}
+
+	if (kbdev->csf.fw_core_dump.available)
+		kbase_csf_firmware_core_dump_init(kbdev);
+
+	/* Firmware loaded successfully, ret = 0 */
+	KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL,
 			(((u64)version_hash) << 32) |
 			(((u64)version_major) << 8) | version_minor);
 	return 0;
 
-error:
-	kbase_csf_firmware_term(kbdev);
-	release_firmware(firmware);
+err_out:
+	kbase_csf_firmware_unload_term(kbdev);
 	return ret;
 }
 
-void kbase_csf_firmware_term(struct kbase_device *kbdev)
+void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
 {
 	unsigned long flags;
 	int ret = 0;
@@ -1875,6 +2494,8 @@
 	ret = kbase_reset_gpu_wait(kbdev);
 
 	WARN(ret, "failed to wait for GPU reset");
+
+	kbase_csf_firmware_log_term(kbdev);
 
 	kbase_csf_firmware_cfg_term(kbdev);
 
@@ -1917,17 +2538,25 @@
 		list_del(&interface->node);
 
 		vunmap(interface->kernel_map);
-		if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) {
-			kbase_csf_protected_memory_free(kbdev, interface->pma,
-				interface->num_pages);
-		} else {
-			kbase_mem_pool_free_pages(
-				&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-				interface->num_pages, interface->phys,
-				true, false);
+
+		if (!interface->reuse_pages) {
+			if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) {
+				kbase_csf_protected_memory_free(
+					kbdev, interface->pma, interface->num_pages_aligned,
+					interface->is_small_page);
+			} else {
+				kbase_mem_pool_free_pages(
+					kbase_mem_pool_group_select(
+						kbdev, KBASE_MEM_GROUP_CSF_FW,
+						interface->is_small_page),
+					interface->num_pages_aligned,
+					interface->phys,
+					true, false);
+			}
+
+			kfree(interface->phys);
 		}
 
-		kfree(interface->phys);
 		kfree(interface);
 	}
 
@@ -1943,16 +2572,17 @@
 		kfree(metadata);
 	}
 
-#ifndef MALI_KBASE_BUILD
-	mali_kutf_fw_utf_entry_cleanup(kbdev);
-#endif
+	if (kbdev->csf.fw.data) {
+		/* Free the copy of the firmware image */
+		vfree(kbdev->csf.fw.data);
+		kbdev->csf.fw.data = NULL;
+		dev_dbg(kbdev->dev, "Free retained image csf.fw (%zu-bytes)\n", kbdev->csf.fw.size);
+	}
 
 	/* This will also free up the region allocated for the shared interface
 	 * entry parsed from the firmware image.
 	 */
 	kbase_mcu_shared_interface_region_tracker_term(kbdev);
-
-	mutex_destroy(&kbdev->csf.reg_lock);
 
 	kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
 
@@ -1960,32 +2590,135 @@
 	kbdev->as_free |= MCU_AS_BITMASK;
 }
 
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
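+/* Write a value to a register in the MCU address space on behalf of the
+ * CoreSight support code (CONFIG_MALI_CORESIGHT). The target address and
+ * value are passed to the firmware via GLB_DEBUG_ARG_IN0/IN1, the FW_AS_WRITE
+ * bit of GLB_DEBUG_REQ is toggled, and the function then waits for the
+ * firmware to acknowledge the global debug request.
+ */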
+int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr,
+					  u32 const reg_val)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+	unsigned long flags;
+	int err;
+	u32 glb_req;
+
+	mutex_lock(&kbdev->csf.reg_lock);
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+	/* Set the address and value to write */
+	kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr);
+	kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN1, reg_val);
+
+	/* Set the Global Debug request for FW MCU write */
+	glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK);
+	glb_req ^= GLB_DEBUG_REQ_FW_AS_WRITE_MASK;
+	kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req,
+					     GLB_DEBUG_REQ_FW_AS_WRITE_MASK);
+
+	set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK);
+
+	/* Notify FW about the Global Debug request */
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+	err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK);
+
+	mutex_unlock(&kbdev->csf.reg_lock);
+
+	dev_dbg(kbdev->dev, "w: reg %08x val %08x", reg_addr, reg_val);
+
+	return err;
+}
+
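+/* Read a register in the MCU address space using the same GLB_DEBUG handshake
+ * as the write helper above: the address is passed via GLB_DEBUG_ARG_IN0, the
+ * FW_AS_READ bit of GLB_DEBUG_REQ is toggled, and once the firmware
+ * acknowledges the request the value is read back from GLB_DEBUG_ARG_OUT0.
+ */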
+int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr,
+					 u32 *reg_val)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+	unsigned long flags;
+	int err;
+	u32 glb_req;
+
+	if (WARN_ON(reg_val == NULL))
+		return -EINVAL;
+
+	mutex_lock(&kbdev->csf.reg_lock);
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+	/* Set the address to read */
+	kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr);
+
+	/* Set the Global Debug request for FW MCU read */
+	glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK);
+	glb_req ^= GLB_DEBUG_REQ_FW_AS_READ_MASK;
+	kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req,
+					     GLB_DEBUG_REQ_FW_AS_READ_MASK);
+
+	set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK);
+
+	/* Notify FW about the Global Debug request */
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+	err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK);
+
+	if (!err) {
+		kbase_csf_scheduler_spin_lock(kbdev, &flags);
+		*reg_val = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ARG_OUT0);
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+	}
+
+	mutex_unlock(&kbdev->csf.reg_lock);
+
+	dev_dbg(kbdev->dev, "r: reg %08x val %08x", reg_addr, *reg_val);
+
+	return err;
+}
+
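+/* Poll a register in the MCU address space until the bits selected by
+ * val_mask read back as reg_val, or until the firmware timeout
+ * (kbdev->csf.fw_timeout_ms) expires.
+ */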
+int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr,
+					 u32 const val_mask, u32 const reg_val)
+{
+	unsigned long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms) + jiffies;
+	u32 read_val;
+
+	dev_dbg(kbdev->dev, "p: reg %08x val %08x mask %08x", reg_addr, reg_val, val_mask);
+
+	while (time_before(jiffies, remaining)) {
+		int err = kbase_csf_firmware_mcu_register_read(kbdev, reg_addr, &read_val);
+
+		if (err) {
+			dev_err(kbdev->dev,
+				"Error reading MCU register value (read_val = %u, expect = %u)\n",
+				read_val, reg_val);
+			return err;
+		}
+
+		if ((read_val & val_mask) == reg_val)
+			return 0;
+	}
+
+	dev_err(kbdev->dev,
+		"Timeout waiting for MCU register value to be set (read_val = %u, expect = %u)\n",
+		read_val, reg_val);
+
+	return -ETIMEDOUT;
+}
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+
 void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
 {
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
-	const u32 glb_req =
-		kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
+	const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
 
 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
-
 	/* The scheduler is assumed to only call the enable when its internal
 	 * state indicates that the idle timer has previously been disabled. So
 	 * on entry the expected field values are:
 	 *   1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
 	 *   2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0
 	 */
-
 	if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
 		dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");
 
-	kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
-					kbdev->csf.gpu_idle_dur_count);
-
-	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
-				GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
-
-	dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
-		kbdev->csf.gpu_idle_dur_count);
+	enable_gpu_idle_timer(kbdev);
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 }
 
@@ -2015,10 +2748,11 @@
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
 }
 
-int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev)
+int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms)
 {
 	kbase_csf_firmware_ping(kbdev);
-	return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
+
+	return wait_for_global_request_with_timeout(kbdev, GLB_REQ_PING_MASK, wait_timeout_ms);
 }
 
 int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev,
@@ -2048,31 +2782,63 @@
 void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
 {
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
-	unsigned long flags;
-	int err;
 
-	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev);
+
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
 	set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK);
 	dev_dbg(kbdev->dev, "Sending request to enter protected mode");
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
-	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
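+/* Wait for the firmware to acknowledge the PROTM_ENTER request issued by
+ * kbase_csf_enter_protected_mode(), then poll GPU_STATUS until the GPU
+ * reports PROTECTED_MODE_ACTIVE (or the scheduler indicates that protected
+ * mode has already been exited). On any failure the GPU is reset.
+ */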
+int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
+{
+	int err;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
 
 	err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
 
 	if (!err) {
-		unsigned long irq_flags;
+#define WAIT_TIMEOUT 5000 /* 5000 x 10us delays = 50ms timeout */
+#define DELAY_TIME_IN_US 10
+		const int max_iterations = WAIT_TIMEOUT;
+		int loop;
 
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-		kbdev->protected_mode = true;
-		kbase_ipa_protection_mode_switch_event(kbdev);
-		kbase_ipa_control_protm_entered(kbdev);
+		/* Wait for the GPU to actually enter protected mode */
+		for (loop = 0; loop < max_iterations; loop++) {
+			unsigned long flags;
+			bool pmode_exited;
 
-		kbase_csf_scheduler_spin_lock(kbdev, &irq_flags);
-		kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface);
-		kbase_csf_scheduler_spin_unlock(kbdev, irq_flags);
+			if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) &
+			    GPU_STATUS_PROTECTED_MODE_ACTIVE)
+				break;
 
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			/* Check if the GPU has already exited protected mode */
+			kbase_csf_scheduler_spin_lock(kbdev, &flags);
+			pmode_exited =
+				!kbase_csf_scheduler_protected_mode_in_use(kbdev);
+			kbase_csf_scheduler_spin_unlock(kbdev, flags);
+			if (pmode_exited)
+				break;
+
+			udelay(DELAY_TIME_IN_US);
+		}
+
+		if (loop == max_iterations) {
+			dev_err(kbdev->dev, "Timeout for actual pmode entry after PROTM_ENTER ack");
+			err = -ETIMEDOUT;
+		}
 	}
+
+	if (unlikely(err)) {
+		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+			kbase_reset_gpu(kbdev);
+	}
+
+	KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev);
+
+	return err;
 }
 
 void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
@@ -2080,12 +2846,53 @@
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
 	unsigned long flags;
 
+	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	/* Validate there are no on-slot groups when sending the
+	 * halt request to firmware.
+	 */
+	WARN_ON(kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev));
 	set_global_request(global_iface, GLB_REQ_HALT_MASK);
 	dev_dbg(kbdev->dev, "Sending request to HALT MCU");
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
 }
+
+void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
+{
+	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
+	/* Trigger the boot of MCU firmware. Use the AUTO mode, as
+	 * otherwise, on a fast reset done to exit protected mode, the
+	 * MCU will not reboot by itself to re-enter normal mode.
+	 */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO);
+}
+
+#ifdef KBASE_PM_RUNTIME
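+/* Ask the firmware to transition the MCU to the sleep state; only built in
+ * when runtime PM (KBASE_PM_RUNTIME) is enabled.
+ */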
+void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+	unsigned long flags;
+
+	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	set_global_request(global_iface, GLB_REQ_SLEEP_MASK);
+	dev_dbg(kbdev->dev, "Sending sleep request to MCU");
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
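+/* The MCU is considered to be in the sleep state once the SLEEP request has
+ * been acknowledged by the firmware and the MCU is reported as halted.
+ */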
+bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	return (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK) &&
+		kbase_csf_firmware_mcu_halted(kbdev));
+}
+#endif
 
 int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev)
 {
@@ -2095,6 +2902,7 @@
 
 	/* Ensure GPU is powered-up until we complete config update.*/
 	kbase_csf_scheduler_pm_active(kbdev);
+	kbase_csf_scheduler_wait_mcu_active(kbdev);
 
 	/* The 'reg_lock' is also taken and is held till the update is
 	 * complete, to ensure the config update gets serialized.
@@ -2234,7 +3042,7 @@
 		gpu_map_prot =
 			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
 		cpu_map_prot = pgprot_writecombine(cpu_map_prot);
-	};
+	}
 
 	phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
 	if (!phys)
@@ -2244,9 +3052,8 @@
 	if (!page_list)
 		goto page_list_alloc_error;
 
-	ret = kbase_mem_pool_alloc_pages(
-		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-		num_pages, phys, false);
+	ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
+					 phys, false, NULL);
 	if (ret <= 0)
 		goto phys_mem_pool_alloc_error;
 
@@ -2257,8 +3064,8 @@
 	if (!cpu_addr)
 		goto vmap_error;
 
-	va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
-			num_pages, KBASE_REG_ZONE_MCU_SHARED);
+	va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
+					 KBASE_REG_ZONE_MCU_SHARED);
 	if (!va_reg)
 		goto va_region_alloc_error;
 
@@ -2272,9 +3079,9 @@
 	gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR);
 	gpu_map_properties |= gpu_map_prot;
 
-	ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
-			va_reg->start_pfn, &phys[0], num_pages,
-			gpu_map_properties, KBASE_MEM_GROUP_CSF_FW);
+	ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn,
+					      &phys[0], num_pages, gpu_map_properties,
+					      KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false);
 	if (ret)
 		goto mmu_insert_pages_error;
 
@@ -2288,7 +3095,7 @@
 
 mmu_insert_pages_error:
 	mutex_lock(&kbdev->csf.reg_lock);
-	kbase_remove_va_region(va_reg);
+	kbase_remove_va_region(kbdev, va_reg);
 va_region_add_error:
 	kbase_free_alloced_region(va_reg);
 	mutex_unlock(&kbdev->csf.reg_lock);
@@ -2320,7 +3127,7 @@
 {
 	if (csf_mapping->va_reg) {
 		mutex_lock(&kbdev->csf.reg_lock);
-		kbase_remove_va_region(csf_mapping->va_reg);
+		kbase_remove_va_region(kbdev, csf_mapping->va_reg);
 		kbase_free_alloced_region(csf_mapping->va_reg);
 		mutex_unlock(&kbdev->csf.reg_lock);
 	}

--
Gitblit v1.6.2