From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] disable FB
---
kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c | 504 +++++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 388 insertions(+), 116 deletions(-)
diff --git a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c
index 48864cc..833947f 100644
--- a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,10 +27,13 @@
#include "mali_kbase_reset_gpu.h"
#include "mali_kbase_ctx_sched.h"
#include "device/mali_kbase_device.h"
+#include <mali_kbase_hwaccess_time.h>
#include "backend/gpu/mali_kbase_pm_internal.h"
#include "mali_kbase_csf_scheduler.h"
#include "mmu/mali_kbase_mmu.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
+#include <backend/gpu/mali_kbase_model_linux.h>
+#include <csf/mali_kbase_csf_registers.h>
#include <linux/list.h>
#include <linux/slab.h>
@@ -100,7 +103,7 @@
#define CSF_GLB_REQ_CFG_MASK \
(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
- GLB_REQ_CFG_PWROFF_TIMER_MASK)
+ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
static inline u32 input_page_read(const u32 *const input, const u32 offset)
{
@@ -115,15 +118,6 @@
WARN_ON(offset % sizeof(u32));
input[offset / sizeof(u32)] = value;
-}
-
-static inline void input_page_partial_write(u32 *const input, const u32 offset,
- u32 value, u32 mask)
-{
- WARN_ON(offset % sizeof(u32));
-
- input[offset / sizeof(u32)] =
- (input_page_read(input, offset) & ~mask) | (value & mask);
}
static inline u32 output_page_read(const u32 *const output, const u32 offset)
@@ -144,13 +138,13 @@
/**
* invent_memory_setup_entry() - Invent an "interface memory setup" section
*
+ * @kbdev: Kbase device structure
+ *
* Invent an "interface memory setup" section similar to one from a firmware
* image. If successful the interface will be added to the
* kbase_device:csf.firmware_interfaces list.
*
* Return: 0 if successful, negative error code on failure
- *
- * @kbdev: Kbase device structure
*/
static int invent_memory_setup_entry(struct kbase_device *kbdev)
{
@@ -201,9 +195,8 @@
ginfo->stream_stride = 0;
ginfo->streams = kcalloc(ginfo->stream_num, sizeof(*ginfo->streams), GFP_KERNEL);
- if (ginfo->streams == NULL) {
+ if (ginfo->streams == NULL)
return -ENOMEM;
- }
for (sid = 0; sid < ginfo->stream_num; ++sid) {
struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[sid];
@@ -236,7 +229,8 @@
iface->version = 1;
iface->kbdev = kbdev;
iface->features = 0;
- iface->prfcnt_size = 64;
+ iface->prfcnt_size =
+ GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET(0, KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE);
if (iface->version >= kbase_csf_interface_version(1, 1, 0)) {
/* update rate=1, max event size = 1<<8 = 256 */
@@ -249,9 +243,8 @@
iface->group_stride = 0;
iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), GFP_KERNEL);
- if (iface->groups == NULL) {
+ if (iface->groups == NULL)
return -ENOMEM;
- }
for (gid = 0; gid < iface->group_num; ++gid) {
int err;
@@ -275,6 +268,18 @@
void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
+ u32 gpu_addr, u32 value)
+{
+ /* NO_MALI: Nothing to do here */
+}
+
+void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev,
+ u32 gpu_addr, u32 *value)
+{
+ /* NO_MALI: Nothing to do here */
+}
+
+void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev,
u32 gpu_addr, u32 value)
{
/* NO_MALI: Nothing to do here */
@@ -379,37 +384,7 @@
dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val);
return val;
}
-
-static void
-csf_firmware_prfcnt_process(const struct kbase_csf_global_iface *const iface,
- const u32 glb_req)
-{
- struct kbase_device *kbdev = iface->kbdev;
- u32 glb_ack = output_page_read(iface->output, GLB_ACK);
- /* If the value of GLB_REQ.PRFCNT_SAMPLE is different from the value of
- * GLB_ACK.PRFCNT_SAMPLE, the CSF will sample the performance counters.
- */
- if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_SAMPLE_MASK) {
- /* NO_MALI only uses the first buffer in the ring buffer. */
- input_page_write(iface->input, GLB_PRFCNT_EXTRACT, 0);
- output_page_write(iface->output, GLB_PRFCNT_INSERT, 1);
- kbase_reg_write(kbdev, GPU_COMMAND, GPU_COMMAND_PRFCNT_SAMPLE);
- }
-
- /* Propagate enable masks to model if request to enable. */
- if (glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) {
- u32 tiler_en, l2_en, sc_en;
-
- tiler_en = input_page_read(iface->input, GLB_PRFCNT_TILER_EN);
- l2_en = input_page_read(iface->input, GLB_PRFCNT_MMU_L2_EN);
- sc_en = input_page_read(iface->input, GLB_PRFCNT_SHADER_EN);
-
- /* NO_MALI platform enabled all CSHW counters by default. */
- kbase_reg_write(kbdev, PRFCNT_TILER_EN, tiler_en);
- kbase_reg_write(kbdev, PRFCNT_MMU_L2_EN, l2_en);
- kbase_reg_write(kbdev, PRFCNT_SHADER_EN, sc_en);
- }
-}
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output);
void kbase_csf_firmware_global_input(
const struct kbase_csf_global_iface *const iface, const u32 offset,
@@ -421,11 +396,20 @@
input_page_write(iface->input, offset, value);
if (offset == GLB_REQ) {
- csf_firmware_prfcnt_process(iface, value);
- /* NO_MALI: Immediately acknowledge requests */
- output_page_write(iface->output, GLB_ACK, value);
+ /* NO_MALI: Immediately acknowledge requests - except for PRFCNT_ENABLE
+ * and PRFCNT_SAMPLE. These will be processed along with the
+ * corresponding performance counter registers when the global doorbell
+ * is rung in order to emulate the performance counter sampling behavior
+ * of the real firmware.
+ */
+ const u32 ack = output_page_read(iface->output, GLB_ACK);
+ const u32 req_mask = ~(GLB_REQ_PRFCNT_ENABLE_MASK | GLB_REQ_PRFCNT_SAMPLE_MASK);
+ const u32 toggled = (value ^ ack) & req_mask;
+
+ output_page_write(iface->output, GLB_ACK, ack ^ toggled);
}
}
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input);
void kbase_csf_firmware_global_input_mask(
const struct kbase_csf_global_iface *const iface, const u32 offset,
@@ -439,6 +423,7 @@
/* NO_MALI: Go through kbase_csf_firmware_global_input to capture writes */
kbase_csf_firmware_global_input(iface, offset, (input_page_read(iface->input, offset) & ~mask) | (value & mask));
}
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask);
u32 kbase_csf_firmware_global_input_read(
const struct kbase_csf_global_iface *const iface, const u32 offset)
@@ -459,6 +444,100 @@
dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val);
return val;
}
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output);
+
+/**
+ * csf_doorbell_prfcnt() - Process CSF performance counter doorbell request
+ *
+ * @kbdev: An instance of the GPU platform device
+ */
+static void csf_doorbell_prfcnt(struct kbase_device *kbdev)
+{
+ struct kbase_csf_global_iface *iface;
+ u32 req;
+ u32 ack;
+ u32 extract_index;
+
+ if (WARN_ON(!kbdev))
+ return;
+
+ iface = &kbdev->csf.global_iface;
+
+ req = input_page_read(iface->input, GLB_REQ);
+ ack = output_page_read(iface->output, GLB_ACK);
+ extract_index = input_page_read(iface->input, GLB_PRFCNT_EXTRACT);
+
+ /* Process enable bit toggle */
+ if ((req ^ ack) & GLB_REQ_PRFCNT_ENABLE_MASK) {
+ if (req & GLB_REQ_PRFCNT_ENABLE_MASK) {
+ /* Reset insert index to zero on enable bit set */
+ output_page_write(iface->output, GLB_PRFCNT_INSERT, 0);
+ WARN_ON(extract_index != 0);
+ }
+ ack ^= GLB_REQ_PRFCNT_ENABLE_MASK;
+ }
+
+ /* Process sample request */
+ if ((req ^ ack) & GLB_REQ_PRFCNT_SAMPLE_MASK) {
+ const u32 ring_size = GLB_PRFCNT_CONFIG_SIZE_GET(
+ input_page_read(iface->input, GLB_PRFCNT_CONFIG));
+ u32 insert_index = output_page_read(iface->output, GLB_PRFCNT_INSERT);
+
+ const bool prev_overflow = (req ^ ack) & GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK;
+ const bool prev_threshold = (req ^ ack) & GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK;
+
+ /* If ringbuffer is full toggle PRFCNT_OVERFLOW and skip sample */
+ if (insert_index - extract_index >= ring_size) {
+ WARN_ON(insert_index - extract_index > ring_size);
+ if (!prev_overflow)
+ ack ^= GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK;
+ } else {
+ struct gpu_model_prfcnt_en enable_maps = {
+ .fe = input_page_read(iface->input, GLB_PRFCNT_CSF_EN),
+ .tiler = input_page_read(iface->input, GLB_PRFCNT_TILER_EN),
+ .l2 = input_page_read(iface->input, GLB_PRFCNT_MMU_L2_EN),
+ .shader = input_page_read(iface->input, GLB_PRFCNT_SHADER_EN),
+ };
+
+ const u64 prfcnt_base =
+ input_page_read(iface->input, GLB_PRFCNT_BASE_LO) +
+ ((u64)input_page_read(iface->input, GLB_PRFCNT_BASE_HI) << 32);
+
+ u32 *sample_base = (u32 *)(uintptr_t)prfcnt_base +
+ (KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE *
+ (insert_index % ring_size));
+
+ /* trigger sample dump in the dummy model */
+ gpu_model_prfcnt_dump_request(sample_base, enable_maps);
+
+ /* increment insert index and toggle PRFCNT_SAMPLE bit in ACK */
+ output_page_write(iface->output, GLB_PRFCNT_INSERT, ++insert_index);
+ ack ^= GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK;
+ }
+
+ /* When the ringbuffer reaches 50% capacity toggle PRFCNT_THRESHOLD */
+ if (!prev_threshold && (insert_index - extract_index >= (ring_size / 2)))
+ ack ^= GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK;
+ }
+
+ /* Update GLB_ACK */
+ output_page_write(iface->output, GLB_ACK, ack);
+}
+
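+/**
+ * kbase_csf_ring_doorbell() - Ring a doorbell on the NO_MALI dummy model
+ *
+ * @kbdev: An instance of the GPU platform device
+ * @doorbell_nr: Index of the doorbell to ring
+ *
+ * For the kernel doorbell this emulates the firmware side of the interface:
+ * pending performance counter requests are processed and a job IRQ is
+ * raised on the dummy model.
+ */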
+void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr)
+{
+ WARN_ON(doorbell_nr < 0);
+ WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL);
+
+ if (WARN_ON(!kbdev))
+ return;
+
+ if (doorbell_nr == CSF_KERNEL_DOORBELL_NR) {
+ csf_doorbell_prfcnt(kbdev);
+ gpu_model_glb_request_job_irq(kbdev->model);
+ }
+}
+EXPORT_SYMBOL(kbase_csf_ring_doorbell);
/**
* handle_internal_firmware_fatal - Handler for CS internal firmware fault.
@@ -560,6 +639,8 @@
dev_warn(kbdev->dev, "Timed out waiting for global request %x to complete",
req_mask);
err = -ETIMEDOUT;
}
return err;
@@ -621,16 +702,94 @@
set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
}
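+/**
+ * enable_gpu_idle_timer() - Program and enable the GPU idle timer
+ *
+ * @kbdev: An instance of the GPU platform device
+ *
+ * Writes the currently configured idle hysteresis count to GLB_IDLE_TIMER
+ * and requests the firmware interface to enable the idle timer.
+ */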
+static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
+ kbdev->csf.gpu_idle_dur_count);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
+ GLB_REQ_IDLE_ENABLE_MASK);
+ dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
+ kbdev->csf.gpu_idle_dur_count);
+}
+
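+/**
+ * global_debug_request_complete() - Check if a global debug request completed
+ *
+ * @kbdev: An instance of the GPU platform device
+ * @req_mask: Mask of the global debug request bits to check
+ *
+ * Return: true if the masked bits of GLB_DEBUG_ACK match those of
+ * GLB_DEBUG_REQ, false otherwise.
+ */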
+static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ bool complete = false;
+ unsigned long flags;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+ if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) ==
+ (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask))
+ complete = true;
+
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ return complete;
+}
+
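+/**
+ * set_global_debug_request() - Toggle the selected bits in GLB_DEBUG_REQ
+ *
+ * @global_iface: CSF global firmware interface
+ * @req_mask: Mask of the debug request bits to toggle
+ *
+ * The new request value is derived from GLB_DEBUG_ACK so that the selected
+ * bits end up differing from the current acknowledgement value.
+ */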
+static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface,
+ u32 const req_mask)
+{
+ u32 glb_debug_req;
+
+ kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev);
+
+ glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK);
+ glb_debug_req ^= req_mask;
+
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask);
+}
+
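+/**
+ * request_fw_core_dump() - Request a firmware core dump
+ *
+ * @global_iface: CSF global firmware interface
+ *
+ * Sets the GLB_DEBUG_REQ run mode to CORE_DUMP and raises the
+ * corresponding DEBUG_CSF_REQ global request.
+ */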
+static void request_fw_core_dump(
+ const struct kbase_csf_global_iface *const global_iface)
+{
+ uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP);
+
+ set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode);
+
+ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK);
+}
+
+int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev)
+{
+ const struct kbase_csf_global_iface *const global_iface =
+ &kbdev->csf.global_iface;
+ unsigned long flags;
+ int ret;
+
+ /* Serialize CORE_DUMP requests. */
+ mutex_lock(&kbdev->csf.reg_lock);
+
+ /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ request_fw_core_dump(global_iface);
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ /* Wait for firmware to acknowledge completion of the CORE_DUMP request. */
+ ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK);
+ if (!ret)
+ WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK));
+
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ return ret;
+}
+
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
{
- u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
- GLB_ACK_IRQ_MASK_PING_MASK |
- GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
- GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
- GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
- GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
- GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
- GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
+ u32 const ack_irq_mask =
+ GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK |
+ GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
+ GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
+ GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
+ GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | GLB_REQ_DEBUG_CSF_REQ_MASK;
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
@@ -643,6 +802,12 @@
enable_shader_poweroff_timer(kbdev, global_iface);
set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
+
+ /* The GPU idle timer is always enabled for simplicity. Checks will be
+ * done before scheduling the GPU idle worker to see if it is
+ * appropriate for the current power policy.
+ */
+ enable_gpu_idle_timer(kbdev);
/* Unmask the interrupts */
kbase_csf_firmware_global_input(global_iface,
@@ -786,8 +951,9 @@
dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!");
spin_unlock(&kbdev->pm.clk_rtm.lock);
- dev_info(kbdev->dev, "Can't get the timestamp frequency, "
- "use cycle counter format with firmware idle hysteresis!");
+ dev_info(
+ kbdev->dev,
+ "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!");
}
/* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */
@@ -811,7 +977,14 @@
u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
{
- return kbdev->csf.gpu_idle_hysteresis_ms;
+ unsigned long flags;
+ u32 dur;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ dur = kbdev->csf.gpu_idle_hysteresis_us;
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ return dur;
}
u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
@@ -819,11 +992,53 @@
unsigned long flags;
const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbdev->csf.gpu_idle_hysteresis_ms = dur;
- kbdev->csf.gpu_idle_dur_count = hysteresis_val;
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ /* The 'fw_load_lock' is taken to synchronize against the deferred
+ * loading of FW, where the idle timer will be enabled.
+ */
+ mutex_lock(&kbdev->fw_load_lock);
+ if (unlikely(!kbdev->csf.firmware_inited)) {
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbdev->csf.gpu_idle_hysteresis_us = dur;
+ kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ mutex_unlock(&kbdev->fw_load_lock);
+ goto end;
+ }
+ mutex_unlock(&kbdev->fw_load_lock);
+ kbase_csf_scheduler_pm_active(kbdev);
+ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
+ dev_err(kbdev->dev,
+ "Unable to activate the MCU, the idle hysteresis value shall remain unchanged");
+ kbase_csf_scheduler_pm_idle(kbdev);
+ return kbdev->csf.gpu_idle_dur_count;
+ }
+
+ /* The 'reg_lock' is also taken and held until the update is
+ * complete, to ensure that updates of the idle timer value by
+ * multiple users are serialized.
+ */
+ mutex_lock(&kbdev->csf.reg_lock);
+ /* The firmware only reads the new idle timer value when the timer is
+ * disabled.
+ */
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ /* Ensure that the request has taken effect */
+ wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbdev->csf.gpu_idle_hysteresis_us = dur;
+ kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ kbase_csf_scheduler_pm_idle(kbdev);
+
+end:
dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
hysteresis_val);
@@ -832,7 +1047,6 @@
static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us)
{
-#define PWROFF_VAL_UNIT_SHIFT (10)
/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
u64 freq = arch_timer_get_cntfrq();
u64 dur_val = dur_us;
@@ -848,8 +1062,9 @@
dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!");
spin_unlock(&kbdev->pm.clk_rtm.lock);
- dev_info(kbdev->dev, "Can't get the timestamp frequency, "
- "use cycle counter with MCU Core Poweroff timer!");
+ dev_info(
+ kbdev->dev,
+ "Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!");
}
/* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */
@@ -873,7 +1088,14 @@
u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev)
{
- return kbdev->csf.mcu_core_pwroff_dur_us;
+ u32 pwroff;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ pwroff = kbdev->csf.mcu_core_pwroff_dur_us;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return pwroff;
}
u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur)
@@ -886,7 +1108,7 @@
kbdev->csf.mcu_core_pwroff_dur_count = pwroff;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- dev_dbg(kbdev->dev, "MCU Core Poweroff input update: 0x%.8x", pwroff);
+ dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff);
return pwroff;
}
@@ -895,11 +1117,14 @@
{
init_waitqueue_head(&kbdev->csf.event_wait);
kbdev->csf.interrupt_received = false;
- kbdev->csf.fw_timeout_ms = CSF_FIRMWARE_TIMEOUT_MS;
+
+ kbdev->csf.fw_timeout_ms =
+ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
+ INIT_LIST_HEAD(&kbdev->csf.user_reg.list);
INIT_WORK(&kbdev->csf.firmware_reload_work,
kbase_csf_firmware_reload_worker);
INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
@@ -909,7 +1134,26 @@
return 0;
}
-int kbase_csf_firmware_init(struct kbase_device *kbdev)
+void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
+{
+ mutex_destroy(&kbdev->csf.reg_lock);
+}
+
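+/* Derive the default GPU idle hysteresis count before the firmware is
+ * loaded; the hysteresis time is scaled down when GPU sleep is allowed.
+ */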
+int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
+{
+ kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC;
+#ifdef KBASE_PM_RUNTIME
+ if (kbase_pm_gpu_sleep_allowed(kbdev))
+ kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
+#endif
+ WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us);
+ kbdev->csf.gpu_idle_dur_count =
+ convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us);
+
+ return 0;
+}
+
+int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
{
int ret;
@@ -927,10 +1171,6 @@
kbdev->as_free |= MCU_AS_BITMASK;
return ret;
}
-
- kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
- kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
- kbdev, FIRMWARE_IDLE_HYSTERESIS_TIME_MS);
ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
if (ret != 0) {
@@ -979,19 +1219,17 @@
return 0;
error:
- kbase_csf_firmware_term(kbdev);
+ kbase_csf_firmware_unload_term(kbdev);
return ret;
}
-void kbase_csf_firmware_term(struct kbase_device *kbdev)
+void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
{
cancel_work_sync(&kbdev->csf.fw_error_work);
kbase_csf_timeout_term(kbdev);
/* NO_MALI: Don't stop firmware or unload MMU tables */
-
- kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
kbase_csf_scheduler_term(kbdev);
@@ -1018,44 +1256,30 @@
/* NO_MALI: No trace buffers to terminate */
-#ifndef MALI_KBASE_BUILD
- mali_kutf_fw_utf_entry_cleanup(kbdev);
-#endif
-
- mutex_destroy(&kbdev->csf.reg_lock);
-
/* This will also free up the region allocated for the shared interface
* entry parsed from the firmware image.
*/
kbase_mcu_shared_interface_region_tracker_term(kbdev);
+
+ kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
}
void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
- u32 glb_req;
+ const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
-
/* The scheduler is assumed to only call the enable when its internal
* state indicates that the idle timer has previously been disabled. So
* on entry the expected field values are:
* 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
* 2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0
*/
-
- glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");
- kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
- kbdev->csf.gpu_idle_dur_count);
-
- kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
- GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
-
- dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
- kbdev->csf.gpu_idle_dur_count);
+ enable_gpu_idle_timer(kbdev);
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}
@@ -1086,8 +1310,9 @@
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
-int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev)
+int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms)
{
+ CSTD_UNUSED(wait_timeout_ms);
kbase_csf_firmware_ping(kbdev);
return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
}
@@ -1119,15 +1344,23 @@
void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
- unsigned long flags;
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK);
dev_dbg(kbdev->dev, "Sending request to enter protected mode");
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
- wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
+int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
+{
+ int err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
+
+ if (err) {
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu(kbdev);
+ }
+
+ return err;
}
void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
@@ -1136,11 +1369,46 @@
unsigned long flags;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ /* Validate there are no on-slot groups when sending the
+ * halt request to firmware.
+ */
+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev));
set_global_request(global_iface, GLB_REQ_HALT_MASK);
dev_dbg(kbdev->dev, "Sending request to HALT MCU");
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
+
+void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
+{
+ /* Trigger the boot of the MCU firmware. Use the AUTO mode, as
+ * otherwise, on a fast reset to exit protected mode, the MCU will
+ * not reboot by itself to enter normal mode.
+ */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO);
+}
+
+#ifdef KBASE_PM_RUNTIME
+void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ unsigned long flags;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ set_global_request(global_iface, GLB_REQ_SLEEP_MASK);
+ dev_dbg(kbdev->dev, "Sending sleep request to MCU");
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
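+/* The MCU is considered to be in sleep once the SLEEP request has been
+ * acknowledged and the MCU has halted.
+ */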
+bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ return (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK) &&
+ kbase_csf_firmware_mcu_halted(kbdev));
+}
+#endif
int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev)
{
@@ -1256,6 +1524,11 @@
return NULL;
}
+void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
+{
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE);
+}
+
void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev)
{
/* NO_MALI: Nothing to do here */
@@ -1286,7 +1559,7 @@
gpu_map_prot =
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
cpu_map_prot = pgprot_writecombine(cpu_map_prot);
- };
+ }
phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
if (!phys)
@@ -1296,9 +1569,8 @@
if (!page_list)
goto page_list_alloc_error;
- ret = kbase_mem_pool_alloc_pages(
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- num_pages, phys, false);
+ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
+ phys, false, NULL);
if (ret <= 0)
goto phys_mem_pool_alloc_error;
@@ -1309,8 +1581,8 @@
if (!cpu_addr)
goto vmap_error;
- va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
- num_pages, KBASE_REG_ZONE_MCU_SHARED);
+ va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
+ KBASE_REG_ZONE_MCU_SHARED);
if (!va_reg)
goto va_region_alloc_error;
@@ -1324,9 +1596,9 @@
gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR);
gpu_map_properties |= gpu_map_prot;
- ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
- va_reg->start_pfn, &phys[0], num_pages,
- gpu_map_properties, KBASE_MEM_GROUP_CSF_FW);
+ ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn,
+ &phys[0], num_pages, gpu_map_properties,
+ KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false);
if (ret)
goto mmu_insert_pages_error;
@@ -1340,7 +1612,7 @@
mmu_insert_pages_error:
mutex_lock(&kbdev->csf.reg_lock);
- kbase_remove_va_region(va_reg);
+ kbase_remove_va_region(kbdev, va_reg);
va_region_add_error:
kbase_free_alloced_region(va_reg);
mutex_unlock(&kbdev->csf.reg_lock);
@@ -1372,7 +1644,7 @@
{
if (csf_mapping->va_reg) {
mutex_lock(&kbdev->csf.reg_lock);
- kbase_remove_va_region(csf_mapping->va_reg);
+ kbase_remove_va_region(kbdev, csf_mapping->va_reg);
kbase_free_alloced_region(csf_mapping->va_reg);
mutex_unlock(&kbdev->csf.reg_lock);
}
--
Gitblit v1.6.2