From f70575805708cabdedea7498aaa3f710fde4d920 Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Wed, 31 Jan 2024 03:29:01 +0000 Subject: [PATCH] add lvds1024*800 --- kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c | 504 +++++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 388 insertions(+), 116 deletions(-) diff --git a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c index 48864cc..833947f 100644 --- a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c +++ b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,10 +27,13 @@ #include "mali_kbase_reset_gpu.h" #include "mali_kbase_ctx_sched.h" #include "device/mali_kbase_device.h" +#include <mali_kbase_hwaccess_time.h> #include "backend/gpu/mali_kbase_pm_internal.h" #include "mali_kbase_csf_scheduler.h" #include "mmu/mali_kbase_mmu.h" #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" +#include <backend/gpu/mali_kbase_model_linux.h> +#include <csf/mali_kbase_csf_registers.h> #include <linux/list.h> #include <linux/slab.h> @@ -100,7 +103,7 @@ #define CSF_GLB_REQ_CFG_MASK \ (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ - GLB_REQ_CFG_PWROFF_TIMER_MASK) + GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) static inline u32 input_page_read(const u32 *const input, const u32 offset) { @@ -115,15 +118,6 @@ WARN_ON(offset % sizeof(u32)); input[offset / sizeof(u32)] = value; -} - -static inline void input_page_partial_write(u32 *const input, const u32 offset, - u32 value, u32 mask) -{ - WARN_ON(offset % sizeof(u32)); - - input[offset / sizeof(u32)] = - (input_page_read(input, offset) & ~mask) | (value & mask); } static inline u32 output_page_read(const u32 *const output, const u32 offset) @@ -144,13 +138,13 @@ /** * invent_memory_setup_entry() - Invent an "interface memory setup" section * + * @kbdev: Kbase device structure + * * Invent an "interface memory setup" section similar to one from a firmware * image. If successful the interface will be added to the * kbase_device:csf.firmware_interfaces list. 
* * Return: 0 if successful, negative error code on failure - * - * @kbdev: Kbase device structure */ static int invent_memory_setup_entry(struct kbase_device *kbdev) { @@ -201,9 +195,8 @@ ginfo->stream_stride = 0; ginfo->streams = kcalloc(ginfo->stream_num, sizeof(*ginfo->streams), GFP_KERNEL); - if (ginfo->streams == NULL) { + if (ginfo->streams == NULL) return -ENOMEM; - } for (sid = 0; sid < ginfo->stream_num; ++sid) { struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[sid]; @@ -236,7 +229,8 @@ iface->version = 1; iface->kbdev = kbdev; iface->features = 0; - iface->prfcnt_size = 64; + iface->prfcnt_size = + GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET(0, KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE); if (iface->version >= kbase_csf_interface_version(1, 1, 0)) { /* update rate=1, max event size = 1<<8 = 256 */ @@ -249,9 +243,8 @@ iface->group_stride = 0; iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), GFP_KERNEL); - if (iface->groups == NULL) { + if (iface->groups == NULL) return -ENOMEM; - } for (gid = 0; gid < iface->group_num; ++gid) { int err; @@ -275,6 +268,18 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, + u32 gpu_addr, u32 value) +{ + /* NO_MALI: Nothing to do here */ +} + +void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, + u32 gpu_addr, u32 *value) +{ + /* NO_MALI: Nothing to do here */ +} + +void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, u32 gpu_addr, u32 value) { /* NO_MALI: Nothing to do here */ @@ -379,37 +384,7 @@ dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val); return val; } - -static void -csf_firmware_prfcnt_process(const struct kbase_csf_global_iface *const iface, - const u32 glb_req) -{ - struct kbase_device *kbdev = iface->kbdev; - u32 glb_ack = output_page_read(iface->output, GLB_ACK); - /* If the value of GLB_REQ.PRFCNT_SAMPLE is different from the value of - * GLB_ACK.PRFCNT_SAMPLE, the CSF will sample the performance counters. - */ - if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_SAMPLE_MASK) { - /* NO_MALI only uses the first buffer in the ring buffer. */ - input_page_write(iface->input, GLB_PRFCNT_EXTRACT, 0); - output_page_write(iface->output, GLB_PRFCNT_INSERT, 1); - kbase_reg_write(kbdev, GPU_COMMAND, GPU_COMMAND_PRFCNT_SAMPLE); - } - - /* Propagate enable masks to model if request to enable. */ - if (glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) { - u32 tiler_en, l2_en, sc_en; - - tiler_en = input_page_read(iface->input, GLB_PRFCNT_TILER_EN); - l2_en = input_page_read(iface->input, GLB_PRFCNT_MMU_L2_EN); - sc_en = input_page_read(iface->input, GLB_PRFCNT_SHADER_EN); - - /* NO_MALI platform enabled all CSHW counters by default. */ - kbase_reg_write(kbdev, PRFCNT_TILER_EN, tiler_en); - kbase_reg_write(kbdev, PRFCNT_MMU_L2_EN, l2_en); - kbase_reg_write(kbdev, PRFCNT_SHADER_EN, sc_en); - } -} +KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output); void kbase_csf_firmware_global_input( const struct kbase_csf_global_iface *const iface, const u32 offset, @@ -421,11 +396,20 @@ input_page_write(iface->input, offset, value); if (offset == GLB_REQ) { - csf_firmware_prfcnt_process(iface, value); - /* NO_MALI: Immediately acknowledge requests */ - output_page_write(iface->output, GLB_ACK, value); + /* NO_MALI: Immediately acknowledge requests - except for PRFCNT_ENABLE + * and PRFCNT_SAMPLE. 
These will be processed along with the + * corresponding performance counter registers when the global doorbell + * is rung in order to emulate the performance counter sampling behavior + * of the real firmware. + */ + const u32 ack = output_page_read(iface->output, GLB_ACK); + const u32 req_mask = ~(GLB_REQ_PRFCNT_ENABLE_MASK | GLB_REQ_PRFCNT_SAMPLE_MASK); + const u32 toggled = (value ^ ack) & req_mask; + + output_page_write(iface->output, GLB_ACK, ack ^ toggled); } } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input); void kbase_csf_firmware_global_input_mask( const struct kbase_csf_global_iface *const iface, const u32 offset, @@ -439,6 +423,7 @@ /* NO_MALI: Go through kbase_csf_firmware_global_input to capture writes */ kbase_csf_firmware_global_input(iface, offset, (input_page_read(iface->input, offset) & ~mask) | (value & mask)); } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask); u32 kbase_csf_firmware_global_input_read( const struct kbase_csf_global_iface *const iface, const u32 offset) @@ -459,6 +444,100 @@ dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val); return val; } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output); + +/** + * csf_doorbell_prfcnt() - Process CSF performance counter doorbell request + * + * @kbdev: An instance of the GPU platform device + */ +static void csf_doorbell_prfcnt(struct kbase_device *kbdev) +{ + struct kbase_csf_global_iface *iface; + u32 req; + u32 ack; + u32 extract_index; + + if (WARN_ON(!kbdev)) + return; + + iface = &kbdev->csf.global_iface; + + req = input_page_read(iface->input, GLB_REQ); + ack = output_page_read(iface->output, GLB_ACK); + extract_index = input_page_read(iface->input, GLB_PRFCNT_EXTRACT); + + /* Process enable bit toggle */ + if ((req ^ ack) & GLB_REQ_PRFCNT_ENABLE_MASK) { + if (req & GLB_REQ_PRFCNT_ENABLE_MASK) { + /* Reset insert index to zero on enable bit set */ + output_page_write(iface->output, GLB_PRFCNT_INSERT, 0); + WARN_ON(extract_index != 0); + } + ack ^= GLB_REQ_PRFCNT_ENABLE_MASK; + } + + /* Process sample request */ + if ((req ^ ack) & GLB_REQ_PRFCNT_SAMPLE_MASK) { + const u32 ring_size = GLB_PRFCNT_CONFIG_SIZE_GET( + input_page_read(iface->input, GLB_PRFCNT_CONFIG)); + u32 insert_index = output_page_read(iface->output, GLB_PRFCNT_INSERT); + + const bool prev_overflow = (req ^ ack) & GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK; + const bool prev_threshold = (req ^ ack) & GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK; + + /* If ringbuffer is full toggle PRFCNT_OVERFLOW and skip sample */ + if (insert_index - extract_index >= ring_size) { + WARN_ON(insert_index - extract_index > ring_size); + if (!prev_overflow) + ack ^= GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK; + } else { + struct gpu_model_prfcnt_en enable_maps = { + .fe = input_page_read(iface->input, GLB_PRFCNT_CSF_EN), + .tiler = input_page_read(iface->input, GLB_PRFCNT_TILER_EN), + .l2 = input_page_read(iface->input, GLB_PRFCNT_MMU_L2_EN), + .shader = input_page_read(iface->input, GLB_PRFCNT_SHADER_EN), + }; + + const u64 prfcnt_base = + input_page_read(iface->input, GLB_PRFCNT_BASE_LO) + + ((u64)input_page_read(iface->input, GLB_PRFCNT_BASE_HI) << 32); + + u32 *sample_base = (u32 *)(uintptr_t)prfcnt_base + + (KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE * + (insert_index % ring_size)); + + /* trigger sample dump in the dummy model */ + gpu_model_prfcnt_dump_request(sample_base, enable_maps); + + /* increment insert index and toggle PRFCNT_SAMPLE bit in ACK */ + output_page_write(iface->output, GLB_PRFCNT_INSERT, ++insert_index); + ack 
^= GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK; + } + + /* When the ringbuffer reaches 50% capacity toggle PRFCNT_THRESHOLD */ + if (!prev_threshold && (insert_index - extract_index >= (ring_size / 2))) + ack ^= GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK; + } + + /* Update GLB_ACK */ + output_page_write(iface->output, GLB_ACK, ack); +} + +void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr) +{ + WARN_ON(doorbell_nr < 0); + WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); + + if (WARN_ON(!kbdev)) + return; + + if (doorbell_nr == CSF_KERNEL_DOORBELL_NR) { + csf_doorbell_prfcnt(kbdev); + gpu_model_glb_request_job_irq(kbdev->model); + } +} +EXPORT_SYMBOL(kbase_csf_ring_doorbell); /** * handle_internal_firmware_fatal - Handler for CS internal firmware fault. @@ -560,6 +639,8 @@ dev_warn(kbdev->dev, "Timed out waiting for global request %x to complete", req_mask); err = -ETIMEDOUT; + + } return err; @@ -621,16 +702,94 @@ set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK); } +static void enable_gpu_idle_timer(struct kbase_device *const kbdev) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, + kbdev->csf.gpu_idle_dur_count); + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, + GLB_REQ_IDLE_ENABLE_MASK); + dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", + kbdev->csf.gpu_idle_dur_count); +} + +static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + bool complete = false; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) + complete = true; + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return complete; +} + +static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) +{ + u32 glb_debug_req; + + kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + + glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_debug_req ^= req_mask; + + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); +} + +static void request_fw_core_dump( + const struct kbase_csf_global_iface *const global_iface) +{ + uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); + + set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); +} + +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + int ret; + + /* Serialize CORE_DUMP requests. */ + mutex_lock(&kbdev->csf.reg_lock); + + /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + request_fw_core_dump(global_iface); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + /* Wait for firmware to acknowledge completion of the CORE_DUMP request. 
*/ + ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + if (!ret) + WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); + + mutex_unlock(&kbdev->csf.reg_lock); + + return ret; +} + static void global_init(struct kbase_device *const kbdev, u64 core_mask) { - u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | - GLB_ACK_IRQ_MASK_PING_MASK | - GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | - GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | - GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | - GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | - GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | - GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK; + u32 const ack_irq_mask = + GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK | + GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | + GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | + GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | + GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | GLB_REQ_DEBUG_CSF_REQ_MASK; const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -643,6 +802,12 @@ enable_shader_poweroff_timer(kbdev, global_iface); set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev)); + + /* The GPU idle timer is always enabled for simplicity. Checks will be + * done before scheduling the GPU idle worker to see if it is + * appropriate for the current power policy. + */ + enable_gpu_idle_timer(kbdev); /* Unmask the interrupts */ kbase_csf_firmware_global_input(global_iface, @@ -786,8 +951,9 @@ dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!"); spin_unlock(&kbdev->pm.clk_rtm.lock); - dev_info(kbdev->dev, "Can't get the timestamp frequency, " - "use cycle counter format with firmware idle hysteresis!"); + dev_info( + kbdev->dev, + "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); } /* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */ @@ -811,7 +977,14 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) { - return kbdev->csf.gpu_idle_hysteresis_ms; + unsigned long flags; + u32 dur; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + dur = kbdev->csf.gpu_idle_hysteresis_us; + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return dur; } u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) @@ -819,11 +992,53 @@ unsigned long flags; const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); - kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; - kbdev->csf.gpu_idle_dur_count = hysteresis_val; - kbase_csf_scheduler_spin_unlock(kbdev, flags); + /* The 'fw_load_lock' is taken to synchronize against the deferred + * loading of FW, where the idle timer will be enabled. 
+ */ + mutex_lock(&kbdev->fw_load_lock); + if (unlikely(!kbdev->csf.firmware_inited)) { + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbase_csf_scheduler_spin_unlock(kbdev, flags); + mutex_unlock(&kbdev->fw_load_lock); + goto end; + } + mutex_unlock(&kbdev->fw_load_lock); + kbase_csf_scheduler_pm_active(kbdev); + if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { + dev_err(kbdev->dev, + "Unable to activate the MCU, the idle hysteresis value shall remain unchanged"); + kbase_csf_scheduler_pm_idle(kbdev); + return kbdev->csf.gpu_idle_dur_count; + } + + /* The 'reg_lock' is also taken and is held till the update is not + * complete, to ensure the update of idle timer value by multiple Users + * gets serialized. + */ + mutex_lock(&kbdev->csf.reg_lock); + /* The firmware only reads the new idle timer value when the timer is + * disabled. + */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_csf_firmware_disable_gpu_idle_timer(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + /* Ensure that the request has taken effect */ + wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbase_csf_firmware_enable_gpu_idle_timer(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); + mutex_unlock(&kbdev->csf.reg_lock); + + kbase_csf_scheduler_pm_idle(kbdev); + +end: dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", hysteresis_val); @@ -832,7 +1047,6 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) { -#define PWROFF_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); u64 dur_val = dur_us; @@ -848,8 +1062,9 @@ dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!"); spin_unlock(&kbdev->pm.clk_rtm.lock); - dev_info(kbdev->dev, "Can't get the timestamp frequency, " - "use cycle counter with MCU Core Poweroff timer!"); + dev_info( + kbdev->dev, + "Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!"); } /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */ @@ -873,7 +1088,14 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) { - return kbdev->csf.mcu_core_pwroff_dur_us; + u32 pwroff; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + pwroff = kbdev->csf.mcu_core_pwroff_dur_us; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return pwroff; } u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) @@ -886,7 +1108,7 @@ kbdev->csf.mcu_core_pwroff_dur_count = pwroff; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - dev_dbg(kbdev->dev, "MCU Core Poweroff input update: 0x%.8x", pwroff); + dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff); return pwroff; } @@ -895,11 +1117,14 @@ { init_waitqueue_head(&kbdev->csf.event_wait); kbdev->csf.interrupt_received = false; - kbdev->csf.fw_timeout_ms = CSF_FIRMWARE_TIMEOUT_MS; + + kbdev->csf.fw_timeout_ms = + kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); INIT_LIST_HEAD(&kbdev->csf.firmware_config); INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); + 
INIT_LIST_HEAD(&kbdev->csf.user_reg.list); INIT_WORK(&kbdev->csf.firmware_reload_work, kbase_csf_firmware_reload_worker); INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); @@ -909,7 +1134,26 @@ return 0; } -int kbase_csf_firmware_init(struct kbase_device *kbdev) +void kbase_csf_firmware_early_term(struct kbase_device *kbdev) +{ + mutex_destroy(&kbdev->csf.reg_lock); +} + +int kbase_csf_firmware_late_init(struct kbase_device *kbdev) +{ + kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; +#ifdef KBASE_PM_RUNTIME + if (kbase_pm_gpu_sleep_allowed(kbdev)) + kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; +#endif + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); + kbdev->csf.gpu_idle_dur_count = + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); + + return 0; +} + +int kbase_csf_firmware_load_init(struct kbase_device *kbdev) { int ret; @@ -927,10 +1171,6 @@ kbdev->as_free |= MCU_AS_BITMASK; return ret; } - - kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; - kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count( - kbdev, FIRMWARE_IDLE_HYSTERESIS_TIME_MS); ret = kbase_mcu_shared_interface_region_tracker_init(kbdev); if (ret != 0) { @@ -979,19 +1219,17 @@ return 0; error: - kbase_csf_firmware_term(kbdev); + kbase_csf_firmware_unload_term(kbdev); return ret; } -void kbase_csf_firmware_term(struct kbase_device *kbdev) +void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) { cancel_work_sync(&kbdev->csf.fw_error_work); kbase_csf_timeout_term(kbdev); /* NO_MALI: Don't stop firmware or unload MMU tables */ - - kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); kbase_csf_scheduler_term(kbdev); @@ -1018,44 +1256,30 @@ /* NO_MALI: No trace buffers to terminate */ -#ifndef MALI_KBASE_BUILD - mali_kutf_fw_utf_entry_cleanup(kbdev); -#endif - - mutex_destroy(&kbdev->csf.reg_lock); - /* This will also free up the region allocated for the shared interface * entry parsed from the firmware image. */ kbase_mcu_shared_interface_region_tracker_term(kbdev); + + kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); } void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) { struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; - u32 glb_req; + const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); kbase_csf_scheduler_spin_lock_assert_held(kbdev); - /* The scheduler is assumed to only call the enable when its internal * state indicates that the idle timer has previously been disabled. So * on entry the expected field values are: * 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0 * 2. 
GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0 */ - - glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); if (glb_req & GLB_REQ_IDLE_ENABLE_MASK) dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!"); - kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, - kbdev->csf.gpu_idle_dur_count); - - kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, - GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK); - - dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", - kbdev->csf.gpu_idle_dur_count); + enable_gpu_idle_timer(kbdev); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); } @@ -1086,8 +1310,9 @@ kbase_csf_scheduler_spin_unlock(kbdev, flags); } -int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev) +int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms) { + CSTD_UNUSED(wait_timeout_ms); kbase_csf_firmware_ping(kbdev); return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); } @@ -1119,15 +1344,23 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev) { struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; - unsigned long flags; - kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_csf_scheduler_spin_lock_assert_held(kbdev); set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK); dev_dbg(kbdev->dev, "Sending request to enter protected mode"); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); - kbase_csf_scheduler_spin_unlock(kbdev, flags); +} - wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); +int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) +{ + int err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); + + if (err) { + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu(kbdev); + } + + return err; } void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) @@ -1136,11 +1369,46 @@ unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); + /* Validate there are no on-slot groups when sending the + * halt request to firmware. + */ + WARN_ON(kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev)); set_global_request(global_iface, GLB_REQ_HALT_MASK); dev_dbg(kbdev->dev, "Sending request to HALT MCU"); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); kbase_csf_scheduler_spin_unlock(kbdev, flags); } + +void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) +{ + /* Trigger the boot of MCU firmware, Use the AUTO mode as + * otherwise on fast reset, to exit protected mode, MCU will + * not reboot by itself to enter normal mode. 
+ */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO); +} + +#ifdef KBASE_PM_RUNTIME +void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + set_global_request(global_iface, GLB_REQ_SLEEP_MASK); + dev_dbg(kbdev->dev, "Sending sleep request to MCU"); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + +bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK) && + kbase_csf_firmware_mcu_halted(kbdev)); +} +#endif int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) { @@ -1256,6 +1524,11 @@ return NULL; } +void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev) +{ + kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE); +} + void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) { /* NO_MALI: Nothing to do here */ @@ -1286,7 +1559,7 @@ gpu_map_prot = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); cpu_map_prot = pgprot_writecombine(cpu_map_prot); - }; + } phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); if (!phys) @@ -1296,9 +1569,8 @@ if (!page_list) goto page_list_alloc_error; - ret = kbase_mem_pool_alloc_pages( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - num_pages, phys, false); + ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, + phys, false, NULL); if (ret <= 0) goto phys_mem_pool_alloc_error; @@ -1309,8 +1581,8 @@ if (!cpu_addr) goto vmap_error; - va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, - num_pages, KBASE_REG_ZONE_MCU_SHARED); + va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages, + KBASE_REG_ZONE_MCU_SHARED); if (!va_reg) goto va_region_alloc_error; @@ -1324,9 +1596,9 @@ gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR); gpu_map_properties |= gpu_map_prot; - ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, - va_reg->start_pfn, &phys[0], num_pages, - gpu_map_properties, KBASE_MEM_GROUP_CSF_FW); + ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, + &phys[0], num_pages, gpu_map_properties, + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); if (ret) goto mmu_insert_pages_error; @@ -1340,7 +1612,7 @@ mmu_insert_pages_error: mutex_lock(&kbdev->csf.reg_lock); - kbase_remove_va_region(va_reg); + kbase_remove_va_region(kbdev, va_reg); va_region_add_error: kbase_free_alloced_region(va_reg); mutex_unlock(&kbdev->csf.reg_lock); @@ -1372,7 +1644,7 @@ { if (csf_mapping->va_reg) { mutex_lock(&kbdev->csf.reg_lock); - kbase_remove_va_region(csf_mapping->va_reg); + kbase_remove_va_region(kbdev, csf_mapping->va_reg); kbase_free_alloced_region(csf_mapping->va_reg); mutex_unlock(&kbdev->csf.reg_lock); } -- Gitblit v1.6.2
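
Illustrative note (appended after the patch, not part of it): the largest addition above, csf_doorbell_prfcnt(), emulates firmware performance-counter sampling on the NO_MALI dummy model. Its handshake is toggle-based: a request is "pending" whenever a GLB_REQ bit differs from the matching GLB_ACK bit, the insert/extract indices are free-running so occupancy is a simple subtraction, an OVERFLOW bit is toggled when the ring is full, and a THRESHOLD bit is toggled once the ring reaches 50% capacity. The sketch below is a minimal standalone model of that bookkeeping only; the structure and all names (prfcnt_ring, process_doorbell, the *_BIT masks) are hypothetical stand-ins, not kbase APIs, and only the toggle/index arithmetic mirrors the patch.

```c
/*
 * Standalone sketch of the NO_MALI performance-counter doorbell bookkeeping.
 * Hypothetical names throughout; only the toggle and index arithmetic follows
 * the logic added by this patch in csf_doorbell_prfcnt().
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PRFCNT_SAMPLE_BIT    (1u << 0)  /* stand-ins for the GLB_REQ/GLB_ACK masks */
#define PRFCNT_ENABLE_BIT    (1u << 1)
#define PRFCNT_OVERFLOW_BIT  (1u << 2)
#define PRFCNT_THRESHOLD_BIT (1u << 3)

struct prfcnt_ring {
	uint32_t req;     /* host-written request word (GLB_REQ analogue)      */
	uint32_t ack;     /* firmware-owned ack word (GLB_ACK analogue)        */
	uint32_t insert;  /* next slot to write (GLB_PRFCNT_INSERT analogue)   */
	uint32_t extract; /* next slot host reads (GLB_PRFCNT_EXTRACT analogue)*/
	uint32_t size;    /* ring capacity in samples                          */
};

/* A request/event is pending when its req bit differs from its ack bit. */
static bool pending(const struct prfcnt_ring *r, uint32_t bit)
{
	return ((r->req ^ r->ack) & bit) != 0;
}

static void process_doorbell(struct prfcnt_ring *r)
{
	if (pending(r, PRFCNT_ENABLE_BIT)) {
		if (r->req & PRFCNT_ENABLE_BIT)
			r->insert = 0;            /* reset insert index on enable */
		r->ack ^= PRFCNT_ENABLE_BIT;      /* acknowledge by toggling      */
	}

	if (pending(r, PRFCNT_SAMPLE_BIT)) {
		uint32_t used = r->insert - r->extract; /* free-running indices */

		if (used >= r->size) {
			/* Ring full: signal overflow once, skip the sample. */
			if (!pending(r, PRFCNT_OVERFLOW_BIT))
				r->ack ^= PRFCNT_OVERFLOW_BIT;
		} else {
			/* "Dump" a sample into slot insert % size, then advance. */
			r->insert++;
			r->ack ^= PRFCNT_SAMPLE_BIT;
		}

		/* Signal the host once the ring reaches 50% occupancy. */
		if (!pending(r, PRFCNT_THRESHOLD_BIT) &&
		    (r->insert - r->extract >= r->size / 2))
			r->ack ^= PRFCNT_THRESHOLD_BIT;
	}
}

int main(void)
{
	struct prfcnt_ring r = { .size = 4 };

	r.req ^= PRFCNT_ENABLE_BIT;               /* host enables counters  */
	for (int i = 0; i < 6; i++) {
		r.req ^= PRFCNT_SAMPLE_BIT;       /* host requests a sample */
		process_doorbell(&r);             /* "ring" the doorbell    */
	}
	printf("insert=%u ack=0x%x\n", r.insert, r.ack);
	return 0;
}
```

The toggle-based handshake (a field is outstanding whenever the REQ and ACK copies differ) is what lets kbase_csf_firmware_global_input() in the patch acknowledge most requests immediately while deferring only PRFCNT_ENABLE and PRFCNT_SAMPLE until the kernel doorbell is rung, matching the sampling behaviour of real firmware.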