/*
|
* Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved.
|
*
|
* This program is free software and is provided to you under the terms of the GNU General Public License version 2
|
* as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
|
*
|
* A copy of the licence is included with the program, and can also be obtained from Free Software
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
*/
|
|
#include "mali_kernel_utilization.h"
|
#include "mali_osk.h"
|
#include "mali_osk_mali.h"
|
#include "mali_kernel_common.h"
|
#include "mali_session.h"
|
#include "mali_scheduler.h"
|
|
#include "mali_executor.h"
|
#include "mali_dvfs_policy.h"
|
#include "mali_control_timer.h"
|
|
/* Thresholds for GP bound detection. */
|
#define MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD 240
|
#define MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD 250
|
|
static _mali_osk_spinlock_irq_t *utilization_data_lock;
|
|
static u32 num_running_gp_cores = 0;
|
static u32 num_running_pp_cores = 0;
|
|
static u64 work_start_time_gpu = 0;
|
static u64 work_start_time_gp = 0;
|
static u64 work_start_time_pp = 0;
|
static u64 accumulated_work_time_gpu = 0;
|
static u64 accumulated_work_time_gp = 0;
|
static u64 accumulated_work_time_pp = 0;
|
|
static u32 last_utilization_gpu = 0 ;
|
static u32 last_utilization_gp = 0 ;
|
static u32 last_utilization_pp = 0 ;
|
|
void (*mali_utilization_callback)(struct mali_gpu_utilization_data *data) = NULL;
|
|
/* Define the first timer control timer timeout in milliseconds */
|
static u32 mali_control_first_timeout = 100;
|
static struct mali_gpu_utilization_data mali_util_data = {0, };
|
|
struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer)
|
{
|
u64 time_now;
|
u32 leading_zeroes;
|
u32 shift_val;
|
u32 work_normalized_gpu;
|
u32 work_normalized_gp;
|
u32 work_normalized_pp;
|
u32 period_normalized;
|
u32 utilization_gpu;
|
u32 utilization_gp;
|
u32 utilization_pp;
|
|
mali_utilization_data_lock();
|
|
time_now = _mali_osk_time_get_ns();
|
|
*time_period = time_now - *start_time;
|
|
if (accumulated_work_time_gpu == 0 && work_start_time_gpu == 0) {
|
mali_control_timer_pause();
|
/*
|
* No work done for this period
|
* - No need to reschedule timer
|
* - Report zero usage
|
*/
|
last_utilization_gpu = 0;
|
last_utilization_gp = 0;
|
last_utilization_pp = 0;
|
|
mali_util_data.utilization_gpu = last_utilization_gpu;
|
mali_util_data.utilization_gp = last_utilization_gp;
|
mali_util_data.utilization_pp = last_utilization_pp;
|
|
mali_utilization_data_unlock();
|
|
*need_add_timer = MALI_FALSE;
|
|
mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
|
|
MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
|
MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
|
MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));
|
|
return &mali_util_data;
|
}
|
|
/* If we are currently busy, update working period up to now */
|
if (work_start_time_gpu != 0) {
|
accumulated_work_time_gpu += (time_now - work_start_time_gpu);
|
work_start_time_gpu = time_now;
|
|
/* GP and/or PP will also be busy if the GPU is busy at this point */
|
|
if (work_start_time_gp != 0) {
|
accumulated_work_time_gp += (time_now - work_start_time_gp);
|
work_start_time_gp = time_now;
|
}
|
|
if (work_start_time_pp != 0) {
|
accumulated_work_time_pp += (time_now - work_start_time_pp);
|
work_start_time_pp = time_now;
|
}
|
}
|
|
/*
|
* We have two 64-bit values, a dividend and a divisor.
|
* To avoid dependencies to a 64-bit divider, we shift down the two values
|
* equally first.
|
* We shift the dividend up and possibly the divisor down, making the result X in 256.
|
*/
|
|
/* Shift the 64-bit values down so they fit inside a 32-bit integer */
|
leading_zeroes = _mali_osk_clz((u32)(*time_period >> 32));
|
shift_val = 32 - leading_zeroes;
|
work_normalized_gpu = (u32)(accumulated_work_time_gpu >> shift_val);
|
work_normalized_gp = (u32)(accumulated_work_time_gp >> shift_val);
|
work_normalized_pp = (u32)(accumulated_work_time_pp >> shift_val);
|
period_normalized = (u32)(*time_period >> shift_val);
|
|
/*
|
* Now, we should report the usage in parts of 256
|
* this means we must shift up the dividend or down the divisor by 8
|
* (we could do a combination, but we just use one for simplicity,
|
* but the end result should be good enough anyway)
|
*/
|
if (period_normalized > 0x00FFFFFF) {
|
/* The divisor is so big that it is safe to shift it down */
|
period_normalized >>= 8;
|
} else {
|
/*
|
* The divisor is so small that we can shift up the dividend, without loosing any data.
|
* (dividend is always smaller than the divisor)
|
*/
|
work_normalized_gpu <<= 8;
|
work_normalized_gp <<= 8;
|
work_normalized_pp <<= 8;
|
}
|
|
utilization_gpu = work_normalized_gpu / period_normalized;
|
utilization_gp = work_normalized_gp / period_normalized;
|
utilization_pp = work_normalized_pp / period_normalized;
|
|
last_utilization_gpu = utilization_gpu;
|
last_utilization_gp = utilization_gp;
|
last_utilization_pp = utilization_pp;
|
|
if ((MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD < last_utilization_gp) &&
|
(MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD > last_utilization_pp)) {
|
mali_executor_hint_enable(MALI_EXECUTOR_HINT_GP_BOUND);
|
} else {
|
mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
|
}
|
|
/* starting a new period */
|
accumulated_work_time_gpu = 0;
|
accumulated_work_time_gp = 0;
|
accumulated_work_time_pp = 0;
|
|
*start_time = time_now;
|
|
mali_util_data.utilization_gp = last_utilization_gp;
|
mali_util_data.utilization_gpu = last_utilization_gpu;
|
mali_util_data.utilization_pp = last_utilization_pp;
|
|
mali_utilization_data_unlock();
|
|
*need_add_timer = MALI_TRUE;
|
|
MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
|
MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
|
MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));
|
|
return &mali_util_data;
|
}
|
|
_mali_osk_errcode_t mali_utilization_init(void)
|
{
|
#if USING_GPU_UTILIZATION
|
_mali_osk_device_data data;
|
|
if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
|
if (NULL != data.utilization_callback) {
|
mali_utilization_callback = data.utilization_callback;
|
MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: Utilization handler installed \n"));
|
}
|
}
|
#endif /* defined(USING_GPU_UTILIZATION) */
|
|
if (NULL == mali_utilization_callback) {
|
MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: No platform utilization handler installed\n"));
|
}
|
|
utilization_data_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_UTILIZATION);
|
if (NULL == utilization_data_lock) {
|
return _MALI_OSK_ERR_FAULT;
|
}
|
|
num_running_gp_cores = 0;
|
num_running_pp_cores = 0;
|
|
return _MALI_OSK_ERR_OK;
|
}
|
|
void mali_utilization_term(void)
|
{
|
if (NULL != utilization_data_lock) {
|
_mali_osk_spinlock_irq_term(utilization_data_lock);
|
}
|
}
|
|
void mali_utilization_gp_start(void)
|
{
|
mali_utilization_data_lock();
|
|
++num_running_gp_cores;
|
if (1 == num_running_gp_cores) {
|
u64 time_now = _mali_osk_time_get_ns();
|
|
/* First GP core started, consider GP busy from now and onwards */
|
work_start_time_gp = time_now;
|
|
if (0 == num_running_pp_cores) {
|
mali_bool is_resume = MALI_FALSE;
|
/*
|
* There are no PP cores running, so this is also the point
|
* at which we consider the GPU to be busy as well.
|
*/
|
work_start_time_gpu = time_now;
|
|
is_resume = mali_control_timer_resume(time_now);
|
|
mali_utilization_data_unlock();
|
|
if (is_resume) {
|
/* Do some policy in new period for performance consideration */
|
#if defined(CONFIG_MALI_DVFS)
|
/* Clear session->number_of_window_jobs, prepare parameter for dvfs */
|
mali_session_max_window_num();
|
if (0 == last_utilization_gpu) {
|
/*
|
* for mali_dev_pause is called in set clock,
|
* so each time we change clock, we will set clock to
|
* highest step even if under down clock case,
|
* it is not nessesary, so we only set the clock under
|
* last time utilization equal 0, we stop the timer then
|
* start the GPU again case
|
*/
|
mali_dvfs_policy_new_period();
|
}
|
#endif
|
/*
|
* First timeout using short interval for power consideration
|
* because we give full power in the new period, but if the
|
* job loading is light, finish in 10ms, the other time all keep
|
* in high freq it will wast time.
|
*/
|
mali_control_timer_add(mali_control_first_timeout);
|
}
|
} else {
|
mali_utilization_data_unlock();
|
}
|
|
} else {
|
/* Nothing to do */
|
mali_utilization_data_unlock();
|
}
|
}
|
|
void mali_utilization_pp_start(void)
|
{
|
mali_utilization_data_lock();
|
|
++num_running_pp_cores;
|
if (1 == num_running_pp_cores) {
|
u64 time_now = _mali_osk_time_get_ns();
|
|
/* First PP core started, consider PP busy from now and onwards */
|
work_start_time_pp = time_now;
|
|
if (0 == num_running_gp_cores) {
|
mali_bool is_resume = MALI_FALSE;
|
/*
|
* There are no GP cores running, so this is also the point
|
* at which we consider the GPU to be busy as well.
|
*/
|
work_start_time_gpu = time_now;
|
|
/* Start a new period if stoped */
|
is_resume = mali_control_timer_resume(time_now);
|
|
mali_utilization_data_unlock();
|
|
if (is_resume) {
|
#if defined(CONFIG_MALI_DVFS)
|
/* Clear session->number_of_window_jobs, prepare parameter for dvfs */
|
mali_session_max_window_num();
|
if (0 == last_utilization_gpu) {
|
/*
|
* for mali_dev_pause is called in set clock,
|
* so each time we change clock, we will set clock to
|
* highest step even if under down clock case,
|
* it is not nessesary, so we only set the clock under
|
* last time utilization equal 0, we stop the timer then
|
* start the GPU again case
|
*/
|
mali_dvfs_policy_new_period();
|
}
|
#endif
|
|
/*
|
* First timeout using short interval for power consideration
|
* because we give full power in the new period, but if the
|
* job loading is light, finish in 10ms, the other time all keep
|
* in high freq it will wast time.
|
*/
|
mali_control_timer_add(mali_control_first_timeout);
|
}
|
} else {
|
mali_utilization_data_unlock();
|
}
|
} else {
|
/* Nothing to do */
|
mali_utilization_data_unlock();
|
}
|
}
|
|
void mali_utilization_gp_end(void)
|
{
|
mali_utilization_data_lock();
|
|
--num_running_gp_cores;
|
if (0 == num_running_gp_cores) {
|
u64 time_now = _mali_osk_time_get_ns();
|
|
/* Last GP core ended, consider GP idle from now and onwards */
|
accumulated_work_time_gp += (time_now - work_start_time_gp);
|
work_start_time_gp = 0;
|
|
if (0 == num_running_pp_cores) {
|
/*
|
* There are no PP cores running, so this is also the point
|
* at which we consider the GPU to be idle as well.
|
*/
|
accumulated_work_time_gpu += (time_now - work_start_time_gpu);
|
work_start_time_gpu = 0;
|
}
|
}
|
|
mali_utilization_data_unlock();
|
}
|
|
void mali_utilization_pp_end(void)
|
{
|
mali_utilization_data_lock();
|
|
--num_running_pp_cores;
|
if (0 == num_running_pp_cores) {
|
u64 time_now = _mali_osk_time_get_ns();
|
|
/* Last PP core ended, consider PP idle from now and onwards */
|
accumulated_work_time_pp += (time_now - work_start_time_pp);
|
work_start_time_pp = 0;
|
|
if (0 == num_running_gp_cores) {
|
/*
|
* There are no GP cores running, so this is also the point
|
* at which we consider the GPU to be idle as well.
|
*/
|
accumulated_work_time_gpu += (time_now - work_start_time_gpu);
|
work_start_time_gpu = 0;
|
}
|
}
|
|
mali_utilization_data_unlock();
|
}
|
|
mali_bool mali_utilization_enabled(void)
|
{
|
#if defined(CONFIG_MALI_DVFS)
|
return mali_dvfs_policy_enabled();
|
#else
|
return (NULL != mali_utilization_callback);
|
#endif /* defined(CONFIG_MALI_DVFS) */
|
}
|
|
void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data)
|
{
|
MALI_DEBUG_ASSERT_POINTER(mali_utilization_callback);
|
|
mali_utilization_callback(util_data);
|
}
|
|
void mali_utilization_reset(void)
|
{
|
accumulated_work_time_gpu = 0;
|
accumulated_work_time_gp = 0;
|
accumulated_work_time_pp = 0;
|
|
last_utilization_gpu = 0;
|
last_utilization_gp = 0;
|
last_utilization_pp = 0;
|
}
|
|
void mali_utilization_data_lock(void)
|
{
|
_mali_osk_spinlock_irq_lock(utilization_data_lock);
|
}
|
|
void mali_utilization_data_unlock(void)
|
{
|
_mali_osk_spinlock_irq_unlock(utilization_data_lock);
|
}
|
|
void mali_utilization_data_assert_locked(void)
|
{
|
MALI_DEBUG_ASSERT_LOCK_HELD(utilization_data_lock);
|
}
|
|
u32 _mali_ukk_utilization_gp_pp(void)
|
{
|
return last_utilization_gpu;
|
}
|
|
u32 _mali_ukk_utilization_gp(void)
|
{
|
return last_utilization_gp;
|
}
|
|
u32 _mali_ukk_utilization_pp(void)
|
{
|
return last_utilization_pp;
|
}
|