// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

#include "mali_kbase_ipa_counter_common_csf.h"
#include "ipa/mali_kbase_ipa_debugfs.h"

#define DEFAULT_SCALING_FACTOR 5

/* If the value of GPU_ACTIVE is below this, use the simple model
 * instead, to avoid extrapolating small amounts of counter data across
 * large sample periods.
 */
#define DEFAULT_MIN_SAMPLE_CYCLES 10000

/* The typical value for the sampling interval is expected to be less than
 * 100 ms, so 5 seconds is a reasonable upper limit for the time gap between
 * two samples.
 */
#define MAX_SAMPLE_INTERVAL_MS ((s64)5000)

/* Maximum increment that is expected for a counter value during a sampling
 * interval is derived assuming:
 * - a max sampling interval of 1 second.
 * - a max GPU frequency of 2 GHz.
 * - a max number of cores of 32.
 * - a max increment of 4 in the per core counter value at every clock cycle.
 *
 * So max increment = 2 * 10^9 * 32 * 4 = ~2^38.
 * If a counter increases by an amount greater than this value, then an error
 * will be returned and the simple power model will be used.
 */
#define MAX_COUNTER_INCREMENT (((u64)1 << 38) - 1)
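/* A quick worked check of that bound (illustrative only): 2 GHz * 32 cores *
 * 4 increments per cycle = 2.56 * 10^11 counts over a 1 second interval, and
 * 2^38 = 274877906944 (~2.75 * 10^11), so the expected maximum stays just
 * within MAX_COUNTER_INCREMENT.
 */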

static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
{
	s64 rtn;

	if (a > 0 && (S64_MAX - a) < b)
		rtn = S64_MAX;
	else if (a < 0 && (S64_MIN - a) > b)
		rtn = S64_MIN;
	else
		rtn = a + b;

	return rtn;
}
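/* For example, kbase_ipa_add_saturate(S64_MAX, 1) evaluates to S64_MAX and
 * kbase_ipa_add_saturate(S64_MIN, -1) evaluates to S64_MIN, rather than
 * overflowing the s64 range.
 */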

static s64 kbase_ipa_group_energy(s32 coeff, u64 counter_value)
{
	/* Range: 0 < counter_value < 2^38 */

	/* Range: -2^59 < ret < 2^59 (as -2^21 < coeff < 2^21) */
	return counter_value * (s64)coeff;
}

/**
 * kbase_ipa_attach_ipa_control() - register with kbase_ipa_control
 * @model_data: Pointer to counter model data
 *
 * Register the IPA counter model as a client of kbase_ipa_control, which
 * provides an interface to retrieve the accumulated value of hardware
 * counters to calculate energy consumption.
 *
 * Return: 0 on success, or an error code.
 */
static int
kbase_ipa_attach_ipa_control(struct kbase_ipa_counter_model_data *model_data)
{
	struct kbase_device *kbdev = model_data->kbdev;
	struct kbase_ipa_control_perf_counter *perf_counters;
	u32 cnt_idx = 0;
	int err;
	size_t i;

	/* Value for GPU_ACTIVE counter also needs to be queried. It is required
	 * for the normalization of top-level and shader core counters.
	 */
	model_data->num_counters = 1 + model_data->num_top_level_cntrs +
				   model_data->num_shader_cores_cntrs;

	perf_counters = kcalloc(model_data->num_counters,
				sizeof(*perf_counters), GFP_KERNEL);

	if (!perf_counters) {
		dev_err(kbdev->dev,
			"Failed to allocate memory for perf_counters array");
		return -ENOMEM;
	}

	/* Fill in the description for the GPU_ACTIVE counter which is always
	 * needed, as mentioned above, regardless of the energy model used
	 * by the CSF GPUs.
	 */
	perf_counters[cnt_idx].type = KBASE_IPA_CORE_TYPE_CSHW;
	perf_counters[cnt_idx].idx = GPU_ACTIVE_CNT_IDX;
	perf_counters[cnt_idx].gpu_norm = false;
	perf_counters[cnt_idx].scaling_factor = 1;
	cnt_idx++;

	for (i = 0; i < model_data->num_top_level_cntrs; ++i) {
		const struct kbase_ipa_counter *counter =
			&model_data->top_level_cntrs_def[i];

		perf_counters[cnt_idx].type = counter->counter_block_type;
		perf_counters[cnt_idx].idx = counter->counter_block_offset;
		perf_counters[cnt_idx].gpu_norm = false;
		perf_counters[cnt_idx].scaling_factor = 1;
		cnt_idx++;
	}

	for (i = 0; i < model_data->num_shader_cores_cntrs; ++i) {
		const struct kbase_ipa_counter *counter =
			&model_data->shader_cores_cntrs_def[i];

		perf_counters[cnt_idx].type = counter->counter_block_type;
		perf_counters[cnt_idx].idx = counter->counter_block_offset;
		perf_counters[cnt_idx].gpu_norm = false;
		perf_counters[cnt_idx].scaling_factor = 1;
		cnt_idx++;
	}

	err = kbase_ipa_control_register(kbdev, perf_counters,
					 model_data->num_counters,
					 &model_data->ipa_control_client);
	if (err)
		dev_err(kbdev->dev,
			"Failed to register IPA with kbase_ipa_control");

	kfree(perf_counters);
	return err;
}

/**
 * kbase_ipa_detach_ipa_control() - De-register from kbase_ipa_control.
 * @model_data: Pointer to counter model data
 */
static void
kbase_ipa_detach_ipa_control(struct kbase_ipa_counter_model_data *model_data)
{
	if (model_data->ipa_control_client) {
		kbase_ipa_control_unregister(model_data->kbdev,
					     model_data->ipa_control_client);
		model_data->ipa_control_client = NULL;
	}
}

static int calculate_coeff(struct kbase_ipa_counter_model_data *model_data,
			   const struct kbase_ipa_counter *const cnt_defs,
			   size_t num_counters, s32 *counter_coeffs,
			   u64 *counter_values, u32 active_cycles, u32 *coeffp)
{
	u64 coeff = 0, coeff_mul = 0;
	s64 total_energy = 0;
	size_t i;

	/* Range for the 'counter_value' is [0, 2^38)
	 * Range for the 'coeff' is [-2^21, 2^21]
	 * So range for the 'group_energy' is [-2^59, 2^59) and range for the
	 * 'total_energy' is +/- 2^59 * number of IPA groups (~16), i.e.
	 * [-2^63, 2^63).
	 */
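	/* Note that 16 * 2^59 = 2^63 just exceeds S64_MAX (2^63 - 1), which is
	 * why the loop below accumulates with kbase_ipa_add_saturate() rather
	 * than a plain addition.
	 */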
	for (i = 0; i < num_counters; i++) {
		s32 coeff = counter_coeffs[i];
		u64 counter_value = counter_values[i];
		s64 group_energy = kbase_ipa_group_energy(coeff, counter_value);

		if (counter_value > MAX_COUNTER_INCREMENT) {
			dev_warn(model_data->kbdev->dev,
				 "Increment in counter %s more than expected",
				 cnt_defs[i].name);
			return -ERANGE;
		}

		total_energy =
			kbase_ipa_add_saturate(total_energy, group_energy);
	}

	/* Range: 0 <= coeff < 2^63 */
	if (total_energy >= 0)
		coeff = total_energy;
	else
		dev_dbg(model_data->kbdev->dev,
			"Computed energy value is negative: %lld",
			total_energy);

	/* Range: 0 <= coeff < 2^63 (because active_cycles >= 1). However, this
	 * can be constrained further: the value of counters that are being
	 * used for dynamic power estimation can only increment by about 128
	 * maximum per clock cycle. This is because the max number of shader
	 * cores is expected to be 32 (the max number of L2 slices is expected
	 * to be 8) and some counters (per shader core) like
	 * SC_BEATS_RD_TEX_EXT & SC_EXEC_STARVE_ARITH can increment by 4 every
	 * clock cycle.
	 * Each "beat" is defined as 128 bits and each shader core can
	 * (currently) do 512 bits read and 512 bits write to/from the L2
	 * cache per cycle, so the SC_BEATS_RD_TEX_EXT counter can increment
	 * [0, 4] per shader core per cycle.
	 * We can thus write the range of 'coeff' in terms of active_cycles:
	 *
	 * coeff = SUM(coeffN * counterN * num_cores_for_counterN)
	 * coeff <= SUM(coeffN * counterN) * max_cores
	 * coeff <= num_IPA_groups * max_coeff * max_counter * max_cores
	 * (substitute max_counter = 2^2 * active_cycles)
	 * coeff <= num_IPA_groups * max_coeff * 2^2 * active_cycles * max_cores
	 * coeff <= 2^4 * 2^21 * 2^2 * active_cycles * 2^5
	 * coeff <= 2^32 * active_cycles
	 *
	 * So after the division: 0 <= coeff <= 2^32
	 */
	coeff = div_u64(coeff, active_cycles);
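	/* Checking the exponents in the last substitution: 4 + 21 + 2 + 5 = 32,
	 * which is where the 2^32 * active_cycles bound comes from.
	 */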

	/* Not all models were derived at the same reference voltage. Voltage
	 * scaling is done by multiplying by V^2, so we need to *divide* by
	 * Vref^2 here.
	 * Range: 0 <= coeff <= 2^35
	 */
	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
	/* Range: 0 <= coeff <= 2^38 */
	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
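	/* Worked example, assuming reference_voltage is expressed in
	 * millivolts (the "750mV" in the clamp comment below suggests it is):
	 * with reference_voltage == 750, the two divisions above scale coeff
	 * by (1000 / 750)^2, i.e. by roughly 1.78.
	 */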

	/* Scale by user-specified integer factor.
	 * Range: 0 <= coeff_mul < 2^43
	 */
	coeff_mul = coeff * model_data->scaling_factor;

	/* The power models have results with units
	 * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this
	 * becomes fW/(Hz V^2), which are the units of coeff_mul. However,
	 * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide
	 * by 1000.
	 * Range: 0 <= coeff_mul < 2^33
	 */
	coeff_mul = div_u64(coeff_mul, 1000u);
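	/* For instance, a hypothetical model result of 2.5 mW/(MHz V^2) is
	 * held in coeff_mul as 2500000 fW/(Hz V^2), and the division above
	 * turns it into 2500 pW/(Hz V^2).
	 */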

	/* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */
	*coeffp = clamp(coeff_mul, (u64)0, (u64)1 << 16);
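	/* Spelling that estimate out: 2^16 pW/(Hz V^2) * 400 * 10^6 Hz *
	 * (0.75 V)^2 is approximately 14.7 W.
	 */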

	return 0;
}

int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
{
	struct kbase_ipa_counter_model_data *model_data =
		(struct kbase_ipa_counter_model_data *)model->model_data;
	struct kbase_device *kbdev = model->kbdev;
	s32 *counter_coeffs_p = model_data->counter_coeffs;
	u64 *cnt_values_p = model_data->counter_values;
	const u64 num_counters = model_data->num_counters;
	u32 active_cycles;
	ktime_t now, diff;
	s64 diff_ms;
	int ret;

	lockdep_assert_held(&kbdev->ipa.lock);

	/* The last argument is supposed to be a pointer to the location that
	 * will store the time for which the GPU has been in protected mode
	 * since the last query. This can be passed as NULL as the counter
	 * model itself will not be used when the GPU enters protected mode,
	 * as IPA is supposed to switch to the simple power model.
	 */
	ret = kbase_ipa_control_query(kbdev,
				      model_data->ipa_control_client,
				      cnt_values_p, num_counters, NULL);
	if (WARN_ON(ret))
		return ret;

	now = ktime_get_raw();
	diff = ktime_sub(now, kbdev->ipa.last_sample_time);
	diff_ms = ktime_to_ms(diff);

	kbdev->ipa.last_sample_time = now;

	/* The counter values cannot be relied upon if the sampling interval
	 * was too long. Typically this will happen when the polling is
	 * started after the temperature has risen above a certain trip point.
	 * After that, regular calls at a 25-100 ms interval are expected.
	 */
	if (diff_ms > MAX_SAMPLE_INTERVAL_MS) {
		dev_dbg(kbdev->dev,
			"Last sample was taken %lld milliseconds ago",
			diff_ms);
		return -EOVERFLOW;
	}

	/* Range: 0 (GPU not used at all) up to the max sampling interval, say
	 * 1 second, times the max GPU frequency (GPU 100% utilized):
	 * 0 <= active_cycles <= 1 * ~2GHz
	 * 0 <= active_cycles < 2^31
	 */
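	/* As a quick check: 1 s * 2 * 10^9 Hz = 2 * 10^9 cycles, and
	 * 2^31 = 2147483648 > 2 * 10^9, so the value fits comfortably in the
	 * u32 active_cycles once the range check below passes.
	 */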
	if (*cnt_values_p > U32_MAX) {
		dev_warn(kbdev->dev,
			 "Increment in GPU_ACTIVE counter more than expected");
		return -ERANGE;
	}

	active_cycles = (u32)*cnt_values_p;

	/* If the value of active_cycles is less than the threshold, then
	 * return an error so that the IPA framework can approximate using the
	 * cached simple model results instead. This may be more accurate
	 * than extrapolating using a very small counter dump.
	 */
	if (active_cycles < (u32)max(model_data->min_sample_cycles, 0))
		return -ENODATA;

	/* Range: 1 <= active_cycles < 2^31 */
	active_cycles = max(1u, active_cycles);

	cnt_values_p++;
	ret = calculate_coeff(model_data, model_data->top_level_cntrs_def,
			      model_data->num_top_level_cntrs,
			      counter_coeffs_p, cnt_values_p, active_cycles,
			      &coeffp[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]);
	if (ret)
		return ret;

	cnt_values_p += model_data->num_top_level_cntrs;
	counter_coeffs_p += model_data->num_top_level_cntrs;
	ret = calculate_coeff(model_data, model_data->shader_cores_cntrs_def,
			      model_data->num_shader_cores_cntrs,
			      counter_coeffs_p, cnt_values_p, active_cycles,
			      &coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]);

	return ret;
}

void kbase_ipa_counter_reset_data(struct kbase_ipa_model *model)
{
	struct kbase_ipa_counter_model_data *model_data =
		(struct kbase_ipa_counter_model_data *)model->model_data;
	u64 *cnt_values_p = model_data->counter_values;
	const u64 num_counters = model_data->num_counters;
	int ret;

	lockdep_assert_held(&model->kbdev->ipa.lock);

	ret = kbase_ipa_control_query(model->kbdev,
				      model_data->ipa_control_client,
				      cnt_values_p, num_counters, NULL);
	WARN_ON(ret);
}

int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model,
		const struct kbase_ipa_counter *top_level_cntrs_def,
		size_t num_top_level_cntrs,
		const struct kbase_ipa_counter *shader_cores_cntrs_def,
		size_t num_shader_cores_cntrs,
		s32 reference_voltage)
{
	struct kbase_ipa_counter_model_data *model_data;
	s32 *counter_coeffs_p;
	int err = 0;
	size_t i;

	if (!model || !top_level_cntrs_def || !shader_cores_cntrs_def ||
	    !num_top_level_cntrs || !num_shader_cores_cntrs)
		return -EINVAL;

	model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
	if (!model_data)
		return -ENOMEM;

	model_data->kbdev = model->kbdev;

	model_data->top_level_cntrs_def = top_level_cntrs_def;
	model_data->num_top_level_cntrs = num_top_level_cntrs;

	model_data->shader_cores_cntrs_def = shader_cores_cntrs_def;
	model_data->num_shader_cores_cntrs = num_shader_cores_cntrs;

	model->model_data = (void *)model_data;

	counter_coeffs_p = model_data->counter_coeffs;

	for (i = 0; i < model_data->num_top_level_cntrs; ++i) {
		const struct kbase_ipa_counter *counter =
			&model_data->top_level_cntrs_def[i];

		*counter_coeffs_p = counter->coeff_default_value;

		err = kbase_ipa_model_add_param_s32(
			model, counter->name, counter_coeffs_p, 1, false);
		if (err)
			goto exit;

		counter_coeffs_p++;
	}

	for (i = 0; i < model_data->num_shader_cores_cntrs; ++i) {
		const struct kbase_ipa_counter *counter =
			&model_data->shader_cores_cntrs_def[i];

		*counter_coeffs_p = counter->coeff_default_value;

		err = kbase_ipa_model_add_param_s32(
			model, counter->name, counter_coeffs_p, 1, false);
		if (err)
			goto exit;

		counter_coeffs_p++;
	}

	model_data->scaling_factor = DEFAULT_SCALING_FACTOR;
	err = kbase_ipa_model_add_param_s32(
		model, "scale", &model_data->scaling_factor, 1, false);
	if (err)
		goto exit;

	model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES;
	err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles",
					    &model_data->min_sample_cycles, 1,
					    false);
	if (err)
		goto exit;

	model_data->reference_voltage = reference_voltage;
	err = kbase_ipa_model_add_param_s32(model, "reference_voltage",
					    &model_data->reference_voltage, 1,
					    false);
	if (err)
		goto exit;

	err = kbase_ipa_attach_ipa_control(model_data);

exit:
	if (err) {
		kbase_ipa_model_param_free_all(model);
		kfree(model_data);
	}
	return err;
}

void kbase_ipa_counter_common_model_term(struct kbase_ipa_model *model)
{
	struct kbase_ipa_counter_model_data *model_data =
		(struct kbase_ipa_counter_model_data *)model->model_data;

	kbase_ipa_detach_ipa_control(model_data);
	kfree(model_data);
}