// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

#include "mali_kbase_ipa_counter_common_jm.h"
#include "ipa/mali_kbase_ipa_debugfs.h"

#define DEFAULT_SCALING_FACTOR 5

/* If the value of GPU_ACTIVE is below this, use the simple model
 * instead, to avoid extrapolating small amounts of counter data across
 * large sample periods.
 */
#define DEFAULT_MIN_SAMPLE_CYCLES 10000

/**
 * kbase_ipa_read_hwcnt() - read a counter value
 * @model_data: pointer to model data
 * @offset: offset, in bytes, into vinstr buffer
 *
 * Return: A 32-bit counter value. Range: 0 < value < 2^27 (worst case would be
 * incrementing every cycle over a ~100ms sample period at a high frequency,
 * e.g. 1 GHz: 2^30 * 0.1 seconds ~= 2^27).
 */
static inline u32 kbase_ipa_read_hwcnt(
        struct kbase_ipa_model_vinstr_data *model_data,
        u32 offset)
{
        u8 *p = (u8 *)model_data->dump_buf.dump_buf;
        u64 val = *(u64 *)&p[offset];

        return (val > U32_MAX) ? U32_MAX : (u32)val;
}

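/**
 * kbase_ipa_add_saturate() - saturating addition of two signed 64-bit values
 * @a: first addend
 * @b: second addend
 *
 * Return: a + b, clamped to S64_MAX on positive overflow and to S64_MIN on
 * negative overflow.
 */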
static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
{
        s64 rtn;

        if (a > 0 && (S64_MAX - a) < b)
                rtn = S64_MAX;
        else if (a < 0 && (S64_MIN - a) > b)
                rtn = S64_MIN;
        else
                rtn = a + b;

        return rtn;
}

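/**
 * kbase_ipa_sum_all_shader_cores() - sum a counter across all shader cores
 * @model_data: pointer to model data
 * @coeff: model coefficient to scale the summed value by
 * @counter: offset, in bytes, of the counter within a shader core block
 *
 * Return: Saturating sum of the counter value over every core present in
 * the core mask, multiplied by @coeff. Range: -2^54 < ret < 2^54.
 */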
s64 kbase_ipa_sum_all_shader_cores(
        struct kbase_ipa_model_vinstr_data *model_data,
        s32 coeff, u32 counter)
{
        struct kbase_device *kbdev = model_data->kbdev;
        u64 core_mask;
        u32 base = 0;
        s64 ret = 0;

        core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
        while (core_mask != 0ull) {
                if ((core_mask & 1ull) != 0ull) {
                        /* 0 < counter_value < 2^27 */
                        u32 counter_value = kbase_ipa_read_hwcnt(model_data,
                                base + counter);

                        /* 0 < ret < 2^27 * max_num_cores = 2^32 */
                        ret = kbase_ipa_add_saturate(ret, counter_value);
                }
                base += KBASE_IPA_NR_BYTES_PER_BLOCK;
                core_mask >>= 1;
        }

        /* Range: -2^54 < ret * coeff < 2^54 */
        return ret * coeff;
}

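/**
 * kbase_ipa_sum_all_memsys_blocks() - sum a counter across all memsys blocks
 * @model_data: pointer to model data
 * @coeff: model coefficient to scale the summed value by
 * @counter: offset, in bytes, of the counter within a memsys block
 *
 * Return: Saturating sum of the counter value over every L2 slice,
 * multiplied by @coeff. Range: -2^51 < ret * coeff < 2^51.
 */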
s64 kbase_ipa_sum_all_memsys_blocks(
        struct kbase_ipa_model_vinstr_data *model_data,
        s32 coeff, u32 counter)
{
        struct kbase_device *kbdev = model_data->kbdev;
        const u32 num_blocks = kbdev->gpu_props.props.l2_props.num_l2_slices;
        u32 base = 0;
        s64 ret = 0;
        u32 i;

        for (i = 0; i < num_blocks; i++) {
                /* 0 < counter_value < 2^27 */
                u32 counter_value = kbase_ipa_read_hwcnt(model_data,
                        base + counter);

                /* 0 < ret < 2^27 * max_num_memsys_blocks = 2^29 */
                ret = kbase_ipa_add_saturate(ret, counter_value);
                base += KBASE_IPA_NR_BYTES_PER_BLOCK;
        }

        /* Range: -2^51 < ret * coeff < 2^51 */
        return ret * coeff;
}

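/**
 * kbase_ipa_single_counter() - read a single counter value and scale it
 * @model_data: pointer to model data
 * @coeff: model coefficient to scale the counter value by
 * @counter: offset, in bytes, into the vinstr buffer
 *
 * Return: Counter value multiplied by @coeff. Range: -2^49 < ret < 2^49.
 */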
s64 kbase_ipa_single_counter(
        struct kbase_ipa_model_vinstr_data *model_data,
        s32 coeff, u32 counter)
{
        /* Range: 0 < counter_value < 2^27 */
        const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter);

        /* Range: -2^49 < ret < 2^49 */
        return counter_value * (s64) coeff;
}

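/**
 * kbase_ipa_attach_vinstr() - attach a model to the hardware counter
 *                             virtualizer
 * @model_data: pointer to model data
 *
 * Registers the model as a virtualizer client with all counters enabled
 * except the clock cycle counters, and allocates the dump buffer that
 * subsequent counter reads are served from.
 *
 * Return: 0 on success, or an error code.
 */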
int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
{
        int errcode;
        struct kbase_device *kbdev = model_data->kbdev;
        struct kbase_hwcnt_virtualizer *hvirt = kbdev->hwcnt_gpu_virt;
        struct kbase_hwcnt_enable_map enable_map;
        const struct kbase_hwcnt_metadata *metadata =
                kbase_hwcnt_virtualizer_metadata(hvirt);

        if (!metadata)
                return -1;

        errcode = kbase_hwcnt_enable_map_alloc(metadata, &enable_map);
        if (errcode) {
                dev_err(kbdev->dev, "Failed to allocate IPA enable map");
                return errcode;
        }

        kbase_hwcnt_enable_map_enable_all(&enable_map);

        /* Disable cycle counter only. */
        enable_map.clk_enable_map = 0;

        errcode = kbase_hwcnt_virtualizer_client_create(
                hvirt, &enable_map, &model_data->hvirt_cli);
        kbase_hwcnt_enable_map_free(&enable_map);
        if (errcode) {
                dev_err(kbdev->dev, "Failed to register IPA with virtualizer");
                model_data->hvirt_cli = NULL;
                return errcode;
        }

        errcode = kbase_hwcnt_dump_buffer_alloc(
                metadata, &model_data->dump_buf);
        if (errcode) {
                dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
                kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli);
                model_data->hvirt_cli = NULL;
                return errcode;
        }

        return 0;
}

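/**
 * kbase_ipa_detach_vinstr() - detach a model from the hardware counter
 *                             virtualizer
 * @model_data: pointer to model data
 *
 * Destroys the virtualizer client and frees the dump buffer, if attached.
 * Safe to call when no client is attached.
 */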
void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
{
        if (model_data->hvirt_cli) {
                kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli);
                kbase_hwcnt_dump_buffer_free(&model_data->dump_buf);
                model_data->hvirt_cli = NULL;
        }
}

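/**
 * kbase_ipa_vinstr_dynamic_coeff() - calculate dynamic power coefficient
 * @model: pointer to the IPA model
 * @coeffp: pointer to where the resulting coefficient is stored
 *
 * Dumps the hardware counters, sums each counter group weighted by its
 * model coefficient, then normalizes by the number of active GPU cycles
 * and by the model's reference voltage (squared) to produce a coefficient
 * in pW/(Hz V^2).
 *
 * Return: 0 on success, or an error code (e.g. -ENODATA if too few GPU
 * cycles were sampled for the result to be meaningful).
 */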
int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
{
        struct kbase_ipa_model_vinstr_data *model_data =
                (struct kbase_ipa_model_vinstr_data *)model->model_data;
        s64 energy = 0;
        size_t i;
        u64 coeff = 0, coeff_mul = 0;
        u64 start_ts_ns, end_ts_ns;
        u32 active_cycles;
        int err = 0;

        err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli,
                &start_ts_ns, &end_ts_ns, &model_data->dump_buf);
        if (err)
                goto err0;

        /* Range: 0 (GPU not used at all) to the max sampling interval (say
         * 1s) multiplied by the max GPU frequency (GPU 100% utilized).
         * 0 <= active_cycles <= 1 * ~2GHz
         * 0 <= active_cycles < 2^31
         */
        active_cycles = model_data->get_active_cycles(model_data);

        if (active_cycles < (u32) max(model_data->min_sample_cycles, 0)) {
                err = -ENODATA;
                goto err0;
        }

        /* Range: 1 <= active_cycles < 2^31 */
        active_cycles = max(1u, active_cycles);

        /* Range of 'energy' is +/- 2^54 * number of IPA groups (~8), so around
         * -2^57 < energy < 2^57
         */
        for (i = 0; i < model_data->groups_def_num; i++) {
                const struct kbase_ipa_group *group = &model_data->groups_def[i];
                s32 coeff = model_data->group_values[i];
                s64 group_energy = group->op(model_data, coeff,
                        group->counter_block_offset);

                energy = kbase_ipa_add_saturate(energy, group_energy);
        }

        /* Range: 0 <= coeff < 2^57 */
        if (energy > 0)
                coeff = energy;

        /* Range: 0 <= coeff < 2^57 (because active_cycles >= 1). However, this
         * can be constrained further: counter values can only be increased by
         * a theoretical maximum of about 64k per clock cycle. Beyond this,
         * we'd have to sample every 1ms to avoid them overflowing at the
         * lowest clock frequency (say 100MHz). Therefore, we can write the
         * range of 'coeff' in terms of active_cycles:
         *
         * coeff = SUM(coeffN * counterN * num_cores_for_counterN)
         * coeff <= SUM(coeffN * counterN) * max_num_cores
         * coeff <= num_IPA_groups * max_coeff * max_counter * max_num_cores
         *       (substitute max_counter = 2^16 * active_cycles)
         * coeff <= num_IPA_groups * max_coeff * 2^16 * active_cycles * max_num_cores
         * coeff <= 2^3 * 2^22 * 2^16 * active_cycles * 2^5
         * coeff <= 2^46 * active_cycles
         *
         * So after the division: 0 <= coeff <= 2^46
         */
        coeff = div_u64(coeff, active_cycles);

        /* Not all models were derived at the same reference voltage. Voltage
         * scaling is done by multiplying by V^2, so we need to *divide* by
         * Vref^2 here.
         * Range: 0 <= coeff <= 2^49
         */
        coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
        /* Range: 0 <= coeff <= 2^52 */
        coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));

        /* Scale by user-specified integer factor.
         * Range: 0 <= coeff_mul < 2^57
         */
        coeff_mul = coeff * model_data->scaling_factor;

        /* The power models have results with units
         * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this
         * becomes fW/(Hz V^2), which are the units of coeff_mul. However,
         * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide
         * by 1000.
         * Range: 0 <= coeff_mul < 2^47
         */
        coeff_mul = div_u64(coeff_mul, 1000u);

err0:
        /* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */
        *coeffp = clamp(coeff_mul, (u64) 0, (u64) 1 << 16);
        return err;
}

void kbase_ipa_vinstr_reset_data(struct kbase_ipa_model *model)
{
        /* Currently not implemented */
        WARN_ON_ONCE(1);
}

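/**
 * kbase_ipa_vinstr_common_model_init() - initialize a counter-based model
 * @model: pointer to the IPA model
 * @ipa_groups_def: array of counter groups the model is built from
 * @ipa_group_size: number of entries in @ipa_groups_def
 * @get_active_cycles: callback returning the number of active GPU cycles
 *                     over the sample period
 * @reference_voltage: voltage (in mV, given the *1000 scaling above) of the
 *                     operating point the model's coefficients were derived at
 *
 * Each group is registered as a tunable s32 model parameter under the
 * group's name, alongside "scale", "min_sample_cycles" and
 * "reference_voltage".
 *
 * Illustrative sketch only: a GPU-specific model might call this with a
 * group table along these lines, where the counter offsets and the
 * callback are hypothetical names, not definitions from this file:
 *
 *	static const struct kbase_ipa_group example_ipa_groups[] = {
 *		{ "l2_access", 526300, kbase_ipa_sum_all_memsys_blocks,
 *		  EXAMPLE_L2_ANY_LOOKUP },
 *		{ "exec_instr_count", 301100, kbase_ipa_sum_all_shader_cores,
 *		  EXAMPLE_SC_EXEC_INSTR_COUNT },
 *	};
 *
 *	err = kbase_ipa_vinstr_common_model_init(model, example_ipa_groups,
 *			ARRAY_SIZE(example_ipa_groups),
 *			example_get_active_cycles, 800);
 *
 * Return: 0 on success, or an error code.
 */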
int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
                const const struct kbase_ipa_group *ipa_groups_def,
                size_t ipa_group_size,
                kbase_ipa_get_active_cycles_callback get_active_cycles,
                s32 reference_voltage)
{
        int err = 0;
        size_t i;
        struct kbase_ipa_model_vinstr_data *model_data;

        if (!model || !ipa_groups_def || !ipa_group_size || !get_active_cycles)
                return -EINVAL;

        model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
        if (!model_data)
                return -ENOMEM;

        model_data->kbdev = model->kbdev;
        model_data->groups_def = ipa_groups_def;
        model_data->groups_def_num = ipa_group_size;
        model_data->get_active_cycles = get_active_cycles;

        model->model_data = (void *) model_data;

        for (i = 0; i < model_data->groups_def_num; ++i) {
                const struct kbase_ipa_group *group = &model_data->groups_def[i];

                model_data->group_values[i] = group->default_value;
                err = kbase_ipa_model_add_param_s32(model, group->name,
                                &model_data->group_values[i],
                                1, false);
                if (err)
                        goto exit;
        }

        model_data->scaling_factor = DEFAULT_SCALING_FACTOR;
        err = kbase_ipa_model_add_param_s32(model, "scale",
                        &model_data->scaling_factor,
                        1, false);
        if (err)
                goto exit;

        model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES;
        err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles",
                        &model_data->min_sample_cycles,
                        1, false);
        if (err)
                goto exit;

        model_data->reference_voltage = reference_voltage;
        err = kbase_ipa_model_add_param_s32(model, "reference_voltage",
                        &model_data->reference_voltage,
                        1, false);
        if (err)
                goto exit;

        err = kbase_ipa_attach_vinstr(model_data);

exit:
        if (err) {
                kbase_ipa_model_param_free_all(model);
                kfree(model_data);
        }
        return err;
}

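/**
 * kbase_ipa_vinstr_common_model_term() - terminate a counter-based model
 * @model: pointer to the IPA model
 *
 * Detaches the model from the virtualizer and frees the model data
 * allocated by kbase_ipa_vinstr_common_model_init().
 */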
void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model)
{
        struct kbase_ipa_model_vinstr_data *model_data =
                (struct kbase_ipa_model_vinstr_data *)model->model_data;

        kbase_ipa_detach_vinstr(model_data);
        kfree(model_data);
}