| .. | .. |
|---|
| 2 | 2 | #ifndef _LINUX_ENERGY_MODEL_H |
|---|
| 3 | 3 | #define _LINUX_ENERGY_MODEL_H |
|---|
| 4 | 4 | #include <linux/cpumask.h> |
|---|
| 5 | +#include <linux/device.h> |
|---|
| 5 | 6 | #include <linux/jump_label.h> |
|---|
| 6 | 7 | #include <linux/kobject.h> |
|---|
| 7 | 8 | #include <linux/rcupdate.h> |
|---|
| .. | .. |
|---|
| 9 | 10 | #include <linux/sched/topology.h> |
|---|
| 10 | 11 | #include <linux/types.h> |
|---|
| 11 | 12 | |
|---|
| 12 | | -#ifdef CONFIG_ENERGY_MODEL |
|---|
| 13 | 13 | /** |
|---|
| 14 | | - * em_cap_state - Capacity state of a performance domain |
|---|
| 15 | | - * @frequency: The CPU frequency in KHz, for consistency with CPUFreq |
|---|
| 16 | | - * @power: The power consumed by 1 CPU at this level, in milli-watts |
|---|
| 14 | + * em_perf_state - Performance state of a performance domain |
|---|
| 15 | + * @frequency: The frequency in KHz, for consistency with CPUFreq |
|---|
| 16 | + * @power: The power consumed at this level, in milli-watts (by 1 CPU or |
|---|
| 17 | + by a registered device). It can be a total power: static and |
|---|
| 18 | + dynamic. |
|---|
| 17 | 19 | * @cost: The cost coefficient associated with this level, used during |
|---|
| 18 | 20 | * energy calculation. Equal to: power * max_frequency / frequency |
|---|
| 19 | 21 | */ |
|---|
| 20 | | -struct em_cap_state { |
|---|
| 22 | +struct em_perf_state { |
|---|
| 21 | 23 | unsigned long frequency; |
|---|
| 22 | 24 | unsigned long power; |
|---|
| 23 | 25 | unsigned long cost; |
|---|
| .. | .. |
|---|
| 25 | 27 | |
|---|
| 26 | 28 | /** |
|---|
| 27 | 29 | * em_perf_domain - Performance domain |
|---|
| 28 | | - * @table: List of capacity states, in ascending order |
|---|
| 29 | | - * @nr_cap_states: Number of capacity states |
|---|
| 30 | | - * @cpus: Cpumask covering the CPUs of the domain |
|---|
| 30 | + * @table: List of performance states, in ascending order |
|---|
| 31 | + * @nr_perf_states: Number of performance states |
|---|
| 32 | + * @milliwatts: Flag indicating whether the power values are in milli-Watts |
|---|
| 33 | + * or some other scale. |
|---|
| 34 | + * @cpus: Cpumask covering the CPUs of the domain. It's here |
|---|
| 35 | + * for performance reasons to avoid potential cache |
|---|
| 36 | + * misses during energy calculations in the scheduler |
|---|
| 37 | + * and to simplify allocating/freeing that memory region. |
|---|
| 31 | 38 | * |
|---|
| 32 | | - * A "performance domain" represents a group of CPUs whose performance is |
|---|
| 33 | | - * scaled together. All CPUs of a performance domain must have the same |
|---|
| 34 | | - * micro-architecture. Performance domains often have a 1-to-1 mapping with |
|---|
| 35 | | - * CPUFreq policies. |
|---|
| 39 | + * In case of a CPU device, a "performance domain" represents a group of CPUs |
|---|
| 40 | + * whose performance is scaled together. All CPUs of a performance domain |
|---|
| 41 | + * must have the same micro-architecture. Performance domains often have |
|---|
| 42 | + * a 1-to-1 mapping with CPUFreq policies. In case of other devices the @cpus |
|---|
| 43 | + * field is unused. |
|---|
| 36 | 44 | */ |
|---|
| 37 | 45 | struct em_perf_domain { |
|---|
| 38 | | - struct em_cap_state *table; |
|---|
| 39 | | - int nr_cap_states; |
|---|
| 40 | | - unsigned long cpus[0]; |
|---|
| 46 | + struct em_perf_state *table; |
|---|
| 47 | + int nr_perf_states; |
|---|
| 48 | + int milliwatts; |
|---|
| 49 | + unsigned long cpus[]; |
|---|
| 41 | 50 | }; |
|---|
| 42 | 51 | |
|---|
| 43 | | -#define EM_CPU_MAX_POWER 0xFFFF |
|---|
| 52 | +#define em_span_cpus(em) (to_cpumask((em)->cpus)) |
|---|
| 53 | + |
|---|
| 54 | +#ifdef CONFIG_ENERGY_MODEL |
|---|
| 55 | +#define EM_MAX_POWER 0xFFFF |
|---|
| 56 | + |
|---|
| 57 | +/* |
|---|
| 58 | + * Increase resolution of energy estimation calculations for 64-bit |
|---|
| 59 | + * architectures. The extra resolution improves decisions made by EAS for the |
|---|
| 60 | + * task placement when two Performance Domains might provide similar energy |
|---|
| 61 | + * estimation values (w/o better resolution the values could be equal). |
|---|
| 62 | + * |
|---|
| 63 | + * We increase resolution only if we have enough bits to allow this increased |
|---|
| 64 | + * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit |
|---|
| 65 | + * are pretty high and the returns do not justify the increased costs. |
|---|
| 66 | + */ |
|---|
| 67 | +#ifdef CONFIG_64BIT |
|---|
| 68 | +#define em_scale_power(p) ((p) * 1000) |
|---|
| 69 | +#else |
|---|
| 70 | +#define em_scale_power(p) (p) |
|---|
| 71 | +#endif |
|---|
| 44 | 72 | |
|---|
| 45 | 73 | struct em_data_callback { |
|---|
| 46 | 74 | /** |
|---|
| 47 | | - * active_power() - Provide power at the next capacity state of a CPU |
|---|
| 48 | | - * @power : Active power at the capacity state in mW (modified) |
|---|
| 49 | | - * @freq : Frequency at the capacity state in kHz (modified) |
|---|
| 50 | | - * @cpu : CPU for which we do this operation |
|---|
| 75 | + * active_power() - Provide power at the next performance state of |
|---|
| 76 | + * a device |
|---|
| 77 | + * @power : Active power at the performance state in mW |
|---|
| 78 | + * (modified) |
|---|
| 79 | + * @freq : Frequency at the performance state in kHz |
|---|
| 80 | + * (modified) |
|---|
| 81 | + * @dev : Device for which we do this operation (can be a CPU) |
|---|
| 51 | 82 | * |
|---|
| 52 | | - * active_power() must find the lowest capacity state of 'cpu' above |
|---|
| 83 | + * active_power() must find the lowest performance state of 'dev' above |
|---|
| 53 | 84 | * 'freq' and update 'power' and 'freq' to the matching active power |
|---|
| 54 | 85 | * and frequency. |
|---|
| 55 | 86 | * |
|---|
| 56 | | - * The power is the one of a single CPU in the domain, expressed in |
|---|
| 57 | | - * milli-watts. It is expected to fit in the [0, EM_CPU_MAX_POWER] |
|---|
| 58 | | - * range. |
|---|
| 87 | + * In case of CPUs, the power is the one of a single CPU in the domain, |
|---|
| 88 | + * expressed in milli-watts. It is expected to fit in the |
|---|
| 89 | + * [0, EM_MAX_POWER] range. |
|---|
| 59 | 90 | * |
|---|
| 60 | 91 | * Return 0 on success. |
|---|
| 61 | 92 | */ |
|---|
| 62 | | - int (*active_power)(unsigned long *power, unsigned long *freq, int cpu); |
|---|
| 93 | + int (*active_power)(unsigned long *power, unsigned long *freq, |
|---|
| 94 | + struct device *dev); |
|---|
| 63 | 95 | }; |
|---|
| 64 | 96 | #define EM_DATA_CB(_active_power_cb) { .active_power = &_active_power_cb } |
|---|
| 65 | 97 | |
|---|
| 66 | 98 | struct em_perf_domain *em_cpu_get(int cpu); |
|---|
| 67 | | -int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, |
|---|
| 68 | | - struct em_data_callback *cb); |
|---|
| 99 | +struct em_perf_domain *em_pd_get(struct device *dev); |
|---|
| 100 | +int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, |
|---|
| 101 | + struct em_data_callback *cb, cpumask_t *span, |
|---|
| 102 | + bool milliwatts); |
|---|
| 103 | +void em_dev_unregister_perf_domain(struct device *dev); |
|---|
| 69 | 104 | |
|---|
| 70 | 105 | /** |
|---|
| 71 | | - * em_pd_energy() - Estimates the energy consumed by the CPUs of a perf. domain |
|---|
| 106 | + * em_cpu_energy() - Estimates the energy consumed by the CPUs of a |
|---|
| 107 | + performance domain |
|---|
| 72 | 108 | * @pd : performance domain for which energy has to be estimated |
|---|
| 73 | 109 | * @max_util : highest utilization among CPUs of the domain |
|---|
| 74 | 110 | * @sum_util : sum of the utilization of all CPUs in the domain |
|---|
| 75 | 111 | * |
|---|
| 112 | + * This function must be used only for CPU devices. It does not validate that |
|---|
| 113 | + * the EM is a CPU type and has a cpumask allocated. It is called quite |
|---|
| 114 | + * frequently from the scheduler code, which is why there are no checks. |
|---|
| 115 | + * |
|---|
| 76 | 116 | * Return: the sum of the energy consumed by the CPUs of the domain assuming |
|---|
| 77 | 117 | * a capacity state satisfying the max utilization of the domain. |
|---|
| 78 | 118 | */ |
|---|
| 79 | | -static inline unsigned long em_pd_energy(struct em_perf_domain *pd, |
|---|
| 119 | +static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, |
|---|
| 80 | 120 | unsigned long max_util, unsigned long sum_util) |
|---|
| 81 | 121 | { |
|---|
| 82 | 122 | unsigned long freq, scale_cpu; |
|---|
| 83 | | - struct em_cap_state *cs; |
|---|
| 123 | + struct em_perf_state *ps; |
|---|
| 84 | 124 | int i, cpu; |
|---|
| 85 | 125 | |
|---|
| 86 | | - /* |
|---|
| 87 | | - * In order to predict the capacity state, map the utilization of the |
|---|
| 88 | | - * most utilized CPU of the performance domain to a requested frequency, |
|---|
| 89 | | - * like schedutil. |
|---|
| 90 | | - */ |
|---|
| 91 | | - cpu = cpumask_first(to_cpumask(pd->cpus)); |
|---|
| 92 | | - scale_cpu = arch_scale_cpu_capacity(NULL, cpu); |
|---|
| 93 | | - cs = &pd->table[pd->nr_cap_states - 1]; |
|---|
| 94 | | - freq = map_util_freq(max_util, cs->frequency, scale_cpu); |
|---|
| 126 | + if (!sum_util) |
|---|
| 127 | + return 0; |
|---|
| 95 | 128 | |
|---|
| 96 | 129 | /* |
|---|
| 97 | | - * Find the lowest capacity state of the Energy Model above the |
|---|
| 130 | + * In order to predict the performance state, map the utilization of |
|---|
| 131 | + * the most utilized CPU of the performance domain to a requested |
|---|
| 132 | + * frequency, like schedutil. |
|---|
| 133 | + */ |
|---|
| 134 | + cpu = cpumask_first(to_cpumask(pd->cpus)); |
|---|
| 135 | + scale_cpu = arch_scale_cpu_capacity(cpu); |
|---|
| 136 | + ps = &pd->table[pd->nr_perf_states - 1]; |
|---|
| 137 | + freq = map_util_freq(max_util, ps->frequency, scale_cpu); |
|---|
| 138 | + |
|---|
| 139 | + /* |
|---|
| 140 | + * Find the lowest performance state of the Energy Model above the |
|---|
| 98 | 141 | * requested frequency. |
|---|
| 99 | 142 | */ |
|---|
| 100 | | - for (i = 0; i < pd->nr_cap_states; i++) { |
|---|
| 101 | | - cs = &pd->table[i]; |
|---|
| 102 | | - if (cs->frequency >= freq) |
|---|
| 143 | + for (i = 0; i < pd->nr_perf_states; i++) { |
|---|
| 144 | + ps = &pd->table[i]; |
|---|
| 145 | + if (ps->frequency >= freq) |
|---|
| 103 | 146 | break; |
|---|
| 104 | 147 | } |
|---|
| 105 | 148 | |
|---|
| 106 | 149 | /* |
|---|
| 107 | | - * The capacity of a CPU in the domain at that capacity state (cs) |
|---|
| 150 | + * The capacity of a CPU in the domain at the performance state (ps) |
|---|
| 108 | 151 | * can be computed as: |
|---|
| 109 | 152 | * |
|---|
| 110 | | - * cs->freq * scale_cpu |
|---|
| 111 | | - * cs->cap = -------------------- (1) |
|---|
| 153 | + * ps->freq * scale_cpu |
|---|
| 154 | + * ps->cap = -------------------- (1) |
|---|
| 112 | 155 | * cpu_max_freq |
|---|
| 113 | 156 | * |
|---|
| 114 | 157 | * So, ignoring the costs of idle states (which are not available in |
|---|
| 115 | | - * the EM), the energy consumed by this CPU at that capacity state is |
|---|
| 116 | | - * estimated as: |
|---|
| 158 | + * the EM), the energy consumed by this CPU at that performance state |
|---|
| 159 | + * is estimated as: |
|---|
| 117 | 160 | * |
|---|
| 118 | | - * cs->power * cpu_util |
|---|
| 161 | + * ps->power * cpu_util |
|---|
| 119 | 162 | * cpu_nrg = -------------------- (2) |
|---|
| 120 | | - * cs->cap |
|---|
| 163 | + * ps->cap |
|---|
| 121 | 164 | * |
|---|
| 122 | | - * since 'cpu_util / cs->cap' represents its percentage of busy time. |
|---|
| 165 | + * since 'cpu_util / ps->cap' represents its percentage of busy time. |
|---|
| 123 | 166 | * |
|---|
| 124 | 167 | * NOTE: Although the result of this computation actually is in |
|---|
| 125 | 168 | * units of power, it can be manipulated as an energy value |
|---|
| .. | .. |
|---|
| 129 | 172 | * By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product |
|---|
| 130 | 173 | * of two terms: |
|---|
| 131 | 174 | * |
|---|
| 132 | | - * cs->power * cpu_max_freq cpu_util |
|---|
| 175 | + * ps->power * cpu_max_freq cpu_util |
|---|
| 133 | 176 | * cpu_nrg = ------------------------ * --------- (3) |
|---|
| 134 | | - * cs->freq scale_cpu |
|---|
| 177 | + * ps->freq scale_cpu |
|---|
| 135 | 178 | * |
|---|
| 136 | | - * The first term is static, and is stored in the em_cap_state struct |
|---|
| 137 | | - * as 'cs->cost'. |
|---|
| 179 | + * The first term is static, and is stored in the em_perf_state struct |
|---|
| 180 | + * as 'ps->cost'. |
|---|
| 138 | 181 | * |
|---|
| 139 | 182 | * Since all CPUs of the domain have the same micro-architecture, they |
|---|
| 140 | | - * share the same 'cs->cost', and the same CPU capacity. Hence, the |
|---|
| 183 | + * share the same 'ps->cost', and the same CPU capacity. Hence, the |
|---|
| 141 | 184 | * total energy of the domain (which is the simple sum of the energy of |
|---|
| 142 | 185 | * all of its CPUs) can be factorized as: |
|---|
| 143 | 186 | * |
|---|
| 144 | | - * cs->cost * \Sum cpu_util |
|---|
| 187 | + * ps->cost * \Sum cpu_util |
|---|
| 145 | 188 | * pd_nrg = ------------------------ (4) |
|---|
| 146 | 189 | * scale_cpu |
|---|
| 147 | 190 | */ |
|---|
| 148 | | - return cs->cost * sum_util / scale_cpu; |
|---|
| 191 | + return ps->cost * sum_util / scale_cpu; |
|---|
| 149 | 192 | } |
|---|
| 150 | 193 | |
|---|
| 151 | 194 | /** |
|---|
| 152 | | - * em_pd_nr_cap_states() - Get the number of capacity states of a perf. domain |
|---|
| 195 | + * em_pd_nr_perf_states() - Get the number of performance states of a perf. |
|---|
| 196 | + * domain |
|---|
| 153 | 197 | * @pd : performance domain for which this must be done |
|---|
| 154 | 198 | * |
|---|
| 155 | | - * Return: the number of capacity states in the performance domain table |
|---|
| 199 | + * Return: the number of performance states in the performance domain table |
|---|
| 156 | 200 | */ |
|---|
| 157 | | -static inline int em_pd_nr_cap_states(struct em_perf_domain *pd) |
|---|
| 201 | +static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) |
|---|
| 158 | 202 | { |
|---|
| 159 | | - return pd->nr_cap_states; |
|---|
| 203 | + return pd->nr_perf_states; |
|---|
| 160 | 204 | } |
|---|
| 161 | 205 | |
|---|
| 162 | 206 | #else |
|---|
| 163 | | -struct em_perf_domain {}; |
|---|
| 164 | 207 | struct em_data_callback {}; |
|---|
| 165 | 208 | #define EM_DATA_CB(_active_power_cb) { } |
|---|
| 166 | 209 | |
|---|
| 167 | | -static inline int em_register_perf_domain(cpumask_t *span, |
|---|
| 168 | | - unsigned int nr_states, struct em_data_callback *cb) |
|---|
| 210 | +static inline |
|---|
| 211 | +int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, |
|---|
| 212 | + struct em_data_callback *cb, cpumask_t *span, |
|---|
| 213 | + bool milliwatts) |
|---|
| 169 | 214 | { |
|---|
| 170 | 215 | return -EINVAL; |
|---|
| 216 | +} |
|---|
| 217 | +static inline void em_dev_unregister_perf_domain(struct device *dev) |
|---|
| 218 | +{ |
|---|
| 171 | 219 | } |
|---|
| 172 | 220 | static inline struct em_perf_domain *em_cpu_get(int cpu) |
|---|
| 173 | 221 | { |
|---|
| 174 | 222 | return NULL; |
|---|
| 175 | 223 | } |
|---|
| 176 | | -static inline unsigned long em_pd_energy(struct em_perf_domain *pd, |
|---|
| 224 | +static inline struct em_perf_domain *em_pd_get(struct device *dev) |
|---|
| 225 | +{ |
|---|
| 226 | + return NULL; |
|---|
| 227 | +} |
|---|
| 228 | +static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, |
|---|
| 177 | 229 | unsigned long max_util, unsigned long sum_util) |
|---|
| 178 | 230 | { |
|---|
| 179 | 231 | return 0; |
|---|
| 180 | 232 | } |
|---|
| 181 | | -static inline int em_pd_nr_cap_states(struct em_perf_domain *pd) |
|---|
| 233 | +static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) |
|---|
| 182 | 234 | { |
|---|
| 183 | 235 | return 0; |
|---|
| 184 | 236 | } |
|---|