@@ ... @@
 #include <linux/acpi.h>
 #include <linux/arch_topology.h>
 #include <linux/cacheinfo.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
 #include <linux/init.h>
 #include <linux/percpu.h>
-#include <linux/node.h>
-#include <linux/nodemask.h>
-#include <linux/of.h>
-#include <linux/sched.h>
-#include <linux/sched/topology.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/string.h>
 
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/topology.h>
-
-static int __init get_cpu_for_node(struct device_node *node)
-{
-	struct device_node *cpu_node;
-	int cpu;
-
-	cpu_node = of_parse_phandle(node, "cpu", 0);
-	if (!cpu_node)
-		return -1;
-
-	cpu = of_cpu_node_to_id(cpu_node);
-	if (cpu >= 0)
-		topology_parse_cpu_capacity(cpu_node, cpu);
-	else
-		pr_crit("Unable to find CPU node for %pOF\n", cpu_node);
-
-	of_node_put(cpu_node);
-	return cpu;
-}
-
-static int __init parse_core(struct device_node *core, int package_id,
-			     int core_id)
-{
-	char name[10];
-	bool leaf = true;
-	int i = 0;
-	int cpu;
-	struct device_node *t;
-
-	do {
-		snprintf(name, sizeof(name), "thread%d", i);
-		t = of_get_child_by_name(core, name);
-		if (t) {
-			leaf = false;
-			cpu = get_cpu_for_node(t);
-			if (cpu >= 0) {
-				cpu_topology[cpu].package_id = package_id;
-				cpu_topology[cpu].core_id = core_id;
-				cpu_topology[cpu].thread_id = i;
-			} else {
-				pr_err("%pOF: Can't get CPU for thread\n",
-				       t);
-				of_node_put(t);
-				return -EINVAL;
-			}
-			of_node_put(t);
-		}
-		i++;
-	} while (t);
-
-	cpu = get_cpu_for_node(core);
-	if (cpu >= 0) {
-		if (!leaf) {
-			pr_err("%pOF: Core has both threads and CPU\n",
-			       core);
-			return -EINVAL;
-		}
-
-		cpu_topology[cpu].package_id = package_id;
-		cpu_topology[cpu].core_id = core_id;
-	} else if (leaf) {
-		pr_err("%pOF: Can't get CPU for leaf core\n", core);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int __init parse_cluster(struct device_node *cluster, int depth)
-{
-	char name[10];
-	bool leaf = true;
-	bool has_cores = false;
-	struct device_node *c;
-	static int package_id __initdata;
-	int core_id = 0;
-	int i, ret;
-
-	/*
-	 * First check for child clusters; we currently ignore any
-	 * information about the nesting of clusters and present the
-	 * scheduler with a flat list of them.
-	 */
-	i = 0;
-	do {
-		snprintf(name, sizeof(name), "cluster%d", i);
-		c = of_get_child_by_name(cluster, name);
-		if (c) {
-			leaf = false;
-			ret = parse_cluster(c, depth + 1);
-			of_node_put(c);
-			if (ret != 0)
-				return ret;
-		}
-		i++;
-	} while (c);
-
-	/* Now check for cores */
-	i = 0;
-	do {
-		snprintf(name, sizeof(name), "core%d", i);
-		c = of_get_child_by_name(cluster, name);
-		if (c) {
-			has_cores = true;
-
-			if (depth == 0) {
-				pr_err("%pOF: cpu-map children should be clusters\n",
-				       c);
-				of_node_put(c);
-				return -EINVAL;
-			}
-
-			if (leaf) {
-				ret = parse_core(c, package_id, core_id++);
-			} else {
-				pr_err("%pOF: Non-leaf cluster with core %s\n",
-				       cluster, name);
-				ret = -EINVAL;
-			}
-
-			of_node_put(c);
-			if (ret != 0)
-				return ret;
-		}
-		i++;
-	} while (c);
-
-	if (leaf && !has_cores)
-		pr_warn("%pOF: empty cluster\n", cluster);
-
-	if (leaf)
-		package_id++;
-
-	return 0;
-}
-
-static int __init parse_dt_topology(void)
-{
-	struct device_node *cn, *map;
-	int ret = 0;
-	int cpu;
-
-	cn = of_find_node_by_path("/cpus");
-	if (!cn) {
-		pr_err("No CPU information found in DT\n");
-		return 0;
-	}
-
-	/*
-	 * When topology is provided cpu-map is essentially a root
-	 * cluster with restricted subnodes.
-	 */
-	map = of_get_child_by_name(cn, "cpu-map");
-	if (!map)
-		goto out;
-
-	ret = parse_cluster(map, 0);
-	if (ret != 0)
-		goto out_map;
-
-	topology_normalize_cpu_scale();
-
-	/*
-	 * Check that all cores are in the topology; the SMP code will
-	 * only mark cores described in the DT as possible.
-	 */
-	for_each_possible_cpu(cpu)
-		if (cpu_topology[cpu].package_id == -1)
-			ret = -EINVAL;
-
-out_map:
-	of_node_put(map);
-out:
-	of_node_put(cn);
-	return ret;
-}
-
-/*
- * cpu topology table
- */
-struct cpu_topology cpu_topology[NR_CPUS];
-EXPORT_SYMBOL_GPL(cpu_topology);
-
-const struct cpumask *cpu_coregroup_mask(int cpu)
-{
-	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));
-
-	/* Find the smaller of NUMA, core or LLC siblings */
-	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
-		/* not numa in package, lets use the package siblings */
-		core_mask = &cpu_topology[cpu].core_sibling;
-	}
-	if (cpu_topology[cpu].llc_id != -1) {
-		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
-			core_mask = &cpu_topology[cpu].llc_sibling;
-	}
-
-	return core_mask;
-}
-
-static void update_siblings_masks(unsigned int cpuid)
-{
-	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
-	int cpu;
-
-	/* update core and thread sibling masks */
-	for_each_online_cpu(cpu) {
-		cpu_topo = &cpu_topology[cpu];
-
-		if (cpuid_topo->llc_id == cpu_topo->llc_id) {
-			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
-			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
-		}
-
-		if (cpuid_topo->package_id != cpu_topo->package_id)
-			continue;
-
-		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
-		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
-
-		if (cpuid_topo->core_id != cpu_topo->core_id)
-			continue;
-
-		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
-		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
-	}
-}
-
-void store_cpu_topology(unsigned int cpuid)
-{
-	struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
-	u64 mpidr;
-
-	if (cpuid_topo->package_id != -1)
-		goto topology_populated;
-
-	mpidr = read_cpuid_mpidr();
-
-	/* Uniprocessor systems can rely on default topology values */
-	if (mpidr & MPIDR_UP_BITMASK)
-		return;
-
-	/*
-	 * This would be the place to create cpu topology based on MPIDR.
-	 *
-	 * However, it cannot be trusted to depict the actual topology; some
-	 * pieces of the architecture enforce an artificial cap on Aff0 values
-	 * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an
-	 * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up
-	 * having absolutely no relationship to the actual underlying system
-	 * topology, and cannot be reasonably used as core / package ID.
-	 *
-	 * If the MT bit is set, Aff0 *could* be used to define a thread ID, but
-	 * we still wouldn't be able to obtain a sane core ID. This means we
-	 * need to entirely ignore MPIDR for any topology deduction.
-	 */
-	cpuid_topo->thread_id = -1;
-	cpuid_topo->core_id = cpuid;
-	cpuid_topo->package_id = cpu_to_node(cpuid);
-
-	pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n",
-		 cpuid, cpuid_topo->package_id, cpuid_topo->core_id,
-		 cpuid_topo->thread_id, mpidr);
-
-topology_populated:
-	update_siblings_masks(cpuid);
-}
-
-static void clear_cpu_topology(int cpu)
-{
-	struct cpu_topology *cpu_topo = &cpu_topology[cpu];
-
-	cpumask_clear(&cpu_topo->llc_sibling);
-	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
-
-	cpumask_clear(&cpu_topo->core_sibling);
-	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
-	cpumask_clear(&cpu_topo->thread_sibling);
-	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
-}
-
-static void __init reset_cpu_topology(void)
-{
-	unsigned int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct cpu_topology *cpu_topo = &cpu_topology[cpu];
-
-		cpu_topo->thread_id = -1;
-		cpu_topo->core_id = 0;
-		cpu_topo->package_id = -1;
-		cpu_topo->llc_id = -1;
-
-		clear_cpu_topology(cpu);
-	}
-}
-
-void remove_cpu_topology(unsigned int cpu)
-{
-	int sibling;
-
-	for_each_cpu(sibling, topology_core_cpumask(cpu))
-		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
-	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
-		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
-	for_each_cpu(sibling, topology_llc_cpumask(cpu))
-		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
-
-	clear_cpu_topology(cpu);
-}
 
 #ifdef CONFIG_ACPI
 static bool __init acpi_cpu_is_threaded(int cpu)
@@ ... @@
  * Propagate the topology information of the processor_topology_node tree to the
  * cpu_topology array.
  */
-static int __init parse_acpi_topology(void)
+int __init parse_acpi_topology(void)
 {
 	int cpu, topology_id;
+
+	if (acpi_disabled)
+		return 0;
 
 	for_each_possible_cpu(cpu) {
 		int i, cache_id;
@@ ... @@
 
 	return 0;
 }
-
-#else
-static inline int __init parse_acpi_topology(void)
-{
-	return -EINVAL;
-}
 #endif
 
-void __init init_cpu_topology(void)
+#ifdef CONFIG_ARM64_AMU_EXTN
+
+#undef pr_fmt
+#define pr_fmt(fmt) "AMU: " fmt
+
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale);
+static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
+static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
+static cpumask_var_t amu_fie_cpus;
+
+/* Initialize counter reference per-cpu variables for the current CPU */
+void init_cpu_freq_invariance_counters(void)
 {
-	reset_cpu_topology();
+	this_cpu_write(arch_core_cycles_prev,
+		       read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0));
+	this_cpu_write(arch_const_cycles_prev,
+		       read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0));
+}
+
+static int validate_cpu_freq_invariance_counters(int cpu)
+{
+	u64 max_freq_hz, ratio;
+
+	if (!cpu_has_amu_feat(cpu)) {
+		pr_debug("CPU%d: counters are not supported.\n", cpu);
+		return -EINVAL;
+	}
+
+	if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
+		     !per_cpu(arch_core_cycles_prev, cpu))) {
+		pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
+		return -EINVAL;
+	}
+
+	/* Convert maximum frequency from KHz to Hz and validate */
+	max_freq_hz = cpufreq_get_hw_max_freq(cpu) * 1000ULL;
+	if (unlikely(!max_freq_hz)) {
+		pr_debug("CPU%d: invalid maximum frequency.\n", cpu);
+		return -EINVAL;
+	}
 
 	/*
-	 * Discard anything that was parsed if we hit an error so we
-	 * don't use partial information.
+	 * Pre-compute the fixed ratio between the frequency of the constant
+	 * counter and the maximum frequency of the CPU.
+	 *
+	 *			      const_freq
+	 * arch_max_freq_scale =  ---------------- * SCHED_CAPACITY_SCALE²
+	 *			  cpuinfo_max_freq
+	 *
+	 * We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE²
+	 * in order to ensure a good resolution for arch_max_freq_scale for
+	 * very low arch timer frequencies (down to the KHz range which should
+	 * be unlikely).
 	 */
-	if (!acpi_disabled && parse_acpi_topology())
-		reset_cpu_topology();
-	else if (of_have_populated_dt() && parse_dt_topology())
-		reset_cpu_topology();
+	ratio = (u64)arch_timer_get_rate() << (2 * SCHED_CAPACITY_SHIFT);
+	ratio = div64_u64(ratio, max_freq_hz);
+	if (!ratio) {
+		WARN_ONCE(1, "System timer frequency too low.\n");
+		return -EINVAL;
+	}
+
+	per_cpu(arch_max_freq_scale, cpu) = (unsigned long)ratio;
+
+	return 0;
 }
+
+static inline bool
+enable_policy_freq_counters(int cpu, cpumask_var_t valid_cpus)
+{
+	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+
+	if (!policy) {
+		pr_debug("CPU%d: No cpufreq policy found.\n", cpu);
+		return false;
+	}
+
+	if (cpumask_subset(policy->related_cpus, valid_cpus))
+		cpumask_or(amu_fie_cpus, policy->related_cpus,
+			   amu_fie_cpus);
+
+	cpufreq_cpu_put(policy);
+
+	return true;
+}
+
+static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
+#define amu_freq_invariant() static_branch_unlikely(&amu_fie_key)
+
+static int __init init_amu_fie(void)
+{
+	cpumask_var_t valid_cpus;
+	bool have_policy = false;
+	int ret = 0;
+	int cpu;
+
+	if (!zalloc_cpumask_var(&valid_cpus, GFP_KERNEL))
+		return -ENOMEM;
+
+	if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto free_valid_mask;
+	}
+
+	for_each_present_cpu(cpu) {
+		if (validate_cpu_freq_invariance_counters(cpu))
+			continue;
+		cpumask_set_cpu(cpu, valid_cpus);
+		have_policy |= enable_policy_freq_counters(cpu, valid_cpus);
+	}
+
+	/*
+	 * If we are not restricted by cpufreq policies, we only enable
+	 * the use of the AMU feature for FIE if all CPUs support AMU.
+	 * Otherwise, enable_policy_freq_counters has already enabled
+	 * policy cpus.
+	 */
+	if (!have_policy && cpumask_equal(valid_cpus, cpu_present_mask))
+		cpumask_or(amu_fie_cpus, amu_fie_cpus, valid_cpus);
+
+	if (!cpumask_empty(amu_fie_cpus)) {
+		pr_info("CPUs[%*pbl]: counters will be used for FIE.",
+			cpumask_pr_args(amu_fie_cpus));
+		static_branch_enable(&amu_fie_key);
+	}
+
+	/*
+	 * If the system is not fully invariant after AMU init, disable
+	 * partial use of counters for frequency invariance.
+	 */
+	if (!topology_scale_freq_invariant())
+		static_branch_disable(&amu_fie_key);
+
+free_valid_mask:
+	free_cpumask_var(valid_cpus);
+
+	return ret;
+}
+late_initcall_sync(init_amu_fie);
+
+bool arch_freq_counters_available(const struct cpumask *cpus)
+{
+	return amu_freq_invariant() &&
+	       cpumask_subset(cpus, amu_fie_cpus);
+}
+
+void topology_scale_freq_tick(void)
+{
+	u64 prev_core_cnt, prev_const_cnt;
+	u64 core_cnt, const_cnt, scale;
+	int cpu = smp_processor_id();
+
+	if (!amu_freq_invariant())
+		return;
+
+	if (!cpumask_test_cpu(cpu, amu_fie_cpus))
+		return;
+
+	const_cnt = read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0);
+	core_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
+	prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
+	prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
+
+	if (unlikely(core_cnt <= prev_core_cnt ||
+		     const_cnt <= prev_const_cnt))
+		goto store_and_exit;
+
+	/*
+	 *	    /\core    arch_max_freq_scale
+	 * scale =  ------- * --------------------
+	 *	    /\const   SCHED_CAPACITY_SCALE
+	 *
+	 * See validate_cpu_freq_invariance_counters() for details on
+	 * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
+	 */
+	scale = core_cnt - prev_core_cnt;
+	scale *= this_cpu_read(arch_max_freq_scale);
+	scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
+			  const_cnt - prev_const_cnt);
+
+	scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
+	this_cpu_write(freq_scale, (unsigned long)scale);
+
+store_and_exit:
+	this_cpu_write(arch_core_cycles_prev, core_cnt);
+	this_cpu_write(arch_const_cycles_prev, const_cnt);
+}
+#endif /* CONFIG_ARM64_AMU_EXTN */
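
/*
 * Illustration only, not part of the patch above: a minimal userspace
 * sketch of the fixed-point arithmetic used by
 * validate_cpu_freq_invariance_counters() and topology_scale_freq_tick().
 * The 50 MHz constant-counter rate, the 2 GHz maximum CPU frequency and
 * the per-tick counter deltas are made-up example values.
 */
#include <inttypes.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1ULL << SCHED_CAPACITY_SHIFT)

int main(void)
{
	uint64_t const_freq_hz = 50000000;	/* assumed constant counter rate: 50 MHz */
	uint64_t max_freq_hz = 2000000000;	/* assumed cpuinfo_max_freq: 2 GHz */

	/* arch_max_freq_scale = const_freq / cpuinfo_max_freq * SCHED_CAPACITY_SCALE^2 */
	uint64_t arch_max_freq_scale =
		(const_freq_hz << (2 * SCHED_CAPACITY_SHIFT)) / max_freq_hz;

	/*
	 * Assume the CPU ran at half its maximum frequency between two ticks
	 * 4 ms apart: the constant counter advances by 200000 cycles and the
	 * core counter by 4000000 cycles.
	 */
	uint64_t delta_const = 200000;
	uint64_t delta_core = 4000000;

	/* Same operation order as topology_scale_freq_tick() */
	uint64_t scale = delta_core;
	scale *= arch_max_freq_scale;
	scale = (scale >> SCHED_CAPACITY_SHIFT) / delta_const;
	if (scale > SCHED_CAPACITY_SCALE)
		scale = SCHED_CAPACITY_SCALE;

	/* Prints ~512, i.e. about SCHED_CAPACITY_SCALE / 2 for a half-speed CPU */
	printf("arch_max_freq_scale = %" PRIu64 "\n", arch_max_freq_scale);
	printf("freq_scale          = %" PRIu64 "\n", scale);
	return 0;
}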