2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/arch/arm64/kernel/topology.c
@@ -14,332 +14,13 @@
 #include <linux/acpi.h>
 #include <linux/arch_topology.h>
 #include <linux/cacheinfo.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
 #include <linux/init.h>
 #include <linux/percpu.h>
-#include <linux/node.h>
-#include <linux/nodemask.h>
-#include <linux/of.h>
-#include <linux/sched.h>
-#include <linux/sched/topology.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/string.h>
 
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/topology.h>
-
-static int __init get_cpu_for_node(struct device_node *node)
-{
-	struct device_node *cpu_node;
-	int cpu;
-
-	cpu_node = of_parse_phandle(node, "cpu", 0);
-	if (!cpu_node)
-		return -1;
-
-	cpu = of_cpu_node_to_id(cpu_node);
-	if (cpu >= 0)
-		topology_parse_cpu_capacity(cpu_node, cpu);
-	else
-		pr_crit("Unable to find CPU node for %pOF\n", cpu_node);
-
-	of_node_put(cpu_node);
-	return cpu;
-}
-
-static int __init parse_core(struct device_node *core, int package_id,
-			     int core_id)
-{
-	char name[10];
-	bool leaf = true;
-	int i = 0;
-	int cpu;
-	struct device_node *t;
-
-	do {
-		snprintf(name, sizeof(name), "thread%d", i);
-		t = of_get_child_by_name(core, name);
-		if (t) {
-			leaf = false;
-			cpu = get_cpu_for_node(t);
-			if (cpu >= 0) {
-				cpu_topology[cpu].package_id = package_id;
-				cpu_topology[cpu].core_id = core_id;
-				cpu_topology[cpu].thread_id = i;
-			} else {
-				pr_err("%pOF: Can't get CPU for thread\n",
-				       t);
-				of_node_put(t);
-				return -EINVAL;
-			}
-			of_node_put(t);
-		}
-		i++;
-	} while (t);
-
-	cpu = get_cpu_for_node(core);
-	if (cpu >= 0) {
-		if (!leaf) {
-			pr_err("%pOF: Core has both threads and CPU\n",
-			       core);
-			return -EINVAL;
-		}
-
-		cpu_topology[cpu].package_id = package_id;
-		cpu_topology[cpu].core_id = core_id;
-	} else if (leaf) {
-		pr_err("%pOF: Can't get CPU for leaf core\n", core);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int __init parse_cluster(struct device_node *cluster, int depth)
-{
-	char name[10];
-	bool leaf = true;
-	bool has_cores = false;
-	struct device_node *c;
-	static int package_id __initdata;
-	int core_id = 0;
-	int i, ret;
-
-	/*
-	 * First check for child clusters; we currently ignore any
-	 * information about the nesting of clusters and present the
-	 * scheduler with a flat list of them.
-	 */
-	i = 0;
-	do {
-		snprintf(name, sizeof(name), "cluster%d", i);
-		c = of_get_child_by_name(cluster, name);
-		if (c) {
-			leaf = false;
-			ret = parse_cluster(c, depth + 1);
-			of_node_put(c);
-			if (ret != 0)
-				return ret;
-		}
-		i++;
-	} while (c);
-
-	/* Now check for cores */
-	i = 0;
-	do {
-		snprintf(name, sizeof(name), "core%d", i);
-		c = of_get_child_by_name(cluster, name);
-		if (c) {
-			has_cores = true;
-
-			if (depth == 0) {
-				pr_err("%pOF: cpu-map children should be clusters\n",
-				       c);
-				of_node_put(c);
-				return -EINVAL;
-			}
-
-			if (leaf) {
-				ret = parse_core(c, package_id, core_id++);
-			} else {
-				pr_err("%pOF: Non-leaf cluster with core %s\n",
-				       cluster, name);
-				ret = -EINVAL;
-			}
-
-			of_node_put(c);
-			if (ret != 0)
-				return ret;
-		}
-		i++;
-	} while (c);
-
-	if (leaf && !has_cores)
-		pr_warn("%pOF: empty cluster\n", cluster);
-
-	if (leaf)
-		package_id++;
-
-	return 0;
-}
-
-static int __init parse_dt_topology(void)
-{
-	struct device_node *cn, *map;
-	int ret = 0;
-	int cpu;
-
-	cn = of_find_node_by_path("/cpus");
-	if (!cn) {
-		pr_err("No CPU information found in DT\n");
-		return 0;
-	}
-
-	/*
-	 * When topology is provided cpu-map is essentially a root
-	 * cluster with restricted subnodes.
-	 */
-	map = of_get_child_by_name(cn, "cpu-map");
-	if (!map)
-		goto out;
-
-	ret = parse_cluster(map, 0);
-	if (ret != 0)
-		goto out_map;
-
-	topology_normalize_cpu_scale();
-
-	/*
-	 * Check that all cores are in the topology; the SMP code will
-	 * only mark cores described in the DT as possible.
-	 */
-	for_each_possible_cpu(cpu)
-		if (cpu_topology[cpu].package_id == -1)
-			ret = -EINVAL;
-
-out_map:
-	of_node_put(map);
-out:
-	of_node_put(cn);
-	return ret;
-}
-
-/*
- * cpu topology table
- */
-struct cpu_topology cpu_topology[NR_CPUS];
-EXPORT_SYMBOL_GPL(cpu_topology);
-
-const struct cpumask *cpu_coregroup_mask(int cpu)
-{
-	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));
-
-	/* Find the smaller of NUMA, core or LLC siblings */
-	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
-		/* not numa in package, lets use the package siblings */
-		core_mask = &cpu_topology[cpu].core_sibling;
-	}
-	if (cpu_topology[cpu].llc_id != -1) {
-		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
-			core_mask = &cpu_topology[cpu].llc_sibling;
-	}
-
-	return core_mask;
-}
-
-static void update_siblings_masks(unsigned int cpuid)
-{
-	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
-	int cpu;
-
-	/* update core and thread sibling masks */
-	for_each_online_cpu(cpu) {
-		cpu_topo = &cpu_topology[cpu];
-
-		if (cpuid_topo->llc_id == cpu_topo->llc_id) {
-			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
-			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
-		}
-
-		if (cpuid_topo->package_id != cpu_topo->package_id)
-			continue;
-
-		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
-		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
-
-		if (cpuid_topo->core_id != cpu_topo->core_id)
-			continue;
-
-		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
-		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
-	}
-}
-
-void store_cpu_topology(unsigned int cpuid)
-{
-	struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
-	u64 mpidr;
-
-	if (cpuid_topo->package_id != -1)
-		goto topology_populated;
-
-	mpidr = read_cpuid_mpidr();
-
-	/* Uniprocessor systems can rely on default topology values */
-	if (mpidr & MPIDR_UP_BITMASK)
-		return;
-
-	/*
-	 * This would be the place to create cpu topology based on MPIDR.
-	 *
-	 * However, it cannot be trusted to depict the actual topology; some
-	 * pieces of the architecture enforce an artificial cap on Aff0 values
-	 * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an
-	 * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up
-	 * having absolutely no relationship to the actual underlying system
-	 * topology, and cannot be reasonably used as core / package ID.
-	 *
-	 * If the MT bit is set, Aff0 *could* be used to define a thread ID, but
-	 * we still wouldn't be able to obtain a sane core ID. This means we
-	 * need to entirely ignore MPIDR for any topology deduction.
-	 */
-	cpuid_topo->thread_id = -1;
-	cpuid_topo->core_id = cpuid;
-	cpuid_topo->package_id = cpu_to_node(cpuid);
-
-	pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n",
-		 cpuid, cpuid_topo->package_id, cpuid_topo->core_id,
-		 cpuid_topo->thread_id, mpidr);
-
-topology_populated:
-	update_siblings_masks(cpuid);
-}
-
-static void clear_cpu_topology(int cpu)
-{
-	struct cpu_topology *cpu_topo = &cpu_topology[cpu];
-
-	cpumask_clear(&cpu_topo->llc_sibling);
-	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
-
-	cpumask_clear(&cpu_topo->core_sibling);
-	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
-	cpumask_clear(&cpu_topo->thread_sibling);
-	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
-}
-
-static void __init reset_cpu_topology(void)
-{
-	unsigned int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct cpu_topology *cpu_topo = &cpu_topology[cpu];
-
-		cpu_topo->thread_id = -1;
-		cpu_topo->core_id = 0;
-		cpu_topo->package_id = -1;
-		cpu_topo->llc_id = -1;
-
-		clear_cpu_topology(cpu);
-	}
-}
-
-void remove_cpu_topology(unsigned int cpu)
-{
-	int sibling;
-
-	for_each_cpu(sibling, topology_core_cpumask(cpu))
-		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
-	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
-		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
-	for_each_cpu(sibling, topology_llc_cpumask(cpu))
-		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
-
-	clear_cpu_topology(cpu);
-}
 
 #ifdef CONFIG_ACPI
 static bool __init acpi_cpu_is_threaded(int cpu)
@@ -360,9 +41,12 @@
  * Propagate the topology information of the processor_topology_node tree to the
  * cpu_topology array.
  */
-static int __init parse_acpi_topology(void)
+int __init parse_acpi_topology(void)
 {
 	int cpu, topology_id;
+
+	if (acpi_disabled)
+		return 0;
 
 	for_each_possible_cpu(cpu) {
 		int i, cache_id;
@@ -397,24 +81,192 @@
 
 	return 0;
 }
-
-#else
-static inline int __init parse_acpi_topology(void)
-{
-	return -EINVAL;
-}
 #endif
 
-void __init init_cpu_topology(void)
+#ifdef CONFIG_ARM64_AMU_EXTN
+
+#undef pr_fmt
+#define pr_fmt(fmt) "AMU: " fmt
+
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale);
+static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
+static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
+static cpumask_var_t amu_fie_cpus;
+
+/* Initialize counter reference per-cpu variables for the current CPU */
+void init_cpu_freq_invariance_counters(void)
 {
-	reset_cpu_topology();
+	this_cpu_write(arch_core_cycles_prev,
+		       read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0));
+	this_cpu_write(arch_const_cycles_prev,
+		       read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0));
+}
+
+static int validate_cpu_freq_invariance_counters(int cpu)
+{
+	u64 max_freq_hz, ratio;
+
+	if (!cpu_has_amu_feat(cpu)) {
+		pr_debug("CPU%d: counters are not supported.\n", cpu);
+		return -EINVAL;
+	}
+
+	if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
+		     !per_cpu(arch_core_cycles_prev, cpu))) {
+		pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
+		return -EINVAL;
+	}
+
+	/* Convert maximum frequency from KHz to Hz and validate */
+	max_freq_hz = cpufreq_get_hw_max_freq(cpu) * 1000ULL;
+	if (unlikely(!max_freq_hz)) {
+		pr_debug("CPU%d: invalid maximum frequency.\n", cpu);
+		return -EINVAL;
+	}
 
 	/*
-	 * Discard anything that was parsed if we hit an error so we
-	 * don't use partial information.
+	 * Pre-compute the fixed ratio between the frequency of the constant
+	 * counter and the maximum frequency of the CPU.
+	 *
+	 *                            const_freq
+	 * arch_max_freq_scale =   ---------------- * SCHED_CAPACITY_SCALE²
+	 *                         cpuinfo_max_freq
+	 *
+	 * We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE²
+	 * in order to ensure a good resolution for arch_max_freq_scale for
+	 * very low arch timer frequencies (down to the KHz range which should
+	 * be unlikely).
 	 */
-	if (!acpi_disabled && parse_acpi_topology())
-		reset_cpu_topology();
-	else if (of_have_populated_dt() && parse_dt_topology())
-		reset_cpu_topology();
+	ratio = (u64)arch_timer_get_rate() << (2 * SCHED_CAPACITY_SHIFT);
+	ratio = div64_u64(ratio, max_freq_hz);
+	if (!ratio) {
+		WARN_ONCE(1, "System timer frequency too low.\n");
+		return -EINVAL;
+	}
+
+	per_cpu(arch_max_freq_scale, cpu) = (unsigned long)ratio;
+
+	return 0;
 }
+
+static inline bool
+enable_policy_freq_counters(int cpu, cpumask_var_t valid_cpus)
+{
+	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+
+	if (!policy) {
+		pr_debug("CPU%d: No cpufreq policy found.\n", cpu);
+		return false;
+	}
+
+	if (cpumask_subset(policy->related_cpus, valid_cpus))
+		cpumask_or(amu_fie_cpus, policy->related_cpus,
+			   amu_fie_cpus);
+
+	cpufreq_cpu_put(policy);
+
+	return true;
+}
+
+static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
+#define amu_freq_invariant() static_branch_unlikely(&amu_fie_key)
+
+static int __init init_amu_fie(void)
+{
+	cpumask_var_t valid_cpus;
+	bool have_policy = false;
+	int ret = 0;
+	int cpu;
+
+	if (!zalloc_cpumask_var(&valid_cpus, GFP_KERNEL))
+		return -ENOMEM;
+
+	if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto free_valid_mask;
+	}
+
+	for_each_present_cpu(cpu) {
+		if (validate_cpu_freq_invariance_counters(cpu))
+			continue;
+		cpumask_set_cpu(cpu, valid_cpus);
+		have_policy |= enable_policy_freq_counters(cpu, valid_cpus);
+	}
+
+	/*
+	 * If we are not restricted by cpufreq policies, we only enable
+	 * the use of the AMU feature for FIE if all CPUs support AMU.
+	 * Otherwise, enable_policy_freq_counters has already enabled
+	 * policy cpus.
+	 */
+	if (!have_policy && cpumask_equal(valid_cpus, cpu_present_mask))
+		cpumask_or(amu_fie_cpus, amu_fie_cpus, valid_cpus);
+
+	if (!cpumask_empty(amu_fie_cpus)) {
+		pr_info("CPUs[%*pbl]: counters will be used for FIE.",
+			cpumask_pr_args(amu_fie_cpus));
+		static_branch_enable(&amu_fie_key);
+	}
+
+	/*
+	 * If the system is not fully invariant after AMU init, disable
+	 * partial use of counters for frequency invariance.
+	 */
+	if (!topology_scale_freq_invariant())
+		static_branch_disable(&amu_fie_key);
+
+free_valid_mask:
+	free_cpumask_var(valid_cpus);
+
+	return ret;
+}
+late_initcall_sync(init_amu_fie);
+
+bool arch_freq_counters_available(const struct cpumask *cpus)
+{
+	return amu_freq_invariant() &&
+	       cpumask_subset(cpus, amu_fie_cpus);
+}
+
+void topology_scale_freq_tick(void)
+{
+	u64 prev_core_cnt, prev_const_cnt;
+	u64 core_cnt, const_cnt, scale;
+	int cpu = smp_processor_id();
+
+	if (!amu_freq_invariant())
+		return;
+
+	if (!cpumask_test_cpu(cpu, amu_fie_cpus))
+		return;
+
+	const_cnt = read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0);
+	core_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
+	prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
+	prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
+
+	if (unlikely(core_cnt <= prev_core_cnt ||
+		     const_cnt <= prev_const_cnt))
+		goto store_and_exit;
+
+	/*
+	 *          /\core    arch_max_freq_scale
+	 * scale =  ------- * --------------------
+	 *          /\const   SCHED_CAPACITY_SCALE
+	 *
+	 * See validate_cpu_freq_invariance_counters() for details on
+	 * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
+	 */
+	scale = core_cnt - prev_core_cnt;
+	scale *= this_cpu_read(arch_max_freq_scale);
+	scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
+			  const_cnt - prev_const_cnt);
+
+	scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
+	this_cpu_write(freq_scale, (unsigned long)scale);
+
+store_and_exit:
+	this_cpu_write(arch_core_cycles_prev, core_cnt);
+	this_cpu_write(arch_const_cycles_prev, const_cnt);
+}
+#endif /* CONFIG_ARM64_AMU_EXTN */
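
Note: the fixed-point arithmetic added by this patch can be sanity-checked outside the kernel. The sketch below is not part of the patch; it re-implements the ratio pre-computed in validate_cpu_freq_invariance_counters() and the per-tick scale computed in topology_scale_freq_tick() as a standalone userspace program. All numbers in it are invented examples (a 19.2 MHz constant counter, a 2.0 GHz cpuinfo_max_freq, and counter deltas for a core running at about 1.5 GHz), chosen so the resulting freq_scale of roughly 0.75 * SCHED_CAPACITY_SCALE can be checked by hand.

/*
 * Standalone illustration of the AMU frequency-invariance math above.
 * Not kernel code: the frequencies and counter deltas are made-up examples.
 */
#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1ULL << SCHED_CAPACITY_SHIFT)

int main(void)
{
	/* Example platform: 19.2 MHz constant counter, 2.0 GHz max CPU clock */
	uint64_t const_freq_hz = 19200000ULL;
	uint64_t max_freq_hz   = 2000000000ULL;

	/* arch_max_freq_scale = const_freq / cpuinfo_max_freq * SCHED_CAPACITY_SCALE² */
	uint64_t ratio = (const_freq_hz << (2 * SCHED_CAPACITY_SHIFT)) / max_freq_hz;

	/*
	 * Example tick: the constant counter advanced by 192000 cycles
	 * (10 ms at 19.2 MHz) while the core counter advanced by 15000000
	 * cycles, i.e. the core ran at ~1.5 GHz during the interval.
	 */
	uint64_t delta_const = 192000ULL;
	uint64_t delta_core  = 15000000ULL;

	/* scale = delta_core / delta_const * arch_max_freq_scale / SCHED_CAPACITY_SCALE */
	uint64_t scale = delta_core * ratio;
	scale = (scale >> SCHED_CAPACITY_SHIFT) / delta_const;
	if (scale > SCHED_CAPACITY_SCALE)
		scale = SCHED_CAPACITY_SCALE;

	/* Expect ratio = 10066 and freq_scale = 767, just under 0.75 * 1024 */
	printf("ratio = %llu, freq_scale = %llu\n",
	       (unsigned long long)ratio, (unsigned long long)scale);
	return 0;
}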