2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/base/arch_topology.c
@@ -7,7 +7,6 @@
  */

 #include <linux/acpi.h>
-#include <linux/arch_topology.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/device.h>
@@ -16,51 +15,71 @@
 #include <linux/string.h>
 #include <linux/sched/topology.h>
 #include <linux/cpuset.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <trace/hooks/topology.h>

+bool topology_scale_freq_invariant(void)
+{
+	return cpufreq_supports_freq_invariance() ||
+	       arch_freq_counters_available(cpu_online_mask);
+}
+
+__weak bool arch_freq_counters_available(const struct cpumask *cpus)
+{
+	return false;
+}
 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
-DEFINE_PER_CPU(unsigned long, max_cpu_freq);
-DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;
+EXPORT_PER_CPU_SYMBOL_GPL(freq_scale);

-void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
-			 unsigned long max_freq)
+void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
+			     unsigned long max_freq)
 {
 	unsigned long scale;
 	int i;

+	if (WARN_ON_ONCE(!cur_freq || !max_freq))
+		return;
+
+	/*
+	 * If the use of counters for FIE is enabled, just return as we don't
+	 * want to update the scale factor with information from CPUFREQ.
+	 * Instead the scale factor will be updated from arch_scale_freq_tick.
+	 */
+	if (arch_freq_counters_available(cpus))
+		return;
+
 	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;

-	for_each_cpu(i, cpus) {
+	trace_android_vh_arch_set_freq_scale(cpus, cur_freq, max_freq, &scale);
+
+	for_each_cpu(i, cpus)
 		per_cpu(freq_scale, i) = scale;
-		per_cpu(max_cpu_freq, i) = max_freq;
-	}
 }

-void arch_set_max_freq_scale(struct cpumask *cpus,
-			     unsigned long policy_max_freq)
-{
-	unsigned long scale, max_freq;
-	int cpu = cpumask_first(cpus);
-
-	if (cpu > nr_cpu_ids)
-		return;
-
-	max_freq = per_cpu(max_cpu_freq, cpu);
-	if (!max_freq)
-		return;
-
-	scale = (policy_max_freq << SCHED_CAPACITY_SHIFT) / max_freq;
-
-	for_each_cpu(cpu, cpus)
-		per_cpu(max_freq_scale, cpu) = scale;
-}
-
-static DEFINE_MUTEX(cpu_scale_mutex);
 DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_scale);

 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
 {
 	per_cpu(cpu_scale, cpu) = capacity;
 }
+
+DEFINE_PER_CPU(unsigned long, thermal_pressure);
+EXPORT_PER_CPU_SYMBOL_GPL(thermal_pressure);
+
+void topology_set_thermal_pressure(const struct cpumask *cpus,
+				   unsigned long th_pressure)
+{
+	int cpu;
+
+	for_each_cpu(cpu, cpus)
+		WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
+}
+EXPORT_SYMBOL_GPL(topology_set_thermal_pressure);

 static ssize_t cpu_capacity_show(struct device *dev,
				 struct device_attribute *attr,
@@ -68,43 +87,13 @@
 {
 	struct cpu *cpu = container_of(dev, struct cpu, dev);

-	return sprintf(buf, "%lu\n", topology_get_cpu_scale(NULL, cpu->dev.id));
+	return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
 }

 static void update_topology_flags_workfn(struct work_struct *work);
 static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);

-static ssize_t cpu_capacity_store(struct device *dev,
-				  struct device_attribute *attr,
-				  const char *buf,
-				  size_t count)
-{
-	struct cpu *cpu = container_of(dev, struct cpu, dev);
-	int this_cpu = cpu->dev.id;
-	int i;
-	unsigned long new_capacity;
-	ssize_t ret;
-
-	if (!count)
-		return 0;
-
-	ret = kstrtoul(buf, 0, &new_capacity);
-	if (ret)
-		return ret;
-	if (new_capacity > SCHED_CAPACITY_SCALE)
-		return -EINVAL;
-
-	mutex_lock(&cpu_scale_mutex);
-	for_each_cpu(i, &cpu_topology[this_cpu].core_sibling)
-		topology_set_cpu_scale(i, new_capacity);
-	mutex_unlock(&cpu_scale_mutex);
-
-	schedule_work(&update_topology_flags_work);
-
-	return count;
-}
-
-static DEVICE_ATTR_RW(cpu_capacity);
+static DEVICE_ATTR_RO(cpu_capacity);

 static int register_cpu_capacity_sysctl(void)
 {
@@ -126,6 +115,8 @@
 subsys_initcall(register_cpu_capacity_sysctl);

 static int update_topology;
+bool topology_update_done;
+EXPORT_SYMBOL_GPL(topology_update_done);

 int topology_update_cpu_topology(void)
 {
@@ -140,11 +131,13 @@
 {
 	update_topology = 1;
 	rebuild_sched_domains();
+	topology_update_done = true;
+	trace_android_vh_update_topology_flags_workfn(NULL);
 	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
 	update_topology = 0;
 }

-static u32 capacity_scale;
+static DEFINE_PER_CPU(u32, freq_factor) = 1;
 static u32 *raw_capacity;

 static int free_raw_capacity(void)
@@ -158,27 +151,32 @@
 void topology_normalize_cpu_scale(void)
 {
 	u64 capacity;
+	u64 capacity_scale;
 	int cpu;

 	if (!raw_capacity)
 		return;

-	pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
-	mutex_lock(&cpu_scale_mutex);
+	capacity_scale = 1;
 	for_each_possible_cpu(cpu) {
-		pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n",
-			 cpu, raw_capacity[cpu]);
-		capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
-			/ capacity_scale;
+		capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
+		capacity_scale = max(capacity, capacity_scale);
+	}
+
+	pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
+	for_each_possible_cpu(cpu) {
+		capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
+		capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
+				     capacity_scale);
 		topology_set_cpu_scale(cpu, capacity);
 		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
-			cpu, topology_get_cpu_scale(NULL, cpu));
+			cpu, topology_get_cpu_scale(cpu));
 	}
-	mutex_unlock(&cpu_scale_mutex);
 }

 bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
 {
+	struct clk *cpu_clk;
 	static bool cap_parsing_failed;
 	int ret;
 	u32 cpu_capacity;
@@ -194,15 +192,26 @@
					       sizeof(*raw_capacity),
					       GFP_KERNEL);
			if (!raw_capacity) {
-				pr_err("cpu_capacity: failed to allocate memory for raw capacities\n");
				cap_parsing_failed = true;
				return false;
			}
		}
-		capacity_scale = max(cpu_capacity, capacity_scale);
		raw_capacity[cpu] = cpu_capacity;
		pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
			cpu_node, raw_capacity[cpu]);
+
+		/*
+		 * Update freq_factor for calculating early boot cpu capacities.
+		 * For non-clk CPU DVFS mechanism, there's no way to get the
+		 * frequency value now, assuming they are running at the same
+		 * frequency (by keeping the initial freq_factor value).
+		 */
+		cpu_clk = of_clk_get(cpu_node, 0);
+		if (!PTR_ERR_OR_ZERO(cpu_clk)) {
+			per_cpu(freq_factor, cpu) =
+				clk_get_rate(cpu_clk) / 1000;
+			clk_put(cpu_clk);
+		}
	} else {
		if (raw_capacity) {
			pr_err("cpu_capacity: missing %pOF raw capacity\n",
@@ -232,7 +241,7 @@
 	if (!raw_capacity)
 		return 0;

-	if (val != CPUFREQ_NOTIFY)
+	if (val != CPUFREQ_CREATE_POLICY)
 		return 0;

 	pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
@@ -241,11 +250,8 @@

 	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);

-	for_each_cpu(cpu, policy->related_cpus) {
-		raw_capacity[cpu] = topology_get_cpu_scale(NULL, cpu) *
-				    policy->cpuinfo.max_freq / 1000UL;
-		capacity_scale = max(raw_capacity[cpu], capacity_scale);
-	}
+	for_each_cpu(cpu, policy->related_cpus)
+		per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;

 	if (cpumask_empty(cpus_to_visit)) {
 		topology_normalize_cpu_scale();
@@ -274,10 +280,8 @@
 	if (!acpi_disabled || !raw_capacity)
 		return -EINVAL;

-	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) {
-		pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n");
+	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
 		return -ENOMEM;
-	}

 	cpumask_copy(cpus_to_visit, cpu_possible_mask);

@@ -301,3 +305,325 @@
 #else
 core_initcall(free_raw_capacity);
 #endif
+
+#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
+/*
+ * This function returns the logic cpu number of the node.
+ * There are basically three kinds of return values:
+ * (1) logic cpu number which is > 0.
+ * (2) -ENODEV when the device tree(DT) node is valid and found in the DT but
+ * there is no possible logical CPU in the kernel to match. This happens
+ * when CONFIG_NR_CPUS is configure to be smaller than the number of
+ * CPU nodes in DT. We need to just ignore this case.
+ * (3) -1 if the node does not exist in the device tree
+ */
+static int __init get_cpu_for_node(struct device_node *node)
+{
+	struct device_node *cpu_node;
+	int cpu;
+
+	cpu_node = of_parse_phandle(node, "cpu", 0);
+	if (!cpu_node)
+		return -1;
+
+	cpu = of_cpu_node_to_id(cpu_node);
+	if (cpu >= 0)
+		topology_parse_cpu_capacity(cpu_node, cpu);
+	else
+		pr_info("CPU node for %pOF exist but the possible cpu range is :%*pbl\n",
+			cpu_node, cpumask_pr_args(cpu_possible_mask));
+
+	of_node_put(cpu_node);
+	return cpu;
+}
+
+static int __init parse_core(struct device_node *core, int package_id,
+			     int core_id)
+{
+	char name[20];
+	bool leaf = true;
+	int i = 0;
+	int cpu;
+	struct device_node *t;
+
+	do {
+		snprintf(name, sizeof(name), "thread%d", i);
+		t = of_get_child_by_name(core, name);
+		if (t) {
+			leaf = false;
+			cpu = get_cpu_for_node(t);
+			if (cpu >= 0) {
+				cpu_topology[cpu].package_id = package_id;
+				cpu_topology[cpu].core_id = core_id;
+				cpu_topology[cpu].thread_id = i;
+			} else if (cpu != -ENODEV) {
+				pr_err("%pOF: Can't get CPU for thread\n", t);
+				of_node_put(t);
+				return -EINVAL;
+			}
+			of_node_put(t);
+		}
+		i++;
+	} while (t);
+
+	cpu = get_cpu_for_node(core);
+	if (cpu >= 0) {
+		if (!leaf) {
+			pr_err("%pOF: Core has both threads and CPU\n",
+			       core);
+			return -EINVAL;
+		}
+
+		cpu_topology[cpu].package_id = package_id;
+		cpu_topology[cpu].core_id = core_id;
+	} else if (leaf && cpu != -ENODEV) {
+		pr_err("%pOF: Can't get CPU for leaf core\n", core);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __init parse_cluster(struct device_node *cluster, int depth)
+{
+	char name[20];
+	bool leaf = true;
+	bool has_cores = false;
+	struct device_node *c;
+	static int package_id __initdata;
+	int core_id = 0;
+	int i, ret;
+
+	/*
+	 * First check for child clusters; we currently ignore any
+	 * information about the nesting of clusters and present the
+	 * scheduler with a flat list of them.
+	 */
+	i = 0;
+	do {
+		snprintf(name, sizeof(name), "cluster%d", i);
+		c = of_get_child_by_name(cluster, name);
+		if (c) {
+			leaf = false;
+			ret = parse_cluster(c, depth + 1);
+			of_node_put(c);
+			if (ret != 0)
+				return ret;
+		}
+		i++;
+	} while (c);
+
+	/* Now check for cores */
+	i = 0;
+	do {
+		snprintf(name, sizeof(name), "core%d", i);
+		c = of_get_child_by_name(cluster, name);
+		if (c) {
+			has_cores = true;
+
+			if (depth == 0) {
+				pr_err("%pOF: cpu-map children should be clusters\n",
+				       c);
+				of_node_put(c);
+				return -EINVAL;
+			}
+
+			if (leaf) {
+				ret = parse_core(c, package_id, core_id++);
+			} else {
+				pr_err("%pOF: Non-leaf cluster with core %s\n",
+				       cluster, name);
+				ret = -EINVAL;
+			}
+
+			of_node_put(c);
+			if (ret != 0)
+				return ret;
+		}
+		i++;
+	} while (c);
+
+	if (leaf && !has_cores)
+		pr_warn("%pOF: empty cluster\n", cluster);
+
+	if (leaf)
+		package_id++;
+
+	return 0;
+}
+
+static int __init parse_dt_topology(void)
+{
+	struct device_node *cn, *map;
+	int ret = 0;
+	int cpu;
+
+	cn = of_find_node_by_path("/cpus");
+	if (!cn) {
+		pr_err("No CPU information found in DT\n");
+		return 0;
+	}
+
+	/*
+	 * When topology is provided cpu-map is essentially a root
+	 * cluster with restricted subnodes.
+	 */
+	map = of_get_child_by_name(cn, "cpu-map");
+	if (!map)
+		goto out;
+
+	ret = parse_cluster(map, 0);
+	if (ret != 0)
+		goto out_map;
+
+	topology_normalize_cpu_scale();
+
+	/*
+	 * Check that all cores are in the topology; the SMP code will
+	 * only mark cores described in the DT as possible.
+	 */
+	for_each_possible_cpu(cpu)
+		if (cpu_topology[cpu].package_id == -1)
+			ret = -EINVAL;
+
+out_map:
+	of_node_put(map);
+out:
+	of_node_put(cn);
+	return ret;
+}
+#endif
+
+/*
+ * cpu topology table
+ */
+struct cpu_topology cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));
+
+	/* Find the smaller of NUMA, core or LLC siblings */
+	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
+		/* not numa in package, lets use the package siblings */
+		core_mask = &cpu_topology[cpu].core_sibling;
+	}
+	if (cpu_topology[cpu].llc_id != -1) {
+		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
+			core_mask = &cpu_topology[cpu].llc_sibling;
+	}
+
+	return core_mask;
+}
+
+void update_siblings_masks(unsigned int cpuid)
+{
+	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+	int cpu;
+
+	/* update core and thread sibling masks */
+	for_each_online_cpu(cpu) {
+		cpu_topo = &cpu_topology[cpu];
+
+		if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) {
+			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
+			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
+		}
+
+		if (cpuid_topo->package_id != cpu_topo->package_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+		if (cpuid_topo->core_id != cpu_topo->core_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
+		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+	}
+}
+
+static void clear_cpu_topology(int cpu)
+{
+	struct cpu_topology *cpu_topo = &cpu_topology[cpu];
+
+	cpumask_clear(&cpu_topo->llc_sibling);
+	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
+
+	cpumask_clear(&cpu_topo->core_sibling);
+	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
+	cpumask_clear(&cpu_topo->thread_sibling);
+	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
+}
+
+void __init reset_cpu_topology(void)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct cpu_topology *cpu_topo = &cpu_topology[cpu];
+
+		cpu_topo->thread_id = -1;
+		cpu_topo->core_id = -1;
+		cpu_topo->package_id = -1;
+		cpu_topo->llc_id = -1;
+
+		clear_cpu_topology(cpu);
+	}
+}
+
+void remove_cpu_topology(unsigned int cpu)
+{
+	int sibling;
+
+	for_each_cpu(sibling, topology_core_cpumask(cpu))
+		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
+	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
+		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
+	for_each_cpu(sibling, topology_llc_cpumask(cpu))
+		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
+
+	clear_cpu_topology(cpu);
+}
+
+__weak int __init parse_acpi_topology(void)
+{
+	return 0;
+}
+
+#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
+void __init init_cpu_topology(void)
+{
+	reset_cpu_topology();
+
+	/*
+	 * Discard anything that was parsed if we hit an error so we
+	 * don't use partial information.
+	 */
+	if (parse_acpi_topology())
+		reset_cpu_topology();
+	else if (of_have_populated_dt() && parse_dt_topology())
+		reset_cpu_topology();
+}
+
+void store_cpu_topology(unsigned int cpuid)
+{
+	struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
+
+	if (cpuid_topo->package_id != -1)
+		goto topology_populated;
+
+	cpuid_topo->thread_id = -1;
+	cpuid_topo->core_id = cpuid;
+	cpuid_topo->package_id = cpu_to_node(cpuid);
+
+	pr_debug("CPU%u: package %d core %d thread %d\n",
+		 cpuid, cpuid_topo->package_id, cpuid_topo->core_id,
+		 cpuid_topo->thread_id);
+
+topology_populated:
+	update_siblings_masks(cpuid);
+}
+#endif
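
For reference, below is a minimal standalone C sketch (not part of the patch) of the two fixed-point computations the diff introduces: the frequency-invariance scale written by topology_set_freq_scale(), and the per-CPU capacity normalization done in topology_normalize_cpu_scale() against the largest raw_capacity * freq_factor product. SCHED_CAPACITY_SHIFT = 10 mirrors the kernel's value; the two-CPU sample numbers and the freq_scale() helper name are made up for illustration.

/*
 * Userspace illustration only; not kernel code. The sample DT
 * capacity-dmips-mhz values and max frequencies are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT 10
#define SCHED_CAPACITY_SCALE (1UL << SCHED_CAPACITY_SHIFT)

/* scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq, as in topology_set_freq_scale() */
static unsigned long freq_scale(unsigned long cur_khz, unsigned long max_khz)
{
	return (cur_khz << SCHED_CAPACITY_SHIFT) / max_khz;
}

int main(void)
{
	/* hypothetical 2-CPU system: raw capacity-dmips-mhz and max freq in MHz */
	uint64_t raw_capacity[2] = { 1024, 446 };
	uint64_t freq_factor[2]  = { 2000, 1500 };	/* cpuinfo.max_freq / 1000, as in the patch */
	uint64_t capacity_scale = 1, capacity;
	int cpu;

	/* first pass: find the largest raw_capacity * freq_factor product */
	for (cpu = 0; cpu < 2; cpu++) {
		capacity = raw_capacity[cpu] * freq_factor[cpu];
		if (capacity > capacity_scale)
			capacity_scale = capacity;
	}

	/* second pass: normalize so the biggest CPU lands at SCHED_CAPACITY_SCALE (1024) */
	for (cpu = 0; cpu < 2; cpu++) {
		capacity = raw_capacity[cpu] * freq_factor[cpu];
		capacity = (capacity << SCHED_CAPACITY_SHIFT) / capacity_scale;
		printf("CPU%d cpu_capacity=%llu\n", cpu, (unsigned long long)capacity);
	}

	/* running at half the max frequency yields a scale of 512 out of 1024 */
	printf("freq_scale=%lu\n", freq_scale(1000000, 2000000));
	return 0;
}

With these inputs the big CPU normalizes to 1024 and the little CPU to roughly 334, which is the same ordering of results the reworked topology_normalize_cpu_scale() produces once freq_factor is filled in from the clk framework or from the cpufreq policy.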