```diff
..
 #include <linux/acpi.h>
 #include <linux/arch_topology.h>
 #include <linux/cacheinfo.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
 #include <linux/init.h>
 #include <linux/percpu.h>
-#include <linux/node.h>
-#include <linux/nodemask.h>
-#include <linux/of.h>
-#include <linux/sched.h>
-#include <linux/sched/topology.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/string.h>
 
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/topology.h>
-
-static int __init get_cpu_for_node(struct device_node *node)
-{
-	struct device_node *cpu_node;
-	int cpu;
-
-	cpu_node = of_parse_phandle(node, "cpu", 0);
-	if (!cpu_node)
-		return -1;
-
-	cpu = of_cpu_node_to_id(cpu_node);
-	if (cpu >= 0)
-		topology_parse_cpu_capacity(cpu_node, cpu);
-	else
-		pr_crit("Unable to find CPU node for %pOF\n", cpu_node);
-
-	of_node_put(cpu_node);
-	return cpu;
-}
-
-static int __init parse_core(struct device_node *core, int package_id,
-			     int core_id)
-{
-	char name[10];
-	bool leaf = true;
-	int i = 0;
-	int cpu;
-	struct device_node *t;
-
-	do {
-		snprintf(name, sizeof(name), "thread%d", i);
-		t = of_get_child_by_name(core, name);
-		if (t) {
-			leaf = false;
-			cpu = get_cpu_for_node(t);
-			if (cpu >= 0) {
-				cpu_topology[cpu].package_id = package_id;
-				cpu_topology[cpu].core_id = core_id;
-				cpu_topology[cpu].thread_id = i;
-			} else {
-				pr_err("%pOF: Can't get CPU for thread\n",
-				       t);
-				of_node_put(t);
-				return -EINVAL;
-			}
-			of_node_put(t);
-		}
-		i++;
-	} while (t);
-
-	cpu = get_cpu_for_node(core);
-	if (cpu >= 0) {
-		if (!leaf) {
-			pr_err("%pOF: Core has both threads and CPU\n",
-			       core);
-			return -EINVAL;
-		}
-
-		cpu_topology[cpu].package_id = package_id;
-		cpu_topology[cpu].core_id = core_id;
-	} else if (leaf) {
-		pr_err("%pOF: Can't get CPU for leaf core\n", core);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int __init parse_cluster(struct device_node *cluster, int depth)
-{
-	char name[10];
-	bool leaf = true;
-	bool has_cores = false;
-	struct device_node *c;
-	static int package_id __initdata;
-	int core_id = 0;
-	int i, ret;
-
-	/*
-	 * First check for child clusters; we currently ignore any
-	 * information about the nesting of clusters and present the
-	 * scheduler with a flat list of them.
-	 */
-	i = 0;
-	do {
-		snprintf(name, sizeof(name), "cluster%d", i);
-		c = of_get_child_by_name(cluster, name);
-		if (c) {
-			leaf = false;
-			ret = parse_cluster(c, depth + 1);
-			of_node_put(c);
-			if (ret != 0)
-				return ret;
-		}
-		i++;
-	} while (c);
-
-	/* Now check for cores */
-	i = 0;
-	do {
-		snprintf(name, sizeof(name), "core%d", i);
-		c = of_get_child_by_name(cluster, name);
-		if (c) {
-			has_cores = true;
-
-			if (depth == 0) {
-				pr_err("%pOF: cpu-map children should be clusters\n",
-				       c);
-				of_node_put(c);
-				return -EINVAL;
-			}
-
-			if (leaf) {
-				ret = parse_core(c, package_id, core_id++);
-			} else {
-				pr_err("%pOF: Non-leaf cluster with core %s\n",
-				       cluster, name);
-				ret = -EINVAL;
-			}
-
-			of_node_put(c);
-			if (ret != 0)
-				return ret;
-		}
-		i++;
-	} while (c);
-
-	if (leaf && !has_cores)
-		pr_warn("%pOF: empty cluster\n", cluster);
-
-	if (leaf)
-		package_id++;
-
-	return 0;
-}
-
-static int __init parse_dt_topology(void)
-{
-	struct device_node *cn, *map;
-	int ret = 0;
-	int cpu;
-
-	cn = of_find_node_by_path("/cpus");
-	if (!cn) {
-		pr_err("No CPU information found in DT\n");
-		return 0;
-	}
-
-	/*
-	 * When topology is provided cpu-map is essentially a root
-	 * cluster with restricted subnodes.
-	 */
-	map = of_get_child_by_name(cn, "cpu-map");
-	if (!map)
-		goto out;
-
-	ret = parse_cluster(map, 0);
-	if (ret != 0)
-		goto out_map;
-
-	topology_normalize_cpu_scale();
-
-	/*
-	 * Check that all cores are in the topology; the SMP code will
-	 * only mark cores described in the DT as possible.
-	 */
-	for_each_possible_cpu(cpu)
-		if (cpu_topology[cpu].package_id == -1)
-			ret = -EINVAL;
-
-out_map:
-	of_node_put(map);
-out:
-	of_node_put(cn);
-	return ret;
-}
-
-/*
- * cpu topology table
- */
-struct cpu_topology cpu_topology[NR_CPUS];
-EXPORT_SYMBOL_GPL(cpu_topology);
-
-const struct cpumask *cpu_coregroup_mask(int cpu)
-{
-	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));
-
-	/* Find the smaller of NUMA, core or LLC siblings */
-	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
-		/* not numa in package, lets use the package siblings */
-		core_mask = &cpu_topology[cpu].core_sibling;
-	}
-	if (cpu_topology[cpu].llc_id != -1) {
-		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
-			core_mask = &cpu_topology[cpu].llc_sibling;
-	}
-
-	return core_mask;
-}
-
-static void update_siblings_masks(unsigned int cpuid)
-{
-	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
-	int cpu;
-
-	/* update core and thread sibling masks */
-	for_each_online_cpu(cpu) {
-		cpu_topo = &cpu_topology[cpu];
-
-		if (cpuid_topo->llc_id == cpu_topo->llc_id) {
-			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
-			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
-		}
-
-		if (cpuid_topo->package_id != cpu_topo->package_id)
-			continue;
-
-		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
-		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
-
-		if (cpuid_topo->core_id != cpu_topo->core_id)
-			continue;
-
-		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
-		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
-	}
-}
-
-void store_cpu_topology(unsigned int cpuid)
-{
-	struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
-	u64 mpidr;
-
-	if (cpuid_topo->package_id != -1)
-		goto topology_populated;
-
-	mpidr = read_cpuid_mpidr();
-
-	/* Uniprocessor systems can rely on default topology values */
-	if (mpidr & MPIDR_UP_BITMASK)
-		return;
-
-	/*
-	 * This would be the place to create cpu topology based on MPIDR.
-	 *
-	 * However, it cannot be trusted to depict the actual topology; some
-	 * pieces of the architecture enforce an artificial cap on Aff0 values
-	 * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an
-	 * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up
-	 * having absolutely no relationship to the actual underlying system
-	 * topology, and cannot be reasonably used as core / package ID.
-	 *
-	 * If the MT bit is set, Aff0 *could* be used to define a thread ID, but
-	 * we still wouldn't be able to obtain a sane core ID. This means we
-	 * need to entirely ignore MPIDR for any topology deduction.
-	 */
-	cpuid_topo->thread_id = -1;
-	cpuid_topo->core_id = cpuid;
-	cpuid_topo->package_id = cpu_to_node(cpuid);
-
-	pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n",
-		 cpuid, cpuid_topo->package_id, cpuid_topo->core_id,
-		 cpuid_topo->thread_id, mpidr);
-
-topology_populated:
-	update_siblings_masks(cpuid);
-}
-
-static void clear_cpu_topology(int cpu)
-{
-	struct cpu_topology *cpu_topo = &cpu_topology[cpu];
-
-	cpumask_clear(&cpu_topo->llc_sibling);
-	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
-
-	cpumask_clear(&cpu_topo->core_sibling);
-	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
-	cpumask_clear(&cpu_topo->thread_sibling);
-	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
-}
-
-static void __init reset_cpu_topology(void)
-{
-	unsigned int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct cpu_topology *cpu_topo = &cpu_topology[cpu];
-
-		cpu_topo->thread_id = -1;
-		cpu_topo->core_id = 0;
-		cpu_topo->package_id = -1;
-		cpu_topo->llc_id = -1;
-
-		clear_cpu_topology(cpu);
-	}
-}
-
-void remove_cpu_topology(unsigned int cpu)
-{
-	int sibling;
-
-	for_each_cpu(sibling, topology_core_cpumask(cpu))
-		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
-	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
-		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
-	for_each_cpu(sibling, topology_llc_cpumask(cpu))
-		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
-
-	clear_cpu_topology(cpu);
-}
 
 #ifdef CONFIG_ACPI
 static bool __init acpi_cpu_is_threaded(int cpu)
..
  * Propagate the topology information of the processor_topology_node tree to the
  * cpu_topology array.
  */
-static int __init parse_acpi_topology(void)
+int __init parse_acpi_topology(void)
 {
 	int cpu, topology_id;
+
+	if (acpi_disabled)
+		return 0;
 
 	for_each_possible_cpu(cpu) {
 		int i, cache_id;
..
 
 	return 0;
 }
-
-#else
-static inline int __init parse_acpi_topology(void)
-{
-	return -EINVAL;
-}
 #endif
```
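The DT parsing, the `cpu_topology[]` table and the sibling-mask management removed above all live on in the generic driver code (drivers/base/arch_topology.c); making `parse_acpi_topology()` non-static and having it return early when ACPI is disabled is what lets the generic init path call it unconditionally. A sketch of that generic caller, under the assumption that it mirrors the removed arm64 logic (it is not part of this diff):

```c
/*
 * Sketch of the generic init_cpu_topology() in drivers/base/arch_topology.c
 * that takes over from the arm64-private copy removed below (paraphrased;
 * treat the exact body as an assumption, not part of this diff).
 */
void __init init_cpu_topology(void)
{
	reset_cpu_topology();

	/*
	 * Discard anything that was parsed if we hit an error so we
	 * don't use partial information. parse_acpi_topology() now
	 * returns 0 on its own when ACPI is disabled, so it is safe
	 * to call unconditionally here.
	 */
	if (parse_acpi_topology())
		reset_cpu_topology();
	else if (of_have_populated_dt() && parse_dt_topology())
		reset_cpu_topology();
}
```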
```diff
 
-void __init init_cpu_topology(void)
+#ifdef CONFIG_ARM64_AMU_EXTN
+
+#undef pr_fmt
+#define pr_fmt(fmt) "AMU: " fmt
+
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale);
+static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
+static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
+static cpumask_var_t amu_fie_cpus;
+
+/* Initialize counter reference per-cpu variables for the current CPU */
+void init_cpu_freq_invariance_counters(void)
 {
-	reset_cpu_topology();
+	this_cpu_write(arch_core_cycles_prev,
+		       read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0));
+	this_cpu_write(arch_const_cycles_prev,
+		       read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0));
+}
```
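`init_cpu_freq_invariance_counters()` snapshots the two AMU counters on the CPU it runs on, giving `topology_scale_freq_tick()` a baseline for its first delta, so it has to be called on each CPU as it comes up with AMU support detected. A minimal sketch of such a per-CPU enable hook (the name, the `amu_cpus` bookkeeping and the wiring are assumptions, not shown in this diff):

```c
/*
 * Hypothetical cpufeature enable hook (sketch only; the name and the
 * amu_cpus mask are assumed, not taken from this diff). It runs on the
 * CPU being brought online, so the this_cpu_write() calls in
 * init_cpu_freq_invariance_counters() record that CPU's own counter
 * values as the baseline for its first tick.
 */
static cpumask_t amu_cpus;	/* CPUs where AMU was detected (assumed) */

static void cpu_amu_enable(const struct arm64_cpu_capabilities *cap)
{
	if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) {
		cpumask_set_cpu(smp_processor_id(), &amu_cpus);
		init_cpu_freq_invariance_counters();
	}
}
```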
```diff
+
+static int validate_cpu_freq_invariance_counters(int cpu)
+{
+	u64 max_freq_hz, ratio;
+
+	if (!cpu_has_amu_feat(cpu)) {
+		pr_debug("CPU%d: counters are not supported.\n", cpu);
+		return -EINVAL;
+	}
+
+	if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
+		     !per_cpu(arch_core_cycles_prev, cpu))) {
+		pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
+		return -EINVAL;
+	}
+
+	/* Convert maximum frequency from KHz to Hz and validate */
+	max_freq_hz = cpufreq_get_hw_max_freq(cpu) * 1000ULL;
+	if (unlikely(!max_freq_hz)) {
+		pr_debug("CPU%d: invalid maximum frequency.\n", cpu);
+		return -EINVAL;
+	}
 
 	/*
-	 * Discard anything that was parsed if we hit an error so we
-	 * don't use partial information.
+	 * Pre-compute the fixed ratio between the frequency of the constant
+	 * counter and the maximum frequency of the CPU.
+	 *
+	 *                            const_freq
+	 * arch_max_freq_scale =   ---------------- * SCHED_CAPACITY_SCALE²
+	 *                          cpuinfo_max_freq
+	 *
+	 * We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE²
+	 * in order to ensure a good resolution for arch_max_freq_scale for
+	 * very low arch timer frequencies (down to the KHz range which should
+	 * be unlikely).
 	 */
-	if (!acpi_disabled && parse_acpi_topology())
-		reset_cpu_topology();
-	else if (of_have_populated_dt() && parse_dt_topology())
-		reset_cpu_topology();
+	ratio = (u64)arch_timer_get_rate() << (2 * SCHED_CAPACITY_SHIFT);
+	ratio = div64_u64(ratio, max_freq_hz);
+	if (!ratio) {
+		WARN_ONCE(1, "System timer frequency too low.\n");
+		return -EINVAL;
+	}
+
+	per_cpu(arch_max_freq_scale, cpu) = (unsigned long)ratio;
+
+	return 0;
 }
```
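To see the resolution argument concretely, here is a quick userspace re-computation of the ratio with illustrative numbers (a 25 MHz arch timer and a 2 GHz maximum CPU frequency; both values are examples, not taken from the diff):

```c
#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10	/* matches the kernel's definition */

int main(void)
{
	/* Illustrative inputs: 25 MHz constant counter, 2 GHz max freq */
	uint64_t const_freq_hz = 25000000ULL;
	uint64_t max_freq_hz = 2000000000ULL;

	/* Same computation as validate_cpu_freq_invariance_counters() */
	uint64_t ratio = const_freq_hz << (2 * SCHED_CAPACITY_SHIFT);
	ratio /= max_freq_hz;

	/* Prints 13107: plenty of precision survives the >> 10 at tick time */
	printf("arch_max_freq_scale = %llu\n", (unsigned long long)ratio);
	return 0;
}
```

With only a single `SCHED_CAPACITY_SHIFT`, the same inputs would truncate to 12 (from 12.8), a built-in error of over 6%; that is the resolution concern the comment is referring to.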
```diff
+
+static inline bool
+enable_policy_freq_counters(int cpu, cpumask_var_t valid_cpus)
+{
+	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+
+	if (!policy) {
+		pr_debug("CPU%d: No cpufreq policy found.\n", cpu);
+		return false;
+	}
+
+	if (cpumask_subset(policy->related_cpus, valid_cpus))
+		cpumask_or(amu_fie_cpus, policy->related_cpus,
+			   amu_fie_cpus);
+
+	cpufreq_cpu_put(policy);
+
+	return true;
+}
+
+static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
+#define amu_freq_invariant() static_branch_unlikely(&amu_fie_key)
+
+static int __init init_amu_fie(void)
+{
+	cpumask_var_t valid_cpus;
+	bool have_policy = false;
+	int ret = 0;
+	int cpu;
+
+	if (!zalloc_cpumask_var(&valid_cpus, GFP_KERNEL))
+		return -ENOMEM;
+
+	if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto free_valid_mask;
+	}
+
+	for_each_present_cpu(cpu) {
+		if (validate_cpu_freq_invariance_counters(cpu))
+			continue;
+		cpumask_set_cpu(cpu, valid_cpus);
+		have_policy |= enable_policy_freq_counters(cpu, valid_cpus);
+	}
+
+	/*
+	 * If we are not restricted by cpufreq policies, we only enable
+	 * the use of the AMU feature for FIE if all CPUs support AMU.
+	 * Otherwise, enable_policy_freq_counters has already enabled
+	 * policy cpus.
+	 */
+	if (!have_policy && cpumask_equal(valid_cpus, cpu_present_mask))
+		cpumask_or(amu_fie_cpus, amu_fie_cpus, valid_cpus);
+
+	if (!cpumask_empty(amu_fie_cpus)) {
+		pr_info("CPUs[%*pbl]: counters will be used for FIE.",
+			cpumask_pr_args(amu_fie_cpus));
+		static_branch_enable(&amu_fie_key);
+	}
+
+	/*
+	 * If the system is not fully invariant after AMU init, disable
+	 * partial use of counters for frequency invariance.
+	 */
+	if (!topology_scale_freq_invariant())
+		static_branch_disable(&amu_fie_key);
+
+free_valid_mask:
+	free_cpumask_var(valid_cpus);
+
+	return ret;
+}
+late_initcall_sync(init_amu_fie);
```
```diff
+
+bool arch_freq_counters_available(const struct cpumask *cpus)
+{
+	return amu_freq_invariant() &&
+	       cpumask_subset(cpus, amu_fie_cpus);
+}
```
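`arch_freq_counters_available()` is the hand-off point between the two frequency-invariance sources: once the counters feed `freq_scale` for a set of CPUs, the cpufreq-driven update should leave those CPUs alone. A sketch of how the cpufreq-side hook might gate on it (illustrative only; the real caller lives outside this diff):

```c
/*
 * Sketch of a cpufreq-side scale update that defers to the AMU
 * counters (illustrative; the actual wiring is outside this diff).
 */
void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
			 unsigned long max_freq)
{
	unsigned long scale;
	int i;

	/* Counters already drive freq_scale for these CPUs; don't overwrite */
	if (arch_freq_counters_available(cpus))
		return;

	/* Otherwise fall back to the frequency reported by cpufreq */
	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;

	for_each_cpu(i, cpus)
		per_cpu(freq_scale, i) = scale;
}
```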
```diff
+
+void topology_scale_freq_tick(void)
+{
+	u64 prev_core_cnt, prev_const_cnt;
+	u64 core_cnt, const_cnt, scale;
+	int cpu = smp_processor_id();
+
+	if (!amu_freq_invariant())
+		return;
+
+	if (!cpumask_test_cpu(cpu, amu_fie_cpus))
+		return;
+
+	const_cnt = read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0);
+	core_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
+	prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
+	prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
+
+	if (unlikely(core_cnt <= prev_core_cnt ||
+		     const_cnt <= prev_const_cnt))
+		goto store_and_exit;
+
+	/*
+	 *          /\core    arch_max_freq_scale
+	 * scale =  ------- * --------------------
+	 *          /\const   SCHED_CAPACITY_SCALE
+	 *
+	 * See validate_cpu_freq_invariance_counters() for details on
+	 * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
+	 */
+	scale = core_cnt - prev_core_cnt;
+	scale *= this_cpu_read(arch_max_freq_scale);
+	scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
+			  const_cnt - prev_const_cnt);
+
+	scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
+	this_cpu_write(freq_scale, (unsigned long)scale);
+
+store_and_exit:
+	this_cpu_write(arch_core_cycles_prev, core_cnt);
+	this_cpu_write(arch_const_cycles_prev, const_cnt);
+}
+#endif /* CONFIG_ARM64_AMU_EXTN */
```
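Plugging numbers into the tick-time formula shows how the pieces fit together: with the `arch_max_freq_scale` of 13107 computed in the earlier example, a CPU that spent the last window at half its maximum frequency accumulates half as many core cycles per constant cycle and lands at roughly half of `SCHED_CAPACITY_SCALE`. All values below are illustrative, not taken from the diff:

```c
#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1ULL << SCHED_CAPACITY_SHIFT)

int main(void)
{
	/*
	 * Illustrative deltas over a 4 ms tick window:
	 * constant counter at 25 MHz -> 100000 ticks,
	 * core counter at 1 GHz (half of the 2 GHz max) -> 4000000 cycles.
	 */
	uint64_t delta_const = 100000ULL;
	uint64_t delta_core = 4000000ULL;
	uint64_t arch_max_freq_scale = 13107ULL;	/* from the example above */

	/* Same steps as topology_scale_freq_tick() */
	uint64_t scale = delta_core * arch_max_freq_scale;
	scale >>= SCHED_CAPACITY_SHIFT;
	scale /= delta_const;
	if (scale > SCHED_CAPACITY_SCALE)
		scale = SCHED_CAPACITY_SCALE;

	/* Prints 511, i.e. ~SCHED_CAPACITY_SCALE / 2 for half max frequency */
	printf("freq_scale = %llu\n", (unsigned long long)scale);
	return 0;
}
```

The clamp to `SCHED_CAPACITY_SCALE` mirrors the `min_t()` in the tick handler: short bursts above the sustained maximum frequency must not push the scale factor past full capacity.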