| .. | .. |
|---|
| 4 | 4 | */ |
|---|
| 5 | 5 | #include "sched.h" |
|---|
| 6 | 6 | |
|---|
| 7 | +#include <trace/hooks/sched.h> |
|---|
| 8 | + |
|---|
| 7 | 9 | DEFINE_MUTEX(sched_domains_mutex); |
|---|
| 10 | +#ifdef CONFIG_LOCKDEP |
|---|
| 11 | +EXPORT_SYMBOL_GPL(sched_domains_mutex); |
|---|
| 12 | +#endif |
|---|
| 8 | 13 | |
|---|
| 9 | 14 | /* Protected by sched_domains_mutex: */ |
|---|
| 10 | | -cpumask_var_t sched_domains_tmpmask; |
|---|
| 11 | | -cpumask_var_t sched_domains_tmpmask2; |
|---|
| 15 | +static cpumask_var_t sched_domains_tmpmask; |
|---|
| 16 | +static cpumask_var_t sched_domains_tmpmask2; |
|---|
| 12 | 17 | |
|---|
| 13 | 18 | #ifdef CONFIG_SCHED_DEBUG |
|---|
| 14 | 19 | |
|---|
| .. | .. |
|---|
| 25 | 30 | return sched_debug_enabled; |
|---|
| 26 | 31 | } |
|---|
| 27 | 32 | |
|---|
| 33 | +#define SD_FLAG(_name, mflags) [__##_name] = { .meta_flags = mflags, .name = #_name }, |
|---|
| 34 | +const struct sd_flag_debug sd_flag_debug[] = { |
|---|
| 35 | +#include <linux/sched/sd_flags.h> |
|---|
| 36 | +}; |
|---|
| 37 | +#undef SD_FLAG |
|---|
| 38 | + |
|---|
| 28 | 39 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, |
|---|
| 29 | 40 | struct cpumask *groupmask) |
|---|
| 30 | 41 | { |
|---|
| 31 | 42 | struct sched_group *group = sd->groups; |
|---|
| 43 | + unsigned long flags = sd->flags; |
|---|
| 44 | + unsigned int idx; |
|---|
| 32 | 45 | |
|---|
| 33 | 46 | cpumask_clear(groupmask); |
|---|
| 34 | 47 | |
|---|
| 35 | 48 | printk(KERN_DEBUG "%*s domain-%d: ", level, "", level); |
|---|
| 36 | | - |
|---|
| 37 | | - if (!(sd->flags & SD_LOAD_BALANCE)) { |
|---|
| 38 | | - printk("does not load-balance\n"); |
|---|
| 39 | | - if (sd->parent) |
|---|
| 40 | | - printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent"); |
|---|
| 41 | | - return -1; |
|---|
| 42 | | - } |
|---|
| 43 | | - |
|---|
| 44 | 49 | printk(KERN_CONT "span=%*pbl level=%s\n", |
|---|
| 45 | 50 | cpumask_pr_args(sched_domain_span(sd)), sd->name); |
|---|
| 46 | 51 | |
|---|
| .. | .. |
|---|
| 49 | 54 | } |
|---|
| 50 | 55 | if (group && !cpumask_test_cpu(cpu, sched_group_span(group))) { |
|---|
| 51 | 56 | printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu); |
|---|
| 57 | + } |
|---|
| 58 | + |
|---|
| 59 | + for_each_set_bit(idx, &flags, __SD_FLAG_CNT) { |
|---|
| 60 | + unsigned int flag = BIT(idx); |
|---|
| 61 | + unsigned int meta_flags = sd_flag_debug[idx].meta_flags; |
|---|
| 62 | + |
|---|
| 63 | + if ((meta_flags & SDF_SHARED_CHILD) && sd->child && |
|---|
| 64 | + !(sd->child->flags & flag)) |
|---|
| 65 | + printk(KERN_ERR "ERROR: flag %s set here but not in child\n", |
|---|
| 66 | + sd_flag_debug[idx].name); |
|---|
| 67 | + |
|---|
| 68 | + if ((meta_flags & SDF_SHARED_PARENT) && sd->parent && |
|---|
| 69 | + !(sd->parent->flags & flag)) |
|---|
| 70 | + printk(KERN_ERR "ERROR: flag %s set here but not in parent\n", |
|---|
| 71 | + sd_flag_debug[idx].name); |
|---|
| 52 | 72 | } |
|---|
| 53 | 73 | |
|---|
| 54 | 74 | printk(KERN_DEBUG "%*s groups:", level + 1, ""); |
|---|
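The new `sd_flag_debug[]` table in this hunk is generated with an X-macro pass: `SD_FLAG(_name, mflags)` is defined to expand into an array initializer keyed by the flag's bit index, `<linux/sched/sd_flags.h>` is included as the list of invocations, and the macro is undefined again, so the debug names and metaflags can never drift out of sync with the flag definitions. A minimal standalone sketch of the same pattern, using a local list macro with made-up flags in place of sd_flags.h:

```c
/* Illustrative only: a self-contained X-macro table, not the kernel's sd_flags.h. */
#include <stdio.h>

#define MY_FLAGS               \
	MY_FLAG(FLAG_FOO, 0x1) \
	MY_FLAG(FLAG_BAR, 0x2)

/* First expansion: enumerate bit indices (__FLAG_FOO = 0, __FLAG_BAR = 1, ...). */
#define MY_FLAG(_name, _meta) __##_name,
enum { MY_FLAGS __MY_FLAG_CNT };
#undef MY_FLAG

/* Second expansion: a debug table indexed by those bit indices. */
#define MY_FLAG(_name, _meta) [__##_name] = { .meta = (_meta), .name = #_name },
static const struct { unsigned int meta; const char *name; } my_flag_debug[] = {
	MY_FLAGS
};
#undef MY_FLAG

int main(void)
{
	for (unsigned int i = 0; i < __MY_FLAG_CNT; i++)
		printf("%s: meta=%#x\n", my_flag_debug[i].name, my_flag_debug[i].meta);
	return 0;
}
```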
| .. | .. |
|---|
| 145 | 165 | } |
|---|
| 146 | 166 | #endif /* CONFIG_SCHED_DEBUG */ |
|---|
| 147 | 167 | |
|---|
| 168 | +/* Generate a mask of SD flags with the SDF_NEEDS_GROUPS metaflag */ |
|---|
| 169 | +#define SD_FLAG(name, mflags) (name * !!((mflags) & SDF_NEEDS_GROUPS)) | |
|---|
| 170 | +static const unsigned int SD_DEGENERATE_GROUPS_MASK = |
|---|
| 171 | +#include <linux/sched/sd_flags.h> |
|---|
| 172 | +0; |
|---|
| 173 | +#undef SD_FLAG |
|---|
| 174 | + |
|---|
| 148 | 175 | static int sd_degenerate(struct sched_domain *sd) |
|---|
| 149 | 176 | { |
|---|
| 150 | 177 | if (cpumask_weight(sched_domain_span(sd)) == 1) |
|---|
| 151 | 178 | return 1; |
|---|
| 152 | 179 | |
|---|
| 153 | 180 | /* Following flags need at least 2 groups */ |
|---|
| 154 | | - if (sd->flags & (SD_LOAD_BALANCE | |
|---|
| 155 | | - SD_BALANCE_NEWIDLE | |
|---|
| 156 | | - SD_BALANCE_FORK | |
|---|
| 157 | | - SD_BALANCE_EXEC | |
|---|
| 158 | | - SD_SHARE_CPUCAPACITY | |
|---|
| 159 | | - SD_ASYM_CPUCAPACITY | |
|---|
| 160 | | - SD_SHARE_PKG_RESOURCES | |
|---|
| 161 | | - SD_SHARE_POWERDOMAIN)) { |
|---|
| 162 | | - if (sd->groups != sd->groups->next) |
|---|
| 163 | | - return 0; |
|---|
| 164 | | - } |
|---|
| 181 | + if ((sd->flags & SD_DEGENERATE_GROUPS_MASK) && |
|---|
| 182 | + (sd->groups != sd->groups->next)) |
|---|
| 183 | + return 0; |
|---|
| 165 | 184 | |
|---|
| 166 | 185 | /* Following flags don't use groups */ |
|---|
| 167 | 186 | if (sd->flags & (SD_WAKE_AFFINE)) |
|---|
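`SD_DEGENERATE_GROUPS_MASK` uses the same header-as-list trick to OR together every flag whose metaflags include `SDF_NEEDS_GROUPS`: each `SD_FLAG` expansion evaluates to either the flag value or 0 via `name * !!((mflags) & SDF_NEEDS_GROUPS)`, and the trailing `| ... | 0` chain collapses into one compile-time constant, replacing the hand-maintained flag lists in `sd_degenerate()` and `sd_parent_degenerate()`. A hedged sketch of the arithmetic with made-up flag values:

```c
/* Illustrative constants, not the kernel's real SD_* values. */
#define SDF_NEEDS_GROUPS	0x1

#define FLAG_A	0x01	/* metaflags: SDF_NEEDS_GROUPS */
#define FLAG_B	0x02	/* metaflags: 0                */
#define FLAG_C	0x04	/* metaflags: SDF_NEEDS_GROUPS */

/* Each term is either the flag value or 0; the generated list ends in "| 0". */
static const unsigned int DEGENERATE_MASK =
	(FLAG_A * !!(SDF_NEEDS_GROUPS & SDF_NEEDS_GROUPS)) |
	(FLAG_B * !!(0                & SDF_NEEDS_GROUPS)) |
	(FLAG_C * !!(SDF_NEEDS_GROUPS & SDF_NEEDS_GROUPS)) |
	0;	/* == FLAG_A | FLAG_C == 0x05 */
```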
| .. | .. |
|---|
| 182 | 201 | return 0; |
|---|
| 183 | 202 | |
|---|
| 184 | 203 | /* Flags needing groups don't count if only 1 group in parent */ |
|---|
| 185 | | - if (parent->groups == parent->groups->next) { |
|---|
| 186 | | - pflags &= ~(SD_LOAD_BALANCE | |
|---|
| 187 | | - SD_BALANCE_NEWIDLE | |
|---|
| 188 | | - SD_BALANCE_FORK | |
|---|
| 189 | | - SD_BALANCE_EXEC | |
|---|
| 190 | | - SD_ASYM_CPUCAPACITY | |
|---|
| 191 | | - SD_SHARE_CPUCAPACITY | |
|---|
| 192 | | - SD_SHARE_PKG_RESOURCES | |
|---|
| 193 | | - SD_PREFER_SIBLING | |
|---|
| 194 | | - SD_SHARE_POWERDOMAIN); |
|---|
| 195 | | - if (nr_node_ids == 1) |
|---|
| 196 | | - pflags &= ~SD_SERIALIZE; |
|---|
| 197 | | - } |
|---|
| 204 | + if (parent->groups == parent->groups->next) |
|---|
| 205 | + pflags &= ~SD_DEGENERATE_GROUPS_MASK; |
|---|
| 206 | + |
|---|
| 198 | 207 | if (~cflags & pflags) |
|---|
| 199 | 208 | return 0; |
|---|
| 200 | 209 | |
|---|
| 201 | 210 | return 1; |
|---|
| 202 | 211 | } |
|---|
| 203 | 212 | |
|---|
| 204 | | -DEFINE_STATIC_KEY_FALSE(sched_energy_present); |
|---|
| 205 | | - |
|---|
| 206 | | -#ifdef CONFIG_ENERGY_MODEL |
|---|
| 207 | 213 | #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) |
|---|
| 214 | +DEFINE_STATIC_KEY_FALSE(sched_energy_present); |
|---|
| 208 | 215 | unsigned int sysctl_sched_energy_aware = 1; |
|---|
| 209 | 216 | DEFINE_MUTEX(sched_energy_mutex); |
|---|
| 210 | 217 | bool sched_energy_update; |
|---|
| 211 | 218 | |
|---|
| 212 | 219 | #ifdef CONFIG_PROC_SYSCTL |
|---|
| 213 | 220 | int sched_energy_aware_handler(struct ctl_table *table, int write, |
|---|
| 214 | | - void __user *buffer, size_t *lenp, loff_t *ppos) |
|---|
| 221 | + void *buffer, size_t *lenp, loff_t *ppos) |
|---|
| 215 | 222 | { |
|---|
| 216 | 223 | int ret, state; |
|---|
| 217 | 224 | |
|---|
| .. | .. |
|---|
| 233 | 240 | return ret; |
|---|
| 234 | 241 | } |
|---|
| 235 | 242 | #endif |
|---|
| 236 | | -#endif /* defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */ |
|---|
| 237 | 243 | |
|---|
| 238 | 244 | static void free_pd(struct perf_domain *pd) |
|---|
| 239 | 245 | { |
|---|
| .. | .. |
|---|
| 285 | 291 | printk(KERN_DEBUG "root_domain %*pbl:", cpumask_pr_args(cpu_map)); |
|---|
| 286 | 292 | |
|---|
| 287 | 293 | while (pd) { |
|---|
| 288 | | - printk(KERN_CONT " pd%d:{ cpus=%*pbl nr_cstate=%d }", |
|---|
| 294 | + printk(KERN_CONT " pd%d:{ cpus=%*pbl nr_pstate=%d }", |
|---|
| 289 | 295 | cpumask_first(perf_domain_span(pd)), |
|---|
| 290 | 296 | cpumask_pr_args(perf_domain_span(pd)), |
|---|
| 291 | | - em_pd_nr_cap_states(pd->em_pd)); |
|---|
| 297 | + em_pd_nr_perf_states(pd->em_pd)); |
|---|
| 292 | 298 | pd = pd->next; |
|---|
| 293 | 299 | } |
|---|
| 294 | 300 | |
|---|
| .. | .. |
|---|
| 320 | 326 | * EAS can be used on a root domain if it meets all the following conditions: |
|---|
| 321 | 327 | * 1. an Energy Model (EM) is available; |
|---|
| 322 | 328 | * 2. the SD_ASYM_CPUCAPACITY flag is set in the sched_domain hierarchy. |
|---|
| 323 | | - * 3. the EM complexity is low enough to keep scheduling overheads low; |
|---|
| 329 | + * 3. no SMT is detected. |
|---|
| 330 | + * 4. the EM complexity is low enough to keep scheduling overheads low; |
|---|
| 324 | 331 | * |
|---|
| 325 | 332 | * The complexity of the Energy Model is defined as: |
|---|
| 326 | 333 | * |
|---|
| 327 | | - * C = nr_pd * (nr_cpus + nr_cs) |
|---|
| 334 | + * C = nr_pd * (nr_cpus + nr_ps) |
|---|
| 328 | 335 | * |
|---|
| 329 | 336 | * with parameters defined as: |
|---|
| 330 | 337 | * - nr_pd: the number of performance domains |
|---|
| 331 | 338 | * - nr_cpus: the number of CPUs |
|---|
| 332 | | - * - nr_cs: the sum of the number of capacity states of all performance |
|---|
| 339 | + * - nr_ps: the sum of the number of performance states of all performance |
|---|
| 333 | 340 | * domains (for example, on a system with 2 performance domains, |
|---|
| 334 | | - * with 10 capacity states each, nr_cs = 2 * 10 = 20). |
|---|
| 341 | + * with 10 performance states each, nr_ps = 2 * 10 = 20). |
|---|
| 335 | 342 | * |
|---|
| 336 | 343 | * It is generally not a good idea to use such a model in the wake-up path on |
|---|
| 337 | 344 | * very complex platforms because of the associated scheduling overheads. The |
|---|
| 338 | 345 | * arbitrary constraint below prevents that. It makes EAS usable up to 16 CPUs |
|---|
| 339 | | - * with per-CPU DVFS and less than 8 capacity states each, for example. |
|---|
| 346 | + * with per-CPU DVFS and less than 8 performance states each, for example. |
|---|
| 340 | 347 | */ |
|---|
| 341 | 348 | #define EM_MAX_COMPLEXITY 2048 |
|---|
| 342 | 349 | |
|---|
| 343 | 350 | static bool build_perf_domains(const struct cpumask *cpu_map) |
|---|
| 344 | 351 | { |
|---|
| 345 | | - int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map); |
|---|
| 352 | + int i, nr_pd = 0, nr_ps = 0, nr_cpus = cpumask_weight(cpu_map); |
|---|
| 346 | 353 | struct perf_domain *pd = NULL, *tmp; |
|---|
| 347 | 354 | int cpu = cpumask_first(cpu_map); |
|---|
| 348 | 355 | struct root_domain *rd = cpu_rq(cpu)->rd; |
|---|
| 356 | + bool eas_check = false; |
|---|
| 349 | 357 | |
|---|
| 350 | | -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) |
|---|
| 351 | 358 | if (!sysctl_sched_energy_aware) |
|---|
| 352 | 359 | goto free; |
|---|
| 353 | | -#endif |
|---|
| 354 | 360 | |
|---|
| 355 | | - /* EAS is enabled for asymmetric CPU capacity topologies. */ |
|---|
| 356 | | - if (!per_cpu(sd_asym_cpucapacity, cpu)) { |
|---|
| 361 | + /* |
|---|
| 362 | + * EAS is enabled for asymmetric CPU capacity topologies. |
|---|
| 363 | + * Allow vendor to override if desired. |
|---|
| 364 | + */ |
|---|
| 365 | + trace_android_rvh_build_perf_domains(&eas_check); |
|---|
| 366 | + if (!per_cpu(sd_asym_cpucapacity, cpu) && !eas_check) { |
|---|
| 357 | 367 | if (sched_debug()) { |
|---|
| 358 | 368 | pr_info("rd %*pbl: CPUs do not have asymmetric capacities\n", |
|---|
| 359 | 369 | cpumask_pr_args(cpu_map)); |
|---|
| 360 | 370 | } |
|---|
| 371 | + goto free; |
|---|
| 372 | + } |
|---|
| 373 | + |
|---|
| 374 | + /* EAS definitely does *not* handle SMT */ |
|---|
| 375 | + if (sched_smt_active()) { |
|---|
| 376 | + pr_warn("rd %*pbl: Disabling EAS, SMT is not supported\n", |
|---|
| 377 | + cpumask_pr_args(cpu_map)); |
|---|
| 361 | 378 | goto free; |
|---|
| 362 | 379 | } |
|---|
| 363 | 380 | |
|---|
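The `EM_MAX_COMPLEXITY` bound can be checked against the numbers in the comment: with per-CPU DVFS on 16 CPUs there are 16 performance domains, so 7 performance states per domain gives C = 16 * (16 + 16*7) = 2048, which still passes the `>` check, while 8 states gives 16 * (16 + 128) = 2304 and triggers the warning. A small sketch mirroring that arithmetic (not the kernel function itself):

```c
#include <stdio.h>

#define EM_MAX_COMPLEXITY	2048

/* Mirror of the complexity check: C = nr_pd * (nr_cpus + nr_ps). */
static int eas_complexity_ok(int nr_pd, int nr_cpus, int nr_ps)
{
	return nr_pd * (nr_ps + nr_cpus) <= EM_MAX_COMPLEXITY;
}

int main(void)
{
	/* 16 CPUs, per-CPU DVFS (16 perf domains), 7 perf states each. */
	printf("7 states: %s\n", eas_complexity_ok(16, 16, 16 * 7) ? "ok" : "too complex");
	/* Same topology with 8 perf states each exceeds the bound. */
	printf("8 states: %s\n", eas_complexity_ok(16, 16, 16 * 8) ? "ok" : "too complex");
	return 0;
}
```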
| .. | .. |
|---|
| 374 | 391 | pd = tmp; |
|---|
| 375 | 392 | |
|---|
| 376 | 393 | /* |
|---|
| 377 | | - * Count performance domains and capacity states for the |
|---|
| 394 | + * Count performance domains and performance states for the |
|---|
| 378 | 395 | * complexity check. |
|---|
| 379 | 396 | */ |
|---|
| 380 | 397 | nr_pd++; |
|---|
| 381 | | - nr_cs += em_pd_nr_cap_states(pd->em_pd); |
|---|
| 398 | + nr_ps += em_pd_nr_perf_states(pd->em_pd); |
|---|
| 382 | 399 | } |
|---|
| 383 | 400 | |
|---|
| 384 | 401 | /* Bail out if the Energy Model complexity is too high. */ |
|---|
| 385 | | - if (nr_pd * (nr_cs + nr_cpus) > EM_MAX_COMPLEXITY) { |
|---|
| 402 | + if (nr_pd * (nr_ps + nr_cpus) > EM_MAX_COMPLEXITY) { |
|---|
| 386 | 403 | WARN(1, "rd %*pbl: Failed to start EAS, EM complexity is too high\n", |
|---|
| 387 | 404 | cpumask_pr_args(cpu_map)); |
|---|
| 388 | 405 | goto free; |
|---|
| .. | .. |
|---|
| 409 | 426 | } |
|---|
| 410 | 427 | #else |
|---|
| 411 | 428 | static void free_pd(struct perf_domain *pd) { } |
|---|
| 412 | | -#endif /* CONFIG_ENERGY_MODEL */ |
|---|
| 429 | +#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL*/ |
|---|
| 413 | 430 | |
|---|
| 414 | 431 | static void free_rootdomain(struct rcu_head *rcu) |
|---|
| 415 | 432 | { |
|---|
| .. | .. |
|---|
| 459 | 476 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
|---|
| 460 | 477 | |
|---|
| 461 | 478 | if (old_rd) |
|---|
| 462 | | - call_rcu_sched(&old_rd->rcu, free_rootdomain); |
|---|
| 479 | + call_rcu(&old_rd->rcu, free_rootdomain); |
|---|
| 463 | 480 | } |
|---|
| 464 | 481 | |
|---|
| 465 | 482 | void sched_get_rd(struct root_domain *rd) |
|---|
| .. | .. |
|---|
| 472 | 489 | if (!atomic_dec_and_test(&rd->refcount)) |
|---|
| 473 | 490 | return; |
|---|
| 474 | 491 | |
|---|
| 475 | | - call_rcu_sched(&rd->rcu, free_rootdomain); |
|---|
| 492 | + call_rcu(&rd->rcu, free_rootdomain); |
|---|
| 476 | 493 | } |
|---|
| 477 | 494 | |
|---|
| 478 | 495 | static int init_rootdomain(struct root_domain *rd) |
|---|
| .. | .. |
|---|
| 498 | 515 | |
|---|
| 499 | 516 | if (cpupri_init(&rd->cpupri) != 0) |
|---|
| 500 | 517 | goto free_cpudl; |
|---|
| 501 | | - |
|---|
| 502 | | - init_max_cpu_capacity(&rd->max_cpu_capacity); |
|---|
| 503 | | - |
|---|
| 504 | 518 | return 0; |
|---|
| 505 | 519 | |
|---|
| 506 | 520 | free_cpudl: |
|---|
| .. | .. |
|---|
| 606 | 620 | * the cpumask of the domain), this allows us to quickly tell if |
|---|
| 607 | 621 | * two CPUs are in the same cache domain, see cpus_share_cache(). |
|---|
| 608 | 622 | */ |
|---|
| 609 | | -DEFINE_PER_CPU(struct sched_domain *, sd_llc); |
|---|
| 623 | +DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc); |
|---|
| 610 | 624 | DEFINE_PER_CPU(int, sd_llc_size); |
|---|
| 611 | 625 | DEFINE_PER_CPU(int, sd_llc_id); |
|---|
| 612 | | -DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); |
|---|
| 613 | | -DEFINE_PER_CPU(struct sched_domain *, sd_numa); |
|---|
| 614 | | -DEFINE_PER_CPU(struct sched_domain *, sd_asym_packing); |
|---|
| 615 | | -DEFINE_PER_CPU(struct sched_domain *, sd_asym_cpucapacity); |
|---|
| 626 | +DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared); |
|---|
| 627 | +DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa); |
|---|
| 628 | +DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing); |
|---|
| 629 | +DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity); |
|---|
| 616 | 630 | DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity); |
|---|
| 617 | 631 | |
|---|
| 618 | 632 | static void update_top_cache_domain(int cpu) |
|---|
| .. | .. |
|---|
| 1050 | 1064 | struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); |
|---|
| 1051 | 1065 | struct sched_domain *child = sd->child; |
|---|
| 1052 | 1066 | struct sched_group *sg; |
|---|
| 1067 | + bool already_visited; |
|---|
| 1053 | 1068 | |
|---|
| 1054 | 1069 | if (child) |
|---|
| 1055 | 1070 | cpu = cpumask_first(sched_domain_span(child)); |
|---|
| .. | .. |
|---|
| 1057 | 1072 | sg = *per_cpu_ptr(sdd->sg, cpu); |
|---|
| 1058 | 1073 | sg->sgc = *per_cpu_ptr(sdd->sgc, cpu); |
|---|
| 1059 | 1074 | |
|---|
| 1060 | | - /* For claim_allocations: */ |
|---|
| 1061 | | - atomic_inc(&sg->ref); |
|---|
| 1062 | | - atomic_inc(&sg->sgc->ref); |
|---|
| 1075 | + /* Increase refcounts for claim_allocations: */ |
|---|
| 1076 | + already_visited = atomic_inc_return(&sg->ref) > 1; |
|---|
| 1077 | + /* sgc visits should follow a similar trend as sg */ |
|---|
| 1078 | + WARN_ON(already_visited != (atomic_inc_return(&sg->sgc->ref) > 1)); |
|---|
| 1079 | + |
|---|
| 1080 | + /* If we have already visited that group, it's already initialized. */ |
|---|
| 1081 | + if (already_visited) |
|---|
| 1082 | + return sg; |
|---|
| 1063 | 1083 | |
|---|
| 1064 | 1084 | if (child) { |
|---|
| 1065 | 1085 | cpumask_copy(sched_group_span(sg), sched_domain_span(child)); |
|---|
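The reworked `get_group()` detects repeat visits with `atomic_inc_return()`: the first caller sees the group's refcount go to 1 and performs the span/capacity initialization, while later callers see a value above 1 and return early, with the `WARN_ON()` checking that the `sg` and `sgc` counts move in lockstep. A minimal userspace sketch of that first-visit pattern, with C11 atomics standing in for the kernel's `atomic_t`:

```c
/* Sketch only: C11 atomics in place of the kernel's atomic_t helpers. */
#include <stdatomic.h>
#include <stdbool.h>

struct group {
	atomic_int ref;
	bool initialized;	/* stands in for the span/capacity setup */
};

static struct group *visit_group(struct group *g)
{
	/* atomic_fetch_add() returns the old value; +1 gives the new refcount. */
	bool already_visited = atomic_fetch_add(&g->ref, 1) + 1 > 1;

	if (already_visited)
		return g;	/* an earlier visitor already initialized it */

	g->initialized = true;	/* one-time initialization */
	return g;
}
```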
| .. | .. |
|---|
| 1078 | 1098 | |
|---|
| 1079 | 1099 | /* |
|---|
| 1080 | 1100 | * build_sched_groups will build a circular linked list of the groups |
|---|
| 1081 | | - * covered by the given span, and will set each group's ->cpumask correctly, |
|---|
| 1082 | | - * and ->cpu_capacity to 0. |
|---|
| 1101 | + * covered by the given span, will set each group's ->cpumask correctly, |
|---|
| 1102 | + * and will initialize their ->sgc. |
|---|
| 1083 | 1103 | * |
|---|
| 1084 | 1104 | * Assumes the sched_domain tree is fully constructed |
|---|
| 1085 | 1105 | */ |
|---|
| .. | .. |
|---|
| 1186 | 1206 | if (!attr || attr->relax_domain_level < 0) { |
|---|
| 1187 | 1207 | if (default_relax_domain_level < 0) |
|---|
| 1188 | 1208 | return; |
|---|
| 1189 | | - else |
|---|
| 1190 | | - request = default_relax_domain_level; |
|---|
| 1209 | + request = default_relax_domain_level; |
|---|
| 1191 | 1210 | } else |
|---|
| 1192 | 1211 | request = attr->relax_domain_level; |
|---|
| 1193 | | - if (request < sd->level) { |
|---|
| 1212 | + |
|---|
| 1213 | + if (sd->level > request) { |
|---|
| 1194 | 1214 | /* Turn off idle balance on this domain: */ |
|---|
| 1195 | 1215 | sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); |
|---|
| 1196 | | - } else { |
|---|
| 1197 | | - /* Turn on idle balance on this domain: */ |
|---|
| 1198 | | - sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); |
|---|
| 1199 | 1216 | } |
|---|
| 1200 | 1217 | } |
|---|
| 1201 | 1218 | |
|---|
| .. | .. |
|---|
| 1209 | 1226 | case sa_rootdomain: |
|---|
| 1210 | 1227 | if (!atomic_read(&d->rd->refcount)) |
|---|
| 1211 | 1228 | free_rootdomain(&d->rd->rcu); |
|---|
| 1212 | | - /* Fall through */ |
|---|
| 1229 | + fallthrough; |
|---|
| 1213 | 1230 | case sa_sd: |
|---|
| 1214 | 1231 | free_percpu(d->sd); |
|---|
| 1215 | | - /* Fall through */ |
|---|
| 1232 | + fallthrough; |
|---|
| 1216 | 1233 | case sa_sd_storage: |
|---|
| 1217 | 1234 | __sdt_free(cpu_map); |
|---|
| 1218 | | - /* Fall through */ |
|---|
| 1235 | + fallthrough; |
|---|
| 1219 | 1236 | case sa_none: |
|---|
| 1220 | 1237 | break; |
|---|
| 1221 | 1238 | } |
|---|
| .. | .. |
|---|
| 1269 | 1286 | int sched_max_numa_distance; |
|---|
| 1270 | 1287 | static int *sched_domains_numa_distance; |
|---|
| 1271 | 1288 | static struct cpumask ***sched_domains_numa_masks; |
|---|
| 1289 | +int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; |
|---|
| 1272 | 1290 | #endif |
|---|
| 1273 | 1291 | |
|---|
| 1274 | 1292 | /* |
|---|
| .. | .. |
|---|
| 1281 | 1299 | * SD_SHARE_CPUCAPACITY - describes SMT topologies |
|---|
| 1282 | 1300 | * SD_SHARE_PKG_RESOURCES - describes shared caches |
|---|
| 1283 | 1301 | * SD_NUMA - describes NUMA topologies |
|---|
| 1284 | | - * SD_SHARE_POWERDOMAIN - describes shared power domain |
|---|
| 1285 | 1302 | * |
|---|
| 1286 | 1303 | * Odd one out, which beside describing the topology has a quirk also |
|---|
| 1287 | 1304 | * prescribes the desired behaviour that goes along with it: |
|---|
| .. | .. |
|---|
| 1292 | 1309 | (SD_SHARE_CPUCAPACITY | \ |
|---|
| 1293 | 1310 | SD_SHARE_PKG_RESOURCES | \ |
|---|
| 1294 | 1311 | SD_NUMA | \ |
|---|
| 1295 | | - SD_ASYM_PACKING | \ |
|---|
| 1296 | | - SD_SHARE_POWERDOMAIN) |
|---|
| 1312 | + SD_ASYM_PACKING) |
|---|
| 1297 | 1313 | |
|---|
| 1298 | 1314 | static struct sched_domain * |
|---|
| 1299 | 1315 | sd_init(struct sched_domain_topology_level *tl, |
|---|
| .. | .. |
|---|
| 1325 | 1341 | *sd = (struct sched_domain){ |
|---|
| 1326 | 1342 | .min_interval = sd_weight, |
|---|
| 1327 | 1343 | .max_interval = 2*sd_weight, |
|---|
| 1328 | | - .busy_factor = 32, |
|---|
| 1329 | | - .imbalance_pct = 125, |
|---|
| 1344 | + .busy_factor = 16, |
|---|
| 1345 | + .imbalance_pct = 117, |
|---|
| 1330 | 1346 | |
|---|
| 1331 | 1347 | .cache_nice_tries = 0, |
|---|
| 1332 | | - .busy_idx = 0, |
|---|
| 1333 | | - .idle_idx = 0, |
|---|
| 1334 | | - .newidle_idx = 0, |
|---|
| 1335 | | - .wake_idx = 0, |
|---|
| 1336 | | - .forkexec_idx = 0, |
|---|
| 1337 | 1348 | |
|---|
| 1338 | | - .flags = 1*SD_LOAD_BALANCE |
|---|
| 1339 | | - | 1*SD_BALANCE_NEWIDLE |
|---|
| 1349 | + .flags = 1*SD_BALANCE_NEWIDLE |
|---|
| 1340 | 1350 | | 1*SD_BALANCE_EXEC |
|---|
| 1341 | 1351 | | 1*SD_BALANCE_FORK |
|---|
| 1342 | 1352 | | 0*SD_BALANCE_WAKE |
|---|
| .. | .. |
|---|
| 1351 | 1361 | |
|---|
| 1352 | 1362 | .last_balance = jiffies, |
|---|
| 1353 | 1363 | .balance_interval = sd_weight, |
|---|
| 1354 | | - .smt_gain = 0, |
|---|
| 1355 | 1364 | .max_newidle_lb_cost = 0, |
|---|
| 1356 | 1365 | .next_decay_max_lb_cost = jiffies, |
|---|
| 1357 | 1366 | .child = child, |
|---|
| .. | .. |
|---|
| 1367 | 1376 | * Convert topological properties into behaviour. |
|---|
| 1368 | 1377 | */ |
|---|
| 1369 | 1378 | |
|---|
| 1370 | | - if (sd->flags & SD_ASYM_CPUCAPACITY) { |
|---|
| 1371 | | - struct sched_domain *t = sd; |
|---|
| 1372 | | - |
|---|
| 1373 | | - /* |
|---|
| 1374 | | - * Don't attempt to spread across CPUs of different capacities. |
|---|
| 1375 | | - */ |
|---|
| 1376 | | - if (sd->child) |
|---|
| 1377 | | - sd->child->flags &= ~SD_PREFER_SIBLING; |
|---|
| 1378 | | - |
|---|
| 1379 | | - for_each_lower_domain(t) |
|---|
| 1380 | | - t->flags |= SD_BALANCE_WAKE; |
|---|
| 1381 | | - } |
|---|
| 1379 | + /* Don't attempt to spread across CPUs of different capacities. */ |
|---|
| 1380 | + if ((sd->flags & SD_ASYM_CPUCAPACITY) && sd->child) |
|---|
| 1381 | + sd->child->flags &= ~SD_PREFER_SIBLING; |
|---|
| 1382 | 1382 | |
|---|
| 1383 | 1383 | if (sd->flags & SD_SHARE_CPUCAPACITY) { |
|---|
| 1384 | 1384 | sd->imbalance_pct = 110; |
|---|
| 1385 | | - sd->smt_gain = 1178; /* ~15% */ |
|---|
| 1386 | 1385 | |
|---|
| 1387 | 1386 | } else if (sd->flags & SD_SHARE_PKG_RESOURCES) { |
|---|
| 1388 | 1387 | sd->imbalance_pct = 117; |
|---|
| 1389 | 1388 | sd->cache_nice_tries = 1; |
|---|
| 1390 | | - sd->busy_idx = 2; |
|---|
| 1391 | 1389 | |
|---|
| 1392 | 1390 | #ifdef CONFIG_NUMA |
|---|
| 1393 | 1391 | } else if (sd->flags & SD_NUMA) { |
|---|
| 1394 | 1392 | sd->cache_nice_tries = 2; |
|---|
| 1395 | | - sd->busy_idx = 3; |
|---|
| 1396 | | - sd->idle_idx = 2; |
|---|
| 1397 | 1393 | |
|---|
| 1398 | 1394 | sd->flags &= ~SD_PREFER_SIBLING; |
|---|
| 1399 | 1395 | sd->flags |= SD_SERIALIZE; |
|---|
| 1400 | | - if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) { |
|---|
| 1396 | + if (sched_domains_numa_distance[tl->numa_level] > node_reclaim_distance) { |
|---|
| 1401 | 1397 | sd->flags &= ~(SD_BALANCE_EXEC | |
|---|
| 1402 | 1398 | SD_BALANCE_FORK | |
|---|
| 1403 | 1399 | SD_WAKE_AFFINE); |
|---|
| .. | .. |
|---|
| 1406 | 1402 | #endif |
|---|
| 1407 | 1403 | } else { |
|---|
| 1408 | 1404 | sd->cache_nice_tries = 1; |
|---|
| 1409 | | - sd->busy_idx = 2; |
|---|
| 1410 | | - sd->idle_idx = 1; |
|---|
| 1411 | 1405 | } |
|---|
| 1412 | 1406 | |
|---|
| 1413 | 1407 | /* |
|---|
| .. | .. |
|---|
| 1548 | 1542 | } |
|---|
| 1549 | 1543 | } |
|---|
| 1550 | 1544 | |
|---|
| 1545 | + |
|---|
| 1546 | +#define NR_DISTANCE_VALUES (1 << DISTANCE_BITS) |
|---|
| 1547 | + |
|---|
| 1551 | 1548 | void sched_init_numa(void) |
|---|
| 1552 | 1549 | { |
|---|
| 1553 | | - int next_distance, curr_distance = node_distance(0, 0); |
|---|
| 1554 | 1550 | struct sched_domain_topology_level *tl; |
|---|
| 1555 | | - int level = 0; |
|---|
| 1556 | | - int i, j, k; |
|---|
| 1557 | | - |
|---|
| 1558 | | - sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL); |
|---|
| 1559 | | - if (!sched_domains_numa_distance) |
|---|
| 1560 | | - return; |
|---|
| 1561 | | - |
|---|
| 1562 | | - /* Includes NUMA identity node at level 0. */ |
|---|
| 1563 | | - sched_domains_numa_distance[level++] = curr_distance; |
|---|
| 1564 | | - sched_domains_numa_levels = level; |
|---|
| 1551 | + unsigned long *distance_map; |
|---|
| 1552 | + int nr_levels = 0; |
|---|
| 1553 | + int i, j; |
|---|
| 1565 | 1554 | |
|---|
| 1566 | 1555 | /* |
|---|
| 1567 | 1556 | * O(nr_nodes^2) deduplicating selection sort -- in order to find the |
|---|
| 1568 | 1557 | * unique distances in the node_distance() table. |
|---|
| 1569 | | - * |
|---|
| 1570 | | - * Assumes node_distance(0,j) includes all distances in |
|---|
| 1571 | | - * node_distance(i,j) in order to avoid cubic time. |
|---|
| 1572 | 1558 | */ |
|---|
| 1573 | | - next_distance = curr_distance; |
|---|
| 1559 | + distance_map = bitmap_alloc(NR_DISTANCE_VALUES, GFP_KERNEL); |
|---|
| 1560 | + if (!distance_map) |
|---|
| 1561 | + return; |
|---|
| 1562 | + |
|---|
| 1563 | + bitmap_zero(distance_map, NR_DISTANCE_VALUES); |
|---|
| 1574 | 1564 | for (i = 0; i < nr_node_ids; i++) { |
|---|
| 1575 | 1565 | for (j = 0; j < nr_node_ids; j++) { |
|---|
| 1576 | | - for (k = 0; k < nr_node_ids; k++) { |
|---|
| 1577 | | - int distance = node_distance(i, k); |
|---|
| 1566 | + int distance = node_distance(i, j); |
|---|
| 1578 | 1567 | |
|---|
| 1579 | | - if (distance > curr_distance && |
|---|
| 1580 | | - (distance < next_distance || |
|---|
| 1581 | | - next_distance == curr_distance)) |
|---|
| 1582 | | - next_distance = distance; |
|---|
| 1583 | | - |
|---|
| 1584 | | - /* |
|---|
| 1585 | | - * While not a strong assumption it would be nice to know |
|---|
| 1586 | | - * about cases where if node A is connected to B, B is not |
|---|
| 1587 | | - * equally connected to A. |
|---|
| 1588 | | - */ |
|---|
| 1589 | | - if (sched_debug() && node_distance(k, i) != distance) |
|---|
| 1590 | | - sched_numa_warn("Node-distance not symmetric"); |
|---|
| 1591 | | - |
|---|
| 1592 | | - if (sched_debug() && i && !find_numa_distance(distance)) |
|---|
| 1593 | | - sched_numa_warn("Node-0 not representative"); |
|---|
| 1568 | + if (distance < LOCAL_DISTANCE || distance >= NR_DISTANCE_VALUES) { |
|---|
| 1569 | + sched_numa_warn("Invalid distance value range"); |
|---|
| 1570 | + return; |
|---|
| 1594 | 1571 | } |
|---|
| 1595 | | - if (next_distance != curr_distance) { |
|---|
| 1596 | | - sched_domains_numa_distance[level++] = next_distance; |
|---|
| 1597 | | - sched_domains_numa_levels = level; |
|---|
| 1598 | | - curr_distance = next_distance; |
|---|
| 1599 | | - } else break; |
|---|
| 1600 | | - } |
|---|
| 1601 | 1572 | |
|---|
| 1602 | | - /* |
|---|
| 1603 | | - * In case of sched_debug() we verify the above assumption. |
|---|
| 1604 | | - */ |
|---|
| 1605 | | - if (!sched_debug()) |
|---|
| 1606 | | - break; |
|---|
| 1573 | + bitmap_set(distance_map, distance, 1); |
|---|
| 1574 | + } |
|---|
| 1575 | + } |
|---|
| 1576 | + /* |
|---|
| 1577 | + * We can now figure out how many unique distance values there are and |
|---|
| 1578 | + * allocate memory accordingly. |
|---|
| 1579 | + */ |
|---|
| 1580 | + nr_levels = bitmap_weight(distance_map, NR_DISTANCE_VALUES); |
|---|
| 1581 | + |
|---|
| 1582 | + sched_domains_numa_distance = kcalloc(nr_levels, sizeof(int), GFP_KERNEL); |
|---|
| 1583 | + if (!sched_domains_numa_distance) { |
|---|
| 1584 | + bitmap_free(distance_map); |
|---|
| 1585 | + return; |
|---|
| 1607 | 1586 | } |
|---|
| 1608 | 1587 | |
|---|
| 1588 | + for (i = 0, j = 0; i < nr_levels; i++, j++) { |
|---|
| 1589 | + j = find_next_bit(distance_map, NR_DISTANCE_VALUES, j); |
|---|
| 1590 | + sched_domains_numa_distance[i] = j; |
|---|
| 1591 | + } |
|---|
| 1592 | + |
|---|
| 1593 | + bitmap_free(distance_map); |
|---|
| 1594 | + |
|---|
| 1609 | 1595 | /* |
|---|
| 1610 | | - * 'level' contains the number of unique distances |
|---|
| 1596 | + * 'nr_levels' contains the number of unique distances |
|---|
| 1611 | 1597 | * |
|---|
| 1612 | 1598 | * The sched_domains_numa_distance[] array includes the actual distance |
|---|
| 1613 | 1599 | * numbers. |
|---|
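The rewritten `sched_init_numa()` replaces the old deduplicating selection sort with a single pass that marks every observed `node_distance()` value in a bitmap of `NR_DISTANCE_VALUES` bits; the number of set bits is the number of NUMA levels, and walking the set bits in index order yields the unique distances already sorted ascending. A self-contained sketch of that dedup-and-sort idea, assuming distances fit in 8 bits as the patch's `DISTANCE_BITS`-derived bitmap does, with a plain array standing in for the kernel bitmap helpers:

```c
#include <stdio.h>

#define NR_DISTANCE_VALUES 256	/* assumes 8-bit distance values */

int main(void)
{
	/* Example distance table for 3 nodes (symmetric, local distance 10). */
	int dist[3][3] = { {10, 20, 30}, {20, 10, 30}, {30, 30, 10} };
	unsigned char seen[NR_DISTANCE_VALUES] = { 0 };
	int levels[NR_DISTANCE_VALUES];
	int nr_levels = 0;

	/* Pass 1: mark every distance value we observe. */
	for (int i = 0; i < 3; i++)
		for (int j = 0; j < 3; j++)
			seen[dist[i][j]] = 1;

	/* Pass 2: walking the "bitmap" in index order gives sorted, unique values. */
	for (int d = 0; d < NR_DISTANCE_VALUES; d++)
		if (seen[d])
			levels[nr_levels++] = d;

	for (int i = 0; i < nr_levels; i++)
		printf("level %d: distance %d\n", i, levels[i]);	/* 10, 20, 30 */
	return 0;
}
```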
| .. | .. |
|---|
| 1616 | 1602 | /* |
|---|
| 1617 | 1603 | * Here, we should temporarily reset sched_domains_numa_levels to 0. |
|---|
| 1618 | 1604 | * If it fails to allocate memory for array sched_domains_numa_masks[][], |
|---|
| 1619 | | - * the array will contain less then 'level' members. This could be |
|---|
| 1605 | + * the array will contain less then 'nr_levels' members. This could be |
|---|
| 1620 | 1606 | * dangerous when we use it to iterate array sched_domains_numa_masks[][] |
|---|
| 1621 | 1607 | * in other functions. |
|---|
| 1622 | 1608 | * |
|---|
| 1623 | | - * We reset it to 'level' at the end of this function. |
|---|
| 1609 | + * We reset it to 'nr_levels' at the end of this function. |
|---|
| 1624 | 1610 | */ |
|---|
| 1625 | 1611 | sched_domains_numa_levels = 0; |
|---|
| 1626 | 1612 | |
|---|
| 1627 | | - sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL); |
|---|
| 1613 | + sched_domains_numa_masks = kzalloc(sizeof(void *) * nr_levels, GFP_KERNEL); |
|---|
| 1628 | 1614 | if (!sched_domains_numa_masks) |
|---|
| 1629 | 1615 | return; |
|---|
| 1630 | 1616 | |
|---|
| .. | .. |
|---|
| 1632 | 1618 | * Now for each level, construct a mask per node which contains all |
|---|
| 1633 | 1619 | * CPUs of nodes that are that many hops away from us. |
|---|
| 1634 | 1620 | */ |
|---|
| 1635 | | - for (i = 0; i < level; i++) { |
|---|
| 1621 | + for (i = 0; i < nr_levels; i++) { |
|---|
| 1636 | 1622 | sched_domains_numa_masks[i] = |
|---|
| 1637 | 1623 | kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL); |
|---|
| 1638 | 1624 | if (!sched_domains_numa_masks[i]) |
|---|
| .. | .. |
|---|
| 1640 | 1626 | |
|---|
| 1641 | 1627 | for (j = 0; j < nr_node_ids; j++) { |
|---|
| 1642 | 1628 | struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL); |
|---|
| 1629 | + int k; |
|---|
| 1630 | + |
|---|
| 1643 | 1631 | if (!mask) |
|---|
| 1644 | 1632 | return; |
|---|
| 1645 | 1633 | |
|---|
| 1646 | 1634 | sched_domains_numa_masks[i][j] = mask; |
|---|
| 1647 | 1635 | |
|---|
| 1648 | 1636 | for_each_node(k) { |
|---|
| 1637 | + if (sched_debug() && (node_distance(j, k) != node_distance(k, j))) |
|---|
| 1638 | + sched_numa_warn("Node-distance not symmetric"); |
|---|
| 1639 | + |
|---|
| 1649 | 1640 | if (node_distance(j, k) > sched_domains_numa_distance[i]) |
|---|
| 1650 | 1641 | continue; |
|---|
| 1651 | 1642 | |
|---|
| .. | .. |
|---|
| 1657 | 1648 | /* Compute default topology size */ |
|---|
| 1658 | 1649 | for (i = 0; sched_domain_topology[i].mask; i++); |
|---|
| 1659 | 1650 | |
|---|
| 1660 | | - tl = kzalloc((i + level + 1) * |
|---|
| 1651 | + tl = kzalloc((i + nr_levels + 1) * |
|---|
| 1661 | 1652 | sizeof(struct sched_domain_topology_level), GFP_KERNEL); |
|---|
| 1662 | 1653 | if (!tl) |
|---|
| 1663 | 1654 | return; |
|---|
| .. | .. |
|---|
| 1680 | 1671 | /* |
|---|
| 1681 | 1672 | * .. and append 'j' levels of NUMA goodness. |
|---|
| 1682 | 1673 | */ |
|---|
| 1683 | | - for (j = 1; j < level; i++, j++) { |
|---|
| 1674 | + for (j = 1; j < nr_levels; i++, j++) { |
|---|
| 1684 | 1675 | tl[i] = (struct sched_domain_topology_level){ |
|---|
| 1685 | 1676 | .mask = sd_numa_mask, |
|---|
| 1686 | 1677 | .sd_flags = cpu_numa_flags, |
|---|
| .. | .. |
|---|
| 1692 | 1683 | |
|---|
| 1693 | 1684 | sched_domain_topology = tl; |
|---|
| 1694 | 1685 | |
|---|
| 1695 | | - sched_domains_numa_levels = level; |
|---|
| 1696 | | - sched_max_numa_distance = sched_domains_numa_distance[level - 1]; |
|---|
| 1686 | + sched_domains_numa_levels = nr_levels; |
|---|
| 1687 | + sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1]; |
|---|
| 1697 | 1688 | |
|---|
| 1698 | 1689 | init_numa_topology_type(); |
|---|
| 1699 | 1690 | } |
|---|
| .. | .. |
|---|
| 1719 | 1710 | for (j = 0; j < nr_node_ids; j++) |
|---|
| 1720 | 1711 | cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]); |
|---|
| 1721 | 1712 | } |
|---|
| 1713 | +} |
|---|
| 1714 | + |
|---|
| 1715 | +/* |
|---|
| 1716 | + * sched_numa_find_closest() - given the NUMA topology, find the cpu |
|---|
| 1717 | + * closest to @cpu from @cpumask. |
|---|
| 1718 | + * cpumask: cpumask to find a cpu from |
|---|
| 1719 | + * cpu: cpu to be close to |
|---|
| 1720 | + * |
|---|
| 1721 | + * returns: cpu, or nr_cpu_ids when nothing found. |
|---|
| 1722 | + */ |
|---|
| 1723 | +int sched_numa_find_closest(const struct cpumask *cpus, int cpu) |
|---|
| 1724 | +{ |
|---|
| 1725 | + int i, j = cpu_to_node(cpu); |
|---|
| 1726 | + |
|---|
| 1727 | + for (i = 0; i < sched_domains_numa_levels; i++) { |
|---|
| 1728 | + cpu = cpumask_any_and(cpus, sched_domains_numa_masks[i][j]); |
|---|
| 1729 | + if (cpu < nr_cpu_ids) |
|---|
| 1730 | + return cpu; |
|---|
| 1731 | + } |
|---|
| 1732 | + return nr_cpu_ids; |
|---|
| 1722 | 1733 | } |
|---|
| 1723 | 1734 | |
|---|
| 1724 | 1735 | #endif /* CONFIG_NUMA */ |
|---|
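`sched_numa_find_closest()` walks the per-level NUMA masks outward from `cpu`'s node, so the first match is a CPU at the smallest available distance, and `nr_cpu_ids` signals that nothing in the mask was reachable. A hedged usage sketch in kernel style; only the helper's signature comes from the patch, the caller and its fallback are illustrative:

```c
/* Illustrative caller: pick the nearest allowed CPU to some "home" CPU.
 * pick_nearby_cpu() is hypothetical; sched_numa_find_closest() is from the patch. */
static int pick_nearby_cpu(const struct cpumask *allowed, int home_cpu)
{
	int cpu = sched_numa_find_closest(allowed, home_cpu);

	/* nr_cpu_ids means no CPU in 'allowed' was found via the NUMA masks. */
	if (cpu >= nr_cpu_ids)
		cpu = cpumask_any(allowed);	/* fall back to any allowed CPU */

	return cpu;
}
```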
| .. | .. |
|---|
| 1859 | 1870 | } |
|---|
| 1860 | 1871 | |
|---|
| 1861 | 1872 | /* |
|---|
| 1873 | + * Ensure topology masks are sane, i.e. there are no conflicts (overlaps) for |
|---|
| 1874 | + * any two given CPUs at this (non-NUMA) topology level. |
|---|
| 1875 | + */ |
|---|
| 1876 | +static bool topology_span_sane(struct sched_domain_topology_level *tl, |
|---|
| 1877 | + const struct cpumask *cpu_map, int cpu) |
|---|
| 1878 | +{ |
|---|
| 1879 | + int i; |
|---|
| 1880 | + |
|---|
| 1881 | + /* NUMA levels are allowed to overlap */ |
|---|
| 1882 | + if (tl->flags & SDTL_OVERLAP) |
|---|
| 1883 | + return true; |
|---|
| 1884 | + |
|---|
| 1885 | + /* |
|---|
| 1886 | + * Non-NUMA levels cannot partially overlap - they must be either |
|---|
| 1887 | + * completely equal or completely disjoint. Otherwise we can end up |
|---|
| 1888 | + * breaking the sched_group lists - i.e. a later get_group() pass |
|---|
| 1889 | + * breaks the linking done for an earlier span. |
|---|
| 1890 | + */ |
|---|
| 1891 | + for_each_cpu(i, cpu_map) { |
|---|
| 1892 | + if (i == cpu) |
|---|
| 1893 | + continue; |
|---|
| 1894 | + /* |
|---|
| 1895 | + * We should 'and' all those masks with 'cpu_map' to exactly |
|---|
| 1896 | + * match the topology we're about to build, but that can only |
|---|
| 1897 | + * remove CPUs, which only lessens our ability to detect |
|---|
| 1898 | + * overlaps |
|---|
| 1899 | + */ |
|---|
| 1900 | + if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) && |
|---|
| 1901 | + cpumask_intersects(tl->mask(cpu), tl->mask(i))) |
|---|
| 1902 | + return false; |
|---|
| 1903 | + } |
|---|
| 1904 | + |
|---|
| 1905 | + return true; |
|---|
| 1906 | +} |
|---|
| 1907 | + |
|---|
| 1908 | +/* |
|---|
| 1862 | 1909 | * Find the sched_domain_topology_level where all CPU capacities are visible |
|---|
| 1863 | 1910 | * for all CPUs. |
|---|
| 1864 | 1911 | */ |
|---|
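`topology_span_sane()` enforces that, at any non-NUMA level, the masks reported for two CPUs are either identical or share no CPU at all; a partial overlap (say CPU0 reporting {0,1} while CPU1 reports {1,2} at the same level) would let a later `get_group()` pass relink groups built for an earlier span. A tiny standalone sketch of the "equal or disjoint" predicate over bitmask spans:

```c
#include <stdbool.h>
#include <stdio.h>

/* Spans as plain bitmasks; bit i set == CPU i is in the span. */
static bool equal_or_disjoint(unsigned long a, unsigned long b)
{
	return a == b || (a & b) == 0;
}

int main(void)
{
	printf("{0,1} vs {0,1}: %d\n", equal_or_disjoint(0x3, 0x3));	/* 1: identical */
	printf("{0,1} vs {2,3}: %d\n", equal_or_disjoint(0x3, 0xc));	/* 1: disjoint  */
	printf("{0,1} vs {1,2}: %d\n", equal_or_disjoint(0x3, 0x6));	/* 0: partial overlap, rejected */
	return 0;
}
```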
| .. | .. |
|---|
| 1871 | 1918 | unsigned long cap; |
|---|
| 1872 | 1919 | |
|---|
| 1873 | 1920 | /* Is there any asymmetry? */ |
|---|
| 1874 | | - cap = arch_scale_cpu_capacity(NULL, cpumask_first(cpu_map)); |
|---|
| 1921 | + cap = arch_scale_cpu_capacity(cpumask_first(cpu_map)); |
|---|
| 1875 | 1922 | |
|---|
| 1876 | 1923 | for_each_cpu(i, cpu_map) { |
|---|
| 1877 | | - if (arch_scale_cpu_capacity(NULL, i) != cap) { |
|---|
| 1924 | + if (arch_scale_cpu_capacity(i) != cap) { |
|---|
| 1878 | 1925 | asym = true; |
|---|
| 1879 | 1926 | break; |
|---|
| 1880 | 1927 | } |
|---|
| .. | .. |
|---|
| 1889 | 1936 | * to everyone. |
|---|
| 1890 | 1937 | */ |
|---|
| 1891 | 1938 | for_each_cpu(i, cpu_map) { |
|---|
| 1892 | | - unsigned long max_capacity = arch_scale_cpu_capacity(NULL, i); |
|---|
| 1939 | + unsigned long max_capacity = arch_scale_cpu_capacity(i); |
|---|
| 1893 | 1940 | int tl_id = 0; |
|---|
| 1894 | 1941 | |
|---|
| 1895 | 1942 | for_each_sd_topology(tl) { |
|---|
| .. | .. |
|---|
| 1899 | 1946 | for_each_cpu_and(j, tl->mask(i), cpu_map) { |
|---|
| 1900 | 1947 | unsigned long capacity; |
|---|
| 1901 | 1948 | |
|---|
| 1902 | | - capacity = arch_scale_cpu_capacity(NULL, j); |
|---|
| 1949 | + capacity = arch_scale_cpu_capacity(j); |
|---|
| 1903 | 1950 | |
|---|
| 1904 | 1951 | if (capacity <= max_capacity) |
|---|
| 1905 | 1952 | continue; |
|---|
| .. | .. |
|---|
| 1924 | 1971 | static int |
|---|
| 1925 | 1972 | build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr) |
|---|
| 1926 | 1973 | { |
|---|
| 1927 | | - enum s_alloc alloc_state; |
|---|
| 1974 | + enum s_alloc alloc_state = sa_none; |
|---|
| 1928 | 1975 | struct sched_domain *sd; |
|---|
| 1929 | 1976 | struct s_data d; |
|---|
| 1977 | + struct rq *rq = NULL; |
|---|
| 1930 | 1978 | int i, ret = -ENOMEM; |
|---|
| 1931 | 1979 | struct sched_domain_topology_level *tl_asym; |
|---|
| 1932 | 1980 | bool has_asym = false; |
|---|
| 1981 | + |
|---|
| 1982 | + if (WARN_ON(cpumask_empty(cpu_map))) |
|---|
| 1983 | + goto error; |
|---|
| 1933 | 1984 | |
|---|
| 1934 | 1985 | alloc_state = __visit_domain_allocation_hell(&d, cpu_map); |
|---|
| 1935 | 1986 | if (alloc_state != sa_rootdomain) |
|---|
| .. | .. |
|---|
| 1940 | 1991 | /* Set up domains for CPUs specified by the cpu_map: */ |
|---|
| 1941 | 1992 | for_each_cpu(i, cpu_map) { |
|---|
| 1942 | 1993 | struct sched_domain_topology_level *tl; |
|---|
| 1994 | + int dflags = 0; |
|---|
| 1943 | 1995 | |
|---|
| 1944 | 1996 | sd = NULL; |
|---|
| 1945 | 1997 | for_each_sd_topology(tl) { |
|---|
| 1946 | | - int dflags = 0; |
|---|
| 1947 | | - |
|---|
| 1948 | 1998 | if (tl == tl_asym) { |
|---|
| 1949 | 1999 | dflags |= SD_ASYM_CPUCAPACITY; |
|---|
| 1950 | 2000 | has_asym = true; |
|---|
| 1951 | 2001 | } |
|---|
| 2002 | + |
|---|
| 2003 | + if (WARN_ON(!topology_span_sane(tl, cpu_map, i))) |
|---|
| 2004 | + goto error; |
|---|
| 1952 | 2005 | |
|---|
| 1953 | 2006 | sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i); |
|---|
| 1954 | 2007 | |
|---|
| .. | .. |
|---|
| 1989 | 2042 | /* Attach the domains */ |
|---|
| 1990 | 2043 | rcu_read_lock(); |
|---|
| 1991 | 2044 | for_each_cpu(i, cpu_map) { |
|---|
| 2045 | + rq = cpu_rq(i); |
|---|
| 1992 | 2046 | sd = *per_cpu_ptr(d.sd, i); |
|---|
| 2047 | + |
|---|
| 2048 | + /* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */ |
|---|
| 2049 | + if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity)) |
|---|
| 2050 | + WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig); |
|---|
| 2051 | + |
|---|
| 1993 | 2052 | cpu_attach_domain(sd, d.rd, i); |
|---|
| 1994 | 2053 | } |
|---|
| 1995 | 2054 | rcu_read_unlock(); |
|---|
| 1996 | 2055 | |
|---|
| 1997 | 2056 | if (has_asym) |
|---|
| 1998 | 2057 | static_branch_inc_cpuslocked(&sched_asym_cpucapacity); |
|---|
| 2058 | + |
|---|
| 2059 | + if (rq && sched_debug_enabled) { |
|---|
| 2060 | + pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n", |
|---|
| 2061 | + cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity); |
|---|
| 2062 | + } |
|---|
| 2063 | + trace_android_vh_build_sched_domains(has_asym); |
|---|
| 1999 | 2064 | |
|---|
| 2000 | 2065 | ret = 0; |
|---|
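The root-domain `max_cpu_capacity` update in this hunk pairs `READ_ONCE()` with `WRITE_ONCE()` so readers elsewhere never observe a torn value; the store side only runs from the domain-rebuild path, so a plain compare followed by a marked store is enough. A hedged userspace sketch of the same single-writer "monotonic max" pattern, with C11 relaxed atomics standing in for the kernel macros:

```c
#include <stdatomic.h>

/* Stand-in for rd->max_cpu_capacity; the kernel uses READ_ONCE()/WRITE_ONCE(). */
static _Atomic unsigned long max_cpu_capacity;

/* Assumes a single writer, as in the rebuild path; readers may run concurrently. */
static void note_cpu_capacity(unsigned long cap)
{
	if (cap > atomic_load_explicit(&max_cpu_capacity, memory_order_relaxed))
		atomic_store_explicit(&max_cpu_capacity, cap, memory_order_relaxed);
}
```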
| 2001 | 2066 | error: |
|---|
| .. | .. |
|---|
| 2056 | 2121 | } |
|---|
| 2057 | 2122 | |
|---|
| 2058 | 2123 | /* |
|---|
| 2059 | | - * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
|---|
| 2060 | | - * For now this just excludes isolated CPUs, but could be used to |
|---|
| 2061 | | - * exclude other special cases in the future. |
|---|
| 2124 | + * Set up scheduler domains and groups. For now this just excludes isolated |
|---|
| 2125 | + * CPUs, but could be used to exclude other special cases in the future. |
|---|
| 2062 | 2126 | */ |
|---|
| 2063 | 2127 | int sched_init_domains(const struct cpumask *cpu_map) |
|---|
| 2064 | 2128 | { |
|---|
| .. | .. |
|---|
| 2139 | 2203 | * ndoms_new == 0 is a special case for destroying existing domains, |
|---|
| 2140 | 2204 | * and it will not create the default domain. |
|---|
| 2141 | 2205 | * |
|---|
| 2142 | | - * Call with hotplug lock held |
|---|
| 2206 | + * Call with hotplug lock and sched_domains_mutex held |
|---|
| 2143 | 2207 | */ |
|---|
| 2144 | | -void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], |
|---|
| 2145 | | - struct sched_domain_attr *dattr_new) |
|---|
| 2208 | +void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[], |
|---|
| 2209 | + struct sched_domain_attr *dattr_new) |
|---|
| 2146 | 2210 | { |
|---|
| 2147 | 2211 | bool __maybe_unused has_eas = false; |
|---|
| 2148 | 2212 | int i, j, n; |
|---|
| 2149 | 2213 | int new_topology; |
|---|
| 2150 | 2214 | |
|---|
| 2151 | | - mutex_lock(&sched_domains_mutex); |
|---|
| 2215 | + lockdep_assert_held(&sched_domains_mutex); |
|---|
| 2152 | 2216 | |
|---|
| 2153 | 2217 | /* Always unregister in case we don't destroy any domains: */ |
|---|
| 2154 | 2218 | unregister_sched_domain_sysctl(); |
|---|
| .. | .. |
|---|
| 2173 | 2237 | for (i = 0; i < ndoms_cur; i++) { |
|---|
| 2174 | 2238 | for (j = 0; j < n && !new_topology; j++) { |
|---|
| 2175 | 2239 | if (cpumask_equal(doms_cur[i], doms_new[j]) && |
|---|
| 2176 | | - dattrs_equal(dattr_cur, i, dattr_new, j)) |
|---|
| 2240 | + dattrs_equal(dattr_cur, i, dattr_new, j)) { |
|---|
| 2241 | + struct root_domain *rd; |
|---|
| 2242 | + |
|---|
| 2243 | + /* |
|---|
| 2244 | + * This domain won't be destroyed and as such |
|---|
| 2245 | + * its dl_bw->total_bw needs to be cleared. It |
|---|
| 2246 | + * will be recomputed in function |
|---|
| 2247 | + * update_tasks_root_domain(). |
|---|
| 2248 | + */ |
|---|
| 2249 | + rd = cpu_rq(cpumask_any(doms_cur[i]))->rd; |
|---|
| 2250 | + dl_clear_root_domain(rd); |
|---|
| 2177 | 2251 | goto match1; |
|---|
| 2252 | + } |
|---|
| 2178 | 2253 | } |
|---|
| 2179 | 2254 | /* No match - a current sched domain not in new doms_new[] */ |
|---|
| 2180 | 2255 | detach_destroy_domains(doms_cur[i]); |
|---|
| .. | .. |
|---|
| 2203 | 2278 | ; |
|---|
| 2204 | 2279 | } |
|---|
| 2205 | 2280 | |
|---|
| 2206 | | -#ifdef CONFIG_ENERGY_MODEL |
|---|
| 2281 | +#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) |
|---|
| 2207 | 2282 | /* Build perf. domains: */ |
|---|
| 2208 | 2283 | for (i = 0; i < ndoms_new; i++) { |
|---|
| 2209 | | - for (j = 0; j < n; j++) { |
|---|
| 2284 | + for (j = 0; j < n && !sched_energy_update; j++) { |
|---|
| 2210 | 2285 | if (cpumask_equal(doms_new[i], doms_cur[j]) && |
|---|
| 2211 | 2286 | cpu_rq(cpumask_first(doms_cur[j]))->rd->pd) { |
|---|
| 2212 | 2287 | has_eas = true; |
|---|
| .. | .. |
|---|
| 2231 | 2306 | ndoms_cur = ndoms_new; |
|---|
| 2232 | 2307 | |
|---|
| 2233 | 2308 | register_sched_domain_sysctl(); |
|---|
| 2309 | +} |
|---|
| 2234 | 2310 | |
|---|
| 2311 | +/* |
|---|
| 2312 | + * Call with hotplug lock held |
|---|
| 2313 | + */ |
|---|
| 2314 | +void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], |
|---|
| 2315 | + struct sched_domain_attr *dattr_new) |
|---|
| 2316 | +{ |
|---|
| 2317 | + mutex_lock(&sched_domains_mutex); |
|---|
| 2318 | + partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); |
|---|
| 2235 | 2319 | mutex_unlock(&sched_domains_mutex); |
|---|
| 2236 | 2320 | } |
|---|
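Splitting out `partition_sched_domains_locked()` lets callers that already hold `sched_domains_mutex` reuse the rebuild logic (the lockdep assertion documents that requirement), while `partition_sched_domains()` keeps the original take-lock/call/unlock behaviour for everyone else. A hedged sketch of a caller of the locked variant; the surrounding function is illustrative, only the `_locked` API comes from the patch:

```c
/* Illustrative only: a caller that must do extra work under the same mutex. */
static void example_rebuild_under_lock(int ndoms, cpumask_var_t doms[],
				       struct sched_domain_attr *attr)
{
	mutex_lock(&sched_domains_mutex);
	/* ... update state that the rebuild must observe atomically ... */
	partition_sched_domains_locked(ndoms, doms, attr);
	mutex_unlock(&sched_domains_mutex);
}
```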