.. | .. |
---|
4 | 4 | */ |
---|
5 | 5 | #include "sched.h" |
---|
6 | 6 | |
---|
| 7 | +#include <trace/hooks/sched.h> |
---|
| 8 | + |
---|
7 | 9 | DEFINE_MUTEX(sched_domains_mutex); |
---|
| 10 | +#ifdef CONFIG_LOCKDEP |
---|
| 11 | +EXPORT_SYMBOL_GPL(sched_domains_mutex); |
---|
| 12 | +#endif |
---|
8 | 13 | |
---|
9 | 14 | /* Protected by sched_domains_mutex: */ |
---|
10 | | -cpumask_var_t sched_domains_tmpmask; |
---|
11 | | -cpumask_var_t sched_domains_tmpmask2; |
---|
| 15 | +static cpumask_var_t sched_domains_tmpmask; |
---|
| 16 | +static cpumask_var_t sched_domains_tmpmask2; |
---|
12 | 17 | |
---|
13 | 18 | #ifdef CONFIG_SCHED_DEBUG |
---|
14 | 19 | |
---|
.. | .. |
---|
25 | 30 | return sched_debug_enabled; |
---|
26 | 31 | } |
---|
27 | 32 | |
---|
| 33 | +#define SD_FLAG(_name, mflags) [__##_name] = { .meta_flags = mflags, .name = #_name }, |
---|
| 34 | +const struct sd_flag_debug sd_flag_debug[] = { |
---|
| 35 | +#include <linux/sched/sd_flags.h> |
---|
| 36 | +}; |
---|
| 37 | +#undef SD_FLAG |
---|
| 38 | + |
---|
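The `sd_flag_debug[]` construction above re-includes `<linux/sched/sd_flags.h>` with a local `SD_FLAG()` definition, so the flag names and their meta-flags become available to the parent/child consistency checks added further down in `sched_domain_debug_one()`. A minimal userspace sketch of the same X-macro pattern, assuming a hand-written three-entry list in place of the real header (the flag names exist in the kernel, but the meta-flag values here are illustrative only):

```c
#include <stdio.h>

#define SDF_SHARED_CHILD  0x1
#define SDF_SHARED_PARENT 0x2

/* Stand-in for <linux/sched/sd_flags.h>: a list of SD_FLAG(name, meta) entries.
 * The real header has more flags and different meta-flags. */
#define SD_FLAG_LIST                                    \
        SD_FLAG(SD_BALANCE_NEWIDLE, SDF_SHARED_CHILD)   \
        SD_FLAG(SD_BALANCE_EXEC,    SDF_SHARED_CHILD)   \
        SD_FLAG(SD_ASYM_PACKING,    SDF_SHARED_PARENT)

/* Expansion 1: an enum of array indices, __SD_BALANCE_NEWIDLE, ... */
#define SD_FLAG(_name, mflags) __##_name,
enum { SD_FLAG_LIST __SD_FLAG_CNT };
#undef SD_FLAG

struct sd_flag_debug {
        unsigned int meta_flags;
        const char *name;
};

/* Expansion 2: the name/meta-flag table, shaped like the hunk above. */
#define SD_FLAG(_name, mflags) [__##_name] = { .meta_flags = mflags, .name = #_name },
static const struct sd_flag_debug sd_flag_debug[] = { SD_FLAG_LIST };
#undef SD_FLAG

int main(void)
{
        for (int i = 0; i < __SD_FLAG_CNT; i++)
                printf("%-20s meta_flags=0x%x\n",
                       sd_flag_debug[i].name, sd_flag_debug[i].meta_flags);
        return 0;
}
```

The same list drives both the enum of indices and the table, so a flag added to the header automatically shows up in the debug output.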
28 | 39 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, |
---|
29 | 40 | struct cpumask *groupmask) |
---|
30 | 41 | { |
---|
31 | 42 | struct sched_group *group = sd->groups; |
---|
| 43 | + unsigned long flags = sd->flags; |
---|
| 44 | + unsigned int idx; |
---|
32 | 45 | |
---|
33 | 46 | cpumask_clear(groupmask); |
---|
34 | 47 | |
---|
35 | 48 | printk(KERN_DEBUG "%*s domain-%d: ", level, "", level); |
---|
36 | | - |
---|
37 | | - if (!(sd->flags & SD_LOAD_BALANCE)) { |
---|
38 | | - printk("does not load-balance\n"); |
---|
39 | | - if (sd->parent) |
---|
40 | | - printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent"); |
---|
41 | | - return -1; |
---|
42 | | - } |
---|
43 | | - |
---|
44 | 49 | printk(KERN_CONT "span=%*pbl level=%s\n", |
---|
45 | 50 | cpumask_pr_args(sched_domain_span(sd)), sd->name); |
---|
46 | 51 | |
---|
.. | .. |
---|
49 | 54 | } |
---|
50 | 55 | if (group && !cpumask_test_cpu(cpu, sched_group_span(group))) { |
---|
51 | 56 | printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu); |
---|
| 57 | + } |
---|
| 58 | + |
---|
| 59 | + for_each_set_bit(idx, &flags, __SD_FLAG_CNT) { |
---|
| 60 | + unsigned int flag = BIT(idx); |
---|
| 61 | + unsigned int meta_flags = sd_flag_debug[idx].meta_flags; |
---|
| 62 | + |
---|
| 63 | + if ((meta_flags & SDF_SHARED_CHILD) && sd->child && |
---|
| 64 | + !(sd->child->flags & flag)) |
---|
| 65 | + printk(KERN_ERR "ERROR: flag %s set here but not in child\n", |
---|
| 66 | + sd_flag_debug[idx].name); |
---|
| 67 | + |
---|
| 68 | + if ((meta_flags & SDF_SHARED_PARENT) && sd->parent && |
---|
| 69 | + !(sd->parent->flags & flag)) |
---|
| 70 | + printk(KERN_ERR "ERROR: flag %s set here but not in parent\n", |
---|
| 71 | + sd_flag_debug[idx].name); |
---|
52 | 72 | } |
---|
53 | 73 | |
---|
54 | 74 | printk(KERN_DEBUG "%*s groups:", level + 1, ""); |
---|
.. | .. |
---|
145 | 165 | } |
---|
146 | 166 | #endif /* CONFIG_SCHED_DEBUG */ |
---|
147 | 167 | |
---|
| 168 | +/* Generate a mask of SD flags with the SDF_NEEDS_GROUPS metaflag */ |
---|
| 169 | +#define SD_FLAG(name, mflags) (name * !!((mflags) & SDF_NEEDS_GROUPS)) | |
---|
| 170 | +static const unsigned int SD_DEGENERATE_GROUPS_MASK = |
---|
| 171 | +#include <linux/sched/sd_flags.h> |
---|
| 172 | +0; |
---|
| 173 | +#undef SD_FLAG |
---|
| 174 | + |
---|
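The initializer above builds `SD_DEGENERATE_GROUPS_MASK` at compile time by re-including `sd_flags.h` with a throwaway `SD_FLAG()` definition, so the mask automatically tracks whichever flags carry `SDF_NEEDS_GROUPS`; `sd_degenerate()` and `sd_parent_degenerate()` below then test one mask instead of a hand-maintained flag list. A compilable sketch with two hand-written entries standing in for the header (the flag values and meta-flag assignments are illustrative only):

```c
#include <stdio.h>

#define SDF_SHARED_CHILD 0x1
#define SDF_NEEDS_GROUPS 0x2

#define SD_BALANCE_NEWIDLE 0x01
#define SD_WAKE_AFFINE     0x10

/*
 * Each SD_FLAG() entry contributes its flag value when it carries
 * SDF_NEEDS_GROUPS (!! collapses the test to 0 or 1) and contributes 0
 * otherwise; the trailing "0" terminates the OR chain.
 */
#define SD_FLAG(name, mflags) (name * !!((mflags) & SDF_NEEDS_GROUPS)) |
static const unsigned int SD_DEGENERATE_GROUPS_MASK =
        SD_FLAG(SD_BALANCE_NEWIDLE, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
        SD_FLAG(SD_WAKE_AFFINE,     SDF_SHARED_CHILD)
        0;
#undef SD_FLAG

int main(void)
{
        /* Only SD_BALANCE_NEWIDLE carries SDF_NEEDS_GROUPS here, so 0x1. */
        printf("SD_DEGENERATE_GROUPS_MASK = 0x%x\n", SD_DEGENERATE_GROUPS_MASK);
        return 0;
}
```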
148 | 175 | static int sd_degenerate(struct sched_domain *sd) |
---|
149 | 176 | { |
---|
150 | 177 | if (cpumask_weight(sched_domain_span(sd)) == 1) |
---|
151 | 178 | return 1; |
---|
152 | 179 | |
---|
153 | 180 | /* Following flags need at least 2 groups */ |
---|
154 | | - if (sd->flags & (SD_LOAD_BALANCE | |
---|
155 | | - SD_BALANCE_NEWIDLE | |
---|
156 | | - SD_BALANCE_FORK | |
---|
157 | | - SD_BALANCE_EXEC | |
---|
158 | | - SD_SHARE_CPUCAPACITY | |
---|
159 | | - SD_ASYM_CPUCAPACITY | |
---|
160 | | - SD_SHARE_PKG_RESOURCES | |
---|
161 | | - SD_SHARE_POWERDOMAIN)) { |
---|
162 | | - if (sd->groups != sd->groups->next) |
---|
163 | | - return 0; |
---|
164 | | - } |
---|
| 181 | + if ((sd->flags & SD_DEGENERATE_GROUPS_MASK) && |
---|
| 182 | + (sd->groups != sd->groups->next)) |
---|
| 183 | + return 0; |
---|
165 | 184 | |
---|
166 | 185 | /* Following flags don't use groups */ |
---|
167 | 186 | if (sd->flags & (SD_WAKE_AFFINE)) |
---|
.. | .. |
---|
182 | 201 | return 0; |
---|
183 | 202 | |
---|
184 | 203 | /* Flags needing groups don't count if only 1 group in parent */ |
---|
185 | | - if (parent->groups == parent->groups->next) { |
---|
186 | | - pflags &= ~(SD_LOAD_BALANCE | |
---|
187 | | - SD_BALANCE_NEWIDLE | |
---|
188 | | - SD_BALANCE_FORK | |
---|
189 | | - SD_BALANCE_EXEC | |
---|
190 | | - SD_ASYM_CPUCAPACITY | |
---|
191 | | - SD_SHARE_CPUCAPACITY | |
---|
192 | | - SD_SHARE_PKG_RESOURCES | |
---|
193 | | - SD_PREFER_SIBLING | |
---|
194 | | - SD_SHARE_POWERDOMAIN); |
---|
195 | | - if (nr_node_ids == 1) |
---|
196 | | - pflags &= ~SD_SERIALIZE; |
---|
197 | | - } |
---|
| 204 | + if (parent->groups == parent->groups->next) |
---|
| 205 | + pflags &= ~SD_DEGENERATE_GROUPS_MASK; |
---|
| 206 | + |
---|
198 | 207 | if (~cflags & pflags) |
---|
199 | 208 | return 0; |
---|
200 | 209 | |
---|
201 | 210 | return 1; |
---|
202 | 211 | } |
---|
203 | 212 | |
---|
204 | | -DEFINE_STATIC_KEY_FALSE(sched_energy_present); |
---|
205 | | - |
---|
206 | | -#ifdef CONFIG_ENERGY_MODEL |
---|
207 | 213 | #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) |
---|
| 214 | +DEFINE_STATIC_KEY_FALSE(sched_energy_present); |
---|
208 | 215 | unsigned int sysctl_sched_energy_aware = 1; |
---|
209 | 216 | DEFINE_MUTEX(sched_energy_mutex); |
---|
210 | 217 | bool sched_energy_update; |
---|
211 | 218 | |
---|
212 | 219 | #ifdef CONFIG_PROC_SYSCTL |
---|
213 | 220 | int sched_energy_aware_handler(struct ctl_table *table, int write, |
---|
214 | | - void __user *buffer, size_t *lenp, loff_t *ppos) |
---|
| 221 | + void *buffer, size_t *lenp, loff_t *ppos) |
---|
215 | 222 | { |
---|
216 | 223 | int ret, state; |
---|
217 | 224 | |
---|
.. | .. |
---|
233 | 240 | return ret; |
---|
234 | 241 | } |
---|
235 | 242 | #endif |
---|
236 | | -#endif /* defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */ |
---|
237 | 243 | |
---|
238 | 244 | static void free_pd(struct perf_domain *pd) |
---|
239 | 245 | { |
---|
.. | .. |
---|
285 | 291 | printk(KERN_DEBUG "root_domain %*pbl:", cpumask_pr_args(cpu_map)); |
---|
286 | 292 | |
---|
287 | 293 | while (pd) { |
---|
288 | | - printk(KERN_CONT " pd%d:{ cpus=%*pbl nr_cstate=%d }", |
---|
| 294 | + printk(KERN_CONT " pd%d:{ cpus=%*pbl nr_pstate=%d }", |
---|
289 | 295 | cpumask_first(perf_domain_span(pd)), |
---|
290 | 296 | cpumask_pr_args(perf_domain_span(pd)), |
---|
291 | | - em_pd_nr_cap_states(pd->em_pd)); |
---|
| 297 | + em_pd_nr_perf_states(pd->em_pd)); |
---|
292 | 298 | pd = pd->next; |
---|
293 | 299 | } |
---|
294 | 300 | |
---|
.. | .. |
---|
320 | 326 | * EAS can be used on a root domain if it meets all the following conditions: |
---|
321 | 327 | * 1. an Energy Model (EM) is available; |
---|
322 | 328 | * 2. the SD_ASYM_CPUCAPACITY flag is set in the sched_domain hierarchy. |
---|
323 | | - * 3. the EM complexity is low enough to keep scheduling overheads low; |
---|
| 329 | + * 3. no SMT is detected. |
---|
| 330 | + * 4. the EM complexity is low enough to keep scheduling overheads low; |
---|
324 | 331 | * |
---|
325 | 332 | * The complexity of the Energy Model is defined as: |
---|
326 | 333 | * |
---|
327 | | - * C = nr_pd * (nr_cpus + nr_cs) |
---|
| 334 | + * C = nr_pd * (nr_cpus + nr_ps) |
---|
328 | 335 | * |
---|
329 | 336 | * with parameters defined as: |
---|
330 | 337 | * - nr_pd: the number of performance domains |
---|
331 | 338 | * - nr_cpus: the number of CPUs |
---|
332 | | - * - nr_cs: the sum of the number of capacity states of all performance |
---|
| 339 | + * - nr_ps: the sum of the number of performance states of all performance |
---|
333 | 340 | * domains (for example, on a system with 2 performance domains, |
---|
334 | | - * with 10 capacity states each, nr_cs = 2 * 10 = 20). |
---|
| 341 | + * with 10 performance states each, nr_ps = 2 * 10 = 20). |
---|
335 | 342 | * |
---|
336 | 343 | * It is generally not a good idea to use such a model in the wake-up path on |
---|
337 | 344 | * very complex platforms because of the associated scheduling overheads. The |
---|
338 | 345 | * arbitrary constraint below prevents that. It makes EAS usable up to 16 CPUs |
---|
339 | | - * with per-CPU DVFS and less than 8 capacity states each, for example. |
---|
| 346 | + * with per-CPU DVFS and less than 8 performance states each, for example. |
---|
340 | 347 | */ |
---|
341 | 348 | #define EM_MAX_COMPLEXITY 2048 |
---|
342 | 349 | |
---|
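To make the bound concrete: with 16 CPUs doing per-CPU DVFS (so nr_pd = nr_cpus = 16) and 7 performance states per domain, nr_ps = 16 * 7 = 112 and C = 16 * (16 + 112) = 2048, which is exactly EM_MAX_COMPLEXITY and still accepted; with 8 states per domain, C = 16 * (16 + 128) = 2304 and build_perf_domains() below bails out with the "EM complexity is too high" warning.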
343 | 350 | static bool build_perf_domains(const struct cpumask *cpu_map) |
---|
344 | 351 | { |
---|
345 | | - int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map); |
---|
| 352 | + int i, nr_pd = 0, nr_ps = 0, nr_cpus = cpumask_weight(cpu_map); |
---|
346 | 353 | struct perf_domain *pd = NULL, *tmp; |
---|
347 | 354 | int cpu = cpumask_first(cpu_map); |
---|
348 | 355 | struct root_domain *rd = cpu_rq(cpu)->rd; |
---|
| 356 | + bool eas_check = false; |
---|
349 | 357 | |
---|
350 | | -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) |
---|
351 | 358 | if (!sysctl_sched_energy_aware) |
---|
352 | 359 | goto free; |
---|
353 | | -#endif |
---|
354 | 360 | |
---|
355 | | - /* EAS is enabled for asymmetric CPU capacity topologies. */ |
---|
356 | | - if (!per_cpu(sd_asym_cpucapacity, cpu)) { |
---|
| 361 | + /* |
---|
| 362 | + * EAS is enabled for asymmetric CPU capacity topologies. |
---|
| 363 | + * Allow vendor to override if desired. |
---|
| 364 | + */ |
---|
| 365 | + trace_android_rvh_build_perf_domains(&eas_check); |
---|
| 366 | + if (!per_cpu(sd_asym_cpucapacity, cpu) && !eas_check) { |
---|
357 | 367 | if (sched_debug()) { |
---|
358 | 368 | pr_info("rd %*pbl: CPUs do not have asymmetric capacities\n", |
---|
359 | 369 | cpumask_pr_args(cpu_map)); |
---|
360 | 370 | } |
---|
| 371 | + goto free; |
---|
| 372 | + } |
---|
| 373 | + |
---|
| 374 | + /* EAS definitely does *not* handle SMT */ |
---|
| 375 | + if (sched_smt_active()) { |
---|
| 376 | + pr_warn("rd %*pbl: Disabling EAS, SMT is not supported\n", |
---|
| 377 | + cpumask_pr_args(cpu_map)); |
---|
361 | 378 | goto free; |
---|
362 | 379 | } |
---|
363 | 380 | |
---|
.. | .. |
---|
374 | 391 | pd = tmp; |
---|
375 | 392 | |
---|
376 | 393 | /* |
---|
377 | | - * Count performance domains and capacity states for the |
---|
| 394 | + * Count performance domains and performance states for the |
---|
378 | 395 | * complexity check. |
---|
379 | 396 | */ |
---|
380 | 397 | nr_pd++; |
---|
381 | | - nr_cs += em_pd_nr_cap_states(pd->em_pd); |
---|
| 398 | + nr_ps += em_pd_nr_perf_states(pd->em_pd); |
---|
382 | 399 | } |
---|
383 | 400 | |
---|
384 | 401 | /* Bail out if the Energy Model complexity is too high. */ |
---|
385 | | - if (nr_pd * (nr_cs + nr_cpus) > EM_MAX_COMPLEXITY) { |
---|
| 402 | + if (nr_pd * (nr_ps + nr_cpus) > EM_MAX_COMPLEXITY) { |
---|
386 | 403 | WARN(1, "rd %*pbl: Failed to start EAS, EM complexity is too high\n", |
---|
387 | 404 | cpumask_pr_args(cpu_map)); |
---|
388 | 405 | goto free; |
---|
.. | .. |
---|
409 | 426 | } |
---|
410 | 427 | #else |
---|
411 | 428 | static void free_pd(struct perf_domain *pd) { } |
---|
412 | | -#endif /* CONFIG_ENERGY_MODEL */ |
---|
| 429 | +#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL*/ |
---|
413 | 430 | |
---|
414 | 431 | static void free_rootdomain(struct rcu_head *rcu) |
---|
415 | 432 | { |
---|
.. | .. |
---|
459 | 476 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
---|
460 | 477 | |
---|
461 | 478 | if (old_rd) |
---|
462 | | - call_rcu_sched(&old_rd->rcu, free_rootdomain); |
---|
| 479 | + call_rcu(&old_rd->rcu, free_rootdomain); |
---|
463 | 480 | } |
---|
464 | 481 | |
---|
465 | 482 | void sched_get_rd(struct root_domain *rd) |
---|
.. | .. |
---|
472 | 489 | if (!atomic_dec_and_test(&rd->refcount)) |
---|
473 | 490 | return; |
---|
474 | 491 | |
---|
475 | | - call_rcu_sched(&rd->rcu, free_rootdomain); |
---|
| 492 | + call_rcu(&rd->rcu, free_rootdomain); |
---|
476 | 493 | } |
---|
477 | 494 | |
---|
478 | 495 | static int init_rootdomain(struct root_domain *rd) |
---|
.. | .. |
---|
490 | 507 | rd->rto_cpu = -1; |
---|
491 | 508 | raw_spin_lock_init(&rd->rto_lock); |
---|
492 | 509 | init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); |
---|
493 | | - rd->rto_push_work.flags |= IRQ_WORK_HARD_IRQ; |
---|
494 | 510 | #endif |
---|
495 | 511 | |
---|
496 | 512 | init_dl_bw(&rd->dl_bw); |
---|
.. | .. |
---|
499 | 515 | |
---|
500 | 516 | if (cpupri_init(&rd->cpupri) != 0) |
---|
501 | 517 | goto free_cpudl; |
---|
502 | | - |
---|
503 | | - init_max_cpu_capacity(&rd->max_cpu_capacity); |
---|
504 | | - |
---|
505 | 518 | return 0; |
---|
506 | 519 | |
---|
507 | 520 | free_cpudl: |
---|
.. | .. |
---|
607 | 620 | * the cpumask of the domain), this allows us to quickly tell if |
---|
608 | 621 | * two CPUs are in the same cache domain, see cpus_share_cache(). |
---|
609 | 622 | */ |
---|
610 | | -DEFINE_PER_CPU(struct sched_domain *, sd_llc); |
---|
| 623 | +DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc); |
---|
611 | 624 | DEFINE_PER_CPU(int, sd_llc_size); |
---|
612 | 625 | DEFINE_PER_CPU(int, sd_llc_id); |
---|
613 | | -DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); |
---|
614 | | -DEFINE_PER_CPU(struct sched_domain *, sd_numa); |
---|
615 | | -DEFINE_PER_CPU(struct sched_domain *, sd_asym_packing); |
---|
616 | | -DEFINE_PER_CPU(struct sched_domain *, sd_asym_cpucapacity); |
---|
| 626 | +DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared); |
---|
| 627 | +DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa); |
---|
| 628 | +DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing); |
---|
| 629 | +DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity); |
---|
617 | 630 | DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity); |
---|
618 | 631 | |
---|
619 | 632 | static void update_top_cache_domain(int cpu) |
---|
.. | .. |
---|
1051 | 1064 | struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); |
---|
1052 | 1065 | struct sched_domain *child = sd->child; |
---|
1053 | 1066 | struct sched_group *sg; |
---|
| 1067 | + bool already_visited; |
---|
1054 | 1068 | |
---|
1055 | 1069 | if (child) |
---|
1056 | 1070 | cpu = cpumask_first(sched_domain_span(child)); |
---|
.. | .. |
---|
1058 | 1072 | sg = *per_cpu_ptr(sdd->sg, cpu); |
---|
1059 | 1073 | sg->sgc = *per_cpu_ptr(sdd->sgc, cpu); |
---|
1060 | 1074 | |
---|
1061 | | - /* For claim_allocations: */ |
---|
1062 | | - atomic_inc(&sg->ref); |
---|
1063 | | - atomic_inc(&sg->sgc->ref); |
---|
| 1075 | + /* Increase refcounts for claim_allocations: */ |
---|
| 1076 | + already_visited = atomic_inc_return(&sg->ref) > 1; |
---|
| 1077 | + /* sgc visits should follow a similar trend as sg */ |
---|
| 1078 | + WARN_ON(already_visited != (atomic_inc_return(&sg->sgc->ref) > 1)); |
---|
| 1079 | + |
---|
| 1080 | + /* If we have already visited that group, it's already initialized. */ |
---|
| 1081 | + if (already_visited) |
---|
| 1082 | + return sg; |
---|
1064 | 1083 | |
---|
1065 | 1084 | if (child) { |
---|
1066 | 1085 | cpumask_copy(sched_group_span(sg), sched_domain_span(child)); |
---|
.. | .. |
---|
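The `get_group()` change above replaces the two unconditional `atomic_inc()` calls with `atomic_inc_return()`, so the first CPU to reach a group both takes a reference and initializes it, while later CPUs only take a reference (the `WARN_ON` checks that `sg` and `sgc` are visited in lockstep). A small C11-atomics model of that "first visitor initializes" pattern; the struct and field names are invented for the sketch:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for struct sched_group: a refcount plus one field that needs
 * one-time initialization. */
struct group {
        atomic_int ref;
        int span;
};

static struct group *get_group(struct group *sg, int span)
{
        /* The kernel uses atomic_inc_return(); fetch_add returns the old
         * value, so +1 gives the new count. Anything above 1 means a
         * previous CPU already initialized this group. */
        bool already_visited = atomic_fetch_add(&sg->ref, 1) + 1 > 1;

        if (already_visited)
                return sg;      /* just take another reference */

        sg->span = span;        /* first visitor does the setup */
        return sg;
}

int main(void)
{
        struct group g = { .span = -1 };

        atomic_init(&g.ref, 0);
        get_group(&g, 3);       /* initializes span to 3 */
        get_group(&g, 7);       /* only bumps the refcount */
        printf("ref=%d span=%d\n", atomic_load(&g.ref), g.span);
        return 0;
}
```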
1079 | 1098 | |
---|
1080 | 1099 | /* |
---|
1081 | 1100 | * build_sched_groups will build a circular linked list of the groups |
---|
1082 | | - * covered by the given span, and will set each group's ->cpumask correctly, |
---|
1083 | | - * and ->cpu_capacity to 0. |
---|
| 1101 | + * covered by the given span, will set each group's ->cpumask correctly, |
---|
| 1102 | + * and will initialize their ->sgc. |
---|
1084 | 1103 | * |
---|
1085 | 1104 | * Assumes the sched_domain tree is fully constructed |
---|
1086 | 1105 | */ |
---|
.. | .. |
---|
1187 | 1206 | if (!attr || attr->relax_domain_level < 0) { |
---|
1188 | 1207 | if (default_relax_domain_level < 0) |
---|
1189 | 1208 | return; |
---|
1190 | | - else |
---|
1191 | | - request = default_relax_domain_level; |
---|
| 1209 | + request = default_relax_domain_level; |
---|
1192 | 1210 | } else |
---|
1193 | 1211 | request = attr->relax_domain_level; |
---|
1194 | | - if (request < sd->level) { |
---|
| 1212 | + |
---|
| 1213 | + if (sd->level > request) { |
---|
1195 | 1214 | /* Turn off idle balance on this domain: */ |
---|
1196 | 1215 | sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); |
---|
1197 | | - } else { |
---|
1198 | | - /* Turn on idle balance on this domain: */ |
---|
1199 | | - sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); |
---|
1200 | 1216 | } |
---|
1201 | 1217 | } |
---|
1202 | 1218 | |
---|
.. | .. |
---|
1210 | 1226 | case sa_rootdomain: |
---|
1211 | 1227 | if (!atomic_read(&d->rd->refcount)) |
---|
1212 | 1228 | free_rootdomain(&d->rd->rcu); |
---|
1213 | | - /* Fall through */ |
---|
| 1229 | + fallthrough; |
---|
1214 | 1230 | case sa_sd: |
---|
1215 | 1231 | free_percpu(d->sd); |
---|
1216 | | - /* Fall through */ |
---|
| 1232 | + fallthrough; |
---|
1217 | 1233 | case sa_sd_storage: |
---|
1218 | 1234 | __sdt_free(cpu_map); |
---|
1219 | | - /* Fall through */ |
---|
| 1235 | + fallthrough; |
---|
1220 | 1236 | case sa_none: |
---|
1221 | 1237 | break; |
---|
1222 | 1238 | } |
---|
.. | .. |
---|
1270 | 1286 | int sched_max_numa_distance; |
---|
1271 | 1287 | static int *sched_domains_numa_distance; |
---|
1272 | 1288 | static struct cpumask ***sched_domains_numa_masks; |
---|
| 1289 | +int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; |
---|
1273 | 1290 | #endif |
---|
1274 | 1291 | |
---|
1275 | 1292 | /* |
---|
.. | .. |
---|
1282 | 1299 | * SD_SHARE_CPUCAPACITY - describes SMT topologies |
---|
1283 | 1300 | * SD_SHARE_PKG_RESOURCES - describes shared caches |
---|
1284 | 1301 | * SD_NUMA - describes NUMA topologies |
---|
1285 | | - * SD_SHARE_POWERDOMAIN - describes shared power domain |
---|
1286 | 1302 | * |
---|
1287 | 1303 | * Odd one out, which beside describing the topology has a quirk also |
---|
1288 | 1304 | * prescribes the desired behaviour that goes along with it: |
---|
.. | .. |
---|
1293 | 1309 | (SD_SHARE_CPUCAPACITY | \ |
---|
1294 | 1310 | SD_SHARE_PKG_RESOURCES | \ |
---|
1295 | 1311 | SD_NUMA | \ |
---|
1296 | | - SD_ASYM_PACKING | \ |
---|
1297 | | - SD_SHARE_POWERDOMAIN) |
---|
| 1312 | + SD_ASYM_PACKING) |
---|
1298 | 1313 | |
---|
1299 | 1314 | static struct sched_domain * |
---|
1300 | 1315 | sd_init(struct sched_domain_topology_level *tl, |
---|
.. | .. |
---|
1326 | 1341 | *sd = (struct sched_domain){ |
---|
1327 | 1342 | .min_interval = sd_weight, |
---|
1328 | 1343 | .max_interval = 2*sd_weight, |
---|
1329 | | - .busy_factor = 32, |
---|
1330 | | - .imbalance_pct = 125, |
---|
| 1344 | + .busy_factor = 16, |
---|
| 1345 | + .imbalance_pct = 117, |
---|
1331 | 1346 | |
---|
1332 | 1347 | .cache_nice_tries = 0, |
---|
1333 | | - .busy_idx = 0, |
---|
1334 | | - .idle_idx = 0, |
---|
1335 | | - .newidle_idx = 0, |
---|
1336 | | - .wake_idx = 0, |
---|
1337 | | - .forkexec_idx = 0, |
---|
1338 | 1348 | |
---|
1339 | | - .flags = 1*SD_LOAD_BALANCE |
---|
1340 | | - | 1*SD_BALANCE_NEWIDLE |
---|
| 1349 | + .flags = 1*SD_BALANCE_NEWIDLE |
---|
1341 | 1350 | | 1*SD_BALANCE_EXEC |
---|
1342 | 1351 | | 1*SD_BALANCE_FORK |
---|
1343 | 1352 | | 0*SD_BALANCE_WAKE |
---|
.. | .. |
---|
1352 | 1361 | |
---|
1353 | 1362 | .last_balance = jiffies, |
---|
1354 | 1363 | .balance_interval = sd_weight, |
---|
1355 | | - .smt_gain = 0, |
---|
1356 | 1364 | .max_newidle_lb_cost = 0, |
---|
1357 | 1365 | .next_decay_max_lb_cost = jiffies, |
---|
1358 | 1366 | .child = child, |
---|
.. | .. |
---|
1368 | 1376 | * Convert topological properties into behaviour. |
---|
1369 | 1377 | */ |
---|
1370 | 1378 | |
---|
1371 | | - if (sd->flags & SD_ASYM_CPUCAPACITY) { |
---|
1372 | | - struct sched_domain *t = sd; |
---|
1373 | | - |
---|
1374 | | - /* |
---|
1375 | | - * Don't attempt to spread across CPUs of different capacities. |
---|
1376 | | - */ |
---|
1377 | | - if (sd->child) |
---|
1378 | | - sd->child->flags &= ~SD_PREFER_SIBLING; |
---|
1379 | | - |
---|
1380 | | - for_each_lower_domain(t) |
---|
1381 | | - t->flags |= SD_BALANCE_WAKE; |
---|
1382 | | - } |
---|
| 1379 | + /* Don't attempt to spread across CPUs of different capacities. */ |
---|
| 1380 | + if ((sd->flags & SD_ASYM_CPUCAPACITY) && sd->child) |
---|
| 1381 | + sd->child->flags &= ~SD_PREFER_SIBLING; |
---|
1383 | 1382 | |
---|
1384 | 1383 | if (sd->flags & SD_SHARE_CPUCAPACITY) { |
---|
1385 | 1384 | sd->imbalance_pct = 110; |
---|
1386 | | - sd->smt_gain = 1178; /* ~15% */ |
---|
1387 | 1385 | |
---|
1388 | 1386 | } else if (sd->flags & SD_SHARE_PKG_RESOURCES) { |
---|
1389 | 1387 | sd->imbalance_pct = 117; |
---|
1390 | 1388 | sd->cache_nice_tries = 1; |
---|
1391 | | - sd->busy_idx = 2; |
---|
1392 | 1389 | |
---|
1393 | 1390 | #ifdef CONFIG_NUMA |
---|
1394 | 1391 | } else if (sd->flags & SD_NUMA) { |
---|
1395 | 1392 | sd->cache_nice_tries = 2; |
---|
1396 | | - sd->busy_idx = 3; |
---|
1397 | | - sd->idle_idx = 2; |
---|
1398 | 1393 | |
---|
1399 | 1394 | sd->flags &= ~SD_PREFER_SIBLING; |
---|
1400 | 1395 | sd->flags |= SD_SERIALIZE; |
---|
1401 | | - if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) { |
---|
| 1396 | + if (sched_domains_numa_distance[tl->numa_level] > node_reclaim_distance) { |
---|
1402 | 1397 | sd->flags &= ~(SD_BALANCE_EXEC | |
---|
1403 | 1398 | SD_BALANCE_FORK | |
---|
1404 | 1399 | SD_WAKE_AFFINE); |
---|
.. | .. |
---|
1407 | 1402 | #endif |
---|
1408 | 1403 | } else { |
---|
1409 | 1404 | sd->cache_nice_tries = 1; |
---|
1410 | | - sd->busy_idx = 2; |
---|
1411 | | - sd->idle_idx = 1; |
---|
1412 | 1405 | } |
---|
1413 | 1406 | |
---|
1414 | 1407 | /* |
---|
.. | .. |
---|
1549 | 1542 | } |
---|
1550 | 1543 | } |
---|
1551 | 1544 | |
---|
| 1545 | + |
---|
| 1546 | +#define NR_DISTANCE_VALUES (1 << DISTANCE_BITS) |
---|
| 1547 | + |
---|
1552 | 1548 | void sched_init_numa(void) |
---|
1553 | 1549 | { |
---|
1554 | | - int next_distance, curr_distance = node_distance(0, 0); |
---|
1555 | 1550 | struct sched_domain_topology_level *tl; |
---|
1556 | | - int level = 0; |
---|
1557 | | - int i, j, k; |
---|
1558 | | - |
---|
1559 | | - sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL); |
---|
1560 | | - if (!sched_domains_numa_distance) |
---|
1561 | | - return; |
---|
1562 | | - |
---|
1563 | | - /* Includes NUMA identity node at level 0. */ |
---|
1564 | | - sched_domains_numa_distance[level++] = curr_distance; |
---|
1565 | | - sched_domains_numa_levels = level; |
---|
| 1551 | + unsigned long *distance_map; |
---|
| 1552 | + int nr_levels = 0; |
---|
| 1553 | + int i, j; |
---|
1566 | 1554 | |
---|
1567 | 1555 | /* |
---|
1568 | 1556 | * O(nr_nodes^2) deduplicating selection sort -- in order to find the |
---|
1569 | 1557 | * unique distances in the node_distance() table. |
---|
1570 | | - * |
---|
1571 | | - * Assumes node_distance(0,j) includes all distances in |
---|
1572 | | - * node_distance(i,j) in order to avoid cubic time. |
---|
1573 | 1558 | */ |
---|
1574 | | - next_distance = curr_distance; |
---|
| 1559 | + distance_map = bitmap_alloc(NR_DISTANCE_VALUES, GFP_KERNEL); |
---|
| 1560 | + if (!distance_map) |
---|
| 1561 | + return; |
---|
| 1562 | + |
---|
| 1563 | + bitmap_zero(distance_map, NR_DISTANCE_VALUES); |
---|
1575 | 1564 | for (i = 0; i < nr_node_ids; i++) { |
---|
1576 | 1565 | for (j = 0; j < nr_node_ids; j++) { |
---|
1577 | | - for (k = 0; k < nr_node_ids; k++) { |
---|
1578 | | - int distance = node_distance(i, k); |
---|
| 1566 | + int distance = node_distance(i, j); |
---|
1579 | 1567 | |
---|
1580 | | - if (distance > curr_distance && |
---|
1581 | | - (distance < next_distance || |
---|
1582 | | - next_distance == curr_distance)) |
---|
1583 | | - next_distance = distance; |
---|
1584 | | - |
---|
1585 | | - /* |
---|
1586 | | - * While not a strong assumption it would be nice to know |
---|
1587 | | - * about cases where if node A is connected to B, B is not |
---|
1588 | | - * equally connected to A. |
---|
1589 | | - */ |
---|
1590 | | - if (sched_debug() && node_distance(k, i) != distance) |
---|
1591 | | - sched_numa_warn("Node-distance not symmetric"); |
---|
1592 | | - |
---|
1593 | | - if (sched_debug() && i && !find_numa_distance(distance)) |
---|
1594 | | - sched_numa_warn("Node-0 not representative"); |
---|
| 1568 | + if (distance < LOCAL_DISTANCE || distance >= NR_DISTANCE_VALUES) { |
---|
| 1569 | + sched_numa_warn("Invalid distance value range"); |
---|
| 1570 | + return; |
---|
1595 | 1571 | } |
---|
1596 | | - if (next_distance != curr_distance) { |
---|
1597 | | - sched_domains_numa_distance[level++] = next_distance; |
---|
1598 | | - sched_domains_numa_levels = level; |
---|
1599 | | - curr_distance = next_distance; |
---|
1600 | | - } else break; |
---|
1601 | | - } |
---|
1602 | 1572 | |
---|
1603 | | - /* |
---|
1604 | | - * In case of sched_debug() we verify the above assumption. |
---|
1605 | | - */ |
---|
1606 | | - if (!sched_debug()) |
---|
1607 | | - break; |
---|
| 1573 | + bitmap_set(distance_map, distance, 1); |
---|
| 1574 | + } |
---|
| 1575 | + } |
---|
| 1576 | + /* |
---|
| 1577 | + * We can now figure out how many unique distance values there are and |
---|
| 1578 | + * allocate memory accordingly. |
---|
| 1579 | + */ |
---|
| 1580 | + nr_levels = bitmap_weight(distance_map, NR_DISTANCE_VALUES); |
---|
| 1581 | + |
---|
| 1582 | + sched_domains_numa_distance = kcalloc(nr_levels, sizeof(int), GFP_KERNEL); |
---|
| 1583 | + if (!sched_domains_numa_distance) { |
---|
| 1584 | + bitmap_free(distance_map); |
---|
| 1585 | + return; |
---|
1608 | 1586 | } |
---|
1609 | 1587 | |
---|
| 1588 | + for (i = 0, j = 0; i < nr_levels; i++, j++) { |
---|
| 1589 | + j = find_next_bit(distance_map, NR_DISTANCE_VALUES, j); |
---|
| 1590 | + sched_domains_numa_distance[i] = j; |
---|
| 1591 | + } |
---|
| 1592 | + |
---|
| 1593 | + bitmap_free(distance_map); |
---|
| 1594 | + |
---|
1610 | 1595 | /* |
---|
1611 | | - * 'level' contains the number of unique distances |
---|
| 1596 | + * 'nr_levels' contains the number of unique distances |
---|
1612 | 1597 | * |
---|
1613 | 1598 | * The sched_domains_numa_distance[] array includes the actual distance |
---|
1614 | 1599 | * numbers. |
---|
.. | .. |
---|
1617 | 1602 | /* |
---|
1618 | 1603 | * Here, we should temporarily reset sched_domains_numa_levels to 0. |
---|
1619 | 1604 | * If it fails to allocate memory for array sched_domains_numa_masks[][], |
---|
1620 | | - * the array will contain less then 'level' members. This could be |
---|
| 1605 | + * the array will contain less then 'nr_levels' members. This could be |
---|
1621 | 1606 | * dangerous when we use it to iterate array sched_domains_numa_masks[][] |
---|
1622 | 1607 | * in other functions. |
---|
1623 | 1608 | * |
---|
1624 | | - * We reset it to 'level' at the end of this function. |
---|
| 1609 | + * We reset it to 'nr_levels' at the end of this function. |
---|
1625 | 1610 | */ |
---|
1626 | 1611 | sched_domains_numa_levels = 0; |
---|
1627 | 1612 | |
---|
1628 | | - sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL); |
---|
| 1613 | + sched_domains_numa_masks = kzalloc(sizeof(void *) * nr_levels, GFP_KERNEL); |
---|
1629 | 1614 | if (!sched_domains_numa_masks) |
---|
1630 | 1615 | return; |
---|
1631 | 1616 | |
---|
.. | .. |
---|
1633 | 1618 | * Now for each level, construct a mask per node which contains all |
---|
1634 | 1619 | * CPUs of nodes that are that many hops away from us. |
---|
1635 | 1620 | */ |
---|
1636 | | - for (i = 0; i < level; i++) { |
---|
| 1621 | + for (i = 0; i < nr_levels; i++) { |
---|
1637 | 1622 | sched_domains_numa_masks[i] = |
---|
1638 | 1623 | kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL); |
---|
1639 | 1624 | if (!sched_domains_numa_masks[i]) |
---|
.. | .. |
---|
1641 | 1626 | |
---|
1642 | 1627 | for (j = 0; j < nr_node_ids; j++) { |
---|
1643 | 1628 | struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL); |
---|
| 1629 | + int k; |
---|
| 1630 | + |
---|
1644 | 1631 | if (!mask) |
---|
1645 | 1632 | return; |
---|
1646 | 1633 | |
---|
1647 | 1634 | sched_domains_numa_masks[i][j] = mask; |
---|
1648 | 1635 | |
---|
1649 | 1636 | for_each_node(k) { |
---|
| 1637 | + if (sched_debug() && (node_distance(j, k) != node_distance(k, j))) |
---|
| 1638 | + sched_numa_warn("Node-distance not symmetric"); |
---|
| 1639 | + |
---|
1650 | 1640 | if (node_distance(j, k) > sched_domains_numa_distance[i]) |
---|
1651 | 1641 | continue; |
---|
1652 | 1642 | |
---|
.. | .. |
---|
1658 | 1648 | /* Compute default topology size */ |
---|
1659 | 1649 | for (i = 0; sched_domain_topology[i].mask; i++); |
---|
1660 | 1650 | |
---|
1661 | | - tl = kzalloc((i + level + 1) * |
---|
| 1651 | + tl = kzalloc((i + nr_levels + 1) * |
---|
1662 | 1652 | sizeof(struct sched_domain_topology_level), GFP_KERNEL); |
---|
1663 | 1653 | if (!tl) |
---|
1664 | 1654 | return; |
---|
.. | .. |
---|
1681 | 1671 | /* |
---|
1682 | 1672 | * .. and append 'j' levels of NUMA goodness. |
---|
1683 | 1673 | */ |
---|
1684 | | - for (j = 1; j < level; i++, j++) { |
---|
| 1674 | + for (j = 1; j < nr_levels; i++, j++) { |
---|
1685 | 1675 | tl[i] = (struct sched_domain_topology_level){ |
---|
1686 | 1676 | .mask = sd_numa_mask, |
---|
1687 | 1677 | .sd_flags = cpu_numa_flags, |
---|
.. | .. |
---|
1693 | 1683 | |
---|
1694 | 1684 | sched_domain_topology = tl; |
---|
1695 | 1685 | |
---|
1696 | | - sched_domains_numa_levels = level; |
---|
1697 | | - sched_max_numa_distance = sched_domains_numa_distance[level - 1]; |
---|
| 1686 | + sched_domains_numa_levels = nr_levels; |
---|
| 1687 | + sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1]; |
---|
1698 | 1688 | |
---|
1699 | 1689 | init_numa_topology_type(); |
---|
1700 | 1690 | } |
---|
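The rewritten `sched_init_numa()` above drops the old "node 0 sees every distance" assumption: it records every `node_distance(i, j)` value in a bitmap and takes the number of set bits as the number of NUMA levels. A self-contained sketch of that deduplication, using an invented 4-node distance table and a plain bool array in place of the kernel bitmap API:

```c
#include <stdio.h>
#include <stdbool.h>

#define NR_NODES           4
#define NR_DISTANCE_VALUES 256   /* the kernel sizes this as 1 << DISTANCE_BITS */

static const int node_distance[NR_NODES][NR_NODES] = {
        { 10, 20, 20, 30 },
        { 20, 10, 30, 20 },
        { 20, 30, 10, 20 },
        { 30, 20, 20, 10 },
};

int main(void)
{
        bool distance_map[NR_DISTANCE_VALUES] = { false };
        int nr_levels = 0;

        /* O(nr_nodes^2) pass; no node is assumed to see every distance. */
        for (int i = 0; i < NR_NODES; i++)
                for (int j = 0; j < NR_NODES; j++)
                        distance_map[node_distance[i][j]] = true;

        /* Walking the set bits in order yields the sorted unique distances. */
        for (int d = 0; d < NR_DISTANCE_VALUES; d++)
                if (distance_map[d])
                        printf("level %d: distance %d\n", nr_levels++, d);

        printf("nr_levels = %d\n", nr_levels);  /* 3 for this table: 10, 20, 30 */
        return 0;
}
```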
.. | .. |
---|
1720 | 1710 | for (j = 0; j < nr_node_ids; j++) |
---|
1721 | 1711 | cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]); |
---|
1722 | 1712 | } |
---|
| 1713 | +} |
---|
| 1714 | + |
---|
| 1715 | +/* |
---|
| 1716 | + * sched_numa_find_closest() - given the NUMA topology, find the cpu |
---|
| 1717 | + * closest to @cpu from @cpumask. |
---|
| 1718 | + * cpumask: cpumask to find a cpu from |
---|
| 1719 | + * cpu: cpu to be close to |
---|
| 1720 | + * |
---|
| 1721 | + * returns: cpu, or nr_cpu_ids when nothing found. |
---|
| 1722 | + */ |
---|
| 1723 | +int sched_numa_find_closest(const struct cpumask *cpus, int cpu) |
---|
| 1724 | +{ |
---|
| 1725 | + int i, j = cpu_to_node(cpu); |
---|
| 1726 | + |
---|
| 1727 | + for (i = 0; i < sched_domains_numa_levels; i++) { |
---|
| 1728 | + cpu = cpumask_any_and(cpus, sched_domains_numa_masks[i][j]); |
---|
| 1729 | + if (cpu < nr_cpu_ids) |
---|
| 1730 | + return cpu; |
---|
| 1731 | + } |
---|
| 1732 | + return nr_cpu_ids; |
---|
1723 | 1733 | } |
---|
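`sched_numa_find_closest()` above relies on `sched_domains_numa_masks[level][node]` containing every CPU within that level's distance of the node, so scanning the levels in increasing order returns a nearest matching CPU. A toy model with two nodes, four CPUs, and plain bitmasks standing in for cpumasks; the topology and mask values are invented:

```c
#include <stdio.h>

#define NR_LEVELS 2
#define NR_NODES  2
#define NR_CPUS   4

/* Bit i set == CPU i present. Level 0: local node only; level 1: everything. */
static const unsigned int masks[NR_LEVELS][NR_NODES] = {
        { 0x3, 0xc },   /* node0 = CPUs 0-1, node1 = CPUs 2-3 */
        { 0xf, 0xf },
};

static int find_closest(unsigned int candidates, int node)
{
        for (int level = 0; level < NR_LEVELS; level++) {
                unsigned int hit = candidates & masks[level][node];
                if (hit)
                        return __builtin_ctz(hit);  /* any CPU at the nearest level */
        }
        return NR_CPUS;                             /* nothing found */
}

int main(void)
{
        /* Candidates are CPUs 1 and 3; CPU 1 is on node 0, so it wins. */
        printf("closest to node0: CPU %d\n", find_closest(0x2 | 0x8, 0));
        /* Only CPU 3 is offered; it is found at the remote level. */
        printf("closest to node0: CPU %d\n", find_closest(0x8, 0));
        return 0;
}
```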
1724 | 1734 | |
---|
1725 | 1735 | #endif /* CONFIG_NUMA */ |
---|
.. | .. |
---|
1860 | 1870 | } |
---|
1861 | 1871 | |
---|
1862 | 1872 | /* |
---|
| 1873 | + * Ensure topology masks are sane, i.e. there are no conflicts (overlaps) for |
---|
| 1874 | + * any two given CPUs at this (non-NUMA) topology level. |
---|
| 1875 | + */ |
---|
| 1876 | +static bool topology_span_sane(struct sched_domain_topology_level *tl, |
---|
| 1877 | + const struct cpumask *cpu_map, int cpu) |
---|
| 1878 | +{ |
---|
| 1879 | + int i; |
---|
| 1880 | + |
---|
| 1881 | + /* NUMA levels are allowed to overlap */ |
---|
| 1882 | + if (tl->flags & SDTL_OVERLAP) |
---|
| 1883 | + return true; |
---|
| 1884 | + |
---|
| 1885 | + /* |
---|
| 1886 | + * Non-NUMA levels cannot partially overlap - they must be either |
---|
| 1887 | + * completely equal or completely disjoint. Otherwise we can end up |
---|
| 1888 | + * breaking the sched_group lists - i.e. a later get_group() pass |
---|
| 1889 | + * breaks the linking done for an earlier span. |
---|
| 1890 | + */ |
---|
| 1891 | + for_each_cpu(i, cpu_map) { |
---|
| 1892 | + if (i == cpu) |
---|
| 1893 | + continue; |
---|
| 1894 | + /* |
---|
| 1895 | + * We should 'and' all those masks with 'cpu_map' to exactly |
---|
| 1896 | + * match the topology we're about to build, but that can only |
---|
| 1897 | + * remove CPUs, which only lessens our ability to detect |
---|
| 1898 | + * overlaps |
---|
| 1899 | + */ |
---|
| 1900 | + if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) && |
---|
| 1901 | + cpumask_intersects(tl->mask(cpu), tl->mask(i))) |
---|
| 1902 | + return false; |
---|
| 1903 | + } |
---|
| 1904 | + |
---|
| 1905 | + return true; |
---|
| 1906 | +} |
---|
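`topology_span_sane()` above rejects topologies where the masks of two CPUs at a non-NUMA level partially overlap, since a later `get_group()` pass would then relink sched_group lists built for an earlier span. A standalone sketch of the same "equal or disjoint" test, using plain bitmasks in place of cpumasks; the example masks are made up:

```c
#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS 4

/* tl_mask[cpu] models tl->mask(cpu) for one topology level. */
static bool span_sane(const unsigned int *tl_mask, int cpu)
{
        for (int i = 0; i < NR_CPUS; i++) {
                if (i == cpu)
                        continue;
                /* Partial overlap: intersecting but not identical spans. */
                if (tl_mask[cpu] != tl_mask[i] && (tl_mask[cpu] & tl_mask[i]))
                        return false;
        }
        return true;
}

int main(void)
{
        /* Sane: CPUs 0-1 share one span, CPUs 2-3 another (disjoint). */
        unsigned int good[NR_CPUS] = { 0x3, 0x3, 0xc, 0xc };
        /* Broken: CPU 1's span straddles both groups, overlapping CPU 0's. */
        unsigned int bad[NR_CPUS]  = { 0x3, 0x7, 0xc, 0xc };

        printf("good: %s\n", span_sane(good, 0) ? "sane" : "overlap");
        printf("bad:  %s\n", span_sane(bad, 0)  ? "sane" : "overlap");
        return 0;
}
```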
| 1907 | + |
---|
| 1908 | +/* |
---|
1863 | 1909 | * Find the sched_domain_topology_level where all CPU capacities are visible |
---|
1864 | 1910 | * for all CPUs. |
---|
1865 | 1911 | */ |
---|
.. | .. |
---|
1872 | 1918 | unsigned long cap; |
---|
1873 | 1919 | |
---|
1874 | 1920 | /* Is there any asymmetry? */ |
---|
1875 | | - cap = arch_scale_cpu_capacity(NULL, cpumask_first(cpu_map)); |
---|
| 1921 | + cap = arch_scale_cpu_capacity(cpumask_first(cpu_map)); |
---|
1876 | 1922 | |
---|
1877 | 1923 | for_each_cpu(i, cpu_map) { |
---|
1878 | | - if (arch_scale_cpu_capacity(NULL, i) != cap) { |
---|
| 1924 | + if (arch_scale_cpu_capacity(i) != cap) { |
---|
1879 | 1925 | asym = true; |
---|
1880 | 1926 | break; |
---|
1881 | 1927 | } |
---|
.. | .. |
---|
1890 | 1936 | * to everyone. |
---|
1891 | 1937 | */ |
---|
1892 | 1938 | for_each_cpu(i, cpu_map) { |
---|
1893 | | - unsigned long max_capacity = arch_scale_cpu_capacity(NULL, i); |
---|
| 1939 | + unsigned long max_capacity = arch_scale_cpu_capacity(i); |
---|
1894 | 1940 | int tl_id = 0; |
---|
1895 | 1941 | |
---|
1896 | 1942 | for_each_sd_topology(tl) { |
---|
.. | .. |
---|
1900 | 1946 | for_each_cpu_and(j, tl->mask(i), cpu_map) { |
---|
1901 | 1947 | unsigned long capacity; |
---|
1902 | 1948 | |
---|
1903 | | - capacity = arch_scale_cpu_capacity(NULL, j); |
---|
| 1949 | + capacity = arch_scale_cpu_capacity(j); |
---|
1904 | 1950 | |
---|
1905 | 1951 | if (capacity <= max_capacity) |
---|
1906 | 1952 | continue; |
---|
.. | .. |
---|
1925 | 1971 | static int |
---|
1926 | 1972 | build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr) |
---|
1927 | 1973 | { |
---|
1928 | | - enum s_alloc alloc_state; |
---|
| 1974 | + enum s_alloc alloc_state = sa_none; |
---|
1929 | 1975 | struct sched_domain *sd; |
---|
1930 | 1976 | struct s_data d; |
---|
| 1977 | + struct rq *rq = NULL; |
---|
1931 | 1978 | int i, ret = -ENOMEM; |
---|
1932 | 1979 | struct sched_domain_topology_level *tl_asym; |
---|
1933 | 1980 | bool has_asym = false; |
---|
| 1981 | + |
---|
| 1982 | + if (WARN_ON(cpumask_empty(cpu_map))) |
---|
| 1983 | + goto error; |
---|
1934 | 1984 | |
---|
1935 | 1985 | alloc_state = __visit_domain_allocation_hell(&d, cpu_map); |
---|
1936 | 1986 | if (alloc_state != sa_rootdomain) |
---|
.. | .. |
---|
1941 | 1991 | /* Set up domains for CPUs specified by the cpu_map: */ |
---|
1942 | 1992 | for_each_cpu(i, cpu_map) { |
---|
1943 | 1993 | struct sched_domain_topology_level *tl; |
---|
| 1994 | + int dflags = 0; |
---|
1944 | 1995 | |
---|
1945 | 1996 | sd = NULL; |
---|
1946 | 1997 | for_each_sd_topology(tl) { |
---|
1947 | | - int dflags = 0; |
---|
1948 | | - |
---|
1949 | 1998 | if (tl == tl_asym) { |
---|
1950 | 1999 | dflags |= SD_ASYM_CPUCAPACITY; |
---|
1951 | 2000 | has_asym = true; |
---|
1952 | 2001 | } |
---|
| 2002 | + |
---|
| 2003 | + if (WARN_ON(!topology_span_sane(tl, cpu_map, i))) |
---|
| 2004 | + goto error; |
---|
1953 | 2005 | |
---|
1954 | 2006 | sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i); |
---|
1955 | 2007 | |
---|
.. | .. |
---|
1990 | 2042 | /* Attach the domains */ |
---|
1991 | 2043 | rcu_read_lock(); |
---|
1992 | 2044 | for_each_cpu(i, cpu_map) { |
---|
| 2045 | + rq = cpu_rq(i); |
---|
1993 | 2046 | sd = *per_cpu_ptr(d.sd, i); |
---|
| 2047 | + |
---|
| 2048 | + /* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */ |
---|
| 2049 | + if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity)) |
---|
| 2050 | + WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig); |
---|
| 2051 | + |
---|
1994 | 2052 | cpu_attach_domain(sd, d.rd, i); |
---|
1995 | 2053 | } |
---|
1996 | 2054 | rcu_read_unlock(); |
---|
1997 | 2055 | |
---|
1998 | 2056 | if (has_asym) |
---|
1999 | 2057 | static_branch_inc_cpuslocked(&sched_asym_cpucapacity); |
---|
| 2058 | + |
---|
| 2059 | + if (rq && sched_debug_enabled) { |
---|
| 2060 | + pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n", |
---|
| 2061 | + cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity); |
---|
| 2062 | + } |
---|
| 2063 | + trace_android_vh_build_sched_domains(has_asym); |
---|
2000 | 2064 | |
---|
2001 | 2065 | ret = 0; |
---|
2002 | 2066 | error: |
---|
.. | .. |
---|
2057 | 2121 | } |
---|
2058 | 2122 | |
---|
2059 | 2123 | /* |
---|
2060 | | - * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
---|
2061 | | - * For now this just excludes isolated CPUs, but could be used to |
---|
2062 | | - * exclude other special cases in the future. |
---|
| 2124 | + * Set up scheduler domains and groups. For now this just excludes isolated |
---|
| 2125 | + * CPUs, but could be used to exclude other special cases in the future. |
---|
2063 | 2126 | */ |
---|
2064 | 2127 | int sched_init_domains(const struct cpumask *cpu_map) |
---|
2065 | 2128 | { |
---|
.. | .. |
---|
2140 | 2203 | * ndoms_new == 0 is a special case for destroying existing domains, |
---|
2141 | 2204 | * and it will not create the default domain. |
---|
2142 | 2205 | * |
---|
2143 | | - * Call with hotplug lock held |
---|
| 2206 | + * Call with hotplug lock and sched_domains_mutex held |
---|
2144 | 2207 | */ |
---|
2145 | | -void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], |
---|
2146 | | - struct sched_domain_attr *dattr_new) |
---|
| 2208 | +void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[], |
---|
| 2209 | + struct sched_domain_attr *dattr_new) |
---|
2147 | 2210 | { |
---|
2148 | 2211 | bool __maybe_unused has_eas = false; |
---|
2149 | 2212 | int i, j, n; |
---|
2150 | 2213 | int new_topology; |
---|
2151 | 2214 | |
---|
2152 | | - mutex_lock(&sched_domains_mutex); |
---|
| 2215 | + lockdep_assert_held(&sched_domains_mutex); |
---|
2153 | 2216 | |
---|
2154 | 2217 | /* Always unregister in case we don't destroy any domains: */ |
---|
2155 | 2218 | unregister_sched_domain_sysctl(); |
---|
.. | .. |
---|
2174 | 2237 | for (i = 0; i < ndoms_cur; i++) { |
---|
2175 | 2238 | for (j = 0; j < n && !new_topology; j++) { |
---|
2176 | 2239 | if (cpumask_equal(doms_cur[i], doms_new[j]) && |
---|
2177 | | - dattrs_equal(dattr_cur, i, dattr_new, j)) |
---|
| 2240 | + dattrs_equal(dattr_cur, i, dattr_new, j)) { |
---|
| 2241 | + struct root_domain *rd; |
---|
| 2242 | + |
---|
| 2243 | + /* |
---|
| 2244 | + * This domain won't be destroyed and as such |
---|
| 2245 | + * its dl_bw->total_bw needs to be cleared. It |
---|
| 2246 | + * will be recomputed in function |
---|
| 2247 | + * update_tasks_root_domain(). |
---|
| 2248 | + */ |
---|
| 2249 | + rd = cpu_rq(cpumask_any(doms_cur[i]))->rd; |
---|
| 2250 | + dl_clear_root_domain(rd); |
---|
2178 | 2251 | goto match1; |
---|
| 2252 | + } |
---|
2179 | 2253 | } |
---|
2180 | 2254 | /* No match - a current sched domain not in new doms_new[] */ |
---|
2181 | 2255 | detach_destroy_domains(doms_cur[i]); |
---|
.. | .. |
---|
2204 | 2278 | ; |
---|
2205 | 2279 | } |
---|
2206 | 2280 | |
---|
2207 | | -#ifdef CONFIG_ENERGY_MODEL |
---|
| 2281 | +#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) |
---|
2208 | 2282 | /* Build perf. domains: */ |
---|
2209 | 2283 | for (i = 0; i < ndoms_new; i++) { |
---|
2210 | | - for (j = 0; j < n; j++) { |
---|
| 2284 | + for (j = 0; j < n && !sched_energy_update; j++) { |
---|
2211 | 2285 | if (cpumask_equal(doms_new[i], doms_cur[j]) && |
---|
2212 | 2286 | cpu_rq(cpumask_first(doms_cur[j]))->rd->pd) { |
---|
2213 | 2287 | has_eas = true; |
---|
.. | .. |
---|
2232 | 2306 | ndoms_cur = ndoms_new; |
---|
2233 | 2307 | |
---|
2234 | 2308 | register_sched_domain_sysctl(); |
---|
| 2309 | +} |
---|
2235 | 2310 | |
---|
| 2311 | +/* |
---|
| 2312 | + * Call with hotplug lock held |
---|
| 2313 | + */ |
---|
| 2314 | +void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], |
---|
| 2315 | + struct sched_domain_attr *dattr_new) |
---|
| 2316 | +{ |
---|
| 2317 | + mutex_lock(&sched_domains_mutex); |
---|
| 2318 | + partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); |
---|
2236 | 2319 | mutex_unlock(&sched_domains_mutex); |
---|
2237 | 2320 | } |
---|
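The final hunks split `partition_sched_domains()` into a `_locked` core plus a thin wrapper that takes `sched_domains_mutex`, so callers that already hold the mutex (and the hotplug lock) can invoke the core directly. A sketch of that locked/wrapper shape, with a pthread mutex standing in for the kernel mutex and shortened, hypothetical function names:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t domains_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Core routine: the caller must already hold domains_mutex (the kernel
 * version documents this with lockdep_assert_held()). */
static void partition_domains_locked(int ndoms)
{
        printf("rebuilding %d sched domain(s) under the mutex\n", ndoms);
}

/* Convenience wrapper for callers that do not hold the mutex yet. */
static void partition_domains(int ndoms)
{
        pthread_mutex_lock(&domains_mutex);
        partition_domains_locked(ndoms);
        pthread_mutex_unlock(&domains_mutex);
}

int main(void)
{
        partition_domains(1);
        return 0;
}
```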