.. | .. |
---|
10 | 10 | #include <linux/notifier.h> |
---|
11 | 11 | #include <linux/sched/signal.h> |
---|
12 | 12 | #include <linux/sched/hotplug.h> |
---|
| 13 | +#include <linux/sched/isolation.h> |
---|
13 | 14 | #include <linux/sched/task.h> |
---|
14 | 15 | #include <linux/sched/smt.h> |
---|
15 | 16 | #include <linux/unistd.h> |
---|
.. | .. |
---|
30 | 31 | #include <linux/smpboot.h> |
---|
31 | 32 | #include <linux/relay.h> |
---|
32 | 33 | #include <linux/slab.h> |
---|
| 34 | +#include <linux/scs.h> |
---|
33 | 35 | #include <linux/percpu-rwsem.h> |
---|
34 | 36 | #include <linux/cpuset.h> |
---|
| 37 | +#include <linux/random.h> |
---|
| 38 | +#include <uapi/linux/sched/types.h> |
---|
35 | 39 | |
---|
36 | 40 | #include <trace/events/power.h> |
---|
37 | 41 | #define CREATE_TRACE_POINTS |
---|
38 | 42 | #include <trace/events/cpuhp.h> |
---|
| 43 | + |
---|
| 44 | +#undef CREATE_TRACE_POINTS |
---|
| 45 | +#include <trace/hooks/sched.h> |
---|
| 46 | +#include <trace/hooks/cpu.h> |
---|
39 | 47 | |
---|
40 | 48 | #include "smpboot.h" |
---|
41 | 49 | |
---|
.. | .. |
---|
63 | 71 | bool rollback; |
---|
64 | 72 | bool single; |
---|
65 | 73 | bool bringup; |
---|
66 | | - bool booted_once; |
---|
67 | 74 | struct hlist_node *node; |
---|
68 | 75 | struct hlist_node *last; |
---|
69 | 76 | enum cpuhp_state cb_state; |
---|
.. | .. |
---|
76 | 83 | static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = { |
---|
77 | 84 | .fail = CPUHP_INVALID, |
---|
78 | 85 | }; |
---|
| 86 | + |
---|
| 87 | +#ifdef CONFIG_SMP |
---|
| 88 | +cpumask_t cpus_booted_once_mask; |
---|
| 89 | +#endif |
---|
79 | 90 | |
---|
80 | 91 | #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP) |
---|
81 | 92 | static struct lockdep_map cpuhp_state_up_map = |
---|
.. | .. |
---|
269 | 280 | { |
---|
270 | 281 | mutex_lock(&cpu_add_remove_lock); |
---|
271 | 282 | } |
---|
| 283 | +EXPORT_SYMBOL_GPL(cpu_maps_update_begin); |
---|
272 | 284 | |
---|
273 | 285 | void cpu_maps_update_done(void) |
---|
274 | 286 | { |
---|
275 | 287 | mutex_unlock(&cpu_add_remove_lock); |
---|
276 | 288 | } |
---|
| 289 | +EXPORT_SYMBOL_GPL(cpu_maps_update_done); |
---|
277 | 290 | |
---|
278 | 291 | /* |
---|
279 | 292 | * If set, cpu_up and cpu_down will return -EBUSY and do nothing. |
---|
.. | .. |
---|
327 | 340 | percpu_rwsem_assert_held(&cpu_hotplug_lock); |
---|
328 | 341 | } |
---|
329 | 342 | |
---|
| 343 | +static void lockdep_acquire_cpus_lock(void) |
---|
| 344 | +{ |
---|
| 345 | + rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_); |
---|
| 346 | +} |
---|
| 347 | + |
---|
| 348 | +static void lockdep_release_cpus_lock(void) |
---|
| 349 | +{ |
---|
| 350 | + rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_); |
---|
| 351 | +} |
---|
| 352 | + |
---|
330 | 353 | /* |
---|
331 | 354 | * Wait for currently running CPU hotplug operations to complete (if any) and |
---|
332 | 355 | * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects |
---|
.. | .. |
---|
356 | 379 | cpu_maps_update_done(); |
---|
357 | 380 | } |
---|
358 | 381 | EXPORT_SYMBOL_GPL(cpu_hotplug_enable); |
---|
| 382 | + |
---|
| 383 | +#else |
---|
| 384 | + |
---|
| 385 | +static void lockdep_acquire_cpus_lock(void) |
---|
| 386 | +{ |
---|
| 387 | +} |
---|
| 388 | + |
---|
| 389 | +static void lockdep_release_cpus_lock(void) |
---|
| 390 | +{ |
---|
| 391 | +} |
---|
| 392 | + |
---|
359 | 393 | #endif /* CONFIG_HOTPLUG_CPU */ |
---|
360 | 394 | |
---|
361 | 395 | /* |
---|
.. | .. |
---|
369 | 403 | |
---|
370 | 404 | void __init cpu_smt_disable(bool force) |
---|
371 | 405 | { |
---|
372 | | - if (cpu_smt_control == CPU_SMT_FORCE_DISABLED || |
---|
373 | | - cpu_smt_control == CPU_SMT_NOT_SUPPORTED) |
---|
| 406 | + if (!cpu_smt_possible()) |
---|
374 | 407 | return; |
---|
375 | 408 | |
---|
376 | 409 | if (force) { |
---|
.. | .. |
---|
410 | 443 | /* |
---|
411 | 444 | * On x86 it's required to boot all logical CPUs at least once so |
---|
412 | 445 | * that the init code can get a chance to set CR4.MCE on each |
---|
413 | | - * CPU. Otherwise, a broadacasted MCE observing CR4.MCE=0b on any |
---|
| 446 | + * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any |
---|
414 | 447 | * core will shutdown the machine. |
---|
415 | 448 | */ |
---|
416 | | - return !per_cpu(cpuhp_state, cpu).booted_once; |
---|
| 449 | + return !cpumask_test_cpu(cpu, &cpus_booted_once_mask); |
---|
417 | 450 | } |
---|
| 451 | + |
---|
 | 452 | +/* Returns true if SMT is supported and not forcefully (irreversibly) disabled */ |
---|
| 453 | +bool cpu_smt_possible(void) |
---|
| 454 | +{ |
---|
| 455 | + return cpu_smt_control != CPU_SMT_FORCE_DISABLED && |
---|
| 456 | + cpu_smt_control != CPU_SMT_NOT_SUPPORTED; |
---|
| 457 | +} |
---|
| 458 | +EXPORT_SYMBOL_GPL(cpu_smt_possible); |
---|
418 | 459 | #else |
---|
419 | 460 | static inline bool cpu_smt_allowed(unsigned int cpu) { return true; } |
---|
420 | 461 | #endif |
---|
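The new cpu_smt_possible() helper gives callers a single check for "SMT can never come back" (force-disabled or unsupported). A minimal sketch of a hypothetical caller is shown below; only cpu_smt_possible() and cpu_smt_control come from the code above, the wrapper itself is illustrative.

```c
/* Hypothetical caller (sketch): skip SMT-specific work when SMT can
 * never be enabled on this system. */
static bool example_smt_mitigation_needed(void)
{
	if (!cpu_smt_possible())	/* forceoff or notsupported */
		return false;
	return cpu_smt_control == CPU_SMT_ENABLED;
}
```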
.. | .. |
---|
501 | 542 | /* |
---|
502 | 543 | * SMT soft disabling on X86 requires to bring the CPU out of the |
---|
503 | 544 | * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The |
---|
504 | | - * CPU marked itself as booted_once in cpu_notify_starting() so the |
---|
| 545 | + * CPU marked itself as booted_once in notify_cpu_starting() so the |
---|
505 | 546 | * cpu_smt_allowed() check will now return false if this is not the |
---|
506 | 547 | * primary sibling. |
---|
507 | 548 | */ |
---|
.. | .. |
---|
518 | 559 | { |
---|
519 | 560 | struct task_struct *idle = idle_thread_get(cpu); |
---|
520 | 561 | int ret; |
---|
| 562 | + |
---|
| 563 | + /* |
---|
| 564 | + * Reset stale stack state from the last time this CPU was online. |
---|
| 565 | + */ |
---|
| 566 | + scs_task_reset(idle); |
---|
| 567 | + kasan_unpoison_task_stack(idle); |
---|
521 | 568 | |
---|
522 | 569 | /* |
---|
523 | 570 | * Some architectures have to walk the irq descriptors to |
---|
.. | .. |
---|
640 | 687 | */ |
---|
641 | 688 | smp_mb(); |
---|
642 | 689 | |
---|
| 690 | + /* |
---|
| 691 | + * The BP holds the hotplug lock, but we're now running on the AP, |
---|
| 692 | + * ensure that anybody asserting the lock is held, will actually find |
---|
| 693 | + * it so. |
---|
| 694 | + */ |
---|
| 695 | + lockdep_acquire_cpus_lock(); |
---|
643 | 696 | cpuhp_lock_acquire(bringup); |
---|
644 | 697 | |
---|
645 | 698 | if (st->single) { |
---|
.. | .. |
---|
685 | 738 | } |
---|
686 | 739 | |
---|
687 | 740 | cpuhp_lock_release(bringup); |
---|
| 741 | + lockdep_release_cpus_lock(); |
---|
688 | 742 | |
---|
689 | 743 | if (!st->should_run) |
---|
690 | 744 | complete_ap_thread(st, bringup); |
---|
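The lockdep_acquire_cpus_lock()/lockdep_release_cpus_lock() pair added around the callback invocation in cpuhp_thread_fun() only annotates cpu_hotplug_lock for lockdep: the control CPU is the real owner while the AP hotplug thread runs the callbacks. A hypothetical callback can therefore assert the lock without a false positive; the callback below is a sketch, not part of the patch.

```c
/* Hypothetical hotplug callback (sketch): the assertion is satisfied on
 * the AP because cpuhp_thread_fun() annotated the lock as held, even
 * though the control CPU is the real owner. */
static int example_cpu_online(unsigned int cpu)
{
	lockdep_assert_cpus_held();
	/* ... per-CPU setup that relies on hotplug exclusion ... */
	return 0;
}
```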
.. | .. |
---|
876 | 930 | int err, cpu = smp_processor_id(); |
---|
877 | 931 | int ret; |
---|
878 | 932 | |
---|
879 | | -#ifdef CONFIG_PREEMPT_RT_BASE |
---|
880 | | - /* |
---|
881 | | - * If any tasks disabled migration before we got here, |
---|
882 | | - * go back and sleep again. |
---|
883 | | - */ |
---|
884 | | - if (cpu_nr_pinned(cpu)) |
---|
885 | | - return -EAGAIN; |
---|
886 | | -#endif |
---|
887 | | - |
---|
888 | 933 | /* Ensure this CPU doesn't handle any more interrupts. */ |
---|
889 | 934 | err = __cpu_disable(); |
---|
890 | 935 | if (err < 0) |
---|
.. | .. |
---|
907 | 952 | |
---|
908 | 953 | /* Give up timekeeping duties */ |
---|
909 | 954 | tick_handover_do_timer(); |
---|
| 955 | + /* Remove CPU from timer broadcasting */ |
---|
| 956 | + tick_offline_cpu(cpu); |
---|
910 | 957 | /* Park the stopper thread */ |
---|
911 | 958 | stop_machine_park(cpu); |
---|
912 | 959 | return 0; |
---|
913 | 960 | } |
---|
914 | | - |
---|
915 | | -#ifdef CONFIG_PREEMPT_RT_BASE |
---|
916 | | -struct task_struct *takedown_cpu_task; |
---|
917 | | -#endif |
---|
918 | 961 | |
---|
919 | 962 | static int takedown_cpu(unsigned int cpu) |
---|
920 | 963 | { |
---|
.. | .. |
---|
930 | 973 | */ |
---|
931 | 974 | irq_lock_sparse(); |
---|
932 | 975 | |
---|
933 | | -#ifdef CONFIG_PREEMPT_RT_BASE |
---|
934 | | - WARN_ON_ONCE(takedown_cpu_task); |
---|
935 | | - takedown_cpu_task = current; |
---|
936 | | - |
---|
937 | | -again: |
---|
938 | | - /* |
---|
939 | | - * If a task pins this CPU after we pass this check, take_cpu_down |
---|
940 | | - * will return -EAGAIN. |
---|
941 | | - */ |
---|
942 | | - for (;;) { |
---|
943 | | - int nr_pinned; |
---|
944 | | - |
---|
945 | | - set_current_state(TASK_UNINTERRUPTIBLE); |
---|
946 | | - nr_pinned = cpu_nr_pinned(cpu); |
---|
947 | | - if (nr_pinned == 0) |
---|
948 | | - break; |
---|
949 | | - schedule(); |
---|
950 | | - } |
---|
951 | | - set_current_state(TASK_RUNNING); |
---|
952 | | -#endif |
---|
953 | | - |
---|
954 | 976 | /* |
---|
955 | 977 | * So now all preempt/rcu users must observe !cpu_active(). |
---|
956 | 978 | */ |
---|
957 | 979 | err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu)); |
---|
958 | | -#ifdef CONFIG_PREEMPT_RT_BASE |
---|
959 | | - if (err == -EAGAIN) |
---|
960 | | - goto again; |
---|
961 | | -#endif |
---|
962 | 980 | if (err) { |
---|
963 | | -#ifdef CONFIG_PREEMPT_RT_BASE |
---|
964 | | - takedown_cpu_task = NULL; |
---|
965 | | -#endif |
---|
966 | 981 | /* CPU refused to die */ |
---|
967 | 982 | irq_unlock_sparse(); |
---|
968 | 983 | /* Unpark the hotplug thread so we can rollback there */ |
---|
.. | .. |
---|
981 | 996 | wait_for_ap_thread(st, false); |
---|
982 | 997 | BUG_ON(st->state != CPUHP_AP_IDLE_DEAD); |
---|
983 | 998 | |
---|
984 | | -#ifdef CONFIG_PREEMPT_RT_BASE |
---|
985 | | - takedown_cpu_task = NULL; |
---|
986 | | -#endif |
---|
987 | 999 | /* Interrupts are moved away from the dying cpu, reenable alloc/free */ |
---|
988 | 1000 | irq_unlock_sparse(); |
---|
989 | 1001 | |
---|
.. | .. |
---|
1049 | 1061 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
---|
1050 | 1062 | int prev_state, ret = 0; |
---|
1051 | 1063 | |
---|
1052 | | - if (num_online_cpus() == 1) |
---|
| 1064 | + if (num_active_cpus() == 1 && cpu_active(cpu)) |
---|
1053 | 1065 | return -EBUSY; |
---|
1054 | 1066 | |
---|
1055 | 1067 | if (!cpu_present(cpu)) |
---|
.. | .. |
---|
1112 | 1124 | return _cpu_down(cpu, 0, target); |
---|
1113 | 1125 | } |
---|
1114 | 1126 | |
---|
1115 | | -static int do_cpu_down(unsigned int cpu, enum cpuhp_state target) |
---|
| 1127 | +static int cpu_down(unsigned int cpu, enum cpuhp_state target) |
---|
1116 | 1128 | { |
---|
1117 | 1129 | int err; |
---|
1118 | 1130 | |
---|
.. | .. |
---|
1122 | 1134 | return err; |
---|
1123 | 1135 | } |
---|
1124 | 1136 | |
---|
1125 | | -int cpu_down(unsigned int cpu) |
---|
| 1137 | +/** |
---|
| 1138 | + * cpu_device_down - Bring down a cpu device |
---|
| 1139 | + * @dev: Pointer to the cpu device to offline |
---|
| 1140 | + * |
---|
| 1141 | + * This function is meant to be used by device core cpu subsystem only. |
---|
| 1142 | + * |
---|
| 1143 | + * Other subsystems should use remove_cpu() instead. |
---|
| 1144 | + */ |
---|
| 1145 | +int cpu_device_down(struct device *dev) |
---|
1126 | 1146 | { |
---|
1127 | | - return do_cpu_down(cpu, CPUHP_OFFLINE); |
---|
| 1147 | + return cpu_down(dev->id, CPUHP_OFFLINE); |
---|
1128 | 1148 | } |
---|
1129 | | -EXPORT_SYMBOL(cpu_down); |
---|
| 1149 | + |
---|
| 1150 | +int remove_cpu(unsigned int cpu) |
---|
| 1151 | +{ |
---|
| 1152 | + int ret; |
---|
| 1153 | + |
---|
| 1154 | + lock_device_hotplug(); |
---|
| 1155 | + ret = device_offline(get_cpu_device(cpu)); |
---|
| 1156 | + unlock_device_hotplug(); |
---|
| 1157 | + |
---|
| 1158 | + return ret; |
---|
| 1159 | +} |
---|
| 1160 | +EXPORT_SYMBOL_GPL(remove_cpu); |
---|
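remove_cpu() (and its counterpart add_cpu() further down) wrap device_offline()/device_online() under the device hotplug lock so that other subsystems no longer call cpu_down()/cpu_up() directly. A hypothetical in-kernel user could pair them as in this sketch:

```c
/* Hypothetical user (sketch): take a CPU out of service and bring it
 * back via the new wrappers instead of the device core internals. */
static int example_toggle_cpu(unsigned int cpu)
{
	int ret;

	ret = remove_cpu(cpu);	/* device_offline() under the device hotplug lock */
	if (ret)
		return ret;
	return add_cpu(cpu);	/* device_online() under the device hotplug lock */
}
```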
| 1161 | + |
---|
| 1162 | +extern int dl_cpu_busy(int cpu, struct task_struct *p); |
---|
| 1163 | + |
---|
| 1164 | +int __pause_drain_rq(struct cpumask *cpus) |
---|
| 1165 | +{ |
---|
| 1166 | + unsigned int cpu; |
---|
| 1167 | + int err = 0; |
---|
| 1168 | + |
---|
| 1169 | + /* |
---|
| 1170 | + * Disabling preemption avoids that one of the stopper, started from |
---|
| 1171 | + * sched_cpu_drain_rq(), blocks firing draining for the whole cpumask. |
---|
| 1172 | + */ |
---|
| 1173 | + preempt_disable(); |
---|
| 1174 | + for_each_cpu(cpu, cpus) { |
---|
| 1175 | + err = sched_cpu_drain_rq(cpu); |
---|
| 1176 | + if (err) |
---|
| 1177 | + break; |
---|
| 1178 | + } |
---|
| 1179 | + preempt_enable(); |
---|
| 1180 | + |
---|
| 1181 | + return err; |
---|
| 1182 | +} |
---|
| 1183 | + |
---|
| 1184 | +void __wait_drain_rq(struct cpumask *cpus) |
---|
| 1185 | +{ |
---|
| 1186 | + unsigned int cpu; |
---|
| 1187 | + |
---|
| 1188 | + for_each_cpu(cpu, cpus) |
---|
| 1189 | + sched_cpu_drain_rq_wait(cpu); |
---|
| 1190 | +} |
---|
| 1191 | + |
---|
| 1192 | +/* if rt task, set to cfs and return previous prio */ |
---|
| 1193 | +static int pause_reduce_prio(void) |
---|
| 1194 | +{ |
---|
| 1195 | + int prev_prio = -1; |
---|
| 1196 | + |
---|
| 1197 | + if (current->prio < MAX_RT_PRIO) { |
---|
| 1198 | + struct sched_param param = { .sched_priority = 0 }; |
---|
| 1199 | + |
---|
| 1200 | + prev_prio = current->prio; |
---|
| 1201 | + sched_setscheduler_nocheck(current, SCHED_NORMAL, ¶m); |
---|
| 1202 | + } |
---|
| 1203 | + |
---|
| 1204 | + return prev_prio; |
---|
| 1205 | +} |
---|
| 1206 | + |
---|
| 1207 | +/* if previous prio was set, restore */ |
---|
| 1208 | +static void pause_restore_prio(int prev_prio) |
---|
| 1209 | +{ |
---|
| 1210 | + if (prev_prio >= 0 && prev_prio < MAX_RT_PRIO) { |
---|
| 1211 | + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1-prev_prio }; |
---|
| 1212 | + |
---|
| 1213 | + sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); |
---|
| 1214 | + } |
---|
| 1215 | +} |
---|
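pause_restore_prio() relies on the kernel's RT priority mapping, where an RT task's prio is MAX_RT_PRIO - 1 - sched_priority; the helper below just states that inversion explicitly as a worked example (a sketch for illustration, not part of the patch).

```c
/* Illustration of the mapping used in pause_restore_prio(): a SCHED_FIFO
 * task with sched_priority 99 has prio 0, so MAX_RT_PRIO - 1 - 0 == 99
 * restores the original sched_priority. */
static inline int example_prio_to_rt_priority(int prio)
{
	return MAX_RT_PRIO - 1 - prio;
}
```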
| 1216 | + |
---|
| 1217 | +int pause_cpus(struct cpumask *cpus) |
---|
| 1218 | +{ |
---|
| 1219 | + int err = 0; |
---|
| 1220 | + int cpu; |
---|
| 1221 | + u64 start_time = 0; |
---|
| 1222 | + int prev_prio; |
---|
| 1223 | + |
---|
| 1224 | + start_time = sched_clock(); |
---|
| 1225 | + |
---|
| 1226 | + cpu_maps_update_begin(); |
---|
| 1227 | + |
---|
| 1228 | + if (cpu_hotplug_disabled) { |
---|
| 1229 | + err = -EBUSY; |
---|
| 1230 | + goto err_cpu_maps_update; |
---|
| 1231 | + } |
---|
| 1232 | + |
---|
| 1233 | + /* Pausing an already inactive CPU isn't an error */ |
---|
| 1234 | + cpumask_and(cpus, cpus, cpu_active_mask); |
---|
| 1235 | + |
---|
| 1236 | + for_each_cpu(cpu, cpus) { |
---|
| 1237 | + if (!cpu_online(cpu) || dl_cpu_busy(cpu, NULL) || |
---|
| 1238 | + get_cpu_device(cpu)->offline_disabled == true) { |
---|
| 1239 | + err = -EBUSY; |
---|
| 1240 | + goto err_cpu_maps_update; |
---|
| 1241 | + } |
---|
| 1242 | + } |
---|
| 1243 | + |
---|
| 1244 | + if (cpumask_weight(cpus) >= num_active_cpus()) { |
---|
| 1245 | + err = -EBUSY; |
---|
| 1246 | + goto err_cpu_maps_update; |
---|
| 1247 | + } |
---|
| 1248 | + |
---|
| 1249 | + if (cpumask_empty(cpus)) |
---|
| 1250 | + goto err_cpu_maps_update; |
---|
| 1251 | + |
---|
| 1252 | + /* |
---|
| 1253 | + * Lazy migration: |
---|
| 1254 | + * |
---|
 | 1255 | + * We do care about how fast a CPU can go idle and stay in this |
---|
 | 1256 | + * state. If we try to take the cpus_write_lock() here, we would have |
---|
 | 1257 | + * to wait a few dozen ms, as this function might schedule. |
---|
| 1258 | + * However, we can, as a first step, flip the active mask and migrate |
---|
| 1259 | + * anything currently on the run-queue, to give a chance to the paused |
---|
| 1260 | + * CPUs to reach quickly an idle state. There's a risk meanwhile for |
---|
| 1261 | + * another CPU to observe an out-of-date active_mask or to incompletely |
---|
| 1262 | + * update a cpuset. Both problems would be resolved later in the slow |
---|
| 1263 | + * path, which ensures active_mask synchronization, triggers a cpuset |
---|
 | 1264 | + * rebuild and migrates any task that would have escaped the lazy |
---|
| 1265 | + * migration. |
---|
| 1266 | + */ |
---|
| 1267 | + for_each_cpu(cpu, cpus) |
---|
| 1268 | + set_cpu_active(cpu, false); |
---|
| 1269 | + err = __pause_drain_rq(cpus); |
---|
| 1270 | + if (err) { |
---|
| 1271 | + __wait_drain_rq(cpus); |
---|
| 1272 | + for_each_cpu(cpu, cpus) |
---|
| 1273 | + set_cpu_active(cpu, true); |
---|
| 1274 | + goto err_cpu_maps_update; |
---|
| 1275 | + } |
---|
| 1276 | + |
---|
| 1277 | + prev_prio = pause_reduce_prio(); |
---|
| 1278 | + |
---|
| 1279 | + /* |
---|
| 1280 | + * Slow path deactivation: |
---|
| 1281 | + * |
---|
| 1282 | + * Now that paused CPUs are most likely idle, we can go through a |
---|
| 1283 | + * complete scheduler deactivation. |
---|
| 1284 | + * |
---|
 | 1285 | + * Since the cpu_active_mask has already been updated and |
---|
 | 1286 | + * cpus_write_lock() calls synchronize_rcu(), we know that all |
---|
 | 1287 | + * preempt-disabled and RCU users will observe the updated value. |
---|
| 1288 | + */ |
---|
| 1289 | + cpus_write_lock(); |
---|
| 1290 | + |
---|
| 1291 | + __wait_drain_rq(cpus); |
---|
| 1292 | + |
---|
| 1293 | + cpuhp_tasks_frozen = 0; |
---|
| 1294 | + |
---|
| 1295 | + if (sched_cpus_deactivate_nosync(cpus)) { |
---|
| 1296 | + err = -EBUSY; |
---|
| 1297 | + goto err_cpus_write_unlock; |
---|
| 1298 | + } |
---|
| 1299 | + |
---|
| 1300 | + err = __pause_drain_rq(cpus); |
---|
| 1301 | + __wait_drain_rq(cpus); |
---|
| 1302 | + if (err) { |
---|
| 1303 | + for_each_cpu(cpu, cpus) |
---|
| 1304 | + sched_cpu_activate(cpu); |
---|
| 1305 | + goto err_cpus_write_unlock; |
---|
| 1306 | + } |
---|
| 1307 | + |
---|
| 1308 | + /* |
---|
 | 1309 | + * Even though it lives outside the regular HP path, pause uses one |
---|
 | 1310 | + * of the HP steps (CPUHP_AP_ACTIVE). This should be reflected in the |
---|
 | 1311 | + * current state of the CPU. |
---|
| 1312 | + */ |
---|
| 1313 | + for_each_cpu(cpu, cpus) { |
---|
| 1314 | + struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
---|
| 1315 | + |
---|
| 1316 | + st->state = CPUHP_AP_ACTIVE - 1; |
---|
| 1317 | + st->target = st->state; |
---|
| 1318 | + } |
---|
| 1319 | + |
---|
| 1320 | +err_cpus_write_unlock: |
---|
| 1321 | + cpus_write_unlock(); |
---|
| 1322 | + pause_restore_prio(prev_prio); |
---|
| 1323 | +err_cpu_maps_update: |
---|
| 1324 | + cpu_maps_update_done(); |
---|
| 1325 | + |
---|
| 1326 | + trace_cpuhp_pause(cpus, start_time, 1); |
---|
| 1327 | + |
---|
| 1328 | + return err; |
---|
| 1329 | +} |
---|
| 1330 | +EXPORT_SYMBOL_GPL(pause_cpus); |
---|
| 1331 | + |
---|
| 1332 | +int resume_cpus(struct cpumask *cpus) |
---|
| 1333 | +{ |
---|
| 1334 | + unsigned int cpu; |
---|
| 1335 | + int err = 0; |
---|
| 1336 | + u64 start_time = 0; |
---|
| 1337 | + int prev_prio; |
---|
| 1338 | + |
---|
| 1339 | + start_time = sched_clock(); |
---|
| 1340 | + |
---|
| 1341 | + cpu_maps_update_begin(); |
---|
| 1342 | + |
---|
| 1343 | + if (cpu_hotplug_disabled) { |
---|
| 1344 | + err = -EBUSY; |
---|
| 1345 | + goto err_cpu_maps_update; |
---|
| 1346 | + } |
---|
| 1347 | + |
---|
| 1348 | + /* Resuming an already active CPU isn't an error */ |
---|
| 1349 | + cpumask_andnot(cpus, cpus, cpu_active_mask); |
---|
| 1350 | + |
---|
| 1351 | + for_each_cpu(cpu, cpus) { |
---|
| 1352 | + if (!cpu_online(cpu)) { |
---|
| 1353 | + err = -EBUSY; |
---|
| 1354 | + goto err_cpu_maps_update; |
---|
| 1355 | + } |
---|
| 1356 | + } |
---|
| 1357 | + |
---|
| 1358 | + if (cpumask_empty(cpus)) |
---|
| 1359 | + goto err_cpu_maps_update; |
---|
| 1360 | + |
---|
| 1361 | + for_each_cpu(cpu, cpus) |
---|
| 1362 | + set_cpu_active(cpu, true); |
---|
| 1363 | + |
---|
| 1364 | + trace_android_rvh_resume_cpus(cpus, &err); |
---|
| 1365 | + if (err) |
---|
| 1366 | + goto err_cpu_maps_update; |
---|
| 1367 | + |
---|
| 1368 | + prev_prio = pause_reduce_prio(); |
---|
| 1369 | + |
---|
 | 1370 | + /* Lazy resume: build the sched domains via a workqueue scheduled on |
---|
 | 1371 | + * the resuming cpu, so that the resuming cpu can start doing work |
---|
 | 1372 | + * earlier without adding load to other, already busy cpus. |
---|
| 1373 | + */ |
---|
| 1374 | + cpuset_update_active_cpus_affine(cpumask_first(cpus)); |
---|
| 1375 | + |
---|
| 1376 | + cpus_write_lock(); |
---|
| 1377 | + |
---|
| 1378 | + cpuhp_tasks_frozen = 0; |
---|
| 1379 | + |
---|
| 1380 | + if (sched_cpus_activate(cpus)) { |
---|
| 1381 | + err = -EBUSY; |
---|
| 1382 | + goto err_cpus_write_unlock; |
---|
| 1383 | + } |
---|
| 1384 | + |
---|
| 1385 | + /* |
---|
| 1386 | + * see pause_cpus. |
---|
| 1387 | + */ |
---|
| 1388 | + for_each_cpu(cpu, cpus) { |
---|
| 1389 | + struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
---|
| 1390 | + |
---|
| 1391 | + st->state = CPUHP_ONLINE; |
---|
| 1392 | + st->target = st->state; |
---|
| 1393 | + } |
---|
| 1394 | + |
---|
| 1395 | +err_cpus_write_unlock: |
---|
| 1396 | + cpus_write_unlock(); |
---|
| 1397 | + pause_restore_prio(prev_prio); |
---|
| 1398 | +err_cpu_maps_update: |
---|
| 1399 | + cpu_maps_update_done(); |
---|
| 1400 | + |
---|
| 1401 | + trace_cpuhp_pause(cpus, start_time, 0); |
---|
| 1402 | + |
---|
| 1403 | + return err; |
---|
| 1404 | +} |
---|
| 1405 | +EXPORT_SYMBOL_GPL(resume_cpus); |
---|
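pause_cpus() and resume_cpus() both modify the mask they are passed (and-ing or andnot-ing it against the active mask), so callers need a scratch copy. A hypothetical caller might look like the sketch below; the CPU numbers and cpumask handling are illustrative only.

```c
/* Hypothetical caller (sketch): briefly pause CPUs 2 and 3, do latency
 * sensitive work on the remaining CPUs, then resume them. */
static void example_quiesce_cpus(void)
{
	cpumask_t scratch;	/* fine for a sketch; real code may prefer cpumask_var_t */

	cpumask_clear(&scratch);
	cpumask_set_cpu(2, &scratch);
	cpumask_set_cpu(3, &scratch);
	if (pause_cpus(&scratch))
		return;

	/* ... latency sensitive work ... */

	cpumask_clear(&scratch);
	cpumask_set_cpu(2, &scratch);
	cpumask_set_cpu(3, &scratch);
	resume_cpus(&scratch);
}
```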
| 1406 | + |
---|
| 1407 | +void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) |
---|
| 1408 | +{ |
---|
| 1409 | + unsigned int cpu; |
---|
| 1410 | + int error; |
---|
| 1411 | + |
---|
| 1412 | + cpu_maps_update_begin(); |
---|
| 1413 | + |
---|
| 1414 | + /* |
---|
| 1415 | + * Make certain the cpu I'm about to reboot on is online. |
---|
| 1416 | + * |
---|
 | 1417 | + * This is in line with what migrate_to_reboot_cpu() already does. |
---|
| 1418 | + */ |
---|
| 1419 | + if (!cpu_online(primary_cpu)) |
---|
| 1420 | + primary_cpu = cpumask_first(cpu_online_mask); |
---|
| 1421 | + |
---|
| 1422 | + for_each_online_cpu(cpu) { |
---|
| 1423 | + if (cpu == primary_cpu) |
---|
| 1424 | + continue; |
---|
| 1425 | + |
---|
| 1426 | + error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE); |
---|
| 1427 | + if (error) { |
---|
| 1428 | + pr_err("Failed to offline CPU%d - error=%d", |
---|
| 1429 | + cpu, error); |
---|
| 1430 | + break; |
---|
| 1431 | + } |
---|
| 1432 | + } |
---|
| 1433 | + |
---|
| 1434 | + /* |
---|
| 1435 | + * Ensure all but the reboot CPU are offline. |
---|
| 1436 | + */ |
---|
| 1437 | + BUG_ON(num_online_cpus() > 1); |
---|
| 1438 | + |
---|
| 1439 | + /* |
---|
| 1440 | + * Make sure the CPUs won't be enabled by someone else after this |
---|
| 1441 | + * point. Kexec will reboot to a new kernel shortly resetting |
---|
| 1442 | + * everything along the way. |
---|
| 1443 | + */ |
---|
| 1444 | + cpu_hotplug_disabled++; |
---|
| 1445 | + |
---|
| 1446 | + cpu_maps_update_done(); |
---|
| 1447 | +} |
---|
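smp_shutdown_nonboot_cpus() is written for architecture reboot/kexec paths; a machine_shutdown()-style caller is sketched below (the function name and parameter are hypothetical).

```c
/* Hypothetical arch hook (sketch): park everything but the CPU we intend
 * to continue on; afterwards a single CPU remains online and hotplug
 * stays disabled until reboot. */
static void example_machine_shutdown(unsigned int reboot_cpu)
{
	smp_shutdown_nonboot_cpus(reboot_cpu);
}
```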
1130 | 1448 | |
---|
1131 | 1449 | #else |
---|
1132 | 1450 | #define takedown_cpu NULL |
---|
.. | .. |
---|
1146 | 1464 | int ret; |
---|
1147 | 1465 | |
---|
1148 | 1466 | rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */ |
---|
1149 | | - st->booted_once = true; |
---|
| 1467 | + cpumask_set_cpu(cpu, &cpus_booted_once_mask); |
---|
1150 | 1468 | while (st->state < target) { |
---|
1151 | 1469 | st->state++; |
---|
1152 | 1470 | ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); |
---|
.. | .. |
---|
1180 | 1498 | complete_ap_thread(st, true); |
---|
1181 | 1499 | } |
---|
1182 | 1500 | |
---|
| 1501 | +static int switch_to_rt_policy(void) |
---|
| 1502 | +{ |
---|
| 1503 | + struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; |
---|
| 1504 | + unsigned int policy = current->policy; |
---|
| 1505 | + |
---|
| 1506 | + if (policy == SCHED_NORMAL) |
---|
| 1507 | + /* Switch to SCHED_FIFO from SCHED_NORMAL. */ |
---|
| 1508 | + return sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); |
---|
| 1509 | + else |
---|
| 1510 | + return 1; |
---|
| 1511 | +} |
---|
| 1512 | + |
---|
| 1513 | +static int switch_to_fair_policy(void) |
---|
| 1514 | +{ |
---|
| 1515 | + struct sched_param param = { .sched_priority = 0 }; |
---|
| 1516 | + |
---|
| 1517 | + return sched_setscheduler_nocheck(current, SCHED_NORMAL, ¶m); |
---|
| 1518 | +} |
---|
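switch_to_rt_policy()/switch_to_fair_policy() implement a simple boost/unboost pattern that cpu_up() uses further down; switch_to_rt_policy() returns 0 when it actually switched and 1 when the task was not SCHED_NORMAL to begin with. A generic sketch of the pattern (the wrapper itself is hypothetical):

```c
/* Sketch of the boost pattern used by cpu_up() below: run a long,
 * multi-step operation as SCHED_FIFO so CFS tasks cannot preempt it,
 * then drop back to CFS only if we were the ones who boosted. */
static void example_run_boosted(void (*op)(void))
{
	int switch_err = switch_to_rt_policy();

	op();
	if (!switch_err)
		switch_to_fair_policy();
}
```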
| 1519 | + |
---|
1183 | 1520 | /* Requires cpu_add_remove_lock to be held */ |
---|
1184 | 1521 | static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) |
---|
1185 | 1522 | { |
---|
.. | .. |
---|
1195 | 1532 | } |
---|
1196 | 1533 | |
---|
1197 | 1534 | /* |
---|
1198 | | - * The caller of do_cpu_up might have raced with another |
---|
1199 | | - * caller. Ignore it for now. |
---|
| 1535 | + * The caller of cpu_up() might have raced with another |
---|
| 1536 | + * caller. Nothing to do. |
---|
1200 | 1537 | */ |
---|
1201 | 1538 | if (st->state >= target) |
---|
1202 | 1539 | goto out; |
---|
.. | .. |
---|
1241 | 1578 | return ret; |
---|
1242 | 1579 | } |
---|
1243 | 1580 | |
---|
1244 | | -static int do_cpu_up(unsigned int cpu, enum cpuhp_state target) |
---|
| 1581 | +static int cpu_up(unsigned int cpu, enum cpuhp_state target) |
---|
1245 | 1582 | { |
---|
1246 | 1583 | int err = 0; |
---|
| 1584 | + int switch_err; |
---|
1247 | 1585 | |
---|
1248 | 1586 | if (!cpu_possible(cpu)) { |
---|
1249 | 1587 | pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n", |
---|
.. | .. |
---|
1254 | 1592 | return -EINVAL; |
---|
1255 | 1593 | } |
---|
1256 | 1594 | |
---|
| 1595 | + trace_android_vh_cpu_up(cpu); |
---|
| 1596 | + |
---|
| 1597 | + /* |
---|
 | 1598 | + * A CPU hotplug operation consists of many steps, each of which |
---|
 | 1599 | + * calls back into a core kernel subsystem. A hotplug-in operation |
---|
 | 1600 | + * may get preempted by other CFS tasks and the whole operation |
---|
 | 1601 | + * then gets delayed. Switch the current task from SCHED_NORMAL to |
---|
 | 1602 | + * SCHED_FIFO so that the hotplug-in operation completes quickly |
---|
 | 1603 | + * even under heavy load and the new CPU can start handling the |
---|
 | 1604 | + * workload sooner. |
---|
| 1605 | + */ |
---|
| 1606 | + |
---|
| 1607 | + switch_err = switch_to_rt_policy(); |
---|
| 1608 | + |
---|
1257 | 1609 | err = try_online_node(cpu_to_node(cpu)); |
---|
1258 | 1610 | if (err) |
---|
1259 | | - return err; |
---|
| 1611 | + goto switch_out; |
---|
1260 | 1612 | |
---|
1261 | 1613 | cpu_maps_update_begin(); |
---|
1262 | 1614 | |
---|
.. | .. |
---|
1272 | 1624 | err = _cpu_up(cpu, 0, target); |
---|
1273 | 1625 | out: |
---|
1274 | 1626 | cpu_maps_update_done(); |
---|
| 1627 | +switch_out: |
---|
| 1628 | + if (!switch_err) { |
---|
| 1629 | + switch_err = switch_to_fair_policy(); |
---|
| 1630 | + if (switch_err) |
---|
| 1631 | + pr_err("Hotplug policy switch err=%d Task %s pid=%d\n", |
---|
| 1632 | + switch_err, current->comm, current->pid); |
---|
| 1633 | + } |
---|
| 1634 | + |
---|
1275 | 1635 | return err; |
---|
1276 | 1636 | } |
---|
1277 | 1637 | |
---|
1278 | | -int cpu_up(unsigned int cpu) |
---|
| 1638 | +/** |
---|
| 1639 | + * cpu_device_up - Bring up a cpu device |
---|
| 1640 | + * @dev: Pointer to the cpu device to online |
---|
| 1641 | + * |
---|
| 1642 | + * This function is meant to be used by device core cpu subsystem only. |
---|
| 1643 | + * |
---|
| 1644 | + * Other subsystems should use add_cpu() instead. |
---|
| 1645 | + */ |
---|
| 1646 | +int cpu_device_up(struct device *dev) |
---|
1279 | 1647 | { |
---|
1280 | | - return do_cpu_up(cpu, CPUHP_ONLINE); |
---|
| 1648 | + return cpu_up(dev->id, CPUHP_ONLINE); |
---|
1281 | 1649 | } |
---|
1282 | | -EXPORT_SYMBOL_GPL(cpu_up); |
---|
| 1650 | + |
---|
| 1651 | +int add_cpu(unsigned int cpu) |
---|
| 1652 | +{ |
---|
| 1653 | + int ret; |
---|
| 1654 | + |
---|
| 1655 | + lock_device_hotplug(); |
---|
| 1656 | + ret = device_online(get_cpu_device(cpu)); |
---|
| 1657 | + unlock_device_hotplug(); |
---|
| 1658 | + |
---|
| 1659 | + return ret; |
---|
| 1660 | +} |
---|
| 1661 | +EXPORT_SYMBOL_GPL(add_cpu); |
---|
| 1662 | + |
---|
| 1663 | +/** |
---|
| 1664 | + * bringup_hibernate_cpu - Bring up the CPU that we hibernated on |
---|
| 1665 | + * @sleep_cpu: The cpu we hibernated on and should be brought up. |
---|
| 1666 | + * |
---|
| 1667 | + * On some architectures like arm64, we can hibernate on any CPU, but on |
---|
| 1668 | + * wake up the CPU we hibernated on might be offline as a side effect of |
---|
| 1669 | + * using maxcpus= for example. |
---|
| 1670 | + */ |
---|
| 1671 | +int bringup_hibernate_cpu(unsigned int sleep_cpu) |
---|
| 1672 | +{ |
---|
| 1673 | + int ret; |
---|
| 1674 | + |
---|
| 1675 | + if (!cpu_online(sleep_cpu)) { |
---|
| 1676 | + pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n"); |
---|
| 1677 | + ret = cpu_up(sleep_cpu, CPUHP_ONLINE); |
---|
| 1678 | + if (ret) { |
---|
| 1679 | + pr_err("Failed to bring hibernate-CPU up!\n"); |
---|
| 1680 | + return ret; |
---|
| 1681 | + } |
---|
| 1682 | + } |
---|
| 1683 | + return 0; |
---|
| 1684 | +} |
---|
| 1685 | + |
---|
| 1686 | +void bringup_nonboot_cpus(unsigned int setup_max_cpus) |
---|
| 1687 | +{ |
---|
| 1688 | + unsigned int cpu; |
---|
| 1689 | + |
---|
| 1690 | + for_each_present_cpu(cpu) { |
---|
| 1691 | + if (num_online_cpus() >= setup_max_cpus) |
---|
| 1692 | + break; |
---|
| 1693 | + if (!cpu_online(cpu)) |
---|
| 1694 | + cpu_up(cpu, CPUHP_ONLINE); |
---|
| 1695 | + } |
---|
| 1696 | +} |
---|
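bringup_nonboot_cpus() replaces the open-coded bring-up loop in the generic SMP boot code; the expected caller looks roughly like the sketch below (upstream this ends up in smp_init(), stated here as an assumption since that file is not part of this diff).

```c
/* Sketch of the assumed caller in kernel/smp.c: bring up the remaining
 * present CPUs, bounded by the maxcpus= command line limit. */
void __init example_smp_init(void)
{
	/* ... idle threads and hotplug threads are set up first ... */
	bringup_nonboot_cpus(setup_max_cpus);
}
```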
1283 | 1697 | |
---|
1284 | 1698 | #ifdef CONFIG_PM_SLEEP_SMP |
---|
1285 | 1699 | static cpumask_var_t frozen_cpus; |
---|
.. | .. |
---|
1289 | 1703 | int cpu, error = 0; |
---|
1290 | 1704 | |
---|
1291 | 1705 | cpu_maps_update_begin(); |
---|
1292 | | - if (!cpu_online(primary)) |
---|
| 1706 | + if (primary == -1) { |
---|
1293 | 1707 | primary = cpumask_first(cpu_online_mask); |
---|
| 1708 | + if (!housekeeping_cpu(primary, HK_FLAG_TIMER)) |
---|
| 1709 | + primary = housekeeping_any_cpu(HK_FLAG_TIMER); |
---|
| 1710 | + } else { |
---|
| 1711 | + if (!cpu_online(primary)) |
---|
| 1712 | + primary = cpumask_first(cpu_online_mask); |
---|
| 1713 | + } |
---|
| 1714 | + |
---|
1294 | 1715 | /* |
---|
1295 | 1716 | * We take down all of the non-boot CPUs in one shot to avoid races |
---|
1296 | 1717 | * with the userspace trying to use the CPU hotplug at the same time |
---|
.. | .. |
---|
1301 | 1722 | for_each_online_cpu(cpu) { |
---|
1302 | 1723 | if (cpu == primary) |
---|
1303 | 1724 | continue; |
---|
| 1725 | + |
---|
| 1726 | + if (pm_wakeup_pending()) { |
---|
| 1727 | + pr_info("Wakeup pending. Abort CPU freeze\n"); |
---|
| 1728 | + error = -EBUSY; |
---|
| 1729 | + break; |
---|
| 1730 | + } |
---|
| 1731 | + |
---|
1304 | 1732 | trace_suspend_resume(TPS("CPU_OFF"), cpu, true); |
---|
1305 | 1733 | error = _cpu_down(cpu, 1, CPUHP_OFFLINE); |
---|
1306 | 1734 | trace_suspend_resume(TPS("CPU_OFF"), cpu, false); |
---|
.. | .. |
---|
1319 | 1747 | |
---|
1320 | 1748 | /* |
---|
1321 | 1749 | * Make sure the CPUs won't be enabled by someone else. We need to do |
---|
1322 | | - * this even in case of failure as all disable_nonboot_cpus() users are |
---|
1323 | | - * supposed to do enable_nonboot_cpus() on the failure path. |
---|
| 1750 | + * this even in case of failure as all freeze_secondary_cpus() users are |
---|
| 1751 | + * supposed to do thaw_secondary_cpus() on the failure path. |
---|
1324 | 1752 | */ |
---|
1325 | 1753 | cpu_hotplug_disabled++; |
---|
1326 | 1754 | |
---|
.. | .. |
---|
1328 | 1756 | return error; |
---|
1329 | 1757 | } |
---|
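With the primary == -1 handling added above, freeze_secondary_cpus(-1) asks the function to pick the primary CPU itself, preferring one that keeps timer housekeeping duty, while an explicit CPU argument behaves as before. A hypothetical suspend-side caller could simply pass -1 (sketch):

```c
/* Hypothetical caller (sketch): let freeze_secondary_cpus() choose a
 * housekeeping-capable primary CPU instead of hard-coding CPU 0. */
static int example_suspend_disable_secondary_cpus(void)
{
	return freeze_secondary_cpus(-1);
}
```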
1330 | 1758 | |
---|
1331 | | -void __weak arch_enable_nonboot_cpus_begin(void) |
---|
| 1759 | +void __weak arch_thaw_secondary_cpus_begin(void) |
---|
1332 | 1760 | { |
---|
1333 | 1761 | } |
---|
1334 | 1762 | |
---|
1335 | | -void __weak arch_enable_nonboot_cpus_end(void) |
---|
| 1763 | +void __weak arch_thaw_secondary_cpus_end(void) |
---|
1336 | 1764 | { |
---|
1337 | 1765 | } |
---|
1338 | 1766 | |
---|
1339 | | -void enable_nonboot_cpus(void) |
---|
| 1767 | +void thaw_secondary_cpus(void) |
---|
1340 | 1768 | { |
---|
1341 | 1769 | int cpu, error; |
---|
1342 | 1770 | struct device *cpu_device; |
---|
.. | .. |
---|
1349 | 1777 | |
---|
1350 | 1778 | pr_info("Enabling non-boot CPUs ...\n"); |
---|
1351 | 1779 | |
---|
1352 | | - arch_enable_nonboot_cpus_begin(); |
---|
| 1780 | + arch_thaw_secondary_cpus_begin(); |
---|
1353 | 1781 | |
---|
1354 | 1782 | for_each_cpu(cpu, frozen_cpus) { |
---|
1355 | 1783 | trace_suspend_resume(TPS("CPU_ON"), cpu, true); |
---|
.. | .. |
---|
1368 | 1796 | pr_warn("Error taking CPU%d up: %d\n", cpu, error); |
---|
1369 | 1797 | } |
---|
1370 | 1798 | |
---|
1371 | | - arch_enable_nonboot_cpus_end(); |
---|
| 1799 | + arch_thaw_secondary_cpus_end(); |
---|
1372 | 1800 | |
---|
1373 | 1801 | cpumask_clear(frozen_cpus); |
---|
1374 | 1802 | out: |
---|
.. | .. |
---|
1434 | 1862 | |
---|
1435 | 1863 | int __boot_cpu_id; |
---|
1436 | 1864 | |
---|
| 1865 | +/* Horrific hacks because we can't add more to cpuhp_hp_states. */ |
---|
| 1866 | +static int random_and_perf_prepare_fusion(unsigned int cpu) |
---|
| 1867 | +{ |
---|
| 1868 | +#ifdef CONFIG_PERF_EVENTS |
---|
| 1869 | + perf_event_init_cpu(cpu); |
---|
| 1870 | +#endif |
---|
| 1871 | + random_prepare_cpu(cpu); |
---|
| 1872 | + return 0; |
---|
| 1873 | +} |
---|
| 1874 | +static int random_and_workqueue_online_fusion(unsigned int cpu) |
---|
| 1875 | +{ |
---|
| 1876 | + workqueue_online_cpu(cpu); |
---|
| 1877 | + random_online_cpu(cpu); |
---|
| 1878 | + return 0; |
---|
| 1879 | +} |
---|
| 1880 | + |
---|
1437 | 1881 | #endif /* CONFIG_SMP */ |
---|
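The two "fusion" wrappers above exist because this kernel cannot extend the fixed cpuhp_hp_states[] table; code that is free to add states would normally register a dynamic one instead. A minimal sketch of that alternative (the subsystem name and callback are hypothetical):

```c
/* Sketch of the usual alternative: register a dynamic hotplug state
 * rather than fusing a new callback into an existing fixed step. */
static int example_online(unsigned int cpu)
{
	/* per-CPU bring-up work for a hypothetical subsystem */
	return 0;
}

static int __init example_register_hp_state(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "example:online",
				example_online, NULL);
	return ret < 0 ? ret : 0;	/* dyn states return the allocated state number */
}
```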
1438 | 1882 | |
---|
1439 | 1883 | /* Boot processor state steps */ |
---|
.. | .. |
---|
1452 | 1896 | }, |
---|
1453 | 1897 | [CPUHP_PERF_PREPARE] = { |
---|
1454 | 1898 | .name = "perf:prepare", |
---|
1455 | | - .startup.single = perf_event_init_cpu, |
---|
| 1899 | + .startup.single = random_and_perf_prepare_fusion, |
---|
1456 | 1900 | .teardown.single = perf_event_exit_cpu, |
---|
1457 | 1901 | }, |
---|
1458 | 1902 | [CPUHP_WORKQUEUE_PREP] = { |
---|
.. | .. |
---|
1568 | 2012 | }, |
---|
1569 | 2013 | [CPUHP_AP_WORKQUEUE_ONLINE] = { |
---|
1570 | 2014 | .name = "workqueue:online", |
---|
1571 | | - .startup.single = workqueue_online_cpu, |
---|
| 2015 | + .startup.single = random_and_workqueue_online_fusion, |
---|
1572 | 2016 | .teardown.single = workqueue_offline_cpu, |
---|
1573 | 2017 | }, |
---|
1574 | 2018 | [CPUHP_AP_RCUTREE_ONLINE] = { |
---|
.. | .. |
---|
1979 | 2423 | } |
---|
1980 | 2424 | EXPORT_SYMBOL(__cpuhp_remove_state); |
---|
1981 | 2425 | |
---|
| 2426 | +#ifdef CONFIG_HOTPLUG_SMT |
---|
| 2427 | +static void cpuhp_offline_cpu_device(unsigned int cpu) |
---|
| 2428 | +{ |
---|
| 2429 | + struct device *dev = get_cpu_device(cpu); |
---|
| 2430 | + |
---|
| 2431 | + dev->offline = true; |
---|
| 2432 | + /* Tell user space about the state change */ |
---|
| 2433 | + kobject_uevent(&dev->kobj, KOBJ_OFFLINE); |
---|
| 2434 | +} |
---|
| 2435 | + |
---|
| 2436 | +static void cpuhp_online_cpu_device(unsigned int cpu) |
---|
| 2437 | +{ |
---|
| 2438 | + struct device *dev = get_cpu_device(cpu); |
---|
| 2439 | + |
---|
| 2440 | + dev->offline = false; |
---|
| 2441 | + /* Tell user space about the state change */ |
---|
| 2442 | + kobject_uevent(&dev->kobj, KOBJ_ONLINE); |
---|
| 2443 | +} |
---|
| 2444 | + |
---|
| 2445 | +int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) |
---|
| 2446 | +{ |
---|
| 2447 | + int cpu, ret = 0; |
---|
| 2448 | + |
---|
| 2449 | + cpu_maps_update_begin(); |
---|
| 2450 | + for_each_online_cpu(cpu) { |
---|
| 2451 | + if (topology_is_primary_thread(cpu)) |
---|
| 2452 | + continue; |
---|
| 2453 | + ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE); |
---|
| 2454 | + if (ret) |
---|
| 2455 | + break; |
---|
| 2456 | + /* |
---|
| 2457 | + * As this needs to hold the cpu maps lock it's impossible |
---|
| 2458 | + * to call device_offline() because that ends up calling |
---|
| 2459 | + * cpu_down() which takes cpu maps lock. cpu maps lock |
---|
| 2460 | + * needs to be held as this might race against in kernel |
---|
| 2461 | + * abusers of the hotplug machinery (thermal management). |
---|
| 2462 | + * |
---|
| 2463 | + * So nothing would update device:offline state. That would |
---|
| 2464 | + * leave the sysfs entry stale and prevent onlining after |
---|
| 2465 | + * smt control has been changed to 'off' again. This is |
---|
| 2466 | + * called under the sysfs hotplug lock, so it is properly |
---|
| 2467 | + * serialized against the regular offline usage. |
---|
| 2468 | + */ |
---|
| 2469 | + cpuhp_offline_cpu_device(cpu); |
---|
| 2470 | + } |
---|
| 2471 | + if (!ret) |
---|
| 2472 | + cpu_smt_control = ctrlval; |
---|
| 2473 | + cpu_maps_update_done(); |
---|
| 2474 | + return ret; |
---|
| 2475 | +} |
---|
| 2476 | + |
---|
| 2477 | +int cpuhp_smt_enable(void) |
---|
| 2478 | +{ |
---|
| 2479 | + int cpu, ret = 0; |
---|
| 2480 | + |
---|
| 2481 | + cpu_maps_update_begin(); |
---|
| 2482 | + cpu_smt_control = CPU_SMT_ENABLED; |
---|
| 2483 | + for_each_present_cpu(cpu) { |
---|
| 2484 | + /* Skip online CPUs and CPUs on offline nodes */ |
---|
| 2485 | + if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) |
---|
| 2486 | + continue; |
---|
| 2487 | + ret = _cpu_up(cpu, 0, CPUHP_ONLINE); |
---|
| 2488 | + if (ret) |
---|
| 2489 | + break; |
---|
| 2490 | + /* See comment in cpuhp_smt_disable() */ |
---|
| 2491 | + cpuhp_online_cpu_device(cpu); |
---|
| 2492 | + } |
---|
| 2493 | + cpu_maps_update_done(); |
---|
| 2494 | + return ret; |
---|
| 2495 | +} |
---|
| 2496 | +#endif |
---|
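cpuhp_smt_disable()/cpuhp_smt_enable() now live outside the sysfs-only section; the sysfs "control" store handler below remains their main user. A hypothetical caller simply mirrors that handler (sketch):

```c
/* Hypothetical wrapper (sketch): mirrors what the sysfs control file
 * does with the helpers above. */
static int example_set_smt(bool enable)
{
	if (enable)
		return cpuhp_smt_enable();
	return cpuhp_smt_disable(CPU_SMT_DISABLED);
}
```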
| 2497 | + |
---|
1982 | 2498 | #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU) |
---|
1983 | 2499 | static ssize_t show_cpuhp_state(struct device *dev, |
---|
1984 | 2500 | struct device_attribute *attr, char *buf) |
---|
.. | .. |
---|
2021 | 2537 | goto out; |
---|
2022 | 2538 | |
---|
2023 | 2539 | if (st->state < target) |
---|
2024 | | - ret = do_cpu_up(dev->id, target); |
---|
| 2540 | + ret = cpu_up(dev->id, target); |
---|
2025 | 2541 | else |
---|
2026 | | - ret = do_cpu_down(dev->id, target); |
---|
| 2542 | + ret = cpu_down(dev->id, target); |
---|
2027 | 2543 | out: |
---|
2028 | 2544 | unlock_device_hotplug(); |
---|
2029 | 2545 | return ret ? ret : count; |
---|
.. | .. |
---|
2133 | 2649 | |
---|
2134 | 2650 | #ifdef CONFIG_HOTPLUG_SMT |
---|
2135 | 2651 | |
---|
2136 | | -static const char *smt_states[] = { |
---|
2137 | | - [CPU_SMT_ENABLED] = "on", |
---|
2138 | | - [CPU_SMT_DISABLED] = "off", |
---|
2139 | | - [CPU_SMT_FORCE_DISABLED] = "forceoff", |
---|
2140 | | - [CPU_SMT_NOT_SUPPORTED] = "notsupported", |
---|
2141 | | -}; |
---|
2142 | | - |
---|
2143 | 2652 | static ssize_t |
---|
2144 | | -show_smt_control(struct device *dev, struct device_attribute *attr, char *buf) |
---|
2145 | | -{ |
---|
2146 | | - return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]); |
---|
2147 | | -} |
---|
2148 | | - |
---|
2149 | | -static void cpuhp_offline_cpu_device(unsigned int cpu) |
---|
2150 | | -{ |
---|
2151 | | - struct device *dev = get_cpu_device(cpu); |
---|
2152 | | - |
---|
2153 | | - dev->offline = true; |
---|
2154 | | - /* Tell user space about the state change */ |
---|
2155 | | - kobject_uevent(&dev->kobj, KOBJ_OFFLINE); |
---|
2156 | | -} |
---|
2157 | | - |
---|
2158 | | -static void cpuhp_online_cpu_device(unsigned int cpu) |
---|
2159 | | -{ |
---|
2160 | | - struct device *dev = get_cpu_device(cpu); |
---|
2161 | | - |
---|
2162 | | - dev->offline = false; |
---|
2163 | | - /* Tell user space about the state change */ |
---|
2164 | | - kobject_uevent(&dev->kobj, KOBJ_ONLINE); |
---|
2165 | | -} |
---|
2166 | | - |
---|
2167 | | -int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) |
---|
2168 | | -{ |
---|
2169 | | - int cpu, ret = 0; |
---|
2170 | | - |
---|
2171 | | - cpu_maps_update_begin(); |
---|
2172 | | - for_each_online_cpu(cpu) { |
---|
2173 | | - if (topology_is_primary_thread(cpu)) |
---|
2174 | | - continue; |
---|
2175 | | - ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE); |
---|
2176 | | - if (ret) |
---|
2177 | | - break; |
---|
2178 | | - /* |
---|
2179 | | - * As this needs to hold the cpu maps lock it's impossible |
---|
2180 | | - * to call device_offline() because that ends up calling |
---|
2181 | | - * cpu_down() which takes cpu maps lock. cpu maps lock |
---|
2182 | | - * needs to be held as this might race against in kernel |
---|
2183 | | - * abusers of the hotplug machinery (thermal management). |
---|
2184 | | - * |
---|
2185 | | - * So nothing would update device:offline state. That would |
---|
2186 | | - * leave the sysfs entry stale and prevent onlining after |
---|
2187 | | - * smt control has been changed to 'off' again. This is |
---|
2188 | | - * called under the sysfs hotplug lock, so it is properly |
---|
2189 | | - * serialized against the regular offline usage. |
---|
2190 | | - */ |
---|
2191 | | - cpuhp_offline_cpu_device(cpu); |
---|
2192 | | - } |
---|
2193 | | - if (!ret) |
---|
2194 | | - cpu_smt_control = ctrlval; |
---|
2195 | | - cpu_maps_update_done(); |
---|
2196 | | - return ret; |
---|
2197 | | -} |
---|
2198 | | - |
---|
2199 | | -int cpuhp_smt_enable(void) |
---|
2200 | | -{ |
---|
2201 | | - int cpu, ret = 0; |
---|
2202 | | - |
---|
2203 | | - cpu_maps_update_begin(); |
---|
2204 | | - cpu_smt_control = CPU_SMT_ENABLED; |
---|
2205 | | - for_each_present_cpu(cpu) { |
---|
2206 | | - /* Skip online CPUs and CPUs on offline nodes */ |
---|
2207 | | - if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) |
---|
2208 | | - continue; |
---|
2209 | | - ret = _cpu_up(cpu, 0, CPUHP_ONLINE); |
---|
2210 | | - if (ret) |
---|
2211 | | - break; |
---|
2212 | | - /* See comment in cpuhp_smt_disable() */ |
---|
2213 | | - cpuhp_online_cpu_device(cpu); |
---|
2214 | | - } |
---|
2215 | | - cpu_maps_update_done(); |
---|
2216 | | - return ret; |
---|
2217 | | -} |
---|
2218 | | - |
---|
2219 | | -static ssize_t |
---|
2220 | | -store_smt_control(struct device *dev, struct device_attribute *attr, |
---|
2221 | | - const char *buf, size_t count) |
---|
| 2653 | +__store_smt_control(struct device *dev, struct device_attribute *attr, |
---|
| 2654 | + const char *buf, size_t count) |
---|
2222 | 2655 | { |
---|
2223 | 2656 | int ctrlval, ret; |
---|
2224 | 2657 | |
---|
.. | .. |
---|
2256 | 2689 | unlock_device_hotplug(); |
---|
2257 | 2690 | return ret ? ret : count; |
---|
2258 | 2691 | } |
---|
| 2692 | + |
---|
| 2693 | +#else /* !CONFIG_HOTPLUG_SMT */ |
---|
| 2694 | +static ssize_t |
---|
| 2695 | +__store_smt_control(struct device *dev, struct device_attribute *attr, |
---|
| 2696 | + const char *buf, size_t count) |
---|
| 2697 | +{ |
---|
| 2698 | + return -ENODEV; |
---|
| 2699 | +} |
---|
| 2700 | +#endif /* CONFIG_HOTPLUG_SMT */ |
---|
| 2701 | + |
---|
| 2702 | +static const char *smt_states[] = { |
---|
| 2703 | + [CPU_SMT_ENABLED] = "on", |
---|
| 2704 | + [CPU_SMT_DISABLED] = "off", |
---|
| 2705 | + [CPU_SMT_FORCE_DISABLED] = "forceoff", |
---|
| 2706 | + [CPU_SMT_NOT_SUPPORTED] = "notsupported", |
---|
| 2707 | + [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented", |
---|
| 2708 | +}; |
---|
| 2709 | + |
---|
| 2710 | +static ssize_t |
---|
| 2711 | +show_smt_control(struct device *dev, struct device_attribute *attr, char *buf) |
---|
| 2712 | +{ |
---|
| 2713 | + const char *state = smt_states[cpu_smt_control]; |
---|
| 2714 | + |
---|
| 2715 | + return snprintf(buf, PAGE_SIZE - 2, "%s\n", state); |
---|
| 2716 | +} |
---|
| 2717 | + |
---|
| 2718 | +static ssize_t |
---|
| 2719 | +store_smt_control(struct device *dev, struct device_attribute *attr, |
---|
| 2720 | + const char *buf, size_t count) |
---|
| 2721 | +{ |
---|
| 2722 | + return __store_smt_control(dev, attr, buf, count); |
---|
| 2723 | +} |
---|
2259 | 2724 | static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control); |
---|
2260 | 2725 | |
---|
2261 | 2726 | static ssize_t |
---|
2262 | 2727 | show_smt_active(struct device *dev, struct device_attribute *attr, char *buf) |
---|
2263 | 2728 | { |
---|
2264 | | - bool active = topology_max_smt_threads() > 1; |
---|
2265 | | - |
---|
2266 | | - return snprintf(buf, PAGE_SIZE - 2, "%d\n", active); |
---|
| 2729 | + return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active()); |
---|
2267 | 2730 | } |
---|
2268 | 2731 | static DEVICE_ATTR(active, 0444, show_smt_active, NULL); |
---|
2269 | 2732 | |
---|
.. | .. |
---|
2279 | 2742 | NULL |
---|
2280 | 2743 | }; |
---|
2281 | 2744 | |
---|
2282 | | -static int __init cpu_smt_state_init(void) |
---|
| 2745 | +static int __init cpu_smt_sysfs_init(void) |
---|
2283 | 2746 | { |
---|
2284 | 2747 | return sysfs_create_group(&cpu_subsys.dev_root->kobj, |
---|
2285 | 2748 | &cpuhp_smt_attr_group); |
---|
2286 | 2749 | } |
---|
2287 | 2750 | |
---|
2288 | | -#else |
---|
2289 | | -static inline int cpu_smt_state_init(void) { return 0; } |
---|
2290 | | -#endif |
---|
2291 | | - |
---|
2292 | 2751 | static int __init cpuhp_sysfs_init(void) |
---|
2293 | 2752 | { |
---|
2294 | 2753 | int cpu, ret; |
---|
2295 | 2754 | |
---|
2296 | | - ret = cpu_smt_state_init(); |
---|
| 2755 | + ret = cpu_smt_sysfs_init(); |
---|
2297 | 2756 | if (ret) |
---|
2298 | 2757 | return ret; |
---|
2299 | 2758 | |
---|
.. | .. |
---|
2314 | 2773 | return 0; |
---|
2315 | 2774 | } |
---|
2316 | 2775 | device_initcall(cpuhp_sysfs_init); |
---|
2317 | | -#endif |
---|
| 2776 | +#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */ |
---|
2318 | 2777 | |
---|
2319 | 2778 | /* |
---|
2320 | 2779 | * cpu_bit_bitmap[] is a special, "compressed" data structure that |
---|
.. | .. |
---|
2361 | 2820 | struct cpumask __cpu_active_mask __read_mostly; |
---|
2362 | 2821 | EXPORT_SYMBOL(__cpu_active_mask); |
---|
2363 | 2822 | |
---|
2364 | | -struct cpumask __cpu_isolated_mask __read_mostly; |
---|
2365 | | -EXPORT_SYMBOL(__cpu_isolated_mask); |
---|
| 2823 | +atomic_t __num_online_cpus __read_mostly; |
---|
| 2824 | +EXPORT_SYMBOL(__num_online_cpus); |
---|
2366 | 2825 | |
---|
2367 | 2826 | void init_cpu_present(const struct cpumask *src) |
---|
2368 | 2827 | { |
---|
.. | .. |
---|
2377 | 2836 | void init_cpu_online(const struct cpumask *src) |
---|
2378 | 2837 | { |
---|
2379 | 2838 | cpumask_copy(&__cpu_online_mask, src); |
---|
| 2839 | +} |
---|
| 2840 | + |
---|
| 2841 | +void set_cpu_online(unsigned int cpu, bool online) |
---|
| 2842 | +{ |
---|
| 2843 | + /* |
---|
| 2844 | + * atomic_inc/dec() is required to handle the horrid abuse of this |
---|
| 2845 | + * function by the reboot and kexec code which invoke it from |
---|
| 2846 | + * IPI/NMI broadcasts when shutting down CPUs. Invocation from |
---|
| 2847 | + * regular CPU hotplug is properly serialized. |
---|
| 2848 | + * |
---|
| 2849 | + * Note, that the fact that __num_online_cpus is of type atomic_t |
---|
| 2850 | + * does not protect readers which are not serialized against |
---|
| 2851 | + * concurrent hotplug operations. |
---|
| 2852 | + */ |
---|
| 2853 | + if (online) { |
---|
| 2854 | + if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask)) |
---|
| 2855 | + atomic_inc(&__num_online_cpus); |
---|
| 2856 | + } else { |
---|
| 2857 | + if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask)) |
---|
| 2858 | + atomic_dec(&__num_online_cpus); |
---|
| 2859 | + } |
---|
2380 | 2860 | } |
---|
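__num_online_cpus pairs with a reader-side change that is not in this file: num_online_cpus() is expected to become a plain atomic read instead of a cpumask_weight() walk (an assumption about include/linux/cpumask.h, shown as a sketch):

```c
/* Assumed counterpart in include/linux/cpumask.h (sketch): readers get a
 * cheap atomic read; unserialized readers still race with hotplug, as
 * the comment in set_cpu_online() above notes. */
static inline unsigned int example_num_online_cpus(void)
{
	return atomic_read(&__num_online_cpus);
}
```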
2381 | 2861 | |
---|
2382 | 2862 | /* |
---|
.. | .. |
---|
2403 | 2883 | void __init boot_cpu_hotplug_init(void) |
---|
2404 | 2884 | { |
---|
2405 | 2885 | #ifdef CONFIG_SMP |
---|
2406 | | - this_cpu_write(cpuhp_state.booted_once, true); |
---|
| 2886 | + cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask); |
---|
2407 | 2887 | #endif |
---|
2408 | 2888 | this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); |
---|
2409 | 2889 | } |
---|