~hc/RK356X_SDK_RELEASE.git

..	..	@@ -10,6 +10,7 @@
10	10	#include <linux/notifier.h>
11	11	#include <linux/sched/signal.h>
12	12	#include <linux/sched/hotplug.h>
	13	+#include <linux/sched/isolation.h>
13	14	#include <linux/sched/task.h>
14	15	#include <linux/sched/smt.h>
15	16	#include <linux/unistd.h>
..	..	@@ -30,13 +31,21 @@
30	31	#include <linux/smpboot.h>
31	32	#include <linux/relay.h>
32	33	#include <linux/slab.h>
	34	+#include <linux/scs.h>
33	35	#include <linux/percpu-rwsem.h>
34	36	#include <linux/cpuset.h>
	37	+#include <linux/random.h>
	38	+#include <uapi/linux/sched/types.h>
35	39
36	40	#include <trace/events/power.h>
37	41	#define CREATE_TRACE_POINTS
38	42	#include <trace/events/cpuhp.h>
39	43
	44	+#undef CREATE_TRACE_POINTS
	45	+#include <trace/hooks/sched.h>
	46	+#include <trace/hooks/cpu.h>
	47	+
	48	+#include "sched/sched.h"
40	49	#include "smpboot.h"
41	50
42	51	/**
..	..	@@ -63,7 +72,6 @@
63	72	bool rollback;
64	73	bool single;
65	74	bool bringup;
66		- bool booted_once;
67	75	struct hlist_node *node;
68	76	struct hlist_node *last;
69	77	enum cpuhp_state cb_state;
..	..	@@ -76,6 +84,10 @@
76	84	static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
77	85	.fail = CPUHP_INVALID,
78	86	};
	87	+
	88	+#ifdef CONFIG_SMP
	89	+cpumask_t cpus_booted_once_mask;
	90	+#endif
79	91
80	92	#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
81	93	static struct lockdep_map cpuhp_state_up_map =
..	..	@@ -269,11 +281,13 @@
269	281	{
270	282	mutex_lock(&cpu_add_remove_lock);
271	283	}
	284	+EXPORT_SYMBOL_GPL(cpu_maps_update_begin);
272	285
273	286	void cpu_maps_update_done(void)
274	287	{
275	288	mutex_unlock(&cpu_add_remove_lock);
276	289	}
	290	+EXPORT_SYMBOL_GPL(cpu_maps_update_done);
277	291
278	292	/*
279	293	* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
..	..	@@ -327,6 +341,16 @@
327	341	percpu_rwsem_assert_held(&cpu_hotplug_lock);
328	342	}
329	343
	344	+static void lockdep_acquire_cpus_lock(void)
	345	+{
	346	+ rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
	347	+}
	348	+
	349	+static void lockdep_release_cpus_lock(void)
	350	+{
	351	+ rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
	352	+}
	353	+
330	354	/*
331	355	* Wait for currently running CPU hotplug operations to complete (if any) and
332	356	* disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
..	..	@@ -356,6 +380,17 @@
356	380	cpu_maps_update_done();
357	381	}
358	382	EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
	383	+
	384	+#else
	385	+
	386	+static void lockdep_acquire_cpus_lock(void)
	387	+{
	388	+}
	389	+
	390	+static void lockdep_release_cpus_lock(void)
	391	+{
	392	+}
	393	+
359	394	#endif /* CONFIG_HOTPLUG_CPU */
360	395
361	396	/*
..	..	@@ -369,8 +404,7 @@
369	404
370	405	void __init cpu_smt_disable(bool force)
371	406	{
372		- if (cpu_smt_control == CPU_SMT_FORCE_DISABLED \|\|
373		- cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
	407	+ if (!cpu_smt_possible())
374	408	return;
375	409
376	410	if (force) {
..	..	@@ -410,11 +444,19 @@
410	444	/*
411	445	* On x86 it's required to boot all logical CPUs at least once so
412	446	* that the init code can get a chance to set CR4.MCE on each
413		- * CPU. Otherwise, a broadacasted MCE observing CR4.MCE=0b on any
	447	+ * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
414	448	* core will shutdown the machine.
415	449	*/
416		- return !per_cpu(cpuhp_state, cpu).booted_once;
	450	+ return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
417	451	}
	452	+
	453	+/* Returns true if SMT is supported and not forcefully (irreversibly) disabled */
	454	+bool cpu_smt_possible(void)
	455	+{
	456	+ return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
	457	+ cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
	458	+}
	459	+EXPORT_SYMBOL_GPL(cpu_smt_possible);
418	460	#else
419	461	static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
420	462	#endif
..	..	@@ -501,7 +543,7 @@
501	543	/*
502	544	* SMT soft disabling on X86 requires to bring the CPU out of the
503	545	* BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
504		- * CPU marked itself as booted_once in cpu_notify_starting() so the
	546	+ * CPU marked itself as booted_once in notify_cpu_starting() so the
505	547	* cpu_smt_allowed() check will now return false if this is not the
506	548	* primary sibling.
507	549	*/
..	..	@@ -518,6 +560,12 @@
518	560	{
519	561	struct task_struct *idle = idle_thread_get(cpu);
520	562	int ret;
	563	+
	564	+ /*
	565	+ * Reset stale stack state from the last time this CPU was online.
	566	+ */
	567	+ scs_task_reset(idle);
	568	+ kasan_unpoison_task_stack(idle);
521	569
522	570	/*
523	571	* Some architectures have to walk the irq descriptors to
..	..	@@ -640,6 +688,12 @@
640	688	*/
641	689	smp_mb();
642	690
	691	+ /*
	692	+ * The BP holds the hotplug lock, but we're now running on the AP,
	693	+ * ensure that anybody asserting the lock is held, will actually find
	694	+ * it so.
	695	+ */
	696	+ lockdep_acquire_cpus_lock();
643	697	cpuhp_lock_acquire(bringup);
644	698
645	699	if (st->single) {
..	..	@@ -685,6 +739,7 @@
685	739	}
686	740
687	741	cpuhp_lock_release(bringup);
	742	+ lockdep_release_cpus_lock();
688	743
689	744	if (!st->should_run)
690	745	complete_ap_thread(st, bringup);
..	..	@@ -898,6 +953,8 @@
898	953
899	954	/* Give up timekeeping duties */
900	955	tick_handover_do_timer();
	956	+ /* Remove CPU from timer broadcasting */
	957	+ tick_offline_cpu(cpu);
901	958	/* Park the stopper thread */
902	959	stop_machine_park(cpu);
903	960	return 0;
..	..	@@ -1005,7 +1062,7 @@
1005	1062	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1006	1063	int prev_state, ret = 0;
1007	1064
1008		- if (num_online_cpus() == 1)
	1065	+ if (num_active_cpus() == 1 && cpu_active(cpu))
1009	1066	return -EBUSY;
1010	1067
1011	1068	if (!cpu_present(cpu))
..	..	@@ -1068,7 +1125,7 @@
1068	1125	return _cpu_down(cpu, 0, target);
1069	1126	}
1070	1127
1071		-static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
	1128	+static int cpu_down(unsigned int cpu, enum cpuhp_state target)
1072	1129	{
1073	1130	int err;
1074	1131
..	..	@@ -1078,11 +1135,315 @@
1078	1135	return err;
1079	1136	}
1080	1137
1081		-int cpu_down(unsigned int cpu)
	1138	+/**
	1139	+ * cpu_device_down - Bring down a cpu device
	1140	+ * @dev: Pointer to the cpu device to offline
	1141	+ *
	1142	+ * This function is meant to be used by device core cpu subsystem only.
	1143	+ *
	1144	+ * Other subsystems should use remove_cpu() instead.
	1145	+ */
	1146	+int cpu_device_down(struct device *dev)
1082	1147	{
1083		- return do_cpu_down(cpu, CPUHP_OFFLINE);
	1148	+ return cpu_down(dev->id, CPUHP_OFFLINE);
1084	1149	}
1085		-EXPORT_SYMBOL(cpu_down);
	1150	+
	1151	+int remove_cpu(unsigned int cpu)
	1152	+{
	1153	+ int ret;
	1154	+
	1155	+ lock_device_hotplug();
	1156	+ ret = device_offline(get_cpu_device(cpu));
	1157	+ unlock_device_hotplug();
	1158	+
	1159	+ return ret;
	1160	+}
	1161	+EXPORT_SYMBOL_GPL(remove_cpu);
	1162	+
	1163	+int __pause_drain_rq(struct cpumask *cpus)
	1164	+{
	1165	+ unsigned int cpu;
	1166	+ int err = 0;
	1167	+
	1168	+ /*
	1169	+ * Disabling preemption prevents one of the stoppers, started from
	1170	+ * sched_cpu_drain_rq(), from blocking the start of draining for the whole cpumask.
	1171	+ */
	1172	+ preempt_disable();
	1173	+ for_each_cpu(cpu, cpus) {
	1174	+ err = sched_cpu_drain_rq(cpu);
	1175	+ if (err)
	1176	+ break;
	1177	+ }
	1178	+ preempt_enable();
	1179	+
	1180	+ return err;
	1181	+}
	1182	+
	1183	+void __wait_drain_rq(struct cpumask *cpus)
	1184	+{
	1185	+ unsigned int cpu;
	1186	+
	1187	+ for_each_cpu(cpu, cpus)
	1188	+ sched_cpu_drain_rq_wait(cpu);
	1189	+}
	1190	+
	1191	+/* if rt task, set to cfs and return previous prio */
	1192	+static int pause_reduce_prio(void)
	1193	+{
	1194	+ int prev_prio = -1;
	1195	+
	1196	+ if (current->prio < MAX_RT_PRIO) {
	1197	+ struct sched_param param = { .sched_priority = 0 };
	1198	+
	1199	+ prev_prio = current->prio;
	1200	+ sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
	1201	+ }
	1202	+
	1203	+ return prev_prio;
	1204	+}
	1205	+
	1206	+/* if previous prio was set, restore */
	1207	+static void pause_restore_prio(int prev_prio)
	1208	+{
	1209	+ if (prev_prio >= 0 && prev_prio < MAX_RT_PRIO) {
	1210	+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1-prev_prio };
	1211	+
	1212	+ sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
	1213	+ }
	1214	+}
	1215	+
	1216	+int pause_cpus(struct cpumask *cpus)
	1217	+{
	1218	+ int err = 0;
	1219	+ int cpu;
	1220	+ u64 start_time = 0;
	1221	+ int prev_prio;
	1222	+
	1223	+ start_time = sched_clock();
	1224	+
	1225	+ cpu_maps_update_begin();
	1226	+
	1227	+ if (cpu_hotplug_disabled) {
	1228	+ err = -EBUSY;
	1229	+ goto err_cpu_maps_update;
	1230	+ }
	1231	+
	1232	+ /* Pausing an already inactive CPU isn't an error */
	1233	+ cpumask_and(cpus, cpus, cpu_active_mask);
	1234	+
	1235	+ for_each_cpu(cpu, cpus) {
	1236	+ if (!cpu_online(cpu) \|\| dl_bw_check_overflow(cpu) \|\|
	1237	+ get_cpu_device(cpu)->offline_disabled == true) {
	1238	+ err = -EBUSY;
	1239	+ goto err_cpu_maps_update;
	1240	+ }
	1241	+ }
	1242	+
	1243	+ if (cpumask_weight(cpus) >= num_active_cpus()) {
	1244	+ err = -EBUSY;
	1245	+ goto err_cpu_maps_update;
	1246	+ }
	1247	+
	1248	+ if (cpumask_empty(cpus))
	1249	+ goto err_cpu_maps_update;
	1250	+
	1251	+ /*
	1252	+ * Lazy migration:
	1253	+ *
	1254	+ * We do care about how fast a CPU can go idle and stay in this
	1255	+ * state. If we try to take the cpus_write_lock() here, we would have
	1256	+ * to wait for a few dozens of ms, as this function might schedule.
	1257	+ * However, we can, as a first step, flip the active mask and migrate
	1258	+ * anything currently on the run-queue, to give a chance to the paused
	1259	+ * CPUs to reach quickly an idle state. There's a risk meanwhile for
	1260	+ * another CPU to observe an out-of-date active_mask or to incompletely
	1261	+ * update a cpuset. Both problems would be resolved later in the slow
	1262	+ * path, which ensures active_mask synchronization, triggers a cpuset
	1263	+ * rebuild and migrate any task that would have escaped the lazy
	1264	+ * migration.
	1265	+ */
	1266	+ for_each_cpu(cpu, cpus)
	1267	+ set_cpu_active(cpu, false);
	1268	+ err = __pause_drain_rq(cpus);
	1269	+ if (err) {
	1270	+ __wait_drain_rq(cpus);
	1271	+ for_each_cpu(cpu, cpus)
	1272	+ set_cpu_active(cpu, true);
	1273	+ goto err_cpu_maps_update;
	1274	+ }
	1275	+
	1276	+ prev_prio = pause_reduce_prio();
	1277	+
	1278	+ /*
	1279	+ * Slow path deactivation:
	1280	+ *
	1281	+ * Now that paused CPUs are most likely idle, we can go through a
	1282	+ * complete scheduler deactivation.
	1283	+ *
	1284	+ * The cpu_active_mask being already set and cpus_write_lock calling
	1285	+ * synchronize_rcu(), we know that all preempt-disabled and RCU users
	1286	+ * will observe the updated value.
	1287	+ */
	1288	+ cpus_write_lock();
	1289	+
	1290	+ __wait_drain_rq(cpus);
	1291	+
	1292	+ cpuhp_tasks_frozen = 0;
	1293	+
	1294	+ if (sched_cpus_deactivate_nosync(cpus)) {
	1295	+ err = -EBUSY;
	1296	+ goto err_cpus_write_unlock;
	1297	+ }
	1298	+
	1299	+ err = __pause_drain_rq(cpus);
	1300	+ __wait_drain_rq(cpus);
	1301	+ if (err) {
	1302	+ for_each_cpu(cpu, cpus)
	1303	+ sched_cpu_activate(cpu);
	1304	+ goto err_cpus_write_unlock;
	1305	+ }
	1306	+
	1307	+ /*
	1308	+ * Even if living on the side of the regular HP path, pause is using
	1309	+ * one of the HP steps (CPUHP_AP_ACTIVE). This should be reflected in the
	1310	+ * current state of the CPU.
	1311	+ */
	1312	+ for_each_cpu(cpu, cpus) {
	1313	+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	1314	+
	1315	+ st->state = CPUHP_AP_ACTIVE - 1;
	1316	+ st->target = st->state;
	1317	+ }
	1318	+
	1319	+err_cpus_write_unlock:
	1320	+ cpus_write_unlock();
	1321	+ pause_restore_prio(prev_prio);
	1322	+err_cpu_maps_update:
	1323	+ cpu_maps_update_done();
	1324	+
	1325	+ trace_cpuhp_pause(cpus, start_time, 1);
	1326	+
	1327	+ return err;
	1328	+}
	1329	+EXPORT_SYMBOL_GPL(pause_cpus);
	1330	+
	1331	+int resume_cpus(struct cpumask *cpus)
	1332	+{
	1333	+ unsigned int cpu;
	1334	+ int err = 0;
	1335	+ u64 start_time = 0;
	1336	+ int prev_prio;
	1337	+
	1338	+ start_time = sched_clock();
	1339	+
	1340	+ cpu_maps_update_begin();
	1341	+
	1342	+ if (cpu_hotplug_disabled) {
	1343	+ err = -EBUSY;
	1344	+ goto err_cpu_maps_update;
	1345	+ }
	1346	+
	1347	+ /* Resuming an already active CPU isn't an error */
	1348	+ cpumask_andnot(cpus, cpus, cpu_active_mask);
	1349	+
	1350	+ for_each_cpu(cpu, cpus) {
	1351	+ if (!cpu_online(cpu)) {
	1352	+ err = -EBUSY;
	1353	+ goto err_cpu_maps_update;
	1354	+ }
	1355	+ }
	1356	+
	1357	+ if (cpumask_empty(cpus))
	1358	+ goto err_cpu_maps_update;
	1359	+
	1360	+ for_each_cpu(cpu, cpus)
	1361	+ set_cpu_active(cpu, true);
	1362	+
	1363	+ trace_android_rvh_resume_cpus(cpus, &err);
	1364	+ if (err)
	1365	+ goto err_cpu_maps_update;
	1366	+
	1367	+ prev_prio = pause_reduce_prio();
	1368	+
	1369	+ /* Lazy Resume. Build domains by scheduling a workqueue on the
	1370	+ * resuming cpu. This lets the resuming cpu start working
	1371	+ * earlier and avoids adding load to other busy cpus.
	1372	+ */
	1373	+ cpuset_update_active_cpus_affine(cpumask_first(cpus));
	1374	+
	1375	+ cpus_write_lock();
	1376	+
	1377	+ cpuhp_tasks_frozen = 0;
	1378	+
	1379	+ if (sched_cpus_activate(cpus)) {
	1380	+ err = -EBUSY;
	1381	+ goto err_cpus_write_unlock;
	1382	+ }
	1383	+
	1384	+ /*
	1385	+ * see pause_cpus.
	1386	+ */
	1387	+ for_each_cpu(cpu, cpus) {
	1388	+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	1389	+
	1390	+ st->state = CPUHP_ONLINE;
	1391	+ st->target = st->state;
	1392	+ }
	1393	+
	1394	+err_cpus_write_unlock:
	1395	+ cpus_write_unlock();
	1396	+ pause_restore_prio(prev_prio);
	1397	+err_cpu_maps_update:
	1398	+ cpu_maps_update_done();
	1399	+
	1400	+ trace_cpuhp_pause(cpus, start_time, 0);
	1401	+
	1402	+ return err;
	1403	+}
	1404	+EXPORT_SYMBOL_GPL(resume_cpus);
	1405	+
	1406	+void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
	1407	+{
	1408	+ unsigned int cpu;
	1409	+ int error;
	1410	+
	1411	+ cpu_maps_update_begin();
	1412	+
	1413	+ /*
	1414	+ * Make certain the cpu I'm about to reboot on is online.
	1415	+ *
	1416	+ * This is in line with what migrate_to_reboot_cpu() already does.
	1417	+ */
	1418	+ if (!cpu_online(primary_cpu))
	1419	+ primary_cpu = cpumask_first(cpu_online_mask);
	1420	+
	1421	+ for_each_online_cpu(cpu) {
	1422	+ if (cpu == primary_cpu)
	1423	+ continue;
	1424	+
	1425	+ error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
	1426	+ if (error) {
	1427	+ pr_err("Failed to offline CPU%d - error=%d",
	1428	+ cpu, error);
	1429	+ break;
	1430	+ }
	1431	+ }
	1432	+
	1433	+ /*
	1434	+ * Ensure all but the reboot CPU are offline.
	1435	+ */
	1436	+ BUG_ON(num_online_cpus() > 1);
	1437	+
	1438	+ /*
	1439	+ * Make sure the CPUs won't be enabled by someone else after this
	1440	+ * point. Kexec will reboot to a new kernel shortly resetting
	1441	+ * everything along the way.
	1442	+ */
	1443	+ cpu_hotplug_disabled++;
	1444	+
	1445	+ cpu_maps_update_done();
	1446	+}
1086	1447
1087	1448	#else
1088	1449	#define takedown_cpu NULL
..	..	@@ -1102,7 +1463,7 @@
1102	1463	int ret;
1103	1464
1104	1465	rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
1105		- st->booted_once = true;
	1466	+ cpumask_set_cpu(cpu, &cpus_booted_once_mask);
1106	1467	while (st->state < target) {
1107	1468	st->state++;
1108	1469	ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
..	..	@@ -1136,6 +1497,25 @@
1136	1497	complete_ap_thread(st, true);
1137	1498	}
1138	1499
	1500	+static int switch_to_rt_policy(void)
	1501	+{
	1502	+ struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
	1503	+ unsigned int policy = current->policy;
	1504	+
	1505	+ if (policy == SCHED_NORMAL)
	1506	+ /* Switch to SCHED_FIFO from SCHED_NORMAL. */
	1507	+ return sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
	1508	+ else
	1509	+ return 1;
	1510	+}
	1511	+
	1512	+static int switch_to_fair_policy(void)
	1513	+{
	1514	+ struct sched_param param = { .sched_priority = 0 };
	1515	+
	1516	+ return sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
	1517	+}
	1518	+
1139	1519	/* Requires cpu_add_remove_lock to be held */
1140	1520	static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1141	1521	{
..	..	@@ -1151,8 +1531,8 @@
1151	1531	}
1152	1532
1153	1533	/*
1154		- * The caller of do_cpu_up might have raced with another
1155		- * caller. Ignore it for now.
	1534	+ * The caller of cpu_up() might have raced with another
	1535	+ * caller. Nothing to do.
1156	1536	*/
1157	1537	if (st->state >= target)
1158	1538	goto out;
..	..	@@ -1197,9 +1577,10 @@
1197	1577	return ret;
1198	1578	}
1199	1579
1200		-static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
	1580	+static int cpu_up(unsigned int cpu, enum cpuhp_state target)
1201	1581	{
1202	1582	int err = 0;
	1583	+ int switch_err;
1203	1584
1204	1585	if (!cpu_possible(cpu)) {
1205	1586	pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
..	..	@@ -1210,9 +1591,23 @@
1210	1591	return -EINVAL;
1211	1592	}
1212	1593
	1594	+ trace_android_vh_cpu_up(cpu);
	1595	+
	1596	+ /*
	1597	+ * CPU hotplug operations consist of many steps and each step
	1598	+ * calls a callback of a core kernel subsystem. A CPU hotplug-in
	1599	+ * operation may get preempted by other CFS tasks, delaying the
	1600	+ * whole operation. Switch the current task from SCHED_NORMAL to
	1601	+ * SCHED_FIFO so that the hotplug-in operation can complete
	1602	+ * quickly under heavy load and the new CPU can start handling
	1603	+ * the workload.
	1604	+ */
	1605	+
	1606	+ switch_err = switch_to_rt_policy();
	1607	+
1213	1608	err = try_online_node(cpu_to_node(cpu));
1214	1609	if (err)
1215		- return err;
	1610	+ goto switch_out;
1216	1611
1217	1612	cpu_maps_update_begin();
1218	1613
..	..	@@ -1228,14 +1623,76 @@
1228	1623	err = _cpu_up(cpu, 0, target);
1229	1624	out:
1230	1625	cpu_maps_update_done();
	1626	+switch_out:
	1627	+ if (!switch_err) {
	1628	+ switch_err = switch_to_fair_policy();
	1629	+ if (switch_err)
	1630	+ pr_err("Hotplug policy switch err=%d Task %s pid=%d\n",
	1631	+ switch_err, current->comm, current->pid);
	1632	+ }
	1633	+
1231	1634	return err;
1232	1635	}
1233	1636
1234		-int cpu_up(unsigned int cpu)
	1637	+/**
	1638	+ * cpu_device_up - Bring up a cpu device
	1639	+ * @dev: Pointer to the cpu device to online
	1640	+ *
	1641	+ * This function is meant to be used by device core cpu subsystem only.
	1642	+ *
	1643	+ * Other subsystems should use add_cpu() instead.
	1644	+ */
	1645	+int cpu_device_up(struct device *dev)
1235	1646	{
1236		- return do_cpu_up(cpu, CPUHP_ONLINE);
	1647	+ return cpu_up(dev->id, CPUHP_ONLINE);
1237	1648	}
1238		-EXPORT_SYMBOL_GPL(cpu_up);
	1649	+
	1650	+int add_cpu(unsigned int cpu)
	1651	+{
	1652	+ int ret;
	1653	+
	1654	+ lock_device_hotplug();
	1655	+ ret = device_online(get_cpu_device(cpu));
	1656	+ unlock_device_hotplug();
	1657	+
	1658	+ return ret;
	1659	+}
	1660	+EXPORT_SYMBOL_GPL(add_cpu);
	1661	+
	1662	+/**
	1663	+ * bringup_hibernate_cpu - Bring up the CPU that we hibernated on
	1664	+ * @sleep_cpu: The cpu we hibernated on and should be brought up.
	1665	+ *
	1666	+ * On some architectures like arm64, we can hibernate on any CPU, but on
	1667	+ * wake up the CPU we hibernated on might be offline as a side effect of
	1668	+ * using maxcpus= for example.
	1669	+ */
	1670	+int bringup_hibernate_cpu(unsigned int sleep_cpu)
	1671	+{
	1672	+ int ret;
	1673	+
	1674	+ if (!cpu_online(sleep_cpu)) {
	1675	+ pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
	1676	+ ret = cpu_up(sleep_cpu, CPUHP_ONLINE);
	1677	+ if (ret) {
	1678	+ pr_err("Failed to bring hibernate-CPU up!\n");
	1679	+ return ret;
	1680	+ }
	1681	+ }
	1682	+ return 0;
	1683	+}
	1684	+
	1685	+void bringup_nonboot_cpus(unsigned int setup_max_cpus)
	1686	+{
	1687	+ unsigned int cpu;
	1688	+
	1689	+ for_each_present_cpu(cpu) {
	1690	+ if (num_online_cpus() >= setup_max_cpus)
	1691	+ break;
	1692	+ if (!cpu_online(cpu))
	1693	+ cpu_up(cpu, CPUHP_ONLINE);
	1694	+ }
	1695	+}
1239	1696
1240	1697	#ifdef CONFIG_PM_SLEEP_SMP
1241	1698	static cpumask_var_t frozen_cpus;
..	..	@@ -1245,8 +1702,15 @@
1245	1702	int cpu, error = 0;
1246	1703
1247	1704	cpu_maps_update_begin();
1248		- if (!cpu_online(primary))
	1705	+ if (primary == -1) {
1249	1706	primary = cpumask_first(cpu_online_mask);
	1707	+ if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
	1708	+ primary = housekeeping_any_cpu(HK_FLAG_TIMER);
	1709	+ } else {
	1710	+ if (!cpu_online(primary))
	1711	+ primary = cpumask_first(cpu_online_mask);
	1712	+ }
	1713	+
1250	1714	/*
1251	1715	* We take down all of the non-boot CPUs in one shot to avoid races
1252	1716	* with the userspace trying to use the CPU hotplug at the same time
..	..	@@ -1257,6 +1721,13 @@
1257	1721	for_each_online_cpu(cpu) {
1258	1722	if (cpu == primary)
1259	1723	continue;
	1724	+
	1725	+ if (pm_wakeup_pending()) {
	1726	+ pr_info("Wakeup pending. Abort CPU freeze\n");
	1727	+ error = -EBUSY;
	1728	+ break;
	1729	+ }
	1730	+
1260	1731	trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1261	1732	error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1262	1733	trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
..	..	@@ -1275,8 +1746,8 @@
1275	1746
1276	1747	/*
1277	1748	* Make sure the CPUs won't be enabled by someone else. We need to do
1278		- * this even in case of failure as all disable_nonboot_cpus() users are
1279		- * supposed to do enable_nonboot_cpus() on the failure path.
	1749	+ * this even in case of failure as all freeze_secondary_cpus() users are
	1750	+ * supposed to do thaw_secondary_cpus() on the failure path.
1280	1751	*/
1281	1752	cpu_hotplug_disabled++;
1282	1753
..	..	@@ -1284,15 +1755,15 @@
1284	1755	return error;
1285	1756	}
1286	1757
1287		-void __weak arch_enable_nonboot_cpus_begin(void)
	1758	+void __weak arch_thaw_secondary_cpus_begin(void)
1288	1759	{
1289	1760	}
1290	1761
1291		-void __weak arch_enable_nonboot_cpus_end(void)
	1762	+void __weak arch_thaw_secondary_cpus_end(void)
1292	1763	{
1293	1764	}
1294	1765
1295		-void enable_nonboot_cpus(void)
	1766	+void thaw_secondary_cpus(void)
1296	1767	{
1297	1768	int cpu, error;
1298	1769	struct device *cpu_device;
..	..	@@ -1305,7 +1776,7 @@
1305	1776
1306	1777	pr_info("Enabling non-boot CPUs ...\n");
1307	1778
1308		- arch_enable_nonboot_cpus_begin();
	1779	+ arch_thaw_secondary_cpus_begin();
1309	1780
1310	1781	for_each_cpu(cpu, frozen_cpus) {
1311	1782	trace_suspend_resume(TPS("CPU_ON"), cpu, true);
..	..	@@ -1324,7 +1795,7 @@
1324	1795	pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1325	1796	}
1326	1797
1327		- arch_enable_nonboot_cpus_end();
	1798	+ arch_thaw_secondary_cpus_end();
1328	1799
1329	1800	cpumask_clear(frozen_cpus);
1330	1801	out:
..	..	@@ -1390,6 +1861,22 @@
1390	1861
1391	1862	int __boot_cpu_id;
1392	1863
	1864	+/* Horrific hacks because we can't add more to cpuhp_hp_states. */
	1865	+static int random_and_perf_prepare_fusion(unsigned int cpu)
	1866	+{
	1867	+#ifdef CONFIG_PERF_EVENTS
	1868	+ perf_event_init_cpu(cpu);
	1869	+#endif
	1870	+ random_prepare_cpu(cpu);
	1871	+ return 0;
	1872	+}
	1873	+static int random_and_workqueue_online_fusion(unsigned int cpu)
	1874	+{
	1875	+ workqueue_online_cpu(cpu);
	1876	+ random_online_cpu(cpu);
	1877	+ return 0;
	1878	+}
	1879	+
1393	1880	#endif /* CONFIG_SMP */
1394	1881
1395	1882	/* Boot processor state steps */
..	..	@@ -1408,7 +1895,7 @@
1408	1895	},
1409	1896	[CPUHP_PERF_PREPARE] = {
1410	1897	.name = "perf:prepare",
1411		- .startup.single = perf_event_init_cpu,
	1898	+ .startup.single = random_and_perf_prepare_fusion,
1412	1899	.teardown.single = perf_event_exit_cpu,
1413	1900	},
1414	1901	[CPUHP_WORKQUEUE_PREP] = {
..	..	@@ -1524,7 +2011,7 @@
1524	2011	},
1525	2012	[CPUHP_AP_WORKQUEUE_ONLINE] = {
1526	2013	.name = "workqueue:online",
1527		- .startup.single = workqueue_online_cpu,
	2014	+ .startup.single = random_and_workqueue_online_fusion,
1528	2015	.teardown.single = workqueue_offline_cpu,
1529	2016	},
1530	2017	[CPUHP_AP_RCUTREE_ONLINE] = {
..	..	@@ -1935,6 +2422,78 @@
1935	2422	}
1936	2423	EXPORT_SYMBOL(__cpuhp_remove_state);
1937	2424
	2425	+#ifdef CONFIG_HOTPLUG_SMT
	2426	+static void cpuhp_offline_cpu_device(unsigned int cpu)
	2427	+{
	2428	+ struct device *dev = get_cpu_device(cpu);
	2429	+
	2430	+ dev->offline = true;
	2431	+ /* Tell user space about the state change */
	2432	+ kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
	2433	+}
	2434	+
	2435	+static void cpuhp_online_cpu_device(unsigned int cpu)
	2436	+{
	2437	+ struct device *dev = get_cpu_device(cpu);
	2438	+
	2439	+ dev->offline = false;
	2440	+ /* Tell user space about the state change */
	2441	+ kobject_uevent(&dev->kobj, KOBJ_ONLINE);
	2442	+}
	2443	+
	2444	+int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
	2445	+{
	2446	+ int cpu, ret = 0;
	2447	+
	2448	+ cpu_maps_update_begin();
	2449	+ for_each_online_cpu(cpu) {
	2450	+ if (topology_is_primary_thread(cpu))
	2451	+ continue;
	2452	+ ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
	2453	+ if (ret)
	2454	+ break;
	2455	+ /*
	2456	+ * As this needs to hold the cpu maps lock it's impossible
	2457	+ * to call device_offline() because that ends up calling
	2458	+ * cpu_down() which takes cpu maps lock. cpu maps lock
	2459	+ * needs to be held as this might race against in kernel
	2460	+ * abusers of the hotplug machinery (thermal management).
	2461	+ *
	2462	+ * So nothing would update device:offline state. That would
	2463	+ * leave the sysfs entry stale and prevent onlining after
	2464	+ * smt control has been changed to 'off' again. This is
	2465	+ * called under the sysfs hotplug lock, so it is properly
	2466	+ * serialized against the regular offline usage.
	2467	+ */
	2468	+ cpuhp_offline_cpu_device(cpu);
	2469	+ }
	2470	+ if (!ret)
	2471	+ cpu_smt_control = ctrlval;
	2472	+ cpu_maps_update_done();
	2473	+ return ret;
	2474	+}
	2475	+
	2476	+int cpuhp_smt_enable(void)
	2477	+{
	2478	+ int cpu, ret = 0;
	2479	+
	2480	+ cpu_maps_update_begin();
	2481	+ cpu_smt_control = CPU_SMT_ENABLED;
	2482	+ for_each_present_cpu(cpu) {
	2483	+ /* Skip online CPUs and CPUs on offline nodes */
	2484	+ if (cpu_online(cpu) \|\| !node_online(cpu_to_node(cpu)))
	2485	+ continue;
	2486	+ ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
	2487	+ if (ret)
	2488	+ break;
	2489	+ /* See comment in cpuhp_smt_disable() */
	2490	+ cpuhp_online_cpu_device(cpu);
	2491	+ }
	2492	+ cpu_maps_update_done();
	2493	+ return ret;
	2494	+}
	2495	+#endif
	2496	+
1938	2497	#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
1939	2498	static ssize_t show_cpuhp_state(struct device *dev,
1940	2499	struct device_attribute attr, char buf)
..	..	@@ -1977,9 +2536,11 @@
1977	2536	goto out;
1978	2537
1979	2538	if (st->state < target)
1980		- ret = do_cpu_up(dev->id, target);
1981		- else
1982		- ret = do_cpu_down(dev->id, target);
	2539	+ ret = cpu_up(dev->id, target);
	2540	+ else if (st->state > target)
	2541	+ ret = cpu_down(dev->id, target);
	2542	+ else if (WARN_ON(st->target != target))
	2543	+ st->target = target;
1983	2544	out:
1984	2545	unlock_device_hotplug();
1985	2546	return ret ? ret : count;
..	..	@@ -2089,92 +2650,9 @@
2089	2650
2090	2651	#ifdef CONFIG_HOTPLUG_SMT
2091	2652
2092		-static const char *smt_states[] = {
2093		- [CPU_SMT_ENABLED] = "on",
2094		- [CPU_SMT_DISABLED] = "off",
2095		- [CPU_SMT_FORCE_DISABLED] = "forceoff",
2096		- [CPU_SMT_NOT_SUPPORTED] = "notsupported",
2097		-};
2098		-
2099	2653	static ssize_t
2100		-show_smt_control(struct device dev, struct device_attribute attr, char *buf)
2101		-{
2102		- return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]);
2103		-}
2104		-
2105		-static void cpuhp_offline_cpu_device(unsigned int cpu)
2106		-{
2107		- struct device *dev = get_cpu_device(cpu);
2108		-
2109		- dev->offline = true;
2110		- /* Tell user space about the state change */
2111		- kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2112		-}
2113		-
2114		-static void cpuhp_online_cpu_device(unsigned int cpu)
2115		-{
2116		- struct device *dev = get_cpu_device(cpu);
2117		-
2118		- dev->offline = false;
2119		- /* Tell user space about the state change */
2120		- kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2121		-}
2122		-
2123		-int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2124		-{
2125		- int cpu, ret = 0;
2126		-
2127		- cpu_maps_update_begin();
2128		- for_each_online_cpu(cpu) {
2129		- if (topology_is_primary_thread(cpu))
2130		- continue;
2131		- ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2132		- if (ret)
2133		- break;
2134		- /*
2135		- * As this needs to hold the cpu maps lock it's impossible
2136		- * to call device_offline() because that ends up calling
2137		- * cpu_down() which takes cpu maps lock. cpu maps lock
2138		- * needs to be held as this might race against in kernel
2139		- * abusers of the hotplug machinery (thermal management).
2140		- *
2141		- * So nothing would update device:offline state. That would
2142		- * leave the sysfs entry stale and prevent onlining after
2143		- * smt control has been changed to 'off' again. This is
2144		- * called under the sysfs hotplug lock, so it is properly
2145		- * serialized against the regular offline usage.
2146		- */
2147		- cpuhp_offline_cpu_device(cpu);
2148		- }
2149		- if (!ret)
2150		- cpu_smt_control = ctrlval;
2151		- cpu_maps_update_done();
2152		- return ret;
2153		-}
2154		-
2155		-int cpuhp_smt_enable(void)
2156		-{
2157		- int cpu, ret = 0;
2158		-
2159		- cpu_maps_update_begin();
2160		- cpu_smt_control = CPU_SMT_ENABLED;
2161		- for_each_present_cpu(cpu) {
2162		- /* Skip online CPUs and CPUs on offline nodes */
2163		- if (cpu_online(cpu) \|\| !node_online(cpu_to_node(cpu)))
2164		- continue;
2165		- ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2166		- if (ret)
2167		- break;
2168		- /* See comment in cpuhp_smt_disable() */
2169		- cpuhp_online_cpu_device(cpu);
2170		- }
2171		- cpu_maps_update_done();
2172		- return ret;
2173		-}
2174		-
2175		-static ssize_t
2176		-store_smt_control(struct device dev, struct device_attribute attr,
2177		- const char *buf, size_t count)
	2654	+__store_smt_control(struct device dev, struct device_attribute attr,
	2655	+ const char *buf, size_t count)
2178	2656	{
2179	2657	int ctrlval, ret;
2180	2658
..	..	@@ -2212,14 +2690,44 @@
2212	2690	unlock_device_hotplug();
2213	2691	return ret ? ret : count;
2214	2692	}
	2693	+
	2694	+#else /* !CONFIG_HOTPLUG_SMT */
	2695	+static ssize_t
	2696	+__store_smt_control(struct device dev, struct device_attribute attr,
	2697	+ const char *buf, size_t count)
	2698	+{
	2699	+ return -ENODEV;
	2700	+}
	2701	+#endif /* CONFIG_HOTPLUG_SMT */
	2702	+
	2703	+static const char *smt_states[] = {
	2704	+ [CPU_SMT_ENABLED] = "on",
	2705	+ [CPU_SMT_DISABLED] = "off",
	2706	+ [CPU_SMT_FORCE_DISABLED] = "forceoff",
	2707	+ [CPU_SMT_NOT_SUPPORTED] = "notsupported",
	2708	+ [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
	2709	+};
	2710	+
	2711	+static ssize_t
	2712	+show_smt_control(struct device dev, struct device_attribute attr, char *buf)
	2713	+{
	2714	+ const char *state = smt_states[cpu_smt_control];
	2715	+
	2716	+ return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
	2717	+}
	2718	+
	2719	+static ssize_t
	2720	+store_smt_control(struct device dev, struct device_attribute attr,
	2721	+ const char *buf, size_t count)
	2722	+{
	2723	+ return __store_smt_control(dev, attr, buf, count);
	2724	+}
2215	2725	static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
2216	2726
2217	2727	static ssize_t
2218	2728	show_smt_active(struct device dev, struct device_attribute attr, char *buf)
2219	2729	{
2220		- bool active = topology_max_smt_threads() > 1;
2221		-
2222		- return snprintf(buf, PAGE_SIZE - 2, "%d\n", active);
	2730	+ return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
2223	2731	}
2224	2732	static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
2225	2733
..	..	@@ -2235,21 +2743,17 @@
2235	2743	NULL
2236	2744	};
2237	2745
2238		-static int __init cpu_smt_state_init(void)
	2746	+static int __init cpu_smt_sysfs_init(void)
2239	2747	{
2240	2748	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
2241	2749	&cpuhp_smt_attr_group);
2242	2750	}
2243	2751
2244		-#else
2245		-static inline int cpu_smt_state_init(void) { return 0; }
2246		-#endif
2247		-
2248	2752	static int __init cpuhp_sysfs_init(void)
2249	2753	{
2250	2754	int cpu, ret;
2251	2755
2252		- ret = cpu_smt_state_init();
	2756	+ ret = cpu_smt_sysfs_init();
2253	2757	if (ret)
2254	2758	return ret;
2255	2759
..	..	@@ -2270,7 +2774,7 @@
2270	2774	return 0;
2271	2775	}
2272	2776	device_initcall(cpuhp_sysfs_init);
2273		-#endif
	2777	+#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
2274	2778
2275	2779	/*
2276	2780	* cpu_bit_bitmap[] is a special, "compressed" data structure that
..	..	@@ -2317,8 +2821,8 @@
2317	2821	struct cpumask __cpu_active_mask __read_mostly;
2318	2822	EXPORT_SYMBOL(__cpu_active_mask);
2319	2823
2320		-struct cpumask __cpu_isolated_mask __read_mostly;
2321		-EXPORT_SYMBOL(__cpu_isolated_mask);
	2824	+atomic_t __num_online_cpus __read_mostly;
	2825	+EXPORT_SYMBOL(__num_online_cpus);
2322	2826
2323	2827	void init_cpu_present(const struct cpumask *src)
2324	2828	{
..	..	@@ -2333,6 +2837,27 @@
2333	2837	void init_cpu_online(const struct cpumask *src)
2334	2838	{
2335	2839	cpumask_copy(&__cpu_online_mask, src);
	2840	+}
	2841	+
	2842	+void set_cpu_online(unsigned int cpu, bool online)
	2843	+{
	2844	+ /*
	2845	+ * atomic_inc/dec() is required to handle the horrid abuse of this
	2846	+ * function by the reboot and kexec code which invoke it from
	2847	+ * IPI/NMI broadcasts when shutting down CPUs. Invocation from
	2848	+ * regular CPU hotplug is properly serialized.
	2849	+ *
	2850	+ * Note, that the fact that __num_online_cpus is of type atomic_t
	2851	+ * does not protect readers which are not serialized against
	2852	+ * concurrent hotplug operations.
	2853	+ */
	2854	+ if (online) {
	2855	+ if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
	2856	+ atomic_inc(&__num_online_cpus);
	2857	+ } else {
	2858	+ if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
	2859	+ atomic_dec(&__num_online_cpus);
	2860	+ }
2336	2861	}
2337	2862
2338	2863	/*
..	..	@@ -2359,7 +2884,7 @@
2359	2884	void __init boot_cpu_hotplug_init(void)
2360	2885	{
2361	2886	#ifdef CONFIG_SMP
2362		- this_cpu_write(cpuhp_state.booted_once, true);
	2887	+ cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
2363	2888	#endif
2364	2889	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
2365	2890	}