~hc/RK356X_SDK_RELEASE.git

..	..	@@ -10,6 +10,7 @@
10	10	#include <linux/notifier.h>
11	11	#include <linux/sched/signal.h>
12	12	#include <linux/sched/hotplug.h>
	13	+#include <linux/sched/isolation.h>
13	14	#include <linux/sched/task.h>
14	15	#include <linux/sched/smt.h>
15	16	#include <linux/unistd.h>
..	..	@@ -30,12 +31,19 @@
30	31	#include <linux/smpboot.h>
31	32	#include <linux/relay.h>
32	33	#include <linux/slab.h>
	34	+#include <linux/scs.h>
33	35	#include <linux/percpu-rwsem.h>
34	36	#include <linux/cpuset.h>
	37	+#include <linux/random.h>
	38	+#include <uapi/linux/sched/types.h>
35	39
36	40	#include <trace/events/power.h>
37	41	#define CREATE_TRACE_POINTS
38	42	#include <trace/events/cpuhp.h>
	43	+
	44	+#undef CREATE_TRACE_POINTS
	45	+#include <trace/hooks/sched.h>
	46	+#include <trace/hooks/cpu.h>
39	47
40	48	#include "smpboot.h"
41	49
..	..	@@ -63,7 +71,6 @@
63	71	bool rollback;
64	72	bool single;
65	73	bool bringup;
66		- bool booted_once;
67	74	struct hlist_node *node;
68	75	struct hlist_node *last;
69	76	enum cpuhp_state cb_state;
..	..	@@ -76,6 +83,10 @@
76	83	static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
77	84	.fail = CPUHP_INVALID,
78	85	};
	86	+
	87	+#ifdef CONFIG_SMP
	88	+cpumask_t cpus_booted_once_mask;
	89	+#endif
79	90
80	91	#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
81	92	static struct lockdep_map cpuhp_state_up_map =
..	..	@@ -269,11 +280,13 @@
269	280	{
270	281	mutex_lock(&cpu_add_remove_lock);
271	282	}
	283	+EXPORT_SYMBOL_GPL(cpu_maps_update_begin);
272	284
273	285	void cpu_maps_update_done(void)
274	286	{
275	287	mutex_unlock(&cpu_add_remove_lock);
276	288	}
	289	+EXPORT_SYMBOL_GPL(cpu_maps_update_done);
277	290
278	291	/*
279	292	* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
..	..	@@ -327,6 +340,16 @@
327	340	percpu_rwsem_assert_held(&cpu_hotplug_lock);
328	341	}
329	342
	343	+static void lockdep_acquire_cpus_lock(void)
	344	+{
	345	+ rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
	346	+}
	347	+
	348	+static void lockdep_release_cpus_lock(void)
	349	+{
	350	+ rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
	351	+}
	352	+
330	353	/*
331	354	* Wait for currently running CPU hotplug operations to complete (if any) and
332	355	* disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
..	..	@@ -356,6 +379,17 @@
356	379	cpu_maps_update_done();
357	380	}
358	381	EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
	382	+
	383	+#else
	384	+
	385	+static void lockdep_acquire_cpus_lock(void)
	386	+{
	387	+}
	388	+
	389	+static void lockdep_release_cpus_lock(void)
	390	+{
	391	+}
	392	+
359	393	#endif /* CONFIG_HOTPLUG_CPU */
360	394
361	395	/*
..	..	@@ -369,8 +403,7 @@
369	403
370	404	void __init cpu_smt_disable(bool force)
371	405	{
372		- if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
373		- cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
	406	+ if (!cpu_smt_possible())
374	407	return;
375	408
376	409	if (force) {
..	..	@@ -410,11 +443,19 @@
410	443	/*
411	444	* On x86 it's required to boot all logical CPUs at least once so
412	445	* that the init code can get a chance to set CR4.MCE on each
413		- * CPU. Otherwise, a broadacasted MCE observing CR4.MCE=0b on any
	446	+ * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
414	447	* core will shutdown the machine.
415	448	*/
416		- return !per_cpu(cpuhp_state, cpu).booted_once;
	449	+ return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
417	450	}
	451	+
	452	+/* Returns false if SMT is not supported or forcefully (irreversibly) disabled */
	453	+bool cpu_smt_possible(void)
	454	+{
	455	+ return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
	456	+ cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
	457	+}
	458	+EXPORT_SYMBOL_GPL(cpu_smt_possible);
418	459	#else
419	460	static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
420	461	#endif
..	..	@@ -501,7 +542,7 @@
501	542	/*
502	543	* SMT soft disabling on X86 requires to bring the CPU out of the
503	544	* BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
504		- * CPU marked itself as booted_once in cpu_notify_starting() so the
	545	+ * CPU marked itself as booted_once in notify_cpu_starting() so the
505	546	* cpu_smt_allowed() check will now return false if this is not the
506	547	* primary sibling.
507	548	*/
..	..	@@ -518,6 +559,12 @@
518	559	{
519	560	struct task_struct *idle = idle_thread_get(cpu);
520	561	int ret;
	562	+
	563	+ /*
	564	+ * Reset stale stack state from the last time this CPU was online.
	565	+ */
	566	+ scs_task_reset(idle);
	567	+ kasan_unpoison_task_stack(idle);
521	568
522	569	/*
523	570	* Some architectures have to walk the irq descriptors to
..	..	@@ -640,6 +687,12 @@
640	687	*/
641	688	smp_mb();
642	689
	690	+ /*
	691	+ * The BP holds the hotplug lock, but we're now running on the AP,
	692	+ * ensure that anybody asserting the lock is held, will actually find
	693	+ * it so.
	694	+ */
	695	+ lockdep_acquire_cpus_lock();
643	696	cpuhp_lock_acquire(bringup);
644	697
645	698	if (st->single) {
..	..	@@ -685,6 +738,7 @@
685	738	}
686	739
687	740	cpuhp_lock_release(bringup);
	741	+ lockdep_release_cpus_lock();
688	742
689	743	if (!st->should_run)
690	744	complete_ap_thread(st, bringup);
..	..	@@ -898,6 +952,8 @@
898	952
899	953	/* Give up timekeeping duties */
900	954	tick_handover_do_timer();
	955	+ /* Remove CPU from timer broadcasting */
	956	+ tick_offline_cpu(cpu);
901	957	/* Park the stopper thread */
902	958	stop_machine_park(cpu);
903	959	return 0;
..	..	@@ -1005,7 +1061,7 @@
1005	1061	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1006	1062	int prev_state, ret = 0;
1007	1063
1008		- if (num_online_cpus() == 1)
	1064	+ if (num_active_cpus() == 1 && cpu_active(cpu))
1009	1065	return -EBUSY;
1010	1066
1011	1067	if (!cpu_present(cpu))
..	..	@@ -1068,7 +1124,7 @@
1068	1124	return _cpu_down(cpu, 0, target);
1069	1125	}
1070	1126
1071		-static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
	1127	+static int cpu_down(unsigned int cpu, enum cpuhp_state target)
1072	1128	{
1073	1129	int err;
1074	1130
..	..	@@ -1078,11 +1134,317 @@
1078	1134	return err;
1079	1135	}
1080	1136
1081		-int cpu_down(unsigned int cpu)
	1137	+/**
	1138	+ * cpu_device_down - Bring down a cpu device
	1139	+ * @dev: Pointer to the cpu device to offline
	1140	+ *
	1141	+ * This function is meant to be used by device core cpu subsystem only.
	1142	+ *
	1143	+ * Other subsystems should use remove_cpu() instead.
	1144	+ */
	1145	+int cpu_device_down(struct device *dev)
1082	1146	{
1083		- return do_cpu_down(cpu, CPUHP_OFFLINE);
	1147	+ return cpu_down(dev->id, CPUHP_OFFLINE);
1084	1148	}
1085		-EXPORT_SYMBOL(cpu_down);
	1149	+
	1150	+int remove_cpu(unsigned int cpu)
	1151	+{
	1152	+ int ret;
	1153	+
	1154	+ lock_device_hotplug();
	1155	+ ret = device_offline(get_cpu_device(cpu));
	1156	+ unlock_device_hotplug();
	1157	+
	1158	+ return ret;
	1159	+}
	1160	+EXPORT_SYMBOL_GPL(remove_cpu);
	1161	+
	1162	+extern int dl_cpu_busy(int cpu, struct task_struct *p);
	1163	+
	1164	+int __pause_drain_rq(struct cpumask *cpus)
	1165	+{
	1166	+ unsigned int cpu;
	1167	+ int err = 0;
	1168	+
	1169	+ /*
	1170	+ * Disabling preemption prevents one of the stoppers, started from
	1171	+ * sched_cpu_drain_rq(), from blocking the firing of draining for the whole cpumask.
	1172	+ */
	1173	+ preempt_disable();
	1174	+ for_each_cpu(cpu, cpus) {
	1175	+ err = sched_cpu_drain_rq(cpu);
	1176	+ if (err)
	1177	+ break;
	1178	+ }
	1179	+ preempt_enable();
	1180	+
	1181	+ return err;
	1182	+}
	1183	+
	1184	+void __wait_drain_rq(struct cpumask *cpus)
	1185	+{
	1186	+ unsigned int cpu;
	1187	+
	1188	+ for_each_cpu(cpu, cpus)
	1189	+ sched_cpu_drain_rq_wait(cpu);
	1190	+}
	1191	+
	1192	+/* if rt task, set to cfs and return previous prio */
	1193	+static int pause_reduce_prio(void)
	1194	+{
	1195	+ int prev_prio = -1;
	1196	+
	1197	+ if (current->prio < MAX_RT_PRIO) {
	1198	+ struct sched_param param = { .sched_priority = 0 };
	1199	+
	1200	+ prev_prio = current->prio;
	1201	+ sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
	1202	+ }
	1203	+
	1204	+ return prev_prio;
	1205	+}
	1206	+
	1207	+/* if previous prio was set, restore */
	1208	+static void pause_restore_prio(int prev_prio)
	1209	+{
	1210	+ if (prev_prio >= 0 && prev_prio < MAX_RT_PRIO) {
	1211	+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1-prev_prio };
	1212	+
	1213	+ sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
	1214	+ }
	1215	+}
	1216	+
	1217	+int pause_cpus(struct cpumask *cpus)
	1218	+{
	1219	+ int err = 0;
	1220	+ int cpu;
	1221	+ u64 start_time = 0;
	1222	+ int prev_prio;
	1223	+
	1224	+ start_time = sched_clock();
	1225	+
	1226	+ cpu_maps_update_begin();
	1227	+
	1228	+ if (cpu_hotplug_disabled) {
	1229	+ err = -EBUSY;
	1230	+ goto err_cpu_maps_update;
	1231	+ }
	1232	+
	1233	+ /* Pausing an already inactive CPU isn't an error */
	1234	+ cpumask_and(cpus, cpus, cpu_active_mask);
	1235	+
	1236	+ for_each_cpu(cpu, cpus) {
	1237	+ if (!cpu_online(cpu) || dl_cpu_busy(cpu, NULL) ||
	1238	+ get_cpu_device(cpu)->offline_disabled == true) {
	1239	+ err = -EBUSY;
	1240	+ goto err_cpu_maps_update;
	1241	+ }
	1242	+ }
	1243	+
	1244	+ if (cpumask_weight(cpus) >= num_active_cpus()) {
	1245	+ err = -EBUSY;
	1246	+ goto err_cpu_maps_update;
	1247	+ }
	1248	+
	1249	+ if (cpumask_empty(cpus))
	1250	+ goto err_cpu_maps_update;
	1251	+
	1252	+ /*
	1253	+ * Lazy migration:
	1254	+ *
	1255	+ * We do care about how fast a CPU can go idle and stay in this
	1256	+ * state. If we try to take the cpus_write_lock() here, we would have
	1257	+ * to wait for a few dozen ms, as this function might schedule.
	1258	+ * However, we can, as a first step, flip the active mask and migrate
	1259	+ * anything currently on the run-queue, to give a chance to the paused
	1260	+ * CPUs to reach quickly an idle state. There's a risk meanwhile for
	1261	+ * another CPU to observe an out-of-date active_mask or to incompletely
	1262	+ * update a cpuset. Both problems would be resolved later in the slow
	1263	+ * path, which ensures active_mask synchronization, triggers a cpuset
	1264	+ * rebuild and migrate any task that would have escaped the lazy
	1265	+ * migration.
	1266	+ */
	1267	+ for_each_cpu(cpu, cpus)
	1268	+ set_cpu_active(cpu, false);
	1269	+ err = __pause_drain_rq(cpus);
	1270	+ if (err) {
	1271	+ __wait_drain_rq(cpus);
	1272	+ for_each_cpu(cpu, cpus)
	1273	+ set_cpu_active(cpu, true);
	1274	+ goto err_cpu_maps_update;
	1275	+ }
	1276	+
	1277	+ prev_prio = pause_reduce_prio();
	1278	+
	1279	+ /*
	1280	+ * Slow path deactivation:
	1281	+ *
	1282	+ * Now that paused CPUs are most likely idle, we can go through a
	1283	+ * complete scheduler deactivation.
	1284	+ *
	1285	+ * The cpu_active_mask being already updated and cpus_write_lock calling
	1286	+ * synchronize_rcu(), we know that all preempt-disabled and RCU users
	1287	+ * will observe the updated value.
	1288	+ */
	1289	+ cpus_write_lock();
	1290	+
	1291	+ __wait_drain_rq(cpus);
	1292	+
	1293	+ cpuhp_tasks_frozen = 0;
	1294	+
	1295	+ if (sched_cpus_deactivate_nosync(cpus)) {
	1296	+ err = -EBUSY;
	1297	+ goto err_cpus_write_unlock;
	1298	+ }
	1299	+
	1300	+ err = __pause_drain_rq(cpus);
	1301	+ __wait_drain_rq(cpus);
	1302	+ if (err) {
	1303	+ for_each_cpu(cpu, cpus)
	1304	+ sched_cpu_activate(cpu);
	1305	+ goto err_cpus_write_unlock;
	1306	+ }
	1307	+
	1308	+ /*
	1309	+ * Even if living on the side of the regular HP path, pause is using
	1310	+ * one of the HP steps (CPUHP_AP_ACTIVE). This should be reflected in the
	1311	+ * current state of the CPU.
	1312	+ */
	1313	+ for_each_cpu(cpu, cpus) {
	1314	+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	1315	+
	1316	+ st->state = CPUHP_AP_ACTIVE - 1;
	1317	+ st->target = st->state;
	1318	+ }
	1319	+
	1320	+err_cpus_write_unlock:
	1321	+ cpus_write_unlock();
	1322	+ pause_restore_prio(prev_prio);
	1323	+err_cpu_maps_update:
	1324	+ cpu_maps_update_done();
	1325	+
	1326	+ trace_cpuhp_pause(cpus, start_time, 1);
	1327	+
	1328	+ return err;
	1329	+}
	1330	+EXPORT_SYMBOL_GPL(pause_cpus);
	1331	+
	1332	+int resume_cpus(struct cpumask *cpus)
	1333	+{
	1334	+ unsigned int cpu;
	1335	+ int err = 0;
	1336	+ u64 start_time = 0;
	1337	+ int prev_prio;
	1338	+
	1339	+ start_time = sched_clock();
	1340	+
	1341	+ cpu_maps_update_begin();
	1342	+
	1343	+ if (cpu_hotplug_disabled) {
	1344	+ err = -EBUSY;
	1345	+ goto err_cpu_maps_update;
	1346	+ }
	1347	+
	1348	+ /* Resuming an already active CPU isn't an error */
	1349	+ cpumask_andnot(cpus, cpus, cpu_active_mask);
	1350	+
	1351	+ for_each_cpu(cpu, cpus) {
	1352	+ if (!cpu_online(cpu)) {
	1353	+ err = -EBUSY;
	1354	+ goto err_cpu_maps_update;
	1355	+ }
	1356	+ }
	1357	+
	1358	+ if (cpumask_empty(cpus))
	1359	+ goto err_cpu_maps_update;
	1360	+
	1361	+ for_each_cpu(cpu, cpus)
	1362	+ set_cpu_active(cpu, true);
	1363	+
	1364	+ trace_android_rvh_resume_cpus(cpus, &err);
	1365	+ if (err)
	1366	+ goto err_cpu_maps_update;
	1367	+
	1368	+ prev_prio = pause_reduce_prio();
	1369	+
	1370	+ /* Lazy Resume. Build domains by scheduling work on a workqueue on
	1371	+ * the resuming cpu. This is so that the resuming cpu can start work
	1372	+ * earlier, and does not add additional load to other busy cpus.
	1373	+ */
	1374	+ cpuset_update_active_cpus_affine(cpumask_first(cpus));
	1375	+
	1376	+ cpus_write_lock();
	1377	+
	1378	+ cpuhp_tasks_frozen = 0;
	1379	+
	1380	+ if (sched_cpus_activate(cpus)) {
	1381	+ err = -EBUSY;
	1382	+ goto err_cpus_write_unlock;
	1383	+ }
	1384	+
	1385	+ /*
	1386	+ * see pause_cpus.
	1387	+ */
	1388	+ for_each_cpu(cpu, cpus) {
	1389	+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	1390	+
	1391	+ st->state = CPUHP_ONLINE;
	1392	+ st->target = st->state;
	1393	+ }
	1394	+
	1395	+err_cpus_write_unlock:
	1396	+ cpus_write_unlock();
	1397	+ pause_restore_prio(prev_prio);
	1398	+err_cpu_maps_update:
	1399	+ cpu_maps_update_done();
	1400	+
	1401	+ trace_cpuhp_pause(cpus, start_time, 0);
	1402	+
	1403	+ return err;
	1404	+}
	1405	+EXPORT_SYMBOL_GPL(resume_cpus);
	1406	+
	1407	+void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
	1408	+{
	1409	+ unsigned int cpu;
	1410	+ int error;
	1411	+
	1412	+ cpu_maps_update_begin();
	1413	+
	1414	+ /*
	1415	+ * Make certain the cpu I'm about to reboot on is online.
	1416	+ *
	1417	+ * This is in line with what migrate_to_reboot_cpu() already does.
	1418	+ */
	1419	+ if (!cpu_online(primary_cpu))
	1420	+ primary_cpu = cpumask_first(cpu_online_mask);
	1421	+
	1422	+ for_each_online_cpu(cpu) {
	1423	+ if (cpu == primary_cpu)
	1424	+ continue;
	1425	+
	1426	+ error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
	1427	+ if (error) {
	1428	+ pr_err("Failed to offline CPU%d - error=%d",
	1429	+ cpu, error);
	1430	+ break;
	1431	+ }
	1432	+ }
	1433	+
	1434	+ /*
	1435	+ * Ensure all but the reboot CPU are offline.
	1436	+ */
	1437	+ BUG_ON(num_online_cpus() > 1);
	1438	+
	1439	+ /*
	1440	+ * Make sure the CPUs won't be enabled by someone else after this
	1441	+ * point. Kexec will reboot to a new kernel shortly resetting
	1442	+ * everything along the way.
	1443	+ */
	1444	+ cpu_hotplug_disabled++;
	1445	+
	1446	+ cpu_maps_update_done();
	1447	+}
1086	1448
1087	1449	#else
1088	1450	#define takedown_cpu NULL
..	..	@@ -1102,7 +1464,7 @@
1102	1464	int ret;
1103	1465
1104	1466	rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
1105		- st->booted_once = true;
	1467	+ cpumask_set_cpu(cpu, &cpus_booted_once_mask);
1106	1468	while (st->state < target) {
1107	1469	st->state++;
1108	1470	ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
..	..	@@ -1136,6 +1498,25 @@
1136	1498	complete_ap_thread(st, true);
1137	1499	}
1138	1500
	1501	+static int switch_to_rt_policy(void)
	1502	+{
	1503	+ struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
	1504	+ unsigned int policy = current->policy;
	1505	+
	1506	+ if (policy == SCHED_NORMAL)
	1507	+ /* Switch to SCHED_FIFO from SCHED_NORMAL. */
	1508	+ return sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
	1509	+ else
	1510	+ return 1;
	1511	+}
	1512	+
	1513	+static int switch_to_fair_policy(void)
	1514	+{
	1515	+ struct sched_param param = { .sched_priority = 0 };
	1516	+
	1517	+ return sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
	1518	+}
	1519	+
1139	1520	/* Requires cpu_add_remove_lock to be held */
1140	1521	static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1141	1522	{
..	..	@@ -1151,8 +1532,8 @@
1151	1532	}
1152	1533
1153	1534	/*
1154		- * The caller of do_cpu_up might have raced with another
1155		- * caller. Ignore it for now.
	1535	+ * The caller of cpu_up() might have raced with another
	1536	+ * caller. Nothing to do.
1156	1537	*/
1157	1538	if (st->state >= target)
1158	1539	goto out;
..	..	@@ -1197,9 +1578,10 @@
1197	1578	return ret;
1198	1579	}
1199	1580
1200		-static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
	1581	+static int cpu_up(unsigned int cpu, enum cpuhp_state target)
1201	1582	{
1202	1583	int err = 0;
	1584	+ int switch_err;
1203	1585
1204	1586	if (!cpu_possible(cpu)) {
1205	1587	pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
..	..	@@ -1210,9 +1592,23 @@
1210	1592	return -EINVAL;
1211	1593	}
1212	1594
	1595	+ trace_android_vh_cpu_up(cpu);
	1596	+
	1597	+ /*
	1598	+ * CPU hotplug operations consist of many steps and each step
	1599	+ * calls a callback of a core kernel subsystem. A CPU hotplug-in
	1600	+ * operation may get preempted by other CFS tasks, and the whole
	1601	+ * hotplug-in operation then gets delayed. Switch the
	1602	+ * current task to SCHED_FIFO from SCHED_NORMAL, so that the
	1603	+ * hotplug-in operation may complete quickly under heavily loaded
	1604	+ * conditions and the new CPU can start handling the workload.
	1605	+ */
	1606	+
	1607	+ switch_err = switch_to_rt_policy();
	1608	+
1213	1609	err = try_online_node(cpu_to_node(cpu));
1214	1610	if (err)
1215		- return err;
	1611	+ goto switch_out;
1216	1612
1217	1613	cpu_maps_update_begin();
1218	1614
..	..	@@ -1228,14 +1624,76 @@
1228	1624	err = _cpu_up(cpu, 0, target);
1229	1625	out:
1230	1626	cpu_maps_update_done();
	1627	+switch_out:
	1628	+ if (!switch_err) {
	1629	+ switch_err = switch_to_fair_policy();
	1630	+ if (switch_err)
	1631	+ pr_err("Hotplug policy switch err=%d Task %s pid=%d\n",
	1632	+ switch_err, current->comm, current->pid);
	1633	+ }
	1634	+
1231	1635	return err;
1232	1636	}
1233	1637
1234		-int cpu_up(unsigned int cpu)
	1638	+/**
	1639	+ * cpu_device_up - Bring up a cpu device
	1640	+ * @dev: Pointer to the cpu device to online
	1641	+ *
	1642	+ * This function is meant to be used by device core cpu subsystem only.
	1643	+ *
	1644	+ * Other subsystems should use add_cpu() instead.
	1645	+ */
	1646	+int cpu_device_up(struct device *dev)
1235	1647	{
1236		- return do_cpu_up(cpu, CPUHP_ONLINE);
	1648	+ return cpu_up(dev->id, CPUHP_ONLINE);
1237	1649	}
1238		-EXPORT_SYMBOL_GPL(cpu_up);
	1650	+
	1651	+int add_cpu(unsigned int cpu)
	1652	+{
	1653	+ int ret;
	1654	+
	1655	+ lock_device_hotplug();
	1656	+ ret = device_online(get_cpu_device(cpu));
	1657	+ unlock_device_hotplug();
	1658	+
	1659	+ return ret;
	1660	+}
	1661	+EXPORT_SYMBOL_GPL(add_cpu);
	1662	+
	1663	+/**
	1664	+ * bringup_hibernate_cpu - Bring up the CPU that we hibernated on
	1665	+ * @sleep_cpu: The cpu we hibernated on and should be brought up.
	1666	+ *
	1667	+ * On some architectures like arm64, we can hibernate on any CPU, but on
	1668	+ * wake up the CPU we hibernated on might be offline as a side effect of
	1669	+ * using maxcpus= for example.
	1670	+ */
	1671	+int bringup_hibernate_cpu(unsigned int sleep_cpu)
	1672	+{
	1673	+ int ret;
	1674	+
	1675	+ if (!cpu_online(sleep_cpu)) {
	1676	+ pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
	1677	+ ret = cpu_up(sleep_cpu, CPUHP_ONLINE);
	1678	+ if (ret) {
	1679	+ pr_err("Failed to bring hibernate-CPU up!\n");
	1680	+ return ret;
	1681	+ }
	1682	+ }
	1683	+ return 0;
	1684	+}
	1685	+
	1686	+void bringup_nonboot_cpus(unsigned int setup_max_cpus)
	1687	+{
	1688	+ unsigned int cpu;
	1689	+
	1690	+ for_each_present_cpu(cpu) {
	1691	+ if (num_online_cpus() >= setup_max_cpus)
	1692	+ break;
	1693	+ if (!cpu_online(cpu))
	1694	+ cpu_up(cpu, CPUHP_ONLINE);
	1695	+ }
	1696	+}
1239	1697
1240	1698	#ifdef CONFIG_PM_SLEEP_SMP
1241	1699	static cpumask_var_t frozen_cpus;
..	..	@@ -1245,8 +1703,15 @@
1245	1703	int cpu, error = 0;
1246	1704
1247	1705	cpu_maps_update_begin();
1248		- if (!cpu_online(primary))
	1706	+ if (primary == -1) {
1249	1707	primary = cpumask_first(cpu_online_mask);
	1708	+ if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
	1709	+ primary = housekeeping_any_cpu(HK_FLAG_TIMER);
	1710	+ } else {
	1711	+ if (!cpu_online(primary))
	1712	+ primary = cpumask_first(cpu_online_mask);
	1713	+ }
	1714	+
1250	1715	/*
1251	1716	* We take down all of the non-boot CPUs in one shot to avoid races
1252	1717	* with the userspace trying to use the CPU hotplug at the same time
..	..	@@ -1257,6 +1722,13 @@
1257	1722	for_each_online_cpu(cpu) {
1258	1723	if (cpu == primary)
1259	1724	continue;
	1725	+
	1726	+ if (pm_wakeup_pending()) {
	1727	+ pr_info("Wakeup pending. Abort CPU freeze\n");
	1728	+ error = -EBUSY;
	1729	+ break;
	1730	+ }
	1731	+
1260	1732	trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1261	1733	error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1262	1734	trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
..	..	@@ -1275,8 +1747,8 @@
1275	1747
1276	1748	/*
1277	1749	* Make sure the CPUs won't be enabled by someone else. We need to do
1278		- * this even in case of failure as all disable_nonboot_cpus() users are
1279		- * supposed to do enable_nonboot_cpus() on the failure path.
	1750	+ * this even in case of failure as all freeze_secondary_cpus() users are
	1751	+ * supposed to do thaw_secondary_cpus() on the failure path.
1280	1752	*/
1281	1753	cpu_hotplug_disabled++;
1282	1754
..	..	@@ -1284,15 +1756,15 @@
1284	1756	return error;
1285	1757	}
1286	1758
1287		-void __weak arch_enable_nonboot_cpus_begin(void)
	1759	+void __weak arch_thaw_secondary_cpus_begin(void)
1288	1760	{
1289	1761	}
1290	1762
1291		-void __weak arch_enable_nonboot_cpus_end(void)
	1763	+void __weak arch_thaw_secondary_cpus_end(void)
1292	1764	{
1293	1765	}
1294	1766
1295		-void enable_nonboot_cpus(void)
	1767	+void thaw_secondary_cpus(void)
1296	1768	{
1297	1769	int cpu, error;
1298	1770	struct device *cpu_device;
..	..	@@ -1305,7 +1777,7 @@
1305	1777
1306	1778	pr_info("Enabling non-boot CPUs ...\n");
1307	1779
1308		- arch_enable_nonboot_cpus_begin();
	1780	+ arch_thaw_secondary_cpus_begin();
1309	1781
1310	1782	for_each_cpu(cpu, frozen_cpus) {
1311	1783	trace_suspend_resume(TPS("CPU_ON"), cpu, true);
..	..	@@ -1324,7 +1796,7 @@
1324	1796	pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1325	1797	}
1326	1798
1327		- arch_enable_nonboot_cpus_end();
	1799	+ arch_thaw_secondary_cpus_end();
1328	1800
1329	1801	cpumask_clear(frozen_cpus);
1330	1802	out:
..	..	@@ -1390,6 +1862,22 @@
1390	1862
1391	1863	int __boot_cpu_id;
1392	1864
	1865	+/* Horrific hacks because we can't add more to cpuhp_hp_states. */
	1866	+static int random_and_perf_prepare_fusion(unsigned int cpu)
	1867	+{
	1868	+#ifdef CONFIG_PERF_EVENTS
	1869	+ perf_event_init_cpu(cpu);
	1870	+#endif
	1871	+ random_prepare_cpu(cpu);
	1872	+ return 0;
	1873	+}
	1874	+static int random_and_workqueue_online_fusion(unsigned int cpu)
	1875	+{
	1876	+ workqueue_online_cpu(cpu);
	1877	+ random_online_cpu(cpu);
	1878	+ return 0;
	1879	+}
	1880	+
1393	1881	#endif /* CONFIG_SMP */
1394	1882
1395	1883	/* Boot processor state steps */
..	..	@@ -1408,7 +1896,7 @@
1408	1896	},
1409	1897	[CPUHP_PERF_PREPARE] = {
1410	1898	.name = "perf:prepare",
1411		- .startup.single = perf_event_init_cpu,
	1899	+ .startup.single = random_and_perf_prepare_fusion,
1412	1900	.teardown.single = perf_event_exit_cpu,
1413	1901	},
1414	1902	[CPUHP_WORKQUEUE_PREP] = {
..	..	@@ -1492,7 +1980,7 @@
1492	1980	.name = "ap:online",
1493	1981	},
1494	1982	/*
1495		- * Handled on controll processor until the plugged processor manages
	1983	+ * Handled on control processor until the plugged processor manages
1496	1984	* this itself.
1497	1985	*/
1498	1986	[CPUHP_TEARDOWN_CPU] = {
..	..	@@ -1501,6 +1989,13 @@
1501	1989	.teardown.single = takedown_cpu,
1502	1990	.cant_stop = true,
1503	1991	},
	1992	+
	1993	+ [CPUHP_AP_SCHED_WAIT_EMPTY] = {
	1994	+ .name = "sched:waitempty",
	1995	+ .startup.single = NULL,
	1996	+ .teardown.single = sched_cpu_wait_empty,
	1997	+ },
	1998	+
1504	1999	/* Handle smpboot threads park/unpark */
1505	2000	[CPUHP_AP_SMPBOOT_THREADS] = {
1506	2001	.name = "smpboot/threads:online",
..	..	@@ -1524,7 +2019,7 @@
1524	2019	},
1525	2020	[CPUHP_AP_WORKQUEUE_ONLINE] = {
1526	2021	.name = "workqueue:online",
1527		- .startup.single = workqueue_online_cpu,
	2022	+ .startup.single = random_and_workqueue_online_fusion,
1528	2023	.teardown.single = workqueue_offline_cpu,
1529	2024	},
1530	2025	[CPUHP_AP_RCUTREE_ONLINE] = {
..	..	@@ -1935,6 +2430,78 @@
1935	2430	}
1936	2431	EXPORT_SYMBOL(__cpuhp_remove_state);
1937	2432
	2433	+#ifdef CONFIG_HOTPLUG_SMT
	2434	+static void cpuhp_offline_cpu_device(unsigned int cpu)
	2435	+{
	2436	+ struct device *dev = get_cpu_device(cpu);
	2437	+
	2438	+ dev->offline = true;
	2439	+ /* Tell user space about the state change */
	2440	+ kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
	2441	+}
	2442	+
	2443	+static void cpuhp_online_cpu_device(unsigned int cpu)
	2444	+{
	2445	+ struct device *dev = get_cpu_device(cpu);
	2446	+
	2447	+ dev->offline = false;
	2448	+ /* Tell user space about the state change */
	2449	+ kobject_uevent(&dev->kobj, KOBJ_ONLINE);
	2450	+}
	2451	+
	2452	+int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
	2453	+{
	2454	+ int cpu, ret = 0;
	2455	+
	2456	+ cpu_maps_update_begin();
	2457	+ for_each_online_cpu(cpu) {
	2458	+ if (topology_is_primary_thread(cpu))
	2459	+ continue;
	2460	+ ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
	2461	+ if (ret)
	2462	+ break;
	2463	+ /*
	2464	+ * As this needs to hold the cpu maps lock it's impossible
	2465	+ * to call device_offline() because that ends up calling
	2466	+ * cpu_down() which takes cpu maps lock. cpu maps lock
	2467	+ * needs to be held as this might race against in kernel
	2468	+ * abusers of the hotplug machinery (thermal management).
	2469	+ *
	2470	+ * So nothing would update device:offline state. That would
	2471	+ * leave the sysfs entry stale and prevent onlining after
	2472	+ * smt control has been changed to 'off' again. This is
	2473	+ * called under the sysfs hotplug lock, so it is properly
	2474	+ * serialized against the regular offline usage.
	2475	+ */
	2476	+ cpuhp_offline_cpu_device(cpu);
	2477	+ }
	2478	+ if (!ret)
	2479	+ cpu_smt_control = ctrlval;
	2480	+ cpu_maps_update_done();
	2481	+ return ret;
	2482	+}
	2483	+
	2484	+int cpuhp_smt_enable(void)
	2485	+{
	2486	+ int cpu, ret = 0;
	2487	+
	2488	+ cpu_maps_update_begin();
	2489	+ cpu_smt_control = CPU_SMT_ENABLED;
	2490	+ for_each_present_cpu(cpu) {
	2491	+ /* Skip online CPUs and CPUs on offline nodes */
	2492	+ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
	2493	+ continue;
	2494	+ ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
	2495	+ if (ret)
	2496	+ break;
	2497	+ /* See comment in cpuhp_smt_disable() */
	2498	+ cpuhp_online_cpu_device(cpu);
	2499	+ }
	2500	+ cpu_maps_update_done();
	2501	+ return ret;
	2502	+}
	2503	+#endif
	2504	+
1938	2505	#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
1939	2506	static ssize_t show_cpuhp_state(struct device *dev,
1940	2507	struct device_attribute *attr, char *buf)
..	..	@@ -1977,9 +2544,9 @@
1977	2544	goto out;
1978	2545
1979	2546	if (st->state < target)
1980		- ret = do_cpu_up(dev->id, target);
	2547	+ ret = cpu_up(dev->id, target);
1981	2548	else
1982		- ret = do_cpu_down(dev->id, target);
	2549	+ ret = cpu_down(dev->id, target);
1983	2550	out:
1984	2551	unlock_device_hotplug();
1985	2552	return ret ? ret : count;
..	..	@@ -2089,92 +2656,9 @@
2089	2656
2090	2657	#ifdef CONFIG_HOTPLUG_SMT
2091	2658
2092		-static const char *smt_states[] = {
2093		- [CPU_SMT_ENABLED] = "on",
2094		- [CPU_SMT_DISABLED] = "off",
2095		- [CPU_SMT_FORCE_DISABLED] = "forceoff",
2096		- [CPU_SMT_NOT_SUPPORTED] = "notsupported",
2097		-};
2098		-
2099	2659	static ssize_t
2100		-show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
2101		-{
2102		- return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]);
2103		-}
2104		-
2105		-static void cpuhp_offline_cpu_device(unsigned int cpu)
2106		-{
2107		- struct device *dev = get_cpu_device(cpu);
2108		-
2109		- dev->offline = true;
2110		- /* Tell user space about the state change */
2111		- kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2112		-}
2113		-
2114		-static void cpuhp_online_cpu_device(unsigned int cpu)
2115		-{
2116		- struct device *dev = get_cpu_device(cpu);
2117		-
2118		- dev->offline = false;
2119		- /* Tell user space about the state change */
2120		- kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2121		-}
2122		-
2123		-int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2124		-{
2125		- int cpu, ret = 0;
2126		-
2127		- cpu_maps_update_begin();
2128		- for_each_online_cpu(cpu) {
2129		- if (topology_is_primary_thread(cpu))
2130		- continue;
2131		- ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2132		- if (ret)
2133		- break;
2134		- /*
2135		- * As this needs to hold the cpu maps lock it's impossible
2136		- * to call device_offline() because that ends up calling
2137		- * cpu_down() which takes cpu maps lock. cpu maps lock
2138		- * needs to be held as this might race against in kernel
2139		- * abusers of the hotplug machinery (thermal management).
2140		- *
2141		- * So nothing would update device:offline state. That would
2142		- * leave the sysfs entry stale and prevent onlining after
2143		- * smt control has been changed to 'off' again. This is
2144		- * called under the sysfs hotplug lock, so it is properly
2145		- * serialized against the regular offline usage.
2146		- */
2147		- cpuhp_offline_cpu_device(cpu);
2148		- }
2149		- if (!ret)
2150		- cpu_smt_control = ctrlval;
2151		- cpu_maps_update_done();
2152		- return ret;
2153		-}
2154		-
2155		-int cpuhp_smt_enable(void)
2156		-{
2157		- int cpu, ret = 0;
2158		-
2159		- cpu_maps_update_begin();
2160		- cpu_smt_control = CPU_SMT_ENABLED;
2161		- for_each_present_cpu(cpu) {
2162		- /* Skip online CPUs and CPUs on offline nodes */
2163		- if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
2164		- continue;
2165		- ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2166		- if (ret)
2167		- break;
2168		- /* See comment in cpuhp_smt_disable() */
2169		- cpuhp_online_cpu_device(cpu);
2170		- }
2171		- cpu_maps_update_done();
2172		- return ret;
2173		-}
2174		-
2175		-static ssize_t
2176		-store_smt_control(struct device *dev, struct device_attribute *attr,
2177		- const char *buf, size_t count)
	2660	+__store_smt_control(struct device *dev, struct device_attribute *attr,
	2661	+ const char *buf, size_t count)
2178	2662	{
2179	2663	int ctrlval, ret;
2180	2664
..	..	@@ -2212,14 +2696,44 @@
2212	2696	unlock_device_hotplug();
2213	2697	return ret ? ret : count;
2214	2698	}
	2699	+
	2700	+#else /* !CONFIG_HOTPLUG_SMT */
	2701	+static ssize_t
	2702	+__store_smt_control(struct device *dev, struct device_attribute *attr,
	2703	+ const char *buf, size_t count)
	2704	+{
	2705	+ return -ENODEV;
	2706	+}
	2707	+#endif /* CONFIG_HOTPLUG_SMT */
	2708	+
	2709	+static const char *smt_states[] = {
	2710	+ [CPU_SMT_ENABLED] = "on",
	2711	+ [CPU_SMT_DISABLED] = "off",
	2712	+ [CPU_SMT_FORCE_DISABLED] = "forceoff",
	2713	+ [CPU_SMT_NOT_SUPPORTED] = "notsupported",
	2714	+ [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
	2715	+};
	2716	+
	2717	+static ssize_t
	2718	+show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
	2719	+{
	2720	+ const char *state = smt_states[cpu_smt_control];
	2721	+
	2722	+ return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
	2723	+}
	2724	+
	2725	+static ssize_t
	2726	+store_smt_control(struct device *dev, struct device_attribute *attr,
	2727	+ const char *buf, size_t count)
	2728	+{
	2729	+ return __store_smt_control(dev, attr, buf, count);
	2730	+}
2215	2731	static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
2216	2732
2217	2733	static ssize_t
2218	2734	show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
2219	2735	{
2220		- bool active = topology_max_smt_threads() > 1;
2221		-
2222		- return snprintf(buf, PAGE_SIZE - 2, "%d\n", active);
	2736	+ return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
2223	2737	}
2224	2738	static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
2225	2739
..	..	@@ -2235,21 +2749,17 @@
2235	2749	NULL
2236	2750	};
2237	2751
2238		-static int __init cpu_smt_state_init(void)
	2752	+static int __init cpu_smt_sysfs_init(void)
2239	2753	{
2240	2754	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
2241	2755	&cpuhp_smt_attr_group);
2242	2756	}
2243	2757
2244		-#else
2245		-static inline int cpu_smt_state_init(void) { return 0; }
2246		-#endif
2247		-
2248	2758	static int __init cpuhp_sysfs_init(void)
2249	2759	{
2250	2760	int cpu, ret;
2251	2761
2252		- ret = cpu_smt_state_init();
	2762	+ ret = cpu_smt_sysfs_init();
2253	2763	if (ret)
2254	2764	return ret;
2255	2765
..	..	@@ -2270,7 +2780,7 @@
2270	2780	return 0;
2271	2781	}
2272	2782	device_initcall(cpuhp_sysfs_init);
2273		-#endif
	2783	+#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
2274	2784
2275	2785	/*
2276	2786	* cpu_bit_bitmap[] is a special, "compressed" data structure that
..	..	@@ -2317,8 +2827,8 @@
2317	2827	struct cpumask __cpu_active_mask __read_mostly;
2318	2828	EXPORT_SYMBOL(__cpu_active_mask);
2319	2829
2320		-struct cpumask __cpu_isolated_mask __read_mostly;
2321		-EXPORT_SYMBOL(__cpu_isolated_mask);
	2830	+atomic_t __num_online_cpus __read_mostly;
	2831	+EXPORT_SYMBOL(__num_online_cpus);
2322	2832
2323	2833	void init_cpu_present(const struct cpumask *src)
2324	2834	{
..	..	@@ -2333,6 +2843,27 @@
2333	2843	void init_cpu_online(const struct cpumask *src)
2334	2844	{
2335	2845	cpumask_copy(&__cpu_online_mask, src);
	2846	+}
	2847	+
	2848	+void set_cpu_online(unsigned int cpu, bool online)
	2849	+{
	2850	+ /*
	2851	+ * atomic_inc/dec() is required to handle the horrid abuse of this
	2852	+ * function by the reboot and kexec code which invoke it from
	2853	+ * IPI/NMI broadcasts when shutting down CPUs. Invocation from
	2854	+ * regular CPU hotplug is properly serialized.
	2855	+ *
	2856	+ * Note, that the fact that __num_online_cpus is of type atomic_t
	2857	+ * does not protect readers which are not serialized against
	2858	+ * concurrent hotplug operations.
	2859	+ */
	2860	+ if (online) {
	2861	+ if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
	2862	+ atomic_inc(&__num_online_cpus);
	2863	+ } else {
	2864	+ if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
	2865	+ atomic_dec(&__num_online_cpus);
	2866	+ }
2336	2867	}
2337	2868
2338	2869	/*
..	..	@@ -2359,7 +2890,7 @@
2359	2890	void __init boot_cpu_hotplug_init(void)
2360	2891	{
2361	2892	#ifdef CONFIG_SMP
2362		- this_cpu_write(cpuhp_state.booted_once, true);
	2893	+ cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
2363	2894	#endif
2364	2895	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
2365	2896	}