~hc/RK356X_SDK_RELEASE.git

..	..	@@ -10,6 +10,7 @@
10	10	#include <linux/notifier.h>
11	11	#include <linux/sched/signal.h>
12	12	#include <linux/sched/hotplug.h>
	13	+#include <linux/sched/isolation.h>
13	14	#include <linux/sched/task.h>
14	15	#include <linux/sched/smt.h>
15	16	#include <linux/unistd.h>
..	..	@@ -30,12 +31,19 @@
30	31	#include <linux/smpboot.h>
31	32	#include <linux/relay.h>
32	33	#include <linux/slab.h>
	34	+#include <linux/scs.h>
33	35	#include <linux/percpu-rwsem.h>
34	36	#include <linux/cpuset.h>
	37	+#include <linux/random.h>
	38	+#include <uapi/linux/sched/types.h>
35	39
36	40	#include <trace/events/power.h>
37	41	#define CREATE_TRACE_POINTS
38	42	#include <trace/events/cpuhp.h>
	43	+
	44	+#undef CREATE_TRACE_POINTS
	45	+#include <trace/hooks/sched.h>
	46	+#include <trace/hooks/cpu.h>
39	47
40	48	#include "smpboot.h"
41	49
..	..	@@ -63,7 +71,6 @@
63	71	bool rollback;
64	72	bool single;
65	73	bool bringup;
66		- bool booted_once;
67	74	struct hlist_node *node;
68	75	struct hlist_node *last;
69	76	enum cpuhp_state cb_state;
..	..	@@ -76,6 +83,10 @@
76	83	static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
77	84	.fail = CPUHP_INVALID,
78	85	};
	86	+
	87	+#ifdef CONFIG_SMP
	88	+cpumask_t cpus_booted_once_mask;
	89	+#endif
79	90
80	91	#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
81	92	static struct lockdep_map cpuhp_state_up_map =
..	..	@@ -269,11 +280,13 @@
269	280	{
270	281	mutex_lock(&cpu_add_remove_lock);
271	282	}
	283	+EXPORT_SYMBOL_GPL(cpu_maps_update_begin);
272	284
273	285	void cpu_maps_update_done(void)
274	286	{
275	287	mutex_unlock(&cpu_add_remove_lock);
276	288	}
	289	+EXPORT_SYMBOL_GPL(cpu_maps_update_done);
277	290
278	291	/*
279	292	* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
..	..	@@ -327,6 +340,16 @@
327	340	percpu_rwsem_assert_held(&cpu_hotplug_lock);
328	341	}
329	342
	343	+static void lockdep_acquire_cpus_lock(void)
	344	+{
	345	+ rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
	346	+}
	347	+
	348	+static void lockdep_release_cpus_lock(void)
	349	+{
	350	+ rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
	351	+}
	352	+
330	353	/*
331	354	* Wait for currently running CPU hotplug operations to complete (if any) and
332	355	* disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
..	..	@@ -356,6 +379,17 @@
356	379	cpu_maps_update_done();
357	380	}
358	381	EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
	382	+
	383	+#else
	384	+
	385	+static void lockdep_acquire_cpus_lock(void)
	386	+{
	387	+}
	388	+
	389	+static void lockdep_release_cpus_lock(void)
	390	+{
	391	+}
	392	+
359	393	#endif /* CONFIG_HOTPLUG_CPU */
360	394
361	395	/*
..	..	@@ -369,8 +403,7 @@
369	403
370	404	void __init cpu_smt_disable(bool force)
371	405	{
372		- if (cpu_smt_control == CPU_SMT_FORCE_DISABLED \|\|
373		- cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
	406	+ if (!cpu_smt_possible())
374	407	return;
375	408
376	409	if (force) {
..	..	@@ -410,11 +443,19 @@
410	443	/*
411	444	* On x86 it's required to boot all logical CPUs at least once so
412	445	* that the init code can get a chance to set CR4.MCE on each
413		- * CPU. Otherwise, a broadacasted MCE observing CR4.MCE=0b on any
	446	+ * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
414	447	* core will shutdown the machine.
415	448	*/
416		- return !per_cpu(cpuhp_state, cpu).booted_once;
	449	+ return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
417	450	}
	451	+
	452	+/* Returns true if SMT is supported and not forcefully (irreversibly) disabled */
	453	+bool cpu_smt_possible(void)
	454	+{
	455	+ return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
	456	+ cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
	457	+}
	458	+EXPORT_SYMBOL_GPL(cpu_smt_possible);
418	459	#else
419	460	static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
420	461	#endif
..	..	@@ -501,7 +542,7 @@
501	542	/*
502	543	* SMT soft disabling on X86 requires to bring the CPU out of the
503	544	* BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
504		- * CPU marked itself as booted_once in cpu_notify_starting() so the
	545	+ * CPU marked itself as booted_once in notify_cpu_starting() so the
505	546	* cpu_smt_allowed() check will now return false if this is not the
506	547	* primary sibling.
507	548	*/
..	..	@@ -518,6 +559,12 @@
518	559	{
519	560	struct task_struct *idle = idle_thread_get(cpu);
520	561	int ret;
	562	+
	563	+ /*
	564	+ * Reset stale stack state from the last time this CPU was online.
	565	+ */
	566	+ scs_task_reset(idle);
	567	+ kasan_unpoison_task_stack(idle);
521	568
522	569	/*
523	570	* Some architectures have to walk the irq descriptors to
..	..	@@ -640,6 +687,12 @@
640	687	*/
641	688	smp_mb();
642	689
	690	+ /*
	691	+ * The BP holds the hotplug lock, but we're now running on the AP,
	692	+ * ensure that anybody asserting the lock is held, will actually find
	693	+ * it so.
	694	+ */
	695	+ lockdep_acquire_cpus_lock();
643	696	cpuhp_lock_acquire(bringup);
644	697
645	698	if (st->single) {
..	..	@@ -685,6 +738,7 @@
685	738	}
686	739
687	740	cpuhp_lock_release(bringup);
	741	+ lockdep_release_cpus_lock();
688	742
689	743	if (!st->should_run)
690	744	complete_ap_thread(st, bringup);
..	..	@@ -876,15 +930,6 @@
876	930	int err, cpu = smp_processor_id();
877	931	int ret;
878	932
879		-#ifdef CONFIG_PREEMPT_RT_BASE
880		- /*
881		- * If any tasks disabled migration before we got here,
882		- * go back and sleep again.
883		- */
884		- if (cpu_nr_pinned(cpu))
885		- return -EAGAIN;
886		-#endif
887		-
888	933	/* Ensure this CPU doesn't handle any more interrupts. */
889	934	err = __cpu_disable();
890	935	if (err < 0)
..	..	@@ -907,14 +952,12 @@
907	952
908	953	/* Give up timekeeping duties */
909	954	tick_handover_do_timer();
	955	+ /* Remove CPU from timer broadcasting */
	956	+ tick_offline_cpu(cpu);
910	957	/* Park the stopper thread */
911	958	stop_machine_park(cpu);
912	959	return 0;
913	960	}
914		-
915		-#ifdef CONFIG_PREEMPT_RT_BASE
916		-struct task_struct *takedown_cpu_task;
917		-#endif
918	961
919	962	static int takedown_cpu(unsigned int cpu)
920	963	{
..	..	@@ -930,39 +973,11 @@
930	973	*/
931	974	irq_lock_sparse();
932	975
933		-#ifdef CONFIG_PREEMPT_RT_BASE
934		- WARN_ON_ONCE(takedown_cpu_task);
935		- takedown_cpu_task = current;
936		-
937		-again:
938		- /*
939		- * If a task pins this CPU after we pass this check, take_cpu_down
940		- * will return -EAGAIN.
941		- */
942		- for (;;) {
943		- int nr_pinned;
944		-
945		- set_current_state(TASK_UNINTERRUPTIBLE);
946		- nr_pinned = cpu_nr_pinned(cpu);
947		- if (nr_pinned == 0)
948		- break;
949		- schedule();
950		- }
951		- set_current_state(TASK_RUNNING);
952		-#endif
953		-
954	976	/*
955	977	* So now all preempt/rcu users must observe !cpu_active().
956	978	*/
957	979	err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
958		-#ifdef CONFIG_PREEMPT_RT_BASE
959		- if (err == -EAGAIN)
960		- goto again;
961		-#endif
962	980	if (err) {
963		-#ifdef CONFIG_PREEMPT_RT_BASE
964		- takedown_cpu_task = NULL;
965		-#endif
966	981	/* CPU refused to die */
967	982	irq_unlock_sparse();
968	983	/* Unpark the hotplug thread so we can rollback there */
..	..	@@ -981,9 +996,6 @@
981	996	wait_for_ap_thread(st, false);
982	997	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
983	998
984		-#ifdef CONFIG_PREEMPT_RT_BASE
985		- takedown_cpu_task = NULL;
986		-#endif
987	999	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
988	1000	irq_unlock_sparse();
989	1001
..	..	@@ -1049,7 +1061,7 @@
1049	1061	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1050	1062	int prev_state, ret = 0;
1051	1063
1052		- if (num_online_cpus() == 1)
	1064	+ if (num_active_cpus() == 1 && cpu_active(cpu))
1053	1065	return -EBUSY;
1054	1066
1055	1067	if (!cpu_present(cpu))
..	..	@@ -1112,7 +1124,7 @@
1112	1124	return _cpu_down(cpu, 0, target);
1113	1125	}
1114	1126
1115		-static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
	1127	+static int cpu_down(unsigned int cpu, enum cpuhp_state target)
1116	1128	{
1117	1129	int err;
1118	1130
..	..	@@ -1122,11 +1134,317 @@
1122	1134	return err;
1123	1135	}
1124	1136
1125		-int cpu_down(unsigned int cpu)
	1137	+/**
	1138	+ * cpu_device_down - Bring down a cpu device
	1139	+ * @dev: Pointer to the cpu device to offline
	1140	+ *
	1141	+ * This function is meant to be used by device core cpu subsystem only.
	1142	+ *
	1143	+ * Other subsystems should use remove_cpu() instead.
	1144	+ */
	1145	+int cpu_device_down(struct device *dev)
1126	1146	{
1127		- return do_cpu_down(cpu, CPUHP_OFFLINE);
	1147	+ return cpu_down(dev->id, CPUHP_OFFLINE);
1128	1148	}
1129		-EXPORT_SYMBOL(cpu_down);
	1149	+
	1150	+int remove_cpu(unsigned int cpu)
	1151	+{
	1152	+ int ret;
	1153	+
	1154	+ lock_device_hotplug();
	1155	+ ret = device_offline(get_cpu_device(cpu));
	1156	+ unlock_device_hotplug();
	1157	+
	1158	+ return ret;
	1159	+}
	1160	+EXPORT_SYMBOL_GPL(remove_cpu);
	1161	+
	1162	+extern int dl_cpu_busy(int cpu, struct task_struct *p);
	1163	+
	1164	+int __pause_drain_rq(struct cpumask *cpus)
	1165	+{
	1166	+ unsigned int cpu;
	1167	+ int err = 0;
	1168	+
	1169	+ /*
	1170	+ * Disabling preemption prevents one of the stoppers, started from
	1171	+ * sched_cpu_drain_rq(), from blocking the firing of draining for the whole cpumask.
	1172	+ */
	1173	+ preempt_disable();
	1174	+ for_each_cpu(cpu, cpus) {
	1175	+ err = sched_cpu_drain_rq(cpu);
	1176	+ if (err)
	1177	+ break;
	1178	+ }
	1179	+ preempt_enable();
	1180	+
	1181	+ return err;
	1182	+}
	1183	+
	1184	+void __wait_drain_rq(struct cpumask *cpus)
	1185	+{
	1186	+ unsigned int cpu;
	1187	+
	1188	+ for_each_cpu(cpu, cpus)
	1189	+ sched_cpu_drain_rq_wait(cpu);
	1190	+}
	1191	+
	1192	+/* if rt task, set to cfs and return previous prio */
	1193	+static int pause_reduce_prio(void)
	1194	+{
	1195	+ int prev_prio = -1;
	1196	+
	1197	+ if (current->prio < MAX_RT_PRIO) {
	1198	+ struct sched_param param = { .sched_priority = 0 };
	1199	+
	1200	+ prev_prio = current->prio;
	1201	+ sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
	1202	+ }
	1203	+
	1204	+ return prev_prio;
	1205	+}
	1206	+
	1207	+/* if previous prio was set, restore */
	1208	+static void pause_restore_prio(int prev_prio)
	1209	+{
	1210	+ if (prev_prio >= 0 && prev_prio < MAX_RT_PRIO) {
	1211	+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1-prev_prio };
	1212	+
	1213	+ sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
	1214	+ }
	1215	+}
	1216	+
	1217	+int pause_cpus(struct cpumask *cpus)
	1218	+{
	1219	+ int err = 0;
	1220	+ int cpu;
	1221	+ u64 start_time = 0;
	1222	+ int prev_prio;
	1223	+
	1224	+ start_time = sched_clock();
	1225	+
	1226	+ cpu_maps_update_begin();
	1227	+
	1228	+ if (cpu_hotplug_disabled) {
	1229	+ err = -EBUSY;
	1230	+ goto err_cpu_maps_update;
	1231	+ }
	1232	+
	1233	+ /* Pausing an already inactive CPU isn't an error */
	1234	+ cpumask_and(cpus, cpus, cpu_active_mask);
	1235	+
	1236	+ for_each_cpu(cpu, cpus) {
	1237	+ if (!cpu_online(cpu) \|\| dl_cpu_busy(cpu, NULL) \|\|
	1238	+ get_cpu_device(cpu)->offline_disabled == true) {
	1239	+ err = -EBUSY;
	1240	+ goto err_cpu_maps_update;
	1241	+ }
	1242	+ }
	1243	+
	1244	+ if (cpumask_weight(cpus) >= num_active_cpus()) {
	1245	+ err = -EBUSY;
	1246	+ goto err_cpu_maps_update;
	1247	+ }
	1248	+
	1249	+ if (cpumask_empty(cpus))
	1250	+ goto err_cpu_maps_update;
	1251	+
	1252	+ /*
	1253	+ * Lazy migration:
	1254	+ *
	1255	+ * We do care about how fast a CPU can go idle and stay in this
	1256	+ * state. If we try to take the cpus_write_lock() here, we would have
	1257	+ * to wait for a few dozens of ms, as this function might schedule.
	1258	+ * However, we can, as a first step, flip the active mask and migrate
	1259	+ * anything currently on the run-queue, to give a chance to the paused
	1260	+ * CPUs to reach quickly an idle state. There's a risk meanwhile for
	1261	+ * another CPU to observe an out-of-date active_mask or to incompletely
	1262	+ * update a cpuset. Both problems would be resolved later in the slow
	1263	+ * path, which ensures active_mask synchronization, triggers a cpuset
	1264	+ * rebuild and migrate any task that would have escaped the lazy
	1265	+ * migration.
	1266	+ */
	1267	+ for_each_cpu(cpu, cpus)
	1268	+ set_cpu_active(cpu, false);
	1269	+ err = __pause_drain_rq(cpus);
	1270	+ if (err) {
	1271	+ __wait_drain_rq(cpus);
	1272	+ for_each_cpu(cpu, cpus)
	1273	+ set_cpu_active(cpu, true);
	1274	+ goto err_cpu_maps_update;
	1275	+ }
	1276	+
	1277	+ prev_prio = pause_reduce_prio();
	1278	+
	1279	+ /*
	1280	+ * Slow path deactivation:
	1281	+ *
	1282	+ * Now that paused CPUs are most likely idle, we can go through a
	1283	+ * complete scheduler deactivation.
	1284	+ *
	1285	+ * The cpu_active_mask being already set and cpus_write_lock calling
	1286	+ * synchronize_rcu(), we know that all preempt-disabled and RCU users
	1287	+ * will observe the updated value.
	1288	+ */
	1289	+ cpus_write_lock();
	1290	+
	1291	+ __wait_drain_rq(cpus);
	1292	+
	1293	+ cpuhp_tasks_frozen = 0;
	1294	+
	1295	+ if (sched_cpus_deactivate_nosync(cpus)) {
	1296	+ err = -EBUSY;
	1297	+ goto err_cpus_write_unlock;
	1298	+ }
	1299	+
	1300	+ err = __pause_drain_rq(cpus);
	1301	+ __wait_drain_rq(cpus);
	1302	+ if (err) {
	1303	+ for_each_cpu(cpu, cpus)
	1304	+ sched_cpu_activate(cpu);
	1305	+ goto err_cpus_write_unlock;
	1306	+ }
	1307	+
	1308	+ /*
	1309	+ * Even though it lives alongside the regular HP path, pause uses
	1310	+ * one of the HP steps (CPUHP_AP_ACTIVE). This should be reflected in the
	1311	+ * current state of the CPU.
	1312	+ */
	1313	+ for_each_cpu(cpu, cpus) {
	1314	+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	1315	+
	1316	+ st->state = CPUHP_AP_ACTIVE - 1;
	1317	+ st->target = st->state;
	1318	+ }
	1319	+
	1320	+err_cpus_write_unlock:
	1321	+ cpus_write_unlock();
	1322	+ pause_restore_prio(prev_prio);
	1323	+err_cpu_maps_update:
	1324	+ cpu_maps_update_done();
	1325	+
	1326	+ trace_cpuhp_pause(cpus, start_time, 1);
	1327	+
	1328	+ return err;
	1329	+}
	1330	+EXPORT_SYMBOL_GPL(pause_cpus);
	1331	+
	1332	+int resume_cpus(struct cpumask *cpus)
	1333	+{
	1334	+ unsigned int cpu;
	1335	+ int err = 0;
	1336	+ u64 start_time = 0;
	1337	+ int prev_prio;
	1338	+
	1339	+ start_time = sched_clock();
	1340	+
	1341	+ cpu_maps_update_begin();
	1342	+
	1343	+ if (cpu_hotplug_disabled) {
	1344	+ err = -EBUSY;
	1345	+ goto err_cpu_maps_update;
	1346	+ }
	1347	+
	1348	+ /* Resuming an already active CPU isn't an error */
	1349	+ cpumask_andnot(cpus, cpus, cpu_active_mask);
	1350	+
	1351	+ for_each_cpu(cpu, cpus) {
	1352	+ if (!cpu_online(cpu)) {
	1353	+ err = -EBUSY;
	1354	+ goto err_cpu_maps_update;
	1355	+ }
	1356	+ }
	1357	+
	1358	+ if (cpumask_empty(cpus))
	1359	+ goto err_cpu_maps_update;
	1360	+
	1361	+ for_each_cpu(cpu, cpus)
	1362	+ set_cpu_active(cpu, true);
	1363	+
	1364	+ trace_android_rvh_resume_cpus(cpus, &err);
	1365	+ if (err)
	1366	+ goto err_cpu_maps_update;
	1367	+
	1368	+ prev_prio = pause_reduce_prio();
	1369	+
	1370	+ /* Lazy resume: build the domains by scheduling a workqueue on the
	1371	+ * resuming CPU, so that the resuming CPU can start working
	1372	+ * earlier without adding load to other busy CPUs.
	1373	+ */
	1374	+ cpuset_update_active_cpus_affine(cpumask_first(cpus));
	1375	+
	1376	+ cpus_write_lock();
	1377	+
	1378	+ cpuhp_tasks_frozen = 0;
	1379	+
	1380	+ if (sched_cpus_activate(cpus)) {
	1381	+ err = -EBUSY;
	1382	+ goto err_cpus_write_unlock;
	1383	+ }
	1384	+
	1385	+ /*
	1386	+ * see pause_cpus.
	1387	+ */
	1388	+ for_each_cpu(cpu, cpus) {
	1389	+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	1390	+
	1391	+ st->state = CPUHP_ONLINE;
	1392	+ st->target = st->state;
	1393	+ }
	1394	+
	1395	+err_cpus_write_unlock:
	1396	+ cpus_write_unlock();
	1397	+ pause_restore_prio(prev_prio);
	1398	+err_cpu_maps_update:
	1399	+ cpu_maps_update_done();
	1400	+
	1401	+ trace_cpuhp_pause(cpus, start_time, 0);
	1402	+
	1403	+ return err;
	1404	+}
	1405	+EXPORT_SYMBOL_GPL(resume_cpus);
	1406	+
	1407	+void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
	1408	+{
	1409	+ unsigned int cpu;
	1410	+ int error;
	1411	+
	1412	+ cpu_maps_update_begin();
	1413	+
	1414	+ /*
	1415	+ * Make certain the cpu I'm about to reboot on is online.
	1416	+ *
	1417	+ * This is in line with what migrate_to_reboot_cpu() already does.
	1418	+ */
	1419	+ if (!cpu_online(primary_cpu))
	1420	+ primary_cpu = cpumask_first(cpu_online_mask);
	1421	+
	1422	+ for_each_online_cpu(cpu) {
	1423	+ if (cpu == primary_cpu)
	1424	+ continue;
	1425	+
	1426	+ error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
	1427	+ if (error) {
	1428	+ pr_err("Failed to offline CPU%d - error=%d",
	1429	+ cpu, error);
	1430	+ break;
	1431	+ }
	1432	+ }
	1433	+
	1434	+ /*
	1435	+ * Ensure all but the reboot CPU are offline.
	1436	+ */
	1437	+ BUG_ON(num_online_cpus() > 1);
	1438	+
	1439	+ /*
	1440	+ * Make sure the CPUs won't be enabled by someone else after this
	1441	+ * point. Kexec will reboot to a new kernel shortly resetting
	1442	+ * everything along the way.
	1443	+ */
	1444	+ cpu_hotplug_disabled++;
	1445	+
	1446	+ cpu_maps_update_done();
	1447	+}
1130	1448
1131	1449	#else
1132	1450	#define takedown_cpu NULL
..	..	@@ -1146,7 +1464,7 @@
1146	1464	int ret;
1147	1465
1148	1466	rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
1149		- st->booted_once = true;
	1467	+ cpumask_set_cpu(cpu, &cpus_booted_once_mask);
1150	1468	while (st->state < target) {
1151	1469	st->state++;
1152	1470	ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
..	..	@@ -1180,6 +1498,25 @@
1180	1498	complete_ap_thread(st, true);
1181	1499	}
1182	1500
	1501	+static int switch_to_rt_policy(void)
	1502	+{
	1503	+ struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
	1504	+ unsigned int policy = current->policy;
	1505	+
	1506	+ if (policy == SCHED_NORMAL)
	1507	+ /* Switch to SCHED_FIFO from SCHED_NORMAL. */
	1508	+ return sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
	1509	+ else
	1510	+ return 1;
	1511	+}
	1512	+
	1513	+static int switch_to_fair_policy(void)
	1514	+{
	1515	+ struct sched_param param = { .sched_priority = 0 };
	1516	+
	1517	+ return sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
	1518	+}
	1519	+
1183	1520	/* Requires cpu_add_remove_lock to be held */
1184	1521	static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1185	1522	{
..	..	@@ -1195,8 +1532,8 @@
1195	1532	}
1196	1533
1197	1534	/*
1198		- * The caller of do_cpu_up might have raced with another
1199		- * caller. Ignore it for now.
	1535	+ * The caller of cpu_up() might have raced with another
	1536	+ * caller. Nothing to do.
1200	1537	*/
1201	1538	if (st->state >= target)
1202	1539	goto out;
..	..	@@ -1241,9 +1578,10 @@
1241	1578	return ret;
1242	1579	}
1243	1580
1244		-static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
	1581	+static int cpu_up(unsigned int cpu, enum cpuhp_state target)
1245	1582	{
1246	1583	int err = 0;
	1584	+ int switch_err;
1247	1585
1248	1586	if (!cpu_possible(cpu)) {
1249	1587	pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
..	..	@@ -1254,9 +1592,23 @@
1254	1592	return -EINVAL;
1255	1593	}
1256	1594
	1595	+ trace_android_vh_cpu_up(cpu);
	1596	+
	1597	+ /*
	1598	+ * A CPU hotplug operation consists of many steps, and each step
	1599	+ * calls a callback of a core kernel subsystem. The hotplug-in
	1600	+ * operation may get preempted by other CFS tasks, which delays
	1601	+ * the whole bring-up of the CPU. Switch the
	1602	+ * current task from SCHED_NORMAL to SCHED_FIFO, so that the
	1603	+ * hotplug-in operation can complete quickly under heavily loaded
	1604	+ * conditions and the new CPU can start handling the workload.
	1605	+ */
	1606	+
	1607	+ switch_err = switch_to_rt_policy();
	1608	+
1257	1609	err = try_online_node(cpu_to_node(cpu));
1258	1610	if (err)
1259		- return err;
	1611	+ goto switch_out;
1260	1612
1261	1613	cpu_maps_update_begin();
1262	1614
..	..	@@ -1272,14 +1624,76 @@
1272	1624	err = _cpu_up(cpu, 0, target);
1273	1625	out:
1274	1626	cpu_maps_update_done();
	1627	+switch_out:
	1628	+ if (!switch_err) {
	1629	+ switch_err = switch_to_fair_policy();
	1630	+ if (switch_err)
	1631	+ pr_err("Hotplug policy switch err=%d Task %s pid=%d\n",
	1632	+ switch_err, current->comm, current->pid);
	1633	+ }
	1634	+
1275	1635	return err;
1276	1636	}
1277	1637
1278		-int cpu_up(unsigned int cpu)
	1638	+/**
	1639	+ * cpu_device_up - Bring up a cpu device
	1640	+ * @dev: Pointer to the cpu device to online
	1641	+ *
	1642	+ * This function is meant to be used by device core cpu subsystem only.
	1643	+ *
	1644	+ * Other subsystems should use add_cpu() instead.
	1645	+ */
	1646	+int cpu_device_up(struct device *dev)
1279	1647	{
1280		- return do_cpu_up(cpu, CPUHP_ONLINE);
	1648	+ return cpu_up(dev->id, CPUHP_ONLINE);
1281	1649	}
1282		-EXPORT_SYMBOL_GPL(cpu_up);
	1650	+
	1651	+int add_cpu(unsigned int cpu)
	1652	+{
	1653	+ int ret;
	1654	+
	1655	+ lock_device_hotplug();
	1656	+ ret = device_online(get_cpu_device(cpu));
	1657	+ unlock_device_hotplug();
	1658	+
	1659	+ return ret;
	1660	+}
	1661	+EXPORT_SYMBOL_GPL(add_cpu);
	1662	+
	1663	+/**
	1664	+ * bringup_hibernate_cpu - Bring up the CPU that we hibernated on
	1665	+ * @sleep_cpu: The cpu we hibernated on and should be brought up.
	1666	+ *
	1667	+ * On some architectures like arm64, we can hibernate on any CPU, but on
	1668	+ * wake up the CPU we hibernated on might be offline as a side effect of
	1669	+ * using maxcpus= for example.
	1670	+ */
	1671	+int bringup_hibernate_cpu(unsigned int sleep_cpu)
	1672	+{
	1673	+ int ret;
	1674	+
	1675	+ if (!cpu_online(sleep_cpu)) {
	1676	+ pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
	1677	+ ret = cpu_up(sleep_cpu, CPUHP_ONLINE);
	1678	+ if (ret) {
	1679	+ pr_err("Failed to bring hibernate-CPU up!\n");
	1680	+ return ret;
	1681	+ }
	1682	+ }
	1683	+ return 0;
	1684	+}
	1685	+
	1686	+void bringup_nonboot_cpus(unsigned int setup_max_cpus)
	1687	+{
	1688	+ unsigned int cpu;
	1689	+
	1690	+ for_each_present_cpu(cpu) {
	1691	+ if (num_online_cpus() >= setup_max_cpus)
	1692	+ break;
	1693	+ if (!cpu_online(cpu))
	1694	+ cpu_up(cpu, CPUHP_ONLINE);
	1695	+ }
	1696	+}
1283	1697
1284	1698	#ifdef CONFIG_PM_SLEEP_SMP
1285	1699	static cpumask_var_t frozen_cpus;
..	..	@@ -1289,8 +1703,15 @@
1289	1703	int cpu, error = 0;
1290	1704
1291	1705	cpu_maps_update_begin();
1292		- if (!cpu_online(primary))
	1706	+ if (primary == -1) {
1293	1707	primary = cpumask_first(cpu_online_mask);
	1708	+ if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
	1709	+ primary = housekeeping_any_cpu(HK_FLAG_TIMER);
	1710	+ } else {
	1711	+ if (!cpu_online(primary))
	1712	+ primary = cpumask_first(cpu_online_mask);
	1713	+ }
	1714	+
1294	1715	/*
1295	1716	* We take down all of the non-boot CPUs in one shot to avoid races
1296	1717	* with the userspace trying to use the CPU hotplug at the same time
..	..	@@ -1301,6 +1722,13 @@
1301	1722	for_each_online_cpu(cpu) {
1302	1723	if (cpu == primary)
1303	1724	continue;
	1725	+
	1726	+ if (pm_wakeup_pending()) {
	1727	+ pr_info("Wakeup pending. Abort CPU freeze\n");
	1728	+ error = -EBUSY;
	1729	+ break;
	1730	+ }
	1731	+
1304	1732	trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1305	1733	error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1306	1734	trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
..	..	@@ -1319,8 +1747,8 @@
1319	1747
1320	1748	/*
1321	1749	* Make sure the CPUs won't be enabled by someone else. We need to do
1322		- * this even in case of failure as all disable_nonboot_cpus() users are
1323		- * supposed to do enable_nonboot_cpus() on the failure path.
	1750	+ * this even in case of failure as all freeze_secondary_cpus() users are
	1751	+ * supposed to do thaw_secondary_cpus() on the failure path.
1324	1752	*/
1325	1753	cpu_hotplug_disabled++;
1326	1754
..	..	@@ -1328,15 +1756,15 @@
1328	1756	return error;
1329	1757	}
1330	1758
1331		-void __weak arch_enable_nonboot_cpus_begin(void)
	1759	+void __weak arch_thaw_secondary_cpus_begin(void)
1332	1760	{
1333	1761	}
1334	1762
1335		-void __weak arch_enable_nonboot_cpus_end(void)
	1763	+void __weak arch_thaw_secondary_cpus_end(void)
1336	1764	{
1337	1765	}
1338	1766
1339		-void enable_nonboot_cpus(void)
	1767	+void thaw_secondary_cpus(void)
1340	1768	{
1341	1769	int cpu, error;
1342	1770	struct device *cpu_device;
..	..	@@ -1349,7 +1777,7 @@
1349	1777
1350	1778	pr_info("Enabling non-boot CPUs ...\n");
1351	1779
1352		- arch_enable_nonboot_cpus_begin();
	1780	+ arch_thaw_secondary_cpus_begin();
1353	1781
1354	1782	for_each_cpu(cpu, frozen_cpus) {
1355	1783	trace_suspend_resume(TPS("CPU_ON"), cpu, true);
..	..	@@ -1368,7 +1796,7 @@
1368	1796	pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1369	1797	}
1370	1798
1371		- arch_enable_nonboot_cpus_end();
	1799	+ arch_thaw_secondary_cpus_end();
1372	1800
1373	1801	cpumask_clear(frozen_cpus);
1374	1802	out:
..	..	@@ -1434,6 +1862,22 @@
1434	1862
1435	1863	int __boot_cpu_id;
1436	1864
	1865	+/* Horrific hacks because we can't add more to cpuhp_hp_states. */
	1866	+static int random_and_perf_prepare_fusion(unsigned int cpu)
	1867	+{
	1868	+#ifdef CONFIG_PERF_EVENTS
	1869	+ perf_event_init_cpu(cpu);
	1870	+#endif
	1871	+ random_prepare_cpu(cpu);
	1872	+ return 0;
	1873	+}
	1874	+static int random_and_workqueue_online_fusion(unsigned int cpu)
	1875	+{
	1876	+ workqueue_online_cpu(cpu);
	1877	+ random_online_cpu(cpu);
	1878	+ return 0;
	1879	+}
	1880	+
1437	1881	#endif /* CONFIG_SMP */
1438	1882
1439	1883	/* Boot processor state steps */
..	..	@@ -1452,7 +1896,7 @@
1452	1896	},
1453	1897	[CPUHP_PERF_PREPARE] = {
1454	1898	.name = "perf:prepare",
1455		- .startup.single = perf_event_init_cpu,
	1899	+ .startup.single = random_and_perf_prepare_fusion,
1456	1900	.teardown.single = perf_event_exit_cpu,
1457	1901	},
1458	1902	[CPUHP_WORKQUEUE_PREP] = {
..	..	@@ -1568,7 +2012,7 @@
1568	2012	},
1569	2013	[CPUHP_AP_WORKQUEUE_ONLINE] = {
1570	2014	.name = "workqueue:online",
1571		- .startup.single = workqueue_online_cpu,
	2015	+ .startup.single = random_and_workqueue_online_fusion,
1572	2016	.teardown.single = workqueue_offline_cpu,
1573	2017	},
1574	2018	[CPUHP_AP_RCUTREE_ONLINE] = {
..	..	@@ -1979,6 +2423,78 @@
1979	2423	}
1980	2424	EXPORT_SYMBOL(__cpuhp_remove_state);
1981	2425
	2426	+#ifdef CONFIG_HOTPLUG_SMT
	2427	+static void cpuhp_offline_cpu_device(unsigned int cpu)
	2428	+{
	2429	+ struct device *dev = get_cpu_device(cpu);
	2430	+
	2431	+ dev->offline = true;
	2432	+ /* Tell user space about the state change */
	2433	+ kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
	2434	+}
	2435	+
	2436	+static void cpuhp_online_cpu_device(unsigned int cpu)
	2437	+{
	2438	+ struct device *dev = get_cpu_device(cpu);
	2439	+
	2440	+ dev->offline = false;
	2441	+ /* Tell user space about the state change */
	2442	+ kobject_uevent(&dev->kobj, KOBJ_ONLINE);
	2443	+}
	2444	+
	2445	+int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
	2446	+{
	2447	+ int cpu, ret = 0;
	2448	+
	2449	+ cpu_maps_update_begin();
	2450	+ for_each_online_cpu(cpu) {
	2451	+ if (topology_is_primary_thread(cpu))
	2452	+ continue;
	2453	+ ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
	2454	+ if (ret)
	2455	+ break;
	2456	+ /*
	2457	+ * As this needs to hold the cpu maps lock it's impossible
	2458	+ * to call device_offline() because that ends up calling
	2459	+ * cpu_down() which takes cpu maps lock. cpu maps lock
	2460	+ * needs to be held as this might race against in kernel
	2461	+ * abusers of the hotplug machinery (thermal management).
	2462	+ *
	2463	+ * So nothing would update device:offline state. That would
	2464	+ * leave the sysfs entry stale and prevent onlining after
	2465	+ * smt control has been changed to 'off' again. This is
	2466	+ * called under the sysfs hotplug lock, so it is properly
	2467	+ * serialized against the regular offline usage.
	2468	+ */
	2469	+ cpuhp_offline_cpu_device(cpu);
	2470	+ }
	2471	+ if (!ret)
	2472	+ cpu_smt_control = ctrlval;
	2473	+ cpu_maps_update_done();
	2474	+ return ret;
	2475	+}
	2476	+
	2477	+int cpuhp_smt_enable(void)
	2478	+{
	2479	+ int cpu, ret = 0;
	2480	+
	2481	+ cpu_maps_update_begin();
	2482	+ cpu_smt_control = CPU_SMT_ENABLED;
	2483	+ for_each_present_cpu(cpu) {
	2484	+ /* Skip online CPUs and CPUs on offline nodes */
	2485	+ if (cpu_online(cpu) \|\| !node_online(cpu_to_node(cpu)))
	2486	+ continue;
	2487	+ ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
	2488	+ if (ret)
	2489	+ break;
	2490	+ /* See comment in cpuhp_smt_disable() */
	2491	+ cpuhp_online_cpu_device(cpu);
	2492	+ }
	2493	+ cpu_maps_update_done();
	2494	+ return ret;
	2495	+}
	2496	+#endif
	2497	+
1982	2498	#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
1983	2499	static ssize_t show_cpuhp_state(struct device *dev,
1984	2500	struct device_attribute attr, char buf)
..	..	@@ -2021,9 +2537,9 @@
2021	2537	goto out;
2022	2538
2023	2539	if (st->state < target)
2024		- ret = do_cpu_up(dev->id, target);
	2540	+ ret = cpu_up(dev->id, target);
2025	2541	else
2026		- ret = do_cpu_down(dev->id, target);
	2542	+ ret = cpu_down(dev->id, target);
2027	2543	out:
2028	2544	unlock_device_hotplug();
2029	2545	return ret ? ret : count;
..	..	@@ -2133,92 +2649,9 @@
2133	2649
2134	2650	#ifdef CONFIG_HOTPLUG_SMT
2135	2651
2136		-static const char *smt_states[] = {
2137		- [CPU_SMT_ENABLED] = "on",
2138		- [CPU_SMT_DISABLED] = "off",
2139		- [CPU_SMT_FORCE_DISABLED] = "forceoff",
2140		- [CPU_SMT_NOT_SUPPORTED] = "notsupported",
2141		-};
2142		-
2143	2652	static ssize_t
2144		-show_smt_control(struct device dev, struct device_attribute attr, char *buf)
2145		-{
2146		- return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]);
2147		-}
2148		-
2149		-static void cpuhp_offline_cpu_device(unsigned int cpu)
2150		-{
2151		- struct device *dev = get_cpu_device(cpu);
2152		-
2153		- dev->offline = true;
2154		- /* Tell user space about the state change */
2155		- kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2156		-}
2157		-
2158		-static void cpuhp_online_cpu_device(unsigned int cpu)
2159		-{
2160		- struct device *dev = get_cpu_device(cpu);
2161		-
2162		- dev->offline = false;
2163		- /* Tell user space about the state change */
2164		- kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2165		-}
2166		-
2167		-int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2168		-{
2169		- int cpu, ret = 0;
2170		-
2171		- cpu_maps_update_begin();
2172		- for_each_online_cpu(cpu) {
2173		- if (topology_is_primary_thread(cpu))
2174		- continue;
2175		- ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2176		- if (ret)
2177		- break;
2178		- /*
2179		- * As this needs to hold the cpu maps lock it's impossible
2180		- * to call device_offline() because that ends up calling
2181		- * cpu_down() which takes cpu maps lock. cpu maps lock
2182		- * needs to be held as this might race against in kernel
2183		- * abusers of the hotplug machinery (thermal management).
2184		- *
2185		- * So nothing would update device:offline state. That would
2186		- * leave the sysfs entry stale and prevent onlining after
2187		- * smt control has been changed to 'off' again. This is
2188		- * called under the sysfs hotplug lock, so it is properly
2189		- * serialized against the regular offline usage.
2190		- */
2191		- cpuhp_offline_cpu_device(cpu);
2192		- }
2193		- if (!ret)
2194		- cpu_smt_control = ctrlval;
2195		- cpu_maps_update_done();
2196		- return ret;
2197		-}
2198		-
2199		-int cpuhp_smt_enable(void)
2200		-{
2201		- int cpu, ret = 0;
2202		-
2203		- cpu_maps_update_begin();
2204		- cpu_smt_control = CPU_SMT_ENABLED;
2205		- for_each_present_cpu(cpu) {
2206		- /* Skip online CPUs and CPUs on offline nodes */
2207		- if (cpu_online(cpu) \|\| !node_online(cpu_to_node(cpu)))
2208		- continue;
2209		- ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2210		- if (ret)
2211		- break;
2212		- /* See comment in cpuhp_smt_disable() */
2213		- cpuhp_online_cpu_device(cpu);
2214		- }
2215		- cpu_maps_update_done();
2216		- return ret;
2217		-}
2218		-
2219		-static ssize_t
2220		-store_smt_control(struct device *dev, struct device_attribute *attr,
2221		- const char *buf, size_t count)
	2653	+__store_smt_control(struct device *dev, struct device_attribute *attr,
	2654	+ const char *buf, size_t count)
2222	2655	{
2223	2656	int ctrlval, ret;
2224	2657
..	..	@@ -2256,14 +2689,44 @@
2256	2689	unlock_device_hotplug();
2257	2690	return ret ? ret : count;
2258	2691	}
	2692	+
	2693	+#else /* !CONFIG_HOTPLUG_SMT */
	2694	+static ssize_t
	2695	+__store_smt_control(struct device *dev, struct device_attribute *attr,
	2696	+ const char *buf, size_t count)
	2697	+{
	2698	+ return -ENODEV;
	2699	+}
	2700	+#endif /* CONFIG_HOTPLUG_SMT */
	2701	+
	2702	+static const char *smt_states[] = {
	2703	+ [CPU_SMT_ENABLED] = "on",
	2704	+ [CPU_SMT_DISABLED] = "off",
	2705	+ [CPU_SMT_FORCE_DISABLED] = "forceoff",
	2706	+ [CPU_SMT_NOT_SUPPORTED] = "notsupported",
	2707	+ [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
	2708	+};
	2709	+
	2710	+static ssize_t
	2711	+show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
	2712	+{
	2713	+ const char *state = smt_states[cpu_smt_control];
	2714	+
	2715	+ return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
	2716	+}
	2717	+
	2718	+static ssize_t
	2719	+store_smt_control(struct device *dev, struct device_attribute *attr,
	2720	+ const char *buf, size_t count)
	2721	+{
	2722	+ return __store_smt_control(dev, attr, buf, count);
	2723	+}
2259	2724	static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
2260	2725
2261	2726	static ssize_t
2262	2727	show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
2263	2728	{
2264		- bool active = topology_max_smt_threads() > 1;
2265		-
2266		- return snprintf(buf, PAGE_SIZE - 2, "%d\n", active);
	2729	+ return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
2267	2730	}
2268	2731	static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
2269	2732
..	..	@@ -2279,21 +2742,17 @@
2279	2742	NULL
2280	2743	};
2281	2744
2282		-static int __init cpu_smt_state_init(void)
	2745	+static int __init cpu_smt_sysfs_init(void)
2283	2746	{
2284	2747	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
2285	2748	&cpuhp_smt_attr_group);
2286	2749	}
2287	2750
2288		-#else
2289		-static inline int cpu_smt_state_init(void) { return 0; }
2290		-#endif
2291		-
2292	2751	static int __init cpuhp_sysfs_init(void)
2293	2752	{
2294	2753	int cpu, ret;
2295	2754
2296		- ret = cpu_smt_state_init();
	2755	+ ret = cpu_smt_sysfs_init();
2297	2756	if (ret)
2298	2757	return ret;
2299	2758
..	..	@@ -2314,7 +2773,7 @@
2314	2773	return 0;
2315	2774	}
2316	2775	device_initcall(cpuhp_sysfs_init);
2317		-#endif
	2776	+#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
2318	2777
2319	2778	/*
2320	2779	* cpu_bit_bitmap[] is a special, "compressed" data structure that
..	..	@@ -2361,8 +2820,8 @@
2361	2820	struct cpumask __cpu_active_mask __read_mostly;
2362	2821	EXPORT_SYMBOL(__cpu_active_mask);
2363	2822
2364		-struct cpumask __cpu_isolated_mask __read_mostly;
2365		-EXPORT_SYMBOL(__cpu_isolated_mask);
	2823	+atomic_t __num_online_cpus __read_mostly;
	2824	+EXPORT_SYMBOL(__num_online_cpus);
2366	2825
2367	2826	void init_cpu_present(const struct cpumask *src)
2368	2827	{
..	..	@@ -2377,6 +2836,27 @@
2377	2836	void init_cpu_online(const struct cpumask *src)
2378	2837	{
2379	2838	cpumask_copy(&__cpu_online_mask, src);
	2839	+}
	2840	+
	2841	+void set_cpu_online(unsigned int cpu, bool online)
	2842	+{
	2843	+ /*
	2844	+ * atomic_inc/dec() is required to handle the horrid abuse of this
	2845	+ * function by the reboot and kexec code which invoke it from
	2846	+ * IPI/NMI broadcasts when shutting down CPUs. Invocation from
	2847	+ * regular CPU hotplug is properly serialized.
	2848	+ *
	2849	+ * Note, that the fact that __num_online_cpus is of type atomic_t
	2850	+ * does not protect readers which are not serialized against
	2851	+ * concurrent hotplug operations.
	2852	+ */
	2853	+ if (online) {
	2854	+ if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
	2855	+ atomic_inc(&__num_online_cpus);
	2856	+ } else {
	2857	+ if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
	2858	+ atomic_dec(&__num_online_cpus);
	2859	+ }
2380	2860	}
2381	2861
2382	2862	/*
..	..	@@ -2403,7 +2883,7 @@
2403	2883	void __init boot_cpu_hotplug_init(void)
2404	2884	{
2405	2885	#ifdef CONFIG_SMP
2406		- this_cpu_write(cpuhp_state.booted_once, true);
	2886	+ cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
2407	2887	#endif
2408	2888	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
2409	2889	}