~hc/RK356X_SDK_RELEASE.git

..	..	@@ -10,6 +10,7 @@
10	10	#include <linux/notifier.h>
11	11	#include <linux/sched/signal.h>
12	12	#include <linux/sched/hotplug.h>
	13	+#include <linux/sched/isolation.h>
13	14	#include <linux/sched/task.h>
14	15	#include <linux/sched/smt.h>
15	16	#include <linux/unistd.h>
..	..	@@ -30,13 +31,21 @@
30	31	#include <linux/smpboot.h>
31	32	#include <linux/relay.h>
32	33	#include <linux/slab.h>
	34	+#include <linux/scs.h>
33	35	#include <linux/percpu-rwsem.h>
34	36	#include <linux/cpuset.h>
	37	+#include <linux/random.h>
	38	+#include <uapi/linux/sched/types.h>
35	39
36	40	#include <trace/events/power.h>
37	41	#define CREATE_TRACE_POINTS
38	42	#include <trace/events/cpuhp.h>
39	43
	44	+#undef CREATE_TRACE_POINTS
	45	+#include <trace/hooks/sched.h>
	46	+#include <trace/hooks/cpu.h>
	47	+
	48	+#include "sched/sched.h"
40	49	#include "smpboot.h"
41	50
42	51	/**
..	..	@@ -63,7 +72,6 @@
63	72	bool rollback;
64	73	bool single;
65	74	bool bringup;
66		- bool booted_once;
67	75	struct hlist_node *node;
68	76	struct hlist_node *last;
69	77	enum cpuhp_state cb_state;
..	..	@@ -76,6 +84,10 @@
76	84	static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
77	85	.fail = CPUHP_INVALID,
78	86	};
	87	+
	88	+#ifdef CONFIG_SMP
	89	+cpumask_t cpus_booted_once_mask;
	90	+#endif
79	91
80	92	#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
81	93	static struct lockdep_map cpuhp_state_up_map =
..	..	@@ -269,11 +281,13 @@
269	281	{
270	282	mutex_lock(&cpu_add_remove_lock);
271	283	}
	284	+EXPORT_SYMBOL_GPL(cpu_maps_update_begin);
272	285
273	286	void cpu_maps_update_done(void)
274	287	{
275	288	mutex_unlock(&cpu_add_remove_lock);
276	289	}
	290	+EXPORT_SYMBOL_GPL(cpu_maps_update_done);
277	291
278	292	/*
279	293	* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
..	..	@@ -327,6 +341,16 @@
327	341	percpu_rwsem_assert_held(&cpu_hotplug_lock);
328	342	}
329	343
	344	+static void lockdep_acquire_cpus_lock(void)
	345	+{
	346	+ rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
	347	+}
	348	+
	349	+static void lockdep_release_cpus_lock(void)
	350	+{
	351	+ rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
	352	+}
	353	+
330	354	/*
331	355	* Wait for currently running CPU hotplug operations to complete (if any) and
332	356	* disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
..	..	@@ -356,6 +380,17 @@
356	380	cpu_maps_update_done();
357	381	}
358	382	EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
	383	+
	384	+#else
	385	+
	386	+static void lockdep_acquire_cpus_lock(void)
	387	+{
	388	+}
	389	+
	390	+static void lockdep_release_cpus_lock(void)
	391	+{
	392	+}
	393	+
359	394	#endif /* CONFIG_HOTPLUG_CPU */
360	395
361	396	/*
..	..	@@ -369,8 +404,7 @@
369	404
370	405	void __init cpu_smt_disable(bool force)
371	406	{
372		- if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
373		- cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
	407	+ if (!cpu_smt_possible())
374	408	return;
375	409
376	410	if (force) {
..	..	@@ -410,11 +444,19 @@
410	444	/*
411	445	* On x86 it's required to boot all logical CPUs at least once so
412	446	* that the init code can get a chance to set CR4.MCE on each
413		- * CPU. Otherwise, a broadacasted MCE observing CR4.MCE=0b on any
	447	+ * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
414	448	* core will shutdown the machine.
415	449	*/
416		- return !per_cpu(cpuhp_state, cpu).booted_once;
	450	+ return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
417	451	}
	452	+
	453	+/* Returns true unless SMT is not supported or forcefully (irreversibly) disabled */
	454	+bool cpu_smt_possible(void)
	455	+{
	456	+ return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
	457	+ cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
	458	+}
	459	+EXPORT_SYMBOL_GPL(cpu_smt_possible);
418	460	#else
419	461	static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
420	462	#endif
..	..	@@ -501,7 +543,7 @@
501	543	/*
502	544	* SMT soft disabling on X86 requires to bring the CPU out of the
503	545	* BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
504		- * CPU marked itself as booted_once in cpu_notify_starting() so the
	546	+ * CPU marked itself as booted_once in notify_cpu_starting() so the
505	547	* cpu_smt_allowed() check will now return false if this is not the
506	548	* primary sibling.
507	549	*/
..	..	@@ -518,6 +560,12 @@
518	560	{
519	561	struct task_struct *idle = idle_thread_get(cpu);
520	562	int ret;
	563	+
	564	+ /*
	565	+ * Reset stale stack state from the last time this CPU was online.
	566	+ */
	567	+ scs_task_reset(idle);
	568	+ kasan_unpoison_task_stack(idle);
521	569
522	570	/*
523	571	* Some architectures have to walk the irq descriptors to
..	..	@@ -640,6 +688,12 @@
640	688	*/
641	689	smp_mb();
642	690
	691	+ /*
	692	+ * The BP holds the hotplug lock, but we're now running on the AP,
	693	+ * ensure that anybody asserting the lock is held, will actually find
	694	+ * it so.
	695	+ */
	696	+ lockdep_acquire_cpus_lock();
643	697	cpuhp_lock_acquire(bringup);
644	698
645	699	if (st->single) {
..	..	@@ -685,6 +739,7 @@
685	739	}
686	740
687	741	cpuhp_lock_release(bringup);
	742	+ lockdep_release_cpus_lock();
688	743
689	744	if (!st->should_run)
690	745	complete_ap_thread(st, bringup);
..	..	@@ -876,15 +931,6 @@
876	931	int err, cpu = smp_processor_id();
877	932	int ret;
878	933
879		-#ifdef CONFIG_PREEMPT_RT_BASE
880		- /*
881		- * If any tasks disabled migration before we got here,
882		- * go back and sleep again.
883		- */
884		- if (cpu_nr_pinned(cpu))
885		- return -EAGAIN;
886		-#endif
887		-
888	934	/* Ensure this CPU doesn't handle any more interrupts. */
889	935	err = __cpu_disable();
890	936	if (err < 0)
..	..	@@ -907,14 +953,12 @@
907	953
908	954	/* Give up timekeeping duties */
909	955	tick_handover_do_timer();
	956	+ /* Remove CPU from timer broadcasting */
	957	+ tick_offline_cpu(cpu);
910	958	/* Park the stopper thread */
911	959	stop_machine_park(cpu);
912	960	return 0;
913	961	}
914		-
915		-#ifdef CONFIG_PREEMPT_RT_BASE
916		-struct task_struct *takedown_cpu_task;
917		-#endif
918	962
919	963	static int takedown_cpu(unsigned int cpu)
920	964	{
..	..	@@ -930,39 +974,11 @@
930	974	*/
931	975	irq_lock_sparse();
932	976
933		-#ifdef CONFIG_PREEMPT_RT_BASE
934		- WARN_ON_ONCE(takedown_cpu_task);
935		- takedown_cpu_task = current;
936		-
937		-again:
938		- /*
939		- * If a task pins this CPU after we pass this check, take_cpu_down
940		- * will return -EAGAIN.
941		- */
942		- for (;;) {
943		- int nr_pinned;
944		-
945		- set_current_state(TASK_UNINTERRUPTIBLE);
946		- nr_pinned = cpu_nr_pinned(cpu);
947		- if (nr_pinned == 0)
948		- break;
949		- schedule();
950		- }
951		- set_current_state(TASK_RUNNING);
952		-#endif
953		-
954	977	/*
955	978	* So now all preempt/rcu users must observe !cpu_active().
956	979	*/
957	980	err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
958		-#ifdef CONFIG_PREEMPT_RT_BASE
959		- if (err == -EAGAIN)
960		- goto again;
961		-#endif
962	981	if (err) {
963		-#ifdef CONFIG_PREEMPT_RT_BASE
964		- takedown_cpu_task = NULL;
965		-#endif
966	982	/* CPU refused to die */
967	983	irq_unlock_sparse();
968	984	/* Unpark the hotplug thread so we can rollback there */
..	..	@@ -981,9 +997,6 @@
981	997	wait_for_ap_thread(st, false);
982	998	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
983	999
984		-#ifdef CONFIG_PREEMPT_RT_BASE
985		- takedown_cpu_task = NULL;
986		-#endif
987	1000	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
988	1001	irq_unlock_sparse();
989	1002
..	..	@@ -1049,7 +1062,7 @@
1049	1062	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1050	1063	int prev_state, ret = 0;
1051	1064
1052		- if (num_online_cpus() == 1)
	1065	+ if (num_active_cpus() == 1 && cpu_active(cpu))
1053	1066	return -EBUSY;
1054	1067
1055	1068	if (!cpu_present(cpu))
..	..	@@ -1112,7 +1125,7 @@
1112	1125	return _cpu_down(cpu, 0, target);
1113	1126	}
1114	1127
1115		-static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
	1128	+static int cpu_down(unsigned int cpu, enum cpuhp_state target)
1116	1129	{
1117	1130	int err;
1118	1131
..	..	@@ -1122,11 +1135,315 @@
1122	1135	return err;
1123	1136	}
1124	1137
1125		-int cpu_down(unsigned int cpu)
	1138	+/**
	1139	+ * cpu_device_down - Bring down a cpu device
	1140	+ * @dev: Pointer to the cpu device to offline
	1141	+ *
	1142	+ * This function is meant to be used by device core cpu subsystem only.
	1143	+ *
	1144	+ * Other subsystems should use remove_cpu() instead.
	1145	+ */
	1146	+int cpu_device_down(struct device *dev)
1126	1147	{
1127		- return do_cpu_down(cpu, CPUHP_OFFLINE);
	1148	+ return cpu_down(dev->id, CPUHP_OFFLINE);
1128	1149	}
1129		-EXPORT_SYMBOL(cpu_down);
	1150	+
	1151	+int remove_cpu(unsigned int cpu)
	1152	+{
	1153	+ int ret;
	1154	+
	1155	+ lock_device_hotplug();
	1156	+ ret = device_offline(get_cpu_device(cpu));
	1157	+ unlock_device_hotplug();
	1158	+
	1159	+ return ret;
	1160	+}
	1161	+EXPORT_SYMBOL_GPL(remove_cpu);
	1162	+
	1163	+int __pause_drain_rq(struct cpumask *cpus)
	1164	+{
	1165	+ unsigned int cpu;
	1166	+ int err = 0;
	1167	+
	1168	+ /*
	1169	+ * Disabling preemption avoids that one of the stopper, started from
	1170	+ * sched_cpu_drain_rq(), blocks firing draining for the whole cpumask.
	1171	+ */
	1172	+ preempt_disable();
	1173	+ for_each_cpu(cpu, cpus) {
	1174	+ err = sched_cpu_drain_rq(cpu);
	1175	+ if (err)
	1176	+ break;
	1177	+ }
	1178	+ preempt_enable();
	1179	+
	1180	+ return err;
	1181	+}
	1182	+
	1183	+void __wait_drain_rq(struct cpumask *cpus)
	1184	+{
	1185	+ unsigned int cpu;
	1186	+
	1187	+ for_each_cpu(cpu, cpus)
	1188	+ sched_cpu_drain_rq_wait(cpu);
	1189	+}
	1190	+
	1191	+/* if rt task, set to cfs and return previous prio */
	1192	+static int pause_reduce_prio(void)
	1193	+{
	1194	+ int prev_prio = -1;
	1195	+
	1196	+ if (current->prio < MAX_RT_PRIO) {
	1197	+ struct sched_param param = { .sched_priority = 0 };
	1198	+
	1199	+ prev_prio = current->prio;
	1200	+ sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
	1201	+ }
	1202	+
	1203	+ return prev_prio;
	1204	+}
	1205	+
	1206	+/* if previous prio was set, restore */
	1207	+static void pause_restore_prio(int prev_prio)
	1208	+{
	1209	+ if (prev_prio >= 0 && prev_prio < MAX_RT_PRIO) {
	1210	+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1-prev_prio };
	1211	+
	1212	+ sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
	1213	+ }
	1214	+}
	1215	+
	1216	+int pause_cpus(struct cpumask *cpus)
	1217	+{
	1218	+ int err = 0;
	1219	+ int cpu;
	1220	+ u64 start_time = 0;
	1221	+ int prev_prio;
	1222	+
	1223	+ start_time = sched_clock();
	1224	+
	1225	+ cpu_maps_update_begin();
	1226	+
	1227	+ if (cpu_hotplug_disabled) {
	1228	+ err = -EBUSY;
	1229	+ goto err_cpu_maps_update;
	1230	+ }
	1231	+
	1232	+ /* Pausing an already inactive CPU isn't an error */
	1233	+ cpumask_and(cpus, cpus, cpu_active_mask);
	1234	+
	1235	+ for_each_cpu(cpu, cpus) {
	1236	+ if (!cpu_online(cpu) || dl_bw_check_overflow(cpu) ||
	1237	+ get_cpu_device(cpu)->offline_disabled == true) {
	1238	+ err = -EBUSY;
	1239	+ goto err_cpu_maps_update;
	1240	+ }
	1241	+ }
	1242	+
	1243	+ if (cpumask_weight(cpus) >= num_active_cpus()) {
	1244	+ err = -EBUSY;
	1245	+ goto err_cpu_maps_update;
	1246	+ }
	1247	+
	1248	+ if (cpumask_empty(cpus))
	1249	+ goto err_cpu_maps_update;
	1250	+
	1251	+ /*
	1252	+ * Lazy migration:
	1253	+ *
	1254	+ * We do care about how fast a CPU can go idle and stay in this
	1255	+ * state. If we try to take the cpus_write_lock() here, we would have
	1256	+ * to wait for a few dozens of ms, as this function might schedule.
	1257	+ * However, we can, as a first step, flip the active mask and migrate
	1258	+ * anything currently on the run-queue, to give a chance to the paused
	1259	+ * CPUs to reach quickly an idle state. There's a risk meanwhile for
	1260	+ * another CPU to observe an out-of-date active_mask or to incompletely
	1261	+ * update a cpuset. Both problems would be resolved later in the slow
	1262	+ * path, which ensures active_mask synchronization, triggers a cpuset
	1263	+ * rebuild and migrate any task that would have escaped the lazy
	1264	+ * migration.
	1265	+ */
	1266	+ for_each_cpu(cpu, cpus)
	1267	+ set_cpu_active(cpu, false);
	1268	+ err = __pause_drain_rq(cpus);
	1269	+ if (err) {
	1270	+ __wait_drain_rq(cpus);
	1271	+ for_each_cpu(cpu, cpus)
	1272	+ set_cpu_active(cpu, true);
	1273	+ goto err_cpu_maps_update;
	1274	+ }
	1275	+
	1276	+ prev_prio = pause_reduce_prio();
	1277	+
	1278	+ /*
	1279	+ * Slow path deactivation:
	1280	+ *
	1281	+ * Now that paused CPUs are most likely idle, we can go through a
	1282	+ * complete scheduler deactivation.
	1283	+ *
	1284	+ * The cpu_active_mask being already set and cpus_write_lock calling
	1285	+ * synchronize_rcu(), we know that all preempt-disabled and RCU users
	1286	+ * will observe the updated value.
	1287	+ */
	1288	+ cpus_write_lock();
	1289	+
	1290	+ __wait_drain_rq(cpus);
	1291	+
	1292	+ cpuhp_tasks_frozen = 0;
	1293	+
	1294	+ if (sched_cpus_deactivate_nosync(cpus)) {
	1295	+ err = -EBUSY;
	1296	+ goto err_cpus_write_unlock;
	1297	+ }
	1298	+
	1299	+ err = __pause_drain_rq(cpus);
	1300	+ __wait_drain_rq(cpus);
	1301	+ if (err) {
	1302	+ for_each_cpu(cpu, cpus)
	1303	+ sched_cpu_activate(cpu);
	1304	+ goto err_cpus_write_unlock;
	1305	+ }
	1306	+
	1307	+ /*
	1308	+ * Even if living on the side of the regular HP path, pause is using
	1309	+ * one of the HP steps (CPUHP_AP_ACTIVE). This should be reflected in the
	1310	+ * current state of the CPU.
	1311	+ */
	1312	+ for_each_cpu(cpu, cpus) {
	1313	+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	1314	+
	1315	+ st->state = CPUHP_AP_ACTIVE - 1;
	1316	+ st->target = st->state;
	1317	+ }
	1318	+
	1319	+err_cpus_write_unlock:
	1320	+ cpus_write_unlock();
	1321	+ pause_restore_prio(prev_prio);
	1322	+err_cpu_maps_update:
	1323	+ cpu_maps_update_done();
	1324	+
	1325	+ trace_cpuhp_pause(cpus, start_time, 1);
	1326	+
	1327	+ return err;
	1328	+}
	1329	+EXPORT_SYMBOL_GPL(pause_cpus);
	1330	+
	1331	+int resume_cpus(struct cpumask *cpus)
	1332	+{
	1333	+ unsigned int cpu;
	1334	+ int err = 0;
	1335	+ u64 start_time = 0;
	1336	+ int prev_prio;
	1337	+
	1338	+ start_time = sched_clock();
	1339	+
	1340	+ cpu_maps_update_begin();
	1341	+
	1342	+ if (cpu_hotplug_disabled) {
	1343	+ err = -EBUSY;
	1344	+ goto err_cpu_maps_update;
	1345	+ }
	1346	+
	1347	+ /* Resuming an already active CPU isn't an error */
	1348	+ cpumask_andnot(cpus, cpus, cpu_active_mask);
	1349	+
	1350	+ for_each_cpu(cpu, cpus) {
	1351	+ if (!cpu_online(cpu)) {
	1352	+ err = -EBUSY;
	1353	+ goto err_cpu_maps_update;
	1354	+ }
	1355	+ }
	1356	+
	1357	+ if (cpumask_empty(cpus))
	1358	+ goto err_cpu_maps_update;
	1359	+
	1360	+ for_each_cpu(cpu, cpus)
	1361	+ set_cpu_active(cpu, true);
	1362	+
	1363	+ trace_android_rvh_resume_cpus(cpus, &err);
	1364	+ if (err)
	1365	+ goto err_cpu_maps_update;
	1366	+
	1367	+ prev_prio = pause_reduce_prio();
	1368	+
	1369	+ /* Lazy resume: build the domains by scheduling workqueue work on the
	1370	+ * resuming CPU, so that the resuming CPU can start working
	1371	+ * earlier and no additional load is put on other busy CPUs.
	1372	+ */
	1373	+ cpuset_update_active_cpus_affine(cpumask_first(cpus));
	1374	+
	1375	+ cpus_write_lock();
	1376	+
	1377	+ cpuhp_tasks_frozen = 0;
	1378	+
	1379	+ if (sched_cpus_activate(cpus)) {
	1380	+ err = -EBUSY;
	1381	+ goto err_cpus_write_unlock;
	1382	+ }
	1383	+
	1384	+ /*
	1385	+ * see pause_cpus.
	1386	+ */
	1387	+ for_each_cpu(cpu, cpus) {
	1388	+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	1389	+
	1390	+ st->state = CPUHP_ONLINE;
	1391	+ st->target = st->state;
	1392	+ }
	1393	+
	1394	+err_cpus_write_unlock:
	1395	+ cpus_write_unlock();
	1396	+ pause_restore_prio(prev_prio);
	1397	+err_cpu_maps_update:
	1398	+ cpu_maps_update_done();
	1399	+
	1400	+ trace_cpuhp_pause(cpus, start_time, 0);
	1401	+
	1402	+ return err;
	1403	+}
	1404	+EXPORT_SYMBOL_GPL(resume_cpus);
	1405	+
	1406	+void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
	1407	+{
	1408	+ unsigned int cpu;
	1409	+ int error;
	1410	+
	1411	+ cpu_maps_update_begin();
	1412	+
	1413	+ /*
	1414	+ * Make certain the cpu I'm about to reboot on is online.
	1415	+ *
	1416	+ * This is in line with what migrate_to_reboot_cpu() already does.
	1417	+ */
	1418	+ if (!cpu_online(primary_cpu))
	1419	+ primary_cpu = cpumask_first(cpu_online_mask);
	1420	+
	1421	+ for_each_online_cpu(cpu) {
	1422	+ if (cpu == primary_cpu)
	1423	+ continue;
	1424	+
	1425	+ error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
	1426	+ if (error) {
	1427	+ pr_err("Failed to offline CPU%d - error=%d",
	1428	+ cpu, error);
	1429	+ break;
	1430	+ }
	1431	+ }
	1432	+
	1433	+ /*
	1434	+ * Ensure all but the reboot CPU are offline.
	1435	+ */
	1436	+ BUG_ON(num_online_cpus() > 1);
	1437	+
	1438	+ /*
	1439	+ * Make sure the CPUs won't be enabled by someone else after this
	1440	+ * point. Kexec will reboot to a new kernel shortly resetting
	1441	+ * everything along the way.
	1442	+ */
	1443	+ cpu_hotplug_disabled++;
	1444	+
	1445	+ cpu_maps_update_done();
	1446	+}
1130	1447
1131	1448	#else
1132	1449	#define takedown_cpu NULL
..	..	@@ -1146,7 +1463,7 @@
1146	1463	int ret;
1147	1464
1148	1465	rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
1149		- st->booted_once = true;
	1466	+ cpumask_set_cpu(cpu, &cpus_booted_once_mask);
1150	1467	while (st->state < target) {
1151	1468	st->state++;
1152	1469	ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
..	..	@@ -1180,6 +1497,25 @@
1180	1497	complete_ap_thread(st, true);
1181	1498	}
1182	1499
	1500	+static int switch_to_rt_policy(void)
	1501	+{
	1502	+ struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
	1503	+ unsigned int policy = current->policy;
	1504	+
	1505	+ if (policy == SCHED_NORMAL)
	1506	+ /* Switch to SCHED_FIFO from SCHED_NORMAL. */
	1507	+ return sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
	1508	+ else
	1509	+ return 1;
	1510	+}
	1511	+
	1512	+static int switch_to_fair_policy(void)
	1513	+{
	1514	+ struct sched_param param = { .sched_priority = 0 };
	1515	+
	1516	+ return sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
	1517	+}
	1518	+
1183	1519	/* Requires cpu_add_remove_lock to be held */
1184	1520	static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1185	1521	{
..	..	@@ -1195,8 +1531,8 @@
1195	1531	}
1196	1532
1197	1533	/*
1198		- * The caller of do_cpu_up might have raced with another
1199		- * caller. Ignore it for now.
	1534	+ * The caller of cpu_up() might have raced with another
	1535	+ * caller. Nothing to do.
1200	1536	*/
1201	1537	if (st->state >= target)
1202	1538	goto out;
..	..	@@ -1241,9 +1577,10 @@
1241	1577	return ret;
1242	1578	}
1243	1579
1244		-static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
	1580	+static int cpu_up(unsigned int cpu, enum cpuhp_state target)
1245	1581	{
1246	1582	int err = 0;
	1583	+ int switch_err;
1247	1584
1248	1585	if (!cpu_possible(cpu)) {
1249	1586	pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
..	..	@@ -1254,9 +1591,23 @@
1254	1591	return -EINVAL;
1255	1592	}
1256	1593
	1594	+ trace_android_vh_cpu_up(cpu);
	1595	+
	1596	+ /*
	1597	+ * CPU hotplug operations consist of many steps, and each step
	1598	+ * calls a callback of a core kernel subsystem. A CPU hotplug-in
	1599	+ * operation may get preempted by other CFS tasks, delaying the
	1600	+ * whole hotplug-in of the CPU. Switch the current task from
	1601	+ * SCHED_NORMAL to SCHED_FIFO so that the hotplug-in operation
	1602	+ * can complete quickly under heavy load and the new CPU can
	1603	+ * start handling the workload.
	1604	+ */
	1605	+
	1606	+ switch_err = switch_to_rt_policy();
	1607	+
1257	1608	err = try_online_node(cpu_to_node(cpu));
1258	1609	if (err)
1259		- return err;
	1610	+ goto switch_out;
1260	1611
1261	1612	cpu_maps_update_begin();
1262	1613
..	..	@@ -1272,14 +1623,76 @@
1272	1623	err = _cpu_up(cpu, 0, target);
1273	1624	out:
1274	1625	cpu_maps_update_done();
	1626	+switch_out:
	1627	+ if (!switch_err) {
	1628	+ switch_err = switch_to_fair_policy();
	1629	+ if (switch_err)
	1630	+ pr_err("Hotplug policy switch err=%d Task %s pid=%d\n",
	1631	+ switch_err, current->comm, current->pid);
	1632	+ }
	1633	+
1275	1634	return err;
1276	1635	}
1277	1636
1278		-int cpu_up(unsigned int cpu)
	1637	+/**
	1638	+ * cpu_device_up - Bring up a cpu device
	1639	+ * @dev: Pointer to the cpu device to online
	1640	+ *
	1641	+ * This function is meant to be used by device core cpu subsystem only.
	1642	+ *
	1643	+ * Other subsystems should use add_cpu() instead.
	1644	+ */
	1645	+int cpu_device_up(struct device *dev)
1279	1646	{
1280		- return do_cpu_up(cpu, CPUHP_ONLINE);
	1647	+ return cpu_up(dev->id, CPUHP_ONLINE);
1281	1648	}
1282		-EXPORT_SYMBOL_GPL(cpu_up);
	1649	+
	1650	+int add_cpu(unsigned int cpu)
	1651	+{
	1652	+ int ret;
	1653	+
	1654	+ lock_device_hotplug();
	1655	+ ret = device_online(get_cpu_device(cpu));
	1656	+ unlock_device_hotplug();
	1657	+
	1658	+ return ret;
	1659	+}
	1660	+EXPORT_SYMBOL_GPL(add_cpu);
	1661	+
	1662	+/**
	1663	+ * bringup_hibernate_cpu - Bring up the CPU that we hibernated on
	1664	+ * @sleep_cpu: The cpu we hibernated on and should be brought up.
	1665	+ *
	1666	+ * On some architectures like arm64, we can hibernate on any CPU, but on
	1667	+ * wake up the CPU we hibernated on might be offline as a side effect of
	1668	+ * using maxcpus= for example.
	1669	+ */
	1670	+int bringup_hibernate_cpu(unsigned int sleep_cpu)
	1671	+{
	1672	+ int ret;
	1673	+
	1674	+ if (!cpu_online(sleep_cpu)) {
	1675	+ pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
	1676	+ ret = cpu_up(sleep_cpu, CPUHP_ONLINE);
	1677	+ if (ret) {
	1678	+ pr_err("Failed to bring hibernate-CPU up!\n");
	1679	+ return ret;
	1680	+ }
	1681	+ }
	1682	+ return 0;
	1683	+}
	1684	+
	1685	+void bringup_nonboot_cpus(unsigned int setup_max_cpus)
	1686	+{
	1687	+ unsigned int cpu;
	1688	+
	1689	+ for_each_present_cpu(cpu) {
	1690	+ if (num_online_cpus() >= setup_max_cpus)
	1691	+ break;
	1692	+ if (!cpu_online(cpu))
	1693	+ cpu_up(cpu, CPUHP_ONLINE);
	1694	+ }
	1695	+}
1283	1696
1284	1697	#ifdef CONFIG_PM_SLEEP_SMP
1285	1698	static cpumask_var_t frozen_cpus;
..	..	@@ -1289,8 +1702,15 @@
1289	1702	int cpu, error = 0;
1290	1703
1291	1704	cpu_maps_update_begin();
1292		- if (!cpu_online(primary))
	1705	+ if (primary == -1) {
1293	1706	primary = cpumask_first(cpu_online_mask);
	1707	+ if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
	1708	+ primary = housekeeping_any_cpu(HK_FLAG_TIMER);
	1709	+ } else {
	1710	+ if (!cpu_online(primary))
	1711	+ primary = cpumask_first(cpu_online_mask);
	1712	+ }
	1713	+
1294	1714	/*
1295	1715	* We take down all of the non-boot CPUs in one shot to avoid races
1296	1716	* with the userspace trying to use the CPU hotplug at the same time
..	..	@@ -1301,6 +1721,13 @@
1301	1721	for_each_online_cpu(cpu) {
1302	1722	if (cpu == primary)
1303	1723	continue;
	1724	+
	1725	+ if (pm_wakeup_pending()) {
	1726	+ pr_info("Wakeup pending. Abort CPU freeze\n");
	1727	+ error = -EBUSY;
	1728	+ break;
	1729	+ }
	1730	+
1304	1731	trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1305	1732	error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1306	1733	trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
..	..	@@ -1319,8 +1746,8 @@
1319	1746
1320	1747	/*
1321	1748	* Make sure the CPUs won't be enabled by someone else. We need to do
1322		- * this even in case of failure as all disable_nonboot_cpus() users are
1323		- * supposed to do enable_nonboot_cpus() on the failure path.
	1749	+ * this even in case of failure as all freeze_secondary_cpus() users are
	1750	+ * supposed to do thaw_secondary_cpus() on the failure path.
1324	1751	*/
1325	1752	cpu_hotplug_disabled++;
1326	1753
..	..	@@ -1328,15 +1755,15 @@
1328	1755	return error;
1329	1756	}
1330	1757
1331		-void __weak arch_enable_nonboot_cpus_begin(void)
	1758	+void __weak arch_thaw_secondary_cpus_begin(void)
1332	1759	{
1333	1760	}
1334	1761
1335		-void __weak arch_enable_nonboot_cpus_end(void)
	1762	+void __weak arch_thaw_secondary_cpus_end(void)
1336	1763	{
1337	1764	}
1338	1765
1339		-void enable_nonboot_cpus(void)
	1766	+void thaw_secondary_cpus(void)
1340	1767	{
1341	1768	int cpu, error;
1342	1769	struct device *cpu_device;
..	..	@@ -1349,7 +1776,7 @@
1349	1776
1350	1777	pr_info("Enabling non-boot CPUs ...\n");
1351	1778
1352		- arch_enable_nonboot_cpus_begin();
	1779	+ arch_thaw_secondary_cpus_begin();
1353	1780
1354	1781	for_each_cpu(cpu, frozen_cpus) {
1355	1782	trace_suspend_resume(TPS("CPU_ON"), cpu, true);
..	..	@@ -1368,7 +1795,7 @@
1368	1795	pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1369	1796	}
1370	1797
1371		- arch_enable_nonboot_cpus_end();
	1798	+ arch_thaw_secondary_cpus_end();
1372	1799
1373	1800	cpumask_clear(frozen_cpus);
1374	1801	out:
..	..	@@ -1434,6 +1861,22 @@
1434	1861
1435	1862	int __boot_cpu_id;
1436	1863
	1864	+/* Horrific hacks because we can't add more to cpuhp_hp_states. */
	1865	+static int random_and_perf_prepare_fusion(unsigned int cpu)
	1866	+{
	1867	+#ifdef CONFIG_PERF_EVENTS
	1868	+ perf_event_init_cpu(cpu);
	1869	+#endif
	1870	+ random_prepare_cpu(cpu);
	1871	+ return 0;
	1872	+}
	1873	+static int random_and_workqueue_online_fusion(unsigned int cpu)
	1874	+{
	1875	+ workqueue_online_cpu(cpu);
	1876	+ random_online_cpu(cpu);
	1877	+ return 0;
	1878	+}
	1879	+
1437	1880	#endif /* CONFIG_SMP */
1438	1881
1439	1882	/* Boot processor state steps */
..	..	@@ -1452,7 +1895,7 @@
1452	1895	},
1453	1896	[CPUHP_PERF_PREPARE] = {
1454	1897	.name = "perf:prepare",
1455		- .startup.single = perf_event_init_cpu,
	1898	+ .startup.single = random_and_perf_prepare_fusion,
1456	1899	.teardown.single = perf_event_exit_cpu,
1457	1900	},
1458	1901	[CPUHP_WORKQUEUE_PREP] = {
..	..	@@ -1568,7 +2011,7 @@
1568	2011	},
1569	2012	[CPUHP_AP_WORKQUEUE_ONLINE] = {
1570	2013	.name = "workqueue:online",
1571		- .startup.single = workqueue_online_cpu,
	2014	+ .startup.single = random_and_workqueue_online_fusion,
1572	2015	.teardown.single = workqueue_offline_cpu,
1573	2016	},
1574	2017	[CPUHP_AP_RCUTREE_ONLINE] = {
..	..	@@ -1979,6 +2422,78 @@
1979	2422	}
1980	2423	EXPORT_SYMBOL(__cpuhp_remove_state);
1981	2424
	2425	+#ifdef CONFIG_HOTPLUG_SMT
	2426	+static void cpuhp_offline_cpu_device(unsigned int cpu)
	2427	+{
	2428	+ struct device *dev = get_cpu_device(cpu);
	2429	+
	2430	+ dev->offline = true;
	2431	+ /* Tell user space about the state change */
	2432	+ kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
	2433	+}
	2434	+
	2435	+static void cpuhp_online_cpu_device(unsigned int cpu)
	2436	+{
	2437	+ struct device *dev = get_cpu_device(cpu);
	2438	+
	2439	+ dev->offline = false;
	2440	+ /* Tell user space about the state change */
	2441	+ kobject_uevent(&dev->kobj, KOBJ_ONLINE);
	2442	+}
	2443	+
	2444	+int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
	2445	+{
	2446	+ int cpu, ret = 0;
	2447	+
	2448	+ cpu_maps_update_begin();
	2449	+ for_each_online_cpu(cpu) {
	2450	+ if (topology_is_primary_thread(cpu))
	2451	+ continue;
	2452	+ ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
	2453	+ if (ret)
	2454	+ break;
	2455	+ /*
	2456	+ * As this needs to hold the cpu maps lock it's impossible
	2457	+ * to call device_offline() because that ends up calling
	2458	+ * cpu_down() which takes cpu maps lock. cpu maps lock
	2459	+ * needs to be held as this might race against in kernel
	2460	+ * abusers of the hotplug machinery (thermal management).
	2461	+ *
	2462	+ * So nothing would update device:offline state. That would
	2463	+ * leave the sysfs entry stale and prevent onlining after
	2464	+ * smt control has been changed to 'off' again. This is
	2465	+ * called under the sysfs hotplug lock, so it is properly
	2466	+ * serialized against the regular offline usage.
	2467	+ */
	2468	+ cpuhp_offline_cpu_device(cpu);
	2469	+ }
	2470	+ if (!ret)
	2471	+ cpu_smt_control = ctrlval;
	2472	+ cpu_maps_update_done();
	2473	+ return ret;
	2474	+}
	2475	+
	2476	+int cpuhp_smt_enable(void)
	2477	+{
	2478	+ int cpu, ret = 0;
	2479	+
	2480	+ cpu_maps_update_begin();
	2481	+ cpu_smt_control = CPU_SMT_ENABLED;
	2482	+ for_each_present_cpu(cpu) {
	2483	+ /* Skip online CPUs and CPUs on offline nodes */
	2484	+ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
	2485	+ continue;
	2486	+ ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
	2487	+ if (ret)
	2488	+ break;
	2489	+ /* See comment in cpuhp_smt_disable() */
	2490	+ cpuhp_online_cpu_device(cpu);
	2491	+ }
	2492	+ cpu_maps_update_done();
	2493	+ return ret;
	2494	+}
	2495	+#endif
	2496	+
1982	2497	#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
1983	2498	static ssize_t show_cpuhp_state(struct device *dev,
1984	2499	struct device_attribute *attr, char *buf)
..	..	@@ -2021,9 +2536,11 @@
2021	2536	goto out;
2022	2537
2023	2538	if (st->state < target)
2024		- ret = do_cpu_up(dev->id, target);
2025		- else
2026		- ret = do_cpu_down(dev->id, target);
	2539	+ ret = cpu_up(dev->id, target);
	2540	+ else if (st->state > target)
	2541	+ ret = cpu_down(dev->id, target);
	2542	+ else if (WARN_ON(st->target != target))
	2543	+ st->target = target;
2027	2544	out:
2028	2545	unlock_device_hotplug();
2029	2546	return ret ? ret : count;
..	..	@@ -2133,92 +2650,9 @@
2133	2650
2134	2651	#ifdef CONFIG_HOTPLUG_SMT
2135	2652
2136		-static const char *smt_states[] = {
2137		- [CPU_SMT_ENABLED] = "on",
2138		- [CPU_SMT_DISABLED] = "off",
2139		- [CPU_SMT_FORCE_DISABLED] = "forceoff",
2140		- [CPU_SMT_NOT_SUPPORTED] = "notsupported",
2141		-};
2142		-
2143	2653	static ssize_t
2144		-show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
2145		-{
2146		- return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]);
2147		-}
2148		-
2149		-static void cpuhp_offline_cpu_device(unsigned int cpu)
2150		-{
2151		- struct device *dev = get_cpu_device(cpu);
2152		-
2153		- dev->offline = true;
2154		- /* Tell user space about the state change */
2155		- kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2156		-}
2157		-
2158		-static void cpuhp_online_cpu_device(unsigned int cpu)
2159		-{
2160		- struct device *dev = get_cpu_device(cpu);
2161		-
2162		- dev->offline = false;
2163		- /* Tell user space about the state change */
2164		- kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2165		-}
2166		-
2167		-int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2168		-{
2169		- int cpu, ret = 0;
2170		-
2171		- cpu_maps_update_begin();
2172		- for_each_online_cpu(cpu) {
2173		- if (topology_is_primary_thread(cpu))
2174		- continue;
2175		- ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2176		- if (ret)
2177		- break;
2178		- /*
2179		- * As this needs to hold the cpu maps lock it's impossible
2180		- * to call device_offline() because that ends up calling
2181		- * cpu_down() which takes cpu maps lock. cpu maps lock
2182		- * needs to be held as this might race against in kernel
2183		- * abusers of the hotplug machinery (thermal management).
2184		- *
2185		- * So nothing would update device:offline state. That would
2186		- * leave the sysfs entry stale and prevent onlining after
2187		- * smt control has been changed to 'off' again. This is
2188		- * called under the sysfs hotplug lock, so it is properly
2189		- * serialized against the regular offline usage.
2190		- */
2191		- cpuhp_offline_cpu_device(cpu);
2192		- }
2193		- if (!ret)
2194		- cpu_smt_control = ctrlval;
2195		- cpu_maps_update_done();
2196		- return ret;
2197		-}
2198		-
2199		-int cpuhp_smt_enable(void)
2200		-{
2201		- int cpu, ret = 0;
2202		-
2203		- cpu_maps_update_begin();
2204		- cpu_smt_control = CPU_SMT_ENABLED;
2205		- for_each_present_cpu(cpu) {
2206		- /* Skip online CPUs and CPUs on offline nodes */
2207		- if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
2208		- continue;
2209		- ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2210		- if (ret)
2211		- break;
2212		- /* See comment in cpuhp_smt_disable() */
2213		- cpuhp_online_cpu_device(cpu);
2214		- }
2215		- cpu_maps_update_done();
2216		- return ret;
2217		-}
2218		-
2219		-static ssize_t
2220		-store_smt_control(struct device *dev, struct device_attribute *attr,
2221		- const char *buf, size_t count)
	2654	+__store_smt_control(struct device *dev, struct device_attribute *attr,
	2655	+ const char *buf, size_t count)
2222	2656	{
2223	2657	int ctrlval, ret;
2224	2658
..	..	@@ -2256,14 +2690,44 @@
2256	2690	unlock_device_hotplug();
2257	2691	return ret ? ret : count;
2258	2692	}
	2693	+
	2694	+#else /* !CONFIG_HOTPLUG_SMT */
	2695	+static ssize_t
	2696	+__store_smt_control(struct device *dev, struct device_attribute *attr,
	2697	+ const char *buf, size_t count)
	2698	+{
	2699	+ return -ENODEV;
	2700	+}
	2701	+#endif /* CONFIG_HOTPLUG_SMT */
	2702	+
	2703	+static const char *smt_states[] = {
	2704	+ [CPU_SMT_ENABLED] = "on",
	2705	+ [CPU_SMT_DISABLED] = "off",
	2706	+ [CPU_SMT_FORCE_DISABLED] = "forceoff",
	2707	+ [CPU_SMT_NOT_SUPPORTED] = "notsupported",
	2708	+ [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
	2709	+};
	2710	+
	2711	+static ssize_t
	2712	+show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
	2713	+{
	2714	+ const char *state = smt_states[cpu_smt_control];
	2715	+
	2716	+ return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
	2717	+}
	2718	+
	2719	+static ssize_t
	2720	+store_smt_control(struct device *dev, struct device_attribute *attr,
	2721	+ const char *buf, size_t count)
	2722	+{
	2723	+ return __store_smt_control(dev, attr, buf, count);
	2724	+}
2259	2725	static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
2260	2726
2261	2727	static ssize_t
2262	2728	show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
2263	2729	{
2264		- bool active = topology_max_smt_threads() > 1;
2265		-
2266		- return snprintf(buf, PAGE_SIZE - 2, "%d\n", active);
	2730	+ return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
2267	2731	}
2268	2732	static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
2269	2733
..	..	@@ -2279,21 +2743,17 @@
2279	2743	NULL
2280	2744	};
2281	2745
2282		-static int __init cpu_smt_state_init(void)
	2746	+static int __init cpu_smt_sysfs_init(void)
2283	2747	{
2284	2748	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
2285	2749	&cpuhp_smt_attr_group);
2286	2750	}
2287	2751
2288		-#else
2289		-static inline int cpu_smt_state_init(void) { return 0; }
2290		-#endif
2291		-
2292	2752	static int __init cpuhp_sysfs_init(void)
2293	2753	{
2294	2754	int cpu, ret;
2295	2755
2296		- ret = cpu_smt_state_init();
	2756	+ ret = cpu_smt_sysfs_init();
2297	2757	if (ret)
2298	2758	return ret;
2299	2759
..	..	@@ -2314,7 +2774,7 @@
2314	2774	return 0;
2315	2775	}
2316	2776	device_initcall(cpuhp_sysfs_init);
2317		-#endif
	2777	+#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
2318	2778
2319	2779	/*
2320	2780	* cpu_bit_bitmap[] is a special, "compressed" data structure that
..	..	@@ -2361,8 +2821,8 @@
2361	2821	struct cpumask __cpu_active_mask __read_mostly;
2362	2822	EXPORT_SYMBOL(__cpu_active_mask);
2363	2823
2364		-struct cpumask __cpu_isolated_mask __read_mostly;
2365		-EXPORT_SYMBOL(__cpu_isolated_mask);
	2824	+atomic_t __num_online_cpus __read_mostly;
	2825	+EXPORT_SYMBOL(__num_online_cpus);
2366	2826
2367	2827	void init_cpu_present(const struct cpumask *src)
2368	2828	{
..	..	@@ -2377,6 +2837,27 @@
2377	2837	void init_cpu_online(const struct cpumask *src)
2378	2838	{
2379	2839	cpumask_copy(&__cpu_online_mask, src);
	2840	+}
	2841	+
	2842	+void set_cpu_online(unsigned int cpu, bool online)
	2843	+{
	2844	+ /*
	2845	+ * atomic_inc/dec() is required to handle the horrid abuse of this
	2846	+ * function by the reboot and kexec code which invoke it from
	2847	+ * IPI/NMI broadcasts when shutting down CPUs. Invocation from
	2848	+ * regular CPU hotplug is properly serialized.
	2849	+ *
	2850	+ * Note, that the fact that __num_online_cpus is of type atomic_t
	2851	+ * does not protect readers which are not serialized against
	2852	+ * concurrent hotplug operations.
	2853	+ */
	2854	+ if (online) {
	2855	+ if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
	2856	+ atomic_inc(&__num_online_cpus);
	2857	+ } else {
	2858	+ if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
	2859	+ atomic_dec(&__num_online_cpus);
	2860	+ }
2380	2861	}
2381	2862
2382	2863	/*
..	..	@@ -2403,7 +2884,7 @@
2403	2884	void __init boot_cpu_hotplug_init(void)
2404	2885	{
2405	2886	#ifdef CONFIG_SMP
2406		- this_cpu_write(cpuhp_state.booted_once, true);
	2887	+ cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
2407	2888	#endif
2408	2889	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
2409	2890	}