hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/arch/powerpc/kernel/smp.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * SMP support for ppc.
34 *
....@@ -8,11 +9,6 @@
89 *
910 * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
1011 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
11
- *
12
- * This program is free software; you can redistribute it and/or
13
- * modify it under the terms of the GNU General Public License
14
- * as published by the Free Software Foundation; either version
15
- * 2 of the License, or (at your option) any later version.
1612 */
1713
1814 #undef DEBUG
....@@ -20,6 +16,7 @@
2016 #include <linux/kernel.h>
2117 #include <linux/export.h>
2218 #include <linux/sched/mm.h>
19
+#include <linux/sched/task_stack.h>
2320 #include <linux/sched/topology.h>
2421 #include <linux/smp.h>
2522 #include <linux/interrupt.h>
....@@ -34,6 +31,9 @@
3431 #include <linux/topology.h>
3532 #include <linux/profile.h>
3633 #include <linux/processor.h>
34
+#include <linux/random.h>
35
+#include <linux/stackprotector.h>
36
+#include <linux/pgtable.h>
3737
3838 #include <asm/ptrace.h>
3939 #include <linux/atomic.h>
....@@ -42,7 +42,6 @@
4242 #include <asm/kvm_ppc.h>
4343 #include <asm/dbell.h>
4444 #include <asm/page.h>
45
-#include <asm/pgtable.h>
4645 #include <asm/prom.h>
4746 #include <asm/smp.h>
4847 #include <asm/time.h>
....@@ -60,6 +59,8 @@
6059 #include <asm/asm-prototypes.h>
6160 #include <asm/cpu_has_feature.h>
6261 #include <asm/ftrace.h>
62
+#include <asm/kup.h>
63
+#include <asm/fadump.h>
6364
6465 #ifdef DEBUG
6566 #include <asm/udbg.h>
....@@ -73,15 +74,44 @@
7374 static DEFINE_PER_CPU(int, cpu_state) = { 0 };
7475 #endif
7576
76
-struct thread_info *secondary_ti;
77
+struct task_struct *secondary_current;
78
+bool has_big_cores;
79
+bool coregroup_enabled;
7780
7881 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
82
+DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
7983 DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
8084 DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
85
+DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);
8186
8287 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
8388 EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
8489 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
90
+EXPORT_SYMBOL_GPL(has_big_cores);
91
+
92
+enum {
93
+#ifdef CONFIG_SCHED_SMT
94
+ smt_idx,
95
+#endif
96
+ cache_idx,
97
+ mc_idx,
98
+ die_idx,
99
+};
100
+
101
+#define MAX_THREAD_LIST_SIZE 8
102
+#define THREAD_GROUP_SHARE_L1 1
103
+struct thread_groups {
104
+ unsigned int property;
105
+ unsigned int nr_groups;
106
+ unsigned int threads_per_group;
107
+ unsigned int thread_list[MAX_THREAD_LIST_SIZE];
108
+};
109
+
110
+/*
111
+ * On big-core systems, cpu_l1_cache_map for each CPU corresponds to
112
+ * the set of its siblings that share the L1-cache.
113
+ */
114
+DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);
85115
86116 /* SMP operations for this machine */
87117 struct smp_ops_t *smp_ops;
....@@ -442,7 +472,8 @@
442472 * - delay_us > 0 is the delay before giving up waiting for targets to
443473 * begin executing the handler, == 0 specifies indefinite delay.
444474 */
445
-int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool safe)
475
+static int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *),
476
+ u64 delay_us, bool safe)
446477 {
447478 unsigned long flags;
448479 int me = raw_smp_processor_id();
....@@ -582,6 +613,15 @@
582613 {
583614 static bool stopped = false;
584615
616
+ /*
617
+ * In case of fadump, register data for all CPUs is captured by f/w
618
+ * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before
619
+ * this rtas call to avoid tricky post processing of those CPUs'
620
+ * backtraces.
621
+ */
622
+ if (should_fadump_crash())
623
+ return;
624
+
585625 if (stopped)
586626 return;
587627
....@@ -650,7 +690,7 @@
650690 }
651691 #endif /* CONFIG_NMI_IPI */
652692
653
-struct thread_info *current_set[NR_CPUS];
693
+struct task_struct *current_set[NR_CPUS];
654694
655695 static void smp_store_cpu_info(int id)
656696 {
....@@ -681,6 +721,274 @@
681721 }
682722 #endif
683723
724
+/*
725
+ * Extends set_cpus_related. Instead of setting one CPU at a time in
726
+ * dstmask, OR srcmask in one shot. dstmask should be a superset of srcmask.
727
+ */
728
+static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
729
+ struct cpumask *(*dstmask)(int))
730
+{
731
+ struct cpumask *mask;
732
+ int k;
733
+
734
+ mask = srcmask(j);
735
+ for_each_cpu(k, srcmask(i))
736
+ cpumask_or(dstmask(k), dstmask(k), mask);
737
+
738
+ if (i == j)
739
+ return;
740
+
741
+ mask = srcmask(i);
742
+ for_each_cpu(k, srcmask(j))
743
+ cpumask_or(dstmask(k), dstmask(k), mask);
744
+}
745
+
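For illustration only (not part of the patch): the bulk OR above is roughly equivalent to relating every pair of CPUs drawn from the two source masks with the file's existing set_cpus_related() helper, just with far fewer cpumask updates. A minimal sketch with a hypothetical name:

/*
 * Hypothetical slow-path equivalent of or_cpumasks_related(), shown only
 * to illustrate what the bulk OR computes.
 */
static void or_cpumasks_related_slow(int i, int j,
				     struct cpumask *(*srcmask)(int),
				     struct cpumask *(*dstmask)(int))
{
	int k, l;

	for_each_cpu(k, srcmask(i))
		for_each_cpu(l, srcmask(j))
			set_cpus_related(k, l, dstmask);
}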
746
+/*
747
+ * parse_thread_groups: Parses the "ibm,thread-groups" device tree
748
+ * property for the CPU device node @dn and stores
749
+ * the parsed output in the thread_groups
750
+ * structure @tg if the ibm,thread-groups[0]
751
+ * matches @property.
752
+ *
753
+ * @dn: The device node of the CPU device.
754
+ * @tg: Pointer to a thread group structure into which the parsed
755
+ * output of "ibm,thread-groups" is stored.
756
+ * @property: The property of the thread-group that the caller is
757
+ * interested in.
758
+ *
759
+ * ibm,thread-groups[0..N-1] array defines which group of threads in
760
+ * the CPU-device node can be grouped together based on the property.
761
+ *
762
+ * ibm,thread-groups[0] tells us the property based on which the
763
+ * threads are being grouped together. If this value is 1, it implies
764
+ * that the threads in the same group share the L1 and translation caches.
765
+ *
766
+ * ibm,thread-groups[1] tells us how many such thread groups exist.
767
+ *
768
+ * ibm,thread-groups[2] tells us the number of threads in each such
769
+ * group.
770
+ *
771
+ * ibm,thread-groups[3..N-1] is the list of threads identified by
772
+ * "ibm,ppc-interrupt-server#s" arranged as per their membership in
773
+ * the grouping.
774
+ *
775
+ * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it
776
+ * implies that there are 2 groups of 4 threads each, where each group
777
+ * of threads shares the L1 and translation caches.
778
+ *
779
+ * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8}
780
+ * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10,
781
+ * 11, 12}.
782
+ *
783
+ * Returns 0 on success, -EINVAL if the property does not exist,
784
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
785
+ * property data isn't large enough.
786
+ */
787
+static int parse_thread_groups(struct device_node *dn,
788
+ struct thread_groups *tg,
789
+ unsigned int property)
790
+{
791
+ int i;
792
+ u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
793
+ u32 *thread_list;
794
+ size_t total_threads;
795
+ int ret;
796
+
797
+ ret = of_property_read_u32_array(dn, "ibm,thread-groups",
798
+ thread_group_array, 3);
799
+ if (ret)
800
+ return ret;
801
+
802
+ tg->property = thread_group_array[0];
803
+ tg->nr_groups = thread_group_array[1];
804
+ tg->threads_per_group = thread_group_array[2];
805
+ if (tg->property != property ||
806
+ tg->nr_groups < 1 ||
807
+ tg->threads_per_group < 1)
808
+ return -ENODATA;
809
+
810
+ total_threads = tg->nr_groups * tg->threads_per_group;
811
+
812
+ ret = of_property_read_u32_array(dn, "ibm,thread-groups",
813
+ thread_group_array,
814
+ 3 + total_threads);
815
+ if (ret)
816
+ return ret;
817
+
818
+ thread_list = &thread_group_array[3];
819
+
820
+ for (i = 0 ; i < total_threads; i++)
821
+ tg->thread_list[i] = thread_list[i];
822
+
823
+ return 0;
824
+}
825
+
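A worked example, using only the values from the comment above: parsing ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] with @property == THREAD_GROUP_SHARE_L1 would fill @tg as sketched here (hypothetical local variable, for illustration):

struct thread_groups tg = {
	.property          = 1,	/* THREAD_GROUP_SHARE_L1 */
	.nr_groups         = 2,	/* two thread groups */
	.threads_per_group = 4,	/* four threads per group */
	.thread_list       = { 5, 6, 7, 8, 9, 10, 11, 12 },
};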
826
+/*
827
+ * get_cpu_thread_group_start : Searches the thread group in tg->thread_list
828
+ * that @cpu belongs to.
829
+ *
830
+ * @cpu : The logical CPU whose thread group is being searched.
831
+ * @tg : The thread-group structure of the CPU node which @cpu belongs
832
+ * to.
833
+ *
834
+ * Returns the index into tg->thread_list that points to the start
835
+ * of the thread_group that @cpu belongs to.
836
+ *
837
+ * Returns -1 if cpu doesn't belong to any of the groups pointed to by
838
+ * tg->thread_list.
839
+ */
840
+static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
841
+{
842
+ int hw_cpu_id = get_hard_smp_processor_id(cpu);
843
+ int i, j;
844
+
845
+ for (i = 0; i < tg->nr_groups; i++) {
846
+ int group_start = i * tg->threads_per_group;
847
+
848
+ for (j = 0; j < tg->threads_per_group; j++) {
849
+ int idx = group_start + j;
850
+
851
+ if (tg->thread_list[idx] == hw_cpu_id)
852
+ return group_start;
853
+ }
854
+ }
855
+
856
+ return -1;
857
+}
858
+
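Continuing the same assumed example: for a logical CPU whose get_hard_smp_processor_id() is 9, the search above matches the second group, so the returned index is the start of that group in tg->thread_list:

/* Hypothetical lookup against the tg value sketched above. */
int start = get_cpu_thread_group_start(cpu, &tg);	/* == 4 */
/* tg.thread_list[4..7] == {9, 10, 11, 12} is this CPU's group. */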
859
+static int init_cpu_l1_cache_map(int cpu)
860
+
861
+{
862
+ struct device_node *dn = of_get_cpu_node(cpu, NULL);
863
+ struct thread_groups tg = {.property = 0,
864
+ .nr_groups = 0,
865
+ .threads_per_group = 0};
866
+ int first_thread = cpu_first_thread_sibling(cpu);
867
+ int i, cpu_group_start = -1, err = 0;
868
+
869
+ if (!dn)
870
+ return -ENODATA;
871
+
872
+ err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
873
+ if (err)
874
+ goto out;
875
+
876
+ cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
877
+
878
+ if (unlikely(cpu_group_start == -1)) {
879
+ WARN_ON_ONCE(1);
880
+ err = -ENODATA;
881
+ goto out;
882
+ }
883
+
884
+ zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
885
+ GFP_KERNEL, cpu_to_node(cpu));
886
+
887
+ for (i = first_thread; i < first_thread + threads_per_core; i++) {
888
+ int i_group_start = get_cpu_thread_group_start(i, &tg);
889
+
890
+ if (unlikely(i_group_start == -1)) {
891
+ WARN_ON_ONCE(1);
892
+ err = -ENODATA;
893
+ goto out;
894
+ }
895
+
896
+ if (i_group_start == cpu_group_start)
897
+ cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
898
+ }
899
+
900
+out:
901
+ of_node_put(dn);
902
+ return err;
903
+}
904
+
905
+static bool shared_caches;
906
+
907
+#ifdef CONFIG_SCHED_SMT
908
+/* cpumask of CPUs with asymmetric SMT dependency */
909
+static int powerpc_smt_flags(void)
910
+{
911
+ int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
912
+
913
+ if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
914
+ printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
915
+ flags |= SD_ASYM_PACKING;
916
+ }
917
+ return flags;
918
+}
919
+#endif
920
+
921
+/*
922
+ * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
923
+ * This topology makes it *much* cheaper to migrate tasks between adjacent cores
924
+ * since the migrated task remains cache hot. We want to take advantage of this
925
+ * at the scheduler level so an extra topology level is required.
926
+ */
927
+static int powerpc_shared_cache_flags(void)
928
+{
929
+ return SD_SHARE_PKG_RESOURCES;
930
+}
931
+
932
+/*
933
+ * We can't just pass cpu_l2_cache_mask() directly because
934
+ * it returns a non-const pointer and the compiler barfs on that.
935
+ */
936
+static const struct cpumask *shared_cache_mask(int cpu)
937
+{
938
+ return per_cpu(cpu_l2_cache_map, cpu);
939
+}
940
+
941
+#ifdef CONFIG_SCHED_SMT
942
+static const struct cpumask *smallcore_smt_mask(int cpu)
943
+{
944
+ return cpu_smallcore_mask(cpu);
945
+}
946
+#endif
947
+
948
+static struct cpumask *cpu_coregroup_mask(int cpu)
949
+{
950
+ return per_cpu(cpu_coregroup_map, cpu);
951
+}
952
+
953
+static bool has_coregroup_support(void)
954
+{
955
+ return coregroup_enabled;
956
+}
957
+
958
+static const struct cpumask *cpu_mc_mask(int cpu)
959
+{
960
+ return cpu_coregroup_mask(cpu);
961
+}
962
+
963
+static struct sched_domain_topology_level powerpc_topology[] = {
964
+#ifdef CONFIG_SCHED_SMT
965
+ { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
966
+#endif
967
+ { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
968
+ { cpu_mc_mask, SD_INIT_NAME(MC) },
969
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
970
+ { NULL, },
971
+};
972
+
973
+static int __init init_big_cores(void)
974
+{
975
+ int cpu;
976
+
977
+ for_each_possible_cpu(cpu) {
978
+ int err = init_cpu_l1_cache_map(cpu);
979
+
980
+ if (err)
981
+ return err;
982
+
983
+ zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
984
+ GFP_KERNEL,
985
+ cpu_to_node(cpu));
986
+ }
987
+
988
+ has_big_cores = true;
989
+ return 0;
990
+}
991
+
684992 void __init smp_prepare_cpus(unsigned int max_cpus)
685993 {
686994 unsigned int cpu;
....@@ -704,6 +1012,11 @@
7041012 GFP_KERNEL, cpu_to_node(cpu));
7051013 zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
7061014 GFP_KERNEL, cpu_to_node(cpu));
1015
+ if (has_coregroup_support())
1016
+ zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu),
1017
+ GFP_KERNEL, cpu_to_node(cpu));
1018
+
1019
+#ifdef CONFIG_NEED_MULTIPLE_NODES
7071020 /*
7081021 * numa_node_id() works after this.
7091022 */
....@@ -712,12 +1025,22 @@
7121025 set_cpu_numa_mem(cpu,
7131026 local_memory_node(numa_cpu_lookup_table[cpu]));
7141027 }
1028
+#endif
7151029 }
7161030
7171031 /* Init the cpumasks so the boot CPU is related to itself */
7181032 cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
7191033 cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
7201034 cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
1035
+
1036
+ if (has_coregroup_support())
1037
+ cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid));
1038
+
1039
+ init_big_cores();
1040
+ if (has_big_cores) {
1041
+ cpumask_set_cpu(boot_cpuid,
1042
+ cpu_smallcore_mask(boot_cpuid));
1043
+ }
7211044
7221045 if (smp_ops && smp_ops->probe)
7231046 smp_ops->probe();
....@@ -730,7 +1053,7 @@
7301053 paca_ptrs[boot_cpuid]->__current = current;
7311054 #endif
7321055 set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
733
- current_set[boot_cpuid] = task_thread_info(current);
1056
+ current_set[boot_cpuid] = current;
7341057 }
7351058
7361059 #ifdef CONFIG_HOTPLUG_CPU
....@@ -815,14 +1138,13 @@
8151138
8161139 static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
8171140 {
818
- struct thread_info *ti = task_thread_info(idle);
819
-
8201141 #ifdef CONFIG_PPC64
8211142 paca_ptrs[cpu]->__current = idle;
822
- paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
1143
+ paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
1144
+ THREAD_SIZE - STACK_FRAME_OVERHEAD;
8231145 #endif
824
- ti->cpu = cpu;
825
- secondary_ti = current_set[cpu] = ti;
1146
+ idle->cpu = cpu;
1147
+ secondary_current = current_set[cpu] = idle;
8261148 }
8271149
8281150 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
....@@ -964,26 +1286,46 @@
9641286 return cache;
9651287 }
9661288
967
-static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
1289
+static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
9681290 {
1291
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
9691292 struct device_node *l2_cache, *np;
9701293 int i;
9711294
972
- l2_cache = cpu_to_l2cache(cpu);
973
- if (!l2_cache)
974
- return false;
1295
+ if (has_big_cores)
1296
+ submask_fn = cpu_smallcore_mask;
9751297
976
- for_each_cpu(i, cpu_online_mask) {
1298
+ l2_cache = cpu_to_l2cache(cpu);
1299
+ if (!l2_cache || !*mask) {
1300
+ /* Assume only core siblings share cache with this CPU */
1301
+ for_each_cpu(i, submask_fn(cpu))
1302
+ set_cpus_related(cpu, i, cpu_l2_cache_mask);
1303
+
1304
+ return false;
1305
+ }
1306
+
1307
+ cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
1308
+
1309
+ /* Update l2-cache mask with all the CPUs that are part of submask */
1310
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
1311
+
1312
+ /* Skip all CPUs already part of current CPU l2-cache mask */
1313
+ cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(cpu));
1314
+
1315
+ for_each_cpu(i, *mask) {
9771316 /*
9781317 * when updating the marks the current CPU has not been marked
9791318 * online, but we need to update the cache masks
9801319 */
9811320 np = cpu_to_l2cache(i);
982
- if (!np)
983
- continue;
9841321
985
- if (np == l2_cache)
986
- set_cpus_related(cpu, i, mask_fn);
1322
+ /* Skip all CPUs already part of current CPU l2-cache */
1323
+ if (np == l2_cache) {
1324
+ or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask);
1325
+ cpumask_andnot(*mask, *mask, submask_fn(i));
1326
+ } else {
1327
+ cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(i));
1328
+ }
9871329
9881330 of_node_put(np);
9891331 }
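The loop above, and the near-identical loops added later in this patch for the coregroup and core masks, follow one pattern: scan a shrinking candidate mask and drop a whole group per iteration, so each group is examined once instead of once per CPU. A stand-alone sketch of that pattern, with hypothetical names, assuming nothing beyond the generic cpumask API:

static void build_mask_by_group(int cpu, cpumask_var_t cand,
				bool (*same_group)(int a, int b),
				struct cpumask *(*group)(int),
				struct cpumask *(*target)(int))
{
	int i;

	/* Start from every online CPU not yet accounted for. */
	cpumask_andnot(cand, cpu_online_mask, target(cpu));

	for_each_cpu(i, cand) {
		if (same_group(cpu, i))
			cpumask_or(target(cpu), target(cpu), group(i));
		/* Related or not, this whole group has now been handled. */
		cpumask_andnot(cand, cand, group(i));
	}
}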
....@@ -995,21 +1337,87 @@
9951337 #ifdef CONFIG_HOTPLUG_CPU
9961338 static void remove_cpu_from_masks(int cpu)
9971339 {
1340
+ struct cpumask *(*mask_fn)(int) = cpu_sibling_mask;
9981341 int i;
9991342
1000
- /* NB: cpu_core_mask is a superset of the others */
1001
- for_each_cpu(i, cpu_core_mask(cpu)) {
1002
- set_cpus_unrelated(cpu, i, cpu_core_mask);
1343
+ if (shared_caches)
1344
+ mask_fn = cpu_l2_cache_mask;
1345
+
1346
+ for_each_cpu(i, mask_fn(cpu)) {
10031347 set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
10041348 set_cpus_unrelated(cpu, i, cpu_sibling_mask);
1349
+ if (has_big_cores)
1350
+ set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
1351
+ }
1352
+
1353
+ for_each_cpu(i, cpu_core_mask(cpu))
1354
+ set_cpus_unrelated(cpu, i, cpu_core_mask);
1355
+
1356
+ if (has_coregroup_support()) {
1357
+ for_each_cpu(i, cpu_coregroup_mask(cpu))
1358
+ set_cpus_unrelated(cpu, i, cpu_coregroup_mask);
10051359 }
10061360 }
10071361 #endif
10081362
1363
+static inline void add_cpu_to_smallcore_masks(int cpu)
1364
+{
1365
+ int i;
1366
+
1367
+ if (!has_big_cores)
1368
+ return;
1369
+
1370
+ cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
1371
+
1372
+ for_each_cpu(i, per_cpu(cpu_l1_cache_map, cpu)) {
1373
+ if (cpu_online(i))
1374
+ set_cpus_related(i, cpu, cpu_smallcore_mask);
1375
+ }
1376
+}
1377
+
1378
+static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
1379
+{
1380
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
1381
+ int coregroup_id = cpu_to_coregroup_id(cpu);
1382
+ int i;
1383
+
1384
+ if (shared_caches)
1385
+ submask_fn = cpu_l2_cache_mask;
1386
+
1387
+ if (!*mask) {
1388
+ /* Assume only siblings are part of this CPU's coregroup */
1389
+ for_each_cpu(i, submask_fn(cpu))
1390
+ set_cpus_related(cpu, i, cpu_coregroup_mask);
1391
+
1392
+ return;
1393
+ }
1394
+
1395
+ cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
1396
+
1397
+ /* Update coregroup mask with all the CPUs that are part of submask */
1398
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask);
1399
+
1400
+ /* Skip all CPUs already part of coregroup mask */
1401
+ cpumask_andnot(*mask, *mask, cpu_coregroup_mask(cpu));
1402
+
1403
+ for_each_cpu(i, *mask) {
1404
+ /* Skip all CPUs not part of this coregroup */
1405
+ if (coregroup_id == cpu_to_coregroup_id(i)) {
1406
+ or_cpumasks_related(cpu, i, submask_fn, cpu_coregroup_mask);
1407
+ cpumask_andnot(*mask, *mask, submask_fn(i));
1408
+ } else {
1409
+ cpumask_andnot(*mask, *mask, cpu_coregroup_mask(i));
1410
+ }
1411
+ }
1412
+}
1413
+
10091414 static void add_cpu_to_masks(int cpu)
10101415 {
1416
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
10111417 int first_thread = cpu_first_thread_sibling(cpu);
1012
- int chipid = cpu_to_chip_id(cpu);
1418
+ int chip_id = cpu_to_chip_id(cpu);
1419
+ cpumask_var_t mask;
1420
+ bool ret;
10131421 int i;
10141422
10151423 /*
....@@ -1017,47 +1425,57 @@
10171425 * add it to it's own thread sibling mask.
10181426 */
10191427 cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
1428
+ cpumask_set_cpu(cpu, cpu_core_mask(cpu));
10201429
10211430 for (i = first_thread; i < first_thread + threads_per_core; i++)
10221431 if (cpu_online(i))
10231432 set_cpus_related(i, cpu, cpu_sibling_mask);
10241433
1025
- /*
1026
- * Copy the thread sibling mask into the cache sibling mask
1027
- * and mark any CPUs that share an L2 with this CPU.
1028
- */
1029
- for_each_cpu(i, cpu_sibling_mask(cpu))
1030
- set_cpus_related(cpu, i, cpu_l2_cache_mask);
1031
- update_mask_by_l2(cpu, cpu_l2_cache_mask);
1434
+ add_cpu_to_smallcore_masks(cpu);
10321435
1033
- /*
1034
- * Copy the cache sibling mask into core sibling mask and mark
1035
- * any CPUs on the same chip as this CPU.
1036
- */
1037
- for_each_cpu(i, cpu_l2_cache_mask(cpu))
1038
- set_cpus_related(cpu, i, cpu_core_mask);
1436
+ /* In CPU-hotplug path, hence use GFP_ATOMIC */
1437
+ ret = alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu));
1438
+ update_mask_by_l2(cpu, &mask);
10391439
1040
- if (chipid == -1)
1041
- return;
1440
+ if (has_coregroup_support())
1441
+ update_coregroup_mask(cpu, &mask);
10421442
1043
- for_each_cpu(i, cpu_online_mask)
1044
- if (cpu_to_chip_id(i) == chipid)
1045
- set_cpus_related(cpu, i, cpu_core_mask);
1443
+ if (shared_caches)
1444
+ submask_fn = cpu_l2_cache_mask;
1445
+
1446
+ /* Update core_mask with all the CPUs that are part of submask */
1447
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_core_mask);
1448
+
1449
+ /* Skip all CPUs already part of current CPU core mask */
1450
+ cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu));
1451
+
1452
+ /* If chip_id is -1, limit the cpu_core_mask to within the DIE */
1453
+ if (chip_id == -1)
1454
+ cpumask_and(mask, mask, cpu_cpu_mask(cpu));
1455
+
1456
+ for_each_cpu(i, mask) {
1457
+ if (chip_id == cpu_to_chip_id(i)) {
1458
+ or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask);
1459
+ cpumask_andnot(mask, mask, submask_fn(i));
1460
+ } else {
1461
+ cpumask_andnot(mask, mask, cpu_core_mask(i));
1462
+ }
1463
+ }
1464
+
1465
+ free_cpumask_var(mask);
10461466 }
1047
-
1048
-static bool shared_caches;
10491467
10501468 /* Activate a secondary processor. */
10511469 void start_secondary(void *unused)
10521470 {
1053
- unsigned int cpu = smp_processor_id();
1471
+ unsigned int cpu = raw_smp_processor_id();
10541472
10551473 mmgrab(&init_mm);
10561474 current->active_mm = &init_mm;
10571475
10581476 smp_store_cpu_info(cpu);
10591477 set_dec(tb_ticks_per_jiffy);
1060
- preempt_disable();
1478
+ rcu_cpu_starting(cpu);
10611479 cpu_callin_map[cpu] = 1;
10621480
10631481 if (smp_ops->setup_cpu)
....@@ -1083,12 +1501,22 @@
10831501 * Check for any shared caches. Note that this must be done on a
10841502 * per-core basis because one core in the pair might be disabled.
10851503 */
1086
- if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu)))
1087
- shared_caches = true;
1504
+ if (!shared_caches) {
1505
+ struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
1506
+ struct cpumask *mask = cpu_l2_cache_mask(cpu);
1507
+
1508
+ if (has_big_cores)
1509
+ sibling_mask = cpu_smallcore_mask;
1510
+
1511
+ if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu)))
1512
+ shared_caches = true;
1513
+ }
10881514
10891515 smp_wmb();
10901516 notify_cpu_starting(cpu);
10911517 set_cpu_online(cpu, true);
1518
+
1519
+ boot_init_stack_canary();
10921520
10931521 local_irq_enable();
10941522
....@@ -1107,56 +1535,44 @@
11071535 }
11081536 #endif
11091537
1110
-#ifdef CONFIG_SCHED_SMT
1111
-/* cpumask of CPUs with asymetric SMT dependancy */
1112
-static int powerpc_smt_flags(void)
1538
+static void fixup_topology(void)
11131539 {
1114
- int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
1540
+ int i;
11151541
1116
- if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
1117
- printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
1118
- flags |= SD_ASYM_PACKING;
1542
+#ifdef CONFIG_SCHED_SMT
1543
+ if (has_big_cores) {
1544
+ pr_info("Big cores detected but using small core scheduling\n");
1545
+ powerpc_topology[smt_idx].mask = smallcore_smt_mask;
11191546 }
1120
- return flags;
1121
-}
11221547 #endif
11231548
1124
-static struct sched_domain_topology_level powerpc_topology[] = {
1125
-#ifdef CONFIG_SCHED_SMT
1126
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
1549
+ if (!has_coregroup_support())
1550
+ powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask;
1551
+
1552
+ /*
1553
+ * Try to consolidate topology levels here instead of
1554
+ * allowing the scheduler to degenerate them.
1555
+ * - Don't consolidate if masks are different.
1556
+ * - Don't consolidate if both sd_flags are set and differ.
1557
+ */
1558
+ for (i = 1; i <= die_idx; i++) {
1559
+ if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask)
1560
+ continue;
1561
+
1562
+ if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags &&
1563
+ powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags)
1564
+ continue;
1565
+
1566
+ if (!powerpc_topology[i - 1].sd_flags)
1567
+ powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags;
1568
+
1569
+ powerpc_topology[i].mask = powerpc_topology[i + 1].mask;
1570
+ powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags;
1571
+#ifdef CONFIG_SCHED_DEBUG
1572
+ powerpc_topology[i].name = powerpc_topology[i + 1].name;
11271573 #endif
1128
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
1129
- { NULL, },
1130
-};
1131
-
1132
-/*
1133
- * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
1134
- * This topology makes it *much* cheaper to migrate tasks between adjacent cores
1135
- * since the migrated task remains cache hot. We want to take advantage of this
1136
- * at the scheduler level so an extra topology level is required.
1137
- */
1138
-static int powerpc_shared_cache_flags(void)
1139
-{
1140
- return SD_SHARE_PKG_RESOURCES;
1574
+ }
11411575 }
1142
-
1143
-/*
1144
- * We can't just pass cpu_l2_cache_mask() directly because
1145
- * returns a non-const pointer and the compiler barfs on that.
1146
- */
1147
-static const struct cpumask *shared_cache_mask(int cpu)
1148
-{
1149
- return cpu_l2_cache_mask(cpu);
1150
-}
1151
-
1152
-static struct sched_domain_topology_level power9_topology[] = {
1153
-#ifdef CONFIG_SCHED_SMT
1154
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
1155
-#endif
1156
- { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
1157
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
1158
- { NULL, },
1159
-};
11601576
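A worked example of the consolidation above, under the assumption that CONFIG_SCHED_SMT is enabled and the platform reports no coregroup support: fixup_topology() first points the MC level at the CACHE mask, the loop then finds MC identical to CACHE and copies the DIE entry over it, so the effective table collapses to the same three levels as the power9_topology[] array removed above:

/* Hypothetical effective table after fixup_topology(), under the assumptions above. */
static struct sched_domain_topology_level consolidated_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
#endif
	{ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};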
11611577 void __init smp_cpus_done(unsigned int max_cpus)
11621578 {
....@@ -1169,24 +1585,10 @@
11691585 if (smp_ops && smp_ops->bringup_done)
11701586 smp_ops->bringup_done();
11711587
1172
- /*
1173
- * On a shared LPAR, associativity needs to be requested.
1174
- * Hence, get numa topology before dumping cpu topology
1175
- */
1176
- shared_proc_topology_init();
11771588 dump_numa_cpu_topology();
11781589
1179
- /*
1180
- * If any CPU detects that it's sharing a cache with another CPU then
1181
- * use the deeper topology that is aware of this sharing.
1182
- */
1183
- if (shared_caches) {
1184
- pr_info("Using shared cache scheduler topology\n");
1185
- set_sched_topology(power9_topology);
1186
- } else {
1187
- pr_info("Using standard scheduler topology\n");
1188
- set_sched_topology(powerpc_topology);
1189
- }
1590
+ fixup_topology();
1591
+ set_sched_topology(powerpc_topology);
11901592 }
11911593
11921594 #ifdef CONFIG_HOTPLUG_CPU
....@@ -1216,7 +1618,7 @@
12161618 smp_ops->cpu_die(cpu);
12171619 }
12181620
1219
-void cpu_die(void)
1621
+void arch_cpu_idle_dead(void)
12201622 {
12211623 /*
12221624 * Disable on the down path. This will be re-enabled by
....@@ -1224,8 +1626,8 @@
12241626 */
12251627 this_cpu_disable_ftrace();
12261628
1227
- if (ppc_md.cpu_die)
1228
- ppc_md.cpu_die();
1629
+ if (smp_ops->cpu_offline_self)
1630
+ smp_ops->cpu_offline_self();
12291631
12301632 /* If we return, we re-enter start_secondary */
12311633 start_secondary_resume();