forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-06 08f87f769b595151be1afeff53e144f543faa614
--- a/kernel/arch/powerpc/mm/numa.c
+++ b/kernel/arch/powerpc/mm/numa.c
@@ -1,17 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * pSeries NUMA support
  *
  * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 #define pr_fmt(fmt) "numa: " fmt
 
 #include <linux/threads.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
@@ -19,7 +15,6 @@
 #include <linux/nodemask.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
-#include <linux/memblock.h>
 #include <linux/of.h>
 #include <linux/pfn.h>
 #include <linux/cpuset.h>
@@ -33,7 +28,6 @@
 #include <asm/sparsemem.h>
 #include <asm/prom.h>
 #include <asm/smp.h>
-#include <asm/cputhreads.h>
 #include <asm/topology.h>
 #include <asm/firmware.h>
 #include <asm/paca.h>
@@ -85,7 +79,7 @@
 		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
 
 	/* cpumask_of_node() will now work */
-	dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
+	dbg("Node to cpumask map for %u nodes\n", nr_node_ids);
 }
 
 static int __init fake_numa_create_new_node(unsigned long end_pfn,
@@ -169,6 +163,22 @@
 }
 #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
 
+int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+	int dist = 0;
+
+	int i, index;
+
+	for (i = 0; i < distance_ref_points_depth; i++) {
+		index = be32_to_cpu(distance_ref_points[i]);
+		if (cpu1_assoc[index] == cpu2_assoc[index])
+			break;
+		dist++;
+	}
+
+	return dist;
+}
+
 /* must hold reference to node during call */
 static const __be32 *of_get_associativity(struct device_node *dev)
 {
@@ -211,22 +221,23 @@
 	}
 }
 
-/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
+/*
+ * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
  * info is found.
  */
 static int associativity_to_nid(const __be32 *associativity)
 {
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 
-	if (min_common_depth == -1)
+	if (!numa_enabled)
 		goto out;
 
 	if (of_read_number(associativity, 1) >= min_common_depth)
 		nid = of_read_number(&associativity[min_common_depth], 1);
 
 	/* POWER4 LPAR uses 0xffff as invalid node */
-	if (nid == 0xffff || nid >= MAX_NUMNODES)
-		nid = -1;
+	if (nid == 0xffff || nid >= nr_node_ids)
+		nid = NUMA_NO_NODE;
 
 	if (nid > 0 &&
 	    of_read_number(associativity, 1) >= distance_ref_points_depth) {
@@ -245,7 +256,7 @@
  */
 static int of_node_to_nid_single(struct device_node *device)
 {
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 	const __be32 *tmp;
 
 	tmp = of_get_associativity(device);
@@ -257,7 +268,7 @@
 /* Walk the device tree upwards, looking for an associativity id */
 int of_node_to_nid(struct device_node *device)
 {
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 
 	of_node_get(device);
 	while (device) {
@@ -419,24 +430,26 @@
  * This is like of_node_to_nid_single() for memory represented in the
  * ibm,dynamic-reconfiguration-memory node.
  */
-static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
+int of_drconf_to_nid_single(struct drmem_lmb *lmb)
 {
 	struct assoc_arrays aa = { .arrays = NULL };
-	int default_nid = 0;
+	int default_nid = NUMA_NO_NODE;
 	int nid = default_nid;
 	int rc, index;
+
+	if ((min_common_depth < 0) || !numa_enabled)
+		return default_nid;
 
 	rc = of_get_assoc_arrays(&aa);
 	if (rc)
 		return default_nid;
 
-	if (min_common_depth > 0 && min_common_depth <= aa.array_sz &&
-	    !(lmb->flags & DRCONF_MEM_AI_INVALID) &&
-	    lmb->aa_index < aa.n_arrays) {
+	if (min_common_depth <= aa.array_sz &&
+	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
 		index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
 		nid = of_read_number(&aa.arrays[index], 1);
 
-		if (nid == 0xffff || nid >= MAX_NUMNODES)
+		if (nid == 0xffff || nid >= nr_node_ids)
 			nid = default_nid;
 
 		if (nid > 0) {
@@ -449,24 +462,73 @@
 	return nid;
 }
 
+#ifdef CONFIG_PPC_SPLPAR
+static int vphn_get_nid(long lcpu)
+{
+	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+	long rc, hwid;
+
+	/*
+	 * On a shared lpar, device tree will not have node associativity.
+	 * At this time lppaca, or its __old_status field may not be
+	 * updated. Hence kernel cannot detect if its on a shared lpar. So
+	 * request an explicit associativity irrespective of whether the
+	 * lpar is shared or dedicated. Use the device tree property as a
+	 * fallback. cpu_to_phys_id is only valid between
+	 * smp_setup_cpu_maps() and smp_setup_pacas().
+	 */
+	if (firmware_has_feature(FW_FEATURE_VPHN)) {
+		if (cpu_to_phys_id)
+			hwid = cpu_to_phys_id[lcpu];
+		else
+			hwid = get_hard_smp_processor_id(lcpu);
+
+		rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity);
+		if (rc == H_SUCCESS)
+			return associativity_to_nid(associativity);
+	}
+
+	return NUMA_NO_NODE;
+}
+#else
+static int vphn_get_nid(long unused)
+{
+	return NUMA_NO_NODE;
+}
+#endif /* CONFIG_PPC_SPLPAR */
+
 /*
  * Figure out to which domain a cpu belongs and stick it there.
  * Return the id of the domain used.
 */
 static int numa_setup_cpu(unsigned long lcpu)
 {
-	int nid = -1;
 	struct device_node *cpu;
+	int fcpu = cpu_first_thread_sibling(lcpu);
+	int nid = NUMA_NO_NODE;
+
+	if (!cpu_present(lcpu)) {
+		set_cpu_numa_node(lcpu, first_online_node);
+		return first_online_node;
+	}
 
 	/*
 	 * If a valid cpu-to-node mapping is already available, use it
 	 * directly instead of querying the firmware, since it represents
 	 * the most recent mapping notified to us by the platform (eg: VPHN).
+	 * Since cpu_to_node binding remains the same for all threads in the
+	 * core. If a valid cpu-to-node mapping is already available, for
+	 * the first thread in the core, use it.
 	 */
-	if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) {
+	nid = numa_cpu_lookup_table[fcpu];
+	if (nid >= 0) {
 		map_cpu_to_node(lcpu, nid);
 		return nid;
 	}
+
+	nid = vphn_get_nid(lcpu);
+	if (nid != NUMA_NO_NODE)
+		goto out_present;
 
 	cpu = of_get_cpu_node(lcpu, NULL);
 
@@ -479,13 +541,26 @@
 	}
 
 	nid = of_node_to_nid_single(cpu);
+	of_node_put(cpu);
 
 out_present:
 	if (nid < 0 || !node_possible(nid))
 		nid = first_online_node;
 
+	/*
+	 * Update for the first thread of the core. All threads of a core
+	 * have to be part of the same node. This not only avoids querying
+	 * for every other thread in the core, but always avoids a case
+	 * where virtual node associativity change causes subsequent threads
+	 * of a core to be associated with different nid. However if first
+	 * thread is already online, expect it to have a valid mapping.
+	 */
+	if (fcpu != lcpu) {
+		WARN_ON(cpu_online(fcpu));
+		map_cpu_to_node(fcpu, nid);
+	}
+
 	map_cpu_to_node(lcpu, nid);
-	of_node_put(cpu);
 out:
 	return nid;
 }
@@ -575,8 +650,9 @@
  * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
  * node. This assumes n_mem_{addr,size}_cells have been set.
  */
-static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
-					const __be32 **usm)
+static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
+					const __be32 **usm,
+					void *data)
 {
 	unsigned int ranges, is_kexec_kdump = 0;
 	unsigned long base, size, sz;
@@ -588,7 +664,7 @@
 	 */
 	if ((lmb->flags & DRCONF_MEM_RESERVED)
 	    || !(lmb->flags & DRCONF_MEM_ASSIGNED))
-		return;
+		return 0;
 
 	if (*usm)
 		is_kexec_kdump = 1;
@@ -600,7 +676,7 @@
 	if (is_kexec_kdump) {
 		ranges = read_usm_ranges(usm);
 		if (!ranges) /* there are no (base, size) duple */
-			return;
+			return 0;
 	}
 
 	do {
@@ -617,6 +693,8 @@
 		if (sz)
 			memblock_set_node(base, sz, &memblock.memory, nid);
 	} while (--ranges);
+
+	return 0;
 }
 
 static int __init parse_numa_properties(void)
@@ -632,8 +710,14 @@
 
 	min_common_depth = find_min_common_depth();
 
-	if (min_common_depth < 0)
+	if (min_common_depth < 0) {
+		/*
+		 * if we fail to parse min_common_depth from device tree
+		 * mark the numa disabled, boot with numa disabled.
+		 */
+		numa_enabled = false;
 		return min_common_depth;
+	}
 
 	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
 
@@ -644,21 +728,23 @@
 	 */
 	for_each_present_cpu(i) {
 		struct device_node *cpu;
-		int nid;
-
-		cpu = of_get_cpu_node(i, NULL);
-		BUG_ON(!cpu);
-		nid = of_node_to_nid_single(cpu);
-		of_node_put(cpu);
+		int nid = vphn_get_nid(i);
 
 		/*
 		 * Don't fall back to default_nid yet -- we will plug
 		 * cpus into nodes once the memory scan has discovered
 		 * the topology.
 		 */
-		if (nid < 0)
-			continue;
-		node_set_online(nid);
+		if (nid == NUMA_NO_NODE) {
+			cpu = of_get_cpu_node(i, NULL);
+			BUG_ON(!cpu);
+			nid = of_node_to_nid_single(cpu);
+			of_node_put(cpu);
+		}
+
+		/* node_set_online() is an UB if 'nid' is negative */
+		if (likely(nid >= 0))
+			node_set_online(nid);
 	}
 
 	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
@@ -712,7 +798,7 @@
 	 */
 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
 	if (memory) {
-		walk_drmem_lmbs(memory, numa_setup_drmem_lmb);
+		walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb);
 		of_node_put(memory);
 	}
 
@@ -725,17 +811,14 @@
 	unsigned long total_ram = memblock_phys_mem_size();
 	unsigned long start_pfn, end_pfn;
 	unsigned int nid = 0;
-	struct memblock_region *reg;
+	int i;
 
 	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
 	       top_of_ram, total_ram);
 	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
 	       (top_of_ram - total_ram) >> 20);
 
-	for_each_memblock(memory, reg) {
-		start_pfn = memblock_region_memory_base_pfn(reg);
-		end_pfn = memblock_region_memory_end_pfn(reg);
-
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
 		fake_numa_create_new_node(end_pfn, &nid);
 		memblock_set_node(PFN_PHYS(start_pfn),
 				  PFN_PHYS(end_pfn - start_pfn),
@@ -749,7 +832,7 @@
 	unsigned int node;
 	unsigned int cpu, count;
 
-	if (min_common_depth == -1 || !numa_enabled)
+	if (!numa_enabled)
 		return;
 
 	for_each_online_node(node) {
@@ -788,7 +871,11 @@
 	void *nd;
 	int tnid;
 
-	nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+	if (!nd_pa)
+		panic("Cannot allocate %zu bytes for node %d data\n",
+		      nd_size, nid);
+
 	nd = __va(nd_pa);
 
 	/* report and initialize */
@@ -808,24 +895,48 @@
 static void __init find_possible_nodes(void)
 {
 	struct device_node *rtas;
-	u32 numnodes, i;
+	const __be32 *domains = NULL;
+	int prop_length, max_nodes;
+	u32 i;
 
-	if (min_common_depth <= 0)
+	if (!numa_enabled)
 		return;
 
 	rtas = of_find_node_by_path("/rtas");
 	if (!rtas)
 		return;
 
-	if (of_property_read_u32_index(rtas,
-				"ibm,max-associativity-domains",
-				min_common_depth, &numnodes))
-		goto out;
+	/*
+	 * ibm,current-associativity-domains is a fairly recent property. If
+	 * it doesn't exist, then fallback on ibm,max-associativity-domains.
+	 * Current denotes what the platform can support compared to max
+	 * which denotes what the Hypervisor can support.
+	 *
+	 * If the LPAR is migratable, new nodes might be activated after a LPM,
+	 * so we should consider the max number in that case.
+	 */
+	if (!of_get_property(of_root, "ibm,migratable-partition", NULL))
+		domains = of_get_property(rtas,
+					  "ibm,current-associativity-domains",
+					  &prop_length);
+	if (!domains) {
+		domains = of_get_property(rtas, "ibm,max-associativity-domains",
+					  &prop_length);
+		if (!domains)
+			goto out;
+	}
 
-	for (i = 0; i < numnodes; i++) {
+	max_nodes = of_read_number(&domains[min_common_depth], 1);
+	pr_info("Partition configured for %d NUMA nodes.\n", max_nodes);
+
+	for (i = 0; i < max_nodes; i++) {
 		if (!node_possible(i))
 			node_set(i, node_possible_map);
 	}
+
+	prop_length /= sizeof(int);
+	if (prop_length > min_common_depth + 2)
+		coregroup_enabled = 1;
 
 out:
 	of_node_put(rtas);
@@ -834,6 +945,16 @@
 void __init mem_topology_setup(void)
 {
 	int cpu;
+
+	/*
+	 * Linux/mm assumes node 0 to be online at boot. However this is not
+	 * true on PowerPC, where node 0 is similar to any other node, it
+	 * could be cpuless, memoryless node. So force node 0 to be offline
+	 * for now. This will prevent cpuless, memoryless node 0 showing up
+	 * unnecessarily as online. If a node has cpus or memory that need
+	 * to be online, then node will anyway be marked online.
+	 */
+	node_set_offline(0);
 
 	if (parse_numa_properties())
 		setup_nonnuma();
@@ -852,8 +973,17 @@
 
 	reset_numa_cpu_lookup_table();
 
-	for_each_present_cpu(cpu)
+	for_each_possible_cpu(cpu) {
+		/*
+		 * Powerpc with CONFIG_NUMA always used to have a node 0,
+		 * even if it was memoryless or cpuless. For all cpus that
+		 * are possible but not present, cpu_to_node() would point
+		 * to node 0. To remove a cpuless, memoryless dummy node,
+		 * powerpc need to make sure all possible but not present
+		 * cpu_to_node are set to a proper node.
+		 */
 		numa_setup_cpu(cpu);
+	}
 }
 
 void __init initmem_init(void)
@@ -870,7 +1000,6 @@
 
 		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
 		setup_node_data(nid, start_pfn, end_pfn);
-		sparse_memory_present_with_active_regions(nid);
 	}
 
 	sparse_init();
@@ -905,22 +1034,6 @@
 }
 early_param("numa", early_numa);
 
-static bool topology_updates_enabled = true;
-
-static int __init early_topology_updates(char *p)
-{
-	if (!p)
-		return 0;
-
-	if (!strcmp(p, "off")) {
-		pr_info("Disabling topology updates\n");
-		topology_updates_enabled = false;
-	}
-
-	return 0;
-}
-early_param("topology_updates", early_topology_updates);
-
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
  * Find the node associated with a hot added memory section for
@@ -931,7 +1044,7 @@
 {
 	struct drmem_lmb *lmb;
 	unsigned long lmb_size;
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 
 	lmb_size = drmem_lmb_size();
 
@@ -961,7 +1074,7 @@
 static int hot_add_node_scn_to_nid(unsigned long scn_addr)
 {
 	struct device_node *memory;
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 
 	for_each_node_by_type(memory, "memory") {
 		unsigned long start, size;
@@ -1006,7 +1119,7 @@
 	struct device_node *memory = NULL;
 	int nid;
 
-	if (!numa_enabled || (min_common_depth < 0))
+	if (!numa_enabled)
 		return first_online_node;
 
 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
@@ -1059,142 +1172,42 @@
 
 /* Virtual Processor Home Node (VPHN) support */
 #ifdef CONFIG_PPC_SPLPAR
-
-#include "vphn.h"
-
-struct topology_update_data {
-	struct topology_update_data *next;
-	unsigned int cpu;
-	int old_nid;
-	int new_nid;
-};
-
-#define TOPOLOGY_DEF_TIMER_SECS 60
-
-static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
-static cpumask_t cpu_associativity_changes_mask;
-static int vphn_enabled;
-static int prrn_enabled;
-static void reset_topology_timer(void);
-static int topology_timer_secs = 1;
 static int topology_inited;
-
-/*
- * Change polling interval for associativity changes.
- */
-int timed_topology_update(int nsecs)
-{
-	if (vphn_enabled) {
-		if (nsecs > 0)
-			topology_timer_secs = nsecs;
-		else
-			topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
-
-		reset_topology_timer();
-	}
-
-	return 0;
-}
-
-/*
- * Store the current values of the associativity change counters in the
- * hypervisor.
- */
-static void setup_cpu_associativity_change_counters(void)
-{
-	int cpu;
-
-	/* The VPHN feature supports a maximum of 8 reference points */
-	BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
-
-	for_each_possible_cpu(cpu) {
-		int i;
-		u8 *counts = vphn_cpu_change_counts[cpu];
-		volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
-		for (i = 0; i < distance_ref_points_depth; i++)
-			counts[i] = hypervisor_counts[i];
-	}
-}
-
-/*
- * The hypervisor maintains a set of 8 associativity change counters in
- * the VPA of each cpu that correspond to the associativity levels in the
- * ibm,associativity-reference-points property. When an associativity
- * level changes, the corresponding counter is incremented.
- *
- * Set a bit in cpu_associativity_changes_mask for each cpu whose home
- * node associativity levels have changed.
- *
- * Returns the number of cpus with unhandled associativity changes.
- */
-static int update_cpu_associativity_changes_mask(void)
-{
-	int cpu;
-	cpumask_t *changes = &cpu_associativity_changes_mask;
-
-	for_each_possible_cpu(cpu) {
-		int i, changed = 0;
-		u8 *counts = vphn_cpu_change_counts[cpu];
-		volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
-		for (i = 0; i < distance_ref_points_depth; i++) {
-			if (hypervisor_counts[i] != counts[i]) {
-				counts[i] = hypervisor_counts[i];
-				changed = 1;
-			}
-		}
-		if (changed) {
-			cpumask_or(changes, changes, cpu_sibling_mask(cpu));
-			cpu = cpu_last_thread_sibling(cpu);
-		}
-	}
-
-	return cpumask_weight(changes);
-}
 
 /*
  * Retrieve the new associativity information for a virtual processor's
  * home node.
 */
-static long hcall_vphn(unsigned long cpu, __be32 *associativity)
-{
-	long rc;
-	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
-	u64 flags = 1;
-	int hwcpu = get_hard_smp_processor_id(cpu);
-
-	rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
-	vphn_unpack_associativity(retbuf, associativity);
-
-	return rc;
-}
-
 static long vphn_get_associativity(unsigned long cpu,
 					__be32 *associativity)
 {
 	long rc;
 
-	rc = hcall_vphn(cpu, associativity);
+	rc = hcall_vphn(get_hard_smp_processor_id(cpu),
+			VPHN_FLAG_VCPU, associativity);
 
 	switch (rc) {
-	case H_FUNCTION:
-		printk_once(KERN_INFO
-			"VPHN is not supported. Disabling polling...\n");
-		stop_topology_update();
-		break;
-	case H_HARDWARE:
-		printk(KERN_ERR
-			"hcall_vphn() experienced a hardware fault "
-			"preventing VPHN. Disabling polling...\n");
-		stop_topology_update();
-		break;
 	case H_SUCCESS:
 		dbg("VPHN hcall succeeded. Reset polling...\n");
-		timed_topology_update(0);
+		goto out;
+
+	case H_FUNCTION:
+		pr_err_ratelimited("VPHN unsupported. Disabling polling...\n");
+		break;
+	case H_HARDWARE:
+		pr_err_ratelimited("hcall_vphn() experienced a hardware fault "
+				"preventing VPHN. Disabling polling...\n");
+		break;
+	case H_PARAMETER:
+		pr_err_ratelimited("hcall_vphn() was passed an invalid parameter. "
+				"Disabling polling...\n");
+		break;
+	default:
+		pr_err_ratelimited("hcall_vphn() returned %ld. Disabling polling...\n"
+				, rc);
 		break;
 	}
-
+out:
 	return rc;
 }
 
@@ -1237,383 +1250,33 @@
 	return new_nid;
 }
 
-/*
- * Update the CPU maps and sysfs entries for a single CPU when its NUMA
- * characteristics change. This function doesn't perform any locking and is
- * only safe to call from stop_machine().
- */
-static int update_cpu_topology(void *data)
+int cpu_to_coregroup_id(int cpu)
 {
-	struct topology_update_data *update;
-	unsigned long cpu;
+	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+	int index;
 
-	if (!data)
-		return -EINVAL;
+	if (cpu < 0 || cpu > nr_cpu_ids)
+		return -1;
 
-	cpu = smp_processor_id();
-
-	for (update = data; update; update = update->next) {
-		int new_nid = update->new_nid;
-		if (cpu != update->cpu)
-			continue;
-
-		unmap_cpu_from_node(cpu);
-		map_cpu_to_node(cpu, new_nid);
-		set_cpu_numa_node(cpu, new_nid);
-		set_cpu_numa_mem(cpu, local_memory_node(new_nid));
-		vdso_getcpu_init();
-	}
-
-	return 0;
-}
-
-static int update_lookup_table(void *data)
-{
-	struct topology_update_data *update;
-
-	if (!data)
-		return -EINVAL;
-
-	/*
-	 * Upon topology update, the numa-cpu lookup table needs to be updated
-	 * for all threads in the core, including offline CPUs, to ensure that
-	 * future hotplug operations respect the cpu-to-node associativity
-	 * properly.
-	 */
-	for (update = data; update; update = update->next) {
-		int nid, base, j;
-
-		nid = update->new_nid;
-		base = cpu_first_thread_sibling(update->cpu);
-
-		for (j = 0; j < threads_per_core; j++) {
-			update_numa_cpu_lookup_table(base + j, nid);
-		}
-	}
-
-	return 0;
-}
-
-/*
- * Update the node maps and sysfs entries for each cpu whose home node
- * has changed. Returns 1 when the topology has changed, and 0 otherwise.
- *
- * cpus_locked says whether we already hold cpu_hotplug_lock.
- */
-int numa_update_cpu_topology(bool cpus_locked)
-{
-	unsigned int cpu, sibling, changed = 0;
-	struct topology_update_data *updates, *ud;
-	cpumask_t updated_cpus;
-	struct device *dev;
-	int weight, new_nid, i = 0;
-
-	if (!prrn_enabled && !vphn_enabled && topology_inited)
-		return 0;
-
-	weight = cpumask_weight(&cpu_associativity_changes_mask);
-	if (!weight)
-		return 0;
-
-	updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL);
-	if (!updates)
-		return 0;
-
-	cpumask_clear(&updated_cpus);
-
-	for_each_cpu(cpu, &cpu_associativity_changes_mask) {
-		/*
-		 * If siblings aren't flagged for changes, updates list
-		 * will be too short. Skip on this update and set for next
-		 * update.
-		 */
-		if (!cpumask_subset(cpu_sibling_mask(cpu),
-					&cpu_associativity_changes_mask)) {
-			pr_info("Sibling bits not set for associativity "
-					"change, cpu%d\n", cpu);
-			cpumask_or(&cpu_associativity_changes_mask,
-					&cpu_associativity_changes_mask,
-					cpu_sibling_mask(cpu));
-			cpu = cpu_last_thread_sibling(cpu);
-			continue;
-		}
-
-		new_nid = find_and_online_cpu_nid(cpu);
-
-		if (new_nid == numa_cpu_lookup_table[cpu]) {
-			cpumask_andnot(&cpu_associativity_changes_mask,
-					&cpu_associativity_changes_mask,
-					cpu_sibling_mask(cpu));
-			dbg("Assoc chg gives same node %d for cpu%d\n",
-					new_nid, cpu);
-			cpu = cpu_last_thread_sibling(cpu);
-			continue;
-		}
-
-		for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
-			ud = &updates[i++];
-			ud->next = &updates[i];
-			ud->cpu = sibling;
-			ud->new_nid = new_nid;
-			ud->old_nid = numa_cpu_lookup_table[sibling];
-			cpumask_set_cpu(sibling, &updated_cpus);
-		}
-		cpu = cpu_last_thread_sibling(cpu);
-	}
-
-	/*
-	 * Prevent processing of 'updates' from overflowing array
-	 * where last entry filled in a 'next' pointer.
-	 */
-	if (i)
-		updates[i-1].next = NULL;
-
-	pr_debug("Topology update for the following CPUs:\n");
-	if (cpumask_weight(&updated_cpus)) {
-		for (ud = &updates[0]; ud; ud = ud->next) {
-			pr_debug("cpu %d moving from node %d "
-					"to %d\n", ud->cpu,
-					ud->old_nid, ud->new_nid);
-		}
-	}
-
-	/*
-	 * In cases where we have nothing to update (because the updates list
-	 * is too short or because the new topology is same as the old one),
-	 * skip invoking update_cpu_topology() via stop-machine(). This is
-	 * necessary (and not just a fast-path optimization) since stop-machine
-	 * can end up electing a random CPU to run update_cpu_topology(), and
-	 * thus trick us into setting up incorrect cpu-node mappings (since
-	 * 'updates' is kzalloc()'ed).
-	 *
-	 * And for the similar reason, we will skip all the following updating.
-	 */
-	if (!cpumask_weight(&updated_cpus))
+	if (!coregroup_enabled)
 		goto out;
 
-	if (cpus_locked)
-		stop_machine_cpuslocked(update_cpu_topology, &updates[0],
-					&updated_cpus);
-	else
-		stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
+	if (!firmware_has_feature(FW_FEATURE_VPHN))
+		goto out;
 
-	/*
-	 * Update the numa-cpu lookup table with the new mappings, even for
-	 * offline CPUs. It is best to perform this update from the stop-
-	 * machine context.
-	 */
-	if (cpus_locked)
-		stop_machine_cpuslocked(update_lookup_table, &updates[0],
-					cpumask_of(raw_smp_processor_id()));
-	else
-		stop_machine(update_lookup_table, &updates[0],
-				cpumask_of(raw_smp_processor_id()));
+	if (vphn_get_associativity(cpu, associativity))
+		goto out;
 
-	for (ud = &updates[0]; ud; ud = ud->next) {
-		unregister_cpu_under_node(ud->cpu, ud->old_nid);
-		register_cpu_under_node(ud->cpu, ud->new_nid);
-
-		dev = get_cpu_device(ud->cpu);
-		if (dev)
-			kobject_uevent(&dev->kobj, KOBJ_CHANGE);
-		cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
-		changed = 1;
-	}
+	index = of_read_number(associativity, 1);
+	if (index > min_common_depth + 1)
+		return of_read_number(&associativity[index - 1], 1);
 
 out:
-	kfree(updates);
-	return changed;
+	return cpu_to_core_id(cpu);
 }
-
-int arch_update_cpu_topology(void)
-{
-	return numa_update_cpu_topology(true);
-}
-
-static void topology_work_fn(struct work_struct *work)
-{
-	rebuild_sched_domains();
-}
-static DECLARE_WORK(topology_work, topology_work_fn);
-
-static void topology_schedule_update(void)
-{
-	schedule_work(&topology_work);
-}
-
-static void topology_timer_fn(struct timer_list *unused)
-{
-	if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
-		topology_schedule_update();
-	else if (vphn_enabled) {
-		if (update_cpu_associativity_changes_mask() > 0)
-			topology_schedule_update();
-		reset_topology_timer();
-	}
-}
-static struct timer_list topology_timer;
-
-static void reset_topology_timer(void)
-{
-	if (vphn_enabled)
-		mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
-}
-
-#ifdef CONFIG_SMP
-
-static int dt_update_callback(struct notifier_block *nb,
-				unsigned long action, void *data)
-{
-	struct of_reconfig_data *update = data;
-	int rc = NOTIFY_DONE;
-
-	switch (action) {
-	case OF_RECONFIG_UPDATE_PROPERTY:
-		if (!of_prop_cmp(update->dn->type, "cpu") &&
-		    !of_prop_cmp(update->prop->name, "ibm,associativity")) {
-			u32 core_id;
-			of_property_read_u32(update->dn, "reg", &core_id);
-			rc = dlpar_cpu_readd(core_id);
-			rc = NOTIFY_OK;
-		}
-		break;
-	}
-
-	return rc;
-}
-
-static struct notifier_block dt_update_nb = {
-	.notifier_call = dt_update_callback,
-};
-
-#endif
-
-/*
- * Start polling for associativity changes.
- */
-int start_topology_update(void)
-{
-	int rc = 0;
-
-	if (!topology_updates_enabled)
-		return 0;
-
-	if (firmware_has_feature(FW_FEATURE_PRRN)) {
-		if (!prrn_enabled) {
-			prrn_enabled = 1;
-#ifdef CONFIG_SMP
-			rc = of_reconfig_notifier_register(&dt_update_nb);
-#endif
-		}
-	}
-	if (firmware_has_feature(FW_FEATURE_VPHN) &&
-	    lppaca_shared_proc(get_lppaca())) {
-		if (!vphn_enabled) {
-			vphn_enabled = 1;
-			setup_cpu_associativity_change_counters();
-			timer_setup(&topology_timer, topology_timer_fn,
-					TIMER_DEFERRABLE);
-			reset_topology_timer();
-		}
-	}
-
-	return rc;
-}
-
-/*
- * Disable polling for VPHN associativity changes.
- */
-int stop_topology_update(void)
-{
-	int rc = 0;
-
-	if (!topology_updates_enabled)
-		return 0;
-
-	if (prrn_enabled) {
-		prrn_enabled = 0;
-#ifdef CONFIG_SMP
-		rc = of_reconfig_notifier_unregister(&dt_update_nb);
-#endif
-	}
-	if (vphn_enabled) {
-		vphn_enabled = 0;
-		rc = del_timer_sync(&topology_timer);
-	}
-
-	return rc;
-}
-
-int prrn_is_enabled(void)
-{
-	return prrn_enabled;
-}
-
-void __init shared_proc_topology_init(void)
-{
-	if (lppaca_shared_proc(get_lppaca())) {
-		bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask),
-			    nr_cpumask_bits);
-		numa_update_cpu_topology(false);
-	}
-}
-
-static int topology_read(struct seq_file *file, void *v)
-{
-	if (vphn_enabled || prrn_enabled)
-		seq_puts(file, "on\n");
-	else
-		seq_puts(file, "off\n");
-
-	return 0;
-}
-
-static int topology_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, topology_read, NULL);
-}
-
-static ssize_t topology_write(struct file *file, const char __user *buf,
-				size_t count, loff_t *off)
-{
-	char kbuf[4]; /* "on" or "off" plus null. */
-	int read_len;
-
-	read_len = count < 3 ? count : 3;
-	if (copy_from_user(kbuf, buf, read_len))
-		return -EINVAL;
-
-	kbuf[read_len] = '\0';
-
-	if (!strncmp(kbuf, "on", 2)) {
-		topology_updates_enabled = true;
-		start_topology_update();
-	} else if (!strncmp(kbuf, "off", 3)) {
-		stop_topology_update();
-		topology_updates_enabled = false;
-	} else
-		return -EINVAL;
-
-	return count;
-}
-
-static const struct file_operations topology_ops = {
-	.read = seq_read,
-	.write = topology_write,
-	.open = topology_open,
-	.release = single_release
-};
 
 static int topology_update_init(void)
 {
-	start_topology_update();
-
-	if (vphn_enabled)
-		topology_schedule_update();
-
-	if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
-		return -ENOMEM;
-
 	topology_inited = 1;
 	return 0;
 }
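
Note: the following standalone sketch is not part of the patch above; it only illustrates the distance walk performed by the new cpu_distance() helper added in this change. The reference-point indices and associativity values are hypothetical; in the kernel they come from the ibm,associativity-reference-points property and the per-CPU associativity arrays, stored big-endian.

/* Illustrative userspace sketch of the cpu_distance() loop (assumed values). */
#include <stdio.h>

/* Hypothetical ibm,associativity-reference-points: indices into the
 * associativity array, most significant domain first. */
static const unsigned int distance_ref_points[] = { 4, 2 };
static const int distance_ref_points_depth = 2;

/* Same loop shape as the patched cpu_distance(): count leading reference
 * points at which the two associativity arrays disagree, stopping at the
 * first level where they match. */
static int cpu_distance(const unsigned int *cpu1_assoc,
			const unsigned int *cpu2_assoc)
{
	int dist = 0;
	int i, index;

	for (i = 0; i < distance_ref_points_depth; i++) {
		index = distance_ref_points[i];
		if (cpu1_assoc[index] == cpu2_assoc[index])
			break;
		dist++;
	}

	return dist;
}

int main(void)
{
	/* Made-up associativity arrays for two CPUs on different nodes. */
	const unsigned int cpu0_assoc[] = { 5, 0, 1, 0, 0, 8 };
	const unsigned int cpu8_assoc[] = { 5, 0, 1, 0, 1, 40 };

	/* Differ at reference point index 4, match at index 2 -> distance 1. */
	printf("relative distance = %d\n",
	       cpu_distance(cpu0_assoc, cpu8_assoc));
	return 0;
}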