forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/arch/x86/mm/numa.c
--- a/kernel/arch/x86/mm/numa.c
+++ b/kernel/arch/x86/mm/numa.c
@@ -1,10 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Common code for 32 and 64-bit NUMA */
 #include <linux/acpi.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/mmzone.h>
 #include <linux/ctype.h>
@@ -25,11 +25,8 @@
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
-static struct numa_meminfo numa_meminfo
-#ifndef CONFIG_MEMORY_HOTPLUG
-__initdata
-#endif
-;
+static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
+static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;
 
 static int numa_distance_cnt;
 static u8 *numa_distance;
@@ -40,14 +37,12 @@
                 return -EINVAL;
         if (!strncmp(opt, "off", 3))
                 numa_off = 1;
-#ifdef CONFIG_NUMA_EMU
         if (!strncmp(opt, "fake=", 5))
-                numa_emu_cmdline(opt + 5);
-#endif
-#ifdef CONFIG_ACPI_NUMA
+                return numa_emu_cmdline(opt + 5);
         if (!strncmp(opt, "noacpi", 6))
-                acpi_numa = -1;
-#endif
+                disable_srat();
+        if (!strncmp(opt, "nohmat", 6))
+                disable_hmat();
         return 0;
 }
 early_param("numa", numa_setup);
@@ -124,7 +119,7 @@
                 alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
 
         /* cpumask_of_node() will now work */
-        pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
+        pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids);
 }
 
 static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
@@ -169,6 +164,19 @@
 }
 
 /**
+ * numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another
+ * @dst: numa_meminfo to append block to
+ * @idx: Index of memblk to remove
+ * @src: numa_meminfo to remove memblk from
+ */
+static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
+                                         struct numa_meminfo *src)
+{
+        dst->blk[dst->nr_blks++] = src->blk[idx];
+        numa_remove_memblk_from(idx, src);
+}
+
+/**
  * numa_add_memblk - Add one numa_memblk to numa_meminfo
  * @nid: NUMA node ID of the new memblk
  * @start: Start address of the new memblk
@@ -196,15 +204,11 @@
          * Allocate node data. Try node-local memory and then any node.
          * Never allocate in DMA zone.
          */
-        nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
+        nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
         if (!nd_pa) {
-                nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES,
-                                              MEMBLOCK_ALLOC_ACCESSIBLE);
-                if (!nd_pa) {
-                        pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
-                               nd_size, nid);
-                        return;
-                }
+                pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
+                       nd_size, nid);
+                return;
         }
         nd = __va(nd_pa);
 
@@ -241,14 +245,25 @@
         for (i = 0; i < mi->nr_blks; i++) {
                 struct numa_memblk *bi = &mi->blk[i];
 
-                /* make sure all blocks are inside the limits */
-                bi->start = max(bi->start, low);
-                bi->end = min(bi->end, high);
+                /* move / save reserved memory ranges */
+                if (!memblock_overlaps_region(&memblock.memory,
+                                              bi->start, bi->end - bi->start)) {
+                        numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
+                        continue;
+                }
 
-                /* and there's no empty or non-exist block */
-                if (bi->start >= bi->end ||
-                    !memblock_overlaps_region(&memblock.memory,
-                        bi->start, bi->end - bi->start))
+                /* make sure all non-reserved blocks are inside the limits */
+                bi->start = max(bi->start, low);
+
+                /* preserve info for non-RAM areas above 'max_pfn': */
+                if (bi->end > high) {
+                        numa_add_memblk_to(bi->nid, high, bi->end,
+                                           &numa_reserved_meminfo);
+                        bi->end = high;
+                }
+
+                /* and there's no empty block */
+                if (bi->start >= bi->end)
                         numa_remove_memblk_from(i--, mi);
         }
 
@@ -505,9 +520,11 @@
          *   memory ranges, because quirks such as trim_snb_memory()
          *   reserve specific pages for Sandy Bridge graphics. ]
          */
-        for_each_memblock(reserved, mb_region) {
-                if (mb_region->nid != MAX_NUMNODES)
-                        node_set(mb_region->nid, reserved_nodemask);
+        for_each_reserved_mem_region(mb_region) {
+                int nid = memblock_get_region_node(mb_region);
+
+                if (nid != MAX_NUMNODES)
+                        node_set(nid, reserved_nodemask);
         }
 
         /*
@@ -530,7 +547,6 @@
 
 static int __init numa_register_memblks(struct numa_meminfo *mi)
 {
-        unsigned long uninitialized_var(pfn_align);
         int i, nid;
 
         /* Account for nodes with cpus and no memory */
@@ -558,15 +574,16 @@
          * If sections array is gonna be used for pfn -> nid mapping, check
          * whether its granularity is fine enough.
          */
-#ifdef NODE_NOT_IN_PAGE_FLAGS
-        pfn_align = node_map_pfn_alignment();
-        if (pfn_align && pfn_align < PAGES_PER_SECTION) {
-                printk(KERN_WARNING "Node alignment %LuMB < min %LuMB, rejecting NUMA config\n",
-                       PFN_PHYS(pfn_align) >> 20,
-                       PFN_PHYS(PAGES_PER_SECTION) >> 20);
-                return -EINVAL;
+        if (IS_ENABLED(NODE_NOT_IN_PAGE_FLAGS)) {
+                unsigned long pfn_align = node_map_pfn_alignment();
+
+                if (pfn_align && pfn_align < PAGES_PER_SECTION) {
+                        pr_warn("Node alignment %LuMB < min %LuMB, rejecting NUMA config\n",
+                                PFN_PHYS(pfn_align) >> 20,
+                                PFN_PHYS(PAGES_PER_SECTION) >> 20);
+                        return -EINVAL;
+                }
         }
-#endif
         if (!numa_meminfo_cover_memory(mi))
                 return -EINVAL;
 
@@ -703,7 +720,7 @@
  * x86_numa_init - Initialize NUMA
  *
  * Try each configured NUMA initialization method until one succeeds. The
- * last fallback is dummy single node config encomapssing whole memory and
+ * last fallback is dummy single node config encompassing whole memory and
  * never fails.
  */
 void __init x86_numa_init(void)
@@ -724,17 +741,35 @@
 
 static void __init init_memory_less_node(int nid)
 {
-        unsigned long zones_size[MAX_NR_ZONES] = {0};
-        unsigned long zholes_size[MAX_NR_ZONES] = {0};
-
         /* Allocate and initialize node data. Memory-less node is now online.*/
         alloc_node_data(nid);
-        free_area_init_node(nid, zones_size, 0, zholes_size);
+        free_area_init_memoryless_node(nid);
 
         /*
         * All zonelists will be built later in start_kernel() after per cpu
         * areas are initialized.
         */
+}
+
+/*
+ * A node may exist which has one or more Generic Initiators but no CPUs and no
+ * memory.
+ *
+ * This function must be called after init_cpu_to_node(), to ensure that any
+ * memoryless CPU nodes have already been brought online, and before the
+ * node_data[nid] is needed for zone list setup in build_all_zonelists().
+ *
+ * When this function is called, any nodes containing either memory and/or CPUs
+ * will already be online and there is no need to do anything extra, even if
+ * they also contain one or more Generic Initiators.
+ */
+void __init init_gi_nodes(void)
+{
+        int nid;
+
+        for_each_node_state(nid, N_GENERIC_INITIATOR)
+                if (!node_online(nid))
+                        init_memory_less_node(nid);
 }
 
 /*
@@ -826,7 +861,7 @@
                 return;
         }
         mask = node_to_cpumask_map[node];
-        if (!mask) {
+        if (!cpumask_available(mask)) {
                 pr_err("node_to_cpumask_map[%i] NULL\n", node);
                 dump_stack();
                 return;
@@ -865,14 +900,14 @@
  */
 const struct cpumask *cpumask_of_node(int node)
 {
-        if (node >= nr_node_ids) {
+        if ((unsigned)node >= nr_node_ids) {
                 printk(KERN_WARNING
-                        "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
+                        "cpumask_of_node(%d): (unsigned)node >= nr_node_ids(%u)\n",
                         node, nr_node_ids);
                 dump_stack();
                 return cpu_none_mask;
         }
-        if (node_to_cpumask_map[node] == NULL) {
+        if (!cpumask_available(node_to_cpumask_map[node])) {
                 printk(KERN_WARNING
                         "cpumask_of_node(%d): no node_to_cpumask_map!\n",
                         node);
@@ -885,16 +920,38 @@
 
 #endif  /* !CONFIG_DEBUG_PER_CPU_MAPS */
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-int memory_add_physaddr_to_nid(u64 start)
+#ifdef CONFIG_NUMA_KEEP_MEMINFO
+static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
 {
-        struct numa_meminfo *mi = &numa_meminfo;
-        int nid = mi->blk[0].nid;
         int i;
 
         for (i = 0; i < mi->nr_blks; i++)
                 if (mi->blk[i].start <= start && mi->blk[i].end > start)
-                        nid = mi->blk[i].nid;
+                        return mi->blk[i].nid;
+        return NUMA_NO_NODE;
+}
+
+int phys_to_target_node(phys_addr_t start)
+{
+        int nid = meminfo_to_nid(&numa_meminfo, start);
+
+        /*
+         * Prefer online nodes, but if reserved memory might be
+         * hot-added continue the search with reserved ranges.
+         */
+        if (nid != NUMA_NO_NODE)
+                return nid;
+
+        return meminfo_to_nid(&numa_reserved_meminfo, start);
+}
+EXPORT_SYMBOL_GPL(phys_to_target_node);
+
+int memory_add_physaddr_to_nid(u64 start)
+{
+        int nid = meminfo_to_nid(&numa_meminfo, start);
+
+        if (nid == NUMA_NO_NODE)
+                nid = numa_meminfo.blk[0].nid;
         return nid;
 }
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
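
For reference, a minimal sketch (not part of the diff above) of how a memory hot-add or device-DAX style caller might consume the two helpers this change exports, phys_to_target_node() and memory_add_physaddr_to_nid(). The wrapper name example_pick_node() and the fallback ordering are illustrative assumptions, not code from this tree, and the exact header declaring each helper can differ between kernel versions.

#include <linux/memory_hotplug.h>
#include <linux/numa.h>

/* Pick a NUMA node for a physical range that is about to be hot-added. */
static int example_pick_node(phys_addr_t start)
{
        /*
         * phys_to_target_node() also searches numa_reserved_meminfo, so it
         * can resolve reserved / not-yet-onlined ranges to a target node.
         */
        int nid = phys_to_target_node(start);

        if (nid == NUMA_NO_NODE)
                /* Fall back to the helper used for regular memory hot-add. */
                nid = memory_add_physaddr_to_nid(start);

        return nid;
}

Note that memory_add_physaddr_to_nid() as implemented in the diff never returns NUMA_NO_NODE (it falls back to numa_meminfo.blk[0].nid), so a wrapper like this always yields a usable node id.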