From 072de836f53be56a70cecf70b43ae43b7ce17376 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Mon, 11 Dec 2023 10:08:36 +0000
Subject: [PATCH] mk-rootfs.sh
---
kernel/arch/x86/mm/numa.c | 165 +++++++++++++++++++++++++++++++++++++------------------
1 files changed, 111 insertions(+), 54 deletions(-)
diff --git a/kernel/arch/x86/mm/numa.c b/kernel/arch/x86/mm/numa.c
index fa15085..9dc3199 100644
--- a/kernel/arch/x86/mm/numa.c
+++ b/kernel/arch/x86/mm/numa.c
@@ -1,10 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Common code for 32 and 64-bit NUMA */
#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/init.h>
-#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/mmzone.h>
#include <linux/ctype.h>
@@ -25,11 +25,8 @@
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
-static struct numa_meminfo numa_meminfo
-#ifndef CONFIG_MEMORY_HOTPLUG
-__initdata
-#endif
-;
+static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
+static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;
static int numa_distance_cnt;
static u8 *numa_distance;
@@ -40,14 +37,12 @@
return -EINVAL;
if (!strncmp(opt, "off", 3))
numa_off = 1;
-#ifdef CONFIG_NUMA_EMU
if (!strncmp(opt, "fake=", 5))
- numa_emu_cmdline(opt + 5);
-#endif
-#ifdef CONFIG_ACPI_NUMA
+ return numa_emu_cmdline(opt + 5);
if (!strncmp(opt, "noacpi", 6))
- acpi_numa = -1;
-#endif
+ disable_srat();
+ if (!strncmp(opt, "nohmat", 6))
+ disable_hmat();
return 0;
}
early_param("numa", numa_setup);
@@ -124,7 +119,7 @@
alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
/* cpumask_of_node() will now work */
- pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
+ pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids);
}
static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
@@ -169,6 +164,19 @@
}
/**
+ * numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another
+ * @dst: numa_meminfo to append block to
+ * @idx: Index of memblk to remove
+ * @src: numa_meminfo to remove memblk from
+ */
+static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
+ struct numa_meminfo *src)
+{
+ dst->blk[dst->nr_blks++] = src->blk[idx];
+ numa_remove_memblk_from(idx, src);
+}
+
+/**
* numa_add_memblk - Add one numa_memblk to numa_meminfo
* @nid: NUMA node ID of the new memblk
* @start: Start address of the new memblk
@@ -196,15 +204,11 @@
* Allocate node data. Try node-local memory and then any node.
* Never allocate in DMA zone.
*/
- nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
+ nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
if (!nd_pa) {
- nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES,
- MEMBLOCK_ALLOC_ACCESSIBLE);
- if (!nd_pa) {
- pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
- nd_size, nid);
- return;
- }
+ pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
+ nd_size, nid);
+ return;
}
nd = __va(nd_pa);
@@ -241,14 +245,25 @@
for (i = 0; i < mi->nr_blks; i++) {
struct numa_memblk *bi = &mi->blk[i];
- /* make sure all blocks are inside the limits */
- bi->start = max(bi->start, low);
- bi->end = min(bi->end, high);
+ /* move / save reserved memory ranges */
+ if (!memblock_overlaps_region(&memblock.memory,
+ bi->start, bi->end - bi->start)) {
+ numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
+ continue;
+ }
- /* and there's no empty or non-exist block */
- if (bi->start >= bi->end ||
- !memblock_overlaps_region(&memblock.memory,
- bi->start, bi->end - bi->start))
+ /* make sure all non-reserved blocks are inside the limits */
+ bi->start = max(bi->start, low);
+
+ /* preserve info for non-RAM areas above 'max_pfn': */
+ if (bi->end > high) {
+ numa_add_memblk_to(bi->nid, high, bi->end,
+ &numa_reserved_meminfo);
+ bi->end = high;
+ }
+
+ /* and there's no empty block */
+ if (bi->start >= bi->end)
numa_remove_memblk_from(i--, mi);
}
@@ -505,9 +520,11 @@
* memory ranges, because quirks such as trim_snb_memory()
* reserve specific pages for Sandy Bridge graphics. ]
*/
- for_each_memblock(reserved, mb_region) {
- if (mb_region->nid != MAX_NUMNODES)
- node_set(mb_region->nid, reserved_nodemask);
+ for_each_reserved_mem_region(mb_region) {
+ int nid = memblock_get_region_node(mb_region);
+
+ if (nid != MAX_NUMNODES)
+ node_set(nid, reserved_nodemask);
}
/*
@@ -530,7 +547,6 @@
static int __init numa_register_memblks(struct numa_meminfo *mi)
{
- unsigned long uninitialized_var(pfn_align);
int i, nid;
/* Account for nodes with cpus and no memory */
@@ -558,15 +574,16 @@
* If sections array is gonna be used for pfn -> nid mapping, check
* whether its granularity is fine enough.
*/
-#ifdef NODE_NOT_IN_PAGE_FLAGS
- pfn_align = node_map_pfn_alignment();
- if (pfn_align && pfn_align < PAGES_PER_SECTION) {
- printk(KERN_WARNING "Node alignment %LuMB < min %LuMB, rejecting NUMA config\n",
- PFN_PHYS(pfn_align) >> 20,
- PFN_PHYS(PAGES_PER_SECTION) >> 20);
- return -EINVAL;
+ if (IS_ENABLED(NODE_NOT_IN_PAGE_FLAGS)) {
+ unsigned long pfn_align = node_map_pfn_alignment();
+
+ if (pfn_align && pfn_align < PAGES_PER_SECTION) {
+ pr_warn("Node alignment %LuMB < min %LuMB, rejecting NUMA config\n",
+ PFN_PHYS(pfn_align) >> 20,
+ PFN_PHYS(PAGES_PER_SECTION) >> 20);
+ return -EINVAL;
+ }
}
-#endif
if (!numa_meminfo_cover_memory(mi))
return -EINVAL;
@@ -703,7 +720,7 @@
* x86_numa_init - Initialize NUMA
*
* Try each configured NUMA initialization method until one succeeds. The
- * last fallback is dummy single node config encomapssing whole memory and
+ * last fallback is dummy single node config encompassing whole memory and
* never fails.
*/
void __init x86_numa_init(void)
@@ -724,17 +741,35 @@
static void __init init_memory_less_node(int nid)
{
- unsigned long zones_size[MAX_NR_ZONES] = {0};
- unsigned long zholes_size[MAX_NR_ZONES] = {0};
-
/* Allocate and initialize node data. Memory-less node is now online.*/
alloc_node_data(nid);
- free_area_init_node(nid, zones_size, 0, zholes_size);
+ free_area_init_memoryless_node(nid);
/*
* All zonelists will be built later in start_kernel() after per cpu
* areas are initialized.
*/
+}
+
+/*
+ * A node may exist which has one or more Generic Initiators but no CPUs and no
+ * memory.
+ *
+ * This function must be called after init_cpu_to_node(), to ensure that any
+ * memoryless CPU nodes have already been brought online, and before the
+ * node_data[nid] is needed for zone list setup in build_all_zonelists().
+ *
+ * When this function is called, any nodes containing either memory and/or CPUs
+ * will already be online and there is no need to do anything extra, even if
+ * they also contain one or more Generic Initiators.
+ */
+void __init init_gi_nodes(void)
+{
+ int nid;
+
+ for_each_node_state(nid, N_GENERIC_INITIATOR)
+ if (!node_online(nid))
+ init_memory_less_node(nid);
}
/*
@@ -826,7 +861,7 @@
return;
}
mask = node_to_cpumask_map[node];
- if (!mask) {
+ if (!cpumask_available(mask)) {
pr_err("node_to_cpumask_map[%i] NULL\n", node);
dump_stack();
return;
@@ -865,14 +900,14 @@
*/
const struct cpumask *cpumask_of_node(int node)
{
- if (node >= nr_node_ids) {
+ if ((unsigned)node >= nr_node_ids) {
printk(KERN_WARNING
- "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
+ "cpumask_of_node(%d): (unsigned)node >= nr_node_ids(%u)\n",
node, nr_node_ids);
dump_stack();
return cpu_none_mask;
}
- if (node_to_cpumask_map[node] == NULL) {
+ if (!cpumask_available(node_to_cpumask_map[node])) {
printk(KERN_WARNING
"cpumask_of_node(%d): no node_to_cpumask_map!\n",
node);
@@ -885,16 +920,38 @@
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
-#ifdef CONFIG_MEMORY_HOTPLUG
-int memory_add_physaddr_to_nid(u64 start)
+#ifdef CONFIG_NUMA_KEEP_MEMINFO
+static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
{
- struct numa_meminfo *mi = &numa_meminfo;
- int nid = mi->blk[0].nid;
int i;
for (i = 0; i < mi->nr_blks; i++)
if (mi->blk[i].start <= start && mi->blk[i].end > start)
- nid = mi->blk[i].nid;
+ return mi->blk[i].nid;
+ return NUMA_NO_NODE;
+}
+
+int phys_to_target_node(phys_addr_t start)
+{
+ int nid = meminfo_to_nid(&numa_meminfo, start);
+
+ /*
+ * Prefer online nodes, but if reserved memory might be
+ * hot-added continue the search with reserved ranges.
+ */
+ if (nid != NUMA_NO_NODE)
+ return nid;
+
+ return meminfo_to_nid(&numa_reserved_meminfo, start);
+}
+EXPORT_SYMBOL_GPL(phys_to_target_node);
+
+int memory_add_physaddr_to_nid(u64 start)
+{
+ int nid = meminfo_to_nid(&numa_meminfo, start);
+
+ if (nid == NUMA_NO_NODE)
+ nid = numa_meminfo.blk[0].nid;
return nid;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
--
Gitblit v1.6.2