```diff
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -1,10 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Common code for 32 and 64-bit NUMA */
 #include <linux/acpi.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/mmzone.h>
 #include <linux/ctype.h>
```
```diff
@@ -25,11 +25,8 @@
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
-static struct numa_meminfo numa_meminfo
-#ifndef CONFIG_MEMORY_HOTPLUG
-__initdata
-#endif
-;
+static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
+static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;
 
 static int numa_distance_cnt;
 static u8 *numa_distance;
```
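The two meminfo tables now share one annotation instead of the open-coded `#ifndef` block, and the new `numa_reserved_meminfo` records ranges that are described by firmware but not present in `memblock.memory`. The macro itself is defined outside this excerpt; a minimal sketch of what it presumably expands to (the `CONFIG_NUMA_KEEP_MEMINFO` symbol used in current trees is an assumption here):

```c
/*
 * Sketch, not part of this diff: keep the meminfo tables past boot
 * only when something (memory hotplug, phys_to_target_node()) may
 * still need them; otherwise let them be freed with initdata.
 */
#ifdef CONFIG_NUMA_KEEP_MEMINFO		/* assumed config symbol */
#define __initdata_or_meminfo
#else
#define __initdata_or_meminfo __initdata
#endif
```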
```diff
@@ -40,14 +37,12 @@
 		return -EINVAL;
 	if (!strncmp(opt, "off", 3))
 		numa_off = 1;
-#ifdef CONFIG_NUMA_EMU
 	if (!strncmp(opt, "fake=", 5))
-		numa_emu_cmdline(opt + 5);
-#endif
-#ifdef CONFIG_ACPI_NUMA
+		return numa_emu_cmdline(opt + 5);
 	if (!strncmp(opt, "noacpi", 6))
-		acpi_numa = -1;
-#endif
+		disable_srat();
+	if (!strncmp(opt, "nohmat", 6))
+		disable_hmat();
 	return 0;
 }
 early_param("numa", numa_setup);
```
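With the `#ifdef`s gone, every option (`numa=off`, `numa=fake=8`, `numa=noacpi`, and the new `numa=nohmat`) is parsed unconditionally, and `numa=fake=` now propagates the emulation parser's return value. This works because the helpers have empty inline stubs when the corresponding support is compiled out; a hedged sketch of that stub pattern (the real declarations live in the ACPI/NUMA-emulation headers):

```c
/* Sketch of the stub pattern that lets the #ifdefs disappear: */
#ifdef CONFIG_ACPI_NUMA
void disable_srat(void);
#else
static inline void disable_srat(void) { }
#endif
```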
```diff
@@ -124,7 +119,7 @@
 	alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
 
 	/* cpumask_of_node() will now work */
-	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
+	pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids);
 }
 
 static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
```
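`nr_node_ids` is an unsigned quantity, so `%d` was the wrong format specifier:

```c
/* From include/linux/nodemask.h in current trees (on single-node
 * builds it collapses to a constant); hence the %u: */
extern unsigned int nr_node_ids;
```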
```diff
@@ -169,6 +164,19 @@
 }
 
 /**
+ * numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another
+ * @dst: numa_meminfo to append block to
+ * @idx: Index of memblk to remove
+ * @src: numa_meminfo to remove memblk from
+ */
+static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
+					 struct numa_meminfo *src)
+{
+	dst->blk[dst->nr_blks++] = src->blk[idx];
+	numa_remove_memblk_from(idx, src);
+}
+
+/**
  * numa_add_memblk - Add one numa_memblk to numa_meminfo
  * @nid: NUMA node ID of the new memblk
  * @start: Start address of the new memblk
```
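The new helper appends the block to `dst` and then compacts `src`. For reference, `numa_remove_memblk_from()` elsewhere in this file is roughly (reproduced from memory, treat details as approximate):

```c
void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
{
	mi->nr_blks--;
	memmove(&mi->blk[idx], &mi->blk[idx + 1],
		(mi->nr_blks - idx) * sizeof(mi->blk[idx]));
}
```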
```diff
@@ -196,15 +204,11 @@
 	 * Allocate node data.  Try node-local memory and then any node.
 	 * Never allocate in DMA zone.
 	 */
-	nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
+	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
 	if (!nd_pa) {
-		nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES,
-					      MEMBLOCK_ALLOC_ACCESSIBLE);
-		if (!nd_pa) {
-			pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
-			       nd_size, nid);
-			return;
-		}
+		pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
+		       nd_size, nid);
+		return;
 	}
 	nd = __va(nd_pa);
 
```
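The open-coded two-step fallback could be dropped because `memblock_phys_alloc_try_nid()` already retries on any node when the requested node has nothing suitable. A rough sketch of what it does in mm/memblock.c (the internal helper name and argument order are from memory, treat as an assumption):

```c
phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size,
					       phys_addr_t align, int nid)
{
	/* exact_nid == false: fall back to any node if nid is exhausted */
	return memblock_alloc_range_nid(size, align, 0,
					MEMBLOCK_ALLOC_ACCESSIBLE, nid, false);
}
```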
```diff
@@ -241,14 +245,25 @@
 	for (i = 0; i < mi->nr_blks; i++) {
 		struct numa_memblk *bi = &mi->blk[i];
 
-		/* make sure all blocks are inside the limits */
-		bi->start = max(bi->start, low);
-		bi->end = min(bi->end, high);
+		/* move / save reserved memory ranges */
+		if (!memblock_overlaps_region(&memblock.memory,
+					      bi->start, bi->end - bi->start)) {
+			numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
+			continue;
+		}
 
-		/* and there's no empty or non-exist block */
-		if (bi->start >= bi->end ||
-		    !memblock_overlaps_region(&memblock.memory,
-			bi->start, bi->end - bi->start))
+		/* make sure all non-reserved blocks are inside the limits */
+		bi->start = max(bi->start, low);
+
+		/* preserve info for non-RAM areas above 'max_pfn': */
+		if (bi->end > high) {
+			numa_add_memblk_to(bi->nid, high, bi->end,
+					   &numa_reserved_meminfo);
+			bi->end = high;
+		}
+
+		/* and there's no empty block */
+		if (bi->start >= bi->end)
 			numa_remove_memblk_from(i--, mi);
 	}
 
```
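Instead of silently discarding them, ranges that firmware describes but that are absent from `memblock.memory` are now parked in `numa_reserved_meminfo`, and non-RAM tails above `max_pfn` are saved before clipping. A hypothetical walk-through with invented addresses:

```c
/*
 * Hypothetical input (addresses made up for illustration):
 *
 *   blk[0] = { 0x0,         0x80000000,  nid 0 }  RAM
 *   blk[1] = { 0x100000000, 0x180000000, nid 1 }  reserved, not in
 *                                                 memblock.memory
 *
 * blk[1] fails memblock_overlaps_region() and is moved wholesale to
 * numa_reserved_meminfo (the i-- keeps the loop index valid after the
 * removal).  If blk[0].end exceeded 'high', only the [high, end) tail
 * would be recorded there before blk[0] is clipped to 'high'.
 */
```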
```diff
@@ -505,9 +520,11 @@
 	 * memory ranges, because quirks such as trim_snb_memory()
 	 * reserve specific pages for Sandy Bridge graphics. ]
 	 */
-	for_each_memblock(reserved, mb_region) {
-		if (mb_region->nid != MAX_NUMNODES)
-			node_set(mb_region->nid, reserved_nodemask);
+	for_each_reserved_mem_region(mb_region) {
+		int nid = memblock_get_region_node(mb_region);
+
+		if (nid != MAX_NUMNODES)
+			node_set(nid, reserved_nodemask);
 	}
 
 	/*
```
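`memblock_get_region_node()` replaces the direct `->nid` access, so the code keeps compiling on builds where the region has no `nid` field. Roughly what include/linux/memblock.h provides (the guarding config symbol has varied across releases):

```c
#ifdef CONFIG_NUMA
static inline int memblock_get_region_node(const struct memblock_region *r)
{
	return r->nid;
}
#else
static inline int memblock_get_region_node(const struct memblock_region *r)
{
	return 0;
}
#endif
```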
```diff
@@ -530,7 +547,6 @@
 
 static int __init numa_register_memblks(struct numa_meminfo *mi)
 {
-	unsigned long uninitialized_var(pfn_align);
 	int i, nid;
 
 	/* Account for nodes with cpus and no memory */
```
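`uninitialized_var()` was a warning-suppression macro removed from the tree because it hid genuinely uninitialized uses; here `pfn_align` simply moves into the block below, where it is assigned immediately. The old GCC flavor of the macro was just self-assignment (from memory):

```c
/* Historical, since deleted from include/linux/compiler*.h: */
#define uninitialized_var(x) x = x
```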
```diff
@@ -558,15 +574,16 @@
 	 * If sections array is gonna be used for pfn -> nid mapping, check
 	 * whether its granularity is fine enough.
 	 */
-#ifdef NODE_NOT_IN_PAGE_FLAGS
-	pfn_align = node_map_pfn_alignment();
-	if (pfn_align && pfn_align < PAGES_PER_SECTION) {
-		printk(KERN_WARNING "Node alignment %LuMB < min %LuMB, rejecting NUMA config\n",
-		       PFN_PHYS(pfn_align) >> 20,
-		       PFN_PHYS(PAGES_PER_SECTION) >> 20);
-		return -EINVAL;
+	if (IS_ENABLED(NODE_NOT_IN_PAGE_FLAGS)) {
+		unsigned long pfn_align = node_map_pfn_alignment();
+
+		if (pfn_align && pfn_align < PAGES_PER_SECTION) {
+			pr_warn("Node alignment %LuMB < min %LuMB, rejecting NUMA config\n",
+				PFN_PHYS(pfn_align) >> 20,
+				PFN_PHYS(PAGES_PER_SECTION) >> 20);
+			return -EINVAL;
+		}
 	}
-#endif
 	if (!numa_meminfo_cover_memory(mi))
 		return -EINVAL;
 
```
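Converting `#ifdef` to `IS_ENABLED()` keeps the block visible to the compiler on all configurations. One subtlety: `IS_ENABLED()` only evaluates true when its argument expands to the literal `1` (or is a `CONFIG_` option set to `y`/`m`), so `NODE_NOT_IN_PAGE_FLAGS` must be defined as `1` rather than left bare for this to fire. Condensed from include/linux/kconfig.h:

```c
/* The token-pasting trick yields 1 only when `option` expands to the
 * literal 1; IS_ENABLED(option) additionally accepts option##_MODULE. */
#define __ARG_PLACEHOLDER_1	0,
#define __take_second_arg(__ignored, val, ...)	val
#define __is_defined(x)		___is_defined(x)
#define ___is_defined(val)	____is_defined(__ARG_PLACEHOLDER_##val)
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
```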
```diff
@@ -703,7 +720,7 @@
  * x86_numa_init - Initialize NUMA
  *
  * Try each configured NUMA initialization method until one succeeds.  The
- * last fallback is dummy single node config encomapssing whole memory and
+ * last fallback is dummy single node config encompassing whole memory and
  * never fails.
  */
 void __init x86_numa_init(void)
```
```diff
@@ -724,17 +741,35 @@
 
 static void __init init_memory_less_node(int nid)
 {
-	unsigned long zones_size[MAX_NR_ZONES] = {0};
-	unsigned long zholes_size[MAX_NR_ZONES] = {0};
-
 	/* Allocate and initialize node data. Memory-less node is now online.*/
 	alloc_node_data(nid);
-	free_area_init_node(nid, zones_size, 0, zholes_size);
+	free_area_init_memoryless_node(nid);
 
 	/*
 	 * All zonelists will be built later in start_kernel() after per cpu
 	 * areas are initialized.
 	 */
+}
+
+/*
+ * A node may exist which has one or more Generic Initiators but no CPUs and no
+ * memory.
+ *
+ * This function must be called after init_cpu_to_node(), to ensure that any
+ * memoryless CPU nodes have already been brought online, and before the
+ * node_data[nid] is needed for zone list setup in build_all_zonelists().
+ *
+ * When this function is called, any nodes containing either memory and/or CPUs
+ * will already be online and there is no need to do anything extra, even if
+ * they also contain one or more Generic Initiators.
+ */
+void __init init_gi_nodes(void)
+{
+	int nid;
+
+	for_each_node_state(nid, N_GENERIC_INITIATOR)
+		if (!node_online(nid))
+			init_memory_less_node(nid);
 }
 
 /*
```
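`N_GENERIC_INITIATOR` is set while parsing the firmware's Generic Initiator affinity entries (ACPI SRAT); any such node that did not also come online through CPUs or memory gets its node data allocated here. `for_each_node_state()` is the standard nodemask iterator, from include/linux/nodemask.h for the `MAX_NUMNODES > 1` case:

```c
#define for_each_node_state(__node, __state) \
	for_each_node_mask((__node), node_states[__state])
```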
```diff
@@ -826,7 +861,7 @@
 		return;
 	}
 	mask = node_to_cpumask_map[node];
-	if (!mask) {
+	if (!cpumask_available(mask)) {
 		pr_err("node_to_cpumask_map[%i] NULL\n", node);
 		dump_stack();
 		return;
```
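`cpumask_var_t` is a real pointer only with `CONFIG_CPUMASK_OFFSTACK`; on other configurations it is a one-element array, so a bare NULL test is always false and draws compiler warnings. `cpumask_available()` covers both representations; roughly, from include/linux/cpumask.h:

```c
#ifdef CONFIG_CPUMASK_OFFSTACK
typedef struct cpumask *cpumask_var_t;

static inline bool cpumask_available(cpumask_var_t mask)
{
	return mask != NULL;
}
#else
typedef struct cpumask cpumask_var_t[1];

static inline bool cpumask_available(cpumask_var_t mask)
{
	return true;
}
#endif
```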
```diff
@@ -865,14 +900,14 @@
  */
 const struct cpumask *cpumask_of_node(int node)
 {
-	if (node >= nr_node_ids) {
+	if ((unsigned)node >= nr_node_ids) {
 		printk(KERN_WARNING
-			"cpumask_of_node(%d): node > nr_node_ids(%d)\n",
+			"cpumask_of_node(%d): (unsigned)node >= nr_node_ids(%u)\n",
 			node, nr_node_ids);
 		dump_stack();
 		return cpu_none_mask;
 	}
-	if (node_to_cpumask_map[node] == NULL) {
+	if (!cpumask_available(node_to_cpumask_map[node])) {
 		printk(KERN_WARNING
 			"cpumask_of_node(%d): no node_to_cpumask_map!\n",
 			node);
```
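The cast folds the previously missing `node < 0` check into the existing bounds test, since a negative value wraps to a large unsigned one:

```c
/* Illustration only: one unsigned compare rejects both a negative and
 * a too-large node.  With nr_node_ids == 4, (unsigned)-1 is
 * 0xffffffff, so node == -1 is rejected just like node == 5. */
static bool node_id_valid(int node)
{
	return (unsigned)node < nr_node_ids;
}
```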
```diff
@@ -885,16 +920,38 @@
 
 #endif	/* !CONFIG_DEBUG_PER_CPU_MAPS */
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-int memory_add_physaddr_to_nid(u64 start)
+#ifdef CONFIG_NUMA_KEEP_MEMINFO
+static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
 {
-	struct numa_meminfo *mi = &numa_meminfo;
-	int nid = mi->blk[0].nid;
 	int i;
 
 	for (i = 0; i < mi->nr_blks; i++)
 		if (mi->blk[i].start <= start && mi->blk[i].end > start)
-			nid = mi->blk[i].nid;
+			return mi->blk[i].nid;
+	return NUMA_NO_NODE;
+}
+
+int phys_to_target_node(phys_addr_t start)
+{
+	int nid = meminfo_to_nid(&numa_meminfo, start);
+
+	/*
+	 * Prefer online nodes, but if reserved memory might be
+	 * hot-added continue the search with reserved ranges.
+	 */
+	if (nid != NUMA_NO_NODE)
+		return nid;
+
+	return meminfo_to_nid(&numa_reserved_meminfo, start);
+}
+EXPORT_SYMBOL_GPL(phys_to_target_node);
+
+int memory_add_physaddr_to_nid(u64 start)
+{
+	int nid = meminfo_to_nid(&numa_meminfo, start);
+
+	if (nid == NUMA_NO_NODE)
+		nid = numa_meminfo.blk[0].nid;
 	return nid;
 }
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
```
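Note the difference between the two lookups: `phys_to_target_node()` may legitimately return `NUMA_NO_NODE` (after also searching the reserved ranges saved earlier), while `memory_add_physaddr_to_nid()` always produces some node, falling back to the first block. A hypothetical caller, loosely in the style of the device-dax code (function name invented for illustration):

```c
/* Hypothetical: pick the node a dax region would land on if its
 * memory were hot-added as system RAM. */
static int dax_region_node(struct resource *res)
{
	int nid = phys_to_target_node(res->start);

	if (nid == NUMA_NO_NODE)
		nid = memory_add_physaddr_to_nid(res->start);
	return nid;
}
```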