.. | ..
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later
1 | 2 | /*
2 | 3 | * pSeries NUMA support
3 | 4 | *
4 | 5 | * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
5 | | - *
6 | | - * This program is free software; you can redistribute it and/or
7 | | - * modify it under the terms of the GNU General Public License
8 | | - * as published by the Free Software Foundation; either version
9 | | - * 2 of the License, or (at your option) any later version.
10 | 6 | */
11 | 7 | #define pr_fmt(fmt) "numa: " fmt
12 | 8 |
13 | 9 | #include <linux/threads.h>
14 | | -#include <linux/bootmem.h>
| 10 | +#include <linux/memblock.h>
15 | 11 | #include <linux/init.h>
16 | 12 | #include <linux/mm.h>
17 | 13 | #include <linux/mmzone.h>
.. | ..
19 | 15 | #include <linux/nodemask.h>
20 | 16 | #include <linux/cpu.h>
21 | 17 | #include <linux/notifier.h>
22 | | -#include <linux/memblock.h>
23 | 18 | #include <linux/of.h>
24 | 19 | #include <linux/pfn.h>
25 | 20 | #include <linux/cpuset.h>
.. | ..
33 | 28 | #include <asm/sparsemem.h>
34 | 29 | #include <asm/prom.h>
35 | 30 | #include <asm/smp.h>
36 | | -#include <asm/cputhreads.h>
37 | 31 | #include <asm/topology.h>
38 | 32 | #include <asm/firmware.h>
39 | 33 | #include <asm/paca.h>
.. | ..
85 | 79 | alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
86 | 80 |
87 | 81 | /* cpumask_of_node() will now work */
88 | | - dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
| 82 | + dbg("Node to cpumask map for %u nodes\n", nr_node_ids);
89 | 83 | }
90 | 84 |
91 | 85 | static int __init fake_numa_create_new_node(unsigned long end_pfn,
.. | ..
169 | 163 | }
170 | 164 | #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
171 | 165 |
| 166 | +int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
| 167 | +{
| 168 | + int dist = 0;
| 169 | +
| 170 | + int i, index;
| 171 | +
| 172 | + for (i = 0; i < distance_ref_points_depth; i++) {
| 173 | + index = be32_to_cpu(distance_ref_points[i]);
| 174 | + if (cpu1_assoc[index] == cpu2_assoc[index])
| 175 | + break;
| 176 | + dist++;
| 177 | + }
| 178 | +
| 179 | + return dist;
| 180 | +}
| 181 | +
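The new cpu_distance() helper counts how many ibm,associativity-reference-points levels differ between two associativity arrays. A minimal user-space sketch of the same walk, using made-up reference points and associativity values (assoc_distance, ref_points and the sample arrays are illustrative stand-ins, not kernel API):

#include <stdio.h>

/* Illustrative stand-ins for distance_ref_points[] and its depth. */
static const int ref_points[] = { 4, 2 };
static const int ref_points_depth = 2;

/* Distance grows by one for each reference point at which the arrays differ. */
static int assoc_distance(const unsigned int *a, const unsigned int *b)
{
        int dist = 0;

        for (int i = 0; i < ref_points_depth; i++) {
                int index = ref_points[i];

                if (a[index] == b[index])
                        break;
                dist++;
        }
        return dist;
}

int main(void)
{
        /* Made-up associativity arrays; element 0 would be the entry count. */
        unsigned int cpu0[] = { 5, 0, 1, 0, 7 };
        unsigned int cpu1[] = { 5, 0, 2, 0, 9 };

        printf("distance = %d\n", assoc_distance(cpu0, cpu1)); /* prints 2 */
        return 0;
}

The kernel version does the same walk but reads each reference point with be32_to_cpu(), since the device-tree property is big-endian.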
172 | 182 | /* must hold reference to node during call */
173 | 183 | static const __be32 *of_get_associativity(struct device_node *dev)
174 | 184 | {
.. | ..
211 | 221 | }
212 | 222 | }
213 | 223 |
214 | | -/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
| 224 | +/*
| 225 | + * Returns nid in the range [0..nr_node_ids-1], or -1 if no useful NUMA
215 | 226 | * info is found.
216 | 227 | */
217 | 228 | static int associativity_to_nid(const __be32 *associativity)
218 | 229 | {
219 | | - int nid = -1;
| 230 | + int nid = NUMA_NO_NODE;
220 | 231 |
221 | | - if (min_common_depth == -1)
| 232 | + if (!numa_enabled)
222 | 233 | goto out;
223 | 234 |
224 | 235 | if (of_read_number(associativity, 1) >= min_common_depth)
225 | 236 | nid = of_read_number(&associativity[min_common_depth], 1);
226 | 237 |
227 | 238 | /* POWER4 LPAR uses 0xffff as invalid node */
228 | | - if (nid == 0xffff || nid >= MAX_NUMNODES)
229 | | - nid = -1;
| 239 | + if (nid == 0xffff || nid >= nr_node_ids)
| 240 | + nid = NUMA_NO_NODE;
230 | 241 |
231 | 242 | if (nid > 0 &&
232 | 243 | of_read_number(associativity, 1) >= distance_ref_points_depth) {
.. | ..
245 | 256 | */
246 | 257 | static int of_node_to_nid_single(struct device_node *device)
247 | 258 | {
248 | | - int nid = -1;
| 259 | + int nid = NUMA_NO_NODE;
249 | 260 | const __be32 *tmp;
250 | 261 |
251 | 262 | tmp = of_get_associativity(device);
.. | ..
257 | 268 | /* Walk the device tree upwards, looking for an associativity id */
258 | 269 | int of_node_to_nid(struct device_node *device)
259 | 270 | {
260 | | - int nid = -1;
| 271 | + int nid = NUMA_NO_NODE;
261 | 272 |
262 | 273 | of_node_get(device);
263 | 274 | while (device) {
.. | ..
419 | 430 | * This is like of_node_to_nid_single() for memory represented in the
420 | 431 | * ibm,dynamic-reconfiguration-memory node.
421 | 432 | */
422 | | -static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
| 433 | +int of_drconf_to_nid_single(struct drmem_lmb *lmb)
423 | 434 | {
424 | 435 | struct assoc_arrays aa = { .arrays = NULL };
425 | | - int default_nid = 0;
| 436 | + int default_nid = NUMA_NO_NODE;
426 | 437 | int nid = default_nid;
427 | 438 | int rc, index;
| 439 | +
| 440 | + if ((min_common_depth < 0) || !numa_enabled)
| 441 | + return default_nid;
428 | 442 |
429 | 443 | rc = of_get_assoc_arrays(&aa);
430 | 444 | if (rc)
431 | 445 | return default_nid;
432 | 446 |
433 | | - if (min_common_depth > 0 && min_common_depth <= aa.array_sz &&
434 | | - !(lmb->flags & DRCONF_MEM_AI_INVALID) &&
435 | | - lmb->aa_index < aa.n_arrays) {
| 447 | + if (min_common_depth <= aa.array_sz &&
| 448 | + !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
436 | 449 | index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
437 | 450 | nid = of_read_number(&aa.arrays[index], 1);
438 | 451 |
439 | | - if (nid == 0xffff || nid >= MAX_NUMNODES)
| 452 | + if (nid == 0xffff || nid >= nr_node_ids)
440 | 453 | nid = default_nid;
441 | 454 |
442 | 455 | if (nid > 0) {
.. | ..
449 | 462 | return nid;
450 | 463 | }
451 | 464 |
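In of_drconf_to_nid_single() the node id for an LMB comes out of the flattened associativity lookup arrays at aa_index * array_sz + min_common_depth - 1. A small worked sketch of that indexing with hypothetical values (all numbers invented):

#include <stdio.h>

int main(void)
{
        /* Hypothetical ibm,associativity-lookup-arrays: 3 arrays of 4 cells. */
        unsigned int arrays[] = {
                0, 0, 0, 1,     /* aa_index 0 */
                0, 0, 1, 1,     /* aa_index 1 */
                0, 1, 1, 2,     /* aa_index 2 */
        };
        int array_sz = 4;
        int min_common_depth = 2;       /* hypothetical depth */
        int aa_index = 2;               /* which array this LMB references */

        int index = aa_index * array_sz + min_common_depth - 1;
        printf("nid = %u\n", arrays[index]);    /* index 9 -> nid 1 */
        return 0;
}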
| 465 | +#ifdef CONFIG_PPC_SPLPAR
| 466 | +static int vphn_get_nid(long lcpu)
| 467 | +{
| 468 | + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
| 469 | + long rc, hwid;
| 470 | +
| 471 | + /*
| 472 | + * On a shared lpar, the device tree will not have node associativity.
| 473 | + * At this time the lppaca, or its __old_status field, may not be
| 474 | + * updated. Hence the kernel cannot detect if it's on a shared lpar. So
| 475 | + * request an explicit associativity irrespective of whether the
| 476 | + * lpar is shared or dedicated. Use the device tree property as a
| 477 | + * fallback. cpu_to_phys_id is only valid between
| 478 | + * smp_setup_cpu_maps() and smp_setup_pacas().
| 479 | + */
| 480 | + if (firmware_has_feature(FW_FEATURE_VPHN)) {
| 481 | + if (cpu_to_phys_id)
| 482 | + hwid = cpu_to_phys_id[lcpu];
| 483 | + else
| 484 | + hwid = get_hard_smp_processor_id(lcpu);
| 485 | +
| 486 | + rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity);
| 487 | + if (rc == H_SUCCESS)
| 488 | + return associativity_to_nid(associativity);
| 489 | + }
| 490 | +
| 491 | + return NUMA_NO_NODE;
| 492 | +}
| 493 | +#else
| 494 | +static int vphn_get_nid(long unused)
| 495 | +{
| 496 | + return NUMA_NO_NODE;
| 497 | +}
| 498 | +#endif /* CONFIG_PPC_SPLPAR */
| 499 | +
452 | 500 | /*
453 | 501 | * Figure out to which domain a cpu belongs and stick it there.
454 | 502 | * Return the id of the domain used.
455 | 503 | */
456 | 504 | static int numa_setup_cpu(unsigned long lcpu)
457 | 505 | {
458 | | - int nid = -1;
459 | 506 | struct device_node *cpu;
| 507 | + int fcpu = cpu_first_thread_sibling(lcpu);
| 508 | + int nid = NUMA_NO_NODE;
| 509 | +
| 510 | + if (!cpu_present(lcpu)) {
| 511 | + set_cpu_numa_node(lcpu, first_online_node);
| 512 | + return first_online_node;
| 513 | + }
460 | 514 |
461 | 515 | /*
462 | 516 | * If a valid cpu-to-node mapping is already available, use it
463 | 517 | * directly instead of querying the firmware, since it represents
464 | 518 | * the most recent mapping notified to us by the platform (eg: VPHN).
| 519 | + * Since the cpu_to_node binding remains the same for all threads in
| 520 | + * the core, if a valid cpu-to-node mapping is already available for
| 521 | + * the first thread in the core, use it.
465 | 522 | */
466 | | - if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) {
| 523 | + nid = numa_cpu_lookup_table[fcpu];
| 524 | + if (nid >= 0) {
467 | 525 | map_cpu_to_node(lcpu, nid);
468 | 526 | return nid;
469 | 527 | }
| 528 | +
| 529 | + nid = vphn_get_nid(lcpu);
| 530 | + if (nid != NUMA_NO_NODE)
| 531 | + goto out_present;
470 | 532 |
471 | 533 | cpu = of_get_cpu_node(lcpu, NULL);
472 | 534 |
.. | ..
479 | 541 | }
480 | 542 |
481 | 543 | nid = of_node_to_nid_single(cpu);
| 544 | + of_node_put(cpu);
482 | 545 |
483 | 546 | out_present:
484 | 547 | if (nid < 0 || !node_possible(nid))
485 | 548 | nid = first_online_node;
486 | 549 |
| 550 | + /*
| 551 | + * Update for the first thread of the core. All threads of a core
| 552 | + * have to be part of the same node. This not only avoids querying
| 553 | + * for every other thread in the core, but also avoids a case
| 554 | + * where a virtual node associativity change causes subsequent threads
| 555 | + * of a core to be associated with a different nid. However, if the
| 556 | + * first thread is already online, expect it to have a valid mapping.
| 557 | + */
| 558 | + if (fcpu != lcpu) {
| 559 | + WARN_ON(cpu_online(fcpu));
| 560 | + map_cpu_to_node(fcpu, nid);
| 561 | + }
| 562 | +
487 | 563 | map_cpu_to_node(lcpu, nid);
488 | | - of_node_put(cpu);
489 | 564 | out:
490 | 565 | return nid;
491 | 566 | }
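numa_setup_cpu() now keys the node lookup off the first thread of the core, so every SMT sibling inherits the same nid even if a later firmware query would answer differently. A rough stand-alone model of that caching (THREADS_PER_CORE, nid_lookup[] and setup_cpu_nid() are invented for illustration):

#include <stdio.h>

#define NR_CPUS                 16
#define THREADS_PER_CORE        4       /* hypothetical SMT width */

static int nid_lookup[NR_CPUS];

static int first_thread_sibling(int cpu)
{
        return cpu & ~(THREADS_PER_CORE - 1);
}

/* firmware_nid stands in for the VPHN/device-tree answer for this cpu. */
static int setup_cpu_nid(int cpu, int firmware_nid)
{
        int fcpu = first_thread_sibling(cpu);

        if (nid_lookup[fcpu] >= 0) {            /* core already mapped: reuse it */
                nid_lookup[cpu] = nid_lookup[fcpu];
                return nid_lookup[cpu];
        }

        nid_lookup[fcpu] = firmware_nid;        /* record for the whole core */
        nid_lookup[cpu] = firmware_nid;
        return firmware_nid;
}

int main(void)
{
        for (int i = 0; i < NR_CPUS; i++)
                nid_lookup[i] = -1;

        printf("cpu 4 -> node %d\n", setup_cpu_nid(4, 1));      /* first thread of its core */
        printf("cpu 6 -> node %d\n", setup_cpu_nid(6, 0));      /* sibling keeps node 1 */
        return 0;
}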
.. | ..
575 | 650 | * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
576 | 651 | * node. This assumes n_mem_{addr,size}_cells have been set.
577 | 652 | */
578 | | -static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
579 | | - const __be32 **usm)
| 653 | +static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
| 654 | + const __be32 **usm,
| 655 | + void *data)
580 | 656 | {
581 | 657 | unsigned int ranges, is_kexec_kdump = 0;
582 | 658 | unsigned long base, size, sz;
.. | ..
588 | 664 | */
589 | 665 | if ((lmb->flags & DRCONF_MEM_RESERVED)
590 | 666 | || !(lmb->flags & DRCONF_MEM_ASSIGNED))
591 | | - return;
| 667 | + return 0;
592 | 668 |
593 | 669 | if (*usm)
594 | 670 | is_kexec_kdump = 1;
.. | ..
600 | 676 | if (is_kexec_kdump) {
601 | 677 | ranges = read_usm_ranges(usm);
602 | 678 | if (!ranges) /* there are no (base, size) duple */
603 | | - return;
| 679 | + return 0;
604 | 680 | }
605 | 681 |
606 | 682 | do {
.. | ..
617 | 693 | if (sz)
618 | 694 | memblock_set_node(base, sz, &memblock.memory, nid);
619 | 695 | } while (--ranges);
| 696 | +
| 697 | + return 0;
620 | 698 | }
621 | 699 |
622 | 700 | static int __init parse_numa_properties(void)
.. | ..
632 | 710 |
633 | 711 | min_common_depth = find_min_common_depth();
634 | 712 |
635 | | - if (min_common_depth < 0)
| 713 | + if (min_common_depth < 0) {
| 714 | + /*
| 715 | + * If we fail to parse min_common_depth from the device tree,
| 716 | + * mark NUMA as disabled and boot with NUMA disabled.
| 717 | + */
| 718 | + numa_enabled = false;
636 | 719 | return min_common_depth;
| 720 | + }
637 | 721 |
638 | 722 | dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
639 | 723 |
.. | ..
644 | 728 | */
645 | 729 | for_each_present_cpu(i) {
646 | 730 | struct device_node *cpu;
647 | | - int nid;
648 | | -
649 | | - cpu = of_get_cpu_node(i, NULL);
650 | | - BUG_ON(!cpu);
651 | | - nid = of_node_to_nid_single(cpu);
652 | | - of_node_put(cpu);
| 731 | + int nid = vphn_get_nid(i);
653 | 732 |
654 | 733 | /*
655 | 734 | * Don't fall back to default_nid yet -- we will plug
656 | 735 | * cpus into nodes once the memory scan has discovered
657 | 736 | * the topology.
658 | 737 | */
659 | | - if (nid < 0)
660 | | - continue;
661 | | - node_set_online(nid);
| 738 | + if (nid == NUMA_NO_NODE) {
| 739 | + cpu = of_get_cpu_node(i, NULL);
| 740 | + BUG_ON(!cpu);
| 741 | + nid = of_node_to_nid_single(cpu);
| 742 | + of_node_put(cpu);
| 743 | + }
| 744 | +
| 745 | + /* node_set_online() is UB if 'nid' is negative */
| 746 | + if (likely(nid >= 0))
| 747 | + node_set_online(nid);
662 | 748 | }
663 | 749 |
664 | 750 | get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
.. | ..
712 | 798 | */
713 | 799 | memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
714 | 800 | if (memory) {
715 | | - walk_drmem_lmbs(memory, numa_setup_drmem_lmb);
| 801 | + walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb);
716 | 802 | of_node_put(memory);
717 | 803 | }
718 | 804 |
.. | ..
725 | 811 | unsigned long total_ram = memblock_phys_mem_size();
726 | 812 | unsigned long start_pfn, end_pfn;
727 | 813 | unsigned int nid = 0;
728 | | - struct memblock_region *reg;
| 814 | + int i;
729 | 815 |
730 | 816 | printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
731 | 817 | top_of_ram, total_ram);
732 | 818 | printk(KERN_DEBUG "Memory hole size: %ldMB\n",
733 | 819 | (top_of_ram - total_ram) >> 20);
734 | 820 |
735 | | - for_each_memblock(memory, reg) {
736 | | - start_pfn = memblock_region_memory_base_pfn(reg);
737 | | - end_pfn = memblock_region_memory_end_pfn(reg);
738 | | -
| 821 | + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
739 | 822 | fake_numa_create_new_node(end_pfn, &nid);
740 | 823 | memblock_set_node(PFN_PHYS(start_pfn),
741 | 824 | PFN_PHYS(end_pfn - start_pfn),
.. | ..
749 | 832 | unsigned int node;
750 | 833 | unsigned int cpu, count;
751 | 834 |
752 | | - if (min_common_depth == -1 || !numa_enabled)
| 835 | + if (!numa_enabled)
753 | 836 | return;
754 | 837 |
755 | 838 | for_each_online_node(node) {
.. | ..
788 | 871 | void *nd;
789 | 872 | int tnid;
790 | 873 |
791 | | - nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
| 874 | + nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
| 875 | + if (!nd_pa)
| 876 | + panic("Cannot allocate %zu bytes for node %d data\n",
| 877 | + nd_size, nid);
| 878 | +
792 | 879 | nd = __va(nd_pa);
793 | 880 |
794 | 881 | /* report and initialize */
.. | ..
808 | 895 | static void __init find_possible_nodes(void)
809 | 896 | {
810 | 897 | struct device_node *rtas;
811 | | - u32 numnodes, i;
| 898 | + const __be32 *domains = NULL;
| 899 | + int prop_length, max_nodes;
| 900 | + u32 i;
812 | 901 |
813 | | - if (min_common_depth <= 0)
| 902 | + if (!numa_enabled)
814 | 903 | return;
815 | 904 |
816 | 905 | rtas = of_find_node_by_path("/rtas");
817 | 906 | if (!rtas)
818 | 907 | return;
819 | 908 |
820 | | - if (of_property_read_u32_index(rtas,
821 | | - "ibm,max-associativity-domains",
822 | | - min_common_depth, &numnodes))
823 | | - goto out;
| 909 | + /*
| 910 | + * ibm,current-associativity-domains is a fairly recent property. If
| 911 | + * it doesn't exist, then fall back to ibm,max-associativity-domains.
| 912 | + * Current denotes what the platform can support compared to max,
| 913 | + * which denotes what the hypervisor can support.
| 914 | + *
| 915 | + * If the LPAR is migratable, new nodes might be activated after an LPM,
| 916 | + * so we should consider the max number in that case.
| 917 | + */
| 918 | + if (!of_get_property(of_root, "ibm,migratable-partition", NULL))
| 919 | + domains = of_get_property(rtas,
| 920 | + "ibm,current-associativity-domains",
| 921 | + &prop_length);
| 922 | + if (!domains) {
| 923 | + domains = of_get_property(rtas, "ibm,max-associativity-domains",
| 924 | + &prop_length);
| 925 | + if (!domains)
| 926 | + goto out;
| 927 | + }
824 | 928 |
825 | | - for (i = 0; i < numnodes; i++) {
| 929 | + max_nodes = of_read_number(&domains[min_common_depth], 1);
| 930 | + pr_info("Partition configured for %d NUMA nodes.\n", max_nodes);
| 931 | +
| 932 | + for (i = 0; i < max_nodes; i++) {
826 | 933 | if (!node_possible(i))
827 | 934 | node_set(i, node_possible_map);
828 | 935 | }
| 936 | +
| 937 | + prop_length /= sizeof(int);
| 938 | + if (prop_length > min_common_depth + 2)
| 939 | + coregroup_enabled = 1;
829 | 940 |
830 | 941 | out:
831 | 942 | of_node_put(rtas);
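find_possible_nodes() reads the node count out of the associativity-domains property at index min_common_depth, and turns on coregroups when the property carries more than min_common_depth + 2 cells. A hedged sketch of that parsing over a mock big-endian property (the values and the 5-cell layout are invented):

#include <stdio.h>
#include <arpa/inet.h>  /* htonl()/ntohl() stand in for cpu_to_be32()/be32_to_cpu() */

int main(void)
{
        /* Mock ibm,current-associativity-domains with 5 big-endian cells. */
        unsigned int domains[] = { htonl(1), htonl(2), htonl(4), htonl(8), htonl(16) };
        int prop_length = sizeof(domains);      /* in bytes, as of_get_property() reports */
        int min_common_depth = 2;               /* hypothetical */

        int max_nodes = ntohl(domains[min_common_depth]);
        printf("Partition configured for %d NUMA nodes.\n", max_nodes);        /* 4 */

        prop_length /= sizeof(int);
        if (prop_length > min_common_depth + 2)
                printf("coregroup_enabled = 1\n");      /* 5 cells > 4, so coregroups on */
        return 0;
}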
.. | ..
834 | 945 | void __init mem_topology_setup(void)
835 | 946 | {
836 | 947 | int cpu;
| 948 | +
| 949 | + /*
| 950 | + * Linux/mm assumes node 0 to be online at boot. However, this is not
| 951 | + * true on PowerPC, where node 0 is like any other node: it could be
| 952 | + * a cpuless, memoryless node. So force node 0 to be offline
| 953 | + * for now. This prevents a cpuless, memoryless node 0 from showing up
| 954 | + * unnecessarily as online. If a node has cpus or memory that need
| 955 | + * to be online, then the node will be marked online anyway.
| 956 | + */
| 957 | + node_set_offline(0);
837 | 958 |
838 | 959 | if (parse_numa_properties())
839 | 960 | setup_nonnuma();
.. | ..
852 | 973 |
853 | 974 | reset_numa_cpu_lookup_table();
854 | 975 |
855 | | - for_each_present_cpu(cpu)
| 976 | + for_each_possible_cpu(cpu) {
| 977 | + /*
| 978 | + * Powerpc with CONFIG_NUMA always used to have a node 0,
| 979 | + * even if it was memoryless or cpuless. For all cpus that
| 980 | + * are possible but not present, cpu_to_node() would point
| 981 | + * to node 0. To remove a cpuless, memoryless dummy node,
| 982 | + * powerpc needs to make sure cpu_to_node() for all possible
| 983 | + * but not present cpus is set to a proper node.
| 984 | + */
| 985 | + numa_setup_cpu(cpu);
| 986 | + }
857 | 987 | }
858 | 988 |
859 | 989 | void __init initmem_init(void)
.. | ..
870 | 1000 |
871 | 1001 | get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
872 | 1002 | setup_node_data(nid, start_pfn, end_pfn);
873 | | - sparse_memory_present_with_active_regions(nid);
874 | 1003 | }
875 | 1004 |
876 | 1005 | sparse_init();
.. | ..
905 | 1034 | }
906 | 1035 | early_param("numa", early_numa);
907 | 1036 |
908 | | -static bool topology_updates_enabled = true;
909 | | -
910 | | -static int __init early_topology_updates(char *p)
911 | | -{
912 | | - if (!p)
913 | | - return 0;
914 | | -
915 | | - if (!strcmp(p, "off")) {
916 | | - pr_info("Disabling topology updates\n");
917 | | - topology_updates_enabled = false;
918 | | - }
919 | | -
920 | | - return 0;
921 | | -}
922 | | -early_param("topology_updates", early_topology_updates);
923 | | -
924 | 1037 | #ifdef CONFIG_MEMORY_HOTPLUG
925 | 1038 | /*
926 | 1039 | * Find the node associated with a hot added memory section for
.. | ..
931 | 1044 | {
932 | 1045 | struct drmem_lmb *lmb;
933 | 1046 | unsigned long lmb_size;
934 | | - int nid = -1;
| 1047 | + int nid = NUMA_NO_NODE;
935 | 1048 |
936 | 1049 | lmb_size = drmem_lmb_size();
937 | 1050 |
.. | ..
961 | 1074 | static int hot_add_node_scn_to_nid(unsigned long scn_addr)
962 | 1075 | {
963 | 1076 | struct device_node *memory;
964 | | - int nid = -1;
| 1077 | + int nid = NUMA_NO_NODE;
965 | 1078 |
966 | 1079 | for_each_node_by_type(memory, "memory") {
967 | 1080 | unsigned long start, size;
.. | ..
1006 | 1119 | struct device_node *memory = NULL;
1007 | 1120 | int nid;
1008 | 1121 |
1009 | | - if (!numa_enabled || (min_common_depth < 0))
| 1122 | + if (!numa_enabled)
1010 | 1123 | return first_online_node;
1011 | 1124 |
1012 | 1125 | memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
.. | ..
1059 | 1172 |
1060 | 1173 | /* Virtual Processor Home Node (VPHN) support */
1061 | 1174 | #ifdef CONFIG_PPC_SPLPAR
1062 | | -
1063 | | -#include "vphn.h"
1064 | | -
1065 | | -struct topology_update_data {
1066 | | - struct topology_update_data *next;
1067 | | - unsigned int cpu;
1068 | | - int old_nid;
1069 | | - int new_nid;
1070 | | -};
1071 | | -
1072 | | -#define TOPOLOGY_DEF_TIMER_SECS 60
1073 | | -
1074 | | -static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
1075 | | -static cpumask_t cpu_associativity_changes_mask;
1076 | | -static int vphn_enabled;
1077 | | -static int prrn_enabled;
1078 | | -static void reset_topology_timer(void);
1079 | | -static int topology_timer_secs = 1;
1080 | 1175 | static int topology_inited;
1081 | | -
1082 | | -/*
1083 | | - * Change polling interval for associativity changes.
1084 | | - */
1085 | | -int timed_topology_update(int nsecs)
1086 | | -{
1087 | | - if (vphn_enabled) {
1088 | | - if (nsecs > 0)
1089 | | - topology_timer_secs = nsecs;
1090 | | - else
1091 | | - topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
1092 | | -
1093 | | - reset_topology_timer();
1094 | | - }
1095 | | -
1096 | | - return 0;
1097 | | -}
1098 | | -
1099 | | -/*
1100 | | - * Store the current values of the associativity change counters in the
1101 | | - * hypervisor.
1102 | | - */
1103 | | -static void setup_cpu_associativity_change_counters(void)
1104 | | -{
1105 | | - int cpu;
1106 | | -
1107 | | - /* The VPHN feature supports a maximum of 8 reference points */
1108 | | - BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
1109 | | -
1110 | | - for_each_possible_cpu(cpu) {
1111 | | - int i;
1112 | | - u8 *counts = vphn_cpu_change_counts[cpu];
1113 | | - volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
1114 | | -
1115 | | - for (i = 0; i < distance_ref_points_depth; i++)
1116 | | - counts[i] = hypervisor_counts[i];
1117 | | - }
1118 | | -}
1119 | | -
1120 | | -/*
1121 | | - * The hypervisor maintains a set of 8 associativity change counters in
1122 | | - * the VPA of each cpu that correspond to the associativity levels in the
1123 | | - * ibm,associativity-reference-points property. When an associativity
1124 | | - * level changes, the corresponding counter is incremented.
1125 | | - *
1126 | | - * Set a bit in cpu_associativity_changes_mask for each cpu whose home
1127 | | - * node associativity levels have changed.
1128 | | - *
1129 | | - * Returns the number of cpus with unhandled associativity changes.
1130 | | - */
1131 | | -static int update_cpu_associativity_changes_mask(void)
1132 | | -{
1133 | | - int cpu;
1134 | | - cpumask_t *changes = &cpu_associativity_changes_mask;
1135 | | -
1136 | | - for_each_possible_cpu(cpu) {
1137 | | - int i, changed = 0;
1138 | | - u8 *counts = vphn_cpu_change_counts[cpu];
1139 | | - volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
1140 | | -
1141 | | - for (i = 0; i < distance_ref_points_depth; i++) {
1142 | | - if (hypervisor_counts[i] != counts[i]) {
1143 | | - counts[i] = hypervisor_counts[i];
1144 | | - changed = 1;
1145 | | - }
1146 | | - }
1147 | | - if (changed) {
1148 | | - cpumask_or(changes, changes, cpu_sibling_mask(cpu));
1149 | | - cpu = cpu_last_thread_sibling(cpu);
1150 | | - }
1151 | | - }
1152 | | -
1153 | | - return cpumask_weight(changes);
1154 | | -}
1155 | 1176 |
1156 | 1177 | /*
1157 | 1178 | * Retrieve the new associativity information for a virtual processor's
1158 | 1179 | * home node.
1159 | 1180 | */
1160 | | -static long hcall_vphn(unsigned long cpu, __be32 *associativity)
1161 | | -{
1162 | | - long rc;
1163 | | - long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
1164 | | - u64 flags = 1;
1165 | | - int hwcpu = get_hard_smp_processor_id(cpu);
1166 | | -
1167 | | - rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
1168 | | - vphn_unpack_associativity(retbuf, associativity);
1169 | | -
1170 | | - return rc;
1171 | | -}
1172 | | -
1173 | 1181 | static long vphn_get_associativity(unsigned long cpu,
1174 | 1182 | __be32 *associativity)
1175 | 1183 | {
1176 | 1184 | long rc;
1177 | 1185 |
1178 | | - rc = hcall_vphn(cpu, associativity);
| 1186 | + rc = hcall_vphn(get_hard_smp_processor_id(cpu),
| 1187 | + VPHN_FLAG_VCPU, associativity);
1179 | 1188 |
1180 | 1189 | switch (rc) {
1181 | | - case H_FUNCTION:
1182 | | - printk_once(KERN_INFO
1183 | | - "VPHN is not supported. Disabling polling...\n");
1184 | | - stop_topology_update();
1185 | | - break;
1186 | | - case H_HARDWARE:
1187 | | - printk(KERN_ERR
1188 | | - "hcall_vphn() experienced a hardware fault "
1189 | | - "preventing VPHN. Disabling polling...\n");
1190 | | - stop_topology_update();
1191 | | - break;
1192 | 1190 | case H_SUCCESS:
1193 | 1191 | dbg("VPHN hcall succeeded. Reset polling...\n");
1194 | | - timed_topology_update(0);
| 1192 | + goto out;
| 1193 | +
| 1194 | + case H_FUNCTION:
| 1195 | + pr_err_ratelimited("VPHN unsupported. Disabling polling...\n");
| 1196 | + break;
| 1197 | + case H_HARDWARE:
| 1198 | + pr_err_ratelimited("hcall_vphn() experienced a hardware fault "
| 1199 | + "preventing VPHN. Disabling polling...\n");
| 1200 | + break;
| 1201 | + case H_PARAMETER:
| 1202 | + pr_err_ratelimited("hcall_vphn() was passed an invalid parameter. "
| 1203 | + "Disabling polling...\n");
| 1204 | + break;
| 1205 | + default:
| 1206 | + pr_err_ratelimited("hcall_vphn() returned %ld. Disabling polling...\n"
| 1207 | + , rc);
1195 | 1208 | break;
1196 | 1209 | }
1197 | | -
| 1210 | +out:
1198 | 1211 | return rc;
1199 | 1212 | }
1200 | 1213 |
.. | ..
1237 | 1250 | return new_nid;
1238 | 1251 | }
1239 | 1252 |
1240 | | -/*
1241 | | - * Update the CPU maps and sysfs entries for a single CPU when its NUMA
1242 | | - * characteristics change. This function doesn't perform any locking and is
1243 | | - * only safe to call from stop_machine().
1244 | | - */
1245 | | -static int update_cpu_topology(void *data)
| 1253 | +int cpu_to_coregroup_id(int cpu)
1246 | 1254 | {
1247 | | - struct topology_update_data *update;
1248 | | - unsigned long cpu;
| 1255 | + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
| 1256 | + int index;
1249 | 1257 |
1250 | | - if (!data)
1251 | | - return -EINVAL;
| 1258 | + if (cpu < 0 || cpu > nr_cpu_ids)
| 1259 | + return -1;
1252 | 1260 |
1253 | | - cpu = smp_processor_id();
1254 | | -
1255 | | - for (update = data; update; update = update->next) {
1256 | | - int new_nid = update->new_nid;
1257 | | - if (cpu != update->cpu)
1258 | | - continue;
1259 | | -
1260 | | - unmap_cpu_from_node(cpu);
1261 | | - map_cpu_to_node(cpu, new_nid);
1262 | | - set_cpu_numa_node(cpu, new_nid);
1263 | | - set_cpu_numa_mem(cpu, local_memory_node(new_nid));
1264 | | - vdso_getcpu_init();
1265 | | - }
1266 | | -
1267 | | - return 0;
1268 | | -}
1269 | | -
1270 | | -static int update_lookup_table(void *data)
1271 | | -{
1272 | | - struct topology_update_data *update;
1273 | | -
1274 | | - if (!data)
1275 | | - return -EINVAL;
1276 | | -
1277 | | - /*
1278 | | - * Upon topology update, the numa-cpu lookup table needs to be updated
1279 | | - * for all threads in the core, including offline CPUs, to ensure that
1280 | | - * future hotplug operations respect the cpu-to-node associativity
1281 | | - * properly.
1282 | | - */
1283 | | - for (update = data; update; update = update->next) {
1284 | | - int nid, base, j;
1285 | | -
1286 | | - nid = update->new_nid;
1287 | | - base = cpu_first_thread_sibling(update->cpu);
1288 | | -
1289 | | - for (j = 0; j < threads_per_core; j++) {
1290 | | - update_numa_cpu_lookup_table(base + j, nid);
1291 | | - }
1292 | | - }
1293 | | -
1294 | | - return 0;
1295 | | -}
1296 | | -
1297 | | -/*
1298 | | - * Update the node maps and sysfs entries for each cpu whose home node
1299 | | - * has changed. Returns 1 when the topology has changed, and 0 otherwise.
1300 | | - *
1301 | | - * cpus_locked says whether we already hold cpu_hotplug_lock.
1302 | | - */
1303 | | -int numa_update_cpu_topology(bool cpus_locked)
1304 | | -{
1305 | | - unsigned int cpu, sibling, changed = 0;
1306 | | - struct topology_update_data *updates, *ud;
1307 | | - cpumask_t updated_cpus;
1308 | | - struct device *dev;
1309 | | - int weight, new_nid, i = 0;
1310 | | -
1311 | | - if (!prrn_enabled && !vphn_enabled && topology_inited)
1312 | | - return 0;
1313 | | -
1314 | | - weight = cpumask_weight(&cpu_associativity_changes_mask);
1315 | | - if (!weight)
1316 | | - return 0;
1317 | | -
1318 | | - updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL);
1319 | | - if (!updates)
1320 | | - return 0;
1321 | | -
1322 | | - cpumask_clear(&updated_cpus);
1323 | | -
1324 | | - for_each_cpu(cpu, &cpu_associativity_changes_mask) {
1325 | | - /*
1326 | | - * If siblings aren't flagged for changes, updates list
1327 | | - * will be too short. Skip on this update and set for next
1328 | | - * update.
1329 | | - */
1330 | | - if (!cpumask_subset(cpu_sibling_mask(cpu),
1331 | | - &cpu_associativity_changes_mask)) {
1332 | | - pr_info("Sibling bits not set for associativity "
1333 | | - "change, cpu%d\n", cpu);
1334 | | - cpumask_or(&cpu_associativity_changes_mask,
1335 | | - &cpu_associativity_changes_mask,
1336 | | - cpu_sibling_mask(cpu));
1337 | | - cpu = cpu_last_thread_sibling(cpu);
1338 | | - continue;
1339 | | - }
1340 | | -
1341 | | - new_nid = find_and_online_cpu_nid(cpu);
1342 | | -
1343 | | - if (new_nid == numa_cpu_lookup_table[cpu]) {
1344 | | - cpumask_andnot(&cpu_associativity_changes_mask,
1345 | | - &cpu_associativity_changes_mask,
1346 | | - cpu_sibling_mask(cpu));
1347 | | - dbg("Assoc chg gives same node %d for cpu%d\n",
1348 | | - new_nid, cpu);
1349 | | - cpu = cpu_last_thread_sibling(cpu);
1350 | | - continue;
1351 | | - }
1352 | | -
1353 | | - for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
1354 | | - ud = &updates[i++];
1355 | | - ud->next = &updates[i];
1356 | | - ud->cpu = sibling;
1357 | | - ud->new_nid = new_nid;
1358 | | - ud->old_nid = numa_cpu_lookup_table[sibling];
1359 | | - cpumask_set_cpu(sibling, &updated_cpus);
1360 | | - }
1361 | | - cpu = cpu_last_thread_sibling(cpu);
1362 | | - }
1363 | | -
1364 | | - /*
1365 | | - * Prevent processing of 'updates' from overflowing array
1366 | | - * where last entry filled in a 'next' pointer.
1367 | | - */
1368 | | - if (i)
1369 | | - updates[i-1].next = NULL;
1370 | | -
1371 | | - pr_debug("Topology update for the following CPUs:\n");
1372 | | - if (cpumask_weight(&updated_cpus)) {
1373 | | - for (ud = &updates[0]; ud; ud = ud->next) {
1374 | | - pr_debug("cpu %d moving from node %d "
1375 | | - "to %d\n", ud->cpu,
1376 | | - ud->old_nid, ud->new_nid);
1377 | | - }
1378 | | - }
1379 | | -
1380 | | - /*
1381 | | - * In cases where we have nothing to update (because the updates list
1382 | | - * is too short or because the new topology is same as the old one),
1383 | | - * skip invoking update_cpu_topology() via stop-machine(). This is
1384 | | - * necessary (and not just a fast-path optimization) since stop-machine
1385 | | - * can end up electing a random CPU to run update_cpu_topology(), and
1386 | | - * thus trick us into setting up incorrect cpu-node mappings (since
1387 | | - * 'updates' is kzalloc()'ed).
1388 | | - *
1389 | | - * And for the similar reason, we will skip all the following updating.
1390 | | - */
1391 | | - if (!cpumask_weight(&updated_cpus))
| 1261 | + if (!coregroup_enabled)
1392 | 1262 | goto out;
1393 | 1263 |
1394 | | - if (cpus_locked)
1395 | | - stop_machine_cpuslocked(update_cpu_topology, &updates[0],
1396 | | - &updated_cpus);
1397 | | - else
1398 | | - stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
| 1264 | + if (!firmware_has_feature(FW_FEATURE_VPHN))
| 1265 | + goto out;
1399 | 1266 |
1400 | | - /*
1401 | | - * Update the numa-cpu lookup table with the new mappings, even for
1402 | | - * offline CPUs. It is best to perform this update from the stop-
1403 | | - * machine context.
1404 | | - */
1405 | | - if (cpus_locked)
1406 | | - stop_machine_cpuslocked(update_lookup_table, &updates[0],
1407 | | - cpumask_of(raw_smp_processor_id()));
1408 | | - else
1409 | | - stop_machine(update_lookup_table, &updates[0],
1410 | | - cpumask_of(raw_smp_processor_id()));
| 1267 | + if (vphn_get_associativity(cpu, associativity))
| 1268 | + goto out;
1411 | 1269 |
1412 | | - for (ud = &updates[0]; ud; ud = ud->next) {
1413 | | - unregister_cpu_under_node(ud->cpu, ud->old_nid);
1414 | | - register_cpu_under_node(ud->cpu, ud->new_nid);
1415 | | -
1416 | | - dev = get_cpu_device(ud->cpu);
1417 | | - if (dev)
1418 | | - kobject_uevent(&dev->kobj, KOBJ_CHANGE);
1419 | | - cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
1420 | | - changed = 1;
1421 | | - }
| 1270 | + index = of_read_number(associativity, 1);
| 1271 | + if (index > min_common_depth + 1)
| 1272 | + return of_read_number(&associativity[index - 1], 1);
1422 | 1273 |
1423 | 1274 | out:
1424 | | - kfree(updates);
1425 | | - return changed;
| 1275 | + return cpu_to_core_id(cpu);
1426 | 1276 | }
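cpu_to_coregroup_id() reuses the VPHN associativity buffer: cell 0 holds the number of entries, and when the array is deep enough the coregroup id is taken from entry index - 1 (the second-to-last domain); otherwise it falls back to cpu_to_core_id(). A stand-alone sketch of that selection with a fabricated associativity array:

#include <stdio.h>

/* Fabricated associativity: element 0 is the entry count, as in the PAPR property. */
static const unsigned int assoc[] = { 5, 0, 1, 3, 42, 7 };
static const int min_common_depth = 2; /* hypothetical */

static int coregroup_id(int core_id_fallback)
{
        int index = assoc[0];                   /* number of associativity entries */

        if (index > min_common_depth + 1)
                return assoc[index - 1];        /* second-to-last domain: 42 here */

        return core_id_fallback;
}

int main(void)
{
        printf("coregroup id = %d\n", coregroup_id(3));         /* prints 42 */
        return 0;
}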
1427 | | -
1428 | | -int arch_update_cpu_topology(void)
1429 | | -{
1430 | | - return numa_update_cpu_topology(true);
1431 | | -}
1432 | | -
1433 | | -static void topology_work_fn(struct work_struct *work)
1434 | | -{
1435 | | - rebuild_sched_domains();
1436 | | -}
1437 | | -static DECLARE_WORK(topology_work, topology_work_fn);
1438 | | -
1439 | | -static void topology_schedule_update(void)
1440 | | -{
1441 | | - schedule_work(&topology_work);
1442 | | -}
1443 | | -
1444 | | -static void topology_timer_fn(struct timer_list *unused)
1445 | | -{
1446 | | - if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
1447 | | - topology_schedule_update();
1448 | | - else if (vphn_enabled) {
1449 | | - if (update_cpu_associativity_changes_mask() > 0)
1450 | | - topology_schedule_update();
1451 | | - reset_topology_timer();
1452 | | - }
1453 | | -}
1454 | | -static struct timer_list topology_timer;
1455 | | -
1456 | | -static void reset_topology_timer(void)
1457 | | -{
1458 | | - if (vphn_enabled)
1459 | | - mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
1460 | | -}
1461 | | -
1462 | | -#ifdef CONFIG_SMP
1463 | | -
1464 | | -static int dt_update_callback(struct notifier_block *nb,
1465 | | - unsigned long action, void *data)
1466 | | -{
1467 | | - struct of_reconfig_data *update = data;
1468 | | - int rc = NOTIFY_DONE;
1469 | | -
1470 | | - switch (action) {
1471 | | - case OF_RECONFIG_UPDATE_PROPERTY:
1472 | | - if (!of_prop_cmp(update->dn->type, "cpu") &&
1473 | | - !of_prop_cmp(update->prop->name, "ibm,associativity")) {
1474 | | - u32 core_id;
1475 | | - of_property_read_u32(update->dn, "reg", &core_id);
1476 | | - rc = dlpar_cpu_readd(core_id);
1477 | | - rc = NOTIFY_OK;
1478 | | - }
1479 | | - break;
1480 | | - }
1481 | | -
1482 | | - return rc;
1483 | | -}
1484 | | -
1485 | | -static struct notifier_block dt_update_nb = {
1486 | | - .notifier_call = dt_update_callback,
1487 | | -};
1488 | | -
1489 | | -#endif
1490 | | -
1491 | | -/*
1492 | | - * Start polling for associativity changes.
1493 | | - */
1494 | | -int start_topology_update(void)
1495 | | -{
1496 | | - int rc = 0;
1497 | | -
1498 | | - if (!topology_updates_enabled)
1499 | | - return 0;
1500 | | -
1501 | | - if (firmware_has_feature(FW_FEATURE_PRRN)) {
1502 | | - if (!prrn_enabled) {
1503 | | - prrn_enabled = 1;
1504 | | -#ifdef CONFIG_SMP
1505 | | - rc = of_reconfig_notifier_register(&dt_update_nb);
1506 | | -#endif
1507 | | - }
1508 | | - }
1509 | | - if (firmware_has_feature(FW_FEATURE_VPHN) &&
1510 | | - lppaca_shared_proc(get_lppaca())) {
1511 | | - if (!vphn_enabled) {
1512 | | - vphn_enabled = 1;
1513 | | - setup_cpu_associativity_change_counters();
1514 | | - timer_setup(&topology_timer, topology_timer_fn,
1515 | | - TIMER_DEFERRABLE);
1516 | | - reset_topology_timer();
1517 | | - }
1518 | | - }
1519 | | -
1520 | | - return rc;
1521 | | -}
1522 | | -
1523 | | -/*
1524 | | - * Disable polling for VPHN associativity changes.
1525 | | - */
1526 | | -int stop_topology_update(void)
1527 | | -{
1528 | | - int rc = 0;
1529 | | -
1530 | | - if (!topology_updates_enabled)
1531 | | - return 0;
1532 | | -
1533 | | - if (prrn_enabled) {
1534 | | - prrn_enabled = 0;
1535 | | -#ifdef CONFIG_SMP
1536 | | - rc = of_reconfig_notifier_unregister(&dt_update_nb);
1537 | | -#endif
1538 | | - }
1539 | | - if (vphn_enabled) {
1540 | | - vphn_enabled = 0;
1541 | | - rc = del_timer_sync(&topology_timer);
1542 | | - }
1543 | | -
1544 | | - return rc;
1545 | | -}
1546 | | -
1547 | | -int prrn_is_enabled(void)
1548 | | -{
1549 | | - return prrn_enabled;
1550 | | -}
1551 | | -
1552 | | -void __init shared_proc_topology_init(void)
1553 | | -{
1554 | | - if (lppaca_shared_proc(get_lppaca())) {
1555 | | - bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask),
1556 | | - nr_cpumask_bits);
1557 | | - numa_update_cpu_topology(false);
1558 | | - }
1559 | | -}
1560 | | -
1561 | | -static int topology_read(struct seq_file *file, void *v)
1562 | | -{
1563 | | - if (vphn_enabled || prrn_enabled)
1564 | | - seq_puts(file, "on\n");
1565 | | - else
1566 | | - seq_puts(file, "off\n");
1567 | | -
1568 | | - return 0;
1569 | | -}
1570 | | -
1571 | | -static int topology_open(struct inode *inode, struct file *file)
1572 | | -{
1573 | | - return single_open(file, topology_read, NULL);
1574 | | -}
1575 | | -
1576 | | -static ssize_t topology_write(struct file *file, const char __user *buf,
1577 | | - size_t count, loff_t *off)
1578 | | -{
1579 | | - char kbuf[4]; /* "on" or "off" plus null. */
1580 | | - int read_len;
1581 | | -
1582 | | - read_len = count < 3 ? count : 3;
1583 | | - if (copy_from_user(kbuf, buf, read_len))
1584 | | - return -EINVAL;
1585 | | -
1586 | | - kbuf[read_len] = '\0';
1587 | | -
1588 | | - if (!strncmp(kbuf, "on", 2)) {
1589 | | - topology_updates_enabled = true;
1590 | | - start_topology_update();
1591 | | - } else if (!strncmp(kbuf, "off", 3)) {
1592 | | - stop_topology_update();
1593 | | - topology_updates_enabled = false;
1594 | | - } else
1595 | | - return -EINVAL;
1596 | | -
1597 | | - return count;
1598 | | -}
1599 | | -
1600 | | -static const struct file_operations topology_ops = {
1601 | | - .read = seq_read,
1602 | | - .write = topology_write,
1603 | | - .open = topology_open,
1604 | | - .release = single_release
1605 | | -};
1606 | 1277 |
1607 | 1278 | static int topology_update_init(void)
1608 | 1279 | {
1609 | | - start_topology_update();
1610 | | -
1611 | | - if (vphn_enabled)
1612 | | - topology_schedule_update();
1613 | | -
1614 | | - if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
1615 | | - return -ENOMEM;
1616 | | -
1617 | 1280 | topology_inited = 1;
1618 | 1281 | return 0;
1619 | 1282 | }