| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * pSeries NUMA support |
|---|
| 3 | 4 | * |
|---|
| 4 | 5 | * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM |
|---|
| 5 | | - * |
|---|
| 6 | | - * This program is free software; you can redistribute it and/or |
|---|
| 7 | | - * modify it under the terms of the GNU General Public License |
|---|
| 8 | | - * as published by the Free Software Foundation; either version |
|---|
| 9 | | - * 2 of the License, or (at your option) any later version. |
|---|
| 10 | 6 | */ |
|---|
| 11 | 7 | #define pr_fmt(fmt) "numa: " fmt |
|---|
| 12 | 8 | |
|---|
| 13 | 9 | #include <linux/threads.h> |
|---|
| 14 | | -#include <linux/bootmem.h> |
|---|
| 10 | +#include <linux/memblock.h> |
|---|
| 15 | 11 | #include <linux/init.h> |
|---|
| 16 | 12 | #include <linux/mm.h> |
|---|
| 17 | 13 | #include <linux/mmzone.h> |
|---|
| .. | .. |
|---|
| 19 | 15 | #include <linux/nodemask.h> |
|---|
| 20 | 16 | #include <linux/cpu.h> |
|---|
| 21 | 17 | #include <linux/notifier.h> |
|---|
| 22 | | -#include <linux/memblock.h> |
|---|
| 23 | 18 | #include <linux/of.h> |
|---|
| 24 | 19 | #include <linux/pfn.h> |
|---|
| 25 | 20 | #include <linux/cpuset.h> |
|---|
| .. | .. |
|---|
| 33 | 28 | #include <asm/sparsemem.h> |
|---|
| 34 | 29 | #include <asm/prom.h> |
|---|
| 35 | 30 | #include <asm/smp.h> |
|---|
| 36 | | -#include <asm/cputhreads.h> |
|---|
| 37 | 31 | #include <asm/topology.h> |
|---|
| 38 | 32 | #include <asm/firmware.h> |
|---|
| 39 | 33 | #include <asm/paca.h> |
|---|
| .. | .. |
|---|
| 85 | 79 | alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); |
|---|
| 86 | 80 | |
|---|
| 87 | 81 | /* cpumask_of_node() will now work */ |
|---|
| 88 | | - dbg("Node to cpumask map for %d nodes\n", nr_node_ids); |
|---|
| 82 | + dbg("Node to cpumask map for %u nodes\n", nr_node_ids); |
|---|
| 89 | 83 | } |
|---|
| 90 | 84 | |
|---|
| 91 | 85 | static int __init fake_numa_create_new_node(unsigned long end_pfn, |
|---|
| .. | .. |
|---|
| 169 | 163 | } |
|---|
| 170 | 164 | #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */ |
|---|
| 171 | 165 | |
|---|
| 166 | +int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) |
|---|
| 167 | +{ |
|---|
| 168 | + int dist = 0; |
|---|
| 169 | + |
|---|
| 170 | + int i, index; |
|---|
| 171 | + |
|---|
| 172 | + for (i = 0; i < distance_ref_points_depth; i++) { |
|---|
| 173 | + index = be32_to_cpu(distance_ref_points[i]); |
|---|
| 174 | + if (cpu1_assoc[index] == cpu2_assoc[index]) |
|---|
| 175 | + break; |
|---|
| 176 | + dist++; |
|---|
| 177 | + } |
|---|
| 178 | + |
|---|
| 179 | + return dist; |
|---|
| 180 | +} |
|---|
| 181 | + |
|---|
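For illustration, a minimal standalone sketch of the distance walk in cpu_distance() above, using mock associativity arrays and reference points. The be32_to_cpu() conversions are dropped and all values are hypothetical:

```c
#include <stdio.h>

#define DEPTH 2

/* Mock ibm,associativity-reference-points: indexes into the assoc arrays */
static const int distance_ref_points[DEPTH] = {4, 2};

int main(void)
{
	/* First cell is the entry count; domain ids follow, coarsest first */
	int cpu1_assoc[] = {5, 0, 0, 0, 10, 100};
	int cpu2_assoc[] = {5, 0, 1, 0, 20, 200};
	int dist = 0, i;

	for (i = 0; i < DEPTH; i++) {
		int index = distance_ref_points[i];

		if (cpu1_assoc[index] == cpu2_assoc[index])
			break;
		dist++;
	}

	/* Arrays differ at index 4 and at index 2, so dist ends up 2 */
	printf("distance = %d\n", dist);
	return 0;
}
```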
| 172 | 182 | /* must hold reference to node during call */ |
|---|
| 173 | 183 | static const __be32 *of_get_associativity(struct device_node *dev) |
|---|
| 174 | 184 | { |
|---|
| .. | .. |
|---|
| 211 | 221 | } |
|---|
| 212 | 222 | } |
|---|
| 213 | 223 | |
|---|
| 214 | | -/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa |
|---|
| 224 | +/* |
|---|
| 225 | + * Returns nid in the range [0..nr_node_ids - 1], or -1 if no useful NUMA |
|---|
| 215 | 226 | * info is found. |
|---|
| 216 | 227 | */ |
|---|
| 217 | 228 | static int associativity_to_nid(const __be32 *associativity) |
|---|
| 218 | 229 | { |
|---|
| 219 | | - int nid = -1; |
|---|
| 230 | + int nid = NUMA_NO_NODE; |
|---|
| 220 | 231 | |
|---|
| 221 | | - if (min_common_depth == -1) |
|---|
| 232 | + if (!numa_enabled) |
|---|
| 222 | 233 | goto out; |
|---|
| 223 | 234 | |
|---|
| 224 | 235 | if (of_read_number(associativity, 1) >= min_common_depth) |
|---|
| 225 | 236 | nid = of_read_number(&associativity[min_common_depth], 1); |
|---|
| 226 | 237 | |
|---|
| 227 | 238 | /* POWER4 LPAR uses 0xffff as invalid node */ |
|---|
| 228 | | - if (nid == 0xffff || nid >= MAX_NUMNODES) |
|---|
| 229 | | - nid = -1; |
|---|
| 239 | + if (nid == 0xffff || nid >= nr_node_ids) |
|---|
| 240 | + nid = NUMA_NO_NODE; |
|---|
| 230 | 241 | |
|---|
| 231 | 242 | if (nid > 0 && |
|---|
| 232 | 243 | of_read_number(associativity, 1) >= distance_ref_points_depth) { |
|---|
| .. | .. |
|---|
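A standalone sketch of how an ibm,associativity property is decoded by the code above, assuming min_common_depth == 4 and illustrative cell values (the first cell of the property is the count of entries that follow):

```c
#include <stdio.h>

#define NUMA_NO_NODE (-1)

int main(void)
{
	int min_common_depth = 4;	/* assumed for this sketch */
	int nr_node_ids = 256;
	/* { entry count, domain1, domain2, domain3, nid } -- mock values */
	unsigned int associativity[] = {4, 0, 0, 0, 3};
	int nid = NUMA_NO_NODE;

	/* Only trust the array if it is deep enough to hold the nid */
	if (associativity[0] >= (unsigned int)min_common_depth)
		nid = (int)associativity[min_common_depth];

	/* POWER4 LPARs use 0xffff as an invalid node */
	if (nid == 0xffff || nid >= nr_node_ids)
		nid = NUMA_NO_NODE;

	printf("nid = %d\n", nid);	/* prints 3 */
	return 0;
}
```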
| 245 | 256 | */ |
|---|
| 246 | 257 | static int of_node_to_nid_single(struct device_node *device) |
|---|
| 247 | 258 | { |
|---|
| 248 | | - int nid = -1; |
|---|
| 259 | + int nid = NUMA_NO_NODE; |
|---|
| 249 | 260 | const __be32 *tmp; |
|---|
| 250 | 261 | |
|---|
| 251 | 262 | tmp = of_get_associativity(device); |
|---|
| .. | .. |
|---|
| 257 | 268 | /* Walk the device tree upwards, looking for an associativity id */ |
|---|
| 258 | 269 | int of_node_to_nid(struct device_node *device) |
|---|
| 259 | 270 | { |
|---|
| 260 | | - int nid = -1; |
|---|
| 271 | + int nid = NUMA_NO_NODE; |
|---|
| 261 | 272 | |
|---|
| 262 | 273 | of_node_get(device); |
|---|
| 263 | 274 | while (device) { |
|---|
| .. | .. |
|---|
| 419 | 430 | * This is like of_node_to_nid_single() for memory represented in the |
|---|
| 420 | 431 | * ibm,dynamic-reconfiguration-memory node. |
|---|
| 421 | 432 | */ |
|---|
| 422 | | -static int of_drconf_to_nid_single(struct drmem_lmb *lmb) |
|---|
| 433 | +int of_drconf_to_nid_single(struct drmem_lmb *lmb) |
|---|
| 423 | 434 | { |
|---|
| 424 | 435 | struct assoc_arrays aa = { .arrays = NULL }; |
|---|
| 425 | | - int default_nid = 0; |
|---|
| 436 | + int default_nid = NUMA_NO_NODE; |
|---|
| 426 | 437 | int nid = default_nid; |
|---|
| 427 | 438 | int rc, index; |
|---|
| 439 | + |
|---|
| 440 | + if ((min_common_depth < 0) || !numa_enabled) |
|---|
| 441 | + return default_nid; |
|---|
| 428 | 442 | |
|---|
| 429 | 443 | rc = of_get_assoc_arrays(&aa); |
|---|
| 430 | 444 | if (rc) |
|---|
| 431 | 445 | return default_nid; |
|---|
| 432 | 446 | |
|---|
| 433 | | - if (min_common_depth > 0 && min_common_depth <= aa.array_sz && |
|---|
| 434 | | - !(lmb->flags & DRCONF_MEM_AI_INVALID) && |
|---|
| 435 | | - lmb->aa_index < aa.n_arrays) { |
|---|
| 447 | + if (min_common_depth <= aa.array_sz && |
|---|
| 448 | + !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) { |
|---|
| 436 | 449 | index = lmb->aa_index * aa.array_sz + min_common_depth - 1; |
|---|
| 437 | 450 | nid = of_read_number(&aa.arrays[index], 1); |
|---|
| 438 | 451 | |
|---|
| 439 | | - if (nid == 0xffff || nid >= MAX_NUMNODES) |
|---|
| 452 | + if (nid == 0xffff || nid >= nr_node_ids) |
|---|
| 440 | 453 | nid = default_nid; |
|---|
| 441 | 454 | |
|---|
| 442 | 455 | if (nid > 0) { |
|---|
| .. | .. |
|---|
| 449 | 462 | return nid; |
|---|
| 450 | 463 | } |
|---|
| 451 | 464 | |
|---|
| 465 | +#ifdef CONFIG_PPC_SPLPAR |
|---|
| 466 | +static int vphn_get_nid(long lcpu) |
|---|
| 467 | +{ |
|---|
| 468 | + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; |
|---|
| 469 | + long rc, hwid; |
|---|
| 470 | + |
|---|
| 471 | + /* |
|---|
| 472 | + * On a shared lpar, the device tree will not have node associativity. |
|---|
| 473 | + * At this time the lppaca, or its __old_status field, may not be |
|---|
| 474 | + * updated yet. Hence the kernel cannot detect if it is on a shared lpar. So |
|---|
| 475 | + * request an explicit associativity irrespective of whether the |
|---|
| 476 | + * lpar is shared or dedicated. Use the device tree property as a |
|---|
| 477 | + * fallback. cpu_to_phys_id is only valid between |
|---|
| 478 | + * smp_setup_cpu_maps() and smp_setup_pacas(). |
|---|
| 479 | + */ |
|---|
| 480 | + if (firmware_has_feature(FW_FEATURE_VPHN)) { |
|---|
| 481 | + if (cpu_to_phys_id) |
|---|
| 482 | + hwid = cpu_to_phys_id[lcpu]; |
|---|
| 483 | + else |
|---|
| 484 | + hwid = get_hard_smp_processor_id(lcpu); |
|---|
| 485 | + |
|---|
| 486 | + rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity); |
|---|
| 487 | + if (rc == H_SUCCESS) |
|---|
| 488 | + return associativity_to_nid(associativity); |
|---|
| 489 | + } |
|---|
| 490 | + |
|---|
| 491 | + return NUMA_NO_NODE; |
|---|
| 492 | +} |
|---|
| 493 | +#else |
|---|
| 494 | +static int vphn_get_nid(long unused) |
|---|
| 495 | +{ |
|---|
| 496 | + return NUMA_NO_NODE; |
|---|
| 497 | +} |
|---|
| 498 | +#endif /* CONFIG_PPC_SPLPAR */ |
|---|
| 499 | + |
|---|
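The #else stub above keeps callers of vphn_get_nid() free of #ifdef clutter. A generic, standalone sketch of the same compile-time stub pattern, with made-up names (CONFIG_FEATURE_X and feature_x_query() are not real kernel symbols):

```c
#include <stdio.h>

#define CONFIG_FEATURE_X 1	/* flip to 0 to compile the stub instead */

#if CONFIG_FEATURE_X
static int feature_x_query(long arg)
{
	return (int)(arg * 2);	/* stands in for the real query */
}
#else
static int feature_x_query(long arg)
{
	(void)arg;
	return -1;		/* "not available" sentinel, like NUMA_NO_NODE */
}
#endif

int main(void)
{
	/* Caller code is identical whichever variant was compiled in */
	printf("%d\n", feature_x_query(21));
	return 0;
}
```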
| 452 | 500 | /* |
|---|
| 453 | 501 | * Figure out to which domain a cpu belongs and stick it there. |
|---|
| 454 | 502 | * Return the id of the domain used. |
|---|
| 455 | 503 | */ |
|---|
| 456 | 504 | static int numa_setup_cpu(unsigned long lcpu) |
|---|
| 457 | 505 | { |
|---|
| 458 | | - int nid = -1; |
|---|
| 459 | 506 | struct device_node *cpu; |
|---|
| 507 | + int fcpu = cpu_first_thread_sibling(lcpu); |
|---|
| 508 | + int nid = NUMA_NO_NODE; |
|---|
| 509 | + |
|---|
| 510 | + if (!cpu_present(lcpu)) { |
|---|
| 511 | + set_cpu_numa_node(lcpu, first_online_node); |
|---|
| 512 | + return first_online_node; |
|---|
| 513 | + } |
|---|
| 460 | 514 | |
|---|
| 461 | 515 | /* |
|---|
| 462 | 516 | * If a valid cpu-to-node mapping is already available, use it |
|---|
| 463 | 517 | * directly instead of querying the firmware, since it represents |
|---|
| 464 | 518 | * the most recent mapping notified to us by the platform (eg: VPHN). |
|---|
| 519 | + * The cpu_to_node binding remains the same for all threads in the |
|---|
| 520 | + * core, so if a valid cpu-to-node mapping is already available for |
|---|
| 521 | + * the first thread in the core, use it. |
|---|
| 465 | 522 | */ |
|---|
| 466 | | - if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) { |
|---|
| 523 | + nid = numa_cpu_lookup_table[fcpu]; |
|---|
| 524 | + if (nid >= 0) { |
|---|
| 467 | 525 | map_cpu_to_node(lcpu, nid); |
|---|
| 468 | 526 | return nid; |
|---|
| 469 | 527 | } |
|---|
| 528 | + |
|---|
| 529 | + nid = vphn_get_nid(lcpu); |
|---|
| 530 | + if (nid != NUMA_NO_NODE) |
|---|
| 531 | + goto out_present; |
|---|
| 470 | 532 | |
|---|
| 471 | 533 | cpu = of_get_cpu_node(lcpu, NULL); |
|---|
| 472 | 534 | |
|---|
| .. | .. |
|---|
| 479 | 541 | } |
|---|
| 480 | 542 | |
|---|
| 481 | 543 | nid = of_node_to_nid_single(cpu); |
|---|
| 544 | + of_node_put(cpu); |
|---|
| 482 | 545 | |
|---|
| 483 | 546 | out_present: |
|---|
| 484 | 547 | if (nid < 0 || !node_possible(nid)) |
|---|
| 485 | 548 | nid = first_online_node; |
|---|
| 486 | 549 | |
|---|
| 550 | + /* |
|---|
| 551 | + * Update for the first thread of the core. All threads of a core |
|---|
| 552 | + * have to be part of the same node. This not only avoids querying |
|---|
| 553 | + * for every other thread in the core, but also avoids a case |
|---|
| 554 | + * where a virtual node associativity change causes subsequent |
|---|
| 555 | + * threads of a core to be associated with a different nid. However, |
|---|
| 556 | + * if the first thread is already online, expect it to have a valid mapping. |
|---|
| 557 | + */ |
|---|
| 558 | + if (fcpu != lcpu) { |
|---|
| 559 | + WARN_ON(cpu_online(fcpu)); |
|---|
| 560 | + map_cpu_to_node(fcpu, nid); |
|---|
| 561 | + } |
|---|
| 562 | + |
|---|
| 487 | 563 | map_cpu_to_node(lcpu, nid); |
|---|
| 488 | | - of_node_put(cpu); |
|---|
| 489 | 564 | out: |
|---|
| 490 | 565 | return nid; |
|---|
| 491 | 566 | } |
|---|
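A standalone sketch of the sibling caching introduced above: all threads of a core share one nid, so only the first thread's entry in the lookup table is consulted and updated. The table contents are mock values, and the first-sibling arithmetic assumes a power-of-two threads_per_core:

```c
#include <stdio.h>

#define NR_CPUS		16
#define NUMA_NO_NODE	(-1)

static int threads_per_core = 4;	/* mock; must be a power of two here */
static int numa_cpu_lookup_table[NR_CPUS];

/* Masks off the thread bits to find the first thread of the core */
static int cpu_first_thread_sibling(int cpu)
{
	return cpu & ~(threads_per_core - 1);
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		numa_cpu_lookup_table[cpu] = NUMA_NO_NODE;

	/* Say thread 4 (first thread of core 1) was resolved to node 2 */
	numa_cpu_lookup_table[4] = 2;

	/* Thread 6 reuses its first sibling's mapping: no firmware query */
	cpu = 6;
	printf("cpu %d -> nid %d\n", cpu,
	       numa_cpu_lookup_table[cpu_first_thread_sibling(cpu)]);
	return 0;
}
```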
| .. | .. |
|---|
| 575 | 650 | * Extract NUMA information from the ibm,dynamic-reconfiguration-memory |
|---|
| 576 | 651 | * node. This assumes n_mem_{addr,size}_cells have been set. |
|---|
| 577 | 652 | */ |
|---|
| 578 | | -static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, |
|---|
| 579 | | - const __be32 **usm) |
|---|
| 653 | +static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, |
|---|
| 654 | + const __be32 **usm, |
|---|
| 655 | + void *data) |
|---|
| 580 | 656 | { |
|---|
| 581 | 657 | unsigned int ranges, is_kexec_kdump = 0; |
|---|
| 582 | 658 | unsigned long base, size, sz; |
|---|
| .. | .. |
|---|
| 588 | 664 | */ |
|---|
| 589 | 665 | if ((lmb->flags & DRCONF_MEM_RESERVED) |
|---|
| 590 | 666 | || !(lmb->flags & DRCONF_MEM_ASSIGNED)) |
|---|
| 591 | | - return; |
|---|
| 667 | + return 0; |
|---|
| 592 | 668 | |
|---|
| 593 | 669 | if (*usm) |
|---|
| 594 | 670 | is_kexec_kdump = 1; |
|---|
| .. | .. |
|---|
| 600 | 676 | if (is_kexec_kdump) { |
|---|
| 601 | 677 | ranges = read_usm_ranges(usm); |
|---|
| 602 | 678 | if (!ranges) /* there are no (base, size) duple */ |
|---|
| 603 | | - return; |
|---|
| 679 | + return 0; |
|---|
| 604 | 680 | } |
|---|
| 605 | 681 | |
|---|
| 606 | 682 | do { |
|---|
| .. | .. |
|---|
| 617 | 693 | if (sz) |
|---|
| 618 | 694 | memblock_set_node(base, sz, &memblock.memory, nid); |
|---|
| 619 | 695 | } while (--ranges); |
|---|
| 696 | + |
|---|
| 697 | + return 0; |
|---|
| 620 | 698 | } |
|---|
| 621 | 699 | |
|---|
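The hunk above tracks a walk_drmem_lmbs() interface change: LMB callbacks now return int and take an opaque data pointer. A minimal standalone sketch of a walker with that shape (the struct fields, types, and walker itself are illustrative stand-ins, not the kernel API):

```c
#include <stdio.h>

/* Illustrative LMB record; the real struct drmem_lmb has more fields */
struct drmem_lmb {
	unsigned long base_addr;
	unsigned int flags;
};

typedef int (*drmem_cb)(struct drmem_lmb *lmb, const unsigned int **usm,
			void *data);

/* Toy walker: a non-zero callback return stops the walk early */
static int walk_lmbs(struct drmem_lmb *lmbs, int n, const unsigned int *usm,
		     drmem_cb cb, void *data)
{
	int i, ret;

	for (i = 0; i < n; i++) {
		ret = cb(&lmbs[i], &usm, data);
		if (ret)
			return ret;
	}
	return 0;
}

static int print_lmb(struct drmem_lmb *lmb, const unsigned int **usm,
		     void *data)
{
	(void)usm;
	printf("%s lmb @ 0x%lx\n", (const char *)data, lmb->base_addr);
	return 0;
}

int main(void)
{
	struct drmem_lmb lmbs[2] = { { 0x0, 0 }, { 0x10000000, 0 } };
	char tag[] = "scan";

	return walk_lmbs(lmbs, 2, NULL, print_lmb, tag);
}
```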
| 622 | 700 | static int __init parse_numa_properties(void) |
|---|
| .. | .. |
|---|
| 632 | 710 | |
|---|
| 633 | 711 | min_common_depth = find_min_common_depth(); |
|---|
| 634 | 712 | |
|---|
| 635 | | - if (min_common_depth < 0) |
|---|
| 713 | + if (min_common_depth < 0) { |
|---|
| 714 | + /* |
|---|
| 715 | + * If we fail to parse min_common_depth from the device tree, |
|---|
| 716 | + * mark NUMA as disabled and boot with it disabled. |
|---|
| 717 | + */ |
|---|
| 718 | + numa_enabled = false; |
|---|
| 636 | 719 | return min_common_depth; |
|---|
| 720 | + } |
|---|
| 637 | 721 | |
|---|
| 638 | 722 | dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); |
|---|
| 639 | 723 | |
|---|
| .. | .. |
|---|
| 644 | 728 | */ |
|---|
| 645 | 729 | for_each_present_cpu(i) { |
|---|
| 646 | 730 | struct device_node *cpu; |
|---|
| 647 | | - int nid; |
|---|
| 648 | | - |
|---|
| 649 | | - cpu = of_get_cpu_node(i, NULL); |
|---|
| 650 | | - BUG_ON(!cpu); |
|---|
| 651 | | - nid = of_node_to_nid_single(cpu); |
|---|
| 652 | | - of_node_put(cpu); |
|---|
| 731 | + int nid = vphn_get_nid(i); |
|---|
| 653 | 732 | |
|---|
| 654 | 733 | /* |
|---|
| 655 | 734 | * Don't fall back to default_nid yet -- we will plug |
|---|
| 656 | 735 | * cpus into nodes once the memory scan has discovered |
|---|
| 657 | 736 | * the topology. |
|---|
| 658 | 737 | */ |
|---|
| 659 | | - if (nid < 0) |
|---|
| 660 | | - continue; |
|---|
| 661 | | - node_set_online(nid); |
|---|
| 738 | + if (nid == NUMA_NO_NODE) { |
|---|
| 739 | + cpu = of_get_cpu_node(i, NULL); |
|---|
| 740 | + BUG_ON(!cpu); |
|---|
| 741 | + nid = of_node_to_nid_single(cpu); |
|---|
| 742 | + of_node_put(cpu); |
|---|
| 743 | + } |
|---|
| 744 | + |
|---|
| 745 | + /* node_set_online() is UB if 'nid' is negative */ |
|---|
| 746 | + if (likely(nid >= 0)) |
|---|
| 747 | + node_set_online(nid); |
|---|
| 662 | 748 | } |
|---|
| 663 | 749 | |
|---|
| 664 | 750 | get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); |
|---|
| .. | .. |
|---|
| 712 | 798 | */ |
|---|
| 713 | 799 | memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); |
|---|
| 714 | 800 | if (memory) { |
|---|
| 715 | | - walk_drmem_lmbs(memory, numa_setup_drmem_lmb); |
|---|
| 801 | + walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb); |
|---|
| 716 | 802 | of_node_put(memory); |
|---|
| 717 | 803 | } |
|---|
| 718 | 804 | |
|---|
| .. | .. |
|---|
| 725 | 811 | unsigned long total_ram = memblock_phys_mem_size(); |
|---|
| 726 | 812 | unsigned long start_pfn, end_pfn; |
|---|
| 727 | 813 | unsigned int nid = 0; |
|---|
| 728 | | - struct memblock_region *reg; |
|---|
| 814 | + int i; |
|---|
| 729 | 815 | |
|---|
| 730 | 816 | printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", |
|---|
| 731 | 817 | top_of_ram, total_ram); |
|---|
| 732 | 818 | printk(KERN_DEBUG "Memory hole size: %ldMB\n", |
|---|
| 733 | 819 | (top_of_ram - total_ram) >> 20); |
|---|
| 734 | 820 | |
|---|
| 735 | | - for_each_memblock(memory, reg) { |
|---|
| 736 | | - start_pfn = memblock_region_memory_base_pfn(reg); |
|---|
| 737 | | - end_pfn = memblock_region_memory_end_pfn(reg); |
|---|
| 738 | | - |
|---|
| 821 | + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { |
|---|
| 739 | 822 | fake_numa_create_new_node(end_pfn, &nid); |
|---|
| 740 | 823 | memblock_set_node(PFN_PHYS(start_pfn), |
|---|
| 741 | 824 | PFN_PHYS(end_pfn - start_pfn), |
|---|
| .. | .. |
|---|
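The hunk above replaces the open-coded for_each_memblock() walk with for_each_mem_pfn_range(), which hands back clamped pfn ranges directly. A standalone sketch of that iteration style over mock ranges (the macro here is a stand-in, not the kernel implementation):

```c
#include <stdio.h>

struct range { unsigned long start_pfn, end_pfn; };

/* Mock memory ranges standing in for memblock.memory */
static struct range mem[] = { { 0x0, 0x8000 }, { 0x10000, 0x18000 } };
#define NRANGES ((int)(sizeof(mem) / sizeof(mem[0])))

/* Userspace stand-in for for_each_mem_pfn_range(i, nid, &s, &e, NULL) */
#define for_each_mock_pfn_range(i, s, e)				\
	for ((i) = 0;							\
	     (i) < NRANGES &&						\
	     ((s) = mem[(i)].start_pfn, (e) = mem[(i)].end_pfn, 1);	\
	     (i)++)

int main(void)
{
	unsigned long start_pfn, end_pfn;
	int i;

	/* Each iteration yields one contiguous pfn range, ready to use */
	for_each_mock_pfn_range(i, start_pfn, end_pfn)
		printf("range %d: pfn 0x%lx..0x%lx\n", i, start_pfn, end_pfn);
	return 0;
}
```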
| 749 | 832 | unsigned int node; |
|---|
| 750 | 833 | unsigned int cpu, count; |
|---|
| 751 | 834 | |
|---|
| 752 | | - if (min_common_depth == -1 || !numa_enabled) |
|---|
| 835 | + if (!numa_enabled) |
|---|
| 753 | 836 | return; |
|---|
| 754 | 837 | |
|---|
| 755 | 838 | for_each_online_node(node) { |
|---|
| .. | .. |
|---|
| 788 | 871 | void *nd; |
|---|
| 789 | 872 | int tnid; |
|---|
| 790 | 873 | |
|---|
| 791 | | - nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); |
|---|
| 874 | + nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); |
|---|
| 875 | + if (!nd_pa) |
|---|
| 876 | + panic("Cannot allocate %zu bytes for node %d data\n", |
|---|
| 877 | + nd_size, nid); |
|---|
| 878 | + |
|---|
| 792 | 879 | nd = __va(nd_pa); |
|---|
| 793 | 880 | |
|---|
| 794 | 881 | /* report and initialize */ |
|---|
| .. | .. |
|---|
| 808 | 895 | static void __init find_possible_nodes(void) |
|---|
| 809 | 896 | { |
|---|
| 810 | 897 | struct device_node *rtas; |
|---|
| 811 | | - u32 numnodes, i; |
|---|
| 898 | + const __be32 *domains = NULL; |
|---|
| 899 | + int prop_length, max_nodes; |
|---|
| 900 | + u32 i; |
|---|
| 812 | 901 | |
|---|
| 813 | | - if (min_common_depth <= 0) |
|---|
| 902 | + if (!numa_enabled) |
|---|
| 814 | 903 | return; |
|---|
| 815 | 904 | |
|---|
| 816 | 905 | rtas = of_find_node_by_path("/rtas"); |
|---|
| 817 | 906 | if (!rtas) |
|---|
| 818 | 907 | return; |
|---|
| 819 | 908 | |
|---|
| 820 | | - if (of_property_read_u32_index(rtas, |
|---|
| 821 | | - "ibm,max-associativity-domains", |
|---|
| 822 | | - min_common_depth, &numnodes)) |
|---|
| 823 | | - goto out; |
|---|
| 909 | + /* |
|---|
| 910 | + * ibm,current-associativity-domains is a fairly recent property. If |
|---|
| 911 | + * it doesn't exist, then fall back on ibm,max-associativity-domains. |
|---|
| 912 | + * Current denotes what the platform can support, compared to max, |
|---|
| 913 | + * which denotes what the Hypervisor can support. |
|---|
| 914 | + * |
|---|
| 915 | + * If the LPAR is migratable, new nodes might be activated after an LPM, |
|---|
| 916 | + * so we should consider the max number in that case. |
|---|
| 917 | + */ |
|---|
| 918 | + if (!of_get_property(of_root, "ibm,migratable-partition", NULL)) |
|---|
| 919 | + domains = of_get_property(rtas, |
|---|
| 920 | + "ibm,current-associativity-domains", |
|---|
| 921 | + &prop_length); |
|---|
| 922 | + if (!domains) { |
|---|
| 923 | + domains = of_get_property(rtas, "ibm,max-associativity-domains", |
|---|
| 924 | + &prop_length); |
|---|
| 925 | + if (!domains) |
|---|
| 926 | + goto out; |
|---|
| 927 | + } |
|---|
| 824 | 928 | |
|---|
| 825 | | - for (i = 0; i < numnodes; i++) { |
|---|
| 929 | + max_nodes = of_read_number(&domains[min_common_depth], 1); |
|---|
| 930 | + pr_info("Partition configured for %d NUMA nodes.\n", max_nodes); |
|---|
| 931 | + |
|---|
| 932 | + for (i = 0; i < max_nodes; i++) { |
|---|
| 826 | 933 | if (!node_possible(i)) |
|---|
| 827 | 934 | node_set(i, node_possible_map); |
|---|
| 828 | 935 | } |
|---|
| 936 | + |
|---|
| 937 | + prop_length /= sizeof(int); |
|---|
| 938 | + if (prop_length > min_common_depth + 2) |
|---|
| 939 | + coregroup_enabled = 1; |
|---|
| 829 | 940 | |
|---|
| 830 | 941 | out: |
|---|
| 831 | 942 | of_node_put(rtas); |
|---|
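A standalone sketch of decoding an associativity-domains property as find_possible_nodes() does above, with illustrative cell values: the entry at min_common_depth gives the node count, and levels beyond min_common_depth + 2 indicate coregroup support:

```c
#include <stdio.h>

int main(void)
{
	int min_common_depth = 4;	/* assumed for this sketch */
	/* Mock property cells: { count, lvl1, lvl2, lvl3, nodes, lvl5, lvl6 } */
	unsigned int domains[] = {6, 1, 1, 2, 8, 32, 64};
	int prop_length = (int)sizeof(domains);	/* bytes, as of_get_property reports */
	int max_nodes, coregroup_enabled = 0;

	/* The cell at min_common_depth is the NUMA node count at that level */
	max_nodes = (int)domains[min_common_depth];

	/* Extra levels beyond min_common_depth + 2 signal coregroup support */
	prop_length /= (int)sizeof(int);
	if (prop_length > min_common_depth + 2)
		coregroup_enabled = 1;

	printf("max_nodes=%d coregroup=%d\n", max_nodes, coregroup_enabled);
	return 0;	/* prints max_nodes=8 coregroup=1 */
}
```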
| .. | .. |
|---|
| 834 | 945 | void __init mem_topology_setup(void) |
|---|
| 835 | 946 | { |
|---|
| 836 | 947 | int cpu; |
|---|
| 948 | + |
|---|
| 949 | + /* |
|---|
| 950 | + * Linux/mm assumes node 0 to be online at boot. However, this is not |
|---|
| 951 | + * true on PowerPC, where node 0 is similar to any other node: it |
|---|
| 952 | + * could be a cpuless, memoryless node. So force node 0 to be offline |
|---|
| 953 | + * for now. This prevents a cpuless, memoryless node 0 from showing |
|---|
| 954 | + * up unnecessarily as online. If a node has cpus or memory that need |
|---|
| 955 | + * to be online, then the node will be marked online anyway. |
|---|
| 956 | + */ |
|---|
| 957 | + node_set_offline(0); |
|---|
| 837 | 958 | |
|---|
| 838 | 959 | if (parse_numa_properties()) |
|---|
| 839 | 960 | setup_nonnuma(); |
|---|
| .. | .. |
|---|
| 852 | 973 | |
|---|
| 853 | 974 | reset_numa_cpu_lookup_table(); |
|---|
| 854 | 975 | |
|---|
| 855 | | - for_each_present_cpu(cpu) |
|---|
| 976 | + for_each_possible_cpu(cpu) { |
|---|
| 977 | + /* |
|---|
| 978 | + * Powerpc with CONFIG_NUMA always used to have a node 0, |
|---|
| 979 | + * even if it was memoryless or cpuless. For all cpus that |
|---|
| 980 | + * are possible but not present, cpu_to_node() would point |
|---|
| 981 | + * to node 0. To remove a cpuless, memoryless dummy node, |
|---|
| 982 | + * powerpc needs to make sure all possible but not present |
|---|
| 983 | + * cpu_to_node entries are set to a proper node. |
|---|
| 984 | + */ |
|---|
| 856 | 985 | numa_setup_cpu(cpu); |
|---|
| 986 | + } |
|---|
| 857 | 987 | } |
|---|
| 858 | 988 | |
|---|
| 859 | 989 | void __init initmem_init(void) |
|---|
| .. | .. |
|---|
| 870 | 1000 | |
|---|
| 871 | 1001 | get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); |
|---|
| 872 | 1002 | setup_node_data(nid, start_pfn, end_pfn); |
|---|
| 873 | | - sparse_memory_present_with_active_regions(nid); |
|---|
| 874 | 1003 | } |
|---|
| 875 | 1004 | |
|---|
| 876 | 1005 | sparse_init(); |
|---|
| .. | .. |
|---|
| 905 | 1034 | } |
|---|
| 906 | 1035 | early_param("numa", early_numa); |
|---|
| 907 | 1036 | |
|---|
| 908 | | -static bool topology_updates_enabled = true; |
|---|
| 909 | | - |
|---|
| 910 | | -static int __init early_topology_updates(char *p) |
|---|
| 911 | | -{ |
|---|
| 912 | | - if (!p) |
|---|
| 913 | | - return 0; |
|---|
| 914 | | - |
|---|
| 915 | | - if (!strcmp(p, "off")) { |
|---|
| 916 | | - pr_info("Disabling topology updates\n"); |
|---|
| 917 | | - topology_updates_enabled = false; |
|---|
| 918 | | - } |
|---|
| 919 | | - |
|---|
| 920 | | - return 0; |
|---|
| 921 | | -} |
|---|
| 922 | | -early_param("topology_updates", early_topology_updates); |
|---|
| 923 | | - |
|---|
| 924 | 1037 | #ifdef CONFIG_MEMORY_HOTPLUG |
|---|
| 925 | 1038 | /* |
|---|
| 926 | 1039 | * Find the node associated with a hot added memory section for |
|---|
| .. | .. |
|---|
| 931 | 1044 | { |
|---|
| 932 | 1045 | struct drmem_lmb *lmb; |
|---|
| 933 | 1046 | unsigned long lmb_size; |
|---|
| 934 | | - int nid = -1; |
|---|
| 1047 | + int nid = NUMA_NO_NODE; |
|---|
| 935 | 1048 | |
|---|
| 936 | 1049 | lmb_size = drmem_lmb_size(); |
|---|
| 937 | 1050 | |
|---|
| .. | .. |
|---|
| 961 | 1074 | static int hot_add_node_scn_to_nid(unsigned long scn_addr) |
|---|
| 962 | 1075 | { |
|---|
| 963 | 1076 | struct device_node *memory; |
|---|
| 964 | | - int nid = -1; |
|---|
| 1077 | + int nid = NUMA_NO_NODE; |
|---|
| 965 | 1078 | |
|---|
| 966 | 1079 | for_each_node_by_type(memory, "memory") { |
|---|
| 967 | 1080 | unsigned long start, size; |
|---|
| .. | .. |
|---|
| 1006 | 1119 | struct device_node *memory = NULL; |
|---|
| 1007 | 1120 | int nid; |
|---|
| 1008 | 1121 | |
|---|
| 1009 | | - if (!numa_enabled || (min_common_depth < 0)) |
|---|
| 1122 | + if (!numa_enabled) |
|---|
| 1010 | 1123 | return first_online_node; |
|---|
| 1011 | 1124 | |
|---|
| 1012 | 1125 | memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); |
|---|
| .. | .. |
|---|
| 1059 | 1172 | |
|---|
| 1060 | 1173 | /* Virtual Processor Home Node (VPHN) support */ |
|---|
| 1061 | 1174 | #ifdef CONFIG_PPC_SPLPAR |
|---|
| 1062 | | - |
|---|
| 1063 | | -#include "vphn.h" |
|---|
| 1064 | | - |
|---|
| 1065 | | -struct topology_update_data { |
|---|
| 1066 | | - struct topology_update_data *next; |
|---|
| 1067 | | - unsigned int cpu; |
|---|
| 1068 | | - int old_nid; |
|---|
| 1069 | | - int new_nid; |
|---|
| 1070 | | -}; |
|---|
| 1071 | | - |
|---|
| 1072 | | -#define TOPOLOGY_DEF_TIMER_SECS 60 |
|---|
| 1073 | | - |
|---|
| 1074 | | -static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS]; |
|---|
| 1075 | | -static cpumask_t cpu_associativity_changes_mask; |
|---|
| 1076 | | -static int vphn_enabled; |
|---|
| 1077 | | -static int prrn_enabled; |
|---|
| 1078 | | -static void reset_topology_timer(void); |
|---|
| 1079 | | -static int topology_timer_secs = 1; |
|---|
| 1080 | 1175 | static int topology_inited; |
|---|
| 1081 | | - |
|---|
| 1082 | | -/* |
|---|
| 1083 | | - * Change polling interval for associativity changes. |
|---|
| 1084 | | - */ |
|---|
| 1085 | | -int timed_topology_update(int nsecs) |
|---|
| 1086 | | -{ |
|---|
| 1087 | | - if (vphn_enabled) { |
|---|
| 1088 | | - if (nsecs > 0) |
|---|
| 1089 | | - topology_timer_secs = nsecs; |
|---|
| 1090 | | - else |
|---|
| 1091 | | - topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS; |
|---|
| 1092 | | - |
|---|
| 1093 | | - reset_topology_timer(); |
|---|
| 1094 | | - } |
|---|
| 1095 | | - |
|---|
| 1096 | | - return 0; |
|---|
| 1097 | | -} |
|---|
| 1098 | | - |
|---|
| 1099 | | -/* |
|---|
| 1100 | | - * Store the current values of the associativity change counters in the |
|---|
| 1101 | | - * hypervisor. |
|---|
| 1102 | | - */ |
|---|
| 1103 | | -static void setup_cpu_associativity_change_counters(void) |
|---|
| 1104 | | -{ |
|---|
| 1105 | | - int cpu; |
|---|
| 1106 | | - |
|---|
| 1107 | | - /* The VPHN feature supports a maximum of 8 reference points */ |
|---|
| 1108 | | - BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8); |
|---|
| 1109 | | - |
|---|
| 1110 | | - for_each_possible_cpu(cpu) { |
|---|
| 1111 | | - int i; |
|---|
| 1112 | | - u8 *counts = vphn_cpu_change_counts[cpu]; |
|---|
| 1113 | | - volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; |
|---|
| 1114 | | - |
|---|
| 1115 | | - for (i = 0; i < distance_ref_points_depth; i++) |
|---|
| 1116 | | - counts[i] = hypervisor_counts[i]; |
|---|
| 1117 | | - } |
|---|
| 1118 | | -} |
|---|
| 1119 | | - |
|---|
| 1120 | | -/* |
|---|
| 1121 | | - * The hypervisor maintains a set of 8 associativity change counters in |
|---|
| 1122 | | - * the VPA of each cpu that correspond to the associativity levels in the |
|---|
| 1123 | | - * ibm,associativity-reference-points property. When an associativity |
|---|
| 1124 | | - * level changes, the corresponding counter is incremented. |
|---|
| 1125 | | - * |
|---|
| 1126 | | - * Set a bit in cpu_associativity_changes_mask for each cpu whose home |
|---|
| 1127 | | - * node associativity levels have changed. |
|---|
| 1128 | | - * |
|---|
| 1129 | | - * Returns the number of cpus with unhandled associativity changes. |
|---|
| 1130 | | - */ |
|---|
| 1131 | | -static int update_cpu_associativity_changes_mask(void) |
|---|
| 1132 | | -{ |
|---|
| 1133 | | - int cpu; |
|---|
| 1134 | | - cpumask_t *changes = &cpu_associativity_changes_mask; |
|---|
| 1135 | | - |
|---|
| 1136 | | - for_each_possible_cpu(cpu) { |
|---|
| 1137 | | - int i, changed = 0; |
|---|
| 1138 | | - u8 *counts = vphn_cpu_change_counts[cpu]; |
|---|
| 1139 | | - volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; |
|---|
| 1140 | | - |
|---|
| 1141 | | - for (i = 0; i < distance_ref_points_depth; i++) { |
|---|
| 1142 | | - if (hypervisor_counts[i] != counts[i]) { |
|---|
| 1143 | | - counts[i] = hypervisor_counts[i]; |
|---|
| 1144 | | - changed = 1; |
|---|
| 1145 | | - } |
|---|
| 1146 | | - } |
|---|
| 1147 | | - if (changed) { |
|---|
| 1148 | | - cpumask_or(changes, changes, cpu_sibling_mask(cpu)); |
|---|
| 1149 | | - cpu = cpu_last_thread_sibling(cpu); |
|---|
| 1150 | | - } |
|---|
| 1151 | | - } |
|---|
| 1152 | | - |
|---|
| 1153 | | - return cpumask_weight(changes); |
|---|
| 1154 | | -} |
|---|
| 1155 | 1176 | |
|---|
| 1156 | 1177 | /* |
|---|
| 1157 | 1178 | * Retrieve the new associativity information for a virtual processor's |
|---|
| 1158 | 1179 | * home node. |
|---|
| 1159 | 1180 | */ |
|---|
| 1160 | | -static long hcall_vphn(unsigned long cpu, __be32 *associativity) |
|---|
| 1161 | | -{ |
|---|
| 1162 | | - long rc; |
|---|
| 1163 | | - long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; |
|---|
| 1164 | | - u64 flags = 1; |
|---|
| 1165 | | - int hwcpu = get_hard_smp_processor_id(cpu); |
|---|
| 1166 | | - |
|---|
| 1167 | | - rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu); |
|---|
| 1168 | | - vphn_unpack_associativity(retbuf, associativity); |
|---|
| 1169 | | - |
|---|
| 1170 | | - return rc; |
|---|
| 1171 | | -} |
|---|
| 1172 | | - |
|---|
| 1173 | 1181 | static long vphn_get_associativity(unsigned long cpu, |
|---|
| 1174 | 1182 | __be32 *associativity) |
|---|
| 1175 | 1183 | { |
|---|
| 1176 | 1184 | long rc; |
|---|
| 1177 | 1185 | |
|---|
| 1178 | | - rc = hcall_vphn(cpu, associativity); |
|---|
| 1186 | + rc = hcall_vphn(get_hard_smp_processor_id(cpu), |
|---|
| 1187 | + VPHN_FLAG_VCPU, associativity); |
|---|
| 1179 | 1188 | |
|---|
| 1180 | 1189 | switch (rc) { |
|---|
| 1181 | | - case H_FUNCTION: |
|---|
| 1182 | | - printk_once(KERN_INFO |
|---|
| 1183 | | - "VPHN is not supported. Disabling polling...\n"); |
|---|
| 1184 | | - stop_topology_update(); |
|---|
| 1185 | | - break; |
|---|
| 1186 | | - case H_HARDWARE: |
|---|
| 1187 | | - printk(KERN_ERR |
|---|
| 1188 | | - "hcall_vphn() experienced a hardware fault " |
|---|
| 1189 | | - "preventing VPHN. Disabling polling...\n"); |
|---|
| 1190 | | - stop_topology_update(); |
|---|
| 1191 | | - break; |
|---|
| 1192 | 1190 | case H_SUCCESS: |
|---|
| 1193 | 1191 | dbg("VPHN hcall succeeded. Reset polling...\n"); |
|---|
| 1194 | | - timed_topology_update(0); |
|---|
| 1192 | + goto out; |
|---|
| 1193 | + |
|---|
| 1194 | + case H_FUNCTION: |
|---|
| 1195 | + pr_err_ratelimited("VPHN unsupported. Disabling polling...\n"); |
|---|
| 1196 | + break; |
|---|
| 1197 | + case H_HARDWARE: |
|---|
| 1198 | + pr_err_ratelimited("hcall_vphn() experienced a hardware fault " |
|---|
| 1199 | + "preventing VPHN. Disabling polling...\n"); |
|---|
| 1200 | + break; |
|---|
| 1201 | + case H_PARAMETER: |
|---|
| 1202 | + pr_err_ratelimited("hcall_vphn() was passed an invalid parameter. " |
|---|
| 1203 | + "Disabling polling...\n"); |
|---|
| 1204 | + break; |
|---|
| 1205 | + default: |
|---|
| 1206 | + pr_err_ratelimited("hcall_vphn() returned %ld. Disabling polling...\n" |
|---|
| 1207 | + , rc); |
|---|
| 1195 | 1208 | break; |
|---|
| 1196 | 1209 | } |
|---|
| 1197 | | - |
|---|
| 1210 | +out: |
|---|
| 1198 | 1211 | return rc; |
|---|
| 1199 | 1212 | } |
|---|
| 1200 | 1213 | |
|---|
| .. | .. |
|---|
| 1237 | 1250 | return new_nid; |
|---|
| 1238 | 1251 | } |
|---|
| 1239 | 1252 | |
|---|
| 1240 | | -/* |
|---|
| 1241 | | - * Update the CPU maps and sysfs entries for a single CPU when its NUMA |
|---|
| 1242 | | - * characteristics change. This function doesn't perform any locking and is |
|---|
| 1243 | | - * only safe to call from stop_machine(). |
|---|
| 1244 | | - */ |
|---|
| 1245 | | -static int update_cpu_topology(void *data) |
|---|
| 1253 | +int cpu_to_coregroup_id(int cpu) |
|---|
| 1246 | 1254 | { |
|---|
| 1247 | | - struct topology_update_data *update; |
|---|
| 1248 | | - unsigned long cpu; |
|---|
| 1255 | + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; |
|---|
| 1256 | + int index; |
|---|
| 1249 | 1257 | |
|---|
| 1250 | | - if (!data) |
|---|
| 1251 | | - return -EINVAL; |
|---|
| 1258 | + if (cpu < 0 || cpu >= nr_cpu_ids) |
|---|
| 1259 | + return -1; |
|---|
| 1252 | 1260 | |
|---|
| 1253 | | - cpu = smp_processor_id(); |
|---|
| 1254 | | - |
|---|
| 1255 | | - for (update = data; update; update = update->next) { |
|---|
| 1256 | | - int new_nid = update->new_nid; |
|---|
| 1257 | | - if (cpu != update->cpu) |
|---|
| 1258 | | - continue; |
|---|
| 1259 | | - |
|---|
| 1260 | | - unmap_cpu_from_node(cpu); |
|---|
| 1261 | | - map_cpu_to_node(cpu, new_nid); |
|---|
| 1262 | | - set_cpu_numa_node(cpu, new_nid); |
|---|
| 1263 | | - set_cpu_numa_mem(cpu, local_memory_node(new_nid)); |
|---|
| 1264 | | - vdso_getcpu_init(); |
|---|
| 1265 | | - } |
|---|
| 1266 | | - |
|---|
| 1267 | | - return 0; |
|---|
| 1268 | | -} |
|---|
| 1269 | | - |
|---|
| 1270 | | -static int update_lookup_table(void *data) |
|---|
| 1271 | | -{ |
|---|
| 1272 | | - struct topology_update_data *update; |
|---|
| 1273 | | - |
|---|
| 1274 | | - if (!data) |
|---|
| 1275 | | - return -EINVAL; |
|---|
| 1276 | | - |
|---|
| 1277 | | - /* |
|---|
| 1278 | | - * Upon topology update, the numa-cpu lookup table needs to be updated |
|---|
| 1279 | | - * for all threads in the core, including offline CPUs, to ensure that |
|---|
| 1280 | | - * future hotplug operations respect the cpu-to-node associativity |
|---|
| 1281 | | - * properly. |
|---|
| 1282 | | - */ |
|---|
| 1283 | | - for (update = data; update; update = update->next) { |
|---|
| 1284 | | - int nid, base, j; |
|---|
| 1285 | | - |
|---|
| 1286 | | - nid = update->new_nid; |
|---|
| 1287 | | - base = cpu_first_thread_sibling(update->cpu); |
|---|
| 1288 | | - |
|---|
| 1289 | | - for (j = 0; j < threads_per_core; j++) { |
|---|
| 1290 | | - update_numa_cpu_lookup_table(base + j, nid); |
|---|
| 1291 | | - } |
|---|
| 1292 | | - } |
|---|
| 1293 | | - |
|---|
| 1294 | | - return 0; |
|---|
| 1295 | | -} |
|---|
| 1296 | | - |
|---|
| 1297 | | -/* |
|---|
| 1298 | | - * Update the node maps and sysfs entries for each cpu whose home node |
|---|
| 1299 | | - * has changed. Returns 1 when the topology has changed, and 0 otherwise. |
|---|
| 1300 | | - * |
|---|
| 1301 | | - * cpus_locked says whether we already hold cpu_hotplug_lock. |
|---|
| 1302 | | - */ |
|---|
| 1303 | | -int numa_update_cpu_topology(bool cpus_locked) |
|---|
| 1304 | | -{ |
|---|
| 1305 | | - unsigned int cpu, sibling, changed = 0; |
|---|
| 1306 | | - struct topology_update_data *updates, *ud; |
|---|
| 1307 | | - cpumask_t updated_cpus; |
|---|
| 1308 | | - struct device *dev; |
|---|
| 1309 | | - int weight, new_nid, i = 0; |
|---|
| 1310 | | - |
|---|
| 1311 | | - if (!prrn_enabled && !vphn_enabled && topology_inited) |
|---|
| 1312 | | - return 0; |
|---|
| 1313 | | - |
|---|
| 1314 | | - weight = cpumask_weight(&cpu_associativity_changes_mask); |
|---|
| 1315 | | - if (!weight) |
|---|
| 1316 | | - return 0; |
|---|
| 1317 | | - |
|---|
| 1318 | | - updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL); |
|---|
| 1319 | | - if (!updates) |
|---|
| 1320 | | - return 0; |
|---|
| 1321 | | - |
|---|
| 1322 | | - cpumask_clear(&updated_cpus); |
|---|
| 1323 | | - |
|---|
| 1324 | | - for_each_cpu(cpu, &cpu_associativity_changes_mask) { |
|---|
| 1325 | | - /* |
|---|
| 1326 | | - * If siblings aren't flagged for changes, updates list |
|---|
| 1327 | | - * will be too short. Skip on this update and set for next |
|---|
| 1328 | | - * update. |
|---|
| 1329 | | - */ |
|---|
| 1330 | | - if (!cpumask_subset(cpu_sibling_mask(cpu), |
|---|
| 1331 | | - &cpu_associativity_changes_mask)) { |
|---|
| 1332 | | - pr_info("Sibling bits not set for associativity " |
|---|
| 1333 | | - "change, cpu%d\n", cpu); |
|---|
| 1334 | | - cpumask_or(&cpu_associativity_changes_mask, |
|---|
| 1335 | | - &cpu_associativity_changes_mask, |
|---|
| 1336 | | - cpu_sibling_mask(cpu)); |
|---|
| 1337 | | - cpu = cpu_last_thread_sibling(cpu); |
|---|
| 1338 | | - continue; |
|---|
| 1339 | | - } |
|---|
| 1340 | | - |
|---|
| 1341 | | - new_nid = find_and_online_cpu_nid(cpu); |
|---|
| 1342 | | - |
|---|
| 1343 | | - if (new_nid == numa_cpu_lookup_table[cpu]) { |
|---|
| 1344 | | - cpumask_andnot(&cpu_associativity_changes_mask, |
|---|
| 1345 | | - &cpu_associativity_changes_mask, |
|---|
| 1346 | | - cpu_sibling_mask(cpu)); |
|---|
| 1347 | | - dbg("Assoc chg gives same node %d for cpu%d\n", |
|---|
| 1348 | | - new_nid, cpu); |
|---|
| 1349 | | - cpu = cpu_last_thread_sibling(cpu); |
|---|
| 1350 | | - continue; |
|---|
| 1351 | | - } |
|---|
| 1352 | | - |
|---|
| 1353 | | - for_each_cpu(sibling, cpu_sibling_mask(cpu)) { |
|---|
| 1354 | | - ud = &updates[i++]; |
|---|
| 1355 | | - ud->next = &updates[i]; |
|---|
| 1356 | | - ud->cpu = sibling; |
|---|
| 1357 | | - ud->new_nid = new_nid; |
|---|
| 1358 | | - ud->old_nid = numa_cpu_lookup_table[sibling]; |
|---|
| 1359 | | - cpumask_set_cpu(sibling, &updated_cpus); |
|---|
| 1360 | | - } |
|---|
| 1361 | | - cpu = cpu_last_thread_sibling(cpu); |
|---|
| 1362 | | - } |
|---|
| 1363 | | - |
|---|
| 1364 | | - /* |
|---|
| 1365 | | - * Prevent processing of 'updates' from overflowing array |
|---|
| 1366 | | - * where last entry filled in a 'next' pointer. |
|---|
| 1367 | | - */ |
|---|
| 1368 | | - if (i) |
|---|
| 1369 | | - updates[i-1].next = NULL; |
|---|
| 1370 | | - |
|---|
| 1371 | | - pr_debug("Topology update for the following CPUs:\n"); |
|---|
| 1372 | | - if (cpumask_weight(&updated_cpus)) { |
|---|
| 1373 | | - for (ud = &updates[0]; ud; ud = ud->next) { |
|---|
| 1374 | | - pr_debug("cpu %d moving from node %d " |
|---|
| 1375 | | - "to %d\n", ud->cpu, |
|---|
| 1376 | | - ud->old_nid, ud->new_nid); |
|---|
| 1377 | | - } |
|---|
| 1378 | | - } |
|---|
| 1379 | | - |
|---|
| 1380 | | - /* |
|---|
| 1381 | | - * In cases where we have nothing to update (because the updates list |
|---|
| 1382 | | - * is too short or because the new topology is same as the old one), |
|---|
| 1383 | | - * skip invoking update_cpu_topology() via stop-machine(). This is |
|---|
| 1384 | | - * necessary (and not just a fast-path optimization) since stop-machine |
|---|
| 1385 | | - * can end up electing a random CPU to run update_cpu_topology(), and |
|---|
| 1386 | | - * thus trick us into setting up incorrect cpu-node mappings (since |
|---|
| 1387 | | - * 'updates' is kzalloc()'ed). |
|---|
| 1388 | | - * |
|---|
| 1389 | | - * And for the similar reason, we will skip all the following updating. |
|---|
| 1390 | | - */ |
|---|
| 1391 | | - if (!cpumask_weight(&updated_cpus)) |
|---|
| 1261 | + if (!coregroup_enabled) |
|---|
| 1392 | 1262 | goto out; |
|---|
| 1393 | 1263 | |
|---|
| 1394 | | - if (cpus_locked) |
|---|
| 1395 | | - stop_machine_cpuslocked(update_cpu_topology, &updates[0], |
|---|
| 1396 | | - &updated_cpus); |
|---|
| 1397 | | - else |
|---|
| 1398 | | - stop_machine(update_cpu_topology, &updates[0], &updated_cpus); |
|---|
| 1264 | + if (!firmware_has_feature(FW_FEATURE_VPHN)) |
|---|
| 1265 | + goto out; |
|---|
| 1399 | 1266 | |
|---|
| 1400 | | - /* |
|---|
| 1401 | | - * Update the numa-cpu lookup table with the new mappings, even for |
|---|
| 1402 | | - * offline CPUs. It is best to perform this update from the stop- |
|---|
| 1403 | | - * machine context. |
|---|
| 1404 | | - */ |
|---|
| 1405 | | - if (cpus_locked) |
|---|
| 1406 | | - stop_machine_cpuslocked(update_lookup_table, &updates[0], |
|---|
| 1407 | | - cpumask_of(raw_smp_processor_id())); |
|---|
| 1408 | | - else |
|---|
| 1409 | | - stop_machine(update_lookup_table, &updates[0], |
|---|
| 1410 | | - cpumask_of(raw_smp_processor_id())); |
|---|
| 1267 | + if (vphn_get_associativity(cpu, associativity)) |
|---|
| 1268 | + goto out; |
|---|
| 1411 | 1269 | |
|---|
| 1412 | | - for (ud = &updates[0]; ud; ud = ud->next) { |
|---|
| 1413 | | - unregister_cpu_under_node(ud->cpu, ud->old_nid); |
|---|
| 1414 | | - register_cpu_under_node(ud->cpu, ud->new_nid); |
|---|
| 1415 | | - |
|---|
| 1416 | | - dev = get_cpu_device(ud->cpu); |
|---|
| 1417 | | - if (dev) |
|---|
| 1418 | | - kobject_uevent(&dev->kobj, KOBJ_CHANGE); |
|---|
| 1419 | | - cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask); |
|---|
| 1420 | | - changed = 1; |
|---|
| 1421 | | - } |
|---|
| 1270 | + index = of_read_number(associativity, 1); |
|---|
| 1271 | + if (index > min_common_depth + 1) |
|---|
| 1272 | + return of_read_number(&associativity[index - 1], 1); |
|---|
| 1422 | 1273 | |
|---|
| 1423 | 1274 | out: |
|---|
| 1424 | | - kfree(updates); |
|---|
| 1425 | | - return changed; |
|---|
| 1275 | + return cpu_to_core_id(cpu); |
|---|
| 1426 | 1276 | } |
|---|
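A standalone sketch of the coregroup-id extraction in cpu_to_coregroup_id() above: the first cell of the VPHN associativity buffer is the entry count, and the next-to-last entry is returned as the coregroup id. All values are illustrative:

```c
#include <stdio.h>

int main(void)
{
	int min_common_depth = 4;	/* assumed for this sketch */
	/* Mock VPHN buffer: { count, lvl1, lvl2, lvl3, nid, coregroup, thread } */
	unsigned int associativity[] = {6, 1, 1, 2, 3, 17, 90};
	int index = (int)associativity[0];	/* number of entries present */
	int id = -1;

	/* Deep enough associativity: take the next-to-last entry */
	if (index > min_common_depth + 1)
		id = (int)associativity[index - 1];

	printf("coregroup id = %d\n", id);	/* prints 17 */
	return 0;
}
```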
| 1427 | | - |
|---|
| 1428 | | -int arch_update_cpu_topology(void) |
|---|
| 1429 | | -{ |
|---|
| 1430 | | - return numa_update_cpu_topology(true); |
|---|
| 1431 | | -} |
|---|
| 1432 | | - |
|---|
| 1433 | | -static void topology_work_fn(struct work_struct *work) |
|---|
| 1434 | | -{ |
|---|
| 1435 | | - rebuild_sched_domains(); |
|---|
| 1436 | | -} |
|---|
| 1437 | | -static DECLARE_WORK(topology_work, topology_work_fn); |
|---|
| 1438 | | - |
|---|
| 1439 | | -static void topology_schedule_update(void) |
|---|
| 1440 | | -{ |
|---|
| 1441 | | - schedule_work(&topology_work); |
|---|
| 1442 | | -} |
|---|
| 1443 | | - |
|---|
| 1444 | | -static void topology_timer_fn(struct timer_list *unused) |
|---|
| 1445 | | -{ |
|---|
| 1446 | | - if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask)) |
|---|
| 1447 | | - topology_schedule_update(); |
|---|
| 1448 | | - else if (vphn_enabled) { |
|---|
| 1449 | | - if (update_cpu_associativity_changes_mask() > 0) |
|---|
| 1450 | | - topology_schedule_update(); |
|---|
| 1451 | | - reset_topology_timer(); |
|---|
| 1452 | | - } |
|---|
| 1453 | | -} |
|---|
| 1454 | | -static struct timer_list topology_timer; |
|---|
| 1455 | | - |
|---|
| 1456 | | -static void reset_topology_timer(void) |
|---|
| 1457 | | -{ |
|---|
| 1458 | | - if (vphn_enabled) |
|---|
| 1459 | | - mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ); |
|---|
| 1460 | | -} |
|---|
| 1461 | | - |
|---|
| 1462 | | -#ifdef CONFIG_SMP |
|---|
| 1463 | | - |
|---|
| 1464 | | -static int dt_update_callback(struct notifier_block *nb, |
|---|
| 1465 | | - unsigned long action, void *data) |
|---|
| 1466 | | -{ |
|---|
| 1467 | | - struct of_reconfig_data *update = data; |
|---|
| 1468 | | - int rc = NOTIFY_DONE; |
|---|
| 1469 | | - |
|---|
| 1470 | | - switch (action) { |
|---|
| 1471 | | - case OF_RECONFIG_UPDATE_PROPERTY: |
|---|
| 1472 | | - if (!of_prop_cmp(update->dn->type, "cpu") && |
|---|
| 1473 | | - !of_prop_cmp(update->prop->name, "ibm,associativity")) { |
|---|
| 1474 | | - u32 core_id; |
|---|
| 1475 | | - of_property_read_u32(update->dn, "reg", &core_id); |
|---|
| 1476 | | - rc = dlpar_cpu_readd(core_id); |
|---|
| 1477 | | - rc = NOTIFY_OK; |
|---|
| 1478 | | - } |
|---|
| 1479 | | - break; |
|---|
| 1480 | | - } |
|---|
| 1481 | | - |
|---|
| 1482 | | - return rc; |
|---|
| 1483 | | -} |
|---|
| 1484 | | - |
|---|
| 1485 | | -static struct notifier_block dt_update_nb = { |
|---|
| 1486 | | - .notifier_call = dt_update_callback, |
|---|
| 1487 | | -}; |
|---|
| 1488 | | - |
|---|
| 1489 | | -#endif |
|---|
| 1490 | | - |
|---|
| 1491 | | -/* |
|---|
| 1492 | | - * Start polling for associativity changes. |
|---|
| 1493 | | - */ |
|---|
| 1494 | | -int start_topology_update(void) |
|---|
| 1495 | | -{ |
|---|
| 1496 | | - int rc = 0; |
|---|
| 1497 | | - |
|---|
| 1498 | | - if (!topology_updates_enabled) |
|---|
| 1499 | | - return 0; |
|---|
| 1500 | | - |
|---|
| 1501 | | - if (firmware_has_feature(FW_FEATURE_PRRN)) { |
|---|
| 1502 | | - if (!prrn_enabled) { |
|---|
| 1503 | | - prrn_enabled = 1; |
|---|
| 1504 | | -#ifdef CONFIG_SMP |
|---|
| 1505 | | - rc = of_reconfig_notifier_register(&dt_update_nb); |
|---|
| 1506 | | -#endif |
|---|
| 1507 | | - } |
|---|
| 1508 | | - } |
|---|
| 1509 | | - if (firmware_has_feature(FW_FEATURE_VPHN) && |
|---|
| 1510 | | - lppaca_shared_proc(get_lppaca())) { |
|---|
| 1511 | | - if (!vphn_enabled) { |
|---|
| 1512 | | - vphn_enabled = 1; |
|---|
| 1513 | | - setup_cpu_associativity_change_counters(); |
|---|
| 1514 | | - timer_setup(&topology_timer, topology_timer_fn, |
|---|
| 1515 | | - TIMER_DEFERRABLE); |
|---|
| 1516 | | - reset_topology_timer(); |
|---|
| 1517 | | - } |
|---|
| 1518 | | - } |
|---|
| 1519 | | - |
|---|
| 1520 | | - return rc; |
|---|
| 1521 | | -} |
|---|
| 1522 | | - |
|---|
| 1523 | | -/* |
|---|
| 1524 | | - * Disable polling for VPHN associativity changes. |
|---|
| 1525 | | - */ |
|---|
| 1526 | | -int stop_topology_update(void) |
|---|
| 1527 | | -{ |
|---|
| 1528 | | - int rc = 0; |
|---|
| 1529 | | - |
|---|
| 1530 | | - if (!topology_updates_enabled) |
|---|
| 1531 | | - return 0; |
|---|
| 1532 | | - |
|---|
| 1533 | | - if (prrn_enabled) { |
|---|
| 1534 | | - prrn_enabled = 0; |
|---|
| 1535 | | -#ifdef CONFIG_SMP |
|---|
| 1536 | | - rc = of_reconfig_notifier_unregister(&dt_update_nb); |
|---|
| 1537 | | -#endif |
|---|
| 1538 | | - } |
|---|
| 1539 | | - if (vphn_enabled) { |
|---|
| 1540 | | - vphn_enabled = 0; |
|---|
| 1541 | | - rc = del_timer_sync(&topology_timer); |
|---|
| 1542 | | - } |
|---|
| 1543 | | - |
|---|
| 1544 | | - return rc; |
|---|
| 1545 | | -} |
|---|
| 1546 | | - |
|---|
| 1547 | | -int prrn_is_enabled(void) |
|---|
| 1548 | | -{ |
|---|
| 1549 | | - return prrn_enabled; |
|---|
| 1550 | | -} |
|---|
| 1551 | | - |
|---|
| 1552 | | -void __init shared_proc_topology_init(void) |
|---|
| 1553 | | -{ |
|---|
| 1554 | | - if (lppaca_shared_proc(get_lppaca())) { |
|---|
| 1555 | | - bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask), |
|---|
| 1556 | | - nr_cpumask_bits); |
|---|
| 1557 | | - numa_update_cpu_topology(false); |
|---|
| 1558 | | - } |
|---|
| 1559 | | -} |
|---|
| 1560 | | - |
|---|
| 1561 | | -static int topology_read(struct seq_file *file, void *v) |
|---|
| 1562 | | -{ |
|---|
| 1563 | | - if (vphn_enabled || prrn_enabled) |
|---|
| 1564 | | - seq_puts(file, "on\n"); |
|---|
| 1565 | | - else |
|---|
| 1566 | | - seq_puts(file, "off\n"); |
|---|
| 1567 | | - |
|---|
| 1568 | | - return 0; |
|---|
| 1569 | | -} |
|---|
| 1570 | | - |
|---|
| 1571 | | -static int topology_open(struct inode *inode, struct file *file) |
|---|
| 1572 | | -{ |
|---|
| 1573 | | - return single_open(file, topology_read, NULL); |
|---|
| 1574 | | -} |
|---|
| 1575 | | - |
|---|
| 1576 | | -static ssize_t topology_write(struct file *file, const char __user *buf, |
|---|
| 1577 | | - size_t count, loff_t *off) |
|---|
| 1578 | | -{ |
|---|
| 1579 | | - char kbuf[4]; /* "on" or "off" plus null. */ |
|---|
| 1580 | | - int read_len; |
|---|
| 1581 | | - |
|---|
| 1582 | | - read_len = count < 3 ? count : 3; |
|---|
| 1583 | | - if (copy_from_user(kbuf, buf, read_len)) |
|---|
| 1584 | | - return -EINVAL; |
|---|
| 1585 | | - |
|---|
| 1586 | | - kbuf[read_len] = '\0'; |
|---|
| 1587 | | - |
|---|
| 1588 | | - if (!strncmp(kbuf, "on", 2)) { |
|---|
| 1589 | | - topology_updates_enabled = true; |
|---|
| 1590 | | - start_topology_update(); |
|---|
| 1591 | | - } else if (!strncmp(kbuf, "off", 3)) { |
|---|
| 1592 | | - stop_topology_update(); |
|---|
| 1593 | | - topology_updates_enabled = false; |
|---|
| 1594 | | - } else |
|---|
| 1595 | | - return -EINVAL; |
|---|
| 1596 | | - |
|---|
| 1597 | | - return count; |
|---|
| 1598 | | -} |
|---|
| 1599 | | - |
|---|
| 1600 | | -static const struct file_operations topology_ops = { |
|---|
| 1601 | | - .read = seq_read, |
|---|
| 1602 | | - .write = topology_write, |
|---|
| 1603 | | - .open = topology_open, |
|---|
| 1604 | | - .release = single_release |
|---|
| 1605 | | -}; |
|---|
| 1606 | 1277 | |
|---|
| 1607 | 1278 | static int topology_update_init(void) |
|---|
| 1608 | 1279 | { |
|---|
| 1609 | | - start_topology_update(); |
|---|
| 1610 | | - |
|---|
| 1611 | | - if (vphn_enabled) |
|---|
| 1612 | | - topology_schedule_update(); |
|---|
| 1613 | | - |
|---|
| 1614 | | - if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops)) |
|---|
| 1615 | | - return -ENOMEM; |
|---|
| 1616 | | - |
|---|
| 1617 | 1280 | topology_inited = 1; |
|---|
| 1618 | 1281 | return 0; |
|---|
| 1619 | 1282 | } |
|---|