```diff
@@ ... @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *
  * Common boot and setup code.
  *
  * Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/export.h>
@@ ... @@
 #include <linux/unistd.h>
 #include <linux/serial.h>
 #include <linux/serial_8250.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/pci.h>
 #include <linux/lockdep.h>
-#include <linux/memblock.h>
 #include <linux/memory.h>
 #include <linux/nmi.h>
+#include <linux/pgtable.h>
 
 #include <asm/debugfs.h>
 #include <asm/io.h>
 #include <asm/kdump.h>
 #include <asm/prom.h>
 #include <asm/processor.h>
-#include <asm/pgtable.h>
 #include <asm/smp.h>
 #include <asm/elf.h>
 #include <asm/machdep.h>
@@ ... @@
 #include <asm/cputhreads.h>
 #include <asm/hw_irq.h>
 #include <asm/feature-fixups.h>
+#include <asm/kup.h>
+#include <asm/early_ioremap.h>
+#include <asm/pgalloc.h>
 
 #include "setup.h"
-
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
 
 int spinning_secondaries;
 u64 ppc64_pft_size;
```
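A recurring change in this patch is the removal of the file-local `DBG()` macro: messages issued before the console is up become explicit `udbg_printf()` calls, and the rest become `pr_debug()`. A minimal sketch of the `pr_debug()` side, assuming `CONFIG_DYNAMIC_DEBUG`; the prefix and helper below are illustrative, not from the patch:

```c
/* Like the removed DBG(), pr_debug() compiles away unless DEBUG or
 * CONFIG_DYNAMIC_DEBUG is set, but the dynamic variant can also be
 * toggled at runtime through debugfs. */
#define pr_fmt(fmt) "setup: " fmt	/* illustrative prefix; define before printk.h */

#include <linux/init.h>
#include <linux/printk.h>

static void __init report_secondaries(int n)	/* hypothetical helper */
{
	pr_debug("spinning_secondaries = %d\n", n);
}
```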
```diff
@@ ... @@
 	/* Under a PAPR hypervisor, we need hypercalls */
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
 		/* Enable AIL if possible */
-		pseries_enable_reloc_on_exc();
+		if (!pseries_enable_reloc_on_exc()) {
+			init_task.thread.fscr &= ~FSCR_SCV;
+			cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
+		}
 
 		/*
 		 * Tell the hypervisor that we want our exceptions to
```
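When `pseries_enable_reloc_on_exc()` fails, scv syscalls cannot work (scv requires relocation-on exceptions), so the patch clears `FSCR_SCV` and stops advertising `PPC_FEATURE2_SCV`. Userspace is expected to check that bit in `HWCAP2` before issuing `scv`; a minimal userspace sketch, where the fallback `#define` mirrors the uapi value and should be treated as an assumption:

```c
#include <stdio.h>
#include <sys/auxv.h>

#ifndef PPC_FEATURE2_SCV
#define PPC_FEATURE2_SCV 0x00100000	/* assumed uapi value from <asm/cputable.h> */
#endif

int main(void)
{
	/* HWCAP2 reflects cpu_user_features2, which the hunk above may mask */
	unsigned long hwcap2 = getauxval(AT_HWCAP2);

	printf("scv syscalls %savailable\n",
	       (hwcap2 & PPC_FEATURE2_SCV) ? "" : "not ");
	return 0;
}
```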
```diff
@@ ... @@
 	 * If we are not in hypervisor mode the job is done once for
 	 * the whole partition in configure_exceptions().
 	 */
-	if (cpu_has_feature(CPU_FTR_HVMODE) &&
-	    cpu_has_feature(CPU_FTR_ARCH_207S)) {
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
 		unsigned long lpcr = mfspr(SPRN_LPCR);
-		mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+		unsigned long new_lpcr = lpcr;
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			/* P10 DD1 does not have HAIL */
+			if (pvr_version_is(PVR_POWER10) &&
+			    (mfspr(SPRN_PVR) & 0xf00) == 0x100)
+				new_lpcr |= LPCR_AIL_3;
+			else
+				new_lpcr |= LPCR_HAIL;
+		} else if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+			new_lpcr |= LPCR_AIL_3;
+		}
+
+		if (new_lpcr != lpcr)
+			mtspr(SPRN_LPCR, new_lpcr);
 	}
 
 	/*
```
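The DD1 test above keys off the PVR: the top half identifies the chip (`pvr_version_is(PVR_POWER10)`), and the kernel treats a revision with `(pvr & 0xf00) == 0x100` as a DD1.x part, which lacks HAIL and must fall back to `LPCR_AIL_3`. A hedged sketch of that decode; the sample PVR value is hypothetical:

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative only: mirrors the mask used in the hunk above. */
static int pvr_is_dd1(uint32_t pvr)
{
	return (pvr & 0xf00) == 0x100;
}

int main(void)
{
	uint32_t pvr = 0x00800100;	/* hypothetical POWER10 DD1.0 PVR */

	printf("version 0x%04x: %s\n", pvr >> 16,
	       pvr_is_dd1(pvr) ? "DD1.x, use LPCR_AIL_3"
			       : "DD2.0 or later, LPCR_HAIL is usable");
	return 0;
}
```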
```diff
@@ ... @@
 
 	/* -------- printk is _NOT_ safe to use here ! ------- */
 
-	/* Try new device tree based feature discovery ... */
-	if (!dt_cpu_ftrs_init(__va(dt_ptr)))
-		/* Otherwise use the old style CPU table */
-		identify_cpu(0, mfspr(SPRN_PVR));
-
-	/* Assume we're on cpu 0 for now. Don't write to the paca yet! */
+	/*
+	 * Assume we're on cpu 0 for now.
+	 *
+	 * We need to load a PACA very early for a few reasons.
+	 *
+	 * The stack protector canary is stored in the paca, so as soon as we
+	 * call any stack protected code we need r13 pointing somewhere valid.
+	 *
+	 * If we are using kcov it will call in_task() in its instrumentation,
+	 * which relies on the current task from the PACA.
+	 *
+	 * dt_cpu_ftrs_init() calls into generic OF/fdt code, as well as
+	 * printk(), which can trigger both stack protector and kcov.
+	 *
+	 * percpu variables and spin locks also use the paca.
+	 *
+	 * So set up a temporary paca. It will be replaced below once we know
+	 * what CPU we are on.
+	 */
 	initialise_paca(&boot_paca, 0);
 	setup_paca(&boot_paca);
 	fixup_boot_paca();
 
 	/* -------- printk is now safe to use ------- */
 
+	/* Try new device tree based feature discovery ... */
+	if (!dt_cpu_ftrs_init(__va(dt_ptr)))
+		/* Otherwise use the old style CPU table */
+		identify_cpu(0, mfspr(SPRN_PVR));
+
 	/* Enable early debugging if any specified (see udbg.h) */
 	udbg_early_init();
 
-	DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
+	udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr);
 
 	/*
 	 * Do early initialization using the flattened device
```
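The new comment is the heart of this reordering: `dt_cpu_ftrs_init()` may only run once r13 points at a valid PACA, because compiler-inserted stack-protector and kcov hooks dereference it. On 64-bit powerpc the PACA pointer is bound to a fixed GPR, roughly as below. This is a sketch modelled on `local_paca` in `arch/powerpc/include/asm/paca.h`; the struct layout shown is illustrative, not the real one:

```c
/* Sketch: the PACA is reached through GPR r13, so any instrumented code
 * executed before setup_paca() would load the canary through a junk
 * pointer. Field names below are illustrative. */
struct paca_struct {
	unsigned long canary;		/* stack protector canary */
	void *current_task;		/* what in_task()/current rely on */
};

register struct paca_struct *local_paca asm("r13");

static inline unsigned long read_canary(void)
{
	return local_paca->canary;	/* a single load off r13 */
}
```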
```diff
@@ ... @@
 	 */
 	configure_exceptions();
 
+	/*
+	 * Configure Kernel Userspace Protection. This needs to happen before
+	 * feature fixups for platforms that implement this using features.
+	 */
+	setup_kup();
+
 	/* Apply all the dynamic patching */
 	apply_feature_fixups();
 	setup_feature_keys();
 
 	/* Initialize the hash table or TLB handling */
 	early_init_mmu();
+
+	early_ioremap_setup();
 
 	/*
 	 * After firmware and early platform setup code has set things up,
```
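`early_ioremap_setup()` arms the generic fixmap-based early ioremap machinery right after the MMU hash/TLB setup, so platform code can reach MMIO before the normal `ioremap()` path exists. A hedged usage sketch; `early_peek()` is a hypothetical helper:

```c
#include <linux/init.h>
#include <linux/io.h>
#include <asm/early_ioremap.h>

/* Hypothetical early-boot peek at a device register block. */
static void __init early_peek(phys_addr_t phys)
{
	void __iomem *regs = early_ioremap(phys, PAGE_SIZE);

	if (regs) {
		pr_info("reg0 = 0x%x\n", readl(regs));
		early_iounmap(regs, PAGE_SIZE);	/* slots are scarce; unmap soon */
	}
}
```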
```diff
@@ ... @@
 	 */
 	this_cpu_enable_ftrace();
 
-	DBG(" <- early_setup()\n");
+	udbg_printf(" <- %s()\n", __func__);
 
 #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
 	/*
-	 * This needs to be done *last* (after the above DBG() even)
+	 * This needs to be done *last* (after the above udbg_printf() even)
 	 *
 	 * Right after we return from this function, we turn on the MMU
 	 * which means the real-mode access trick that btext does will
```
```diff
@@ ... @@
 
 	/* Initialize the hash table or TLB handling */
 	early_init_mmu_secondary();
+
+	/* Perform any KUP setup that is per-cpu */
+	setup_kup();
 
 	/*
 	 * At this point, we can let interrupts switch to virtual mode
```
```diff
@@ ... @@
 	if (!use_spinloop())
 		return;
 
-	DBG(" -> smp_release_cpus()\n");
-
 	/* All secondary cpus are spinning on a common spinloop, release them
 	 * all now so they can start to spin on their individual paca
 	 * spinloops. For non SMP kernels, the secondary cpus never get out
```
```diff
@@ ... @@
 			break;
 		udelay(1);
 	}
-	DBG("spinning_secondaries = %d\n", spinning_secondaries);
-
-	DBG(" <- smp_release_cpus()\n");
+	pr_debug("spinning_secondaries = %d\n", spinning_secondaries);
 }
 #endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
 
```
```diff
@@ ... @@
 	struct device_node *cpu = NULL, *l2, *l3 = NULL;
 	u32 pvr;
 
-	DBG(" -> initialize_cache_info()\n");
-
 	/*
 	 * All shipping POWER8 machines have a firmware bug that
 	 * puts incorrect information in the device-tree. This will
```
```diff
@@ ... @@
 	 */
 	if (cpu) {
 		if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
-			DBG("Argh, can't find dcache properties !\n");
+			pr_warn("Argh, can't find dcache properties !\n");
 
 		if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
-			DBG("Argh, can't find icache properties !\n");
+			pr_warn("Argh, can't find icache properties !\n");
 
 		/*
 		 * Try to find the L2 and L3 if any. Assume they are
```
```diff
@@ ... @@
 
 	cur_cpu_spec->dcache_bsize = dcache_bsize;
 	cur_cpu_spec->icache_bsize = icache_bsize;
-
-	DBG(" <- initialize_cache_info()\n");
 }
 
 /*
```
```diff
@@ ... @@
 
 static void *__init alloc_stack(unsigned long limit, int cpu)
 {
-	unsigned long pa;
+	void *ptr;
 
-	pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
-				     early_cpu_to_node(cpu), MEMBLOCK_NONE);
-	if (!pa) {
-		pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
-		if (!pa)
-			panic("cannot allocate stacks");
-	}
+	BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
 
-	return __va(pa);
+	ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_ALIGN,
+				     MEMBLOCK_LOW_LIMIT, limit,
+				     early_cpu_to_node(cpu));
+	if (!ptr)
+		panic("cannot allocate stacks");
+
+	return ptr;
 }
 
 void __init irqstack_early_init(void)
```
```diff
@@ ... @@
 #endif
 
 /*
- * Emergency stacks are used for a range of things, from asynchronous
- * NMIs (system reset, machine check) to synchronous, process context.
- * We set preempt_count to zero, even though that isn't necessarily correct. To
- * get the right value we'd need to copy it from the previous thread_info, but
- * doing that might fault causing more problems.
- * TODO: what to do with accounting?
- */
-static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
-{
-	ti->task = NULL;
-	ti->cpu = cpu;
-	ti->preempt_count = 0;
-	ti->local_flags = 0;
-	ti->flags = 0;
-	klp_init_thread_info(ti);
-}
-
-/*
  * Stack space used when we detect a bad kernel stack pointer, and
  * early in SMP boots before relocation is enabled. Exclusive emergency
  * stack for machine checks.
  */
 void __init emergency_stack_init(void)
 {
-	u64 limit;
+	u64 limit, mce_limit;
 	unsigned int i;
 
 	/*
```
```diff
@@ ... @@
 	 * initialized in kernel/irq.c. These are initialized here in order
 	 * to have emergency stacks available as early as possible.
 	 */
-	limit = min(ppc64_bolted_size(), ppc64_rma_size);
+	limit = mce_limit = min(ppc64_bolted_size(), ppc64_rma_size);
+
+	/*
+	 * Machine check on pseries calls rtas, but can't use the static
+	 * rtas_args due to a machine check hitting while the lock is held.
+	 * rtas args have to be under 4GB, so the machine check stack is
+	 * limited to 4GB so args can be put on stack.
+	 */
+	if (firmware_has_feature(FW_FEATURE_LPAR) && mce_limit > SZ_4G)
+		mce_limit = SZ_4G;
 
 	for_each_possible_cpu(i) {
-		struct thread_info *ti;
-
-		ti = alloc_stack(limit, i);
-		memset(ti, 0, THREAD_SIZE);
-		emerg_stack_init_thread_info(ti, i);
-		paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE;
+		paca_ptrs[i]->emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
 
 #ifdef CONFIG_PPC_BOOK3S_64
 		/* emergency stack for NMI exception handling. */
-		ti = alloc_stack(limit, i);
-		memset(ti, 0, THREAD_SIZE);
-		emerg_stack_init_thread_info(ti, i);
-		paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE;
+		paca_ptrs[i]->nmi_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
 
 		/* emergency stack for machine check exception handling. */
-		ti = alloc_stack(limit, i);
-		memset(ti, 0, THREAD_SIZE);
-		emerg_stack_init_thread_info(ti, i);
-		paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE;
+		paca_ptrs[i]->mc_emergency_sp = alloc_stack(mce_limit, i) + THREAD_SIZE;
 #endif
 	}
 }
```
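The 4GB cap exists because RTAS is a 32-bit firmware interface: an `rtas_args` block placed on the machine check stack is only usable if its physical address fits in 32 bits. A sketch of the invariant the cap buys; the helper is illustrative, not a kernel API:

```c
#include <linux/sizes.h>
#include <linux/types.h>

/* Illustrative: any rtas_args placed on the MCE emergency stack must be
 * physically below 4GB; allocating that stack with mce_limit <= SZ_4G
 * makes this hold by construction on pseries. */
static bool rtas_args_addressable(void *args_on_stack)
{
	return __pa(args_on_stack) < SZ_4G;
}
```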
```diff
 
 #ifdef CONFIG_SMP
-#define PCPU_DYN_SIZE		()
-
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
+/**
+ * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
+ * @cpu: cpu to allocate for
+ * @size: size allocation in bytes
+ * @align: alignment
+ *
+ * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
+ * does the right thing for NUMA regardless of the current
+ * configuration.
+ *
+ * RETURNS:
+ * Pointer to the allocated area on success, NULL on failure.
+ */
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
+					size_t align)
 {
-	return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align,
-				    __pa(MAX_DMA_ADDRESS));
+	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	int node = early_cpu_to_node(cpu);
+	void *ptr;
+
+	if (!node_online(node) || !NODE_DATA(node)) {
+		ptr = memblock_alloc_from(size, align, goal);
+		pr_info("cpu %d has no node %d or node-local memory\n",
+			cpu, node);
+		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
+			 cpu, size, __pa(ptr));
+	} else {
+		ptr = memblock_alloc_try_nid(size, align, goal,
+					     MEMBLOCK_ALLOC_ACCESSIBLE, node);
+		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
+			 "%016lx\n", cpu, size, node, __pa(ptr));
+	}
+	return ptr;
+#else
+	return memblock_alloc_from(size, align, goal);
+#endif
 }
 
-static void __init pcpu_fc_free(void *ptr, size_t size)
+static void __init pcpu_free_bootmem(void *ptr, size_t size)
 {
-	free_bootmem(__pa(ptr), size);
+	memblock_free(__pa(ptr), size);
 }
 
 static int pcpu_cpu_distance(unsigned int from, unsigned int to)
```
```diff
@@ ... @@
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
 
+static void __init pcpu_populate_pte(unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset_k(addr);
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d)) {
+		pud_t *new;
+
+		new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+		if (!new)
+			goto err_alloc;
+		p4d_populate(&init_mm, p4d, new);
+	}
+
+	pud = pud_offset(p4d, addr);
+	if (pud_none(*pud)) {
+		pmd_t *new;
+
+		new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+		if (!new)
+			goto err_alloc;
+		pud_populate(&init_mm, pud, new);
+	}
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd)) {
+		pte_t *new;
+
+		new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
+		if (!new)
+			goto err_alloc;
+		pmd_populate_kernel(&init_mm, pmd, new);
+	}
+
+	return;
+
+err_alloc:
+	panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+	      __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+}
+
+
```
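`pcpu_populate_pte()` pre-populates the kernel page tables top-down (pgd, then p4d, pud, pmd) so that `pcpu_page_first_chunk()` can install its vmalloc-space pages. For contrast, a hedged sketch of the matching read-side walk; `lookup_kernel_pte()` is an illustrative helper, not an existing API:

```c
static pte_t *lookup_kernel_pte(unsigned long addr)
{
	pgd_t *pgd = pgd_offset_k(addr);
	p4d_t *p4d = p4d_offset(pgd, addr);
	pud_t *pud;
	pmd_t *pmd;

	if (p4d_none(*p4d))
		return NULL;
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, addr);
	if (!pmd_present(*pmd))
		return NULL;
	return pte_offset_kernel(pmd, addr);	/* PTE for a kernel address */
}
```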
```diff
 void __init setup_per_cpu_areas(void)
 {
 	const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
 	size_t atom_size;
 	unsigned long delta;
 	unsigned int cpu;
-	int rc;
+	int rc = -EINVAL;
 
 	/*
 	 * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
@@ ... @@
 	else
 		atom_size = 1 << 20;
 
-	rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
-				    pcpu_fc_alloc, pcpu_fc_free);
+	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+		rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+					    pcpu_alloc_bootmem, pcpu_free_bootmem);
+		if (rc)
+			pr_warn("PERCPU: %s allocator failed (%d), "
+				"falling back to page size\n",
+				pcpu_fc_names[pcpu_chosen_fc], rc);
+	}
+
+	if (rc < 0)
+		rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
+					   pcpu_populate_pte);
 	if (rc < 0)
 		panic("cannot initialize percpu area (err=%d)", rc);
 
```
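`pcpu_chosen_fc` and `pcpu_fc_names[]` come from the generic percpu code, which parses the `percpu_alloc=` boot parameter; booting with `percpu_alloc=page` therefore forces the new page-at-a-time path on powerpc. The parameter handling has roughly this shape (a sketch of the logic in `mm/percpu.c`, not a verbatim copy):

```c
static int __init percpu_alloc_setup(char *str)
{
	if (!str)
		return -EINVAL;
	if (!strcmp(str, "embed"))
		pcpu_chosen_fc = PCPU_FC_EMBED;
	else if (!strcmp(str, "page"))
		pcpu_chosen_fc = PCPU_FC_PAGE;	/* forces the fallback above */
	else
		pr_warn("unknown allocator %s specified\n", str);
	return 0;
}
early_param("percpu_alloc", percpu_alloc_setup);
```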
```diff
@@ ... @@
 	 * hardware prefetch runoff. We don't have a recipe for load patterns to
 	 * reliably avoid the prefetcher.
 	 */
-	l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
-	memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+	l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2,
+						l1d_size, MEMBLOCK_LOW_LIMIT,
+						limit, NUMA_NO_NODE);
+	if (!l1d_flush_fallback_area)
+		panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n",
+		      __func__, l1d_size * 2, l1d_size, &limit);
+
 
 	for_each_possible_cpu(cpu) {
 		struct paca_struct *paca = paca_ptrs[cpu];
```
|---|