From cf4ce59b3b70238352c7f1729f0f7223214828ad Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Fri, 20 Sep 2024 01:46:19 +0000
Subject: [PATCH] rtl88x2CE_WiFi_linux add concurrent mode

---
 kernel/arch/powerpc/kernel/setup_64.c | 286 ++++++++++++++++++++++++++++++++++++++------------------
 1 files changed, 193 insertions(+), 93 deletions(-)

diff --git a/kernel/arch/powerpc/kernel/setup_64.c b/kernel/arch/powerpc/kernel/setup_64.c
index 1223656..3f8426b 100644
--- a/kernel/arch/powerpc/kernel/setup_64.c
+++ b/kernel/arch/powerpc/kernel/setup_64.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * 
  * Common boot and setup code.
  *
  * Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/export.h>
@@ -29,19 +25,18 @@
 #include <linux/unistd.h>
 #include <linux/serial.h>
 #include <linux/serial_8250.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/pci.h>
 #include <linux/lockdep.h>
-#include <linux/memblock.h>
 #include <linux/memory.h>
 #include <linux/nmi.h>
+#include <linux/pgtable.h>
 
 #include <asm/debugfs.h>
 #include <asm/io.h>
 #include <asm/kdump.h>
 #include <asm/prom.h>
 #include <asm/processor.h>
-#include <asm/pgtable.h>
 #include <asm/smp.h>
 #include <asm/elf.h>
 #include <asm/machdep.h>
@@ -69,14 +64,11 @@
 #include <asm/cputhreads.h>
 #include <asm/hw_irq.h>
 #include <asm/feature-fixups.h>
+#include <asm/kup.h>
+#include <asm/early_ioremap.h>
+#include <asm/pgalloc.h>
 
 #include "setup.h"
-
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
 
 int spinning_secondaries;
 u64 ppc64_pft_size;
@@ -205,7 +197,10 @@
 	/* Under a PAPR hypervisor, we need hypercalls */
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
 		/* Enable AIL if possible */
-		pseries_enable_reloc_on_exc();
+		if (!pseries_enable_reloc_on_exc()) {
+			init_task.thread.fscr &= ~FSCR_SCV;
+			cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
+		}
 
 		/*
 		 * Tell the hypervisor that we want our exceptions to
@@ -236,10 +231,23 @@
 	 * If we are not in hypervisor mode the job is done once for
 	 * the whole partition in configure_exceptions().
 	 */
-	if (cpu_has_feature(CPU_FTR_HVMODE) &&
-	    cpu_has_feature(CPU_FTR_ARCH_207S)) {
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
 		unsigned long lpcr = mfspr(SPRN_LPCR);
-		mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+		unsigned long new_lpcr = lpcr;
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			/* P10 DD1 does not have HAIL */
+			if (pvr_version_is(PVR_POWER10) &&
+			    (mfspr(SPRN_PVR) & 0xf00) == 0x100)
+				new_lpcr |= LPCR_AIL_3;
+			else
+				new_lpcr |= LPCR_HAIL;
+		} else if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+			new_lpcr |= LPCR_AIL_3;
+		}
+
+		if (new_lpcr != lpcr)
+			mtspr(SPRN_LPCR, new_lpcr);
 	}
 
 	/*
@@ -294,22 +302,40 @@
 
 	/* -------- printk is _NOT_ safe to use here ! ------- */
 
-	/* Try new device tree based feature discovery ... */
-	if (!dt_cpu_ftrs_init(__va(dt_ptr)))
-		/* Otherwise use the old style CPU table */
-		identify_cpu(0, mfspr(SPRN_PVR));
-
-	/* Assume we're on cpu 0 for now. Don't write to the paca yet! */
+	/*
+	 * Assume we're on cpu 0 for now.
+	 *
+	 * We need to load a PACA very early for a few reasons.
+	 *
+	 * The stack protector canary is stored in the paca, so as soon as we
+	 * call any stack protected code we need r13 pointing somewhere valid.
+	 *
+	 * If we are using kcov it will call in_task() in its instrumentation,
+	 * which relies on the current task from the PACA.
+	 *
+	 * dt_cpu_ftrs_init() calls into generic OF/fdt code, as well as
+	 * printk(), which can trigger both stack protector and kcov.
+	 *
+	 * percpu variables and spin locks also use the paca.
+	 *
+	 * So set up a temporary paca. It will be replaced below once we know
+	 * what CPU we are on.
+	 */
 	initialise_paca(&boot_paca, 0);
 	setup_paca(&boot_paca);
 	fixup_boot_paca();
 
 	/* -------- printk is now safe to use ------- */
 
+	/* Try new device tree based feature discovery ... */
+	if (!dt_cpu_ftrs_init(__va(dt_ptr)))
+		/* Otherwise use the old style CPU table */
+		identify_cpu(0, mfspr(SPRN_PVR));
+
 	/* Enable early debugging if any specified (see udbg.h) */
 	udbg_early_init();
 
-	DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
+	udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr);
 
 	/*
 	 * Do early initialization using the flattened device
@@ -332,12 +358,20 @@
 	 */
 	configure_exceptions();
 
+	/*
+	 * Configure Kernel Userspace Protection. This needs to happen before
+	 * feature fixups for platforms that implement this using features.
+	 */
+	setup_kup();
+
 	/* Apply all the dynamic patching */
 	apply_feature_fixups();
 	setup_feature_keys();
 
 	/* Initialize the hash table or TLB handling */
 	early_init_mmu();
+
+	early_ioremap_setup();
 
 	/*
 	 * After firmware and early platform setup code has set things up,
@@ -360,11 +394,11 @@
 	 */
 	this_cpu_enable_ftrace();
 
-	DBG(" <- early_setup()\n");
+	udbg_printf(" <- %s()\n", __func__);
 
 #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
 	/*
-	 * This needs to be done *last* (after the above DBG() even)
+	 * This needs to be done *last* (after the above udbg_printf() even)
 	 *
 	 * Right after we return from this function, we turn on the MMU
 	 * which means the real-mode access trick that btext does will
@@ -383,6 +417,9 @@
 
 	/* Initialize the hash table or TLB handling */
 	early_init_mmu_secondary();
+
+	/* Perform any KUP setup that is per-cpu */
+	setup_kup();
 
 	/*
 	 * At this point, we can let interrupts switch to virtual mode
@@ -431,8 +468,6 @@
 	if (!use_spinloop())
 		return;
 
-	DBG(" -> smp_release_cpus()\n");
-
 	/* All secondary cpus are spinning on a common spinloop, release them
 	 * all now so they can start to spin on their individual paca
 	 * spinloops. For non SMP kernels, the secondary cpus never get out
@@ -451,9 +486,7 @@
 			break;
 		udelay(1);
 	}
-	DBG("spinning_secondaries = %d\n", spinning_secondaries);
-
-	DBG(" <- smp_release_cpus()\n");
+	pr_debug("spinning_secondaries = %d\n", spinning_secondaries);
 }
 #endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
 
@@ -548,8 +581,6 @@
 	struct device_node *cpu = NULL, *l2, *l3 = NULL;
 	u32 pvr;
 
-	DBG(" -> initialize_cache_info()\n");
-
 	/*
 	 * All shipping POWER8 machines have a firmware bug that
 	 * puts incorrect information in the device-tree. This will
@@ -573,10 +604,10 @@
 	 */
 	if (cpu) {
 		if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
-			DBG("Argh, can't find dcache properties !\n");
+			pr_warn("Argh, can't find dcache properties !\n");
 
 		if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
-			DBG("Argh, can't find icache properties !\n");
+			pr_warn("Argh, can't find icache properties !\n");
 
 		/*
 		 * Try to find the L2 and L3 if any.  Assume they are
@@ -601,8 +632,6 @@
 
 	cur_cpu_spec->dcache_bsize = dcache_bsize;
 	cur_cpu_spec->icache_bsize = icache_bsize;
-
-	DBG(" <- initialize_cache_info()\n");
 }
 
 /*
@@ -637,17 +666,17 @@
 
 static void *__init alloc_stack(unsigned long limit, int cpu)
 {
-	unsigned long pa;
+	void *ptr;
 
-	pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
-					early_cpu_to_node(cpu), MEMBLOCK_NONE);
-	if (!pa) {
-		pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
-		if (!pa)
-			panic("cannot allocate stacks");
-	}
+	BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
 
-	return __va(pa);
+	ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_ALIGN,
+				     MEMBLOCK_LOW_LIMIT, limit,
+				     early_cpu_to_node(cpu));
+	if (!ptr)
+		panic("cannot allocate stacks");
+
+	return ptr;
 }
 
 void __init irqstack_early_init(void)
@@ -693,31 +722,13 @@
 #endif
 
 /*
- * Emergency stacks are used for a range of things, from asynchronous
- * NMIs (system reset, machine check) to synchronous, process context.
- * We set preempt_count to zero, even though that isn't necessarily correct. To
- * get the right value we'd need to copy it from the previous thread_info, but
- * doing that might fault causing more problems.
- * TODO: what to do with accounting?
- */
-static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
-{
-	ti->task = NULL;
-	ti->cpu = cpu;
-	ti->preempt_count = 0;
-	ti->local_flags = 0;
-	ti->flags = 0;
-	klp_init_thread_info(ti);
-}
-
-/*
  * Stack space used when we detect a bad kernel stack pointer, and
  * early in SMP boots before relocation is enabled. Exclusive emergency
  * stack for machine checks.
  */
 void __init emergency_stack_init(void)
 {
-	u64 limit;
+	u64 limit, mce_limit;
 	unsigned int i;
 
 	/*
@@ -734,44 +745,73 @@
 	 * initialized in kernel/irq.c. These are initialized here in order
 	 * to have emergency stacks available as early as possible.
 	 */
-	limit = min(ppc64_bolted_size(), ppc64_rma_size);
+	limit = mce_limit = min(ppc64_bolted_size(), ppc64_rma_size);
+
+	/*
+	 * Machine check on pseries calls rtas, but can't use the static
+	 * rtas_args due to a machine check hitting while the lock is held.
+	 * rtas args have to be under 4GB, so the machine check stack is
+	 * limited to 4GB so args can be put on stack.
+	 */
+	if (firmware_has_feature(FW_FEATURE_LPAR) && mce_limit > SZ_4G)
+		mce_limit = SZ_4G;
 
 	for_each_possible_cpu(i) {
-		struct thread_info *ti;
-
-		ti = alloc_stack(limit, i);
-		memset(ti, 0, THREAD_SIZE);
-		emerg_stack_init_thread_info(ti, i);
-		paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE;
+		paca_ptrs[i]->emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
 
 #ifdef CONFIG_PPC_BOOK3S_64
 		/* emergency stack for NMI exception handling. */
-		ti = alloc_stack(limit, i);
-		memset(ti, 0, THREAD_SIZE);
-		emerg_stack_init_thread_info(ti, i);
-		paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE;
+		paca_ptrs[i]->nmi_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
 
 		/* emergency stack for machine check exception handling. */
-		ti = alloc_stack(limit, i);
-		memset(ti, 0, THREAD_SIZE);
-		emerg_stack_init_thread_info(ti, i);
-		paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE;
+		paca_ptrs[i]->mc_emergency_sp = alloc_stack(mce_limit, i) + THREAD_SIZE;
 #endif
 	}
 }
 
 #ifdef CONFIG_SMP
-#define PCPU_DYN_SIZE		()
-
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
+/**
+ * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
+ * @cpu: cpu to allocate for
+ * @size: size allocation in bytes
+ * @align: alignment
+ *
+ * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
+ * does the right thing for NUMA regardless of the current
+ * configuration.
+ *
+ * RETURNS:
+ * Pointer to the allocated area on success, NULL on failure.
+ */
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
+					size_t align)
 {
-	return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align,
-				    __pa(MAX_DMA_ADDRESS));
+	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	int node = early_cpu_to_node(cpu);
+	void *ptr;
+
+	if (!node_online(node) || !NODE_DATA(node)) {
+		ptr = memblock_alloc_from(size, align, goal);
+		pr_info("cpu %d has no node %d or node-local memory\n",
+			cpu, node);
+		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
+			 cpu, size, __pa(ptr));
+	} else {
+		ptr = memblock_alloc_try_nid(size, align, goal,
+					     MEMBLOCK_ALLOC_ACCESSIBLE, node);
+		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
+			 "%016lx\n", cpu, size, node, __pa(ptr));
+	}
+	return ptr;
+#else
+	return memblock_alloc_from(size, align, goal);
+#endif
 }
 
-static void __init pcpu_fc_free(void *ptr, size_t size)
+static void __init pcpu_free_bootmem(void *ptr, size_t size)
 {
-	free_bootmem(__pa(ptr), size);
+	memblock_free(__pa(ptr), size);
 }
 
 static int pcpu_cpu_distance(unsigned int from, unsigned int to)
@@ -785,13 +825,58 @@
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
 
+static void __init pcpu_populate_pte(unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset_k(addr);
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d)) {
+		pud_t *new;
+
+		new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+		if (!new)
+			goto err_alloc;
+		p4d_populate(&init_mm, p4d, new);
+	}
+
+	pud = pud_offset(p4d, addr);
+	if (pud_none(*pud)) {
+		pmd_t *new;
+
+		new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+		if (!new)
+			goto err_alloc;
+		pud_populate(&init_mm, pud, new);
+	}
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd)) {
+		pte_t *new;
+
+		new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
+		if (!new)
+			goto err_alloc;
+		pmd_populate_kernel(&init_mm, pmd, new);
+	}
+
+	return;
+
+err_alloc:
+	panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+	      __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+}
+
+
 void __init setup_per_cpu_areas(void)
 {
 	const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
 	size_t atom_size;
 	unsigned long delta;
 	unsigned int cpu;
-	int rc;
+	int rc = -EINVAL;
 
 	/*
 	 * Linear mapping is one of 4K, 1M and 16M.  For 4K, no need
@@ -803,8 +888,18 @@
 	else
 		atom_size = 1 << 20;
 
-	rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
-				    pcpu_fc_alloc, pcpu_fc_free);
+	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+		rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+					    pcpu_alloc_bootmem, pcpu_free_bootmem);
+		if (rc)
+			pr_warn("PERCPU: %s allocator failed (%d), "
+				"falling back to page size\n",
+				pcpu_fc_names[pcpu_chosen_fc], rc);
+	}
+
+	if (rc < 0)
+		rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
+					   pcpu_populate_pte);
 	if (rc < 0)
 		panic("cannot initialize percpu area (err=%d)", rc);
 
@@ -980,8 +1075,13 @@
 	 * hardware prefetch runoff. We don't have a recipe for load patterns to
 	 * reliably avoid the prefetcher.
 	 */
-	l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
-	memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+	l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2,
+						l1d_size, MEMBLOCK_LOW_LIMIT,
+						limit, NUMA_NO_NODE);
+	if (!l1d_flush_fallback_area)
+		panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n",
+		      __func__, l1d_size * 2, l1d_size, &limit);
+
 	for_each_possible_cpu(cpu) {
 		struct paca_struct *paca = paca_ptrs[cpu];
 
-- 
Gitblit v1.6.2