From 95099d4622f8cb224d94e314c7a8e0df60b13f87 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 09 Dec 2023 08:38:01 +0000
Subject: [PATCH] enable docker ppp
---
kernel/arch/powerpc/kernel/setup_64.c | 286 ++++++++++++++++++++++++++++++++++++++------------------
1 files changed, 193 insertions(+), 93 deletions(-)
diff --git a/kernel/arch/powerpc/kernel/setup_64.c b/kernel/arch/powerpc/kernel/setup_64.c
index 1223656..3f8426b 100644
--- a/kernel/arch/powerpc/kernel/setup_64.c
+++ b/kernel/arch/powerpc/kernel/setup_64.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
*
* Common boot and setup code.
*
* Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/export.h>
@@ -29,19 +25,18 @@
#include <linux/unistd.h>
#include <linux/serial.h>
#include <linux/serial_8250.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/pci.h>
#include <linux/lockdep.h>
-#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/nmi.h>
+#include <linux/pgtable.h>
#include <asm/debugfs.h>
#include <asm/io.h>
#include <asm/kdump.h>
#include <asm/prom.h>
#include <asm/processor.h>
-#include <asm/pgtable.h>
#include <asm/smp.h>
#include <asm/elf.h>
#include <asm/machdep.h>
@@ -69,14 +64,11 @@
#include <asm/cputhreads.h>
#include <asm/hw_irq.h>
#include <asm/feature-fixups.h>
+#include <asm/kup.h>
+#include <asm/early_ioremap.h>
+#include <asm/pgalloc.h>
#include "setup.h"
-
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
int spinning_secondaries;
u64 ppc64_pft_size;
@@ -205,7 +197,10 @@
/* Under a PAPR hypervisor, we need hypercalls */
if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
/* Enable AIL if possible */
- pseries_enable_reloc_on_exc();
+ if (!pseries_enable_reloc_on_exc()) {
+ init_task.thread.fscr &= ~FSCR_SCV;
+ cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
+ }
/*
* Tell the hypervisor that we want our exceptions to
@@ -236,10 +231,23 @@
* If we are not in hypervisor mode the job is done once for
* the whole partition in configure_exceptions().
*/
- if (cpu_has_feature(CPU_FTR_HVMODE) &&
- cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
unsigned long lpcr = mfspr(SPRN_LPCR);
- mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+ unsigned long new_lpcr = lpcr;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ /* P10 DD1 does not have HAIL */
+ if (pvr_version_is(PVR_POWER10) &&
+ (mfspr(SPRN_PVR) & 0xf00) == 0x100)
+ new_lpcr |= LPCR_AIL_3;
+ else
+ new_lpcr |= LPCR_HAIL;
+ } else if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ new_lpcr |= LPCR_AIL_3;
+ }
+
+ if (new_lpcr != lpcr)
+ mtspr(SPRN_LPCR, new_lpcr);
}
/*
@@ -294,22 +302,40 @@
/* -------- printk is _NOT_ safe to use here ! ------- */
- /* Try new device tree based feature discovery ... */
- if (!dt_cpu_ftrs_init(__va(dt_ptr)))
- /* Otherwise use the old style CPU table */
- identify_cpu(0, mfspr(SPRN_PVR));
-
- /* Assume we're on cpu 0 for now. Don't write to the paca yet! */
+ /*
+ * Assume we're on cpu 0 for now.
+ *
+ * We need to load a PACA very early for a few reasons.
+ *
+ * The stack protector canary is stored in the paca, so as soon as we
+ * call any stack protected code we need r13 pointing somewhere valid.
+ *
+ * If we are using kcov it will call in_task() in its instrumentation,
+ * which relies on the current task from the PACA.
+ *
+ * dt_cpu_ftrs_init() calls into generic OF/fdt code, as well as
+ * printk(), which can trigger both stack protector and kcov.
+ *
+ * percpu variables and spin locks also use the paca.
+ *
+ * So set up a temporary paca. It will be replaced below once we know
+ * what CPU we are on.
+ */
initialise_paca(&boot_paca, 0);
setup_paca(&boot_paca);
fixup_boot_paca();
/* -------- printk is now safe to use ------- */
+ /* Try new device tree based feature discovery ... */
+ if (!dt_cpu_ftrs_init(__va(dt_ptr)))
+ /* Otherwise use the old style CPU table */
+ identify_cpu(0, mfspr(SPRN_PVR));
+
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
- DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
+ udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr);
/*
* Do early initialization using the flattened device
@@ -332,12 +358,20 @@
*/
configure_exceptions();
+ /*
+ * Configure Kernel Userspace Protection. This needs to happen before
+ * feature fixups for platforms that implement this using features.
+ */
+ setup_kup();
+
/* Apply all the dynamic patching */
apply_feature_fixups();
setup_feature_keys();
/* Initialize the hash table or TLB handling */
early_init_mmu();
+
+ early_ioremap_setup();
/*
* After firmware and early platform setup code has set things up,
@@ -360,11 +394,11 @@
*/
this_cpu_enable_ftrace();
- DBG(" <- early_setup()\n");
+ udbg_printf(" <- %s()\n", __func__);
#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
/*
- * This needs to be done *last* (after the above DBG() even)
+ * This needs to be done *last* (after the above udbg_printf() even)
*
* Right after we return from this function, we turn on the MMU
* which means the real-mode access trick that btext does will
@@ -383,6 +417,9 @@
/* Initialize the hash table or TLB handling */
early_init_mmu_secondary();
+
+ /* Perform any KUP setup that is per-cpu */
+ setup_kup();
/*
* At this point, we can let interrupts switch to virtual mode
@@ -431,8 +468,6 @@
if (!use_spinloop())
return;
- DBG(" -> smp_release_cpus()\n");
-
/* All secondary cpus are spinning on a common spinloop, release them
* all now so they can start to spin on their individual paca
* spinloops. For non SMP kernels, the secondary cpus never get out
@@ -451,9 +486,7 @@
break;
udelay(1);
}
- DBG("spinning_secondaries = %d\n", spinning_secondaries);
-
- DBG(" <- smp_release_cpus()\n");
+ pr_debug("spinning_secondaries = %d\n", spinning_secondaries);
}
#endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
@@ -548,8 +581,6 @@
struct device_node *cpu = NULL, *l2, *l3 = NULL;
u32 pvr;
- DBG(" -> initialize_cache_info()\n");
-
/*
* All shipping POWER8 machines have a firmware bug that
* puts incorrect information in the device-tree. This will
@@ -573,10 +604,10 @@
*/
if (cpu) {
if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
- DBG("Argh, can't find dcache properties !\n");
+ pr_warn("Argh, can't find dcache properties !\n");
if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
- DBG("Argh, can't find icache properties !\n");
+ pr_warn("Argh, can't find icache properties !\n");
/*
* Try to find the L2 and L3 if any. Assume they are
@@ -601,8 +632,6 @@
cur_cpu_spec->dcache_bsize = dcache_bsize;
cur_cpu_spec->icache_bsize = icache_bsize;
-
- DBG(" <- initialize_cache_info()\n");
}
/*
@@ -637,17 +666,17 @@
static void *__init alloc_stack(unsigned long limit, int cpu)
{
- unsigned long pa;
+ void *ptr;
- pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
- early_cpu_to_node(cpu), MEMBLOCK_NONE);
- if (!pa) {
- pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
- if (!pa)
- panic("cannot allocate stacks");
- }
+ BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
- return __va(pa);
+ ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_ALIGN,
+ MEMBLOCK_LOW_LIMIT, limit,
+ early_cpu_to_node(cpu));
+ if (!ptr)
+ panic("cannot allocate stacks");
+
+ return ptr;
}
void __init irqstack_early_init(void)
@@ -693,31 +722,13 @@
#endif
/*
- * Emergency stacks are used for a range of things, from asynchronous
- * NMIs (system reset, machine check) to synchronous, process context.
- * We set preempt_count to zero, even though that isn't necessarily correct. To
- * get the right value we'd need to copy it from the previous thread_info, but
- * doing that might fault causing more problems.
- * TODO: what to do with accounting?
- */
-static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
-{
- ti->task = NULL;
- ti->cpu = cpu;
- ti->preempt_count = 0;
- ti->local_flags = 0;
- ti->flags = 0;
- klp_init_thread_info(ti);
-}
-
-/*
* Stack space used when we detect a bad kernel stack pointer, and
* early in SMP boots before relocation is enabled. Exclusive emergency
* stack for machine checks.
*/
void __init emergency_stack_init(void)
{
- u64 limit;
+ u64 limit, mce_limit;
unsigned int i;
/*
@@ -734,44 +745,73 @@
* initialized in kernel/irq.c. These are initialized here in order
* to have emergency stacks available as early as possible.
*/
- limit = min(ppc64_bolted_size(), ppc64_rma_size);
+ limit = mce_limit = min(ppc64_bolted_size(), ppc64_rma_size);
+
+ /*
+ * Machine check on pseries calls rtas, but can't use the static
+ * rtas_args due to a machine check hitting while the lock is held.
+ * rtas args have to be under 4GB, so the machine check stack is
+ * limited to 4GB so args can be put on stack.
+ */
+ if (firmware_has_feature(FW_FEATURE_LPAR) && mce_limit > SZ_4G)
+ mce_limit = SZ_4G;
for_each_possible_cpu(i) {
- struct thread_info *ti;
-
- ti = alloc_stack(limit, i);
- memset(ti, 0, THREAD_SIZE);
- emerg_stack_init_thread_info(ti, i);
- paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
- ti = alloc_stack(limit, i);
- memset(ti, 0, THREAD_SIZE);
- emerg_stack_init_thread_info(ti, i);
- paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->nmi_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
- ti = alloc_stack(limit, i);
- memset(ti, 0, THREAD_SIZE);
- emerg_stack_init_thread_info(ti, i);
- paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->mc_emergency_sp = alloc_stack(mce_limit, i) + THREAD_SIZE;
#endif
}
}
#ifdef CONFIG_SMP
-#define PCPU_DYN_SIZE ()
-
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
+/**
+ * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
+ * @cpu: cpu to allocate for
+ * @size: size allocation in bytes
+ * @align: alignment
+ *
+ * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
+ * does the right thing for NUMA regardless of the current
+ * configuration.
+ *
+ * RETURNS:
+ * Pointer to the allocated area on success, NULL on failure.
+ */
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
+ size_t align)
{
- return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align,
- __pa(MAX_DMA_ADDRESS));
+ const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ int node = early_cpu_to_node(cpu);
+ void *ptr;
+
+ if (!node_online(node) || !NODE_DATA(node)) {
+ ptr = memblock_alloc_from(size, align, goal);
+ pr_info("cpu %d has no node %d or node-local memory\n",
+ cpu, node);
+ pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
+ cpu, size, __pa(ptr));
+ } else {
+ ptr = memblock_alloc_try_nid(size, align, goal,
+ MEMBLOCK_ALLOC_ACCESSIBLE, node);
+ pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
+ "%016lx\n", cpu, size, node, __pa(ptr));
+ }
+ return ptr;
+#else
+ return memblock_alloc_from(size, align, goal);
+#endif
}
-static void __init pcpu_fc_free(void *ptr, size_t size)
+static void __init pcpu_free_bootmem(void *ptr, size_t size)
{
- free_bootmem(__pa(ptr), size);
+ memblock_free(__pa(ptr), size);
}
static int pcpu_cpu_distance(unsigned int from, unsigned int to)
@@ -785,13 +825,58 @@
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
+static void __init pcpu_populate_pte(unsigned long addr)
+{
+ pgd_t *pgd = pgd_offset_k(addr);
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d)) {
+ pud_t *new;
+
+ new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ p4d_populate(&init_mm, p4d, new);
+ }
+
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud)) {
+ pmd_t *new;
+
+ new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pud_populate(&init_mm, pud, new);
+ }
+
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd)) {
+ pte_t *new;
+
+ new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pmd_populate_kernel(&init_mm, pmd, new);
+ }
+
+ return;
+
+err_alloc:
+ panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+ __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+}
+
+
void __init setup_per_cpu_areas(void)
{
const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
size_t atom_size;
unsigned long delta;
unsigned int cpu;
- int rc;
+ int rc = -EINVAL;
/*
* Linear mapping is one of 4K, 1M and 16M. For 4K, no need
@@ -803,8 +888,18 @@
else
atom_size = 1 << 20;
- rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
- pcpu_fc_alloc, pcpu_fc_free);
+ if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+ rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+ pcpu_alloc_bootmem, pcpu_free_bootmem);
+ if (rc)
+ pr_warn("PERCPU: %s allocator failed (%d), "
+ "falling back to page size\n",
+ pcpu_fc_names[pcpu_chosen_fc], rc);
+ }
+
+ if (rc < 0)
+ rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
+ pcpu_populate_pte);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
@@ -980,8 +1075,13 @@
* hardware prefetch runoff. We don't have a recipe for load patterns to
* reliably avoid the prefetcher.
*/
- l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
- memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+ l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2,
+ l1d_size, MEMBLOCK_LOW_LIMIT,
+ limit, NUMA_NO_NODE);
+ if (!l1d_flush_fallback_area)
+ panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n",
+ __func__, l1d_size * 2, l1d_size, &limit);
+
for_each_possible_cpu(cpu) {
struct paca_struct *paca = paca_ptrs[cpu];
--
Gitblit v1.6.2