From 9370bb92b2d16684ee45cf24e879c93c509162da Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 19 Dec 2024 01:47:39 +0000
Subject: [PATCH] add wifi6 8852be driver
---
kernel/arch/powerpc/kernel/smp.c | 630 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 516 insertions(+), 114 deletions(-)
diff --git a/kernel/arch/powerpc/kernel/smp.c b/kernel/arch/powerpc/kernel/smp.c
index 60fc3c7..cf99f57 100644
--- a/kernel/arch/powerpc/kernel/smp.c
+++ b/kernel/arch/powerpc/kernel/smp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* SMP support for ppc.
*
@@ -8,11 +9,6 @@
*
* PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
* Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#undef DEBUG
@@ -20,6 +16,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched/mm.h>
+#include <linux/sched/task_stack.h>
#include <linux/sched/topology.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
@@ -34,6 +31,9 @@
#include <linux/topology.h>
#include <linux/profile.h>
#include <linux/processor.h>
+#include <linux/random.h>
+#include <linux/stackprotector.h>
+#include <linux/pgtable.h>
#include <asm/ptrace.h>
#include <linux/atomic.h>
@@ -42,7 +42,6 @@
#include <asm/kvm_ppc.h>
#include <asm/dbell.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/time.h>
@@ -60,6 +59,8 @@
#include <asm/asm-prototypes.h>
#include <asm/cpu_has_feature.h>
#include <asm/ftrace.h>
+#include <asm/kup.h>
+#include <asm/fadump.h>
#ifdef DEBUG
#include <asm/udbg.h>
@@ -73,15 +74,44 @@
static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif
-struct thread_info *secondary_ti;
+struct task_struct *secondary_current;
+bool has_big_cores;
+bool coregroup_enabled;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+EXPORT_SYMBOL_GPL(has_big_cores);
+
+enum {
+#ifdef CONFIG_SCHED_SMT
+ smt_idx,
+#endif
+ cache_idx,
+ mc_idx,
+ die_idx,
+};
+
+#define MAX_THREAD_LIST_SIZE 8
+#define THREAD_GROUP_SHARE_L1 1
+struct thread_groups {
+ unsigned int property;
+ unsigned int nr_groups;
+ unsigned int threads_per_group;
+ unsigned int thread_list[MAX_THREAD_LIST_SIZE];
+};
+
+/*
+ * On big-core systems, cpu_l1_cache_map for each CPU corresponds to
+ * the set of its siblings that share the L1-cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);
/* SMP operations for this machine */
struct smp_ops_t *smp_ops;
@@ -442,7 +472,8 @@
* - delay_us > 0 is the delay before giving up waiting for targets to
* begin executing the handler, == 0 specifies indefinite delay.
*/
-int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool safe)
+static int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *),
+ u64 delay_us, bool safe)
{
unsigned long flags;
int me = raw_smp_processor_id();
@@ -582,6 +613,15 @@
{
static bool stopped = false;
+ /*
+ * In case of fadump, register data for all CPUs is captured by f/w
+ * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before
+ * this rtas call to avoid tricky post processing of those CPUs'
+ * backtraces.
+ */
+ if (should_fadump_crash())
+ return;
+
if (stopped)
return;
@@ -650,7 +690,7 @@
}
#endif /* CONFIG_NMI_IPI */
-struct thread_info *current_set[NR_CPUS];
+struct task_struct *current_set[NR_CPUS];
static void smp_store_cpu_info(int id)
{
@@ -681,6 +721,274 @@
}
#endif
+/*
+ * Extends set_cpus_related(). Instead of setting one CPU at a time in
+ * dstmask, OR in the whole srcmask in one shot. dstmask should be a
+ * superset of srcmask.
+ */
+static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
+ struct cpumask *(*dstmask)(int))
+{
+ struct cpumask *mask;
+ int k;
+
+ mask = srcmask(j);
+ for_each_cpu(k, srcmask(i))
+ cpumask_or(dstmask(k), dstmask(k), mask);
+
+ if (i == j)
+ return;
+
+ mask = srcmask(i);
+ for_each_cpu(k, srcmask(j))
+ cpumask_or(dstmask(k), dstmask(k), mask);
+}
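+
+/*
+ * For instance, or_cpumasks_related(cpu, i, cpu_sibling_mask,
+ * cpu_l2_cache_mask) ORs the sibling mask of @i into the l2-cache mask
+ * of every sibling of @cpu, and the sibling mask of @cpu into the
+ * l2-cache mask of every sibling of @i.
+ */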
+
+/*
+ * parse_thread_groups: Parses the "ibm,thread-groups" device tree
+ * property for the CPU device node @dn and stores
+ * the parsed output in the thread_groups
+ * structure @tg if the ibm,thread-groups[0]
+ * matches @property.
+ *
+ * @dn: The device node of the CPU device.
+ * @tg: Pointer to a thread group structure into which the parsed
+ * output of "ibm,thread-groups" is stored.
+ * @property: The property of the thread-group that the caller is
+ * interested in.
+ *
+ * ibm,thread-groups[0..N-1] array defines which group of threads in
+ * the CPU-device node can be grouped together based on the property.
+ *
+ * ibm,thread-groups[0] tells us the property based on which the
+ * threads are being grouped together. If this value is 1, it implies
+ * that the threads in the same group share the L1 and translation cache.
+ *
+ * ibm,thread-groups[1] tells us how many such thread groups exist.
+ *
+ * ibm,thread-groups[2] tells us the number of threads in each such
+ * group.
+ *
+ * ibm,thread-groups[3..N-1] is the list of threads identified by
+ * "ibm,ppc-interrupt-server#s" arranged as per their membership in
+ * the grouping.
+ *
+ * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12], it
+ * implies that there are 2 groups of 4 threads each, where each group
+ * of threads shares the L1 and translation cache.
+ *
+ * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8}
+ * and the "ibm,ppc-interrupt-server#s" of the second group is
+ * {9, 10, 11, 12}.
+ *
+ * Returns 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if the property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ */
+static int parse_thread_groups(struct device_node *dn,
+ struct thread_groups *tg,
+ unsigned int property)
+{
+ int i;
+ u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
+ u32 *thread_list;
+ size_t total_threads;
+ int ret;
+
+ ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+ thread_group_array, 3);
+ if (ret)
+ return ret;
+
+ tg->property = thread_group_array[0];
+ tg->nr_groups = thread_group_array[1];
+ tg->threads_per_group = thread_group_array[2];
+ if (tg->property != property ||
+ tg->nr_groups < 1 ||
+ tg->threads_per_group < 1)
+ return -ENODATA;
+
+ total_threads = tg->nr_groups * tg->threads_per_group;
+
+ ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+ thread_group_array,
+ 3 + total_threads);
+ if (ret)
+ return ret;
+
+ thread_list = &thread_group_array[3];
+
+ for (i = 0; i < total_threads; i++)
+ tg->thread_list[i] = thread_list[i];
+
+ return 0;
+}
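+
+/*
+ * For the example above, ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12]
+ * would be parsed into:
+ *
+ *   tg->property          == 1  (THREAD_GROUP_SHARE_L1)
+ *   tg->nr_groups         == 2
+ *   tg->threads_per_group == 4
+ *   tg->thread_list[]     == {5, 6, 7, 8, 9, 10, 11, 12}
+ */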
+
+/*
+ * get_cpu_thread_group_start: Searches the thread group in tg->thread_list
+ * that @cpu belongs to.
+ *
+ * @cpu : The logical CPU whose thread group is being searched.
+ * @tg : The thread-group structure of the CPU node which @cpu belongs
+ * to.
+ *
+ * Returns the index into tg->thread_list that points to the start
+ * of the thread_group that @cpu belongs to.
+ *
+ * Returns -1 if cpu doesn't belong to any of the groups pointed to by
+ * tg->thread_list.
+ */
+static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
+{
+ int hw_cpu_id = get_hard_smp_processor_id(cpu);
+ int i, j;
+
+ for (i = 0; i < tg->nr_groups; i++) {
+ int group_start = i * tg->threads_per_group;
+
+ for (j = 0; j < tg->threads_per_group; j++) {
+ int idx = group_start + j;
+
+ if (tg->thread_list[idx] == hw_cpu_id)
+ return group_start;
+ }
+ }
+
+ return -1;
+}
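+
+/*
+ * With the example thread_list {5, 6, 7, 8, 9, 10, 11, 12} above, a CPU
+ * whose "ibm,ppc-interrupt-server#" is 10 lies in the second group, so
+ * this returns 4 (the index where that group starts); a hardware id not
+ * present in the list returns -1.
+ */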
+
+static int init_cpu_l1_cache_map(int cpu)
+{
+ struct device_node *dn = of_get_cpu_node(cpu, NULL);
+ struct thread_groups tg = {.property = 0,
+ .nr_groups = 0,
+ .threads_per_group = 0};
+ int first_thread = cpu_first_thread_sibling(cpu);
+ int i, cpu_group_start = -1, err = 0;
+
+ if (!dn)
+ return -ENODATA;
+
+ err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
+ if (err)
+ goto out;
+
+ cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
+
+ if (unlikely(cpu_group_start == -1)) {
+ WARN_ON_ONCE(1);
+ err = -ENODATA;
+ goto out;
+ }
+
+ zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
+
+ for (i = first_thread; i < first_thread + threads_per_core; i++) {
+ int i_group_start = get_cpu_thread_group_start(i, &tg);
+
+ if (unlikely(i_group_start == -1)) {
+ WARN_ON_ONCE(1);
+ err = -ENODATA;
+ goto out;
+ }
+
+ if (i_group_start == cpu_group_start)
+ cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
+ }
+
+out:
+ of_node_put(dn);
+ return err;
+}
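+
+/*
+ * Continuing the example, and assuming the core's eight threads carry
+ * the interrupt-server#s 5..12 listed above, the threads whose ids are
+ * in {5, 6, 7, 8} end up sharing one cpu_l1_cache_map while those in
+ * {9, 10, 11, 12} share the other.
+ */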
+
+static bool shared_caches;
+
+#ifdef CONFIG_SCHED_SMT
+/* cpumask of CPUs with asymmetric SMT dependency */
+static int powerpc_smt_flags(void)
+{
+ int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+
+ if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+ printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+ flags |= SD_ASYM_PACKING;
+ }
+ return flags;
+}
+#endif
+
+/*
+ * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
+ * This topology makes it *much* cheaper to migrate tasks between adjacent cores
+ * since the migrated task remains cache hot. We want to take advantage of this
+ * at the scheduler level so an extra topology level is required.
+ */
+static int powerpc_shared_cache_flags(void)
+{
+ return SD_SHARE_PKG_RESOURCES;
+}
+
+/*
+ * We can't just pass cpu_l2_cache_mask() directly because it
+ * returns a non-const pointer and the compiler barfs on that.
+ */
+static const struct cpumask *shared_cache_mask(int cpu)
+{
+ return per_cpu(cpu_l2_cache_map, cpu);
+}
+
+#ifdef CONFIG_SCHED_SMT
+static const struct cpumask *smallcore_smt_mask(int cpu)
+{
+ return cpu_smallcore_mask(cpu);
+}
+#endif
+
+static struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return per_cpu(cpu_coregroup_map, cpu);
+}
+
+static bool has_coregroup_support(void)
+{
+ return coregroup_enabled;
+}
+
+static const struct cpumask *cpu_mc_mask(int cpu)
+{
+ return cpu_coregroup_mask(cpu);
+}
+
+static struct sched_domain_topology_level powerpc_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+ { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+ { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
+ { cpu_mc_mask, SD_INIT_NAME(MC) },
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
+static int __init init_big_cores(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ int err = init_cpu_l1_cache_map(cpu);
+
+ if (err)
+ return err;
+
+ zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+ }
+
+ has_big_cores = true;
+ return 0;
+}
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
unsigned int cpu;
@@ -704,6 +1012,11 @@
GFP_KERNEL, cpu_to_node(cpu));
zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
+ if (has_coregroup_support())
+ zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
/*
* numa_node_id() works after this.
*/
@@ -712,12 +1025,22 @@
set_cpu_numa_mem(cpu,
local_memory_node(numa_cpu_lookup_table[cpu]));
}
+#endif
}
/* Init the cpumasks so the boot CPU is related to itself */
cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
+
+ if (has_coregroup_support())
+ cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid));
+
+ init_big_cores();
+ if (has_big_cores) {
+ cpumask_set_cpu(boot_cpuid,
+ cpu_smallcore_mask(boot_cpuid));
+ }
if (smp_ops && smp_ops->probe)
smp_ops->probe();
@@ -730,7 +1053,7 @@
paca_ptrs[boot_cpuid]->__current = current;
#endif
set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
- current_set[boot_cpuid] = task_thread_info(current);
+ current_set[boot_cpuid] = current;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -815,14 +1138,13 @@
static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
{
- struct thread_info *ti = task_thread_info(idle);
-
#ifdef CONFIG_PPC64
paca_ptrs[cpu]->__current = idle;
- paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
+ paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
+ THREAD_SIZE - STACK_FRAME_OVERHEAD;
#endif
- ti->cpu = cpu;
- secondary_ti = current_set[cpu] = ti;
+ idle->cpu = cpu;
+ secondary_current = current_set[cpu] = idle;
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
@@ -964,26 +1286,46 @@
return cache;
}
-static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
+static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
{
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
struct device_node *l2_cache, *np;
int i;
- l2_cache = cpu_to_l2cache(cpu);
- if (!l2_cache)
- return false;
+ if (has_big_cores)
+ submask_fn = cpu_smallcore_mask;
- for_each_cpu(i, cpu_online_mask) {
+ l2_cache = cpu_to_l2cache(cpu);
+ if (!l2_cache || !*mask) {
+ /* Assume only core siblings share cache with this CPU */
+ for_each_cpu(i, submask_fn(cpu))
+ set_cpus_related(cpu, i, cpu_l2_cache_mask);
+
+ return false;
+ }
+
+ cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
+
+ /* Update l2-cache mask with all the CPUs that are part of submask */
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
+
+ /* Skip all CPUs already part of current CPU l2-cache mask */
+ cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(cpu));
+
+ for_each_cpu(i, *mask) {
/*
* when updating the marks the current CPU has not been marked
* online, but we need to update the cache masks
*/
np = cpu_to_l2cache(i);
- if (!np)
- continue;
- if (np == l2_cache)
- set_cpus_related(cpu, i, mask_fn);
+ /* Skip all CPUs already part of current CPU l2-cache */
+ if (np == l2_cache) {
+ or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask);
+ cpumask_andnot(*mask, *mask, submask_fn(i));
+ } else {
+ cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(i));
+ }
of_node_put(np);
}
@@ -995,21 +1337,87 @@
#ifdef CONFIG_HOTPLUG_CPU
static void remove_cpu_from_masks(int cpu)
{
+ struct cpumask *(*mask_fn)(int) = cpu_sibling_mask;
int i;
- /* NB: cpu_core_mask is a superset of the others */
- for_each_cpu(i, cpu_core_mask(cpu)) {
- set_cpus_unrelated(cpu, i, cpu_core_mask);
+ if (shared_caches)
+ mask_fn = cpu_l2_cache_mask;
+
+ for_each_cpu(i, mask_fn(cpu)) {
set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
set_cpus_unrelated(cpu, i, cpu_sibling_mask);
+ if (has_big_cores)
+ set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
+ }
+
+ for_each_cpu(i, cpu_core_mask(cpu))
+ set_cpus_unrelated(cpu, i, cpu_core_mask);
+
+ if (has_coregroup_support()) {
+ for_each_cpu(i, cpu_coregroup_mask(cpu))
+ set_cpus_unrelated(cpu, i, cpu_coregroup_mask);
}
}
#endif
+static inline void add_cpu_to_smallcore_masks(int cpu)
+{
+ int i;
+
+ if (!has_big_cores)
+ return;
+
+ cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
+
+ for_each_cpu(i, per_cpu(cpu_l1_cache_map, cpu)) {
+ if (cpu_online(i))
+ set_cpus_related(i, cpu, cpu_smallcore_mask);
+ }
+}
+
+static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
+{
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
+ int coregroup_id = cpu_to_coregroup_id(cpu);
+ int i;
+
+ if (shared_caches)
+ submask_fn = cpu_l2_cache_mask;
+
+ if (!*mask) {
+ /* Assume only siblings are part of this CPU's coregroup */
+ for_each_cpu(i, submask_fn(cpu))
+ set_cpus_related(cpu, i, cpu_coregroup_mask);
+
+ return;
+ }
+
+ cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
+
+ /* Update coregroup mask with all the CPUs that are part of submask */
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask);
+
+ /* Skip all CPUs already part of coregroup mask */
+ cpumask_andnot(*mask, *mask, cpu_coregroup_mask(cpu));
+
+ for_each_cpu(i, *mask) {
+ /* Skip all CPUs not part of this coregroup */
+ if (coregroup_id == cpu_to_coregroup_id(i)) {
+ or_cpumasks_related(cpu, i, submask_fn, cpu_coregroup_mask);
+ cpumask_andnot(*mask, *mask, submask_fn(i));
+ } else {
+ cpumask_andnot(*mask, *mask, cpu_coregroup_mask(i));
+ }
+ }
+}
+
static void add_cpu_to_masks(int cpu)
{
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
int first_thread = cpu_first_thread_sibling(cpu);
- int chipid = cpu_to_chip_id(cpu);
+ int chip_id = cpu_to_chip_id(cpu);
+ cpumask_var_t mask;
+ bool ret;
int i;
/*
@@ -1017,47 +1425,57 @@
* add it to it's own thread sibling mask.
*/
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
+ cpumask_set_cpu(cpu, cpu_core_mask(cpu));
for (i = first_thread; i < first_thread + threads_per_core; i++)
if (cpu_online(i))
set_cpus_related(i, cpu, cpu_sibling_mask);
- /*
- * Copy the thread sibling mask into the cache sibling mask
- * and mark any CPUs that share an L2 with this CPU.
- */
- for_each_cpu(i, cpu_sibling_mask(cpu))
- set_cpus_related(cpu, i, cpu_l2_cache_mask);
- update_mask_by_l2(cpu, cpu_l2_cache_mask);
+ add_cpu_to_smallcore_masks(cpu);
- /*
- * Copy the cache sibling mask into core sibling mask and mark
- * any CPUs on the same chip as this CPU.
- */
- for_each_cpu(i, cpu_l2_cache_mask(cpu))
- set_cpus_related(cpu, i, cpu_core_mask);
+ /* In CPU-hotplug path, hence use GFP_ATOMIC */
+ ret = alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu));
+ update_mask_by_l2(cpu, &mask);
- if (chipid == -1)
- return;
+ if (has_coregroup_support())
+ update_coregroup_mask(cpu, &mask);
- for_each_cpu(i, cpu_online_mask)
- if (cpu_to_chip_id(i) == chipid)
- set_cpus_related(cpu, i, cpu_core_mask);
+ if (shared_caches)
+ submask_fn = cpu_l2_cache_mask;
+
+ /* Update core_mask with all the CPUs that are part of submask */
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_core_mask);
+
+ /* Skip all CPUs already part of current CPU core mask */
+ cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu));
+
+ /* If chip_id is -1, limit the cpu_core_mask to within the DIE */
+ if (chip_id == -1)
+ cpumask_and(mask, mask, cpu_cpu_mask(cpu));
+
+ for_each_cpu(i, mask) {
+ if (chip_id == cpu_to_chip_id(i)) {
+ or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask);
+ cpumask_andnot(mask, mask, submask_fn(i));
+ } else {
+ cpumask_andnot(mask, mask, cpu_core_mask(i));
+ }
+ }
+
+ free_cpumask_var(mask);
}
-
-static bool shared_caches;
/* Activate a secondary processor. */
void start_secondary(void *unused)
{
- unsigned int cpu = smp_processor_id();
+ unsigned int cpu = raw_smp_processor_id();
mmgrab(&init_mm);
current->active_mm = &init_mm;
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
- preempt_disable();
+ rcu_cpu_starting(cpu);
cpu_callin_map[cpu] = 1;
if (smp_ops->setup_cpu)
@@ -1083,12 +1501,22 @@
* Check for any shared caches. Note that this must be done on a
* per-core basis because one core in the pair might be disabled.
*/
- if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu)))
- shared_caches = true;
+ if (!shared_caches) {
+ struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
+ struct cpumask *mask = cpu_l2_cache_mask(cpu);
+
+ if (has_big_cores)
+ sibling_mask = cpu_smallcore_mask;
+
+ if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu)))
+ shared_caches = true;
+ }
smp_wmb();
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
+
+ boot_init_stack_canary();
local_irq_enable();
@@ -1107,56 +1535,44 @@
}
#endif
-#ifdef CONFIG_SCHED_SMT
-/* cpumask of CPUs with asymetric SMT dependancy */
-static int powerpc_smt_flags(void)
+static void fixup_topology(void)
{
- int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+ int i;
- if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
- printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
- flags |= SD_ASYM_PACKING;
+#ifdef CONFIG_SCHED_SMT
+ if (has_big_cores) {
+ pr_info("Big cores detected but using small core scheduling\n");
+ powerpc_topology[smt_idx].mask = smallcore_smt_mask;
}
- return flags;
-}
#endif
-static struct sched_domain_topology_level powerpc_topology[] = {
-#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+ if (!has_coregroup_support())
+ powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask;
+
+ /*
+ * Try to consolidate topology levels here instead of
+ * letting the scheduler degenerate them.
+ * - Don't consolidate if the masks are different.
+ * - Don't consolidate if both sd_flags exist and are different.
+ */
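+ /*
+ * For example, when coregroup support is absent the MC level has been
+ * given the CACHE mask above; the loop below then detects that MC
+ * duplicates CACHE and shifts the DIE entry (and finally the NULL
+ * terminator) up one slot, so the effective topology is SMT -> CACHE -> DIE.
+ */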
+ for (i = 1; i <= die_idx; i++) {
+ if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask)
+ continue;
+
+ if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags &&
+ powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags)
+ continue;
+
+ if (!powerpc_topology[i - 1].sd_flags)
+ powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags;
+
+ powerpc_topology[i].mask = powerpc_topology[i + 1].mask;
+ powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags;
+#ifdef CONFIG_SCHED_DEBUG
+ powerpc_topology[i].name = powerpc_topology[i + 1].name;
#endif
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
- { NULL, },
-};
-
-/*
- * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
- * This topology makes it *much* cheaper to migrate tasks between adjacent cores
- * since the migrated task remains cache hot. We want to take advantage of this
- * at the scheduler level so an extra topology level is required.
- */
-static int powerpc_shared_cache_flags(void)
-{
- return SD_SHARE_PKG_RESOURCES;
+ }
}
-
-/*
- * We can't just pass cpu_l2_cache_mask() directly because
- * returns a non-const pointer and the compiler barfs on that.
- */
-static const struct cpumask *shared_cache_mask(int cpu)
-{
- return cpu_l2_cache_mask(cpu);
-}
-
-static struct sched_domain_topology_level power9_topology[] = {
-#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
-#endif
- { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
- { NULL, },
-};
void __init smp_cpus_done(unsigned int max_cpus)
{
@@ -1169,24 +1585,10 @@
if (smp_ops && smp_ops->bringup_done)
smp_ops->bringup_done();
- /*
- * On a shared LPAR, associativity needs to be requested.
- * Hence, get numa topology before dumping cpu topology
- */
- shared_proc_topology_init();
dump_numa_cpu_topology();
- /*
- * If any CPU detects that it's sharing a cache with another CPU then
- * use the deeper topology that is aware of this sharing.
- */
- if (shared_caches) {
- pr_info("Using shared cache scheduler topology\n");
- set_sched_topology(power9_topology);
- } else {
- pr_info("Using standard scheduler topology\n");
- set_sched_topology(powerpc_topology);
- }
+ fixup_topology();
+ set_sched_topology(powerpc_topology);
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -1216,7 +1618,7 @@
smp_ops->cpu_die(cpu);
}
-void cpu_die(void)
+void arch_cpu_idle_dead(void)
{
/*
* Disable on the down path. This will be re-enabled by
@@ -1224,8 +1626,8 @@
*/
this_cpu_disable_ftrace();
- if (ppc_md.cpu_die)
- ppc_md.cpu_die();
+ if (smp_ops->cpu_offline_self)
+ smp_ops->cpu_offline_self();
/* If we return, we re-enter start_secondary */
start_secondary_resume();
--
Gitblit v1.6.2