hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/arch/powerpc/kernel/smp.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * SMP support for ppc.
34 *
....@@ -8,11 +9,6 @@
89 *
910 * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
1011 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
11
- *
12
- * This program is free software; you can redistribute it and/or
13
- * modify it under the terms of the GNU General Public License
14
- * as published by the Free Software Foundation; either version
15
- * 2 of the License, or (at your option) any later version.
1612 */
1713
1814 #undef DEBUG
....@@ -20,6 +16,7 @@
2016 #include <linux/kernel.h>
2117 #include <linux/export.h>
2218 #include <linux/sched/mm.h>
19
+#include <linux/sched/task_stack.h>
2320 #include <linux/sched/topology.h>
2421 #include <linux/smp.h>
2522 #include <linux/interrupt.h>
....@@ -34,6 +31,9 @@
3431 #include <linux/topology.h>
3532 #include <linux/profile.h>
3633 #include <linux/processor.h>
34
+#include <linux/random.h>
35
+#include <linux/stackprotector.h>
36
+#include <linux/pgtable.h>
3737
3838 #include <asm/ptrace.h>
3939 #include <linux/atomic.h>
....@@ -42,7 +42,6 @@
4242 #include <asm/kvm_ppc.h>
4343 #include <asm/dbell.h>
4444 #include <asm/page.h>
45
-#include <asm/pgtable.h>
4645 #include <asm/prom.h>
4746 #include <asm/smp.h>
4847 #include <asm/time.h>
....@@ -60,6 +59,8 @@
6059 #include <asm/asm-prototypes.h>
6160 #include <asm/cpu_has_feature.h>
6261 #include <asm/ftrace.h>
62
+#include <asm/kup.h>
63
+#include <asm/fadump.h>
6364
6465 #ifdef DEBUG
6566 #include <asm/udbg.h>
....@@ -73,15 +74,44 @@
7374 static DEFINE_PER_CPU(int, cpu_state) = { 0 };
7475 #endif
7576
76
-struct thread_info *secondary_ti;
77
+struct task_struct *secondary_current;
78
+bool has_big_cores;
79
+bool coregroup_enabled;
7780
7881 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
82
+DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
7983 DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
8084 DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
85
+DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);
8186
8287 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
8388 EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
8489 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
90
+EXPORT_SYMBOL_GPL(has_big_cores);
91
+
92
+enum {
93
+#ifdef CONFIG_SCHED_SMT
94
+ smt_idx,
95
+#endif
96
+ cache_idx,
97
+ mc_idx,
98
+ die_idx,
99
+};
100
+
101
+#define MAX_THREAD_LIST_SIZE 8
102
+#define THREAD_GROUP_SHARE_L1 1
103
+struct thread_groups {
104
+ unsigned int property;
105
+ unsigned int nr_groups;
106
+ unsigned int threads_per_group;
107
+ unsigned int thread_list[MAX_THREAD_LIST_SIZE];
108
+};
109
+
110
+/*
111
+ * On big-core systems, cpu_l1_cache_map for each CPU corresponds to
112
+ * the set of its siblings that share the L1-cache.
113
+ */
114
+DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);
85115
86116 /* SMP operations for this machine */
87117 struct smp_ops_t *smp_ops;
....@@ -442,7 +472,8 @@
442472 * - delay_us > 0 is the delay before giving up waiting for targets to
443473 * begin executing the handler, == 0 specifies indefinite delay.
444474 */
445
-int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool safe)
475
+static int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *),
476
+ u64 delay_us, bool safe)
446477 {
447478 unsigned long flags;
448479 int me = raw_smp_processor_id();
....@@ -582,6 +613,15 @@
582613 {
583614 static bool stopped = false;
584615
616
+ /*
617
+ * In case of fadump, register data for all CPUs is captured by f/w
618
+ * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before
619
+ * this rtas call to avoid tricky post processing of those CPUs'
620
+ * backtraces.
621
+ */
622
+ if (should_fadump_crash())
623
+ return;
624
+
585625 if (stopped)
586626 return;
587627
....@@ -650,7 +690,7 @@
650690 }
651691 #endif /* CONFIG_NMI_IPI */
652692
653
-struct thread_info *current_set[NR_CPUS];
693
+struct task_struct *current_set[NR_CPUS];
654694
655695 static void smp_store_cpu_info(int id)
656696 {
....@@ -681,6 +721,274 @@
681721 }
682722 #endif
683723
724
+/*
725
+ * Extends set_cpus_related. Instead of setting one CPU at a time in
726
+ * dstmask, OR srcmask in one shot. dstmask should be a superset of srcmask.
727
+ */
728
+static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
729
+ struct cpumask *(*dstmask)(int))
730
+{
731
+ struct cpumask *mask;
732
+ int k;
733
+
734
+ mask = srcmask(j);
735
+ for_each_cpu(k, srcmask(i))
736
+ cpumask_or(dstmask(k), dstmask(k), mask);
737
+
738
+ if (i == j)
739
+ return;
740
+
741
+ mask = srcmask(i);
742
+ for_each_cpu(k, srcmask(j))
743
+ cpumask_or(dstmask(k), dstmask(k), mask);
744
+}
745
+
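For illustration only (not part of the patch): the bulk OR above is roughly equivalent to relating every pair of CPUs drawn from the two source masks with the file's existing set_cpus_related() helper, just with far fewer cpumask updates. A minimal sketch with a hypothetical name:

/*
 * Hypothetical slow-path equivalent of or_cpumasks_related(), shown only
 * to illustrate what the bulk OR computes.
 */
static void or_cpumasks_related_slow(int i, int j,
				     struct cpumask *(*srcmask)(int),
				     struct cpumask *(*dstmask)(int))
{
	int k, l;

	for_each_cpu(k, srcmask(i))
		for_each_cpu(l, srcmask(j))
			set_cpus_related(k, l, dstmask);
}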
746
+/*
747
+ * parse_thread_groups: Parses the "ibm,thread-groups" device tree
748
+ * property for the CPU device node @dn and stores
749
+ * the parsed output in the thread_groups
750
+ * structure @tg if the ibm,thread-groups[0]
751
+ * matches @property.
752
+ *
753
+ * @dn: The device node of the CPU device.
754
+ * @tg: Pointer to a thread group structure into which the parsed
755
+ * output of "ibm,thread-groups" is stored.
756
+ * @property: The property of the thread-group that the caller is
757
+ * interested in.
758
+ *
759
+ * ibm,thread-groups[0..N-1] array defines which group of threads in
760
+ * the CPU-device node can be grouped together based on the property.
761
+ *
762
+ * ibm,thread-groups[0] tells us the property based on which the
763
+ * threads are being grouped together. If this value is 1, it implies
764
+ * that the threads in the same group share the L1 and translation caches.
765
+ *
766
+ * ibm,thread-groups[1] tells us how many such thread groups exist.
767
+ *
768
+ * ibm,thread-groups[2] tells us the number of threads in each such
769
+ * group.
770
+ *
771
+ * ibm,thread-groups[3..N-1] is the list of threads identified by
772
+ * "ibm,ppc-interrupt-server#s" arranged as per their membership in
773
+ * the grouping.
774
+ *
775
+ * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it
776
+ * implies that there are 2 groups of 4 threads each, where each group
777
+ * of threads shares the L1 and translation caches.
778
+ *
779
+ * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8}
780
+ * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10,
781
+ * 11, 12}.
782
+ *
783
+ * Returns 0 on success, -EINVAL if the property does not exist,
784
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
785
+ * property data isn't large enough.
786
+ */
787
+static int parse_thread_groups(struct device_node *dn,
788
+ struct thread_groups *tg,
789
+ unsigned int property)
790
+{
791
+ int i;
792
+ u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
793
+ u32 *thread_list;
794
+ size_t total_threads;
795
+ int ret;
796
+
797
+ ret = of_property_read_u32_array(dn, "ibm,thread-groups",
798
+ thread_group_array, 3);
799
+ if (ret)
800
+ return ret;
801
+
802
+ tg->property = thread_group_array[0];
803
+ tg->nr_groups = thread_group_array[1];
804
+ tg->threads_per_group = thread_group_array[2];
805
+ if (tg->property != property ||
806
+ tg->nr_groups < 1 ||
807
+ tg->threads_per_group < 1)
808
+ return -ENODATA;
809
+
810
+ total_threads = tg->nr_groups * tg->threads_per_group;
811
+
812
+ ret = of_property_read_u32_array(dn, "ibm,thread-groups",
813
+ thread_group_array,
814
+ 3 + total_threads);
815
+ if (ret)
816
+ return ret;
817
+
818
+ thread_list = &thread_group_array[3];
819
+
820
+ for (i = 0 ; i < total_threads; i++)
821
+ tg->thread_list[i] = thread_list[i];
822
+
823
+ return 0;
824
+}
825
+
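A worked example, using only the values from the comment above: parsing ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] with @property == THREAD_GROUP_SHARE_L1 would fill @tg as sketched here (hypothetical local variable, for illustration):

struct thread_groups tg = {
	.property          = 1,	/* THREAD_GROUP_SHARE_L1 */
	.nr_groups         = 2,	/* two thread groups */
	.threads_per_group = 4,	/* four threads per group */
	.thread_list       = { 5, 6, 7, 8, 9, 10, 11, 12 },
};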
826
+/*
827
+ * get_cpu_thread_group_start : Searches the thread group in tg->thread_list
828
+ * that @cpu belongs to.
829
+ *
830
+ * @cpu : The logical CPU whose thread group is being searched.
831
+ * @tg : The thread-group structure of the CPU node which @cpu belongs
832
+ * to.
833
+ *
834
+ * Returns the index into tg->thread_list that points to the start
835
+ * of the thread_group that @cpu belongs to.
836
+ *
837
+ * Returns -1 if cpu doesn't belong to any of the groups pointed to by
838
+ * tg->thread_list.
839
+ */
840
+static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
841
+{
842
+ int hw_cpu_id = get_hard_smp_processor_id(cpu);
843
+ int i, j;
844
+
845
+ for (i = 0; i < tg->nr_groups; i++) {
846
+ int group_start = i * tg->threads_per_group;
847
+
848
+ for (j = 0; j < tg->threads_per_group; j++) {
849
+ int idx = group_start + j;
850
+
851
+ if (tg->thread_list[idx] == hw_cpu_id)
852
+ return group_start;
853
+ }
854
+ }
855
+
856
+ return -1;
857
+}
858
+
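Continuing the same assumed example: for a logical CPU whose get_hard_smp_processor_id() is 9, the search above matches the second group, so the returned index is the start of that group in tg->thread_list:

/* Hypothetical lookup against the tg value sketched above. */
int start = get_cpu_thread_group_start(cpu, &tg);	/* == 4 */
/* tg.thread_list[4..7] == {9, 10, 11, 12} is this CPU's group. */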
859
+static int init_cpu_l1_cache_map(int cpu)
860
+
861
+{
862
+ struct device_node *dn = of_get_cpu_node(cpu, NULL);
863
+ struct thread_groups tg = {.property = 0,
864
+ .nr_groups = 0,
865
+ .threads_per_group = 0};
866
+ int first_thread = cpu_first_thread_sibling(cpu);
867
+ int i, cpu_group_start = -1, err = 0;
868
+
869
+ if (!dn)
870
+ return -ENODATA;
871
+
872
+ err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
873
+ if (err)
874
+ goto out;
875
+
876
+ cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
877
+
878
+ if (unlikely(cpu_group_start == -1)) {
879
+ WARN_ON_ONCE(1);
880
+ err = -ENODATA;
881
+ goto out;
882
+ }
883
+
884
+ zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
885
+ GFP_KERNEL, cpu_to_node(cpu));
886
+
887
+ for (i = first_thread; i < first_thread + threads_per_core; i++) {
888
+ int i_group_start = get_cpu_thread_group_start(i, &tg);
889
+
890
+ if (unlikely(i_group_start == -1)) {
891
+ WARN_ON_ONCE(1);
892
+ err = -ENODATA;
893
+ goto out;
894
+ }
895
+
896
+ if (i_group_start == cpu_group_start)
897
+ cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
898
+ }
899
+
900
+out:
901
+ of_node_put(dn);
902
+ return err;
903
+}
904
+
905
+static bool shared_caches;
906
+
907
+#ifdef CONFIG_SCHED_SMT
908
+/* cpumask of CPUs with asymmetric SMT dependency */
909
+static int powerpc_smt_flags(void)
910
+{
911
+ int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
912
+
913
+ if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
914
+ printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
915
+ flags |= SD_ASYM_PACKING;
916
+ }
917
+ return flags;
918
+}
919
+#endif
920
+
921
+/*
922
+ * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
923
+ * This topology makes it *much* cheaper to migrate tasks between adjacent cores
924
+ * since the migrated task remains cache hot. We want to take advantage of this
925
+ * at the scheduler level so an extra topology level is required.
926
+ */
927
+static int powerpc_shared_cache_flags(void)
928
+{
929
+ return SD_SHARE_PKG_RESOURCES;
930
+}
931
+
932
+/*
933
+ * We can't just pass cpu_l2_cache_mask() directly because
934
+ * it returns a non-const pointer and the compiler barfs on that.
935
+ */
936
+static const struct cpumask *shared_cache_mask(int cpu)
937
+{
938
+ return per_cpu(cpu_l2_cache_map, cpu);
939
+}
940
+
941
+#ifdef CONFIG_SCHED_SMT
942
+static const struct cpumask *smallcore_smt_mask(int cpu)
943
+{
944
+ return cpu_smallcore_mask(cpu);
945
+}
946
+#endif
947
+
948
+static struct cpumask *cpu_coregroup_mask(int cpu)
949
+{
950
+ return per_cpu(cpu_coregroup_map, cpu);
951
+}
952
+
953
+static bool has_coregroup_support(void)
954
+{
955
+ return coregroup_enabled;
956
+}
957
+
958
+static const struct cpumask *cpu_mc_mask(int cpu)
959
+{
960
+ return cpu_coregroup_mask(cpu);
961
+}
962
+
963
+static struct sched_domain_topology_level powerpc_topology[] = {
964
+#ifdef CONFIG_SCHED_SMT
965
+ { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
966
+#endif
967
+ { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
968
+ { cpu_mc_mask, SD_INIT_NAME(MC) },
969
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
970
+ { NULL, },
971
+};
972
+
973
+static int __init init_big_cores(void)
974
+{
975
+ int cpu;
976
+
977
+ for_each_possible_cpu(cpu) {
978
+ int err = init_cpu_l1_cache_map(cpu);
979
+
980
+ if (err)
981
+ return err;
982
+
983
+ zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
984
+ GFP_KERNEL,
985
+ cpu_to_node(cpu));
986
+ }
987
+
988
+ has_big_cores = true;
989
+ return 0;
990
+}
991
+
684992 void __init smp_prepare_cpus(unsigned int max_cpus)
685993 {
686994 unsigned int cpu;
....@@ -704,6 +1012,11 @@
7041012 GFP_KERNEL, cpu_to_node(cpu));
7051013 zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
7061014 GFP_KERNEL, cpu_to_node(cpu));
1015
+ if (has_coregroup_support())
1016
+ zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu),
1017
+ GFP_KERNEL, cpu_to_node(cpu));
1018
+
1019
+#ifdef CONFIG_NEED_MULTIPLE_NODES
7071020 /*
7081021 * numa_node_id() works after this.
7091022 */
....@@ -712,12 +1025,22 @@
7121025 set_cpu_numa_mem(cpu,
7131026 local_memory_node(numa_cpu_lookup_table[cpu]));
7141027 }
1028
+#endif
7151029 }
7161030
7171031 /* Init the cpumasks so the boot CPU is related to itself */
7181032 cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
7191033 cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
7201034 cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
1035
+
1036
+ if (has_coregroup_support())
1037
+ cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid));
1038
+
1039
+ init_big_cores();
1040
+ if (has_big_cores) {
1041
+ cpumask_set_cpu(boot_cpuid,
1042
+ cpu_smallcore_mask(boot_cpuid));
1043
+ }
7211044
7221045 if (smp_ops && smp_ops->probe)
7231046 smp_ops->probe();
....@@ -730,7 +1053,7 @@
7301053 paca_ptrs[boot_cpuid]->__current = current;
7311054 #endif
7321055 set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
733
- current_set[boot_cpuid] = task_thread_info(current);
1056
+ current_set[boot_cpuid] = current;
7341057 }
7351058
7361059 #ifdef CONFIG_HOTPLUG_CPU
....@@ -815,14 +1138,13 @@
8151138
8161139 static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
8171140 {
818
- struct thread_info *ti = task_thread_info(idle);
819
-
8201141 #ifdef CONFIG_PPC64
8211142 paca_ptrs[cpu]->__current = idle;
822
- paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
1143
+ paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
1144
+ THREAD_SIZE - STACK_FRAME_OVERHEAD;
8231145 #endif
824
- ti->cpu = cpu;
825
- secondary_ti = current_set[cpu] = ti;
1146
+ idle->cpu = cpu;
1147
+ secondary_current = current_set[cpu] = idle;
8261148 }
8271149
8281150 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
....@@ -964,26 +1286,46 @@
9641286 return cache;
9651287 }
9661288
967
-static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
1289
+static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
9681290 {
1291
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
9691292 struct device_node *l2_cache, *np;
9701293 int i;
9711294
972
- l2_cache = cpu_to_l2cache(cpu);
973
- if (!l2_cache)
974
- return false;
1295
+ if (has_big_cores)
1296
+ submask_fn = cpu_smallcore_mask;
9751297
976
- for_each_cpu(i, cpu_online_mask) {
1298
+ l2_cache = cpu_to_l2cache(cpu);
1299
+ if (!l2_cache || !*mask) {
1300
+ /* Assume only core siblings share cache with this CPU */
1301
+ for_each_cpu(i, submask_fn(cpu))
1302
+ set_cpus_related(cpu, i, cpu_l2_cache_mask);
1303
+
1304
+ return false;
1305
+ }
1306
+
1307
+ cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
1308
+
1309
+ /* Update l2-cache mask with all the CPUs that are part of submask */
1310
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
1311
+
1312
+ /* Skip all CPUs already part of current CPU l2-cache mask */
1313
+ cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(cpu));
1314
+
1315
+ for_each_cpu(i, *mask) {
9771316 /*
9781317 * when updating the marks the current CPU has not been marked
9791318 * online, but we need to update the cache masks
9801319 */
9811320 np = cpu_to_l2cache(i);
982
- if (!np)
983
- continue;
9841321
985
- if (np == l2_cache)
986
- set_cpus_related(cpu, i, mask_fn);
1322
+ /* Skip all CPUs already part of current CPU l2-cache */
1323
+ if (np == l2_cache) {
1324
+ or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask);
1325
+ cpumask_andnot(*mask, *mask, submask_fn(i));
1326
+ } else {
1327
+ cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(i));
1328
+ }
9871329
9881330 of_node_put(np);
9891331 }
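The loop above, and the near-identical loops added later in this patch for the coregroup and core masks, follow one pattern: scan a shrinking candidate mask and drop a whole group per iteration, so each group is examined once instead of once per CPU. A stand-alone sketch of that pattern, with hypothetical names, assuming nothing beyond the generic cpumask API:

static void build_mask_by_group(int cpu, cpumask_var_t cand,
				bool (*same_group)(int a, int b),
				struct cpumask *(*group)(int),
				struct cpumask *(*target)(int))
{
	int i;

	/* Start from every online CPU not yet accounted for. */
	cpumask_andnot(cand, cpu_online_mask, target(cpu));

	for_each_cpu(i, cand) {
		if (same_group(cpu, i))
			cpumask_or(target(cpu), target(cpu), group(i));
		/* Related or not, this whole group has now been handled. */
		cpumask_andnot(cand, cand, group(i));
	}
}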
....@@ -995,21 +1337,87 @@
9951337 #ifdef CONFIG_HOTPLUG_CPU
9961338 static void remove_cpu_from_masks(int cpu)
9971339 {
1340
+ struct cpumask *(*mask_fn)(int) = cpu_sibling_mask;
9981341 int i;
9991342
1000
- /* NB: cpu_core_mask is a superset of the others */
1001
- for_each_cpu(i, cpu_core_mask(cpu)) {
1002
- set_cpus_unrelated(cpu, i, cpu_core_mask);
1343
+ if (shared_caches)
1344
+ mask_fn = cpu_l2_cache_mask;
1345
+
1346
+ for_each_cpu(i, mask_fn(cpu)) {
10031347 set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
10041348 set_cpus_unrelated(cpu, i, cpu_sibling_mask);
1349
+ if (has_big_cores)
1350
+ set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
1351
+ }
1352
+
1353
+ for_each_cpu(i, cpu_core_mask(cpu))
1354
+ set_cpus_unrelated(cpu, i, cpu_core_mask);
1355
+
1356
+ if (has_coregroup_support()) {
1357
+ for_each_cpu(i, cpu_coregroup_mask(cpu))
1358
+ set_cpus_unrelated(cpu, i, cpu_coregroup_mask);
10051359 }
10061360 }
10071361 #endif
10081362
1363
+static inline void add_cpu_to_smallcore_masks(int cpu)
1364
+{
1365
+ int i;
1366
+
1367
+ if (!has_big_cores)
1368
+ return;
1369
+
1370
+ cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
1371
+
1372
+ for_each_cpu(i, per_cpu(cpu_l1_cache_map, cpu)) {
1373
+ if (cpu_online(i))
1374
+ set_cpus_related(i, cpu, cpu_smallcore_mask);
1375
+ }
1376
+}
1377
+
1378
+static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
1379
+{
1380
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
1381
+ int coregroup_id = cpu_to_coregroup_id(cpu);
1382
+ int i;
1383
+
1384
+ if (shared_caches)
1385
+ submask_fn = cpu_l2_cache_mask;
1386
+
1387
+ if (!*mask) {
1388
+ /* Assume only siblings are part of this CPU's coregroup */
1389
+ for_each_cpu(i, submask_fn(cpu))
1390
+ set_cpus_related(cpu, i, cpu_coregroup_mask);
1391
+
1392
+ return;
1393
+ }
1394
+
1395
+ cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
1396
+
1397
+ /* Update coregroup mask with all the CPUs that are part of submask */
1398
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask);
1399
+
1400
+ /* Skip all CPUs already part of coregroup mask */
1401
+ cpumask_andnot(*mask, *mask, cpu_coregroup_mask(cpu));
1402
+
1403
+ for_each_cpu(i, *mask) {
1404
+ /* Skip all CPUs not part of this coregroup */
1405
+ if (coregroup_id == cpu_to_coregroup_id(i)) {
1406
+ or_cpumasks_related(cpu, i, submask_fn, cpu_coregroup_mask);
1407
+ cpumask_andnot(*mask, *mask, submask_fn(i));
1408
+ } else {
1409
+ cpumask_andnot(*mask, *mask, cpu_coregroup_mask(i));
1410
+ }
1411
+ }
1412
+}
1413
+
10091414 static void add_cpu_to_masks(int cpu)
10101415 {
1416
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
10111417 int first_thread = cpu_first_thread_sibling(cpu);
1012
- int chipid = cpu_to_chip_id(cpu);
1418
+ int chip_id = cpu_to_chip_id(cpu);
1419
+ cpumask_var_t mask;
1420
+ bool ret;
10131421 int i;
10141422
10151423 /*
....@@ -1017,47 +1425,57 @@
10171425 * add it to it's own thread sibling mask.
10181426 */
10191427 cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
1428
+ cpumask_set_cpu(cpu, cpu_core_mask(cpu));
10201429
10211430 for (i = first_thread; i < first_thread + threads_per_core; i++)
10221431 if (cpu_online(i))
10231432 set_cpus_related(i, cpu, cpu_sibling_mask);
10241433
1025
- /*
1026
- * Copy the thread sibling mask into the cache sibling mask
1027
- * and mark any CPUs that share an L2 with this CPU.
1028
- */
1029
- for_each_cpu(i, cpu_sibling_mask(cpu))
1030
- set_cpus_related(cpu, i, cpu_l2_cache_mask);
1031
- update_mask_by_l2(cpu, cpu_l2_cache_mask);
1434
+ add_cpu_to_smallcore_masks(cpu);
10321435
1033
- /*
1034
- * Copy the cache sibling mask into core sibling mask and mark
1035
- * any CPUs on the same chip as this CPU.
1036
- */
1037
- for_each_cpu(i, cpu_l2_cache_mask(cpu))
1038
- set_cpus_related(cpu, i, cpu_core_mask);
1436
+ /* In CPU-hotplug path, hence use GFP_ATOMIC */
1437
+ ret = alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu));
1438
+ update_mask_by_l2(cpu, &mask);
10391439
1040
- if (chipid == -1)
1041
- return;
1440
+ if (has_coregroup_support())
1441
+ update_coregroup_mask(cpu, &mask);
10421442
1043
- for_each_cpu(i, cpu_online_mask)
1044
- if (cpu_to_chip_id(i) == chipid)
1045
- set_cpus_related(cpu, i, cpu_core_mask);
1443
+ if (shared_caches)
1444
+ submask_fn = cpu_l2_cache_mask;
1445
+
1446
+ /* Update core_mask with all the CPUs that are part of submask */
1447
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_core_mask);
1448
+
1449
+ /* Skip all CPUs already part of current CPU core mask */
1450
+ cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu));
1451
+
1452
+ /* If chip_id is -1, limit the cpu_core_mask to within the DIE */
1453
+ if (chip_id == -1)
1454
+ cpumask_and(mask, mask, cpu_cpu_mask(cpu));
1455
+
1456
+ for_each_cpu(i, mask) {
1457
+ if (chip_id == cpu_to_chip_id(i)) {
1458
+ or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask);
1459
+ cpumask_andnot(mask, mask, submask_fn(i));
1460
+ } else {
1461
+ cpumask_andnot(mask, mask, cpu_core_mask(i));
1462
+ }
1463
+ }
1464
+
1465
+ free_cpumask_var(mask);
10461466 }
1047
-
1048
-static bool shared_caches;
10491467
10501468 /* Activate a secondary processor. */
10511469 void start_secondary(void *unused)
10521470 {
1053
- unsigned int cpu = smp_processor_id();
1471
+ unsigned int cpu = raw_smp_processor_id();
10541472
10551473 mmgrab(&init_mm);
10561474 current->active_mm = &init_mm;
10571475
10581476 smp_store_cpu_info(cpu);
10591477 set_dec(tb_ticks_per_jiffy);
1060
- preempt_disable();
1478
+ rcu_cpu_starting(cpu);
10611479 cpu_callin_map[cpu] = 1;
10621480
10631481 if (smp_ops->setup_cpu)
....@@ -1083,12 +1501,22 @@
10831501 * Check for any shared caches. Note that this must be done on a
10841502 * per-core basis because one core in the pair might be disabled.
10851503 */
1086
- if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu)))
1087
- shared_caches = true;
1504
+ if (!shared_caches) {
1505
+ struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
1506
+ struct cpumask *mask = cpu_l2_cache_mask(cpu);
1507
+
1508
+ if (has_big_cores)
1509
+ sibling_mask = cpu_smallcore_mask;
1510
+
1511
+ if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu)))
1512
+ shared_caches = true;
1513
+ }
10881514
10891515 smp_wmb();
10901516 notify_cpu_starting(cpu);
10911517 set_cpu_online(cpu, true);
1518
+
1519
+ boot_init_stack_canary();
10921520
10931521 local_irq_enable();
10941522
....@@ -1107,56 +1535,44 @@
11071535 }
11081536 #endif
11091537
1110
-#ifdef CONFIG_SCHED_SMT
1111
-/* cpumask of CPUs with asymetric SMT dependancy */
1112
-static int powerpc_smt_flags(void)
1538
+static void fixup_topology(void)
11131539 {
1114
- int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
1540
+ int i;
11151541
1116
- if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
1117
- printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
1118
- flags |= SD_ASYM_PACKING;
1542
+#ifdef CONFIG_SCHED_SMT
1543
+ if (has_big_cores) {
1544
+ pr_info("Big cores detected but using small core scheduling\n");
1545
+ powerpc_topology[smt_idx].mask = smallcore_smt_mask;
11191546 }
1120
- return flags;
1121
-}
11221547 #endif
11231548
1124
-static struct sched_domain_topology_level powerpc_topology[] = {
1125
-#ifdef CONFIG_SCHED_SMT
1126
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
1549
+ if (!has_coregroup_support())
1550
+ powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask;
1551
+
1552
+ /*
1553
+ * Try to consolidate topology levels here instead of
1554
+ * allowing the scheduler to degenerate them.
1555
+ * - Don't consolidate if masks are different.
1556
+ * - Don't consolidate if both sd_flags are set and differ.
1557
+ */
1558
+ for (i = 1; i <= die_idx; i++) {
1559
+ if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask)
1560
+ continue;
1561
+
1562
+ if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags &&
1563
+ powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags)
1564
+ continue;
1565
+
1566
+ if (!powerpc_topology[i - 1].sd_flags)
1567
+ powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags;
1568
+
1569
+ powerpc_topology[i].mask = powerpc_topology[i + 1].mask;
1570
+ powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags;
1571
+#ifdef CONFIG_SCHED_DEBUG
1572
+ powerpc_topology[i].name = powerpc_topology[i + 1].name;
11271573 #endif
1128
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
1129
- { NULL, },
1130
-};
1131
-
1132
-/*
1133
- * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
1134
- * This topology makes it *much* cheaper to migrate tasks between adjacent cores
1135
- * since the migrated task remains cache hot. We want to take advantage of this
1136
- * at the scheduler level so an extra topology level is required.
1137
- */
1138
-static int powerpc_shared_cache_flags(void)
1139
-{
1140
- return SD_SHARE_PKG_RESOURCES;
1574
+ }
11411575 }
1142
-
1143
-/*
1144
- * We can't just pass cpu_l2_cache_mask() directly because
1145
- * returns a non-const pointer and the compiler barfs on that.
1146
- */
1147
-static const struct cpumask *shared_cache_mask(int cpu)
1148
-{
1149
- return cpu_l2_cache_mask(cpu);
1150
-}
1151
-
1152
-static struct sched_domain_topology_level power9_topology[] = {
1153
-#ifdef CONFIG_SCHED_SMT
1154
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
1155
-#endif
1156
- { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
1157
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
1158
- { NULL, },
1159
-};
11601576
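A worked example of the consolidation above, under the assumption that CONFIG_SCHED_SMT is enabled and the platform reports no coregroup support: fixup_topology() first points the MC level at the CACHE mask, the loop then finds MC identical to CACHE and copies the DIE entry over it, so the effective table collapses to the same three levels as the power9_topology[] array removed above:

/* Hypothetical effective table after fixup_topology(), under the assumptions above. */
static struct sched_domain_topology_level consolidated_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
#endif
	{ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};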
11611577 void __init smp_cpus_done(unsigned int max_cpus)
11621578 {
....@@ -1169,24 +1585,10 @@
11691585 if (smp_ops && smp_ops->bringup_done)
11701586 smp_ops->bringup_done();
11711587
1172
- /*
1173
- * On a shared LPAR, associativity needs to be requested.
1174
- * Hence, get numa topology before dumping cpu topology
1175
- */
1176
- shared_proc_topology_init();
11771588 dump_numa_cpu_topology();
11781589
1179
- /*
1180
- * If any CPU detects that it's sharing a cache with another CPU then
1181
- * use the deeper topology that is aware of this sharing.
1182
- */
1183
- if (shared_caches) {
1184
- pr_info("Using shared cache scheduler topology\n");
1185
- set_sched_topology(power9_topology);
1186
- } else {
1187
- pr_info("Using standard scheduler topology\n");
1188
- set_sched_topology(powerpc_topology);
1189
- }
1590
+ fixup_topology();
1591
+ set_sched_topology(powerpc_topology);
11901592 }
11911593
11921594 #ifdef CONFIG_HOTPLUG_CPU
....@@ -1216,7 +1618,7 @@
12161618 smp_ops->cpu_die(cpu);
12171619 }
12181620
1219
-void cpu_die(void)
1621
+void arch_cpu_idle_dead(void)
12201622 {
12211623 /*
12221624 * Disable on the down path. This will be re-enabled by
....@@ -1224,8 +1626,8 @@
12241626 */
12251627 this_cpu_disable_ftrace();
12261628
1227
- if (ppc_md.cpu_die)
1228
- ppc_md.cpu_die();
1629
+ if (smp_ops->cpu_offline_self)
1630
+ smp_ops->cpu_offline_self();
12291631
12301632 /* If we return, we re-enter start_secondary */
12311633 start_secondary_resume();