hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/arch/s390/kernel/smp.c
....@@ -20,7 +20,7 @@
2020 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
2121
2222 #include <linux/workqueue.h>
23
-#include <linux/bootmem.h>
23
+#include <linux/memblock.h>
2424 #include <linux/export.h>
2525 #include <linux/init.h>
2626 #include <linux/mm.h>
....@@ -35,7 +35,6 @@
3535 #include <linux/sched/hotplug.h>
3636 #include <linux/sched/task_stack.h>
3737 #include <linux/crash_dump.h>
38
-#include <linux/memblock.h>
3938 #include <linux/kprobes.h>
4039 #include <asm/asm-offsets.h>
4140 #include <asm/diag.h>
....@@ -54,6 +53,7 @@
5453 #include <asm/sigp.h>
5554 #include <asm/idle.h>
5655 #include <asm/nmi.h>
56
+#include <asm/stacktrace.h>
5757 #include <asm/topology.h>
5858 #include "entry.h"
5959
....@@ -61,6 +61,7 @@
6161 ec_schedule = 0,
6262 ec_call_function_single,
6363 ec_stop_cpu,
64
+ ec_mcck_pending,
6465 };
6566
6667 enum {
....@@ -145,7 +146,7 @@
145146
146147 static inline int pcpu_stopped(struct pcpu *pcpu)
147148 {
148
- u32 uninitialized_var(status);
149
+ u32 status;
149150
150151 if (__pcpu_sigp(pcpu->address, SIGP_SENSE,
151152 0, &status) != SIGP_CC_STATUS_STORED)
....@@ -186,36 +187,37 @@
186187 pcpu_sigp_retry(pcpu, order, 0);
187188 }
188189
189
-#define ASYNC_FRAME_OFFSET (ASYNC_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
190
-#define PANIC_FRAME_OFFSET (PAGE_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
191
-
192190 static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
193191 {
194
- unsigned long async_stack, panic_stack;
192
+ unsigned long async_stack, nodat_stack;
195193 struct lowcore *lc;
196194
197195 if (pcpu != &pcpu_devices[0]) {
198196 pcpu->lowcore = (struct lowcore *)
199197 __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
200
- async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
201
- panic_stack = __get_free_page(GFP_KERNEL);
202
- if (!pcpu->lowcore || !panic_stack || !async_stack)
198
+ nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
199
+ if (!pcpu->lowcore || !nodat_stack)
203200 goto out;
204201 } else {
205
- async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
206
- panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
202
+ nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET;
207203 }
204
+ async_stack = stack_alloc();
205
+ if (!async_stack)
206
+ goto out;
208207 lc = pcpu->lowcore;
209208 memcpy(lc, &S390_lowcore, 512);
210209 memset((char *) lc + 512, 0, sizeof(*lc) - 512);
211
- lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
212
- lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
210
+ lc->async_stack = async_stack + STACK_INIT_OFFSET;
211
+ lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET;
213212 lc->cpu_nr = cpu;
214213 lc->spinlock_lockval = arch_spin_lockval(cpu);
215214 lc->spinlock_index = 0;
216215 lc->br_r1_trampoline = 0x07f1; /* br %r1 */
216
+ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
217
+ lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
218
+ lc->preempt_count = PREEMPT_DISABLED;
217219 if (nmi_alloc_per_cpu(lc))
218
- goto out;
220
+ goto out_async;
219221 if (vdso_alloc_per_cpu(lc))
220222 goto out_mcesa;
221223 lowcore_ptr[cpu] = lc;
....@@ -224,31 +226,34 @@
224226
225227 out_mcesa:
226228 nmi_free_per_cpu(lc);
229
+out_async:
230
+ stack_free(async_stack);
227231 out:
228232 if (pcpu != &pcpu_devices[0]) {
229
- free_page(panic_stack);
230
- free_pages(async_stack, ASYNC_ORDER);
233
+ free_pages(nodat_stack, THREAD_SIZE_ORDER);
231234 free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
232235 }
233236 return -ENOMEM;
234237 }
235238
236
-#ifdef CONFIG_HOTPLUG_CPU
237
-
238239 static void pcpu_free_lowcore(struct pcpu *pcpu)
239240 {
241
+ unsigned long async_stack, nodat_stack, lowcore;
242
+
243
+ nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET;
244
+ async_stack = pcpu->lowcore->async_stack - STACK_INIT_OFFSET;
245
+ lowcore = (unsigned long) pcpu->lowcore;
246
+
240247 pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
241248 lowcore_ptr[pcpu - pcpu_devices] = NULL;
242249 vdso_free_per_cpu(pcpu->lowcore);
243250 nmi_free_per_cpu(pcpu->lowcore);
251
+ stack_free(async_stack);
244252 if (pcpu == &pcpu_devices[0])
245253 return;
246
- free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
247
- free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
248
- free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
254
+ free_pages(nodat_stack, THREAD_SIZE_ORDER);
255
+ free_pages(lowcore, LC_ORDER);
249256 }
250
-
251
-#endif /* CONFIG_HOTPLUG_CPU */
252257
253258 static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
254259 {
....@@ -263,7 +268,8 @@
263268 lc->kernel_asce = S390_lowcore.kernel_asce;
264269 lc->user_asce = S390_lowcore.kernel_asce;
265270 lc->machine_flags = S390_lowcore.machine_flags;
266
- lc->user_timer = lc->system_timer = lc->steal_timer = 0;
271
+ lc->user_timer = lc->system_timer =
272
+ lc->steal_timer = lc->avg_steal_timer = 0;
267273 __ctl_store(lc->cregs_save_area, 0, 15);
268274 lc->cregs_save_area[1] = lc->kernel_asce;
269275 lc->cregs_save_area[7] = lc->vdso_asce;
....@@ -296,7 +302,7 @@
296302 {
297303 struct lowcore *lc = pcpu->lowcore;
298304
299
- lc->restart_stack = lc->kernel_stack;
305
+ lc->restart_stack = lc->nodat_stack;
300306 lc->restart_fn = (unsigned long) func;
301307 lc->restart_data = (unsigned long) data;
302308 lc->restart_source = -1UL;
....@@ -306,15 +312,21 @@
306312 /*
307313 * Call function via PSW restart on pcpu and stop the current cpu.
308314 */
309
-static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
310
- void *data, unsigned long stack)
315
+static void __pcpu_delegate(void (*func)(void*), void *data)
316
+{
317
+ func(data); /* should not return */
318
+}
319
+
320
+static void __no_sanitize_address pcpu_delegate(struct pcpu *pcpu,
321
+ void (*func)(void *),
322
+ void *data, unsigned long stack)
311323 {
312324 struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
313325 unsigned long source_cpu = stap();
314326
315
- __load_psw_mask(PSW_KERNEL_BITS);
327
+ __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
316328 if (pcpu->address == source_cpu)
317
- func(data); /* should not return */
329
+ CALL_ON_STACK(__pcpu_delegate, stack, 2, func, data);
318330 /* Stop target cpu (if func returns this stops the current cpu). */
319331 pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
320332 /* Restart func on the target cpu and stop the current cpu. */
....@@ -380,7 +392,7 @@
380392 lc = &S390_lowcore;
381393
382394 pcpu_delegate(&pcpu_devices[0], func, data,
383
- lc->panic_stack - PANIC_FRAME_OFFSET + PAGE_SIZE);
395
+ lc->nodat_stack);
384396 }
385397
386398 int smp_find_processor_id(u16 address)
....@@ -391,6 +403,11 @@
391403 if (pcpu_devices[cpu].address == address)
392404 return cpu;
393405 return -1;
406
+}
407
+
408
+void schedule_mcck_handler(void)
409
+{
410
+ pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending);
394411 }
395412
396413 bool notrace arch_vcpu_is_preempted(int cpu)
....@@ -405,14 +422,11 @@
405422
406423 void notrace smp_yield_cpu(int cpu)
407424 {
408
- if (MACHINE_HAS_DIAG9C) {
409
- diag_stat_inc_norecursion(DIAG_STAT_X09C);
410
- asm volatile("diag %0,0,0x9c"
411
- : : "d" (pcpu_devices[cpu].address));
412
- } else if (MACHINE_HAS_DIAG44) {
413
- diag_stat_inc_norecursion(DIAG_STAT_X044);
414
- asm volatile("diag 0,0,0x44");
415
- }
425
+ if (!MACHINE_HAS_DIAG9C)
426
+ return;
427
+ diag_stat_inc_norecursion(DIAG_STAT_X09C);
428
+ asm volatile("diag %0,0,0x9c"
429
+ : : "d" (pcpu_devices[cpu].address));
416430 }
417431
418432 /*
....@@ -490,6 +504,8 @@
490504 scheduler_ipi();
491505 if (test_bit(ec_call_function_single, &bits))
492506 generic_smp_call_function_single_interrupt();
507
+ if (test_bit(ec_mcck_pending, &bits))
508
+ s390_handle_mcck();
493509 }
494510
495511 static void do_ext_call_interrupt(struct ext_code ext_code,
....@@ -591,14 +607,14 @@
591607 /*
592608 * Collect CPU state of the previous, crashed system.
593609 * There are four cases:
594
- * 1) standard zfcp dump
595
- * condition: OLDMEM_BASE == NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
610
+ * 1) standard zfcp/nvme dump
611
+ * condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true
596612 * The state for all CPUs except the boot CPU needs to be collected
597613 * with sigp stop-and-store-status. The boot CPU state is located in
598614 * the absolute lowcore of the memory stored in the HSA. The zcore code
599615 * will copy the boot CPU state from the HSA.
600
- * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory)
601
- * condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
616
+ * 2) stand-alone kdump for SCSI/NVMe (zfcp/nvme dump with swapped memory)
617
+ * condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == true
602618 * The state for all CPUs except the boot CPU needs to be collected
603619 * with sigp stop-and-store-status. The firmware or the boot-loader
604620 * stored the registers of the boot CPU in the absolute lowcore in the
....@@ -645,11 +661,15 @@
645661 unsigned long page;
646662 bool is_boot_cpu;
647663
648
- if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP))
664
+ if (!(OLDMEM_BASE || is_ipl_type_dump()))
649665 /* No previous system present, normal boot. */
650666 return;
651667 /* Allocate a page as dumping area for the store status sigps */
652
- page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31);
668
+ page = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, 1UL << 31);
669
+ if (!page)
670
+ panic("ERROR: Failed to allocate %lx bytes below %lx\n",
671
+ PAGE_SIZE, 1UL << 31);
672
+
653673 /* Set multi-threading state to the previous system. */
654674 pcpu_set_smt(sclp.mtid_prev);
655675 boot_cpu_addr = stap();
....@@ -667,7 +687,7 @@
667687 /* Get the vector registers */
668688 smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page);
669689 /*
670
- * For a zfcp dump OLDMEM_BASE == NULL and the registers
690
+ * For a zfcp/nvme dump OLDMEM_BASE == NULL and the registers
671691 * of the boot CPU are stored in the HSA. To retrieve
672692 * these registers an SCLP request is required which is
673693 * done by drivers/s390/char/zcore.c:init_cpu_info()
....@@ -677,7 +697,7 @@
677697 smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
678698 }
679699 memblock_free(page, PAGE_SIZE);
680
- diag308_reset();
700
+ diag_dma_ops.diag308_reset();
681701 pcpu_set_smt(0);
682702 }
683703 #endif /* CONFIG_CRASH_DUMP */
....@@ -690,6 +710,11 @@
690710 int smp_cpu_get_polarization(int cpu)
691711 {
692712 return pcpu_devices[cpu].polarization;
713
+}
714
+
715
+int smp_cpu_get_cpu_address(int cpu)
716
+{
717
+ return pcpu_devices[cpu].address;
693718 }
694719
695720 static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
....@@ -786,7 +811,10 @@
786811 u16 address;
787812
788813 /* Get CPU information */
789
- info = memblock_virt_alloc(sizeof(*info), 8);
814
+ info = memblock_alloc(sizeof(*info), 8);
815
+ if (!info)
816
+ panic("%s: Failed to allocate %zu bytes align=0x%x\n",
817
+ __func__, sizeof(*info), 8);
790818 smp_get_core_info(info, 1);
791819 /* Find boot CPU type */
792820 if (sclp.has_core_type) {
....@@ -826,26 +854,16 @@
826854 memblock_free_early((unsigned long)info, sizeof(*info));
827855 }
828856
829
-/*
830
- * Activate a secondary processor.
831
- */
832
-static void smp_start_secondary(void *cpuvoid)
857
+static void smp_init_secondary(void)
833858 {
834859 int cpu = raw_smp_processor_id();
835860
836861 S390_lowcore.last_update_clock = get_tod_clock();
837
- S390_lowcore.restart_stack = (unsigned long) restart_stack;
838
- S390_lowcore.restart_fn = (unsigned long) do_restart;
839
- S390_lowcore.restart_data = 0;
840
- S390_lowcore.restart_source = -1UL;
841862 restore_access_regs(S390_lowcore.access_regs_save_area);
842
- __ctl_load(S390_lowcore.cregs_save_area, 0, 15);
843
- __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
844863 set_cpu_flag(CIF_ASCE_PRIMARY);
845864 set_cpu_flag(CIF_ASCE_SECONDARY);
846865 cpu_init();
847866 rcu_cpu_starting(cpu);
848
- preempt_disable();
849867 init_cpu_timer();
850868 vtime_init();
851869 pfault_init();
....@@ -855,9 +873,24 @@
855873 else
856874 clear_cpu_flag(CIF_DEDICATED_CPU);
857875 set_cpu_online(cpu, true);
876
+ update_cpu_masks();
858877 inc_irq_stat(CPU_RST);
859878 local_irq_enable();
860879 cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
880
+}
881
+
882
+/*
883
+ * Activate a secondary processor.
884
+ */
885
+static void __no_sanitize_address smp_start_secondary(void *cpuvoid)
886
+{
887
+ S390_lowcore.restart_stack = (unsigned long) restart_stack;
888
+ S390_lowcore.restart_fn = (unsigned long) do_restart;
889
+ S390_lowcore.restart_data = 0;
890
+ S390_lowcore.restart_source = -1UL;
891
+ __ctl_load(S390_lowcore.cregs_save_area, 0, 15);
892
+ __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
893
+ CALL_ON_STACK_NORETURN(smp_init_secondary, S390_lowcore.kernel_stack);
861894 }
862895
863896 /* Upping and downing of CPUs */
....@@ -893,8 +926,6 @@
893926 }
894927 early_param("possible_cpus", _setup_possible_cpus);
895928
896
-#ifdef CONFIG_HOTPLUG_CPU
897
-
898929 int __cpu_disable(void)
899930 {
900931 unsigned long cregs[16];
....@@ -902,6 +933,7 @@
902933 /* Handle possible pending IPIs */
903934 smp_handle_ext_call();
904935 set_cpu_online(smp_processor_id(), false);
936
+ update_cpu_masks();
905937 /* Disable pseudo page faults on this cpu. */
906938 pfault_fini();
907939 /* Disable interrupt sources via control register. */
....@@ -934,8 +966,6 @@
934966 pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
935967 for (;;) ;
936968 }
937
-
938
-#endif /* CONFIG_HOTPLUG_CPU */
939969
940970 void __init smp_fill_possible_mask(void)
941971 {
....@@ -971,10 +1001,6 @@
9711001 smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
9721002 }
9731003
974
-void __init smp_cpus_done(unsigned int max_cpus)
975
-{
976
-}
977
-
9781004 void __init smp_setup_processor_id(void)
9791005 {
9801006 pcpu_devices[0].address = stap();
....@@ -994,7 +1020,6 @@
9941020 return 0;
9951021 }
9961022
997
-#ifdef CONFIG_HOTPLUG_CPU
9981023 static ssize_t cpu_configure_show(struct device *dev,
9991024 struct device_attribute *attr, char *buf)
10001025 {
....@@ -1071,7 +1096,6 @@
10711096 return rc ? rc : count;
10721097 }
10731098 static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
1074
-#endif /* CONFIG_HOTPLUG_CPU */
10751099
10761100 static ssize_t show_cpu_address(struct device *dev,
10771101 struct device_attribute *attr, char *buf)
....@@ -1081,9 +1105,7 @@
10811105 static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
10821106
10831107 static struct attribute *cpu_common_attrs[] = {
1084
-#ifdef CONFIG_HOTPLUG_CPU
10851108 &dev_attr_configure.attr,
1086
-#endif
10871109 &dev_attr_address.attr,
10881110 NULL,
10891111 };
....@@ -1108,6 +1130,7 @@
11081130
11091131 return sysfs_create_group(&s->kobj, &cpu_online_attr_group);
11101132 }
1133
+
11111134 static int smp_cpu_pre_down(unsigned int cpu)
11121135 {
11131136 struct device *s = &per_cpu(cpu_device, cpu)->dev;
....@@ -1142,14 +1165,10 @@
11421165 out_topology:
11431166 sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
11441167 out_cpu:
1145
-#ifdef CONFIG_HOTPLUG_CPU
11461168 unregister_cpu(c);
1147
-#endif
11481169 out:
11491170 return rc;
11501171 }
1151
-
1152
-#ifdef CONFIG_HOTPLUG_CPU
11531172
11541173 int __ref smp_rescan_cpus(void)
11551174 {
....@@ -1186,17 +1205,14 @@
11861205 return rc ? rc : count;
11871206 }
11881207 static DEVICE_ATTR_WO(rescan);
1189
-#endif /* CONFIG_HOTPLUG_CPU */
11901208
11911209 static int __init s390_smp_init(void)
11921210 {
11931211 int cpu, rc = 0;
11941212
1195
-#ifdef CONFIG_HOTPLUG_CPU
11961213 rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
11971214 if (rc)
11981215 return rc;
1199
-#endif
12001216 for_each_present_cpu(cpu) {
12011217 rc = smp_add_present_cpu(cpu);
12021218 if (rc)