@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Architecture specific (i386/x86_64) functions for kexec based crash dumps.
  *
@@ -23,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/memblock.h>
 
 #include <asm/processor.h>
 #include <asm/hardirq.h>
@@ -35,8 +37,9 @@
 #include <linux/kdebug.h>
 #include <asm/cpu.h>
 #include <asm/reboot.h>
-#include <asm/virtext.h>
 #include <asm/intel_pt.h>
+#include <asm/crash.h>
+#include <asm/cmdline.h>
 
 /* Used while preparing memory map entries for second kernel */
 struct crash_memmap_data {
@@ -54,7 +57,6 @@
  */
 crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
 EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
-unsigned long crash_zero_bytes;
 
 static inline void cpu_crash_vmclear_loaded_vmcss(void)
 {
@@ -67,33 +69,29 @@
         rcu_read_unlock();
 }
 
+/*
+ * When the crashkernel option is specified, only use the low
+ * 1M for the real mode trampoline.
+ */
+void __init crash_reserve_low_1M(void)
+{
+        if (cmdline_find_option(boot_command_line, "crashkernel", NULL, 0) < 0)
+                return;
+
+        memblock_reserve(0, 1<<20);
+        pr_info("Reserving the low 1M of memory for crashkernel\n");
+}
+
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
 static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 {
-#ifdef CONFIG_X86_32
-        struct pt_regs fixed_regs;
-
-        if (!user_mode(regs)) {
-                crash_fixup_ss_esp(&fixed_regs, regs);
-                regs = &fixed_regs;
-        }
-#endif
         crash_save_cpu(regs, cpu);
 
         /*
          * VMCLEAR VMCSs loaded on all cpus if needed.
          */
         cpu_crash_vmclear_loaded_vmcss();
-
-        /* Disable VMX or SVM if needed.
-         *
-         * We need to disable virtualization on all CPUs.
-         * Having VMX or SVM enabled on any CPU may break rebooting
-         * after the kdump kernel has finished its task.
-         */
-        cpu_emergency_vmxoff();
-        cpu_emergency_svm_disable();
 
         /*
          * Disable Intel PT to stop its logging
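The crash_reserve_low_1M() addition runs during early boot: if any crashkernel= option appears on the kernel command line, the low 1 MiB is reserved in memblock up front, so the real-mode trampoline region can never be handed out as ordinary RAM and later clobbered. For readers unfamiliar with cmdline_find_option(), a negative return means the option is absent. A minimal userspace model of that check (find_option() is an invented simplification; the kernel's real parser also handles quoting and word boundaries, and returns the length of the option's value):

#include <stdio.h>
#include <string.h>

/* Simplified model of cmdline_find_option(): negative return means
 * the option is absent.  strstr() ignores quoting and word
 * boundaries, which the real kernel parser respects. */
static int find_option(const char *cmdline, const char *opt)
{
        return strstr(cmdline, opt) ? 0 : -1;
}

int main(void)
{
        const char *cmdline = "ro quiet crashkernel=256M";

        if (find_option(cmdline, "crashkernel") < 0)
                puts("no crashkernel=, low 1M stays available");
        else
                puts("crashkernel= present: memblock_reserve(0, 1<<20)");
        return 0;
}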
@@ -153,12 +151,7 @@
          */
         cpu_crash_vmclear_loaded_vmcss();
 
-        /* Booting kdump kernel with VMX or SVM enabled won't work,
-         * because (among other limitations) we can't disable paging
-         * with the virt flags.
-         */
-        cpu_emergency_vmxoff();
-        cpu_emergency_svm_disable();
+        cpu_emergency_disable_virtualization();
 
         /*
          * Disable Intel PT to stop its logging
@@ -179,6 +172,7 @@
 }
 
 #ifdef CONFIG_KEXEC_FILE
+
 static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
 {
         unsigned int *nr_ranges = arg;
@@ -193,8 +187,7 @@
         unsigned int nr_ranges = 0;
         struct crash_mem *cmem;
 
-        walk_system_ram_res(0, -1, &nr_ranges,
-                                get_nr_ram_ranges_callback);
+        walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
         if (!nr_ranges)
                 return NULL;
 
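A note on the resource-walking API used throughout this file: walk_system_ram_res() visits each System RAM entry in the iomem tree and invokes a caller-supplied callback of type int (*)(struct resource *, void *); a nonzero return from the callback stops the walk early (the deleted determine_backup_region() below exploits that by returning 1). A small self-contained model of the contract, with an invented walker and resource table for illustration:

#include <stdio.h>

struct res { unsigned long long start, end; };

/* Invented stand-in for walk_system_ram_res(): call func() on each
 * entry, stopping early if the callback returns nonzero. */
static int walk_res(struct res *tbl, int n,
                    void *arg, int (*func)(struct res *, void *))
{
        int i, ret = 0;

        for (i = 0; i < n; i++) {
                ret = func(&tbl[i], arg);
                if (ret)
                        break;
        }
        return ret;
}

/* Mirrors get_nr_ram_ranges_callback(): count ranges through *arg. */
static int count_cb(struct res *r, void *arg)
{
        unsigned int *nr = arg;

        (*nr)++;
        return 0;       /* zero means "keep walking" */
}

int main(void)
{
        struct res ram[] = { { 0, 0x9fbff }, { 0x100000, 0x7fffffff } };
        unsigned int nr = 0;

        walk_res(ram, 2, &nr, count_cb);
        printf("%u ranges\n", nr);      /* prints: 2 ranges */
        return 0;
}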
@@ -203,8 +196,7 @@
          * another range split. So add extra two slots here.
          */
         nr_ranges += 2;
-        cmem = vzalloc(sizeof(struct crash_mem) +
-                        sizeof(struct crash_mem_range) * nr_ranges);
+        cmem = vzalloc(struct_size(cmem, ranges, nr_ranges));
         if (!cmem)
                 return NULL;
 
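The vzalloc() size computation is rewritten with struct_size(), which computes the size of a structure ending in a flexible array member plus n trailing elements, with overflow checking built in (the kernel macro saturates rather than wrapping on multiply overflow). A userspace sketch of the equivalence, using a stand-in struct since crash_mem is kernel-internal:

#include <stdio.h>
#include <stdlib.h>

struct range { unsigned long long start, end; };

/* Stand-in for the kernel's struct crash_mem: a fixed header
 * followed by a flexible array of ranges. */
struct mem {
        unsigned int nr_ranges;
        struct range ranges[];
};

int main(void)
{
        unsigned int n = 5;
        struct mem *cmem = NULL;        /* sizeof() does not dereference */

        /* Open-coded form, as the old code wrote it: */
        size_t a = sizeof(struct mem) + sizeof(struct range) * n;

        /* What struct_size(cmem, ranges, n) evaluates to, minus the
         * macro's saturating overflow checks: */
        size_t b = sizeof(*cmem) + sizeof(cmem->ranges[0]) * n;

        printf("%zu == %zu\n", a, b);
        free(calloc(1, b));     /* userspace analogue of vzalloc(b) */
        return 0;
}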
@@ -222,17 +214,19 @@
 {
         int ret = 0;
 
+        /* Exclude the low 1M because it is always reserved */
+        ret = crash_exclude_mem_range(cmem, 0, (1<<20)-1);
+        if (ret)
+                return ret;
+
         /* Exclude crashkernel region */
         ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
         if (ret)
                 return ret;
 
-        if (crashk_low_res.end) {
+        if (crashk_low_res.end)
                 ret = crash_exclude_mem_range(cmem, crashk_low_res.start,
-                                                crashk_low_res.end);
-                if (ret)
-                        return ret;
-        }
+                                              crashk_low_res.end);
 
         return ret;
 }
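The new first exclusion drops the low 1 MiB from the ELF-header memory map, matching the unconditional memblock reservation made at boot. crash_exclude_mem_range() clips the given span out of every range in the list; when the excluded span falls strictly inside an existing range, that range splits in two, which is why fill_up_crash_elf_data() adds two spare slots above. A small self-contained model of the split (simplified: it handles one range, not a whole list, and omits the fully-covered case where the range would be deleted):

#include <stdio.h>

struct range { unsigned long long start, end; };        /* inclusive ends */

/* Simplified model of one crash_exclude_mem_range() case: cut
 * [ms, me] out of *r; an interior cut splits the range, writing the
 * upper half to *tail and returning 1 (one extra slot consumed). */
static int exclude(struct range *r, struct range *tail,
                   unsigned long long ms, unsigned long long me)
{
        if (me < r->start || ms > r->end)
                return 0;                       /* no overlap */
        if (ms > r->start && me < r->end) {     /* interior cut: split */
                tail->start = me + 1;
                tail->end = r->end;
                r->end = ms - 1;
                return 1;
        }
        if (ms <= r->start)
                r->start = me + 1;              /* clip the front */
        else
                r->end = ms - 1;                /* clip the back */
        return 0;
}

int main(void)
{
        struct range r = { 0, 0x7fffffff }, tail;

        /* Excluding the low 1M only clips the front... */
        exclude(&r, &tail, 0, (1 << 20) - 1);
        /* ...but excluding a crashkernel region in the middle splits: */
        if (exclude(&r, &tail, 0x10000000, 0x1fffffff))
                printf("[%llx-%llx] [%llx-%llx]\n",
                       r.start, r.end, tail.start, tail.end);
        return 0;
}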
@@ -253,16 +247,13 @@
                                         unsigned long *sz)
 {
         struct crash_mem *cmem;
-        Elf64_Ehdr *ehdr;
-        Elf64_Phdr *phdr;
-        int ret, i;
+        int ret;
 
         cmem = fill_up_crash_elf_data();
         if (!cmem)
                 return -ENOMEM;
 
-        ret = walk_system_ram_res(0, -1, cmem,
-                                prepare_elf64_ram_headers_callback);
+        ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
         if (ret)
                 goto out;
 
@@ -272,24 +263,8 @@
                 goto out;
 
         /* By default prepare 64bit headers */
-        ret = crash_prepare_elf64_headers(cmem,
-                                IS_ENABLED(CONFIG_X86_64), addr, sz);
-        if (ret)
-                goto out;
+        ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz);
 
-        /*
-         * If a range matches backup region, adjust offset to backup
-         * segment.
-         */
-        ehdr = (Elf64_Ehdr *)*addr;
-        phdr = (Elf64_Phdr *)(ehdr + 1);
-        for (i = 0; i < ehdr->e_phnum; phdr++, i++)
-                if (phdr->p_type == PT_LOAD &&
-                                phdr->p_paddr == image->arch.backup_src_start &&
-                                phdr->p_memsz == image->arch.backup_src_sz) {
-                        phdr->p_offset = image->arch.backup_load_addr;
-                        break;
-                }
 out:
         vfree(cmem);
         return ret;
@@ -303,8 +278,7 @@
         if (nr_e820_entries >= E820_MAX_ENTRIES_ZEROPAGE)
                 return 1;
 
-        memcpy(&params->e820_table[nr_e820_entries], entry,
-                        sizeof(struct e820_entry));
+        memcpy(&params->e820_table[nr_e820_entries], entry, sizeof(struct e820_entry));
         params->e820_entries++;
         return 0;
 }
@@ -328,18 +302,10 @@
                                       unsigned long long mend)
 {
         unsigned long start, end;
-        int ret = 0;
 
         cmem->ranges[0].start = mstart;
         cmem->ranges[0].end = mend;
         cmem->nr_ranges = 1;
-
-        /* Exclude Backup region */
-        start = image->arch.backup_load_addr;
-        end = start + image->arch.backup_src_sz - 1;
-        ret = crash_exclude_mem_range(cmem, start, end);
-        if (ret)
-                return ret;
 
         /* Exclude elf header region */
         start = image->arch.elf_load_addr;
@@ -363,33 +329,38 @@
         memset(&cmd, 0, sizeof(struct crash_memmap_data));
         cmd.params = params;
 
-        /* Add first 640K segment */
-        ei.addr = image->arch.backup_src_start;
-        ei.size = image->arch.backup_src_sz;
-        ei.type = E820_TYPE_RAM;
-        add_e820_entry(params, &ei);
+        /* Add the low 1M */
+        cmd.type = E820_TYPE_RAM;
+        flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+        walk_iomem_res_desc(IORES_DESC_NONE, flags, 0, (1<<20)-1, &cmd,
+                            memmap_entry_callback);
 
         /* Add ACPI tables */
         cmd.type = E820_TYPE_ACPI;
         flags = IORESOURCE_MEM | IORESOURCE_BUSY;
         walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd,
-                        memmap_entry_callback);
+                            memmap_entry_callback);
 
         /* Add ACPI Non-volatile Storage */
         cmd.type = E820_TYPE_NVS;
         walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd,
-                        memmap_entry_callback);
+                            memmap_entry_callback);
+
+        /* Add e820 reserved ranges */
+        cmd.type = E820_TYPE_RESERVED;
+        flags = IORESOURCE_MEM;
+        walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd,
+                            memmap_entry_callback);
 
         /* Add crashk_low_res region */
         if (crashk_low_res.end) {
                 ei.addr = crashk_low_res.start;
-                ei.size = crashk_low_res.end - crashk_low_res.start + 1;
+                ei.size = resource_size(&crashk_low_res);
                 ei.type = E820_TYPE_RAM;
                 add_e820_entry(params, &ei);
         }
 
         /* Exclude some ranges from crashk_res and add rest to memmap */
-        ret = memmap_exclude_ranges(image, cmem, crashk_res.start,
-                                        crashk_res.end);
+        ret = memmap_exclude_ranges(image, cmem, crashk_res.start, crashk_res.end);
         if (ret)
                 goto out;
 
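Two details in the hunk above are worth spelling out. First, the kdump kernel's e820 map is now assembled by walking the iomem tree: the low 1 MiB of System RAM, the ACPI regions, and (newly) e820-reserved ranges are each copied in through memmap_entry_callback(), replacing the old hardcoded 640K backup-region entry. Second, the open-coded `end - start + 1` becomes resource_size(); kernel resource ranges use inclusive end addresses, so the +1 is easy to get wrong by hand. A one-liner model of that helper, with an invented struct name since this is a userspace sketch:

#include <stdio.h>

struct resource_model { unsigned long long start, end; };   /* inclusive */

/* Mirrors the kernel's resource_size(): inclusive ends need +1. */
static unsigned long long resource_size_model(const struct resource_model *r)
{
        return r->end - r->start + 1;
}

int main(void)
{
        /* e.g. a 256 MiB crashkernel low region starting at 1 MiB */
        struct resource_model low = { 0x100000, 0x100fffff };

        printf("%llu MiB\n", resource_size_model(&low) >> 20);  /* 256 */
        return 0;
}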
@@ -410,54 +381,11 @@
         return ret;
 }
 
-static int determine_backup_region(struct resource *res, void *arg)
-{
-        struct kimage *image = arg;
-
-        image->arch.backup_src_start = res->start;
-        image->arch.backup_src_sz = resource_size(res);
-
-        /* Expecting only one range for backup region */
-        return 1;
-}
-
 int crash_load_segments(struct kimage *image)
 {
         int ret;
         struct kexec_buf kbuf = { .image = image, .buf_min = 0,
                                   .buf_max = ULONG_MAX, .top_down = false };
-
-        /*
-         * Determine and load a segment for backup area. First 640K RAM
-         * region is backup source
-         */
-
-        ret = walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END,
-                                  image, determine_backup_region);
-
-        /* Zero or postive return values are ok */
-        if (ret < 0)
-                return ret;
-
-        /* Add backup segment. */
-        if (image->arch.backup_src_sz) {
-                kbuf.buffer = &crash_zero_bytes;
-                kbuf.bufsz = sizeof(crash_zero_bytes);
-                kbuf.memsz = image->arch.backup_src_sz;
-                kbuf.buf_align = PAGE_SIZE;
-                /*
-                 * Ideally there is no source for backup segment. This is
-                 * copied in purgatory after crash. Just add a zero filled
-                 * segment for now to make sure checksum logic works fine.
-                 */
-                ret = kexec_add_buffer(&kbuf);
-                if (ret)
-                        return ret;
-                image->arch.backup_load_addr = kbuf.mem;
-                pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n",
-                         image->arch.backup_load_addr,
-                         image->arch.backup_src_start, kbuf.memsz);
-        }
 
         /* Prepare elf headers and add a segment */
         ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz);
@@ -469,6 +397,7 @@
 
         kbuf.memsz = kbuf.bufsz;
         kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
+        kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
         ret = kexec_add_buffer(&kbuf);
         if (ret) {
                 vfree((void *)image->arch.elf_headers);
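The added kbuf.mem initialization is not cosmetic: kexec_locate_mem_hole() searches for a free region only when kbuf.mem equals KEXEC_BUF_MEM_UNKNOWN, and otherwise treats the field as an address the caller has already chosen. Setting the sentinel explicitly, rather than relying on the designated initializer zeroing the field, documents that intent and keeps the code robust. A runnable userspace model of that sentinel logic (all names here are invented stand-ins, not the kernel API):

#include <stdio.h>

#define BUF_MEM_UNKNOWN 0       /* model of KEXEC_BUF_MEM_UNKNOWN */

struct buf { unsigned long mem; };

/* Model of kexec_locate_mem_hole(): only search for a hole when the
 * caller has not already fixed a load address. */
static int locate_mem_hole(struct buf *b)
{
        if (b->mem != BUF_MEM_UNKNOWN)
                return 0;               /* caller chose the address */
        b->mem = 0x1000000;             /* pretend we found a hole */
        return 0;
}

int main(void)
{
        struct buf b = { .mem = BUF_MEM_UNKNOWN };

        locate_mem_hole(&b);
        printf("placed at %#lx\n", b.mem);
        return 0;
}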