| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Architecture specific (i386/x86_64) functions for kexec based crash dumps. |
|---|
| 3 | 4 | * |
|---|
| .. | .. |
|---|
| 23 | 24 | #include <linux/export.h> |
|---|
| 24 | 25 | #include <linux/slab.h> |
|---|
| 25 | 26 | #include <linux/vmalloc.h> |
|---|
| 27 | +#include <linux/memblock.h> |
|---|
| 26 | 28 | |
|---|
| 27 | 29 | #include <asm/processor.h> |
|---|
| 28 | 30 | #include <asm/hardirq.h> |
|---|
| .. | .. |
|---|
| 35 | 37 | #include <linux/kdebug.h> |
|---|
| 36 | 38 | #include <asm/cpu.h> |
|---|
| 37 | 39 | #include <asm/reboot.h> |
|---|
| 38 | | -#include <asm/virtext.h> |
|---|
| 39 | 40 | #include <asm/intel_pt.h> |
|---|
| 41 | +#include <asm/crash.h> |
|---|
| 42 | +#include <asm/cmdline.h> |
|---|
| 40 | 43 | |
|---|
| 41 | 44 | /* Used while preparing memory map entries for second kernel */ |
|---|
| 42 | 45 | struct crash_memmap_data { |
|---|
| .. | .. |
|---|
| 54 | 57 | */ |
|---|
| 55 | 58 | crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL; |
|---|
| 56 | 59 | EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss); |
|---|
| 57 | | -unsigned long crash_zero_bytes; |
|---|
| 58 | 60 | |
|---|
| 59 | 61 | static inline void cpu_crash_vmclear_loaded_vmcss(void) |
|---|
| 60 | 62 | { |
|---|
| .. | .. |
|---|
| 67 | 69 | rcu_read_unlock(); |
|---|
| 68 | 70 | } |
|---|
| 69 | 71 | |
|---|
| 72 | +/* |
|---|
| 73 | + * When the crashkernel option is specified, only use the low |
|---|
| 74 | + * 1M for the real mode trampoline. |
|---|
| 75 | + */ |
|---|
| 76 | +void __init crash_reserve_low_1M(void) |
|---|
| 77 | +{ |
|---|
| 78 | + if (cmdline_find_option(boot_command_line, "crashkernel", NULL, 0) < 0) |
|---|
| 79 | + return; |
|---|
| 80 | + |
|---|
| 81 | + memblock_reserve(0, 1<<20); |
|---|
| 82 | + pr_info("Reserving the low 1M of memory for crashkernel\n"); |
|---|
| 83 | +} |
|---|
| 84 | + |
|---|
| 70 | 85 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
|---|
| 71 | 86 | |
|---|
| 72 | 87 | static void kdump_nmi_callback(int cpu, struct pt_regs *regs) |
|---|
| 73 | 88 | { |
|---|
| 74 | | -#ifdef CONFIG_X86_32 |
|---|
| 75 | | - struct pt_regs fixed_regs; |
|---|
| 76 | | - |
|---|
| 77 | | - if (!user_mode(regs)) { |
|---|
| 78 | | - crash_fixup_ss_esp(&fixed_regs, regs); |
|---|
| 79 | | - regs = &fixed_regs; |
|---|
| 80 | | - } |
|---|
| 81 | | -#endif |
|---|
| 82 | 89 | crash_save_cpu(regs, cpu); |
|---|
| 83 | 90 | |
|---|
| 84 | 91 | /* |
|---|
| 85 | 92 | * VMCLEAR VMCSs loaded on all cpus if needed. |
|---|
| 86 | 93 | */ |
|---|
| 87 | 94 | cpu_crash_vmclear_loaded_vmcss(); |
|---|
| 88 | | - |
|---|
| 89 | | - /* Disable VMX or SVM if needed. |
|---|
| 90 | | - * |
|---|
| 91 | | - * We need to disable virtualization on all CPUs. |
|---|
| 92 | | - * Having VMX or SVM enabled on any CPU may break rebooting |
|---|
| 93 | | - * after the kdump kernel has finished its task. |
|---|
| 94 | | - */ |
|---|
| 95 | | - cpu_emergency_vmxoff(); |
|---|
| 96 | | - cpu_emergency_svm_disable(); |
|---|
| 97 | 95 | |
|---|
| 98 | 96 | /* |
|---|
| 99 | 97 | * Disable Intel PT to stop its logging |
|---|
| .. | .. |
|---|
| 153 | 151 | */ |
|---|
| 154 | 152 | cpu_crash_vmclear_loaded_vmcss(); |
|---|
| 155 | 153 | |
|---|
| 156 | | - /* Booting kdump kernel with VMX or SVM enabled won't work, |
|---|
| 157 | | - * because (among other limitations) we can't disable paging |
|---|
| 158 | | - * with the virt flags. |
|---|
| 159 | | - */ |
|---|
| 160 | | - cpu_emergency_vmxoff(); |
|---|
| 161 | | - cpu_emergency_svm_disable(); |
|---|
| 154 | + cpu_emergency_disable_virtualization(); |
|---|
| 162 | 155 | |
|---|
| 163 | 156 | /* |
|---|
| 164 | 157 | * Disable Intel PT to stop its logging |
|---|
| .. | .. |
|---|
| 179 | 172 | } |
|---|
| 180 | 173 | |
|---|
| 181 | 174 | #ifdef CONFIG_KEXEC_FILE |
|---|
| 175 | + |
|---|
| 182 | 176 | static int get_nr_ram_ranges_callback(struct resource *res, void *arg) |
|---|
| 183 | 177 | { |
|---|
| 184 | 178 | unsigned int *nr_ranges = arg; |
|---|
| .. | .. |
|---|
| 193 | 187 | unsigned int nr_ranges = 0; |
|---|
| 194 | 188 | struct crash_mem *cmem; |
|---|
| 195 | 189 | |
|---|
| 196 | | - walk_system_ram_res(0, -1, &nr_ranges, |
|---|
| 197 | | - get_nr_ram_ranges_callback); |
|---|
| 190 | + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); |
|---|
| 198 | 191 | if (!nr_ranges) |
|---|
| 199 | 192 | return NULL; |
|---|
| 200 | 193 | |
|---|
| .. | .. |
|---|
| 203 | 196 | * another range split. So add extra two slots here. |
|---|
| 204 | 197 | */ |
|---|
| 205 | 198 | nr_ranges += 2; |
|---|
| 206 | | - cmem = vzalloc(sizeof(struct crash_mem) + |
|---|
| 207 | | - sizeof(struct crash_mem_range) * nr_ranges); |
|---|
| 199 | + cmem = vzalloc(struct_size(cmem, ranges, nr_ranges)); |
|---|
| 208 | 200 | if (!cmem) |
|---|
| 209 | 201 | return NULL; |
|---|
| 210 | 202 | |
|---|
| .. | .. |
|---|
| 222 | 214 | { |
|---|
| 223 | 215 | int ret = 0; |
|---|
| 224 | 216 | |
|---|
| 217 | + /* Exclude the low 1M because it is always reserved */ |
|---|
| 218 | + ret = crash_exclude_mem_range(cmem, 0, (1<<20)-1); |
|---|
| 219 | + if (ret) |
|---|
| 220 | + return ret; |
|---|
| 221 | + |
|---|
| 225 | 222 | /* Exclude crashkernel region */ |
|---|
| 226 | 223 | ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); |
|---|
| 227 | 224 | if (ret) |
|---|
| 228 | 225 | return ret; |
|---|
| 229 | 226 | |
|---|
| 230 | | - if (crashk_low_res.end) { |
|---|
| 227 | + if (crashk_low_res.end) |
|---|
| 231 | 228 | ret = crash_exclude_mem_range(cmem, crashk_low_res.start, |
|---|
| 232 | | - crashk_low_res.end); |
|---|
| 233 | | - if (ret) |
|---|
| 234 | | - return ret; |
|---|
| 235 | | - } |
|---|
| 229 | + crashk_low_res.end); |
|---|
| 236 | 230 | |
|---|
| 237 | 231 | return ret; |
|---|
| 238 | 232 | } |
|---|
| .. | .. |
|---|
| 253 | 247 | unsigned long *sz) |
|---|
| 254 | 248 | { |
|---|
| 255 | 249 | struct crash_mem *cmem; |
|---|
| 256 | | - Elf64_Ehdr *ehdr; |
|---|
| 257 | | - Elf64_Phdr *phdr; |
|---|
| 258 | | - int ret, i; |
|---|
| 250 | + int ret; |
|---|
| 259 | 251 | |
|---|
| 260 | 252 | cmem = fill_up_crash_elf_data(); |
|---|
| 261 | 253 | if (!cmem) |
|---|
| 262 | 254 | return -ENOMEM; |
|---|
| 263 | 255 | |
|---|
| 264 | | - ret = walk_system_ram_res(0, -1, cmem, |
|---|
| 265 | | - prepare_elf64_ram_headers_callback); |
|---|
| 256 | + ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); |
|---|
| 266 | 257 | if (ret) |
|---|
| 267 | 258 | goto out; |
|---|
| 268 | 259 | |
|---|
| .. | .. |
|---|
| 272 | 263 | goto out; |
|---|
| 273 | 264 | |
|---|
| 274 | 265 | /* By default prepare 64bit headers */ |
|---|
| 275 | | - ret = crash_prepare_elf64_headers(cmem, |
|---|
| 276 | | - IS_ENABLED(CONFIG_X86_64), addr, sz); |
|---|
| 277 | | - if (ret) |
|---|
| 278 | | - goto out; |
|---|
| 266 | + ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz); |
|---|
| 279 | 267 | |
|---|
| 280 | | - /* |
|---|
| 281 | | - * If a range matches backup region, adjust offset to backup |
|---|
| 282 | | - * segment. |
|---|
| 283 | | - */ |
|---|
| 284 | | - ehdr = (Elf64_Ehdr *)*addr; |
|---|
| 285 | | - phdr = (Elf64_Phdr *)(ehdr + 1); |
|---|
| 286 | | - for (i = 0; i < ehdr->e_phnum; phdr++, i++) |
|---|
| 287 | | - if (phdr->p_type == PT_LOAD && |
|---|
| 288 | | - phdr->p_paddr == image->arch.backup_src_start && |
|---|
| 289 | | - phdr->p_memsz == image->arch.backup_src_sz) { |
|---|
| 290 | | - phdr->p_offset = image->arch.backup_load_addr; |
|---|
| 291 | | - break; |
|---|
| 292 | | - } |
|---|
| 293 | 268 | out: |
|---|
| 294 | 269 | vfree(cmem); |
|---|
| 295 | 270 | return ret; |
|---|
| .. | .. |
|---|
| 303 | 278 | if (nr_e820_entries >= E820_MAX_ENTRIES_ZEROPAGE) |
|---|
| 304 | 279 | return 1; |
|---|
| 305 | 280 | |
|---|
| 306 | | - memcpy(¶ms->e820_table[nr_e820_entries], entry, |
|---|
| 307 | | - sizeof(struct e820_entry)); |
|---|
| 281 | + memcpy(¶ms->e820_table[nr_e820_entries], entry, sizeof(struct e820_entry)); |
|---|
| 308 | 282 | params->e820_entries++; |
|---|
| 309 | 283 | return 0; |
|---|
| 310 | 284 | } |
|---|
| .. | .. |
|---|
| 328 | 302 | unsigned long long mend) |
|---|
| 329 | 303 | { |
|---|
| 330 | 304 | unsigned long start, end; |
|---|
| 331 | | - int ret = 0; |
|---|
| 332 | 305 | |
|---|
| 333 | 306 | cmem->ranges[0].start = mstart; |
|---|
| 334 | 307 | cmem->ranges[0].end = mend; |
|---|
| 335 | 308 | cmem->nr_ranges = 1; |
|---|
| 336 | | - |
|---|
| 337 | | - /* Exclude Backup region */ |
|---|
| 338 | | - start = image->arch.backup_load_addr; |
|---|
| 339 | | - end = start + image->arch.backup_src_sz - 1; |
|---|
| 340 | | - ret = crash_exclude_mem_range(cmem, start, end); |
|---|
| 341 | | - if (ret) |
|---|
| 342 | | - return ret; |
|---|
| 343 | 309 | |
|---|
| 344 | 310 | /* Exclude elf header region */ |
|---|
| 345 | 311 | start = image->arch.elf_load_addr; |
|---|
| .. | .. |
|---|
| 363 | 329 | memset(&cmd, 0, sizeof(struct crash_memmap_data)); |
|---|
| 364 | 330 | cmd.params = params; |
|---|
| 365 | 331 | |
|---|
| 366 | | - /* Add first 640K segment */ |
|---|
| 367 | | - ei.addr = image->arch.backup_src_start; |
|---|
| 368 | | - ei.size = image->arch.backup_src_sz; |
|---|
| 369 | | - ei.type = E820_TYPE_RAM; |
|---|
| 370 | | - add_e820_entry(params, &ei); |
|---|
| 332 | + /* Add the low 1M */ |
|---|
| 333 | + cmd.type = E820_TYPE_RAM; |
|---|
| 334 | + flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; |
|---|
| 335 | + walk_iomem_res_desc(IORES_DESC_NONE, flags, 0, (1<<20)-1, &cmd, |
|---|
| 336 | + memmap_entry_callback); |
|---|
| 371 | 337 | |
|---|
| 372 | 338 | /* Add ACPI tables */ |
|---|
| 373 | 339 | cmd.type = E820_TYPE_ACPI; |
|---|
| 374 | 340 | flags = IORESOURCE_MEM | IORESOURCE_BUSY; |
|---|
| 375 | 341 | walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd, |
|---|
| 376 | | - memmap_entry_callback); |
|---|
| 342 | + memmap_entry_callback); |
|---|
| 377 | 343 | |
|---|
| 378 | 344 | /* Add ACPI Non-volatile Storage */ |
|---|
| 379 | 345 | cmd.type = E820_TYPE_NVS; |
|---|
| 380 | 346 | walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd, |
|---|
| 381 | | - memmap_entry_callback); |
|---|
| 347 | + memmap_entry_callback); |
|---|
| 348 | + |
|---|
| 349 | + /* Add e820 reserved ranges */ |
|---|
| 350 | + cmd.type = E820_TYPE_RESERVED; |
|---|
| 351 | + flags = IORESOURCE_MEM; |
|---|
| 352 | + walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd, |
|---|
| 353 | + memmap_entry_callback); |
|---|
| 382 | 354 | |
|---|
| 383 | 355 | /* Add crashk_low_res region */ |
|---|
| 384 | 356 | if (crashk_low_res.end) { |
|---|
| 385 | 357 | ei.addr = crashk_low_res.start; |
|---|
| 386 | | - ei.size = crashk_low_res.end - crashk_low_res.start + 1; |
|---|
| 358 | + ei.size = resource_size(&crashk_low_res); |
|---|
| 387 | 359 | ei.type = E820_TYPE_RAM; |
|---|
| 388 | 360 | add_e820_entry(params, &ei); |
|---|
| 389 | 361 | } |
|---|
| 390 | 362 | |
|---|
| 391 | 363 | /* Exclude some ranges from crashk_res and add rest to memmap */ |
|---|
| 392 | | - ret = memmap_exclude_ranges(image, cmem, crashk_res.start, |
|---|
| 393 | | - crashk_res.end); |
|---|
| 364 | + ret = memmap_exclude_ranges(image, cmem, crashk_res.start, crashk_res.end); |
|---|
| 394 | 365 | if (ret) |
|---|
| 395 | 366 | goto out; |
|---|
| 396 | 367 | |
|---|
| .. | .. |
|---|
| 410 | 381 | return ret; |
|---|
| 411 | 382 | } |
|---|
| 412 | 383 | |
|---|
| 413 | | -static int determine_backup_region(struct resource *res, void *arg) |
|---|
| 414 | | -{ |
|---|
| 415 | | - struct kimage *image = arg; |
|---|
| 416 | | - |
|---|
| 417 | | - image->arch.backup_src_start = res->start; |
|---|
| 418 | | - image->arch.backup_src_sz = resource_size(res); |
|---|
| 419 | | - |
|---|
| 420 | | - /* Expecting only one range for backup region */ |
|---|
| 421 | | - return 1; |
|---|
| 422 | | -} |
|---|
| 423 | | - |
|---|
| 424 | 384 | int crash_load_segments(struct kimage *image) |
|---|
| 425 | 385 | { |
|---|
| 426 | 386 | int ret; |
|---|
| 427 | 387 | struct kexec_buf kbuf = { .image = image, .buf_min = 0, |
|---|
| 428 | 388 | .buf_max = ULONG_MAX, .top_down = false }; |
|---|
| 429 | | - |
|---|
| 430 | | - /* |
|---|
| 431 | | - * Determine and load a segment for backup area. First 640K RAM |
|---|
| 432 | | - * region is backup source |
|---|
| 433 | | - */ |
|---|
| 434 | | - |
|---|
| 435 | | - ret = walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END, |
|---|
| 436 | | - image, determine_backup_region); |
|---|
| 437 | | - |
|---|
| 438 | | - /* Zero or postive return values are ok */ |
|---|
| 439 | | - if (ret < 0) |
|---|
| 440 | | - return ret; |
|---|
| 441 | | - |
|---|
| 442 | | - /* Add backup segment. */ |
|---|
| 443 | | - if (image->arch.backup_src_sz) { |
|---|
| 444 | | - kbuf.buffer = &crash_zero_bytes; |
|---|
| 445 | | - kbuf.bufsz = sizeof(crash_zero_bytes); |
|---|
| 446 | | - kbuf.memsz = image->arch.backup_src_sz; |
|---|
| 447 | | - kbuf.buf_align = PAGE_SIZE; |
|---|
| 448 | | - /* |
|---|
| 449 | | - * Ideally there is no source for backup segment. This is |
|---|
| 450 | | - * copied in purgatory after crash. Just add a zero filled |
|---|
| 451 | | - * segment for now to make sure checksum logic works fine. |
|---|
| 452 | | - */ |
|---|
| 453 | | - ret = kexec_add_buffer(&kbuf); |
|---|
| 454 | | - if (ret) |
|---|
| 455 | | - return ret; |
|---|
| 456 | | - image->arch.backup_load_addr = kbuf.mem; |
|---|
| 457 | | - pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n", |
|---|
| 458 | | - image->arch.backup_load_addr, |
|---|
| 459 | | - image->arch.backup_src_start, kbuf.memsz); |
|---|
| 460 | | - } |
|---|
| 461 | 389 | |
|---|
| 462 | 390 | /* Prepare elf headers and add a segment */ |
|---|
| 463 | 391 | ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz); |
|---|
| .. | .. |
|---|
| 469 | 397 | |
|---|
| 470 | 398 | kbuf.memsz = kbuf.bufsz; |
|---|
| 471 | 399 | kbuf.buf_align = ELF_CORE_HEADER_ALIGN; |
|---|
| 400 | + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; |
|---|
| 472 | 401 | ret = kexec_add_buffer(&kbuf); |
|---|
| 473 | 402 | if (ret) { |
|---|
| 474 | 403 | vfree((void *)image->arch.elf_headers); |
|---|