| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | #define pr_fmt(fmt) "efi: " fmt |
|---|
| 2 | 3 | |
|---|
| 3 | 4 | #include <linux/init.h> |
|---|
| .. | .. |
|---|
| 8 | 9 | #include <linux/efi.h> |
|---|
| 9 | 10 | #include <linux/slab.h> |
|---|
| 10 | 11 | #include <linux/memblock.h> |
|---|
| 11 | | -#include <linux/bootmem.h> |
|---|
| 12 | 12 | #include <linux/acpi.h> |
|---|
| 13 | 13 | #include <linux/dmi.h> |
|---|
| 14 | 14 | |
|---|
| .. | .. |
|---|
| 16 | 16 | #include <asm/efi.h> |
|---|
| 17 | 17 | #include <asm/uv/uv.h> |
|---|
| 18 | 18 | #include <asm/cpu_device_id.h> |
|---|
| 19 | +#include <asm/realmode.h> |
|---|
| 20 | +#include <asm/reboot.h> |
|---|
| 19 | 21 | |
|---|
| 20 | 22 | #define EFI_MIN_RESERVE 5120 |
|---|
| 21 | 23 | |
|---|
| .. | .. |
|---|
| 242 | 244 | */ |
|---|
| 243 | 245 | void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) |
|---|
| 244 | 246 | { |
|---|
| 245 | | - phys_addr_t new_phys, new_size; |
|---|
| 247 | + struct efi_memory_map_data data = { 0 }; |
|---|
| 246 | 248 | struct efi_mem_range mr; |
|---|
| 247 | 249 | efi_memory_desc_t md; |
|---|
| 248 | 250 | int num_entries; |
|---|
| .. | .. |
|---|
| 270 | 272 | num_entries = efi_memmap_split_count(&md, &mr.range); |
|---|
| 271 | 273 | num_entries += efi.memmap.nr_map; |
|---|
| 272 | 274 | |
|---|
| 273 | | - new_size = efi.memmap.desc_size * num_entries; |
|---|
| 274 | | - |
|---|
| 275 | | - new_phys = efi_memmap_alloc(num_entries); |
|---|
| 276 | | - if (!new_phys) { |
|---|
| 275 | + if (efi_memmap_alloc(num_entries, &data) != 0) { |
|---|
| 277 | 276 | pr_err("Could not allocate boot services memmap\n"); |
|---|
| 278 | 277 | return; |
|---|
| 279 | 278 | } |
|---|
| 280 | 279 | |
|---|
| 281 | | - new = early_memremap_prot(new_phys, new_size, |
|---|
| 280 | + new = early_memremap_prot(data.phys_map, data.size, |
|---|
| 282 | 281 | pgprot_val(pgprot_encrypted(FIXMAP_PAGE_NORMAL))); |
|---|
| 283 | 282 | if (!new) { |
|---|
| 284 | 283 | pr_err("Failed to map new boot services memmap\n"); |
|---|
| .. | .. |
|---|
| 286 | 285 | } |
|---|
| 287 | 286 | |
|---|
| 288 | 287 | efi_memmap_insert(&efi.memmap, new, &mr); |
|---|
| 289 | | - early_memunmap(new, new_size); |
|---|
| 288 | + early_memunmap(new, data.size); |
|---|
| 290 | 289 | |
|---|
| 291 | | - efi_memmap_install(new_phys, num_entries); |
|---|
| 290 | + efi_memmap_install(&data); |
|---|
| 292 | 291 | e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); |
|---|
| 293 | 292 | e820__update_table(e820_table); |
|---|
| 294 | 293 | } |
|---|
| .. | .. |
|---|
| 303 | 302 | * - Not within any part of the kernel |
|---|
| 304 | 303 | * - Not the BIOS reserved area (E820_TYPE_RESERVED, E820_TYPE_NVS, etc) |
|---|
| 305 | 304 | */ |
|---|
| 306 | | -static bool can_free_region(u64 start, u64 size) |
|---|
| 305 | +static __init bool can_free_region(u64 start, u64 size) |
|---|
| 307 | 306 | { |
|---|
| 308 | 307 | if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end)) |
|---|
| 309 | 308 | return false; |
|---|
| .. | .. |
|---|
| 318 | 317 | { |
|---|
| 319 | 318 | efi_memory_desc_t *md; |
|---|
| 320 | 319 | |
|---|
| 320 | + if (!efi_enabled(EFI_MEMMAP)) |
|---|
| 321 | + return; |
|---|
| 322 | + |
|---|
| 321 | 323 | for_each_efi_memory_desc(md) { |
|---|
| 322 | 324 | u64 start = md->phys_addr; |
|---|
| 323 | 325 | u64 size = md->num_pages << EFI_PAGE_SHIFT; |
|---|
| .. | .. |
|---|
| 331 | 333 | |
|---|
| 332 | 334 | /* |
|---|
| 333 | 335 | * Because the following memblock_reserve() is paired |
|---|
| 334 | | - * with free_bootmem_late() for this region in |
|---|
| 336 | + * with memblock_free_late() for this region in |
|---|
| 335 | 337 | * efi_free_boot_services(), we must be extremely |
|---|
| 336 | 338 | * careful not to reserve, and subsequently free, |
|---|
| 337 | 339 | * critical regions of memory (like the kernel image) or |
|---|
| .. | .. |
|---|
| 362 | 364 | * doesn't make sense as far as the firmware is |
|---|
| 363 | 365 | * concerned, but it does provide us with a way to tag |
|---|
| 364 | 366 | * those regions that must not be paired with |
|---|
| 365 | | - * free_bootmem_late(). |
|---|
| 367 | + * memblock_free_late(). |
|---|
| 366 | 368 | */ |
|---|
| 367 | 369 | md->attribute |= EFI_MEMORY_RUNTIME; |
|---|
| 368 | 370 | } |
|---|
| 369 | 371 | } |
|---|
| 370 | 372 | |
|---|
| 373 | +/* |
|---|
| 374 | + * Apart from having VA mappings for EFI boot services code/data regions, |
|---|
| 375 | + * (duplicate) 1:1 mappings were also created as a quirk for buggy firmware. So, |
|---|
| 376 | + * unmap both 1:1 and VA mappings. |
|---|
| 377 | + */ |
|---|
| 378 | +static void __init efi_unmap_pages(efi_memory_desc_t *md) |
|---|
| 379 | +{ |
|---|
| 380 | + pgd_t *pgd = efi_mm.pgd; |
|---|
| 381 | + u64 pa = md->phys_addr; |
|---|
| 382 | + u64 va = md->virt_addr; |
|---|
| 383 | + |
|---|
| 384 | + /* |
|---|
| 385 | + * EFI mixed mode has all RAM mapped to access arguments while making |
|---|
| 386 | + * EFI runtime calls, hence don't unmap EFI boot services code/data |
|---|
| 387 | + * regions. |
|---|
| 388 | + */ |
|---|
| 389 | + if (efi_is_mixed()) |
|---|
| 390 | + return; |
|---|
| 391 | + |
|---|
| 392 | + if (kernel_unmap_pages_in_pgd(pgd, pa, md->num_pages)) |
|---|
| 393 | + pr_err("Failed to unmap 1:1 mapping for 0x%llx\n", pa); |
|---|
| 394 | + |
|---|
| 395 | + if (kernel_unmap_pages_in_pgd(pgd, va, md->num_pages)) |
|---|
| 396 | + pr_err("Failed to unmap VA mapping for 0x%llx\n", va); |
|---|
| 397 | +} |
|---|
| 398 | + |
|---|
| 371 | 399 | void __init efi_free_boot_services(void) |
|---|
| 372 | 400 | { |
|---|
| 373 | | - phys_addr_t new_phys, new_size; |
|---|
| 401 | + struct efi_memory_map_data data = { 0 }; |
|---|
| 374 | 402 | efi_memory_desc_t *md; |
|---|
| 375 | 403 | int num_entries = 0; |
|---|
| 376 | 404 | void *new, *new_md; |
|---|
| 405 | + |
|---|
| 406 | + /* Keep all regions for /sys/kernel/debug/efi */ |
|---|
| 407 | + if (efi_enabled(EFI_DBG)) |
|---|
| 408 | + return; |
|---|
| 377 | 409 | |
|---|
| 378 | 410 | for_each_efi_memory_desc(md) { |
|---|
| 379 | 411 | unsigned long long start = md->phys_addr; |
|---|
| .. | .. |
|---|
| 393 | 425 | } |
|---|
| 394 | 426 | |
|---|
| 395 | 427 | /* |
|---|
| 428 | + * Before calling set_virtual_address_map(), EFI boot services |
|---|
| 429 | + * code/data regions were mapped as a quirk for buggy firmware. |
|---|
| 430 | + * Unmap them from efi_pgd before freeing them up. |
|---|
| 431 | + */ |
|---|
| 432 | + efi_unmap_pages(md); |
|---|
| 433 | + |
|---|
| 434 | + /* |
|---|
| 396 | 435 | * Nasty quirk: if all sub-1MB memory is used for boot |
|---|
| 397 | 436 | * services, we can get here without having allocated the |
|---|
| 398 | 437 | * real mode trampoline. It's too late to hand boot services |
|---|
| .. | .. |
|---|
| 407 | 446 | */ |
|---|
| 408 | 447 | rm_size = real_mode_size_needed(); |
|---|
| 409 | 448 | if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) { |
|---|
| 410 | | - set_real_mode_mem(start, rm_size); |
|---|
| 449 | + set_real_mode_mem(start); |
|---|
| 411 | 450 | start += rm_size; |
|---|
| 412 | 451 | size -= rm_size; |
|---|
| 413 | 452 | } |
|---|
| 414 | 453 | |
|---|
| 415 | | - free_bootmem_late(start, size); |
|---|
| 454 | + memblock_free_late(start, size); |
|---|
| 416 | 455 | } |
|---|
| 417 | 456 | |
|---|
| 418 | 457 | if (!num_entries) |
|---|
| 419 | 458 | return; |
|---|
| 420 | 459 | |
|---|
| 421 | | - new_size = efi.memmap.desc_size * num_entries; |
|---|
| 422 | | - new_phys = efi_memmap_alloc(num_entries); |
|---|
| 423 | | - if (!new_phys) { |
|---|
| 460 | + if (efi_memmap_alloc(num_entries, &data) != 0) { |
|---|
| 424 | 461 | pr_err("Failed to allocate new EFI memmap\n"); |
|---|
| 425 | 462 | return; |
|---|
| 426 | 463 | } |
|---|
| 427 | 464 | |
|---|
| 428 | | - new = memremap(new_phys, new_size, MEMREMAP_WB); |
|---|
| 465 | + new = memremap(data.phys_map, data.size, MEMREMAP_WB); |
|---|
| 429 | 466 | if (!new) { |
|---|
| 430 | 467 | pr_err("Failed to map new EFI memmap\n"); |
|---|
| 431 | 468 | return; |
|---|
| .. | .. |
|---|
| 449 | 486 | |
|---|
| 450 | 487 | memunmap(new); |
|---|
| 451 | 488 | |
|---|
| 452 | | - if (efi_memmap_install(new_phys, num_entries)) { |
|---|
| 489 | + if (efi_memmap_install(&data) != 0) { |
|---|
| 453 | 490 | pr_err("Could not install new EFI memmap\n"); |
|---|
| 454 | 491 | return; |
|---|
| 455 | 492 | } |
|---|
| .. | .. |
|---|
| 469 | 506 | int i, sz, ret = 0; |
|---|
| 470 | 507 | void *p, *tablep; |
|---|
| 471 | 508 | struct efi_setup_data *data; |
|---|
| 509 | + |
|---|
| 510 | + if (nr_tables == 0) |
|---|
| 511 | + return 0; |
|---|
| 472 | 512 | |
|---|
| 473 | 513 | if (!efi_setup) |
|---|
| 474 | 514 | return 0; |
|---|
| .. | .. |
|---|
| 494 | 534 | goto out_memremap; |
|---|
| 495 | 535 | } |
|---|
| 496 | 536 | |
|---|
| 497 | | - for (i = 0; i < efi.systab->nr_tables; i++) { |
|---|
| 537 | + for (i = 0; i < nr_tables; i++) { |
|---|
| 498 | 538 | efi_guid_t guid; |
|---|
| 499 | 539 | |
|---|
| 500 | 540 | guid = ((efi_config_table_64_t *)p)->guid; |
|---|
| .. | .. |
|---|
| 511 | 551 | return ret; |
|---|
| 512 | 552 | } |
|---|
| 513 | 553 | |
|---|
| 514 | | -static const struct dmi_system_id sgi_uv1_dmi[] = { |
|---|
| 515 | | - { NULL, "SGI UV1", |
|---|
| 516 | | - { DMI_MATCH(DMI_PRODUCT_NAME, "Stoutland Platform"), |
|---|
| 517 | | - DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), |
|---|
| 518 | | - DMI_MATCH(DMI_BIOS_VENDOR, "SGI.COM"), |
|---|
| 519 | | - } |
|---|
| 520 | | - }, |
|---|
| 521 | | - { } /* NULL entry stops DMI scanning */ |
|---|
| 522 | | -}; |
|---|
| 523 | | - |
|---|
| 524 | 554 | void __init efi_apply_memmap_quirks(void) |
|---|
| 525 | 555 | { |
|---|
| 526 | 556 | /* |
|---|
| .. | .. |
|---|
| 532 | 562 | pr_info("Setup done, disabling due to 32/64-bit mismatch\n"); |
|---|
| 533 | 563 | efi_memmap_unmap(); |
|---|
| 534 | 564 | } |
|---|
| 535 | | - |
|---|
| 536 | | - /* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. */ |
|---|
| 537 | | - if (dmi_check_system(sgi_uv1_dmi)) |
|---|
| 538 | | - set_bit(EFI_OLD_MEMMAP, &efi.flags); |
|---|
| 539 | 565 | } |
|---|
| 540 | 566 | |
|---|
| 541 | 567 | /* |
|---|
| .. | .. |
|---|
| 609 | 635 | return 1; |
|---|
| 610 | 636 | } |
|---|
| 611 | 637 | |
|---|
| 612 | | -#define ICPU(family, model, quirk_handler) \ |
|---|
| 613 | | - { X86_VENDOR_INTEL, family, model, X86_FEATURE_ANY, \ |
|---|
| 614 | | - (unsigned long)&quirk_handler } |
|---|
| 615 | | - |
|---|
| 616 | 638 | static const struct x86_cpu_id efi_capsule_quirk_ids[] = { |
|---|
| 617 | | - ICPU(5, 9, qrk_capsule_setup_info), /* Intel Quark X1000 */ |
|---|
| 639 | + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, |
|---|
| 640 | + &qrk_capsule_setup_info), |
|---|
| 618 | 641 | { } |
|---|
| 619 | 642 | }; |
|---|
| 620 | 643 | |
|---|
| .. | .. |
|---|
| 653 | 676 | } |
|---|
| 654 | 677 | |
|---|
| 655 | 678 | #endif |
|---|
| 679 | + |
|---|
| 680 | +/* |
|---|
| 681 | + * If any access by any efi runtime service causes a page fault, then, |
|---|
| 682 | + * 1. If it's efi_reset_system(), reboot through BIOS. |
|---|
| 683 | + * 2. If any other efi runtime service, then |
|---|
| 684 | + * a. Return error status to the efi caller process. |
|---|
| 685 | + * b. Disable EFI Runtime Services forever and |
|---|
| 686 | + * c. Freeze efi_rts_wq and schedule new process. |
|---|
| 687 | + * |
|---|
| 688 | + * @return: Returns only if the page fault is not handled. This function |
|---|
| 689 | + * will never return if the page fault is handled successfully. |
|---|
| 690 | + */ |
|---|
| 691 | +void efi_recover_from_page_fault(unsigned long phys_addr) |
|---|
| 692 | +{ |
|---|
| 693 | + if (!IS_ENABLED(CONFIG_X86_64)) |
|---|
| 694 | + return; |
|---|
| 695 | + |
|---|
| 696 | + /* |
|---|
| 697 | + * Make sure that an efi runtime service caused the page fault. |
|---|
| 698 | + */ |
|---|
| 699 | + if (efi_rts_work.efi_rts_id == EFI_NONE) |
|---|
| 700 | + return; |
|---|
| 701 | + |
|---|
| 702 | + /* |
|---|
| 703 | + * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so |
|---|
| 704 | + * page faulting on these addresses isn't expected. |
|---|
| 705 | + */ |
|---|
| 706 | + if (phys_addr <= 0x0fff) |
|---|
| 707 | + return; |
|---|
| 708 | + |
|---|
| 709 | + /* |
|---|
| 710 | + * Print stack trace as it might be useful to know which EFI Runtime |
|---|
| 711 | + * Service is buggy. |
|---|
| 712 | + */ |
|---|
| 713 | + WARN(1, FW_BUG "Page fault caused by firmware at PA: 0x%lx\n", |
|---|
| 714 | + phys_addr); |
|---|
| 715 | + |
|---|
| 716 | + /* |
|---|
| 717 | + * Buggy efi_reset_system() is handled differently from other EFI |
|---|
| 718 | + * Runtime Services as it doesn't use efi_rts_wq. Although, |
|---|
| 719 | + * native_machine_emergency_restart() says that machine_real_restart() |
|---|
| 720 | + * could fail, it's better not to complicate this fault handler |
|---|
| 721 | + * because this case occurs *very* rarely and hence could be improved |
|---|
| 722 | + * on an as-needed basis. |
|---|
| 723 | + */ |
|---|
| 724 | + if (efi_rts_work.efi_rts_id == EFI_RESET_SYSTEM) { |
|---|
| 725 | + pr_info("efi_reset_system() buggy! Reboot through BIOS\n"); |
|---|
| 726 | + machine_real_restart(MRR_BIOS); |
|---|
| 727 | + return; |
|---|
| 728 | + } |
|---|
| 729 | + |
|---|
| 730 | + /* |
|---|
| 731 | + * Before calling EFI Runtime Service, the kernel has switched the |
|---|
| 732 | + * calling process to efi_mm. Hence, switch back to task_mm. |
|---|
| 733 | + */ |
|---|
| 734 | + arch_efi_call_virt_teardown(); |
|---|
| 735 | + |
|---|
| 736 | + /* Signal error status to the efi caller process */ |
|---|
| 737 | + efi_rts_work.status = EFI_ABORTED; |
|---|
| 738 | + complete(&efi_rts_work.efi_rts_comp); |
|---|
| 739 | + |
|---|
| 740 | + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); |
|---|
| 741 | + pr_info("Froze efi_rts_wq and disabled EFI Runtime Services\n"); |
|---|
| 742 | + |
|---|
| 743 | + /* |
|---|
| 744 | + * Call schedule() in an infinite loop, so that any spurious wake ups |
|---|
| 745 | + * will never run efi_rts_wq again. |
|---|
| 746 | + */ |
|---|
| 747 | + for (;;) { |
|---|
| 748 | + set_current_state(TASK_IDLE); |
|---|
| 749 | + schedule(); |
|---|
| 750 | + } |
|---|
| 751 | + |
|---|
| 752 | + return; |
|---|
| 753 | +} |
|---|