.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | #define pr_fmt(fmt) "efi: " fmt |
---|
2 | 3 | |
---|
3 | 4 | #include <linux/init.h> |
---|
.. | .. |
---|
8 | 9 | #include <linux/efi.h> |
---|
9 | 10 | #include <linux/slab.h> |
---|
10 | 11 | #include <linux/memblock.h> |
---|
11 | | -#include <linux/bootmem.h> |
---|
12 | 12 | #include <linux/acpi.h> |
---|
13 | 13 | #include <linux/dmi.h> |
---|
14 | 14 | |
---|
.. | .. |
---|
16 | 16 | #include <asm/efi.h> |
---|
17 | 17 | #include <asm/uv/uv.h> |
---|
18 | 18 | #include <asm/cpu_device_id.h> |
---|
| 19 | +#include <asm/realmode.h> |
---|
| 20 | +#include <asm/reboot.h> |
---|
19 | 21 | |
---|
20 | 22 | #define EFI_MIN_RESERVE 5120 |
---|
21 | 23 | |
---|
.. | .. |
---|
242 | 244 | */ |
---|
243 | 245 | void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) |
---|
244 | 246 | { |
---|
245 | | - phys_addr_t new_phys, new_size; |
---|
| 247 | + struct efi_memory_map_data data = { 0 }; |
---|
246 | 248 | struct efi_mem_range mr; |
---|
247 | 249 | efi_memory_desc_t md; |
---|
248 | 250 | int num_entries; |
---|
.. | .. |
---|
270 | 272 | num_entries = efi_memmap_split_count(&md, &mr.range); |
---|
271 | 273 | num_entries += efi.memmap.nr_map; |
---|
272 | 274 | |
---|
273 | | - new_size = efi.memmap.desc_size * num_entries; |
---|
274 | | - |
---|
275 | | - new_phys = efi_memmap_alloc(num_entries); |
---|
276 | | - if (!new_phys) { |
---|
| 275 | + if (efi_memmap_alloc(num_entries, &data) != 0) { |
---|
277 | 276 | pr_err("Could not allocate boot services memmap\n"); |
---|
278 | 277 | return; |
---|
279 | 278 | } |
---|
280 | 279 | |
---|
281 | | - new = early_memremap_prot(new_phys, new_size, |
---|
| 280 | + new = early_memremap_prot(data.phys_map, data.size, |
---|
282 | 281 | pgprot_val(pgprot_encrypted(FIXMAP_PAGE_NORMAL))); |
---|
283 | 282 | if (!new) { |
---|
284 | 283 | pr_err("Failed to map new boot services memmap\n"); |
---|
.. | .. |
---|
286 | 285 | } |
---|
287 | 286 | |
---|
288 | 287 | efi_memmap_insert(&efi.memmap, new, &mr); |
---|
289 | | - early_memunmap(new, new_size); |
---|
| 288 | + early_memunmap(new, data.size); |
---|
290 | 289 | |
---|
291 | | - efi_memmap_install(new_phys, num_entries); |
---|
| 290 | + efi_memmap_install(&data); |
---|
292 | 291 | e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); |
---|
293 | 292 | e820__update_table(e820_table); |
---|
294 | 293 | } |
---|
.. | .. |
---|
303 | 302 | * - Not within any part of the kernel |
---|
304 | 303 | * - Not the BIOS reserved area (E820_TYPE_RESERVED, E820_TYPE_NVS, etc) |
---|
305 | 304 | */ |
---|
306 | | -static bool can_free_region(u64 start, u64 size) |
---|
| 305 | +static __init bool can_free_region(u64 start, u64 size) |
---|
307 | 306 | { |
---|
308 | 307 | if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end)) |
---|
309 | 308 | return false; |
---|
.. | .. |
---|
318 | 317 | { |
---|
319 | 318 | efi_memory_desc_t *md; |
---|
320 | 319 | |
---|
| 320 | + if (!efi_enabled(EFI_MEMMAP)) |
---|
| 321 | + return; |
---|
| 322 | + |
---|
321 | 323 | for_each_efi_memory_desc(md) { |
---|
322 | 324 | u64 start = md->phys_addr; |
---|
323 | 325 | u64 size = md->num_pages << EFI_PAGE_SHIFT; |
---|
.. | .. |
---|
331 | 333 | |
---|
332 | 334 | /* |
---|
333 | 335 | * Because the following memblock_reserve() is paired |
---|
334 | | - * with free_bootmem_late() for this region in |
---|
| 336 | + * with memblock_free_late() for this region in |
---|
335 | 337 | * efi_free_boot_services(), we must be extremely |
---|
336 | 338 | * careful not to reserve, and subsequently free, |
---|
337 | 339 | * critical regions of memory (like the kernel image) or |
---|
.. | .. |
---|
362 | 364 | * doesn't make sense as far as the firmware is |
---|
363 | 365 | * concerned, but it does provide us with a way to tag |
---|
364 | 366 | * those regions that must not be paired with |
---|
365 | | - * free_bootmem_late(). |
---|
| 367 | + * memblock_free_late(). |
---|
366 | 368 | */ |
---|
367 | 369 | md->attribute |= EFI_MEMORY_RUNTIME; |
---|
368 | 370 | } |
---|
369 | 371 | } |
---|
370 | 372 | |
---|
| 373 | +/* |
---|
| 374 | + * Unmap one EFI boot services code/data region from the efi_mm page tables. |
---|
| 375 | + * Besides the VA mapping, a (duplicate) 1:1 mapping was also created as a quirk |
---|
| 376 | + * for buggy firmware, so both the 1:1 and the VA mappings are unmapped here. |
---|
| 377 | + */ |
---|
| 378 | +static void __init efi_unmap_pages(efi_memory_desc_t *md) |
---|
| 379 | +{ |
---|
| 380 | + pgd_t *pgd = efi_mm.pgd; |
---|
| 381 | + u64 pa = md->phys_addr; |
---|
| 382 | + u64 va = md->virt_addr; |
---|
| 383 | + |
---|
| 384 | + /* |
---|
| 385 | + * EFI mixed mode has all RAM mapped to access arguments while making |
---|
| 386 | + * EFI runtime calls, hence don't unmap EFI boot services code/data |
---|
| 387 | + * regions. |
---|
| 388 | + */ |
---|
| 389 | + if (efi_is_mixed()) |
---|
| 390 | + return; |
---|
| 391 | + |
---|
| 392 | + if (kernel_unmap_pages_in_pgd(pgd, pa, md->num_pages)) |
---|
| 393 | + pr_err("Failed to unmap 1:1 mapping for 0x%llx\n", pa); |
---|
| 394 | + |
---|
| 395 | + if (kernel_unmap_pages_in_pgd(pgd, va, md->num_pages)) |
---|
| 396 | + pr_err("Failed to unmap VA mapping for 0x%llx\n", va); |
---|
| 397 | +} |
---|
| 398 | + |
---|
371 | 399 | void __init efi_free_boot_services(void) |
---|
372 | 400 | { |
---|
373 | | - phys_addr_t new_phys, new_size; |
---|
| 401 | + struct efi_memory_map_data data = { 0 }; |
---|
374 | 402 | efi_memory_desc_t *md; |
---|
375 | 403 | int num_entries = 0; |
---|
376 | 404 | void *new, *new_md; |
---|
| 405 | + |
---|
| 406 | + /* Keep all regions for /sys/kernel/debug/efi */ |
---|
| 407 | + if (efi_enabled(EFI_DBG)) |
---|
| 408 | + return; |
---|
377 | 409 | |
---|
378 | 410 | for_each_efi_memory_desc(md) { |
---|
379 | 411 | unsigned long long start = md->phys_addr; |
---|
.. | .. |
---|
393 | 425 | } |
---|
394 | 426 | |
---|
395 | 427 | /* |
---|
| 428 | + * Before calling set_virtual_address_map(), EFI boot services |
---|
| 429 | + * code/data regions were mapped as a quirk for buggy firmware. |
---|
| 430 | + * Unmap them from efi_pgd before freeing them up. |
---|
| 431 | + */ |
---|
| 432 | + efi_unmap_pages(md); |
---|
| 433 | + |
---|
| 434 | + /* |
---|
396 | 435 | * Nasty quirk: if all sub-1MB memory is used for boot |
---|
397 | 436 | * services, we can get here without having allocated the |
---|
398 | 437 | * real mode trampoline. It's too late to hand boot services |
---|
.. | .. |
---|
407 | 446 | */ |
---|
408 | 447 | rm_size = real_mode_size_needed(); |
---|
409 | 448 | if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) { |
---|
410 | | - set_real_mode_mem(start, rm_size); |
---|
| 449 | + set_real_mode_mem(start); |
---|
411 | 450 | start += rm_size; |
---|
412 | 451 | size -= rm_size; |
---|
413 | 452 | } |
---|
414 | 453 | |
---|
415 | | - free_bootmem_late(start, size); |
---|
| 454 | + memblock_free_late(start, size); |
---|
416 | 455 | } |
---|
417 | 456 | |
---|
418 | 457 | if (!num_entries) |
---|
419 | 458 | return; |
---|
420 | 459 | |
---|
421 | | - new_size = efi.memmap.desc_size * num_entries; |
---|
422 | | - new_phys = efi_memmap_alloc(num_entries); |
---|
423 | | - if (!new_phys) { |
---|
| 460 | + if (efi_memmap_alloc(num_entries, &data) != 0) { |
---|
424 | 461 | pr_err("Failed to allocate new EFI memmap\n"); |
---|
425 | 462 | return; |
---|
426 | 463 | } |
---|
427 | 464 | |
---|
428 | | - new = memremap(new_phys, new_size, MEMREMAP_WB); |
---|
| 465 | + new = memremap(data.phys_map, data.size, MEMREMAP_WB); |
---|
429 | 466 | if (!new) { |
---|
430 | 467 | pr_err("Failed to map new EFI memmap\n"); |
---|
431 | 468 | return; |
---|
.. | .. |
---|
449 | 486 | |
---|
450 | 487 | memunmap(new); |
---|
451 | 488 | |
---|
452 | | - if (efi_memmap_install(new_phys, num_entries)) { |
---|
| 489 | + if (efi_memmap_install(&data) != 0) { |
---|
453 | 490 | pr_err("Could not install new EFI memmap\n"); |
---|
454 | 491 | return; |
---|
455 | 492 | } |
---|
.. | .. |
---|
469 | 506 | int i, sz, ret = 0; |
---|
470 | 507 | void *p, *tablep; |
---|
471 | 508 | struct efi_setup_data *data; |
---|
| 509 | + |
---|
| 510 | + if (nr_tables == 0) |
---|
| 511 | + return 0; |
---|
472 | 512 | |
---|
473 | 513 | if (!efi_setup) |
---|
474 | 514 | return 0; |
---|
.. | .. |
---|
494 | 534 | goto out_memremap; |
---|
495 | 535 | } |
---|
496 | 536 | |
---|
497 | | - for (i = 0; i < efi.systab->nr_tables; i++) { |
---|
| 537 | + for (i = 0; i < nr_tables; i++) { |
---|
498 | 538 | efi_guid_t guid; |
---|
499 | 539 | |
---|
500 | 540 | guid = ((efi_config_table_64_t *)p)->guid; |
---|
.. | .. |
---|
511 | 551 | return ret; |
---|
512 | 552 | } |
---|
513 | 553 | |
---|
514 | | -static const struct dmi_system_id sgi_uv1_dmi[] = { |
---|
515 | | - { NULL, "SGI UV1", |
---|
516 | | - { DMI_MATCH(DMI_PRODUCT_NAME, "Stoutland Platform"), |
---|
517 | | - DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), |
---|
518 | | - DMI_MATCH(DMI_BIOS_VENDOR, "SGI.COM"), |
---|
519 | | - } |
---|
520 | | - }, |
---|
521 | | - { } /* NULL entry stops DMI scanning */ |
---|
522 | | -}; |
---|
523 | | - |
---|
524 | 554 | void __init efi_apply_memmap_quirks(void) |
---|
525 | 555 | { |
---|
526 | 556 | /* |
---|
.. | .. |
---|
532 | 562 | pr_info("Setup done, disabling due to 32/64-bit mismatch\n"); |
---|
533 | 563 | efi_memmap_unmap(); |
---|
534 | 564 | } |
---|
535 | | - |
---|
536 | | - /* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. */ |
---|
537 | | - if (dmi_check_system(sgi_uv1_dmi)) |
---|
538 | | - set_bit(EFI_OLD_MEMMAP, &efi.flags); |
---|
539 | 565 | } |
---|
540 | 566 | |
---|
541 | 567 | /* |
---|
.. | .. |
---|
609 | 635 | return 1; |
---|
610 | 636 | } |
---|
611 | 637 | |
---|
612 | | -#define ICPU(family, model, quirk_handler) \ |
---|
613 | | - { X86_VENDOR_INTEL, family, model, X86_FEATURE_ANY, \ |
---|
614 | | - (unsigned long)&quirk_handler } |
---|
615 | | - |
---|
616 | 638 | static const struct x86_cpu_id efi_capsule_quirk_ids[] = { |
---|
617 | | - ICPU(5, 9, qrk_capsule_setup_info), /* Intel Quark X1000 */ |
---|
| 639 | + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, |
---|
| 640 | + &qrk_capsule_setup_info), |
---|
618 | 641 | { } |
---|
619 | 642 | }; |
---|
620 | 643 | |
---|
.. | .. |
---|
653 | 676 | } |
---|
654 | 677 | |
---|
655 | 678 | #endif |
---|
| 679 | + |
---|
| 680 | +/* |
---|
| 681 | + * If any access by any efi runtime service causes a page fault, then, |
---|
| 682 | + * 1. If it's efi_reset_system(), reboot through BIOS. |
---|
| 683 | + * 2. If any other efi runtime service, then |
---|
| 684 | + * a. Return error status to the efi caller process. |
---|
| 685 | + * b. Disable EFI Runtime Services forever and |
---|
| 686 | + * c. Freeze efi_rts_wq and schedule new process. |
---|
| 687 | + * |
---|
| 688 | + * @return: Returns, if the page fault is not handled. This function |
---|
| 689 | + * will never return if the page fault is handled successfully. |
---|
| 690 | + */ |
---|
| 691 | +void efi_recover_from_page_fault(unsigned long phys_addr) |
---|
| 692 | +{ |
---|
| 693 | + if (!IS_ENABLED(CONFIG_X86_64)) |
---|
| 694 | + return; |
---|
| 695 | + |
---|
| 696 | + /* |
---|
| 697 | + * Make sure that an efi runtime service caused the page fault. |
---|
| 698 | + */ |
---|
| 699 | + if (efi_rts_work.efi_rts_id == EFI_NONE) |
---|
| 700 | + return; |
---|
| 701 | + |
---|
| 702 | + /* |
---|
| 703 | + * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so |
---|
| 704 | + * page faulting on these addresses isn't expected. |
---|
| 705 | + */ |
---|
| 706 | + if (phys_addr <= 0x0fff) |
---|
| 707 | + return; |
---|
| 708 | + |
---|
| 709 | + /* |
---|
| 710 | + * Print stack trace as it might be useful to know which EFI Runtime |
---|
| 711 | + * Service is buggy. |
---|
| 712 | + */ |
---|
| 713 | + WARN(1, FW_BUG "Page fault caused by firmware at PA: 0x%lx\n", |
---|
| 714 | + phys_addr); |
---|
| 715 | + |
---|
| 716 | + /* |
---|
| 717 | + * Buggy efi_reset_system() is handled differently from other EFI |
---|
| 718 | + * Runtime Services as it doesn't use efi_rts_wq. Although, |
---|
| 719 | + * native_machine_emergency_restart() says that machine_real_restart() |
---|
| 720 | + * could fail, it's better not to complicate this fault handler |
---|
| 721 | + * because this case occurs *very* rarely and hence could be improved |
---|
| 722 | + * on an as-needed basis. |
---|
| 723 | + */ |
---|
| 724 | + if (efi_rts_work.efi_rts_id == EFI_RESET_SYSTEM) { |
---|
| 725 | + pr_info("efi_reset_system() buggy! Reboot through BIOS\n"); |
---|
| 726 | + machine_real_restart(MRR_BIOS); |
---|
| 727 | + return; |
---|
| 728 | + } |
---|
| 729 | + |
---|
| 730 | + /* |
---|
| 731 | + * Before calling EFI Runtime Service, the kernel has switched the |
---|
| 732 | + * calling process to efi_mm. Hence, switch back to task_mm. |
---|
| 733 | + */ |
---|
| 734 | + arch_efi_call_virt_teardown(); |
---|
| 735 | + |
---|
| 736 | + /* Signal error status to the efi caller process */ |
---|
| 737 | + efi_rts_work.status = EFI_ABORTED; |
---|
| 738 | + complete(&efi_rts_work.efi_rts_comp); |
---|
| 739 | + |
---|
| 740 | + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); |
---|
| 741 | + pr_info("Froze efi_rts_wq and disabled EFI Runtime Services\n"); |
---|
| 742 | + |
---|
| 743 | + /* |
---|
| 744 | + * Call schedule() in an infinite loop, so that any spurious wake ups |
---|
| 745 | + * will never run efi_rts_wq again. |
---|
| 746 | + */ |
---|
| 747 | + for (;;) { |
---|
| 748 | + set_current_state(TASK_IDLE); |
---|
| 749 | + schedule(); |
---|
| 750 | + } |
---|
| 751 | + |
---|
| 752 | + return; |
---|
| 753 | +} |
---|