.. | .. |
---|
19 | 19 | #include <linux/uaccess.h> /* faulthandler_disabled() */ |
---|
20 | 20 | #include <linux/efi.h> /* efi_recover_from_page_fault()*/ |
---|
21 | 21 | #include <linux/mm_types.h> |
---|
| 22 | +#include <linux/irqstage.h> |
---|
22 | 23 | |
---|
23 | 24 | #include <asm/cpufeature.h> /* boot_cpu_has, ... */ |
---|
24 | 25 | #include <asm/traps.h> /* dotraplinkage, ... */ |
---|
.. | .. |
---|
656 | 657 | * the below recursive fault logic only applies to faults from |
---|
657 | 658 | * task context. |
---|
658 | 659 | */ |
---|
659 | | - if (in_interrupt()) |
---|
| 660 | + if (running_oob() || in_interrupt()) |
---|
660 | 661 | return; |
---|
661 | 662 | |
---|
662 | 663 | /* |
---|
.. | .. |
---|
666 | 667 | * faulting through the emulate_vsyscall() logic. |
---|
667 | 668 | */ |
---|
668 | 669 | if (current->thread.sig_on_uaccess_err && signal) { |
---|
| 670 | + oob_trap_notify(X86_TRAP_PF, regs); |
---|
669 | 671 | set_signal_archinfo(address, error_code); |
---|
670 | 672 | |
---|
671 | 673 | /* XXX: hwpoison faults will set the wrong code. */ |
---|
672 | 674 | force_sig_fault(signal, si_code, (void __user *)address); |
---|
| 675 | + oob_trap_unwind(X86_TRAP_PF, regs); |
---|
673 | 676 | } |
---|
674 | 677 | |
---|
675 | 678 | /* |
---|
.. | .. |
---|
677 | 680 | */ |
---|
678 | 681 | return; |
---|
679 | 682 | } |
---|
| 683 | + |
---|
| 684 | + /* |
---|
| 685 | + * Do not bother unwinding the notification context on |
---|
| 686 | + * CPU/firmware/kernel bug. |
---|
| 687 | + */ |
---|
| 688 | + oob_trap_notify(X86_TRAP_PF, regs); |
---|
680 | 689 | |
---|
681 | 690 | #ifdef CONFIG_VMAP_STACK |
---|
682 | 691 | /* |
---|
.. | .. |
---|
796 | 805 | return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); |
---|
797 | 806 | } |
---|
798 | 807 | |
---|
| 808 | +#ifdef CONFIG_IRQ_PIPELINE |
---|
| 809 | + |
---|
| 810 | +static inline void cond_reenable_irqs_user(void) |
---|
| 811 | +{ |
---|
| 812 | + hard_local_irq_enable(); |
---|
| 813 | + |
---|
| 814 | + if (running_inband()) |
---|
| 815 | + local_irq_enable(); |
---|
| 816 | +} |
---|
| 817 | + |
---|
| 818 | +static inline void cond_reenable_irqs_kernel(irqentry_state_t state, |
---|
| 819 | + struct pt_regs *regs) |
---|
| 820 | +{ |
---|
| 821 | + if (regs->flags & X86_EFLAGS_IF) { |
---|
| 822 | + hard_local_irq_enable(); |
---|
| 823 | + if (state.stage_info == IRQENTRY_INBAND_UNSTALLED) |
---|
| 824 | + local_irq_enable(); |
---|
| 825 | + } |
---|
| 826 | +} |
---|
| 827 | + |
---|
| 828 | +static inline void cond_disable_irqs(void) |
---|
| 829 | +{ |
---|
| 830 | + hard_local_irq_disable(); |
---|
| 831 | + |
---|
| 832 | + if (running_inband()) |
---|
| 833 | + local_irq_disable(); |
---|
| 834 | +} |
---|
| 835 | + |
---|
| 836 | +#else /* !CONFIG_IRQ_PIPELINE */ |
---|
| 837 | + |
---|
| 838 | +static inline void cond_reenable_irqs_user(void) |
---|
| 839 | +{ |
---|
| 840 | + local_irq_enable(); |
---|
| 841 | +} |
---|
| 842 | + |
---|
| 843 | +static inline void cond_reenable_irqs_kernel(irqentry_state_t state, |
---|
| 844 | + struct pt_regs *regs) |
---|
| 845 | +{ |
---|
| 846 | + if (regs->flags & X86_EFLAGS_IF) |
---|
| 847 | + local_irq_enable(); |
---|
| 848 | +} |
---|
| 849 | + |
---|
| 850 | +static inline void cond_disable_irqs(void) |
---|
| 851 | +{ |
---|
| 852 | + local_irq_disable(); |
---|
| 853 | +} |
---|
| 854 | + |
---|
| 855 | +#endif /* !CONFIG_IRQ_PIPELINE */ |
---|
| 856 | + |
---|
799 | 857 | static void |
---|
800 | 858 | __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, |
---|
801 | 859 | unsigned long address, u32 pkey, int si_code) |
---|
.. | .. |
---|
807 | 865 | /* |
---|
808 | 866 | * It's possible to have interrupts off here: |
---|
809 | 867 | */ |
---|
810 | | - local_irq_enable(); |
---|
| 868 | + cond_reenable_irqs_user(); |
---|
811 | 869 | |
---|
812 | 870 | /* |
---|
813 | 871 | * Valid to do another page fault here because this one came |
---|
.. | .. |
---|
818 | 876 | |
---|
819 | 877 | if (is_errata100(regs, address)) |
---|
820 | 878 | return; |
---|
| 879 | + |
---|
| 880 | + oob_trap_notify(X86_TRAP_PF, regs); |
---|
| 881 | + if (!running_inband()) { |
---|
| 882 | + local_irq_disable_full(); |
---|
| 883 | + return; |
---|
| 884 | + } |
---|
821 | 885 | |
---|
822 | 886 | /* |
---|
823 | 887 | * To avoid leaking information about the kernel page table |
---|
.. | .. |
---|
837 | 901 | |
---|
838 | 902 | force_sig_fault(SIGSEGV, si_code, (void __user *)address); |
---|
839 | 903 | |
---|
840 | | - local_irq_disable(); |
---|
| 904 | + local_irq_disable_full(); |
---|
| 905 | + |
---|
| 906 | + oob_trap_unwind(X86_TRAP_PF, regs); |
---|
841 | 907 | |
---|
842 | 908 | return; |
---|
843 | 909 | } |
---|
.. | .. |
---|
1225 | 1291 | static inline |
---|
1226 | 1292 | void do_user_addr_fault(struct pt_regs *regs, |
---|
1227 | 1293 | unsigned long hw_error_code, |
---|
1228 | | - unsigned long address) |
---|
| 1294 | + unsigned long address, |
---|
| 1295 | + irqentry_state_t state) |
---|
1229 | 1296 | { |
---|
1230 | 1297 | struct vm_area_struct *vma = NULL; |
---|
1231 | 1298 | struct task_struct *tsk; |
---|
.. | .. |
---|
1266 | 1333 | * If we're in an interrupt, have no user context or are running |
---|
1267 | 1334 | * in a region with pagefaults disabled then we must not take the fault |
---|
1268 | 1335 | */ |
---|
1269 | | - if (unlikely(faulthandler_disabled() || !mm)) { |
---|
| 1336 | + if (unlikely(running_inband() && (faulthandler_disabled() || !mm))) { |
---|
1270 | 1337 | bad_area_nosemaphore(regs, hw_error_code, address); |
---|
1271 | 1338 | return; |
---|
1272 | 1339 | } |
---|
.. | .. |
---|
1279 | 1346 | * potential system fault or CPU buglet: |
---|
1280 | 1347 | */ |
---|
1281 | 1348 | if (user_mode(regs)) { |
---|
1282 | | - local_irq_enable(); |
---|
| 1349 | + cond_reenable_irqs_user(); |
---|
1283 | 1350 | flags |= FAULT_FLAG_USER; |
---|
1284 | 1351 | } else { |
---|
1285 | | - if (regs->flags & X86_EFLAGS_IF) |
---|
1286 | | - local_irq_enable(); |
---|
| 1352 | + cond_reenable_irqs_kernel(state, regs); |
---|
1287 | 1353 | } |
---|
| 1354 | + |
---|
| 1355 | + /* |
---|
| 1356 | + * At this point, we would have to stop running |
---|
| 1357 | + * out-of-band. Tell the companion core about the page fault |
---|
| 1358 | + * event, so that it might switch current to in-band mode if |
---|
| 1359 | + * need be. If it does not, then we may assume that it would |
---|
| 1360 | + * also handle the fixups. |
---|
| 1361 | + */ |
---|
| 1362 | + oob_trap_notify(X86_TRAP_PF, regs); |
---|
| 1363 | + if (!running_inband()) |
---|
| 1364 | + return; |
---|
1288 | 1365 | |
---|
1289 | 1366 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); |
---|
1290 | 1367 | |
---|
.. | .. |
---|
1307 | 1384 | */ |
---|
1308 | 1385 | if (is_vsyscall_vaddr(address)) { |
---|
1309 | 1386 | if (emulate_vsyscall(hw_error_code, regs, address)) |
---|
1310 | | - return; |
---|
| 1387 | + goto out; |
---|
1311 | 1388 | } |
---|
1312 | 1389 | #endif |
---|
1313 | 1390 | |
---|
.. | .. |
---|
1340 | 1417 | * which we do not expect faults. |
---|
1341 | 1418 | */ |
---|
1342 | 1419 | bad_area_nosemaphore(regs, hw_error_code, address); |
---|
1343 | | - return; |
---|
| 1420 | + goto out; |
---|
1344 | 1421 | } |
---|
1345 | 1422 | retry: |
---|
1346 | 1423 | mmap_read_lock(mm); |
---|
.. | .. |
---|
1357 | 1434 | vma = find_vma(mm, address); |
---|
1358 | 1435 | if (unlikely(!vma)) { |
---|
1359 | 1436 | bad_area(regs, hw_error_code, address); |
---|
1360 | | - return; |
---|
| 1437 | + goto out; |
---|
1361 | 1438 | } |
---|
1362 | 1439 | if (likely(vma->vm_start <= address)) |
---|
1363 | 1440 | goto good_area; |
---|
1364 | 1441 | if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { |
---|
1365 | 1442 | bad_area(regs, hw_error_code, address); |
---|
1366 | | - return; |
---|
| 1443 | + goto out; |
---|
1367 | 1444 | } |
---|
1368 | 1445 | if (unlikely(expand_stack(vma, address))) { |
---|
1369 | 1446 | bad_area(regs, hw_error_code, address); |
---|
1370 | | - return; |
---|
| 1447 | + goto out; |
---|
1371 | 1448 | } |
---|
1372 | 1449 | |
---|
1373 | 1450 | /* |
---|
.. | .. |
---|
1377 | 1454 | good_area: |
---|
1378 | 1455 | if (unlikely(access_error(hw_error_code, vma))) { |
---|
1379 | 1456 | bad_area_access_error(regs, hw_error_code, address, vma); |
---|
1380 | | - return; |
---|
| 1457 | + goto out; |
---|
1381 | 1458 | } |
---|
1382 | 1459 | |
---|
1383 | 1460 | /* |
---|
.. | .. |
---|
1400 | 1477 | if (!user_mode(regs)) |
---|
1401 | 1478 | no_context(regs, hw_error_code, address, SIGBUS, |
---|
1402 | 1479 | BUS_ADRERR); |
---|
1403 | | - return; |
---|
| 1480 | + goto out; |
---|
1404 | 1481 | } |
---|
1405 | 1482 | |
---|
1406 | 1483 | /* |
---|
.. | .. |
---|
1426 | 1503 | done: |
---|
1427 | 1504 | if (unlikely(fault & VM_FAULT_ERROR)) { |
---|
1428 | 1505 | mm_fault_error(regs, hw_error_code, address, fault); |
---|
1429 | | - return; |
---|
| 1506 | + goto out; |
---|
1430 | 1507 | } |
---|
1431 | 1508 | |
---|
1432 | 1509 | check_v8086_mode(regs, address, tsk); |
---|
| 1510 | +out: |
---|
| 1511 | + oob_trap_unwind(X86_TRAP_PF, regs); |
---|
1433 | 1512 | } |
---|
1434 | 1513 | NOKPROBE_SYMBOL(do_user_addr_fault); |
---|
1435 | 1514 | |
---|
.. | .. |
---|
1448 | 1527 | |
---|
1449 | 1528 | static __always_inline void |
---|
1450 | 1529 | handle_page_fault(struct pt_regs *regs, unsigned long error_code, |
---|
1451 | | - unsigned long address) |
---|
| 1530 | + unsigned long address, |
---|
| 1531 | + irqentry_state_t state) |
---|
1452 | 1532 | { |
---|
1453 | 1533 | trace_page_fault_entries(regs, error_code, address); |
---|
1454 | 1534 | |
---|
.. | .. |
---|
1459 | 1539 | if (unlikely(fault_in_kernel_space(address))) { |
---|
1460 | 1540 | do_kern_addr_fault(regs, error_code, address); |
---|
1461 | 1541 | } else { |
---|
1462 | | - do_user_addr_fault(regs, error_code, address); |
---|
| 1542 | + do_user_addr_fault(regs, error_code, address, state); |
---|
1463 | 1543 | /* |
---|
1464 | 1544 | * User address page fault handling might have reenabled |
---|
1465 | 1545 | * interrupts. Fixing up all potential exit points of |
---|
.. | .. |
---|
1467 | 1547 | * doable w/o creating an unholy mess or turning the code |
---|
1468 | 1548 | * upside down. |
---|
1469 | 1549 | */ |
---|
1470 | | - local_irq_disable(); |
---|
| 1550 | + cond_disable_irqs(); |
---|
1471 | 1551 | } |
---|
1472 | 1552 | } |
---|
1473 | 1553 | |
---|
.. | .. |
---|
1515 | 1595 | state = irqentry_enter(regs); |
---|
1516 | 1596 | |
---|
1517 | 1597 | instrumentation_begin(); |
---|
1518 | | - handle_page_fault(regs, error_code, address); |
---|
| 1598 | + handle_page_fault(regs, error_code, address, state); |
---|
1519 | 1599 | instrumentation_end(); |
---|
1520 | 1600 | |
---|
1521 | 1601 | irqentry_exit(regs, state); |
---|
1522 | 1602 | } |
---|
| 1603 | + |
---|
| 1604 | +#ifdef CONFIG_DOVETAIL |
---|
| 1605 | + |
---|
| 1606 | +void arch_advertise_page_mapping(unsigned long start, unsigned long end) |
---|
| 1607 | +{ |
---|
| 1608 | + unsigned long next, addr = start; |
---|
| 1609 | + pgd_t *pgd, *pgd_ref; |
---|
| 1610 | + struct page *page; |
---|
| 1611 | + |
---|
| 1612 | + /* |
---|
| 1613 | + * APEI may create temporary mappings in interrupt context - |
---|
| 1614 | + * nothing we can and need to propagate globally. |
---|
| 1615 | + */ |
---|
| 1616 | + if (in_interrupt()) |
---|
| 1617 | + return; |
---|
| 1618 | + |
---|
| 1619 | + if (!(start >= VMALLOC_START && start < VMALLOC_END)) |
---|
| 1620 | + return; |
---|
| 1621 | + |
---|
| 1622 | + do { |
---|
| 1623 | + next = pgd_addr_end(addr, end); |
---|
| 1624 | + pgd_ref = pgd_offset_k(addr); |
---|
| 1625 | + if (pgd_none(*pgd_ref)) |
---|
| 1626 | + continue; |
---|
| 1627 | + spin_lock(&pgd_lock); |
---|
| 1628 | + list_for_each_entry(page, &pgd_list, lru) { |
---|
| 1629 | + pgd = page_address(page) + pgd_index(addr); |
---|
| 1630 | + if (pgd_none(*pgd)) |
---|
| 1631 | + set_pgd(pgd, *pgd_ref); |
---|
| 1632 | + } |
---|
| 1633 | + spin_unlock(&pgd_lock); |
---|
| 1634 | + addr = next; |
---|
| 1635 | + } while (addr != end); |
---|
| 1636 | + |
---|
| 1637 | + arch_flush_lazy_mmu_mode(); |
---|
| 1638 | +} |
---|
| 1639 | + |
---|
| 1640 | +#endif |
---|