| .. | .. |
|---|
| 19 | 19 | #include <linux/uaccess.h> /* faulthandler_disabled() */ |
|---|
| 20 | 20 | #include <linux/efi.h> /* efi_recover_from_page_fault()*/ |
|---|
| 21 | 21 | #include <linux/mm_types.h> |
|---|
| 22 | +#include <linux/irqstage.h> |
|---|
| 22 | 23 | |
|---|
| 23 | 24 | #include <asm/cpufeature.h> /* boot_cpu_has, ... */ |
|---|
| 24 | 25 | #include <asm/traps.h> /* dotraplinkage, ... */ |
|---|
| .. | .. |
|---|
| 656 | 657 | * the recursive fault logic below only applies to faults from |
|---|
| 657 | 658 | * task context. |
|---|
| 658 | 659 | */ |
|---|
| 659 | | - if (in_interrupt()) |
|---|
| 660 | + if (running_oob() || in_interrupt()) |
|---|
| 660 | 661 | return; |
|---|
| 661 | 662 | |
|---|
| 662 | 663 | /* |
|---|
| .. | .. |
|---|
| 666 | 667 | * faulting through the emulate_vsyscall() logic. |
|---|
| 667 | 668 | */ |
|---|
| 668 | 669 | if (current->thread.sig_on_uaccess_err && signal) { |
|---|
| 670 | + oob_trap_notify(X86_TRAP_PF, regs); |
|---|
| 669 | 671 | set_signal_archinfo(address, error_code); |
|---|
| 670 | 672 | |
|---|
| 671 | 673 | /* XXX: hwpoison faults will set the wrong code. */ |
|---|
| 672 | 674 | force_sig_fault(signal, si_code, (void __user *)address); |
|---|
| 675 | + oob_trap_unwind(X86_TRAP_PF, regs); |
|---|
| 673 | 676 | } |
|---|
| 674 | 677 | |
|---|
| 675 | 678 | /* |
|---|
| .. | .. |
|---|
| 677 | 680 | */ |
|---|
| 678 | 681 | return; |
|---|
| 679 | 682 | } |
|---|
| 683 | + |
|---|
| 684 | + /* |
|---|
| 685 | + * Do not bother unwinding the notification context on |
|---|
| 686 | + * CPU/firmware/kernel bug. |
|---|
| 687 | + */ |
|---|
| 688 | + oob_trap_notify(X86_TRAP_PF, regs); |
|---|
| 680 | 689 | |
|---|
| 681 | 690 | #ifdef CONFIG_VMAP_STACK |
|---|
| 682 | 691 | /* |
|---|
| .. | .. |
|---|
| 796 | 805 | return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); |
|---|
| 797 | 806 | } |
|---|
| 798 | 807 | |
|---|
| 808 | +#ifdef CONFIG_IRQ_PIPELINE |
|---|
| 809 | + |
|---|
| 810 | +static inline void cond_reenable_irqs_user(void) |
|---|
| 811 | +{ |
|---|
| 812 | + hard_local_irq_enable(); |
|---|
| 813 | + |
|---|
| 814 | + if (running_inband()) |
|---|
| 815 | + local_irq_enable(); |
|---|
| 816 | +} |
|---|
| 817 | + |
|---|
| 818 | +static inline void cond_reenable_irqs_kernel(irqentry_state_t state, |
|---|
| 819 | + struct pt_regs *regs) |
|---|
| 820 | +{ |
|---|
| 821 | + if (regs->flags & X86_EFLAGS_IF) { |
|---|
| 822 | + hard_local_irq_enable(); |
|---|
| 823 | + if (state.stage_info == IRQENTRY_INBAND_UNSTALLED) |
|---|
| 824 | + local_irq_enable(); |
|---|
| 825 | + } |
|---|
| 826 | +} |
|---|
| 827 | + |
|---|
| 828 | +static inline void cond_disable_irqs(void) |
|---|
| 829 | +{ |
|---|
| 830 | + hard_local_irq_disable(); |
|---|
| 831 | + |
|---|
| 832 | + if (running_inband()) |
|---|
| 833 | + local_irq_disable(); |
|---|
| 834 | +} |
|---|
| 835 | + |
|---|
| 836 | +#else /* !CONFIG_IRQ_PIPELINE */ |
|---|
| 837 | + |
|---|
| 838 | +static inline void cond_reenable_irqs_user(void) |
|---|
| 839 | +{ |
|---|
| 840 | + local_irq_enable(); |
|---|
| 841 | +} |
|---|
| 842 | + |
|---|
| 843 | +static inline void cond_reenable_irqs_kernel(irqentry_state_t state, |
|---|
| 844 | + struct pt_regs *regs) |
|---|
| 845 | +{ |
|---|
| 846 | + if (regs->flags & X86_EFLAGS_IF) |
|---|
| 847 | + local_irq_enable(); |
|---|
| 848 | +} |
|---|
| 849 | + |
|---|
| 850 | +static inline void cond_disable_irqs(void) |
|---|
| 851 | +{ |
|---|
| 852 | + local_irq_disable(); |
|---|
| 853 | +} |
|---|
| 854 | + |
|---|
| 855 | +#endif /* !CONFIG_IRQ_PIPELINE */ |
|---|
| 856 | + |
|---|
| 799 | 857 | static void |
|---|
| 800 | 858 | __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, |
|---|
| 801 | 859 | unsigned long address, u32 pkey, int si_code) |
|---|
| .. | .. |
|---|
| 807 | 865 | /* |
|---|
| 808 | 866 | * It's possible to have interrupts off here: |
|---|
| 809 | 867 | */ |
|---|
| 810 | | - local_irq_enable(); |
|---|
| 868 | + cond_reenable_irqs_user(); |
|---|
| 811 | 869 | |
|---|
| 812 | 870 | /* |
|---|
| 813 | 871 | * Valid to do another page fault here because this one came |
|---|
| .. | .. |
|---|
| 818 | 876 | |
|---|
| 819 | 877 | if (is_errata100(regs, address)) |
|---|
| 820 | 878 | return; |
|---|
| 879 | + |
|---|
| 880 | + oob_trap_notify(X86_TRAP_PF, regs); |
|---|
| 881 | + if (!running_inband()) { |
|---|
| 882 | + local_irq_disable_full(); |
|---|
| 883 | + return; |
|---|
| 884 | + } |
|---|
| 821 | 885 | |
|---|
| 822 | 886 | /* |
|---|
| 823 | 887 | * To avoid leaking information about the kernel page table |
|---|
| .. | .. |
|---|
| 837 | 901 | |
|---|
| 838 | 902 | force_sig_fault(SIGSEGV, si_code, (void __user *)address); |
|---|
| 839 | 903 | |
|---|
| 840 | | - local_irq_disable(); |
|---|
| 904 | + local_irq_disable_full(); |
|---|
| 905 | + |
|---|
| 906 | + oob_trap_unwind(X86_TRAP_PF, regs); |
|---|
| 841 | 907 | |
|---|
| 842 | 908 | return; |
|---|
| 843 | 909 | } |
|---|
| .. | .. |
|---|
| 1225 | 1291 | static inline |
|---|
| 1226 | 1292 | void do_user_addr_fault(struct pt_regs *regs, |
|---|
| 1227 | 1293 | unsigned long hw_error_code, |
|---|
| 1228 | | - unsigned long address) |
|---|
| 1294 | + unsigned long address, |
|---|
| 1295 | + irqentry_state_t state) |
|---|
| 1229 | 1296 | { |
|---|
| 1230 | 1297 | struct vm_area_struct *vma = NULL; |
|---|
| 1231 | 1298 | struct task_struct *tsk; |
|---|
| .. | .. |
|---|
| 1266 | 1333 | * If we're in an interrupt, have no user context or are running |
|---|
| 1267 | 1334 | * in a region with pagefaults disabled then we must not take the fault |
|---|
| 1268 | 1335 | */ |
|---|
| 1269 | | - if (unlikely(faulthandler_disabled() || !mm)) { |
|---|
| 1336 | + if (unlikely(running_inband() && (faulthandler_disabled() || !mm))) { |
|---|
| 1270 | 1337 | bad_area_nosemaphore(regs, hw_error_code, address); |
|---|
| 1271 | 1338 | return; |
|---|
| 1272 | 1339 | } |
|---|
| .. | .. |
|---|
| 1279 | 1346 | * potential system fault or CPU buglet: |
|---|
| 1280 | 1347 | */ |
|---|
| 1281 | 1348 | if (user_mode(regs)) { |
|---|
| 1282 | | - local_irq_enable(); |
|---|
| 1349 | + cond_reenable_irqs_user(); |
|---|
| 1283 | 1350 | flags |= FAULT_FLAG_USER; |
|---|
| 1284 | 1351 | } else { |
|---|
| 1285 | | - if (regs->flags & X86_EFLAGS_IF) |
|---|
| 1286 | | - local_irq_enable(); |
|---|
| 1352 | + cond_reenable_irqs_kernel(state, regs); |
|---|
| 1287 | 1353 | } |
|---|
| 1354 | + |
|---|
| 1355 | + /* |
|---|
| 1356 | + * At this point, we would have to stop running |
|---|
| 1357 | + * out-of-band. Tell the companion core about the page fault |
|---|
| 1358 | + * event, so that it might switch current to in-band mode if |
|---|
| 1359 | + * need be. If it does not, then we may assume that it would |
|---|
| 1360 | + * also handle the fixups. |
|---|
| 1361 | + */ |
|---|
| 1362 | + oob_trap_notify(X86_TRAP_PF, regs); |
|---|
| 1363 | + if (!running_inband()) |
|---|
| 1364 | + return; |
|---|
| 1288 | 1365 | |
|---|
| 1289 | 1366 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); |
|---|
| 1290 | 1367 | |
|---|
| .. | .. |
|---|
| 1307 | 1384 | */ |
|---|
| 1308 | 1385 | if (is_vsyscall_vaddr(address)) { |
|---|
| 1309 | 1386 | if (emulate_vsyscall(hw_error_code, regs, address)) |
|---|
| 1310 | | - return; |
|---|
| 1387 | + goto out; |
|---|
| 1311 | 1388 | } |
|---|
| 1312 | 1389 | #endif |
|---|
| 1313 | 1390 | |
|---|
| .. | .. |
|---|
| 1340 | 1417 | * which we do not expect faults. |
|---|
| 1341 | 1418 | */ |
|---|
| 1342 | 1419 | bad_area_nosemaphore(regs, hw_error_code, address); |
|---|
| 1343 | | - return; |
|---|
| 1420 | + goto out; |
|---|
| 1344 | 1421 | } |
|---|
| 1345 | 1422 | retry: |
|---|
| 1346 | 1423 | mmap_read_lock(mm); |
|---|
| .. | .. |
|---|
| 1357 | 1434 | vma = find_vma(mm, address); |
|---|
| 1358 | 1435 | if (unlikely(!vma)) { |
|---|
| 1359 | 1436 | bad_area(regs, hw_error_code, address); |
|---|
| 1360 | | - return; |
|---|
| 1437 | + goto out; |
|---|
| 1361 | 1438 | } |
|---|
| 1362 | 1439 | if (likely(vma->vm_start <= address)) |
|---|
| 1363 | 1440 | goto good_area; |
|---|
| 1364 | 1441 | if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { |
|---|
| 1365 | 1442 | bad_area(regs, hw_error_code, address); |
|---|
| 1366 | | - return; |
|---|
| 1443 | + goto out; |
|---|
| 1367 | 1444 | } |
|---|
| 1368 | 1445 | if (unlikely(expand_stack(vma, address))) { |
|---|
| 1369 | 1446 | bad_area(regs, hw_error_code, address); |
|---|
| 1370 | | - return; |
|---|
| 1447 | + goto out; |
|---|
| 1371 | 1448 | } |
|---|
| 1372 | 1449 | |
|---|
| 1373 | 1450 | /* |
|---|
| .. | .. |
|---|
| 1377 | 1454 | good_area: |
|---|
| 1378 | 1455 | if (unlikely(access_error(hw_error_code, vma))) { |
|---|
| 1379 | 1456 | bad_area_access_error(regs, hw_error_code, address, vma); |
|---|
| 1380 | | - return; |
|---|
| 1457 | + goto out; |
|---|
| 1381 | 1458 | } |
|---|
| 1382 | 1459 | |
|---|
| 1383 | 1460 | /* |
|---|
| .. | .. |
|---|
| 1400 | 1477 | if (!user_mode(regs)) |
|---|
| 1401 | 1478 | no_context(regs, hw_error_code, address, SIGBUS, |
|---|
| 1402 | 1479 | BUS_ADRERR); |
|---|
| 1403 | | - return; |
|---|
| 1480 | + goto out; |
|---|
| 1404 | 1481 | } |
|---|
| 1405 | 1482 | |
|---|
| 1406 | 1483 | /* |
|---|
| .. | .. |
|---|
| 1426 | 1503 | done: |
|---|
| 1427 | 1504 | if (unlikely(fault & VM_FAULT_ERROR)) { |
|---|
| 1428 | 1505 | mm_fault_error(regs, hw_error_code, address, fault); |
|---|
| 1429 | | - return; |
|---|
| 1506 | + goto out; |
|---|
| 1430 | 1507 | } |
|---|
| 1431 | 1508 | |
|---|
| 1432 | 1509 | check_v8086_mode(regs, address, tsk); |
|---|
| 1510 | +out: |
|---|
| 1511 | + oob_trap_unwind(X86_TRAP_PF, regs); |
|---|
| 1433 | 1512 | } |
|---|
| 1434 | 1513 | NOKPROBE_SYMBOL(do_user_addr_fault); |
|---|
| 1435 | 1514 | |
|---|
| .. | .. |
|---|
| 1448 | 1527 | |
|---|
| 1449 | 1528 | static __always_inline void |
|---|
| 1450 | 1529 | handle_page_fault(struct pt_regs *regs, unsigned long error_code, |
|---|
| 1451 | | - unsigned long address) |
|---|
| 1530 | + unsigned long address, |
|---|
| 1531 | + irqentry_state_t state) |
|---|
| 1452 | 1532 | { |
|---|
| 1453 | 1533 | trace_page_fault_entries(regs, error_code, address); |
|---|
| 1454 | 1534 | |
|---|
| .. | .. |
|---|
| 1459 | 1539 | if (unlikely(fault_in_kernel_space(address))) { |
|---|
| 1460 | 1540 | do_kern_addr_fault(regs, error_code, address); |
|---|
| 1461 | 1541 | } else { |
|---|
| 1462 | | - do_user_addr_fault(regs, error_code, address); |
|---|
| 1542 | + do_user_addr_fault(regs, error_code, address, state); |
|---|
| 1463 | 1543 | /* |
|---|
| 1464 | 1544 | * User address page fault handling might have reenabled |
|---|
| 1465 | 1545 | * interrupts. Fixing up all potential exit points of |
|---|
| .. | .. |
|---|
| 1467 | 1547 | * doable w/o creating an unholy mess or turning the code |
|---|
| 1468 | 1548 | * upside down. |
|---|
| 1469 | 1549 | */ |
|---|
| 1470 | | - local_irq_disable(); |
|---|
| 1550 | + cond_disable_irqs(); |
|---|
| 1471 | 1551 | } |
|---|
| 1472 | 1552 | } |
|---|
| 1473 | 1553 | |
|---|
| .. | .. |
|---|
| 1515 | 1595 | state = irqentry_enter(regs); |
|---|
| 1516 | 1596 | |
|---|
| 1517 | 1597 | instrumentation_begin(); |
|---|
| 1518 | | - handle_page_fault(regs, error_code, address); |
|---|
| 1598 | + handle_page_fault(regs, error_code, address, state); |
|---|
| 1519 | 1599 | instrumentation_end(); |
|---|
| 1520 | 1600 | |
|---|
| 1521 | 1601 | irqentry_exit(regs, state); |
|---|
| 1522 | 1602 | } |
|---|
| 1603 | + |
|---|
| 1604 | +#ifdef CONFIG_DOVETAIL |
|---|
| 1605 | + |
|---|
| 1606 | +void arch_advertise_page_mapping(unsigned long start, unsigned long end) |
|---|
| 1607 | +{ |
|---|
| 1608 | + unsigned long next, addr = start; |
|---|
| 1609 | + pgd_t *pgd, *pgd_ref; |
|---|
| 1610 | + struct page *page; |
|---|
| 1611 | + |
|---|
| 1612 | + /* |
|---|
| 1613 | + * APEI may create temporary mappings in interrupt context - |
|---|
| 1614 | + * nothing we can and need to propagate globally. |
|---|
| 1615 | + */ |
|---|
| 1616 | + if (in_interrupt()) |
|---|
| 1617 | + return; |
|---|
| 1618 | + |
|---|
| 1619 | + if (!(start >= VMALLOC_START && start < VMALLOC_END)) |
|---|
| 1620 | + return; |
|---|
| 1621 | + |
|---|
| 1622 | + do { |
|---|
| 1623 | + next = pgd_addr_end(addr, end); |
|---|
| 1624 | + pgd_ref = pgd_offset_k(addr); |
|---|
| 1625 | + if (pgd_none(*pgd_ref)) |
|---|
| 1626 | + continue; |
|---|
| 1627 | + spin_lock(&pgd_lock); |
|---|
| 1628 | + list_for_each_entry(page, &pgd_list, lru) { |
|---|
| 1629 | + pgd = page_address(page) + pgd_index(addr); |
|---|
| 1630 | + if (pgd_none(*pgd)) |
|---|
| 1631 | + set_pgd(pgd, *pgd_ref); |
|---|
| 1632 | + } |
|---|
| 1633 | + spin_unlock(&pgd_lock); |
|---|
| 1634 | + addr = next; |
|---|
| 1635 | + } while (addr != end); |
|---|
| 1636 | + |
|---|
| 1637 | + arch_flush_lazy_mmu_mode(); |
|---|
| 1638 | +} |
|---|
| 1639 | + |
|---|
| 1640 | +#endif |
|---|