From a01b5c9f91adaee088a817861603a5dbe14775c2 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Fri, 01 Nov 2024 02:40:28 +0000
Subject: [PATCH] rootfs patch
---
kernel/arch/x86/mm/fault.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 138 insertions(+), 20 deletions(-)
diff --git a/kernel/arch/x86/mm/fault.c b/kernel/arch/x86/mm/fault.c
index e9afbf8..a4d3b18 100644
--- a/kernel/arch/x86/mm/fault.c
+++ b/kernel/arch/x86/mm/fault.c
@@ -19,6 +19,7 @@
#include <linux/uaccess.h> /* faulthandler_disabled() */
#include <linux/efi.h> /* efi_recover_from_page_fault()*/
#include <linux/mm_types.h>
+#include <linux/irqstage.h>
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
#include <asm/traps.h> /* dotraplinkage, ... */
@@ -656,7 +657,7 @@
* the below recursive fault logic only apply to a faults from
* task context.
*/
- if (in_interrupt())
+ if (running_oob() || in_interrupt())
return;
/*
@@ -666,10 +667,12 @@
* faulting through the emulate_vsyscall() logic.
*/
if (current->thread.sig_on_uaccess_err && signal) {
+ oob_trap_notify(X86_TRAP_PF, regs);
set_signal_archinfo(address, error_code);
/* XXX: hwpoison faults will set the wrong code. */
force_sig_fault(signal, si_code, (void __user *)address);
+ oob_trap_unwind(X86_TRAP_PF, regs);
}
/*
@@ -677,6 +680,12 @@
*/
return;
}
+
+ /*
+ * Do not bother unwinding the notification context on
+ * CPU/firmware/kernel bug.
+ */
+ oob_trap_notify(X86_TRAP_PF, regs);
#ifdef CONFIG_VMAP_STACK
/*
@@ -796,6 +805,55 @@
return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
}
+#ifdef CONFIG_IRQ_PIPELINE
+
+static inline void cond_reenable_irqs_user(void)
+{
+ hard_local_irq_enable();
+
+ if (running_inband())
+ local_irq_enable();
+}
+
+static inline void cond_reenable_irqs_kernel(irqentry_state_t state,
+ struct pt_regs *regs)
+{
+ if (regs->flags & X86_EFLAGS_IF) {
+ hard_local_irq_enable();
+ if (state.stage_info == IRQENTRY_INBAND_UNSTALLED)
+ local_irq_enable();
+ }
+}
+
+static inline void cond_disable_irqs(void)
+{
+ hard_local_irq_disable();
+
+ if (running_inband())
+ local_irq_disable();
+}
+
+#else /* !CONFIG_IRQ_PIPELINE */
+
+static inline void cond_reenable_irqs_user(void)
+{
+ local_irq_enable();
+}
+
+static inline void cond_reenable_irqs_kernel(irqentry_state_t state,
+ struct pt_regs *regs)
+{
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_enable();
+}
+
+static inline void cond_disable_irqs(void)
+{
+ local_irq_disable();
+}
+
+#endif /* !CONFIG_IRQ_PIPELINE */
+
static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
unsigned long address, u32 pkey, int si_code)
@@ -807,7 +865,7 @@
/*
* It's possible to have interrupts off here:
*/
- local_irq_enable();
+ cond_reenable_irqs_user();
/*
* Valid to do another page fault here because this one came
@@ -818,6 +876,12 @@
if (is_errata100(regs, address))
return;
+
+ oob_trap_notify(X86_TRAP_PF, regs);
+ if (!running_inband()) {
+ local_irq_disable_full();
+ return;
+ }
/*
* To avoid leaking information about the kernel page table
@@ -837,7 +901,9 @@
force_sig_fault(SIGSEGV, si_code, (void __user *)address);
- local_irq_disable();
+ local_irq_disable_full();
+
+ oob_trap_unwind(X86_TRAP_PF, regs);
return;
}
@@ -1225,7 +1291,8 @@
static inline
void do_user_addr_fault(struct pt_regs *regs,
unsigned long hw_error_code,
- unsigned long address)
+ unsigned long address,
+ irqentry_state_t state)
{
struct vm_area_struct *vma = NULL;
struct task_struct *tsk;
@@ -1266,7 +1333,7 @@
* If we're in an interrupt, have no user context or are running
* in a region with pagefaults disabled then we must not take the fault
*/
- if (unlikely(faulthandler_disabled() || !mm)) {
+ if (unlikely(running_inband() && (faulthandler_disabled() || !mm))) {
bad_area_nosemaphore(regs, hw_error_code, address);
return;
}
@@ -1279,12 +1346,22 @@
* potential system fault or CPU buglet:
*/
if (user_mode(regs)) {
- local_irq_enable();
+ cond_reenable_irqs_user();
flags |= FAULT_FLAG_USER;
} else {
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_enable();
+ cond_reenable_irqs_kernel(state, regs);
}
+
+ /*
+ * At this point, we would have to stop running
+ * out-of-band. Tell the companion core about the page fault
+ * event, so that it might switch current to in-band mode if
+ * need be. If it does not, then we may assume that it would
+ * also handle the fixups.
+ */
+ oob_trap_notify(X86_TRAP_PF, regs);
+ if (!running_inband())
+ return;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
@@ -1307,7 +1384,7 @@
*/
if (is_vsyscall_vaddr(address)) {
if (emulate_vsyscall(hw_error_code, regs, address))
- return;
+ goto out;
}
#endif
@@ -1340,7 +1417,7 @@
* which we do not expect faults.
*/
bad_area_nosemaphore(regs, hw_error_code, address);
- return;
+ goto out;
}
retry:
mmap_read_lock(mm);
@@ -1357,17 +1434,17 @@
vma = find_vma(mm, address);
if (unlikely(!vma)) {
bad_area(regs, hw_error_code, address);
- return;
+ goto out;
}
if (likely(vma->vm_start <= address))
goto good_area;
if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
bad_area(regs, hw_error_code, address);
- return;
+ goto out;
}
if (unlikely(expand_stack(vma, address))) {
bad_area(regs, hw_error_code, address);
- return;
+ goto out;
}
/*
@@ -1377,7 +1454,7 @@
good_area:
if (unlikely(access_error(hw_error_code, vma))) {
bad_area_access_error(regs, hw_error_code, address, vma);
- return;
+ goto out;
}
/*
@@ -1400,7 +1477,7 @@
if (!user_mode(regs))
no_context(regs, hw_error_code, address, SIGBUS,
BUS_ADRERR);
- return;
+ goto out;
}
/*
@@ -1426,10 +1503,12 @@
done:
if (unlikely(fault & VM_FAULT_ERROR)) {
mm_fault_error(regs, hw_error_code, address, fault);
- return;
+ goto out;
}
check_v8086_mode(regs, address, tsk);
+out:
+ oob_trap_unwind(X86_TRAP_PF, regs);
}
NOKPROBE_SYMBOL(do_user_addr_fault);
@@ -1448,7 +1527,8 @@
static __always_inline void
handle_page_fault(struct pt_regs *regs, unsigned long error_code,
- unsigned long address)
+ unsigned long address,
+ irqentry_state_t state)
{
trace_page_fault_entries(regs, error_code, address);
@@ -1459,7 +1539,7 @@
if (unlikely(fault_in_kernel_space(address))) {
do_kern_addr_fault(regs, error_code, address);
} else {
- do_user_addr_fault(regs, error_code, address);
+ do_user_addr_fault(regs, error_code, address, state);
/*
* User address page fault handling might have reenabled
* interrupts. Fixing up all potential exit points of
@@ -1467,7 +1547,7 @@
* doable w/o creating an unholy mess or turning the code
* upside down.
*/
- local_irq_disable();
+ cond_disable_irqs();
}
}
@@ -1515,8 +1595,46 @@
state = irqentry_enter(regs);
instrumentation_begin();
- handle_page_fault(regs, error_code, address);
+ handle_page_fault(regs, error_code, address, state);
instrumentation_end();
irqentry_exit(regs, state);
}
+
+#ifdef CONFIG_DOVETAIL
+
+void arch_advertise_page_mapping(unsigned long start, unsigned long end)
+{
+ unsigned long next, addr = start;
+ pgd_t *pgd, *pgd_ref;
+ struct page *page;
+
+ /*
+ * APEI may create temporary mappings in interrupt context -
+ * nothing we can or need to propagate globally.
+ */
+ if (in_interrupt())
+ return;
+
+ if (!(start >= VMALLOC_START && start < VMALLOC_END))
+ return;
+
+ do {
+ next = pgd_addr_end(addr, end);
+ pgd_ref = pgd_offset_k(addr);
+ if (pgd_none(*pgd_ref))
+ continue;
+ spin_lock(&pgd_lock);
+ list_for_each_entry(page, &pgd_list, lru) {
+ pgd = page_address(page) + pgd_index(addr);
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ }
+ spin_unlock(&pgd_lock);
+ addr = next;
+ } while (addr != end);
+
+ arch_flush_lazy_mmu_mode();
+}
+
+#endif
--
Gitblit v1.6.2