From 2f529f9b558ca1c1bd74be7437a84e4711743404 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Fri, 01 Nov 2024 02:11:33 +0000
Subject: [PATCH] x86/mm: pipeline the page fault handler for Xenomai/Dovetail

Route x86 page faults through the out-of-band trap notification hooks
(oob_trap_notify()/oob_trap_unwind()) so that a companion core such as
Xenomai can switch a faulting out-of-band task to in-band mode, or
handle the fault and its fixups itself. Add IRQ-pipeline-aware helpers
for re-enabling/disabling interrupts on the fault paths, and introduce
arch_advertise_page_mapping() under CONFIG_DOVETAIL to propagate newly
created vmalloc PGD entries to all page tables.

---
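[Note kept below the "---" marker, so git am drops it: every fault path
touched by this patch follows the same notify/unwind bracketing around
the in-band handling. The sketch below only illustrates that calling
convention; it is not code from this patch, and the helper name
sketch_fault_bracketing() is made up for the example. Header roles are
assumed from how the patch itself uses them.]

#include <linux/irqstage.h>	/* oob/pipeline helpers used by this patch */
#include <asm/ptrace.h>		/* struct pt_regs */
#include <asm/traps.h>		/* X86_TRAP_PF */

static void sketch_fault_bracketing(struct pt_regs *regs)
{
	/* Let the companion core see the trap before in-band handling. */
	oob_trap_notify(X86_TRAP_PF, regs);

	/*
	 * If the core kept current out-of-band, it owns the fault and
	 * the fixups; the in-band handler bails out without unwinding.
	 */
	if (!running_inband())
		return;

	/* ... the regular in-band fault handling would run here ... */

	/* Every in-band exit point unwinds the notification. */
	oob_trap_unwind(X86_TRAP_PF, regs);
}
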
 kernel/arch/x86/mm/fault.c |  158 ++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 138 insertions(+), 20 deletions(-)

diff --git a/kernel/arch/x86/mm/fault.c b/kernel/arch/x86/mm/fault.c
index e9afbf8..a4d3b18 100644
--- a/kernel/arch/x86/mm/fault.c
+++ b/kernel/arch/x86/mm/fault.c
@@ -19,6 +19,7 @@
 #include <linux/uaccess.h>		/* faulthandler_disabled()	*/
 #include <linux/efi.h>			/* efi_recover_from_page_fault()*/
 #include <linux/mm_types.h>
+#include <linux/irqstage.h>
 
 #include <asm/cpufeature.h>		/* boot_cpu_has, ...		*/
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
@@ -656,7 +657,7 @@
 		 * the below recursive fault logic only apply to a faults from
 		 * task context.
 		 */
-		if (in_interrupt())
+		if (running_oob() || in_interrupt())
 			return;
 
 		/*
@@ -666,10 +667,12 @@
 		 * faulting through the emulate_vsyscall() logic.
 		 */
 		if (current->thread.sig_on_uaccess_err && signal) {
+			oob_trap_notify(X86_TRAP_PF, regs);
 			set_signal_archinfo(address, error_code);
 
 			/* XXX: hwpoison faults will set the wrong code. */
 			force_sig_fault(signal, si_code, (void __user *)address);
+			oob_trap_unwind(X86_TRAP_PF, regs);
 		}
 
 		/*
@@ -677,6 +680,12 @@
 		 */
 		return;
 	}
+
+	/*
+	 * Do not bother unwinding the notification context on a
+	 * CPU/firmware/kernel bug.
+	 */
+	oob_trap_notify(X86_TRAP_PF, regs);
 
 #ifdef CONFIG_VMAP_STACK
 	/*
@@ -796,6 +805,55 @@
 	return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
 }
 
+#ifdef CONFIG_IRQ_PIPELINE
+
+static inline void cond_reenable_irqs_user(void)
+{
+	hard_local_irq_enable();
+
+	if (running_inband())
+		local_irq_enable();
+}
+
+static inline void cond_reenable_irqs_kernel(irqentry_state_t state,
+					struct pt_regs *regs)
+{
+	if (regs->flags & X86_EFLAGS_IF) {
+		hard_local_irq_enable();
+		if (state.stage_info == IRQENTRY_INBAND_UNSTALLED)
+			local_irq_enable();
+	}
+}
+
+static inline void cond_disable_irqs(void)
+{
+	hard_local_irq_disable();
+
+	if (running_inband())
+		local_irq_disable();
+}
+
+#else  /* !CONFIG_IRQ_PIPELINE */
+
+static inline void cond_reenable_irqs_user(void)
+{
+	local_irq_enable();
+}
+
+static inline void cond_reenable_irqs_kernel(irqentry_state_t state,
+					struct pt_regs *regs)
+{
+	if (regs->flags & X86_EFLAGS_IF)
+		local_irq_enable();
+}
+
+static inline void cond_disable_irqs(void)
+{
+	local_irq_disable();
+}
+
+#endif  /* !CONFIG_IRQ_PIPELINE */
+
 static void
 __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 		       unsigned long address, u32 pkey, int si_code)
@@ -807,7 +865,7 @@
 		/*
 		 * It's possible to have interrupts off here:
 		 */
-		local_irq_enable();
+		cond_reenable_irqs_user();
 
 		/*
 		 * Valid to do another page fault here because this one came
@@ -818,6 +876,12 @@
 
 		if (is_errata100(regs, address))
 			return;
+
+		oob_trap_notify(X86_TRAP_PF, regs);
+		if (!running_inband()) {
+			local_irq_disable_full();
+			return;
+		}
 
 		/*
 		 * To avoid leaking information about the kernel page table
@@ -837,7 +901,9 @@
 
 		force_sig_fault(SIGSEGV, si_code, (void __user *)address);
 
-		local_irq_disable();
+		local_irq_disable_full();
+
+		oob_trap_unwind(X86_TRAP_PF, regs);
 
 		return;
 	}
@@ -1225,7 +1291,8 @@
 static inline
 void do_user_addr_fault(struct pt_regs *regs,
 			unsigned long hw_error_code,
-			unsigned long address)
+			unsigned long address,
+			irqentry_state_t state)
 {
 	struct vm_area_struct *vma = NULL;
 	struct task_struct *tsk;
@@ -1266,7 +1333,7 @@
 	 * If we're in an interrupt, have no user context or are running
 	 * in a region with pagefaults disabled then we must not take the fault
 	 */
-	if (unlikely(faulthandler_disabled() || !mm)) {
+	if (unlikely(running_inband() && (faulthandler_disabled() || !mm))) {
 		bad_area_nosemaphore(regs, hw_error_code, address);
 		return;
 	}
@@ -1279,12 +1346,22 @@
 	 * potential system fault or CPU buglet:
 	 */
 	if (user_mode(regs)) {
-		local_irq_enable();
+		cond_reenable_irqs_user();
 		flags |= FAULT_FLAG_USER;
 	} else {
-		if (regs->flags & X86_EFLAGS_IF)
-			local_irq_enable();
+		cond_reenable_irqs_kernel(state, regs);
 	}
+
+	/*
+	 * Past this point we cannot keep handling the fault from the
+	 * out-of-band stage. Notify the companion core about the page
+	 * fault event, so that it may switch current to in-band mode
+	 * if need be. If it does not, we assume that it also takes
+	 * care of the fixups.
+	 */
+	oob_trap_notify(X86_TRAP_PF, regs);
+	if (!running_inband())
+		return;
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
@@ -1307,7 +1384,7 @@
 	 */
 	if (is_vsyscall_vaddr(address)) {
 		if (emulate_vsyscall(hw_error_code, regs, address))
-			return;
+			goto out;
 	}
 #endif
 
@@ -1340,7 +1417,7 @@
 			 * which we do not expect faults.
 			 */
 			bad_area_nosemaphore(regs, hw_error_code, address);
-			return;
+			goto out;
 		}
 retry:
 		mmap_read_lock(mm);
@@ -1357,17 +1434,17 @@
 		vma = find_vma(mm, address);
 	if (unlikely(!vma)) {
 		bad_area(regs, hw_error_code, address);
-		return;
+		goto out;
 	}
 	if (likely(vma->vm_start <= address))
 		goto good_area;
 	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
 		bad_area(regs, hw_error_code, address);
-		return;
+		goto out;
 	}
 	if (unlikely(expand_stack(vma, address))) {
 		bad_area(regs, hw_error_code, address);
-		return;
+		goto out;
 	}
 
 	/*
@@ -1377,7 +1454,7 @@
 good_area:
 	if (unlikely(access_error(hw_error_code, vma))) {
 		bad_area_access_error(regs, hw_error_code, address, vma);
-		return;
+		goto out;
 	}
 
 	/*
@@ -1400,7 +1477,7 @@
 		if (!user_mode(regs))
 			no_context(regs, hw_error_code, address, SIGBUS,
 				   BUS_ADRERR);
-		return;
+		goto out;
 	}
 
 	/*
@@ -1426,10 +1503,12 @@
 done:
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		mm_fault_error(regs, hw_error_code, address, fault);
-		return;
+		goto out;
 	}
 
 	check_v8086_mode(regs, address, tsk);
+out:
+	oob_trap_unwind(X86_TRAP_PF, regs);
 }
 NOKPROBE_SYMBOL(do_user_addr_fault);
 
@@ -1448,7 +1527,8 @@
 
 static __always_inline void
 handle_page_fault(struct pt_regs *regs, unsigned long error_code,
-			      unsigned long address)
+		unsigned long address,
+		irqentry_state_t state)
 {
 	trace_page_fault_entries(regs, error_code, address);
 
@@ -1459,7 +1539,7 @@
 	if (unlikely(fault_in_kernel_space(address))) {
 		do_kern_addr_fault(regs, error_code, address);
 	} else {
-		do_user_addr_fault(regs, error_code, address);
+		do_user_addr_fault(regs, error_code, address, state);
 		/*
 		 * User address page fault handling might have reenabled
 		 * interrupts. Fixing up all potential exit points of
@@ -1467,7 +1547,7 @@
 		 * doable w/o creating an unholy mess or turning the code
 		 * upside down.
 		 */
-		local_irq_disable();
+		cond_disable_irqs();
 	}
 }
 
@@ -1515,8 +1595,46 @@
 	state = irqentry_enter(regs);
 
 	instrumentation_begin();
-	handle_page_fault(regs, error_code, address);
+	handle_page_fault(regs, error_code, address, state);
 	instrumentation_end();
 
 	irqentry_exit(regs, state);
 }
+
+#ifdef CONFIG_DOVETAIL
+
+void arch_advertise_page_mapping(unsigned long start, unsigned long end)
+{
+	unsigned long next, addr = start;
+	pgd_t *pgd, *pgd_ref;
+	struct page *page;
+
+	/*
+	 * APEI may create temporary mappings in interrupt context -
+	 * nothing we can or need to propagate globally.
+	 */
+	if (in_interrupt())
+		return;
+
+	if (!(start >= VMALLOC_START && start < VMALLOC_END))
+		return;
+
+	do {
+		next = pgd_addr_end(addr, end);
+		pgd_ref = pgd_offset_k(addr);
+		if (pgd_none(*pgd_ref))
+			continue;
+		spin_lock(&pgd_lock);
+		list_for_each_entry(page, &pgd_list, lru) {
+			pgd = (pgd_t *)page_address(page) + pgd_index(addr);
+			if (pgd_none(*pgd))
+				set_pgd(pgd, *pgd_ref);
+		}
+		spin_unlock(&pgd_lock);
+		/* Advance in the loop test so that "continue" cannot stall. */
+	} while (addr = next, addr != end);
+
+	arch_flush_lazy_mmu_mode();
+}
+
+#endif

--
Gitblit v1.6.2