From 2f529f9b558ca1c1bd74be7437a84e4711743404 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Fri, 01 Nov 2024 02:11:33 +0000
Subject: [PATCH] add xenomai: adapt the generic entry code to the IRQ pipeline

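Hook the Dovetail/IRQ pipeline into the generic syscall and interrupt
entry code so that the Xenomai companion core can intercept events
ahead of the in-band kernel:

- pipeline syscalls to the companion core and bail out early for
  out-of-band or tail-work-only requests
- make interrupt enabling on syscall entry stage-aware, leaving the
  in-band virtual state alone when entered from the out-of-band stage
- stall/unstall the in-band stage across kernel entry/exit and add
  irq_pipeline_debug() checks on the hard interrupt state
- run the _TIF_RETUSER notifier before returning to user mode
- record the interrupt stage in irqentry_state_t on entry and
  synchronize the interrupt log before rescheduling on exit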
---
 kernel/kernel/entry/common.c |  196 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 189 insertions(+), 7 deletions(-)

diff --git a/kernel/kernel/entry/common.c b/kernel/kernel/entry/common.c
index 09f5885..5f043bb 100644
--- a/kernel/kernel/entry/common.c
+++ b/kernel/kernel/entry/common.c
@@ -2,6 +2,7 @@
 
 #include <linux/context_tracking.h>
 #include <linux/entry-common.h>
+#include <linux/irq_pipeline.h>
 #include <linux/livepatch.h>
 #include <linux/audit.h>
 
@@ -71,10 +72,45 @@
 	return ret ? : syscall;
 }
 
+static __always_inline void
+syscall_enter_from_user_enable_irqs(void)
+{
+	if (running_inband()) {
+		/*
+		 * If pipelining interrupts, prepare for emulating a
+		 * stall -> unstall transition (we are currently
+		 * unstalled), fixing up the IRQ trace state in order
+		 * to keep lockdep happy (and silent).
+		 */
+		stall_inband_nocheck();
+		hard_cond_local_irq_enable();
+		local_irq_enable();
+	} else {
+		/*
+		 * We are running on the out-of-band stage, don't mess
+		 * with the in-band interrupt state. This is none of
+		 * our business. We may manipulate the hardware state
+		 * only.
+		 */
+		hard_local_irq_enable();
+	}
+}
+
 static __always_inline long
 __syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
 {
 	unsigned long ti_work;
+	int ret;
+
+	/*
+	 * Pipeline the syscall to the companion core if the current
+	 * task wants this. Compiled out if not dovetailing.
+	 */
+	ret = pipeline_syscall(syscall, regs);
+	if (ret > 0)	/* out-of-band, bail out. */
+		return EXIT_SYSCALL_OOB;
+	if (ret < 0)		/* in-band, tail work only. */
+		return EXIT_SYSCALL_TAIL;
 
 	ti_work = READ_ONCE(current_thread_info()->flags);
 	if (ti_work & SYSCALL_ENTER_WORK)
@@ -95,7 +131,7 @@
 	enter_from_user_mode(regs);
 
 	instrumentation_begin();
-	local_irq_enable();
+	syscall_enter_from_user_enable_irqs();
 	ret = __syscall_enter_from_user_work(regs, syscall);
 	instrumentation_end();
 
@@ -106,7 +142,7 @@
 {
 	enter_from_user_mode(regs);
 	instrumentation_begin();
-	local_irq_enable();
+	syscall_enter_from_user_enable_irqs();
 	instrumentation_end();
 }
 
@@ -121,6 +157,7 @@
  * 3) Invoke architecture specific last minute exit code, e.g. speculation
  *    mitigations, etc.
  * 4) Tell lockdep that interrupts are enabled
+ * 5) Unstall the in-band stage of the interrupt pipeline if currently running in-band
  */
 static __always_inline void exit_to_user_mode(void)
 {
@@ -132,6 +169,8 @@
 	user_enter_irqoff();
 	arch_exit_to_user_mode();
 	lockdep_hardirqs_on(CALLER_ADDR0);
+	if (running_inband())
+		unstall_inband();
 }
 
 /* Workaround to allow gradual conversion of architecture code */
@@ -155,6 +194,12 @@
 	while (ti_work & EXIT_TO_USER_MODE_WORK) {
 
 		local_irq_enable_exit_to_user(ti_work);
+
+		/*
+		 * Check that local_irq_enable_exit_to_user() does the
+		 * right thing when pipelining.
+		 */
+		WARN_ON_ONCE(irq_pipeline_debug() && hard_irqs_disabled());
 
 		if (ti_work & _TIF_NEED_RESCHED)
 			schedule();
@@ -182,6 +227,7 @@
 		 * enabled above.
 		 */
 		local_irq_disable_exit_to_user();
+		WARN_ON_ONCE(irq_pipeline_debug() && !hard_irqs_disabled());
 		ti_work = READ_ONCE(current_thread_info()->flags);
 	}
 
@@ -189,16 +235,36 @@
 	return ti_work;
 }
 
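+/*
+ * If _TIF_RETUSER is set, run the companion core's retuser hook with
+ * hard irqs enabled. Returns true if we are still running in-band
+ * afterwards, in which case the caller must re-check the exit work.
+ */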
+static inline bool do_retuser(unsigned long ti_work)
+{
+	if (dovetailing() && (ti_work & _TIF_RETUSER)) {
+		hard_local_irq_enable();
+		inband_retuser_notify();
+		hard_local_irq_disable();
+		/* RETUSER might have switched oob */
+		return running_inband();
+	}
+
+	return false;
+}
+
 static void exit_to_user_mode_prepare(struct pt_regs *regs)
 {
-	unsigned long ti_work = READ_ONCE(current_thread_info()->flags);
+	unsigned long ti_work;
+
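+	/* Hard interrupts are expected to be off at this point. */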
+	check_hard_irqs_disabled();
 
 	lockdep_assert_irqs_disabled();
+again:
+	ti_work = READ_ONCE(current_thread_info()->flags);
 
 	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
 		ti_work = exit_to_user_mode_loop(regs, ti_work);
 
 	arch_exit_to_user_mode_prepare(regs, ti_work);
+
+	if (do_retuser(ti_work))
+		goto again;
 
 	/* Ensure that the address limit is intact and no locks are held */
 	addr_limit_user_check();
@@ -252,7 +318,7 @@
 
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
 		if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
-			local_irq_enable();
+			local_irq_enable_full();
 	}
 
 	rseq_syscall(regs);
@@ -261,8 +327,15 @@
 	 * Do one-time syscall specific work. If these work items are
 	 * enabled, we want to run them exactly once per syscall exit with
 	 * interrupts enabled.
+	 *
+	 * Dovetail: if this does not look like an in-band syscall, it
+	 * has to belong to the companion core. Typically,
+	 * __OOB_SYSCALL_BIT would be set in the syscall number. Skip the
+	 * work for those syscalls.
 	 */
-	if (unlikely(cached_flags & SYSCALL_EXIT_WORK))
+	if (unlikely((cached_flags & SYSCALL_EXIT_WORK) &&
+		(!irqs_pipelined() ||
+			syscall_get_nr(current, regs) < NR_syscalls)))
 		syscall_exit_work(regs, cached_flags);
 }
 
@@ -278,6 +351,8 @@
 
 noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
 {
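+	/*
+	 * Entering from user space with hard irqs off: the in-band stage
+	 * should be unstalled; stall it to mirror the hardware state.
+	 */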
+	WARN_ON_ONCE(irq_pipeline_debug() && irqs_disabled());
+	stall_inband_nocheck();
 	enter_from_user_mode(regs);
 }
 
@@ -293,12 +368,36 @@
 {
 	irqentry_state_t ret = {
 		.exit_rcu = false,
+#ifdef CONFIG_IRQ_PIPELINE
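+		/* Assume a stalled in-band entry unless found otherwise below. */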
+		.stage_info = IRQENTRY_INBAND_STALLED,
+#endif
 	};
 
+#ifdef CONFIG_IRQ_PIPELINE
+	if (running_oob()) {
+		WARN_ON_ONCE(irq_pipeline_debug() && oob_irqs_disabled());
+		ret.stage_info = IRQENTRY_OOB;
+		return ret;
+	}
+#endif
+
 	if (user_mode(regs)) {
+#ifdef CONFIG_IRQ_PIPELINE
+		ret.stage_info = IRQENTRY_INBAND_UNSTALLED;
+#endif
 		irqentry_enter_from_user_mode(regs);
 		return ret;
 	}
+
+#ifdef CONFIG_IRQ_PIPELINE
+	/*
+	 * IRQ pipeline: If we trapped from kernel space, the virtual
+	 * state may or may not match the hardware state. Since hard
+	 * irqs are off on entry, we have to stall the in-band stage.
+	 */
+	if (!test_and_stall_inband_nocheck())
+		ret.stage_info = IRQENTRY_INBAND_UNSTALLED;
+#endif
 
 	/*
 	 * If this entry hit the idle task invoke rcu_irq_enter() whether
@@ -366,14 +465,91 @@
 	}
 }
 
+#ifdef CONFIG_IRQ_PIPELINE
+
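+/*
+ * With the pipeline enabled, only allow preemption on exit if the
+ * in-band stage was unstalled when the exception/IRQ was taken.
+ */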
+static inline
+bool irqexit_may_preempt_schedule(irqentry_state_t state,
+				struct pt_regs *regs)
+{
+	return state.stage_info == IRQENTRY_INBAND_UNSTALLED;
+}
+
+#else
+
+static inline
+bool irqexit_may_preempt_schedule(irqentry_state_t state,
+				struct pt_regs *regs)
+{
+	return !regs_irqs_disabled(regs);
+}
+
+#endif
+
+#ifdef CONFIG_IRQ_PIPELINE
+
+static bool irqentry_syncstage(irqentry_state_t state) /* hard irqs off */
+{
+	/*
+	 * If pipelining interrupts, enable in-band IRQs then
+	 * synchronize the interrupt log on exit if:
+	 *
+	 * - irqentry_enter() stalled the stage in order to mirror the
+	 * hardware state.
+	 *
+	 * - we were coming from oob, thus went through a stage migration
+	 * that was caused by taking a CPU exception, e.g., a fault.
+	 *
+	 * We run before preempt_schedule_irq() may be called later on
+	 * by preemptible kernels, so that any rescheduling request
+	 * triggered by in-band IRQ handlers is considered.
+	 */
+	if (state.stage_info == IRQENTRY_INBAND_UNSTALLED ||
+		state.stage_info == IRQENTRY_OOB) {
+		unstall_inband_nocheck();
+		synchronize_pipeline_on_irq();
+		stall_inband_nocheck();
+		return true;
+	}
+
+	return false;
+}
+
+static void irqentry_unstall(void)
+{
+	unstall_inband_nocheck();
+}
+
+#else
+
+static bool irqentry_syncstage(irqentry_state_t state)
+{
+	return false;
+}
+
+static void irqentry_unstall(void)
+{
+}
+
+#endif
+
 noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
 {
+	bool synchronized = false;
+
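+	/* Nothing to do for the in-band exit path when leaving from oob. */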
+	if (running_oob())
+		return;
+
 	lockdep_assert_irqs_disabled();
 
 	/* Check whether this returns to user mode */
 	if (user_mode(regs)) {
 		irqentry_exit_to_user_mode(regs);
-	} else if (!regs_irqs_disabled(regs)) {
+		return;
+	}
+
+	synchronized = irqentry_syncstage(state);
+
+	if (irqexit_may_preempt_schedule(state, regs)) {
 		/*
 		 * If RCU was not watching on entry this needs to be done
 		 * carefully and needs the same ordering of lockdep/tracing
@@ -387,7 +563,7 @@
 			instrumentation_end();
 			rcu_irq_exit();
 			lockdep_hardirqs_on(CALLER_ADDR0);
-			return;
+			goto out;
 		}
 
 		instrumentation_begin();
@@ -404,6 +580,12 @@
 		if (state.exit_rcu)
 			rcu_irq_exit();
 	}
+
+out:
+	if (synchronized)
+		irqentry_unstall();
+
+	return;
 }
 
 irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)

--
Gitblit v1.6.2