/*
|
* SPDX-License-Identifier: GPL-2.0
|
*
|
* Copyright (C) 2016 Philippe Gerum <rpm@xenomai.org>.
|
*/
|
#include <linux/kernel.h>
|
#include <linux/sched.h>
|
#include <linux/interrupt.h>
|
#include <linux/irq.h>
|
#include <linux/irqdomain.h>
|
#include <linux/irq_pipeline.h>
|
#include <linux/irq_work.h>
|
#include <linux/jhash.h>
|
#include <linux/debug_locks.h>
|
#include <linux/dovetail.h>
|
#include <dovetail/irq.h>
|
#include <trace/events/irq.h>
|
#include "internals.h"
|
|
#ifdef CONFIG_DEBUG_IRQ_PIPELINE
|
#define trace_on_debug
|
#else
|
#define trace_on_debug notrace
|
#endif
|
|
struct irq_stage inband_stage = {
|
.name = "Linux",
|
};
|
EXPORT_SYMBOL_GPL(inband_stage);
|
|
struct irq_stage oob_stage;
|
EXPORT_SYMBOL_GPL(oob_stage);
|
|
struct irq_domain *synthetic_irq_domain;
|
EXPORT_SYMBOL_GPL(synthetic_irq_domain);
|
|
bool irq_pipeline_oopsing;
|
EXPORT_SYMBOL_GPL(irq_pipeline_oopsing);
|
|
bool irq_pipeline_active;
|
EXPORT_SYMBOL_GPL(irq_pipeline_active);
|
|
#define IRQ_L1_MAPSZ BITS_PER_LONG
|
#define IRQ_L2_MAPSZ (BITS_PER_LONG * BITS_PER_LONG)
|
#define IRQ_FLAT_MAPSZ DIV_ROUND_UP(IRQ_BITMAP_BITS, BITS_PER_LONG)
|
|
#if IRQ_FLAT_MAPSZ > IRQ_L2_MAPSZ
|
#define __IRQ_STAGE_MAP_LEVELS 4 /* up to 4/16M vectors */
|
#elif IRQ_FLAT_MAPSZ > IRQ_L1_MAPSZ
|
#define __IRQ_STAGE_MAP_LEVELS 3 /* up to 64/256k vectors */
|
#else
|
#define __IRQ_STAGE_MAP_LEVELS 2 /* up to 1024/4096 vectors */
|
#endif
|
|
struct irq_event_map {
|
#if __IRQ_STAGE_MAP_LEVELS >= 3
|
unsigned long index_1[IRQ_L1_MAPSZ];
|
#if __IRQ_STAGE_MAP_LEVELS >= 4
|
unsigned long index_2[IRQ_L2_MAPSZ];
|
#endif
|
#endif
|
unsigned long flat[IRQ_FLAT_MAPSZ];
|
};
|
|
#ifdef CONFIG_SMP
|
|
static struct irq_event_map bootup_irq_map __initdata;
|
|
static DEFINE_PER_CPU(struct irq_event_map, irq_map_array[2]);
|
|
DEFINE_PER_CPU(struct irq_pipeline_data, irq_pipeline) = {
|
.stages = {
|
[0] = {
|
.log = {
|
.map = &bootup_irq_map,
|
},
|
.stage = &inband_stage,
|
},
|
},
|
};
|
|
#else /* !CONFIG_SMP */
|
|
static struct irq_event_map inband_irq_map;
|
|
static struct irq_event_map oob_irq_map;
|
|
DEFINE_PER_CPU(struct irq_pipeline_data, irq_pipeline) = {
|
.stages = {
|
[0] = {
|
.log = {
|
.map = &inband_irq_map,
|
},
|
.stage = &inband_stage,
|
},
|
[1] = {
|
.log = {
|
.map = &oob_irq_map,
|
},
|
},
|
},
|
};
|
|
#endif /* !CONFIG_SMP */
|
|
EXPORT_PER_CPU_SYMBOL(irq_pipeline);
|
|
static void sirq_noop(struct irq_data *data) { }
|
|
/* Virtual interrupt controller for synthetic IRQs. */
|
static struct irq_chip sirq_chip = {
|
.name = "SIRQC",
|
.irq_enable = sirq_noop,
|
.irq_disable = sirq_noop,
|
.flags = IRQCHIP_PIPELINE_SAFE | IRQCHIP_SKIP_SET_WAKE,
|
};
|
|
static int sirq_map(struct irq_domain *d, unsigned int irq,
|
irq_hw_number_t hwirq)
|
{
|
irq_set_percpu_devid(irq);
|
irq_set_chip_and_handler(irq, &sirq_chip, handle_synthetic_irq);
|
|
return 0;
|
}
|
|
static struct irq_domain_ops sirq_domain_ops = {
|
.map = sirq_map,
|
};
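/*
 * Example (illustrative sketch, not part of this file): how a client
 * would typically allocate and hook a synthetic IRQ, mirroring what
 * irq_pipeline_init() does later in this file for the in-band work
 * sirq. The foo_* names are hypothetical.
 *
 *      static irqreturn_t foo_sirq_handler(int sirq, void *dev_id)
 *      {
 *              return IRQ_HANDLED;
 *      }
 *
 *      static struct irqaction foo_sirq_action = {
 *              .handler = foo_sirq_handler,
 *              .name = "foo sirq",
 *              .flags = IRQF_NO_THREAD,
 *      };
 *
 *      unsigned int sirq = irq_create_direct_mapping(synthetic_irq_domain);
 *      setup_percpu_irq(sirq, &foo_sirq_action);
 *
 * The sirq can then be posted from hard irq context with
 * irq_post_inband(sirq); handle_synthetic_irq() runs the action once
 * the in-band log is synchronized.
 */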
|
|
#ifdef CONFIG_SPARSE_IRQ
|
/*
* The performance of the radix tree in sparse mode can be really
* poor under mm stress on some hardware, so use a local descriptor
* cache to ease the pain.
*/
|
#define DESC_CACHE_SZ 128
|
|
static struct irq_desc *desc_cache[DESC_CACHE_SZ] __cacheline_aligned;
|
|
static inline u32 hash_irq(unsigned int irq)
|
{
|
return jhash(&irq, sizeof(irq), irq) % DESC_CACHE_SZ;
|
}
|
|
static __always_inline
|
struct irq_desc *irq_to_cached_desc(unsigned int irq)
|
{
|
int hval = hash_irq(irq);
|
struct irq_desc *desc = desc_cache[hval];
|
|
if (unlikely(desc == NULL || irq_desc_get_irq(desc) != irq)) {
|
desc = irq_to_desc(irq);
|
desc_cache[hval] = desc;
|
}
|
|
return desc;
|
}
|
|
void uncache_irq_desc(unsigned int irq)
|
{
|
int hval = hash_irq(irq);
|
|
desc_cache[hval] = NULL;
|
}
|
|
#else
|
|
static struct irq_desc *irq_to_cached_desc(unsigned int irq)
|
{
|
return irq_to_desc(irq);
|
}
|
|
#endif
|
|
/**
|
* handle_synthetic_irq - synthetic irq handler
|
* @desc: the interrupt description structure for this irq
|
*
|
* Handles synthetic interrupts flowing down the IRQ pipeline
|
* with per-CPU semantics.
|
*
|
* CAUTION: synthetic IRQs may be used to map hardware-generated
|
* events (e.g. IPIs or traps), so we must start handling them as
|
* common interrupts.
|
*/
|
void handle_synthetic_irq(struct irq_desc *desc)
|
{
|
unsigned int irq = irq_desc_get_irq(desc);
|
struct irqaction *action;
|
irqreturn_t ret;
|
void *dev_id;
|
|
if (on_pipeline_entry()) {
|
handle_oob_irq(desc);
|
return;
|
}
|
|
action = desc->action;
|
if (action == NULL) {
|
if (printk_ratelimit())
|
printk(KERN_WARNING
|
"CPU%d: WARNING: synthetic IRQ%d has no action.\n",
|
smp_processor_id(), irq);
|
return;
|
}
|
|
__kstat_incr_irqs_this_cpu(desc);
|
trace_irq_handler_entry(irq, action);
|
dev_id = raw_cpu_ptr(action->percpu_dev_id);
|
ret = action->handler(irq, dev_id);
|
trace_irq_handler_exit(irq, action, ret);
|
}
|
|
void sync_irq_stage(struct irq_stage *top)
|
{
|
struct irq_stage_data *p;
|
struct irq_stage *stage;
|
|
/* We must enter over the inband stage with hardirqs off. */
|
if (irq_pipeline_debug()) {
|
WARN_ON_ONCE(!hard_irqs_disabled());
|
WARN_ON_ONCE(current_irq_stage != &inband_stage);
|
}
|
|
stage = top;
|
|
for (;;) {
|
if (stage == &inband_stage) {
|
if (test_inband_stall())
|
break;
|
} else {
|
if (test_oob_stall())
|
break;
|
}
|
|
p = this_staged(stage);
|
if (stage_irqs_pending(p)) {
|
if (stage == &inband_stage)
|
sync_current_irq_stage();
|
else {
|
/* Switch to oob before synchronizing. */
|
switch_oob(p);
|
sync_current_irq_stage();
|
/* Then back to the inband stage. */
|
switch_inband(this_inband_staged());
|
}
|
}
|
|
if (stage == &inband_stage)
|
break;
|
|
stage = &inband_stage;
|
}
|
}
|
|
void synchronize_pipeline(void) /* hardirqs off */
|
{
|
struct irq_stage *top = &oob_stage;
|
int stalled = test_oob_stall();
|
|
if (unlikely(!oob_stage_present())) {
|
top = &inband_stage;
|
stalled = test_inband_stall();
|
}
|
|
if (current_irq_stage != top)
|
sync_irq_stage(top);
|
else if (!stalled)
|
sync_current_irq_stage();
|
}
|
|
static void __inband_irq_enable(void)
|
{
|
struct irq_stage_data *p;
|
unsigned long flags;
|
|
check_inband_stage();
|
|
flags = hard_local_irq_save();
|
|
unstall_inband_nocheck();
|
|
p = this_inband_staged();
|
if (unlikely(stage_irqs_pending(p) && !in_pipeline())) {
|
sync_current_irq_stage();
|
hard_local_irq_restore(flags);
|
preempt_check_resched();
|
} else {
|
hard_local_irq_restore(flags);
|
}
|
}
|
|
/**
|
* inband_irq_enable - enable interrupts for the inband stage
|
*
|
* Enable interrupts for the inband stage, allowing interrupts to
|
* preempt the in-band code. If in-band IRQs are pending for the
|
* inband stage in the per-CPU log at the time of this call, they
|
* are played back.
|
*
|
* The caller is expected to tell the tracer about the change by
|
* calling trace_hardirqs_on().
|
*/
|
notrace void inband_irq_enable(void)
|
{
|
/*
|
* We are NOT supposed to enter this code with hard IRQs off.
|
* If we do, then the caller might be wrongly assuming that
|
* invoking local_irq_enable() implies enabling hard
|
* interrupts like the legacy I-pipe did, which is not the
|
* case anymore. Relax this requirement when oopsing, since
|
* the kernel may be in a weird state.
|
*/
|
WARN_ON_ONCE(irq_pipeline_debug() && hard_irqs_disabled());
|
__inband_irq_enable();
|
}
|
EXPORT_SYMBOL(inband_irq_enable);
|
|
/**
|
* inband_irq_disable - disable interrupts for the inband stage
|
*
|
* Disable interrupts for the inband stage. Out-of-band interrupts
* can still be taken and delivered to their respective handlers
* though.
|
*/
|
notrace void inband_irq_disable(void)
|
{
|
check_inband_stage();
|
stall_inband_nocheck();
|
}
|
EXPORT_SYMBOL(inband_irq_disable);
|
|
/**
|
* inband_irqs_disabled - test the virtual interrupt state
|
*
|
* Returns non-zero if interrupts are currently disabled for the
|
* inband stage, zero otherwise.
|
*
|
* May be used from the oob stage too (e.g. for tracing
|
* purposes).
|
*/
|
noinstr int inband_irqs_disabled(void)
|
{
|
return test_inband_stall();
|
}
|
EXPORT_SYMBOL(inband_irqs_disabled);
|
|
/**
|
* inband_irq_save - test and disable (virtual) interrupts
|
*
|
* Save the virtual interrupt state, then disable interrupts for
|
* the inband stage.
|
*
|
* Returns the original interrupt state.
|
*/
|
trace_on_debug unsigned long inband_irq_save(void)
|
{
|
check_inband_stage();
|
return test_and_stall_inband_nocheck();
|
}
|
EXPORT_SYMBOL(inband_irq_save);
|
|
/**
|
* inband_irq_restore - restore the (virtual) interrupt state
|
* @flags: Interrupt state to restore
*
* Restore the virtual interrupt state from @flags. If the inband
|
* stage is unstalled as a consequence of this operation, any
|
* interrupt pending for the inband stage in the per-CPU log is
|
* played back.
|
*/
|
trace_on_debug void inband_irq_restore(unsigned long flags)
|
{
|
if (flags)
|
inband_irq_disable();
|
else
|
__inband_irq_enable();
|
}
|
EXPORT_SYMBOL(inband_irq_restore);
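/*
 * Example (illustrative sketch, not part of this file): the usual
 * save/restore pairing around a short in-band critical section. The
 * foo_* name is hypothetical.
 *
 *      unsigned long flags;
 *
 *      flags = inband_irq_save();
 *      foo_update_inband_state();
 *      inband_irq_restore(flags);
 *
 * Only the virtual (stall bit) state is affected; out-of-band
 * interrupts may still preempt the section.
 */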
|
|
/**
|
* oob_irq_enable - enable interrupts in the CPU
|
*
|
* Enable interrupts in the CPU, allowing out-of-band interrupts
|
* to preempt any code. If out-of-band IRQs are pending in the
|
* per-CPU log for the oob stage at the time of this call, they
|
* are played back.
|
*/
|
trace_on_debug void oob_irq_enable(void)
|
{
|
struct irq_stage_data *p;
|
|
hard_local_irq_disable();
|
|
unstall_oob();
|
|
p = this_oob_staged();
|
if (unlikely(stage_irqs_pending(p)))
|
synchronize_pipeline();
|
|
hard_local_irq_enable();
|
}
|
EXPORT_SYMBOL(oob_irq_enable);
|
|
/**
|
* oob_irq_restore - restore the hardware interrupt state
|
* @x: Interrupt state to restore
|
*
|
* Restore the hardware interrupt state from x. If the oob stage
|
* is unstalled as a consequence of this operation, any interrupt
|
* pending for the oob stage in the per-CPU log is played back
|
* prior to turning IRQs on.
|
*
|
* NOTE: Stalling the oob stage must always be paired with
|
* disabling hard irqs and conversely when calling
|
* oob_irq_restore(), otherwise the latter would badly misbehave
|
* in unbalanced conditions.
|
*/
|
trace_on_debug void __oob_irq_restore(unsigned long flags) /* hw interrupt off */
|
{
|
struct irq_stage_data *p = this_oob_staged();
|
|
check_hard_irqs_disabled();
|
|
if (!flags) {
|
unstall_oob();
|
if (unlikely(stage_irqs_pending(p)))
|
synchronize_pipeline();
|
hard_local_irq_enable();
|
}
|
}
|
EXPORT_SYMBOL(__oob_irq_restore);
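/*
 * Example (illustrative sketch, not part of this file, assuming the
 * oob_irq_save()/oob_irq_restore() wrappers declared by the irq
 * pipeline headers): keeping the oob stall bit and the hardware
 * interrupt state paired, as required by the NOTE above.
 *
 *      unsigned long flags;
 *
 *      flags = oob_irq_save();
 *      ... hard irqs are off and the oob stage is stalled ...
 *      oob_irq_restore(flags);
 */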
|
|
/**
|
* stage_disabled - test the interrupt state of the current stage
|
*
|
* Returns true if interrupts are currently disabled for the
* current interrupt stage, false otherwise.
* In other words, returns true either if:
|
* - interrupts are disabled for the OOB context (i.e. hard disabled),
|
* - the inband stage is current and inband interrupts are disabled.
|
*/
|
noinstr bool stage_disabled(void)
|
{
|
bool ret = true;
|
|
if (!hard_irqs_disabled()) {
|
ret = false;
|
if (running_inband())
|
ret = test_inband_stall();
|
}
|
|
return ret;
|
}
|
EXPORT_SYMBOL_GPL(stage_disabled);
|
|
/**
|
* test_and_lock_stage - test and disable interrupts for the current stage
|
* @irqsoff: Pointer to boolean denoting stage_disabled()
|
* on entry
|
*
|
* Fully disables interrupts for the current stage. When the
|
* inband stage is current, the stall bit is raised and hardware
|
* IRQs are masked as well. Only the latter operation is
|
* performed when the oob stage is current.
|
*
|
* Returns the combined interrupt state on entry including the
|
* real/hardware (in CPU) and virtual (inband stage) states. For
|
* this reason, [test_and_]lock_stage() must be paired with
|
* unlock_stage() exclusively. The combined irq state returned by
|
* the former may NOT be passed to hard_local_irq_restore().
|
*
|
* The interrupt state of the current stage in the return value
|
* (i.e. stall bit for the inband stage, hardware interrupt bit
|
* for the oob stage) must be testable using
|
* arch_irqs_disabled_flags().
|
*
|
* Notice that test_and_lock_stage() and unlock_stage() are
* raw-level ops, which substitute for raw_local_irq_save() and
* raw_local_irq_restore() in lockdep code. Therefore, changes to
|
* the in-band stall bit must not be propagated to the tracing
|
* core (i.e. no trace_hardirqs_*() annotations).
|
*/
|
noinstr unsigned long test_and_lock_stage(int *irqsoff)
|
{
|
unsigned long flags;
|
int stalled, dummy;
|
|
if (irqsoff == NULL)
|
irqsoff = &dummy;
|
|
/*
|
* Combine the hard irq flag and the stall bit into a single
|
* state word. We need to fill in the stall bit only if the
|
* inband stage is current, otherwise it is not relevant.
|
*/
|
flags = hard_local_irq_save();
|
*irqsoff = hard_irqs_disabled_flags(flags);
|
if (running_inband()) {
|
stalled = test_and_stall_inband_nocheck();
|
flags = irqs_merge_flags(flags, stalled);
|
if (stalled)
|
*irqsoff = 1;
|
}
|
|
/*
|
* CAUTION: don't ever pass this verbatim to
|
* hard_local_irq_restore(). Only unlock_stage() knows how to
|
* decode and use a combined state word.
|
*/
|
return flags;
|
}
|
EXPORT_SYMBOL_GPL(test_and_lock_stage);
|
|
/**
|
* unlock_stage - restore interrupts for the current stage
|
* @irqstate: Combined interrupt state to restore as received from
|
* test_and_lock_stage()
|
*
|
* Restore the virtual interrupt state if the inband stage is
|
* current, and the hardware interrupt state unconditionally.
|
* The per-CPU log is not played for any stage.
|
*/
|
noinstr void unlock_stage(unsigned long irqstate)
|
{
|
unsigned long flags = irqstate;
|
int stalled;
|
|
WARN_ON_ONCE(irq_pipeline_debug_locking() && !hard_irqs_disabled());
|
|
if (running_inband()) {
|
flags = irqs_split_flags(irqstate, &stalled);
|
if (!stalled)
|
unstall_inband_nocheck();
|
}
|
|
/*
|
* The hardware interrupt bit is the only flag which may be
* present in the combined state at this point; all other
* status bits have been cleared by irqs_merge_flags(), so
* don't ever try to reload the hardware status register with
* such a value directly!
|
*/
|
if (!hard_irqs_disabled_flags(flags))
|
hard_local_irq_enable();
|
}
|
EXPORT_SYMBOL_GPL(unlock_stage);
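/*
 * Example (illustrative sketch, not part of this file): the only
 * valid pairing for the combined state word, e.g. from locking
 * primitives which must work from either stage.
 *
 *      unsigned long irqstate;
 *      int irqsoff;
 *
 *      irqstate = test_and_lock_stage(&irqsoff);
 *      ... fully protected section, hard irqs off ...
 *      unlock_stage(irqstate);
 *
 * Passing irqstate to hard_local_irq_restore() instead would be a
 * bug, as stressed in the description above.
 */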
|
|
/**
|
* sync_inband_irqs - Synchronize the inband log
|
*
|
* Play any deferred interrupt which might have been logged for the
|
* in-band stage while running with hard irqs on but stalled.
|
*
|
* Called from the unstalled in-band stage. Returns with hard irqs off.
|
*/
|
void sync_inband_irqs(void)
|
{
|
struct irq_stage_data *p;
|
|
check_inband_stage();
|
WARN_ON_ONCE(irq_pipeline_debug() && irqs_disabled());
|
|
if (!hard_irqs_disabled())
|
hard_local_irq_disable();
|
|
p = this_inband_staged();
|
if (unlikely(stage_irqs_pending(p))) {
|
/* Do not pile up preemption frames. */
|
preempt_disable_notrace();
|
sync_current_irq_stage();
|
preempt_enable_no_resched_notrace();
|
}
|
}
|
|
static inline bool irq_post_check(struct irq_stage *stage, unsigned int irq)
|
{
|
if (irq_pipeline_debug()) {
|
if (WARN_ONCE(!hard_irqs_disabled(),
|
"hard irqs on posting IRQ%u to %s\n",
|
irq, stage->name))
|
return true;
|
if (WARN_ONCE(irq >= IRQ_BITMAP_BITS,
|
"cannot post invalid IRQ%u to %s\n",
|
irq, stage->name))
|
return true;
|
}
|
|
return false;
|
}
|
|
#if __IRQ_STAGE_MAP_LEVELS == 4
|
|
/* Must be called hard irqs off. */
|
void irq_post_stage(struct irq_stage *stage, unsigned int irq)
|
{
|
struct irq_stage_data *p = this_staged(stage);
|
int l0b, l1b, l2b;
|
|
if (irq_post_check(stage, irq))
|
return;
|
|
l0b = irq / (BITS_PER_LONG * BITS_PER_LONG * BITS_PER_LONG);
|
l1b = irq / (BITS_PER_LONG * BITS_PER_LONG);
|
l2b = irq / BITS_PER_LONG;
|
|
__set_bit(irq, p->log.map->flat);
|
__set_bit(l2b, p->log.map->index_2);
|
__set_bit(l1b, p->log.map->index_1);
|
__set_bit(l0b, &p->log.index_0);
|
}
|
EXPORT_SYMBOL_GPL(irq_post_stage);
|
|
#define ltob_1(__n) ((__n) * BITS_PER_LONG)
|
#define ltob_2(__n) (ltob_1(__n) * BITS_PER_LONG)
|
#define ltob_3(__n) (ltob_2(__n) * BITS_PER_LONG)
|
|
static inline int pull_next_irq(struct irq_stage_data *p)
|
{
|
unsigned long l0m, l1m, l2m, l3m;
|
int l0b, l1b, l2b, l3b;
|
unsigned int irq;
|
|
l0m = p->log.index_0;
|
if (l0m == 0)
|
return -1;
|
l0b = __ffs(l0m);
|
irq = ltob_3(l0b);
|
|
l1m = p->log.map->index_1[l0b];
|
if (unlikely(l1m == 0)) {
|
WARN_ON_ONCE(1);
|
return -1;
|
}
|
l1b = __ffs(l1m);
|
irq += ltob_2(l1b);
|
|
l2m = p->log.map->index_2[ltob_1(l0b) + l1b];
|
if (unlikely(l2m == 0)) {
|
WARN_ON_ONCE(1);
|
return -1;
|
}
|
l2b = __ffs(l2m);
|
irq += ltob_1(l2b);
|
|
l3m = p->log.map->flat[ltob_2(l0b) + ltob_1(l1b) + l2b];
|
if (unlikely(l3m == 0))
|
return -1;
|
l3b = __ffs(l3m);
|
irq += l3b;
|
|
__clear_bit(irq, p->log.map->flat);
|
if (p->log.map->flat[irq / BITS_PER_LONG] == 0) {
|
__clear_bit(l2b, &p->log.map->index_2[ltob_1(l0b) + l1b]);
|
if (p->log.map->index_2[ltob_1(l0b) + l1b] == 0) {
|
__clear_bit(l1b, &p->log.map->index_1[l0b]);
|
if (p->log.map->index_1[l0b] == 0)
|
__clear_bit(l0b, &p->log.index_0);
|
}
|
}
|
|
return irq;
|
}
|
|
#elif __IRQ_STAGE_MAP_LEVELS == 3
|
|
/* Must be called hard irqs off. */
|
void irq_post_stage(struct irq_stage *stage, unsigned int irq)
|
{
|
struct irq_stage_data *p = this_staged(stage);
|
int l0b, l1b;
|
|
if (irq_post_check(stage, irq))
|
return;
|
|
l0b = irq / (BITS_PER_LONG * BITS_PER_LONG);
|
l1b = irq / BITS_PER_LONG;
|
|
__set_bit(irq, p->log.map->flat);
|
__set_bit(l1b, p->log.map->index_1);
|
__set_bit(l0b, &p->log.index_0);
|
}
|
EXPORT_SYMBOL_GPL(irq_post_stage);
|
|
static inline int pull_next_irq(struct irq_stage_data *p)
|
{
|
unsigned long l0m, l1m, l2m;
|
int l0b, l1b, l2b, irq;
|
|
l0m = p->log.index_0;
|
if (unlikely(l0m == 0))
|
return -1;
|
|
l0b = __ffs(l0m);
|
l1m = p->log.map->index_1[l0b];
|
if (l1m == 0)
|
return -1;
|
|
l1b = __ffs(l1m) + l0b * BITS_PER_LONG;
|
l2m = p->log.map->flat[l1b];
|
if (unlikely(l2m == 0)) {
|
WARN_ON_ONCE(1);
|
return -1;
|
}
|
|
l2b = __ffs(l2m);
|
irq = l1b * BITS_PER_LONG + l2b;
|
|
__clear_bit(irq, p->log.map->flat);
|
if (p->log.map->flat[l1b] == 0) {
|
__clear_bit(l1b, p->log.map->index_1);
|
if (p->log.map->index_1[l0b] == 0)
|
__clear_bit(l0b, &p->log.index_0);
|
}
|
|
return irq;
|
}
|
|
#else /* __IRQ_STAGE_MAP_LEVELS == 2 */
|
|
/* Must be called hard irqs off. */
|
void irq_post_stage(struct irq_stage *stage, unsigned int irq)
|
{
|
struct irq_stage_data *p = this_staged(stage);
|
int l0b = irq / BITS_PER_LONG;
|
|
if (irq_post_check(stage, irq))
|
return;
|
|
__set_bit(irq, p->log.map->flat);
|
__set_bit(l0b, &p->log.index_0);
|
}
|
EXPORT_SYMBOL_GPL(irq_post_stage);
|
|
static inline int pull_next_irq(struct irq_stage_data *p)
|
{
|
unsigned long l0m, l1m;
|
int l0b, l1b;
|
|
l0m = p->log.index_0;
|
if (l0m == 0)
|
return -1;
|
|
l0b = __ffs(l0m);
|
l1m = p->log.map->flat[l0b];
|
if (unlikely(l1m == 0)) {
|
WARN_ON_ONCE(1);
|
return -1;
|
}
|
|
l1b = __ffs(l1m);
|
__clear_bit(l1b, &p->log.map->flat[l0b]);
|
if (p->log.map->flat[l0b] == 0)
|
__clear_bit(l0b, &p->log.index_0);
|
|
return l0b * BITS_PER_LONG + l1b;
|
}
|
|
#endif /* __IRQ_STAGE_MAP_LEVELS == 2 */
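/*
 * Worked example (illustrative, for the 2-level layout on a 64-bit
 * machine, i.e. BITS_PER_LONG == 64, assuming no other IRQ is
 * pending): posting IRQ 131 sets bit 131 of the flat map, which
 * lives in flat[131 / 64] = flat[2] at bit position 131 % 64 = 3,
 * and sets bit l0b = 2 in index_0. pull_next_irq() then finds
 * l0b = __ffs(index_0) = 2, l1b = __ffs(flat[2]) = 3, clears bit 3
 * of flat[2] (and bit 2 of index_0 since that word is now empty),
 * and returns 2 * 64 + 3 = 131. The 3- and 4-level variants above
 * apply the same decomposition one or two more times.
 */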
|
|
/**
|
* hard_preempt_disable - Disable preemption the hard way
|
*
|
* Disable hardware interrupts in the CPU, and disable preemption
|
* if currently running in-band code on the inband stage.
|
*
|
* Return the hardware interrupt state.
|
*/
|
unsigned long hard_preempt_disable(void)
|
{
|
unsigned long flags = hard_local_irq_save();
|
|
if (running_inband())
|
preempt_disable();
|
|
return flags;
|
}
|
EXPORT_SYMBOL_GPL(hard_preempt_disable);
|
|
/**
|
* hard_preempt_enable - Enable preemption the hard way
|
*
|
* Enable preemption if currently running in-band code on the
|
* inband stage, restoring the hardware interrupt state in the CPU.
|
* The per-CPU log is not played for the oob stage.
|
*/
|
void hard_preempt_enable(unsigned long flags)
|
{
|
if (running_inband()) {
|
preempt_enable_no_resched();
|
hard_local_irq_restore(flags);
|
if (!hard_irqs_disabled_flags(flags))
|
preempt_check_resched();
|
} else
|
hard_local_irq_restore(flags);
|
}
|
EXPORT_SYMBOL_GPL(hard_preempt_enable);
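/*
 * Example (illustrative sketch, not part of this file): protecting a
 * short section against both hard interrupts and in-band preemption,
 * from code which may run on either stage. The foo_* name is
 * hypothetical.
 *
 *      unsigned long flags;
 *
 *      flags = hard_preempt_disable();
 *      foo_touch_shared_state();
 *      hard_preempt_enable(flags);
 */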
|
|
static void handle_unexpected_irq(struct irq_desc *desc, irqreturn_t ret)
|
{
|
unsigned int irq = irq_desc_get_irq(desc);
|
struct irqaction *action;
|
|
/*
|
* Since IRQ_HANDLED was not received from any handler, we may
|
* have a problem dealing with an OOB interrupt. The error
|
* detection logic is as follows:
|
*
|
* - check and complain about any bogus return value from an
* out-of-band IRQ handler: we only allow IRQ_HANDLED and
|
* IRQ_NONE from those routines.
|
*
|
* - filter out spurious IRQs which may have been due to bus
|
* asynchronicity; those tend to happen infrequently and
* should not cause us to pull the brake (see
|
* note_interrupt()).
|
*
|
* - otherwise, stop pipelining the IRQ line after a thousand
|
* consecutive unhandled events.
|
*
|
* NOTE: we should already be holding desc->lock for non
|
* per-cpu IRQs, since we should only get there from the
|
* pipeline entry context.
|
*/
|
|
WARN_ON_ONCE(irq_pipeline_debug() &&
|
!irq_settings_is_per_cpu(desc) &&
|
!raw_spin_is_locked(&desc->lock));
|
|
if (ret != IRQ_NONE) {
|
printk(KERN_ERR "out-of-band irq event %d: bogus return value %x\n",
|
irq, ret);
|
for_each_action_of_desc(desc, action)
|
printk(KERN_ERR "[<%p>] %pf",
|
action->handler, action->handler);
|
printk(KERN_CONT "\n");
|
return;
|
}
|
|
if (time_after(jiffies, desc->last_unhandled + HZ/10))
|
desc->irqs_unhandled = 0;
|
else
|
desc->irqs_unhandled++;
|
|
desc->last_unhandled = jiffies;
|
|
if (unlikely(desc->irqs_unhandled > 1000)) {
|
printk(KERN_ERR "out-of-band irq %d: stuck or unexpected\n", irq);
|
irq_settings_clr_oob(desc);
|
desc->istate |= IRQS_SPURIOUS_DISABLED;
|
irq_disable(desc);
|
}
|
}
|
|
static inline void incr_irq_kstat(struct irq_desc *desc)
|
{
|
if (irq_settings_is_per_cpu_devid(desc))
|
__kstat_incr_irqs_this_cpu(desc);
|
else
|
kstat_incr_irqs_this_cpu(desc);
|
}
|
|
/*
|
* do_oob_irq() - Handles interrupts over the oob stage. Hard irqs
|
* off.
|
*/
|
static void do_oob_irq(struct irq_desc *desc)
|
{
|
bool percpu_devid = irq_settings_is_per_cpu_devid(desc);
|
unsigned int irq = irq_desc_get_irq(desc);
|
irqreturn_t ret = IRQ_NONE, res;
|
struct irqaction *action;
|
void *dev_id;
|
|
action = desc->action;
|
if (unlikely(action == NULL))
|
goto done;
|
|
if (percpu_devid) {
|
trace_irq_handler_entry(irq, action);
|
dev_id = raw_cpu_ptr(action->percpu_dev_id);
|
ret = action->handler(irq, dev_id);
|
trace_irq_handler_exit(irq, action, ret);
|
} else {
|
desc->istate &= ~IRQS_PENDING;
|
if (unlikely(irqd_irq_disabled(&desc->irq_data)))
|
return;
|
irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
|
raw_spin_unlock(&desc->lock);
|
for_each_action_of_desc(desc, action) {
|
trace_irq_handler_entry(irq, action);
|
dev_id = action->dev_id;
|
res = action->handler(irq, dev_id);
|
trace_irq_handler_exit(irq, action, res);
|
ret |= res;
|
}
|
raw_spin_lock(&desc->lock);
|
irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
|
}
|
done:
|
incr_irq_kstat(desc);
|
|
if (likely(ret & IRQ_HANDLED)) {
|
desc->irqs_unhandled = 0;
|
return;
|
}
|
|
handle_unexpected_irq(desc, ret);
|
}
|
|
/*
|
* Over the inband stage, IRQs must be dispatched by the arch-specific
|
* arch_do_IRQ_pipelined() routine.
|
*
|
* Entered with hardirqs on, inband stalled.
|
*/
|
static inline
|
void do_inband_irq(struct irq_desc *desc)
|
{
|
arch_do_IRQ_pipelined(desc);
|
WARN_ON_ONCE(irq_pipeline_debug() && !irqs_disabled());
|
}
|
|
static inline bool is_active_edge_event(struct irq_desc *desc)
|
{
|
return (desc->istate & IRQS_PENDING) &&
|
!irqd_irq_disabled(&desc->irq_data);
|
}
|
|
bool handle_oob_irq(struct irq_desc *desc) /* hardirqs off */
|
{
|
struct irq_stage_data *oobd = this_oob_staged();
|
unsigned int irq = irq_desc_get_irq(desc);
|
int stalled;
|
|
/*
|
* Flow handlers of chained interrupts have no business
|
* running here: they should decode the event, invoking
|
* generic_handle_irq() for each cascaded IRQ.
|
*/
|
if (WARN_ON_ONCE(irq_pipeline_debug() &&
|
irq_settings_is_chained(desc)))
|
return false;
|
|
/*
|
* If no oob stage is present, all interrupts must go to the
|
* inband stage through the interrupt log. Otherwise,
|
* out-of-band IRQs are immediately delivered to the oob
|
* stage, while in-band IRQs still go through the inband stage
|
* log.
|
*
|
* This routine returns a boolean status telling the caller
|
* whether an out-of-band interrupt was delivered.
|
*/
|
if (!oob_stage_present() || !irq_settings_is_oob(desc)) {
|
irq_post_stage(&inband_stage, irq);
|
return false;
|
}
|
|
if (WARN_ON_ONCE(irq_pipeline_debug() && running_inband()))
|
return false;
|
|
stalled = test_and_stall_oob();
|
|
if (unlikely(desc->istate & IRQS_EDGE)) {
|
do {
|
if (is_active_edge_event(desc)) {
|
if (irqd_irq_masked(&desc->irq_data))
|
unmask_irq(desc);
|
}
|
do_oob_irq(desc);
|
} while (is_active_edge_event(desc));
|
} else {
|
do_oob_irq(desc);
|
}
|
|
/*
|
* Cascaded interrupts enter handle_oob_irq() on the stalled
|
* out-of-band stage during the parent invocation. Make sure
|
* to restore the stall bit accordingly.
|
*/
|
if (likely(!stalled))
|
unstall_oob();
|
|
/*
|
* CPU migration and/or stage switching over the handler are
|
* NOT allowed. These should take place over
|
* irq_exit_pipeline().
|
*/
|
if (irq_pipeline_debug()) {
|
/* No CPU migration allowed. */
|
WARN_ON_ONCE(this_oob_staged() != oobd);
|
/* No stage migration allowed. */
|
WARN_ON_ONCE(current_irq_staged != oobd);
|
}
|
|
return true;
|
}
|
|
static inline
|
void copy_timer_regs(struct irq_desc *desc, struct pt_regs *regs)
|
{
|
struct irq_pipeline_data *p;
|
|
if (desc->action == NULL || !(desc->action->flags & __IRQF_TIMER))
|
return;
|
/*
|
* Given our deferred dispatching model for regular IRQs, we
|
* record the preempted context registers only for the latest
|
* timer interrupt, so that the regular tick handler charges
|
* CPU time properly. It is assumed that no other interrupt
|
* handler cares for such information.
|
*/
|
p = raw_cpu_ptr(&irq_pipeline);
|
arch_save_timer_regs(&p->tick_regs, regs);
|
}
|
|
static __always_inline
|
struct irq_stage_data *switch_stage_on_irq(void)
|
{
|
struct irq_stage_data *prevd = current_irq_staged, *nextd;
|
|
if (oob_stage_present()) {
|
nextd = this_oob_staged();
|
if (prevd != nextd)
|
switch_oob(nextd);
|
}
|
|
return prevd;
|
}
|
|
static __always_inline
|
void restore_stage_on_irq(struct irq_stage_data *prevd)
|
{
|
/*
|
* CPU migration and/or stage switching over
|
* irq_exit_pipeline() are allowed. Our exit logic is as
|
* follows:
|
*
|
*   ENTRY       EXIT         EPILOGUE
*
*   oob         oob          nop
*   inband      oob          switch inband
*   oob         inband       nop
*   inband      inband       nop
|
*/
|
if (prevd->stage == &inband_stage &&
|
current_irq_staged == this_oob_staged())
|
switch_inband(this_inband_staged());
|
}
|
|
/**
|
* generic_pipeline_irq_desc - Pass an IRQ to the pipeline
|
* @desc: Descriptor of the IRQ to pass
|
* @regs: Register file coming from the low-level handling code
|
*
|
* Inject an IRQ into the pipeline from a CPU interrupt or trap
|
* context. A flow handler runs next for this IRQ.
|
*
|
* Hard irqs must be off on entry. Caller should have pushed the
|
* IRQ regs using set_irq_regs().
|
*/
|
void generic_pipeline_irq_desc(struct irq_desc *desc, struct pt_regs *regs)
|
{
|
int irq = irq_desc_get_irq(desc);
|
|
if (irq_pipeline_debug() && !hard_irqs_disabled()) {
|
hard_local_irq_disable();
|
pr_err("IRQ pipeline: interrupts enabled on entry (IRQ%u)\n", irq);
|
}
|
|
trace_irq_pipeline_entry(irq);
|
copy_timer_regs(desc, regs);
|
generic_handle_irq_desc(desc);
|
trace_irq_pipeline_exit(irq);
|
}
|
|
void generic_pipeline_irq(unsigned int irq, struct pt_regs *regs)
|
{
|
struct irq_desc *desc = irq_to_cached_desc(irq);
|
struct pt_regs *old_regs;
|
|
old_regs = set_irq_regs(regs);
|
generic_pipeline_irq_desc(desc, regs);
|
set_irq_regs(old_regs);
|
}
|
|
struct irq_stage_data *handle_irq_pipelined_prepare(struct pt_regs *regs)
|
{
|
struct irq_stage_data *prevd;
|
|
/*
|
* Running with the oob stage stalled implies hardirqs off.
|
* For this reason, if the oob stage is stalled when we
|
* receive an interrupt from the hardware, something is badly
|
* broken in our interrupt state. Try fixing up, but without
|
* great hopes.
|
*/
|
if (irq_pipeline_debug()) {
|
if (test_oob_stall()) {
|
pr_err("IRQ pipeline: out-of-band stage stalled on IRQ entry\n");
|
unstall_oob();
|
}
|
WARN_ON(on_pipeline_entry());
|
}
|
|
/*
|
* Switch early on to the out-of-band stage if present,
|
* anticipating a companion kernel is going to handle the
|
* incoming event. If not, never mind, we will switch back
|
* in-band before synchronizing interrupts.
|
*/
|
prevd = switch_stage_on_irq();
|
|
/* Tell the companion core about the entry. */
|
irq_enter_pipeline();
|
|
/*
|
* Invariant: IRQs may not pile up in the section covered by
|
* the PIPELINE_OFFSET marker, because:
|
*
|
* - out-of-band handlers called from handle_oob_irq() may NOT
|
* re-enable hard interrupts. Ever.
|
*
|
* - synchronizing the in-band log with hard interrupts
|
* enabled is done outside of this section.
|
*/
|
preempt_count_add(PIPELINE_OFFSET);
|
|
/*
|
* From the standpoint of the in-band context when pipelining
|
* is in effect, an interrupt entry is unsafe in a similar way
|
* an NMI is, since it may preempt almost anywhere as IRQs are
* only virtually masked most of the time, including inside
* (virtually) interrupt-free sections. Declare an NMI entry so
* that the low-level handling code is allowed to enter RCU read
|
* sides (e.g. handle_domain_irq() needs this to resolve IRQ
|
* mappings).
|
*/
|
rcu_nmi_enter();
|
|
return prevd;
|
}
|
|
int handle_irq_pipelined_finish(struct irq_stage_data *prevd,
|
struct pt_regs *regs)
|
{
|
/*
|
* Leave the (pseudo-)NMI entry for RCU before the out-of-band
|
* core might reschedule in irq_exit_pipeline(), and
|
* interrupts are hard enabled again on this CPU as a result
|
* of switching context.
|
*/
|
rcu_nmi_exit();
|
|
/*
|
* Make sure to leave the pipeline entry context before
|
* allowing the companion core to reschedule, and eventually
|
* synchronizing interrupts.
|
*/
|
preempt_count_sub(PIPELINE_OFFSET);
|
|
/* Allow the companion core to reschedule. */
|
irq_exit_pipeline();
|
|
/* Back to the preempted stage. */
|
restore_stage_on_irq(prevd);
|
|
/*
|
* We have to synchronize interrupts because some might have
|
* been logged while we were busy handling an out-of-band
|
* event coming from the hardware:
|
*
|
* - as a result of calling an out-of-band handler which in
|
* turn posted them.
|
*
|
* - because we posted them directly for scheduling the
|
* interrupt to happen from the in-band stage.
|
*/
|
synchronize_pipeline_on_irq();
|
|
#ifdef CONFIG_DOVETAIL
|
/*
|
* Sending MAYDAY is in essence a rare case, so prefer test
|
* then maybe clear over test_and_clear.
|
*/
|
if (user_mode(regs) && test_thread_flag(TIF_MAYDAY))
|
dovetail_call_mayday(regs);
|
#endif
|
|
return running_inband() && !irqs_disabled();
|
}
|
|
int handle_irq_pipelined(struct pt_regs *regs)
|
{
|
struct irq_stage_data *prevd;
|
|
prevd = handle_irq_pipelined_prepare(regs);
|
handle_arch_irq(regs);
|
return handle_irq_pipelined_finish(prevd, regs);
|
}
|
|
/**
* irq_inject_pipeline - Inject a software-generated IRQ into the
* pipeline
* @irq: IRQ to inject
*
* Inject an IRQ into the pipeline by software as if such a
* hardware event had happened on the current CPU.
*/
|
int irq_inject_pipeline(unsigned int irq)
|
{
|
struct irq_stage_data *oobd, *prevd;
|
struct irq_desc *desc;
|
unsigned long flags;
|
|
desc = irq_to_cached_desc(irq);
|
if (desc == NULL)
|
return -EINVAL;
|
|
flags = hard_local_irq_save();
|
|
/*
|
* Handle the case of an IRQ sent to a stalled oob stage here,
|
* which allows us to trap the same condition in handle_oob_irq()
|
* in a debug check (see comment there).
|
*/
|
oobd = this_oob_staged();
|
if (oob_stage_present() &&
|
irq_settings_is_oob(desc) &&
|
test_oob_stall()) {
|
irq_post_stage(&oob_stage, irq);
|
} else {
|
prevd = switch_stage_on_irq();
|
irq_enter_pipeline();
|
handle_oob_irq(desc);
|
irq_exit_pipeline();
|
restore_stage_on_irq(prevd);
|
synchronize_pipeline_on_irq();
|
}
|
|
hard_local_irq_restore(flags);
|
|
return 0;
}
|
EXPORT_SYMBOL_GPL(irq_inject_pipeline);
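/*
 * Example (illustrative sketch, not part of this file): a companion
 * core re-injecting a previously allocated synthetic or hardware IRQ
 * as if it had just fired on the current CPU. The sirq variable is
 * assumed to hold a valid mapping.
 *
 *      int ret = irq_inject_pipeline(sirq);
 *      if (ret)
 *              pr_warn("cannot inject IRQ%u (%d)\n", sirq, ret);
 */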
|
|
/*
|
* sync_current_irq_stage() -- Flush the pending IRQs for the current
|
* stage (and processor). This routine flushes the interrupt log (see
|
* "Optimistic interrupt protection" from D. Stodolsky et al. for more
|
* on the deferred interrupt scheme). Every interrupt which has
|
* occurred while the pipeline was stalled gets played.
|
*
|
* CAUTION: CPU migration may occur over this routine if running over
|
* the inband stage.
|
*/
|
void sync_current_irq_stage(void) /* hard irqs off */
|
{
|
struct irq_stage_data *p;
|
struct irq_stage *stage;
|
struct irq_desc *desc;
|
int irq;
|
|
WARN_ON_ONCE(irq_pipeline_debug() && on_pipeline_entry());
|
check_hard_irqs_disabled();
|
|
p = current_irq_staged;
|
respin:
|
stage = p->stage;
|
if (stage == &inband_stage) {
|
/*
|
* Since we manipulate the stall bit directly, we have
|
* to open code the IRQ state tracing.
|
*/
|
stall_inband_nocheck();
|
trace_hardirqs_off();
|
} else {
|
stall_oob();
|
}
|
|
for (;;) {
|
irq = pull_next_irq(p);
|
if (irq < 0)
|
break;
|
/*
|
* Make sure the compiler does not reorder wrongly, so
|
* that all updates to maps are done before the
|
* handler gets called.
|
*/
|
barrier();
|
|
desc = irq_to_cached_desc(irq);
|
|
if (stage == &inband_stage) {
|
hard_local_irq_enable();
|
do_inband_irq(desc);
|
hard_local_irq_disable();
|
} else {
|
do_oob_irq(desc);
|
}
|
|
/*
|
* We might have switched from the oob stage to the
|
* in-band one on return from the handler, in which
|
* case we might also have migrated to a different CPU
|
* (the converse in-band -> oob switch is NOT allowed
|
* though). Reload the current per-cpu context
|
* pointer, so that we further pull pending interrupts
|
* from the proper in-band log.
|
*/
|
p = current_irq_staged;
|
if (p->stage != stage) {
|
if (WARN_ON_ONCE(irq_pipeline_debug() &&
|
stage == &inband_stage))
|
break;
|
goto respin;
|
}
|
}
|
|
if (stage == &inband_stage) {
|
trace_hardirqs_on();
|
unstall_inband_nocheck();
|
} else {
|
unstall_oob();
|
}
|
}
|
|
#ifndef CONFIG_GENERIC_ENTRY
|
|
/*
|
* These helpers are normally called from the kernel entry/exit code
|
* in the asm section by architectures which do not use the generic
|
* kernel entry code, in order to save the interrupt and lockdep
|
* states for the in-band stage on entry, restoring them when leaving
|
* the kernel. The per-architecture arch_kentry_set/get_irqstate()
|
* calls determine where this information should be kept while running
|
* in kernel context, indexed on the current register frame.
|
*/
|
|
#define KENTRY_STALL_BIT BIT(0) /* Tracks INBAND_STALL_BIT */
|
#define KENTRY_LOCKDEP_BIT BIT(1) /* Tracks hardirqs_enabled */
|
|
asmlinkage __visible noinstr void kentry_enter_pipelined(struct pt_regs *regs)
|
{
|
long irqstate = 0;
|
|
WARN_ON(irq_pipeline_debug() && !hard_irqs_disabled());
|
|
if (!running_inband())
|
return;
|
|
if (lockdep_read_irqs_state())
|
irqstate |= KENTRY_LOCKDEP_BIT;
|
|
if (irqs_disabled())
|
irqstate |= KENTRY_STALL_BIT;
|
else
|
trace_hardirqs_off();
|
|
arch_kentry_set_irqstate(regs, irqstate);
|
}
|
|
asmlinkage void __visible noinstr kentry_exit_pipelined(struct pt_regs *regs)
|
{
|
long irqstate;
|
|
WARN_ON(irq_pipeline_debug() && !hard_irqs_disabled());
|
|
if (!running_inband())
|
return;
|
|
/*
|
* If the in-band stage of the kernel is current but the IRQ
|
* is not going to be delivered because the latter is stalled,
|
* keep the tracing logic unaware of the receipt, so that no
|
* false positive is triggered in lockdep (e.g. IN-HARDIRQ-W
|
* -> HARDIRQ-ON-W). In this case, we still have to restore
|
* the lockdep irq state independently, since it might not be
|
* in sync with the stall bit (e.g. raw_local_irq_disable/save
|
* do flip the stall bit, but are not tracked by lockdep).
|
*/
|
|
irqstate = arch_kentry_get_irqstate(regs);
|
if (!(irqstate & KENTRY_STALL_BIT)) {
|
stall_inband_nocheck();
|
trace_hardirqs_on();
|
unstall_inband_nocheck();
|
} else {
|
lockdep_write_irqs_state(!!(irqstate & KENTRY_LOCKDEP_BIT));
|
}
|
}
|
|
#endif /* !CONFIG_GENERIC_ENTRY */
|
|
/**
|
* run_oob_call - escalate function call to the oob stage
|
* @fn: address of routine
|
* @arg: routine argument
|
*
|
* Make the specified function run on the oob stage, switching
|
* the current stage accordingly if needed. The escalated call is
|
* allowed to perform a stage migration in the process.
|
*/
|
int notrace run_oob_call(int (*fn)(void *arg), void *arg)
|
{
|
struct irq_stage_data *p, *old;
|
struct irq_stage *oob;
|
unsigned long flags;
|
int ret, s;
|
|
flags = hard_local_irq_save();
|
|
/* Switch to the oob stage if not current. */
|
p = this_oob_staged();
|
oob = p->stage;
|
old = current_irq_staged;
|
if (old != p)
|
switch_oob(p);
|
|
s = test_and_stall_oob();
|
barrier();
|
ret = fn(arg);
|
hard_local_irq_disable();
|
if (!s)
|
unstall_oob();
|
|
/*
|
* The exit logic is as follows:
|
*
|
*   ON-ENTRY    AFTER-CALL   EPILOGUE
*
*   oob         oob          sync current stage if !stalled
*   inband      oob          switch to inband + sync all stages
*   oob         inband       sync all stages
*   inband      inband       sync all stages
|
*
|
* Each path which has stalled the oob stage while running on
|
* the inband stage at some point during the escalation
|
* process must synchronize all stages of the pipeline on
|
* exit. Otherwise, we may restrict the synchronization scope
|
* to the current stage when the whole sequence ran on the oob
|
* stage.
|
*/
|
p = this_oob_staged();
|
if (likely(current_irq_staged == p)) {
|
if (old->stage == oob) {
|
if (!s && stage_irqs_pending(p))
|
sync_current_irq_stage();
|
goto out;
|
}
|
switch_inband(this_inband_staged());
|
}
|
|
sync_irq_stage(oob);
|
out:
|
hard_local_irq_restore(flags);
|
|
return ret;
|
}
|
EXPORT_SYMBOL_GPL(run_oob_call);
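/*
 * Example (illustrative sketch, not part of this file): escalating a
 * rescheduling request so that it runs on the oob stage, a typical
 * pattern in companion cores. The foo_* names are hypothetical.
 *
 *      static int foo_resched_oob(void *arg)
 *      {
 *              struct foo_rq *rq = arg;
 *
 *              return foo_schedule(rq);
 *      }
 *
 *      ret = run_oob_call(foo_resched_oob, rq);
 */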
|
|
int enable_oob_stage(const char *name)
|
{
|
struct irq_event_map *map;
|
struct irq_stage_data *p;
|
int cpu, ret;
|
|
if (oob_stage_present())
|
return -EBUSY;
|
|
/* Set up the out-of-band interrupt stage on all CPUs. */
|
|
for_each_possible_cpu(cpu) {
|
p = &per_cpu(irq_pipeline.stages, cpu)[1];
|
map = p->log.map; /* save/restore after memset(). */
|
memset(p, 0, sizeof(*p));
|
p->stage = &oob_stage;
|
memset(map, 0, sizeof(struct irq_event_map));
|
p->log.map = map;
|
#ifdef CONFIG_DEBUG_IRQ_PIPELINE
|
p->cpu = cpu;
|
#endif
|
}
|
|
ret = arch_enable_oob_stage();
|
if (ret)
|
return ret;
|
|
oob_stage.name = name;
|
smp_wmb();
|
oob_stage.index = 1;
|
|
pr_info("IRQ pipeline: high-priority %s stage added.\n", name);
|
|
return 0;
|
}
|
EXPORT_SYMBOL_GPL(enable_oob_stage);
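/*
 * Example (illustrative sketch, not part of this file): a companion
 * core registering itself at init time, then unwinding when it
 * shuts down. The "Foo" stage name is hypothetical.
 *
 *      ret = enable_oob_stage("Foo");
 *      if (ret)
 *              return ret;
 *      ...
 *      disable_oob_stage();
 */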
|
|
void disable_oob_stage(void)
|
{
|
const char *name = oob_stage.name;
|
|
WARN_ON(!running_inband() || !oob_stage_present());
|
|
oob_stage.index = 0;
|
smp_wmb();
|
|
pr_info("IRQ pipeline: %s stage removed.\n", name);
|
}
|
EXPORT_SYMBOL_GPL(disable_oob_stage);
|
|
void irq_pipeline_oops(void)
|
{
|
irq_pipeline_oopsing = true;
|
local_irq_disable_full();
|
}
|
|
/*
|
* Used to save/restore the status bits of the inband stage across runs
|
* of NMI-triggered code, so that we can restore the original pipeline
|
* state before leaving NMI context.
|
*/
|
static DEFINE_PER_CPU(unsigned long, nmi_saved_stall_bits);
|
|
noinstr void irq_pipeline_nmi_enter(void)
|
{
|
raw_cpu_write(nmi_saved_stall_bits, current->stall_bits);
}
|
EXPORT_SYMBOL(irq_pipeline_nmi_enter);
|
|
noinstr void irq_pipeline_nmi_exit(void)
|
{
|
current->stall_bits = raw_cpu_read(nmi_saved_stall_bits);
|
}
|
EXPORT_SYMBOL(irq_pipeline_nmi_exit);
|
|
bool __weak irq_cpuidle_control(struct cpuidle_device *dev,
|
struct cpuidle_state *state)
|
{
|
/*
|
* Allow entering the idle state by default, matching the
* original behavior when CPU_IDLE is turned
* on. irq_cpuidle_control() may be overridden by
* out-of-band code for determining whether the CPU may
* actually enter the idle state.
|
*/
|
return true;
|
}
|
|
/**
|
* irq_cpuidle_enter - Prepare for entering the next idle state
|
* @dev: CPUIDLE device
|
* @state: CPUIDLE state to be entered
|
*
|
* Flush the in-band interrupt log before the caller idles, so
|
* that no event lingers before we actually wait for the next
|
* IRQ, in which case we ask the caller to abort the idling
|
* process altogether. The companion core is also given the
|
* opportunity to block the idling process by having
|
* irq_cpuidle_control() return @false.
|
*
|
* Returns @true if caller may proceed with idling, @false
|
* otherwise. The in-band log is guaranteed empty on return, hard
|
* irqs left off so that no event might sneak in until the caller
|
* actually idles.
|
*/
|
bool irq_cpuidle_enter(struct cpuidle_device *dev,
|
struct cpuidle_state *state)
|
{
|
WARN_ON_ONCE(irq_pipeline_debug() && !irqs_disabled());
|
|
hard_local_irq_disable();
|
|
if (stage_irqs_pending(this_inband_staged())) {
|
unstall_inband_nocheck();
|
synchronize_pipeline();
|
stall_inband_nocheck();
|
trace_hardirqs_off();
|
return false;
|
}
|
|
return irq_cpuidle_control(dev, state);
|
}
|
|
static unsigned int inband_work_sirq;
|
|
static irqreturn_t inband_work_interrupt(int sirq, void *dev_id)
|
{
|
irq_work_run();
|
|
return IRQ_HANDLED;
|
}
|
|
static struct irqaction inband_work = {
|
.handler = inband_work_interrupt,
|
.name = "in-band work",
|
.flags = IRQF_NO_THREAD,
|
};
|
|
void irq_local_work_raise(void)
|
{
|
unsigned long flags;
|
|
/*
|
* irq_work_queue() may be called from the in-band stage too
|
* in case we want to delay a work until the hard irqs are on
|
* again, so we may only sync the in-band log when unstalled,
|
* with hard irqs on.
|
*/
|
flags = hard_local_irq_save();
|
irq_post_inband(inband_work_sirq);
|
if (running_inband() &&
|
!hard_irqs_disabled_flags(flags) && !irqs_disabled())
|
sync_current_irq_stage();
|
hard_local_irq_restore(flags);
|
}
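/*
 * Example (illustrative sketch, not part of this file): deferring
 * work from a (possibly out-of-band) context to the in-band stage
 * via the generic irq_work API; with pipelining enabled, queuing is
 * expected to raise the in-band work sirq via irq_local_work_raise()
 * above, and foo_deferred() runs once the in-band log is
 * synchronized. The foo_* names are hypothetical.
 *
 *      static void foo_deferred(struct irq_work *work)
 *      {
 *              pr_info("running in-band again\n");
 *      }
 *      static DEFINE_IRQ_WORK(foo_work, foo_deferred);
 *
 *      irq_work_queue(&foo_work);
 */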
|
|
#ifdef CONFIG_DEBUG_IRQ_PIPELINE
|
|
#ifdef CONFIG_LOCKDEP
|
static inline bool lockdep_on_error(void)
|
{
|
return !debug_locks;
|
}
|
#else
|
static inline bool lockdep_on_error(void)
|
{
|
return false;
|
}
|
#endif
|
|
notrace void check_inband_stage(void)
|
{
|
struct irq_stage *this_stage;
|
unsigned long flags;
|
|
flags = hard_local_irq_save();
|
|
this_stage = current_irq_stage;
|
if (likely(this_stage == &inband_stage && !test_oob_stall())) {
|
hard_local_irq_restore(flags);
|
return;
|
}
|
|
if (in_nmi() || irq_pipeline_oopsing || lockdep_on_error()) {
|
hard_local_irq_restore(flags);
|
return;
|
}
|
|
/*
|
* This will disable all further pipeline debug checks, since
|
* a wrecked interrupt state is likely to trigger many of
|
* them, ending up in a terrible mess. IOW, the current
|
* situation must be fixed prior to investigating any
|
* subsequent issue that might still exist.
|
*/
|
irq_pipeline_oopsing = true;
|
|
hard_local_irq_restore(flags);
|
|
if (this_stage != &inband_stage)
|
pr_err("IRQ pipeline: some code running in oob context '%s'\n"
|
" called an in-band only routine\n",
|
this_stage->name);
|
else
|
pr_err("IRQ pipeline: oob stage found stalled while modifying in-band\n"
|
" interrupt state and/or running sleeping code\n");
|
|
dump_stack();
|
}
|
EXPORT_SYMBOL(check_inband_stage);
|
|
void check_spinlock_context(void)
|
{
|
WARN_ON_ONCE(in_pipeline() || running_oob());
}
|
EXPORT_SYMBOL(check_spinlock_context);
|
|
#endif /* CONFIG_DEBUG_IRQ_PIPELINE */
|
|
static inline void fixup_percpu_data(void)
|
{
|
#ifdef CONFIG_SMP
|
struct irq_pipeline_data *p;
|
int cpu;
|
|
/*
|
* A temporary event log is used by the inband stage during the
|
* early boot up (bootup_irq_map), until the per-cpu areas
|
* have been set up.
|
*
|
* Obviously, this code must run over the boot CPU, before SMP
|
* operations start, with hard IRQs off so that nothing can
|
* change under our feet.
|
*/
|
WARN_ON(!hard_irqs_disabled());
|
|
memcpy(&per_cpu(irq_map_array, 0)[0], &bootup_irq_map,
|
sizeof(struct irq_event_map));
|
|
for_each_possible_cpu(cpu) {
|
p = &per_cpu(irq_pipeline, cpu);
|
p->stages[0].stage = &inband_stage;
|
p->stages[0].log.map = &per_cpu(irq_map_array, cpu)[0];
|
p->stages[1].log.map = &per_cpu(irq_map_array, cpu)[1];
|
#ifdef CONFIG_DEBUG_IRQ_PIPELINE
|
p->stages[0].cpu = cpu;
|
p->stages[1].cpu = cpu;
|
#endif
|
}
|
#endif
|
}
|
|
void __init irq_pipeline_init_early(void)
|
{
|
/*
|
* This is called early from start_kernel(), even before the
|
* actual number of IRQs is known. We are running on the boot
|
* CPU, hw interrupts are off, and secondary CPUs are still
|
* lost in space. Careful.
|
*/
|
fixup_percpu_data();
|
}
|
|
/**
|
* irq_pipeline_init - Main pipeline core inits
|
*
|
* This is step #2 of the 3-step pipeline initialization, which
|
* should happen right after init_IRQ() has run. The internal
|
* service interrupts are created along with the synthetic IRQ
|
* domain, and the arch-specific init chores are performed too.
|
*
|
* Interrupt pipelining should be fully functional when this
|
* routine returns.
|
*/
|
void __init irq_pipeline_init(void)
|
{
|
WARN_ON(!hard_irqs_disabled());
|
|
synthetic_irq_domain = irq_domain_add_nomap(NULL, ~0,
|
&sirq_domain_ops,
|
NULL);
|
inband_work_sirq = irq_create_direct_mapping(synthetic_irq_domain);
|
setup_percpu_irq(inband_work_sirq, &inband_work);
|
|
/*
|
* We are running on the boot CPU, hw interrupts are off, and
|
* secondary CPUs are still lost in space. Now we may run
|
* arch-specific code for enabling the pipeline.
|
*/
|
arch_irq_pipeline_init();
|
|
irq_pipeline_active = true;
|
|
pr_info("IRQ pipeline enabled\n");
|
}
|
|
#ifndef CONFIG_SPARSE_IRQ
|
EXPORT_SYMBOL_GPL(irq_desc);
|
#endif
|