 .. |  .. |
     |   1 | +// SPDX-License-Identifier: GPL-2.0-only
   1 |   2 |  /*
   2 |   3 |   * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
   3 |   4 |   *
 .. |  .. |
  17 |  18 |  #include <linux/cpu.h>
  18 |  19 |  #include <linux/notifier.h>
  19 |  20 |  #include <linux/smp.h>
     |  21 | +#include <linux/smpboot.h>
  20 |  22 |  #include <linux/interrupt.h>
  21 |  23 |  #include <asm/processor.h>
  22 |  24 |
  23 |  25 |
  24 |  26 |  static DEFINE_PER_CPU(struct llist_head, raised_list);
  25 |  27 |  static DEFINE_PER_CPU(struct llist_head, lazy_list);
     |  28 | +static DEFINE_PER_CPU(struct task_struct *, irq_workd);
     |  29 | +
     |  30 | +static void wake_irq_workd(void)
     |  31 | +{
     |  32 | +        struct task_struct *tsk = __this_cpu_read(irq_workd);
     |  33 | +
     |  34 | +        if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
     |  35 | +                wake_up_process(tsk);
     |  36 | +}
     |  37 | +
     |  38 | +#ifdef CONFIG_SMP
     |  39 | +static void irq_work_wake(struct irq_work *entry)
     |  40 | +{
     |  41 | +        wake_irq_workd();
     |  42 | +}
     |  43 | +
     |  44 | +static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
     |  45 | +        IRQ_WORK_INIT_HARD(irq_work_wake);
     |  46 | +#endif
     |  47 | +
     |  48 | +static int irq_workd_should_run(unsigned int cpu)
     |  49 | +{
     |  50 | +        return !llist_empty(this_cpu_ptr(&lazy_list));
     |  51 | +}
  26 |  52 |
  27 |  53 |  /*
  28 |  54 |   * Claim the entry so that no one else will poke at it.
  29 |  55 |   */
  30 |  56 |  static bool irq_work_claim(struct irq_work *work)
  31 |  57 |  {
  32 |     | -        unsigned long flags, oflags, nflags;
     |  58 | +        int oflags;
  33 |  59 |
     |  60 | +        oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->flags);
  34 |  61 |          /*
  35 |     | -         * Start with our best wish as a premise but only trust any
  36 |     | -         * flag value after cmpxchg() result.
     |  62 | +         * If the work is already pending, no need to raise the IPI.
     |  63 | +         * The pairing atomic_fetch_andnot() in irq_work_run() makes sure
     |  64 | +         * everything we did before is visible.
  37 |  65 |           */
  38 |     | -        flags = work->flags & ~IRQ_WORK_PENDING;
  39 |     | -        for (;;) {
  40 |     | -                nflags = flags | IRQ_WORK_CLAIMED;
  41 |     | -                oflags = cmpxchg(&work->flags, flags, nflags);
  42 |     | -                if (oflags == flags)
  43 |     | -                        break;
  44 |     | -                if (oflags & IRQ_WORK_PENDING)
  45 |     | -                        return false;
  46 |     | -                flags = oflags;
  47 |     | -                cpu_relax();
  48 |     | -        }
  49 |     | -
     |  66 | +        if (oflags & IRQ_WORK_PENDING)
     |  67 | +                return false;
  50 |  68 |          return true;
  51 |  69 |  }
  52 |  70 |
 .. |  .. |
  58 |  76 |  }
  59 |  77 |
  60 |  78 |  /* Enqueue on current CPU, work must already be claimed and preempt disabled */
  61 |     | -static void __irq_work_queue_local(struct irq_work *work, struct llist_head *list)
     |  79 | +static void __irq_work_queue_local(struct irq_work *work)
  62 |  80 |  {
  63 |     | -        bool empty;
     |  81 | +        struct llist_head *list;
     |  82 | +        bool rt_lazy_work = false;
     |  83 | +        bool lazy_work = false;
     |  84 | +        int work_flags;
  64 |  85 |
  65 |     | -        empty = llist_add(&work->llnode, list);
     |  86 | +        work_flags = atomic_read(&work->flags);
     |  87 | +        if (work_flags & IRQ_WORK_LAZY)
     |  88 | +                lazy_work = true;
     |  89 | +        else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
     |  90 | +                 !(work_flags & IRQ_WORK_HARD_IRQ))
     |  91 | +                rt_lazy_work = true;
  66 |  92 |
  67 |     | -        if (empty &&
  68 |     | -            (!(work->flags & IRQ_WORK_LAZY) ||
  69 |     | -            tick_nohz_tick_stopped()))
     |  93 | +        if (lazy_work || rt_lazy_work)
     |  94 | +                list = this_cpu_ptr(&lazy_list);
     |  95 | +        else
     |  96 | +                list = this_cpu_ptr(&raised_list);
     |  97 | +
     |  98 | +        if (!llist_add(&work->llnode, list))
     |  99 | +                return;
     | 100 | +
     | 101 | +        /* If the work is "lazy", handle it from next tick if any */
     | 102 | +        if (!lazy_work || tick_nohz_tick_stopped())
  70 | 103 |                  arch_irq_work_raise();
  71 |     | -}
  72 |     | -
  73 |     | -static inline bool use_lazy_list(struct irq_work *work)
  74 |     | -{
  75 |     | -        return (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ))
  76 |     | -                || (work->flags & IRQ_WORK_LAZY);
  77 | 104 |  }
  78 | 105 |
  79 | 106 |  /* Enqueue the irq work @work on the current CPU */
  80 | 107 |  bool irq_work_queue(struct irq_work *work)
  81 | 108 |  {
  82 |     | -        struct llist_head *list;
  83 |     | -
  84 | 109 |          /* Only queue if not already pending */
  85 | 110 |          if (!irq_work_claim(work))
  86 | 111 |                  return false;
  87 | 112 |
  88 | 113 |          /* Queue the entry and raise the IPI if needed. */
  89 | 114 |          preempt_disable();
  90 |     | -        if (use_lazy_list(work))
  91 |     | -                list = this_cpu_ptr(&lazy_list);
  92 |     | -        else
  93 |     | -                list = this_cpu_ptr(&raised_list);
  94 |     | -        __irq_work_queue_local(work, list);
     | 115 | +        __irq_work_queue_local(work);
  95 | 116 |          preempt_enable();
  96 | 117 |
  97 | 118 |          return true;
 .. |  .. |
 110 | 131 |          return irq_work_queue(work);
 111 | 132 |
 112 | 133 |  #else /* CONFIG_SMP: */
 113 |     | -        struct llist_head *list;
 114 |     | -
 115 | 134 |          /* All work should have been flushed before going offline */
 116 | 135 |          WARN_ON_ONCE(cpu_is_offline(cpu));
 117 | 136 |
 .. |  .. |
 120 | 139 |                  return false;
 121 | 140 |
 122 | 141 |          preempt_disable();
 123 |     | -        if (use_lazy_list(work))
 124 |     | -                list = &per_cpu(lazy_list, cpu);
 125 |     | -        else
 126 |     | -                list = &per_cpu(raised_list, cpu);
 127 |     | -
 128 | 142 |          if (cpu != smp_processor_id()) {
 129 | 143 |                  /* Arch remote IPI send/receive backend aren't NMI safe */
 130 | 144 |                  WARN_ON_ONCE(in_nmi());
 131 |     | -                if (llist_add(&work->llnode, list))
 132 |     | -                        arch_send_call_function_single_ipi(cpu);
     | 145 | +
     | 146 | +                /*
     | 147 | +                 * On PREEMPT_RT the items which are not marked as
     | 148 | +                 * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
     | 149 | +                 * item is used on the remote CPU to wake the thread.
     | 150 | +                 */
     | 151 | +                if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
     | 152 | +                    !(atomic_read(&work->flags) & IRQ_WORK_HARD_IRQ)) {
     | 153 | +
     | 154 | +                        if (!llist_add(&work->llnode, &per_cpu(lazy_list, cpu)))
     | 155 | +                                goto out;
     | 156 | +
     | 157 | +                        work = &per_cpu(irq_work_wakeup, cpu);
     | 158 | +                        if (!irq_work_claim(work))
     | 159 | +                                goto out;
     | 160 | +                }
     | 161 | +
     | 162 | +                __smp_call_single_queue(cpu, &work->llnode);
 133 | 163 |          } else {
 134 |     | -                __irq_work_queue_local(work, list);
     | 164 | +                __irq_work_queue_local(work);
 135 | 165 |          }
     | 166 | +out:
 136 | 167 |          preempt_enable();
 137 | 168 |
 138 | 169 |          return true;
 139 | 170 |  #endif /* CONFIG_SMP */
 140 | 171 |  }
 141 |     | -
     | 172 | +EXPORT_SYMBOL_GPL(irq_work_queue_on);
 142 | 173 |
 143 | 174 |  bool irq_work_needs_cpu(void)
 144 | 175 |  {
 .. |  .. |
 156 | 187 |          return true;
 157 | 188 |  }
 158 | 189 |
     | 190 | +void irq_work_single(void *arg)
     | 191 | +{
     | 192 | +        struct irq_work *work = arg;
     | 193 | +        int flags;
     | 194 | +
     | 195 | +        /*
     | 196 | +         * Clear the PENDING bit, after this point the @work
     | 197 | +         * can be re-used.
     | 198 | +         * Make it immediately visible so that other CPUs trying
     | 199 | +         * to claim that work don't rely on us to handle their data
     | 200 | +         * while we are in the middle of the func.
     | 201 | +         */
     | 202 | +        flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
     | 203 | +
     | 204 | +        lockdep_irq_work_enter(work);
     | 205 | +        work->func(work);
     | 206 | +        lockdep_irq_work_exit(work);
     | 207 | +        /*
     | 208 | +         * Clear the BUSY bit and return to the free state if
     | 209 | +         * no-one else claimed it meanwhile.
     | 210 | +         */
     | 211 | +        flags &= ~IRQ_WORK_PENDING;
     | 212 | +        (void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
     | 213 | +
     | 214 | +        if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
     | 215 | +            !arch_irq_work_has_interrupt())
     | 216 | +                rcuwait_wake_up(&work->irqwait);
     | 217 | +}
     | 218 | +
 159 | 219 |  static void irq_work_run_list(struct llist_head *list)
 160 | 220 |  {
 161 | 221 |          struct irq_work *work, *tmp;
 162 | 222 |          struct llist_node *llnode;
 163 |     | -        unsigned long flags;
 164 | 223 |
 165 |     | -#ifndef CONFIG_PREEMPT_RT_FULL
 166 | 224 |          /*
 167 |     | -         * nort: On RT IRQ-work may run in SOFTIRQ context.
     | 225 | +         * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
     | 226 | +         * in a per-CPU thread in preemptible context. Only the items which are
     | 227 | +         * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
 168 | 228 |           */
 169 |     | -        BUG_ON(!irqs_disabled());
 170 |     | -#endif
     | 229 | +        BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));
     | 230 | +
 171 | 231 |          if (llist_empty(list))
 172 | 232 |                  return;
 173 | 233 |
 174 | 234 |          llnode = llist_del_all(list);
 175 |     | -        llist_for_each_entry_safe(work, tmp, llnode, llnode) {
 176 |     | -                /*
 177 |     | -                 * Clear the PENDING bit, after this point the @work
 178 |     | -                 * can be re-used.
 179 |     | -                 * Make it immediately visible so that other CPUs trying
 180 |     | -                 * to claim that work don't rely on us to handle their data
 181 |     | -                 * while we are in the middle of the func.
 182 |     | -                 */
 183 |     | -                flags = work->flags & ~IRQ_WORK_PENDING;
 184 |     | -                xchg(&work->flags, flags);
 185 |     | -
 186 |     | -                work->func(work);
 187 |     | -                /*
 188 |     | -                 * Clear the BUSY bit and return to the free state if
 189 |     | -                 * no-one else claimed it meanwhile.
 190 |     | -                 */
 191 |     | -                (void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
 192 |     | -        }
     | 235 | +        llist_for_each_entry_safe(work, tmp, llnode, llnode)
     | 236 | +                irq_work_single(work);
 193 | 237 |  }
 194 | 238 |
 195 | 239 |  /*
 .. |  .. |
 199 | 243 |  void irq_work_run(void)
 200 | 244 |  {
 201 | 245 |          irq_work_run_list(this_cpu_ptr(&raised_list));
 202 |     | -        if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) {
 203 |     | -                /*
 204 |     | -                 * NOTE: we raise softirq via IPI for safety,
 205 |     | -                 * and execute in irq_work_tick() to move the
 206 |     | -                 * overhead from hard to soft irq context.
 207 |     | -                 */
 208 |     | -                if (!llist_empty(this_cpu_ptr(&lazy_list)))
 209 |     | -                        raise_softirq(TIMER_SOFTIRQ);
 210 |     | -        } else
     | 246 | +        if (!IS_ENABLED(CONFIG_PREEMPT_RT))
 211 | 247 |                  irq_work_run_list(this_cpu_ptr(&lazy_list));
     | 248 | +        else
     | 249 | +                wake_irq_workd();
 212 | 250 |  }
 213 | 251 |  EXPORT_SYMBOL_GPL(irq_work_run);
 214 | 252 |
 .. |  .. |
 219 | 257 |          if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
 220 | 258 |                  irq_work_run_list(raised);
 221 | 259 |
 222 |     | -        if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
     | 260 | +        if (!IS_ENABLED(CONFIG_PREEMPT_RT))
 223 | 261 |                  irq_work_run_list(this_cpu_ptr(&lazy_list));
     | 262 | +        else
     | 263 | +                wake_irq_workd();
 224 | 264 |  }
 225 |     | -
 226 |     | -#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
 227 |     | -void irq_work_tick_soft(void)
 228 |     | -{
 229 |     | -        irq_work_run_list(this_cpu_ptr(&lazy_list));
 230 |     | -}
 231 |     | -#endif
 232 | 265 |
 233 | 266 |  /*
 234 | 267 |   * Synchronize against the irq_work @entry, ensures the entry is not
 .. |  .. |
 237 | 270 |  void irq_work_sync(struct irq_work *work)
 238 | 271 |  {
 239 | 272 |          lockdep_assert_irqs_enabled();
     | 273 | +        might_sleep();
 240 | 274 |
 241 |     | -        while (work->flags & IRQ_WORK_BUSY)
     | 275 | +        if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
     | 276 | +            !arch_irq_work_has_interrupt()) {
     | 277 | +                rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
     | 278 | +                                   TASK_UNINTERRUPTIBLE);
     | 279 | +                return;
     | 280 | +        }
     | 281 | +
     | 282 | +        while (atomic_read(&work->flags) & IRQ_WORK_BUSY)
 242 | 283 |                  cpu_relax();
 243 | 284 |  }
 244 | 285 |  EXPORT_SYMBOL_GPL(irq_work_sync);
     | 286 | +
     | 287 | +static void run_irq_workd(unsigned int cpu)
     | 288 | +{
     | 289 | +        irq_work_run_list(this_cpu_ptr(&lazy_list));
     | 290 | +}
     | 291 | +
     | 292 | +static void irq_workd_setup(unsigned int cpu)
     | 293 | +{
     | 294 | +        sched_set_fifo_low(current);
     | 295 | +}
     | 296 | +
     | 297 | +static struct smp_hotplug_thread irqwork_threads = {
     | 298 | +        .store              = &irq_workd,
     | 299 | +        .setup              = irq_workd_setup,
     | 300 | +        .thread_should_run  = irq_workd_should_run,
     | 301 | +        .thread_fn          = run_irq_workd,
     | 302 | +        .thread_comm        = "irq_work/%u",
     | 303 | +};
     | 304 | +
     | 305 | +static __init int irq_work_init_threads(void)
     | 306 | +{
     | 307 | +        if (IS_ENABLED(CONFIG_PREEMPT_RT))
     | 308 | +                BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
     | 309 | +        return 0;
     | 310 | +}
     | 311 | +early_initcall(irq_work_init_threads);
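
For context, a minimal caller-side sketch of how the API behaves after this patch. This is an illustration only, not part of the change: the my_* names are hypothetical, and it relies solely on the long-standing irq_work interface (init_irq_work(), irq_work_queue(), irq_work_sync()) plus IRQ_WORK_INIT_HARD(), which the patch itself uses for its per-CPU irq_work_wakeup item.

```c
/* Hypothetical usage sketch, not part of the patch. */
#include <linux/irq_work.h>

static void my_cb(struct irq_work *work)
{
	/*
	 * Without IRQ_WORK_HARD_IRQ, PREEMPT_RT runs this callback from the
	 * per-CPU "irq_work/%u" thread added by the patch, in preemptible
	 * context; non-RT kernels still run it from the irq_work interrupt.
	 */
}

/*
 * Items that must stay in hard interrupt context even on RT are marked
 * HARD, exactly as the patch does for its irq_work_wakeup item.
 */
static struct irq_work my_hard_work = IRQ_WORK_INIT_HARD(my_cb);
static struct irq_work my_work;

static void my_example(void)
{
	init_irq_work(&my_work, my_cb);

	/* Claim and enqueue; returns false if the item was already pending. */
	irq_work_queue(&my_work);
	irq_work_queue(&my_hard_work);

	/*
	 * With the added might_sleep()/rcuwait path, syncing a non-HARD item
	 * on RT may block, so call this from process context only.
	 */
	irq_work_sync(&my_work);
}
```

The design point the sketch illustrates: callers keep the same queueing API, and only the execution context of non-HARD items changes on PREEMPT_RT (lazy_list drained by the irq_workd thread instead of softirq/hardirq), while irq_work_sync() gains a sleeping wait for those items.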