2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/kernel/irq_work.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
  *
@@ -17,36 +18,53 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
+#include <linux/smpboot.h>
 #include <linux/interrupt.h>
 #include <asm/processor.h>
 
 
 static DEFINE_PER_CPU(struct llist_head, raised_list);
 static DEFINE_PER_CPU(struct llist_head, lazy_list);
+static DEFINE_PER_CPU(struct task_struct *, irq_workd);
+
+static void wake_irq_workd(void)
+{
+	struct task_struct *tsk = __this_cpu_read(irq_workd);
+
+	if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
+		wake_up_process(tsk);
+}
+
+#ifdef CONFIG_SMP
+static void irq_work_wake(struct irq_work *entry)
+{
+	wake_irq_workd();
+}
+
+static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
+	IRQ_WORK_INIT_HARD(irq_work_wake);
+#endif
+
+static int irq_workd_should_run(unsigned int cpu)
+{
+	return !llist_empty(this_cpu_ptr(&lazy_list));
+}
 
 /*
  * Claim the entry so that no one else will poke at it.
  */
 static bool irq_work_claim(struct irq_work *work)
 {
-	unsigned long flags, oflags, nflags;
+	int oflags;
 
+	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->flags);
 	/*
-	 * Start with our best wish as a premise but only trust any
-	 * flag value after cmpxchg() result.
+	 * If the work is already pending, no need to raise the IPI.
+	 * The pairing atomic_fetch_andnot() in irq_work_run() makes sure
+	 * everything we did before is visible.
 	 */
-	flags = work->flags & ~IRQ_WORK_PENDING;
-	for (;;) {
-		nflags = flags | IRQ_WORK_CLAIMED;
-		oflags = cmpxchg(&work->flags, flags, nflags);
-		if (oflags == flags)
-			break;
-		if (oflags & IRQ_WORK_PENDING)
-			return false;
-		flags = oflags;
-		cpu_relax();
-	}
-
+	if (oflags & IRQ_WORK_PENDING)
+		return false;
 	return true;
 }
 
@@ -58,40 +76,43 @@
 }
 
 /* Enqueue on current CPU, work must already be claimed and preempt disabled */
-static void __irq_work_queue_local(struct irq_work *work, struct llist_head *list)
+static void __irq_work_queue_local(struct irq_work *work)
 {
-	bool empty;
+	struct llist_head *list;
+	bool rt_lazy_work = false;
+	bool lazy_work = false;
+	int work_flags;
 
-	empty = llist_add(&work->llnode, list);
+	work_flags = atomic_read(&work->flags);
+	if (work_flags & IRQ_WORK_LAZY)
+		lazy_work = true;
+	else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
+		 !(work_flags & IRQ_WORK_HARD_IRQ))
+		rt_lazy_work = true;
 
-	if (empty &&
-	    (!(work->flags & IRQ_WORK_LAZY) ||
-	     tick_nohz_tick_stopped()))
+	if (lazy_work || rt_lazy_work)
+		list = this_cpu_ptr(&lazy_list);
+	else
+		list = this_cpu_ptr(&raised_list);
+
+	if (!llist_add(&work->llnode, list))
+		return;
+
+	/* If the work is "lazy", handle it from next tick if any */
+	if (!lazy_work || tick_nohz_tick_stopped())
 		arch_irq_work_raise();
-}
-
-static inline bool use_lazy_list(struct irq_work *work)
-{
-	return (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ))
-		|| (work->flags & IRQ_WORK_LAZY);
 }
 
 /* Enqueue the irq work @work on the current CPU */
 bool irq_work_queue(struct irq_work *work)
 {
-	struct llist_head *list;
-
 	/* Only queue if not already pending */
 	if (!irq_work_claim(work))
 		return false;
 
 	/* Queue the entry and raise the IPI if needed. */
 	preempt_disable();
-	if (use_lazy_list(work))
-		list = this_cpu_ptr(&lazy_list);
-	else
-		list = this_cpu_ptr(&raised_list);
-	__irq_work_queue_local(work, list);
+	__irq_work_queue_local(work);
 	preempt_enable();
 
 	return true;
@@ -110,8 +131,6 @@
 		return irq_work_queue(work);
 
 #else /* CONFIG_SMP: */
-	struct llist_head *list;
-
 	/* All work should have been flushed before going offline */
 	WARN_ON_ONCE(cpu_is_offline(cpu));
 
@@ -120,25 +139,37 @@
 		return false;
 
 	preempt_disable();
-	if (use_lazy_list(work))
-		list = &per_cpu(lazy_list, cpu);
-	else
-		list = &per_cpu(raised_list, cpu);
-
 	if (cpu != smp_processor_id()) {
 		/* Arch remote IPI send/receive backend aren't NMI safe */
 		WARN_ON_ONCE(in_nmi());
-		if (llist_add(&work->llnode, list))
-			arch_send_call_function_single_ipi(cpu);
+
+		/*
+		 * On PREEMPT_RT the items which are not marked as
+		 * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
+		 * item is used on the remote CPU to wake the thread.
+		 */
+		if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
+		    !(atomic_read(&work->flags) & IRQ_WORK_HARD_IRQ)) {
+
+			if (!llist_add(&work->llnode, &per_cpu(lazy_list, cpu)))
+				goto out;
+
+			work = &per_cpu(irq_work_wakeup, cpu);
+			if (!irq_work_claim(work))
+				goto out;
+		}
+
+		__smp_call_single_queue(cpu, &work->llnode);
 	} else {
-		__irq_work_queue_local(work, list);
+		__irq_work_queue_local(work);
 	}
+out:
 	preempt_enable();
 
 	return true;
 #endif /* CONFIG_SMP */
 }
-
+EXPORT_SYMBOL_GPL(irq_work_queue_on);
 
 bool irq_work_needs_cpu(void)
 {
@@ -156,40 +187,53 @@
 	return true;
 }
 
+void irq_work_single(void *arg)
+{
+	struct irq_work *work = arg;
+	int flags;
+
+	/*
+	 * Clear the PENDING bit, after this point the @work
+	 * can be re-used.
+	 * Make it immediately visible so that other CPUs trying
+	 * to claim that work don't rely on us to handle their data
+	 * while we are in the middle of the func.
+	 */
+	flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
+
+	lockdep_irq_work_enter(work);
+	work->func(work);
+	lockdep_irq_work_exit(work);
+	/*
+	 * Clear the BUSY bit and return to the free state if
+	 * no-one else claimed it meanwhile.
+	 */
+	flags &= ~IRQ_WORK_PENDING;
+	(void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
+
+	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
+	    !arch_irq_work_has_interrupt())
+		rcuwait_wake_up(&work->irqwait);
+}
+
 static void irq_work_run_list(struct llist_head *list)
 {
 	struct irq_work *work, *tmp;
 	struct llist_node *llnode;
-	unsigned long flags;
 
-#ifndef CONFIG_PREEMPT_RT_FULL
 	/*
-	 * nort: On RT IRQ-work may run in SOFTIRQ context.
+	 * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
+	 * in a per-CPU thread in preemptible context. Only the items which are
+	 * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
 	 */
-	BUG_ON(!irqs_disabled());
-#endif
+	BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));
+
 	if (llist_empty(list))
 		return;
 
 	llnode = llist_del_all(list);
-	llist_for_each_entry_safe(work, tmp, llnode, llnode) {
-		/*
-		 * Clear the PENDING bit, after this point the @work
-		 * can be re-used.
-		 * Make it immediately visible so that other CPUs trying
-		 * to claim that work don't rely on us to handle their data
-		 * while we are in the middle of the func.
-		 */
-		flags = work->flags & ~IRQ_WORK_PENDING;
-		xchg(&work->flags, flags);
-
-		work->func(work);
-		/*
-		 * Clear the BUSY bit and return to the free state if
-		 * no-one else claimed it meanwhile.
-		 */
-		(void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
-	}
+	llist_for_each_entry_safe(work, tmp, llnode, llnode)
+		irq_work_single(work);
 }
 
 /*
@@ -199,16 +243,10 @@
 void irq_work_run(void)
 {
 	irq_work_run_list(this_cpu_ptr(&raised_list));
-	if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) {
-		/*
-		 * NOTE: we raise softirq via IPI for safety,
-		 * and execute in irq_work_tick() to move the
-		 * overhead from hard to soft irq context.
-		 */
-		if (!llist_empty(this_cpu_ptr(&lazy_list)))
-			raise_softirq(TIMER_SOFTIRQ);
-	} else
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
 		irq_work_run_list(this_cpu_ptr(&lazy_list));
+	else
+		wake_irq_workd();
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
 
@@ -219,16 +257,11 @@
 	if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
 		irq_work_run_list(raised);
 
-	if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
 		irq_work_run_list(this_cpu_ptr(&lazy_list));
+	else
+		wake_irq_workd();
 }
-
-#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
-void irq_work_tick_soft(void)
-{
-	irq_work_run_list(this_cpu_ptr(&lazy_list));
-}
-#endif
 
 /*
  * Synchronize against the irq_work @entry, ensures the entry is not
@@ -237,8 +270,42 @@
 void irq_work_sync(struct irq_work *work)
 {
 	lockdep_assert_irqs_enabled();
+	might_sleep();
 
-	while (work->flags & IRQ_WORK_BUSY)
+	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
+	    !arch_irq_work_has_interrupt()) {
+		rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
+				   TASK_UNINTERRUPTIBLE);
+		return;
+	}
+
+	while (atomic_read(&work->flags) & IRQ_WORK_BUSY)
 		cpu_relax();
 }
 EXPORT_SYMBOL_GPL(irq_work_sync);
+
+static void run_irq_workd(unsigned int cpu)
+{
+	irq_work_run_list(this_cpu_ptr(&lazy_list));
+}
+
+static void irq_workd_setup(unsigned int cpu)
+{
+	sched_set_fifo_low(current);
+}
+
+static struct smp_hotplug_thread irqwork_threads = {
+	.store			= &irq_workd,
+	.setup			= irq_workd_setup,
+	.thread_should_run	= irq_workd_should_run,
+	.thread_fn		= run_irq_workd,
+	.thread_comm		= "irq_work/%u",
+};
+
+static __init int irq_work_init_threads(void)
+{
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
+	return 0;
+}
+early_initcall(irq_work_init_threads);
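
Usage sketch (illustration only, not part of the patch above): a caller that needs its callback to stay in hard interrupt context even on PREEMPT_RT initializes the item with IRQ_WORK_INIT_HARD(), the same initializer the per-CPU irq_work_wakeup item uses, so the item lands on raised_list rather than being deferred to the irq_work/%u thread. The names my_hard_cb, my_work and kick_example are hypothetical.

#include <linux/irq_work.h>

/* Callback invoked from the self-IPI, i.e. hard interrupt context. */
static void my_hard_cb(struct irq_work *work)
{
	/* Keep this short and non-sleeping: it runs with interrupts disabled. */
}

/*
 * IRQ_WORK_INIT_HARD() marks the item IRQ_WORK_HARD_IRQ, so on PREEMPT_RT
 * it is queued on raised_list and run from the IPI, not from irq_workd.
 */
static struct irq_work my_work = IRQ_WORK_INIT_HARD(my_hard_cb);

static void kick_example(void)
{
	/* Claims the item and raises the IPI unless it was already pending. */
	irq_work_queue(&my_work);
}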