2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/kernel/irq_work.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
  *
@@ -17,35 +18,53 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
+#include <linux/smpboot.h>
+#include <linux/interrupt.h>
 #include <asm/processor.h>
 
 
 static DEFINE_PER_CPU(struct llist_head, raised_list);
 static DEFINE_PER_CPU(struct llist_head, lazy_list);
+static DEFINE_PER_CPU(struct task_struct *, irq_workd);
+
+static void wake_irq_workd(void)
+{
+	struct task_struct *tsk = __this_cpu_read(irq_workd);
+
+	if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
+		wake_up_process(tsk);
+}
+
+#ifdef CONFIG_SMP
+static void irq_work_wake(struct irq_work *entry)
+{
+	wake_irq_workd();
+}
+
+static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
+	IRQ_WORK_INIT_HARD(irq_work_wake);
+#endif
+
+static int irq_workd_should_run(unsigned int cpu)
+{
+	return !llist_empty(this_cpu_ptr(&lazy_list));
+}
 
 /*
  * Claim the entry so that no one else will poke at it.
  */
 static bool irq_work_claim(struct irq_work *work)
 {
-	unsigned long flags, oflags, nflags;
+	int oflags;
 
+	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->flags);
 	/*
-	 * Start with our best wish as a premise but only trust any
-	 * flag value after cmpxchg() result.
+	 * If the work is already pending, no need to raise the IPI.
+	 * The pairing atomic_fetch_andnot() in irq_work_run() makes sure
+	 * everything we did before is visible.
 	 */
-	flags = work->flags & ~IRQ_WORK_PENDING;
-	for (;;) {
-		nflags = flags | IRQ_WORK_CLAIMED;
-		oflags = cmpxchg(&work->flags, flags, nflags);
-		if (oflags == flags)
-			break;
-		if (oflags & IRQ_WORK_PENDING)
-			return false;
-		flags = oflags;
-		cpu_relax();
-	}
-
+	if (oflags & IRQ_WORK_PENDING)
+		return false;
 	return true;
 }
 
@@ -59,15 +78,29 @@
 /* Enqueue on current CPU, work must already be claimed and preempt disabled */
 static void __irq_work_queue_local(struct irq_work *work)
 {
+	struct llist_head *list;
+	bool rt_lazy_work = false;
+	bool lazy_work = false;
+	int work_flags;
+
+	work_flags = atomic_read(&work->flags);
+	if (work_flags & IRQ_WORK_LAZY)
+		lazy_work = true;
+	else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
+		 !(work_flags & IRQ_WORK_HARD_IRQ))
+		rt_lazy_work = true;
+
+	if (lazy_work || rt_lazy_work)
+		list = this_cpu_ptr(&lazy_list);
+	else
+		list = this_cpu_ptr(&raised_list);
+
+	if (!llist_add(&work->llnode, list))
+		return;
+
 	/* If the work is "lazy", handle it from next tick if any */
-	if (work->flags & IRQ_WORK_LAZY) {
-		if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
-		    tick_nohz_tick_stopped())
-			arch_irq_work_raise();
-	} else {
-		if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
-			arch_irq_work_raise();
-	}
+	if (!lazy_work || tick_nohz_tick_stopped())
+		arch_irq_work_raise();
 }
 
 /* Enqueue the irq work @work on the current CPU */
@@ -109,17 +142,34 @@
 	if (cpu != smp_processor_id()) {
 		/* Arch remote IPI send/receive backend aren't NMI safe */
 		WARN_ON_ONCE(in_nmi());
-		if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
-			arch_send_call_function_single_ipi(cpu);
+
+		/*
+		 * On PREEMPT_RT the items which are not marked as
+		 * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
+		 * item is used on the remote CPU to wake the thread.
+		 */
+		if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
+		    !(atomic_read(&work->flags) & IRQ_WORK_HARD_IRQ)) {
+
+			if (!llist_add(&work->llnode, &per_cpu(lazy_list, cpu)))
+				goto out;
+
+			work = &per_cpu(irq_work_wakeup, cpu);
+			if (!irq_work_claim(work))
+				goto out;
+		}
+
+		__smp_call_single_queue(cpu, &work->llnode);
 	} else {
 		__irq_work_queue_local(work);
 	}
+out:
 	preempt_enable();
 
 	return true;
 #endif /* CONFIG_SMP */
 }
-
+EXPORT_SYMBOL_GPL(irq_work_queue_on);
 
 bool irq_work_needs_cpu(void)
 {
@@ -128,9 +178,8 @@
 	raised = this_cpu_ptr(&raised_list);
 	lazy = this_cpu_ptr(&lazy_list);
 
-	if (llist_empty(raised) || arch_irq_work_has_interrupt())
-		if (llist_empty(lazy))
-			return false;
+	if (llist_empty(raised) && llist_empty(lazy))
+		return false;
 
 	/* All work should have been flushed before going offline */
 	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
@@ -138,36 +187,53 @@
 	return true;
 }
 
+void irq_work_single(void *arg)
+{
+	struct irq_work *work = arg;
+	int flags;
+
+	/*
+	 * Clear the PENDING bit, after this point the @work
+	 * can be re-used.
+	 * Make it immediately visible so that other CPUs trying
+	 * to claim that work don't rely on us to handle their data
+	 * while we are in the middle of the func.
+	 */
+	flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
+
+	lockdep_irq_work_enter(work);
+	work->func(work);
+	lockdep_irq_work_exit(work);
+	/*
+	 * Clear the BUSY bit and return to the free state if
+	 * no-one else claimed it meanwhile.
+	 */
+	flags &= ~IRQ_WORK_PENDING;
+	(void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
+
+	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
+	    !arch_irq_work_has_interrupt())
+		rcuwait_wake_up(&work->irqwait);
+}
+
 static void irq_work_run_list(struct llist_head *list)
 {
 	struct irq_work *work, *tmp;
 	struct llist_node *llnode;
-	unsigned long flags;
 
-	BUG_ON(!irqs_disabled());
+	/*
+	 * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
+	 * in a per-CPU thread in preemptible context. Only the items which are
+	 * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
+	 */
+	BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));
 
 	if (llist_empty(list))
 		return;
 
 	llnode = llist_del_all(list);
-	llist_for_each_entry_safe(work, tmp, llnode, llnode) {
-		/*
-		 * Clear the PENDING bit, after this point the @work
-		 * can be re-used.
-		 * Make it immediately visible so that other CPUs trying
-		 * to claim that work don't rely on us to handle their data
-		 * while we are in the middle of the func.
-		 */
-		flags = work->flags & ~IRQ_WORK_PENDING;
-		xchg(&work->flags, flags);
-
-		work->func(work);
-		/*
-		 * Clear the BUSY bit and return to the free state if
-		 * no-one else claimed it meanwhile.
-		 */
-		(void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
-	}
+	llist_for_each_entry_safe(work, tmp, llnode, llnode)
+		irq_work_single(work);
 }
 
 /*
@@ -177,7 +243,10 @@
 void irq_work_run(void)
 {
 	irq_work_run_list(this_cpu_ptr(&raised_list));
-	irq_work_run_list(this_cpu_ptr(&lazy_list));
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		irq_work_run_list(this_cpu_ptr(&lazy_list));
+	else
+		wake_irq_workd();
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
 
@@ -187,7 +256,11 @@
 
 	if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
 		irq_work_run_list(raised);
-	irq_work_run_list(this_cpu_ptr(&lazy_list));
+
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		irq_work_run_list(this_cpu_ptr(&lazy_list));
+	else
+		wake_irq_workd();
 }
 
 /*
@@ -197,8 +270,42 @@
 void irq_work_sync(struct irq_work *work)
 {
 	lockdep_assert_irqs_enabled();
+	might_sleep();
 
-	while (work->flags & IRQ_WORK_BUSY)
+	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
+	    !arch_irq_work_has_interrupt()) {
+		rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
+				   TASK_UNINTERRUPTIBLE);
+		return;
+	}
+
+	while (atomic_read(&work->flags) & IRQ_WORK_BUSY)
 		cpu_relax();
 }
 EXPORT_SYMBOL_GPL(irq_work_sync);
+
+static void run_irq_workd(unsigned int cpu)
+{
+	irq_work_run_list(this_cpu_ptr(&lazy_list));
+}
+
+static void irq_workd_setup(unsigned int cpu)
+{
+	sched_set_fifo_low(current);
+}
+
+static struct smp_hotplug_thread irqwork_threads = {
+	.store			= &irq_workd,
+	.setup			= irq_workd_setup,
+	.thread_should_run	= irq_workd_should_run,
+	.thread_fn		= run_irq_workd,
+	.thread_comm		= "irq_work/%u",
+};
+
+static __init int irq_work_init_threads(void)
+{
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
+	return 0;
+}
+early_initcall(irq_work_init_threads);
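
For reference, below is a minimal usage sketch of the API this patch touches; it is not part of the change itself. The example_* names and the trigger call site are hypothetical, while IRQ_WORK_INIT_HARD(), init_irq_work() and irq_work_queue() are the existing <linux/irq_work.h> interfaces. With this patch, an item initialized with IRQ_WORK_INIT_HARD() is still executed from hard interrupt context on PREEMPT_RT, whereas an unmarked item goes to lazy_list and is handled by the per-CPU irq_work/%u thread.

/*
 * Usage sketch only (hypothetical names) -- assumes a tree that already
 * carries the IRQ_WORK_INIT_HARD() initializer used by this patch.
 */
#include <linux/init.h>
#include <linux/irq_work.h>
#include <linux/printk.h>

static void example_cb(struct irq_work *work)
{
	pr_info("irq_work callback executed\n");
}

/* Marked HARD: runs from the hard interrupt even on PREEMPT_RT. */
static struct irq_work example_hard = IRQ_WORK_INIT_HARD(example_cb);

/* Unmarked: on PREEMPT_RT this lands on lazy_list and runs in irq_work/%u. */
static struct irq_work example_soft;

static int __init example_init(void)
{
	init_irq_work(&example_soft, example_cb);
	return 0;
}
core_initcall(example_init);

/* Hypothetical trigger, e.g. called from an interrupt or NMI handler. */
static void example_trigger(void)
{
	irq_work_queue(&example_hard);	/* raised_list, executed in hardirq */
	irq_work_queue(&example_soft);	/* threaded on PREEMPT_RT */
}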