2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/dma-buf/dma-fence.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Fence mechanism for dma-buf and to allow for asynchronous dma access
  *
@@ -7,15 +8,6 @@
  * Authors:
  * Rob Clark <robdclark@gmail.com>
  * Maarten Lankhorst <maarten.lankhorst@canonical.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
  */

 #include <linux/slab.h>
@@ -29,6 +21,10 @@

 EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
+EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
+
+static DEFINE_SPINLOCK(dma_fence_stub_lock);
+static struct dma_fence dma_fence_stub;

 /*
  * fence context counter: each execution context should have its own
@@ -36,7 +32,7 @@
  * context or not. One device can have multiple separate contexts,
  * and they're used if some engine can run independently of another.
  */
-static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(0);
+static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1);

 /**
  * DOC: DMA fences overview
@@ -64,9 +60,86 @@
  *
  * - Then there's also implicit fencing, where the synchronization points are
  *   implicitly passed around as part of shared &dma_buf instances. Such
- *   implicit fences are stored in &struct reservation_object through the
+ *   implicit fences are stored in &struct dma_resv through the
  *   &dma_buf.resv pointer.
  */
+
+/**
+ * DOC: fence cross-driver contract
+ *
+ * Since &dma_fence provides a cross-driver contract, all drivers must follow
+ * the same rules:
+ *
+ * * Fences must complete in a reasonable time. Fences which represent kernels
+ *   and shaders submitted by userspace, which could run forever, must be backed
+ *   up by timeout and gpu hang recovery code. Minimally that code must prevent
+ *   further command submission and force complete all in-flight fences, e.g.
+ *   when the driver or hardware does not support gpu reset, or if the gpu
+ *   reset failed for some reason. Ideally the driver supports gpu recovery
+ *   which only affects the offending userspace context, and no other userspace
+ *   submissions.
+ *
+ * * Drivers may have different ideas of what completion within a reasonable
+ *   time means. Some hang recovery code uses a fixed timeout, others a mix
+ *   between observing forward progress and increasingly strict timeouts.
+ *   Drivers should not try to second guess timeout handling of fences from
+ *   other drivers.
+ *
+ * * To ensure there are no deadlocks of dma_fence_wait() against other locks,
+ *   drivers should annotate all code required to reach dma_fence_signal(),
+ *   which completes the fences, with dma_fence_begin_signalling() and
+ *   dma_fence_end_signalling().
+ *
+ * * Drivers are allowed to call dma_fence_wait() while holding dma_resv_lock().
+ *   This means any code required for fence completion cannot acquire a
+ *   &dma_resv lock. Note that this also pulls in the entire established
+ *   locking hierarchy around dma_resv_lock() and dma_resv_unlock().
+ *
+ * * Drivers are allowed to call dma_fence_wait() from their &shrinker
+ *   callbacks. This means any code required for fence completion cannot
+ *   allocate memory with GFP_KERNEL.
+ *
+ * * Drivers are allowed to call dma_fence_wait() from their &mmu_notifier or
+ *   &mmu_interval_notifier callbacks. This means any code required for fence
+ *   completion cannot allocate memory with GFP_NOFS or GFP_NOIO. Only
+ *   GFP_ATOMIC is permissible, which might fail.
+ *
+ * Note that only GPU drivers have a reasonable excuse for both requiring
+ * &mmu_interval_notifier and &shrinker callbacks at the same time as having to
+ * track asynchronous compute work using &dma_fence. No driver outside of
+ * drivers/gpu should ever call dma_fence_wait() in such contexts.
+ */
+
+static const char *dma_fence_stub_get_name(struct dma_fence *fence)
+{
+        return "stub";
+}
+
+static const struct dma_fence_ops dma_fence_stub_ops = {
+        .get_driver_name = dma_fence_stub_get_name,
+        .get_timeline_name = dma_fence_stub_get_name,
+};
+
+/**
+ * dma_fence_get_stub - return a signaled fence
+ *
+ * Return a stub fence which is already signaled.
+ */
+struct dma_fence *dma_fence_get_stub(void)
+{
+        spin_lock(&dma_fence_stub_lock);
+        if (!dma_fence_stub.ops) {
+                dma_fence_init(&dma_fence_stub,
+                               &dma_fence_stub_ops,
+                               &dma_fence_stub_lock,
+                               0, 0);
+                dma_fence_signal_locked(&dma_fence_stub);
+        }
+        spin_unlock(&dma_fence_stub_lock);
+
+        return dma_fence_get(&dma_fence_stub);
+}
+EXPORT_SYMBOL(dma_fence_get_stub);

 /**
  * dma_fence_context_alloc - allocate an array of fence contexts
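dma_fence_get_stub() above hands out a reference to a single, already signaled fence. A minimal usage sketch, assuming a hypothetical foo driver (struct foo_job and its fields are illustrative, not part of this patch):

#include <linux/dma-fence.h>

/* Hypothetical driver job; only the fields used here are shown. */
struct foo_job {
        unsigned int num_commands;
        struct dma_fence *hw_fence;
};

/*
 * If the submission turned out to be empty there is nothing to wait for,
 * so hand out the shared stub fence, which is already signaled. Either
 * way the caller gets a reference it must drop with dma_fence_put().
 */
static struct dma_fence *foo_job_fence(struct foo_job *job)
{
        if (!job->num_commands)
                return dma_fence_get_stub();

        return dma_fence_get(job->hw_fence);
}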
@@ -79,9 +152,241 @@
 u64 dma_fence_context_alloc(unsigned num)
 {
         WARN_ON(!num);
-        return atomic64_add_return(num, &dma_fence_context_counter) - num;
+        return atomic64_fetch_add(num, &dma_fence_context_counter);
 }
 EXPORT_SYMBOL(dma_fence_context_alloc);
+
+/**
+ * DOC: fence signalling annotation
+ *
+ * Proving correctness of all the kernel code around &dma_fence through code
+ * review and testing is tricky for a few reasons:
+ *
+ * * It is a cross-driver contract, and therefore all drivers must follow the
+ *   same rules for lock nesting order, calling contexts for various functions
+ *   and anything else significant for in-kernel interfaces. But it is also
+ *   impossible to test all drivers in a single machine, hence brute-force N vs.
+ *   N testing of all combinations is impossible. Even just limiting to the
+ *   possible combinations is infeasible.
+ *
+ * * There is an enormous amount of driver code involved. For render drivers
+ *   there's the tail of command submission, after fences are published,
+ *   scheduler code, interrupt and workers to process job completion,
+ *   and timeout, gpu reset and gpu hang recovery code. Plus for integration
+ *   with core mm we have &mmu_notifier, &mmu_interval_notifier and &shrinker.
+ *   For modesetting drivers there are the commit tail functions
+ *   between when fences for an atomic modeset are published, and when the
+ *   corresponding vblank completes, including any interrupt processing and
+ *   related workers. Auditing all that code, across all drivers, is not
+ *   feasible.
+ *
+ * * Due to how many other subsystems are involved and the locking hierarchies
+ *   this pulls in there is extremely thin wiggle-room for driver-specific
+ *   differences. &dma_fence interacts with almost all of the core memory
+ *   handling through page fault handlers via &dma_resv, dma_resv_lock() and
+ *   dma_resv_unlock(). On the other side it also interacts through all
+ *   allocation sites through &mmu_notifier and &shrinker.
+ *
+ * Furthermore, lockdep does not handle cross-release dependencies, which means
+ * any deadlocks between dma_fence_wait() and dma_fence_signal() can't be caught
+ * at runtime with some quick testing. The simplest example is one thread
+ * waiting on a &dma_fence while holding a lock::
+ *
+ *     lock(A);
+ *     dma_fence_wait(B);
+ *     unlock(A);
+ *
+ * while the other thread is stuck trying to acquire the same lock, which
+ * prevents it from signalling the fence the previous thread is stuck waiting
+ * on::
+ *
+ *     lock(A);
+ *     unlock(A);
+ *     dma_fence_signal(B);
+ *
+ * By manually annotating all code relevant to signalling a &dma_fence we can
+ * teach lockdep about these dependencies, which also helps with the validation
+ * headache since now lockdep can check all the rules for us::
+ *
+ *     cookie = dma_fence_begin_signalling();
+ *     lock(A);
+ *     unlock(A);
+ *     dma_fence_signal(B);
+ *     dma_fence_end_signalling(cookie);
+ *
+ * For using dma_fence_begin_signalling() and dma_fence_end_signalling() to
+ * annotate critical sections the following rules need to be observed:
+ *
+ * * All code necessary to complete a &dma_fence must be annotated, from the
+ *   point where a fence is accessible to other threads, to the point where
+ *   dma_fence_signal() is called. Un-annotated code can contain deadlock issues,
+ *   and due to the very strict rules and many corner cases it is infeasible to
+ *   catch these just with review or normal stress testing.
+ *
+ * * &struct dma_resv deserves a special note, since the readers are only
+ *   protected by rcu. This means the signalling critical section starts as soon
+ *   as the new fences are installed, even before dma_resv_unlock() is called.
+ *
+ * * The only exceptions are fast paths and opportunistic signalling code, which
+ *   call dma_fence_signal() purely as an optimization, but are not required to
+ *   guarantee completion of a &dma_fence. The usual example is a wait IOCTL
+ *   which calls dma_fence_signal(), while the mandatory completion path goes
+ *   through a hardware interrupt and possible job completion worker.
+ *
+ * * To aid composability of code, the annotations can be freely nested, as long
+ *   as the overall locking hierarchy is consistent. The annotations also work
+ *   both in interrupt and process context. Due to implementation details this
+ *   requires that callers pass an opaque cookie from
+ *   dma_fence_begin_signalling() to dma_fence_end_signalling().
+ *
+ * * Validation against the cross-driver contract is implemented by priming
+ *   lockdep with the relevant hierarchy at boot-up. This means even just
+ *   testing with a single device is enough to validate a driver, at least as
+ *   far as deadlocks with dma_fence_wait() against dma_fence_signal() are
+ *   concerned.
+ */
+#ifdef CONFIG_LOCKDEP
+static struct lockdep_map dma_fence_lockdep_map = {
+        .name = "dma_fence_map"
+};
+
+/**
+ * dma_fence_begin_signalling - begin a critical DMA fence signalling section
+ *
+ * Drivers should use this to annotate the beginning of any code section
+ * required to eventually complete &dma_fence by calling dma_fence_signal().
+ *
+ * The end of these critical sections is annotated with
+ * dma_fence_end_signalling().
+ *
+ * Returns:
+ *
+ * Opaque cookie needed by the implementation, which needs to be passed to
+ * dma_fence_end_signalling().
+ */
+bool dma_fence_begin_signalling(void)
+{
+        /* explicitly nesting ... */
+        if (lock_is_held_type(&dma_fence_lockdep_map, 1))
+                return true;
+
+        /* rely on might_sleep check for soft/hardirq locks */
+        if (in_atomic())
+                return true;
+
+        /* ... and non-recursive readlock */
+        lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _RET_IP_);
+
+        return false;
+}
+EXPORT_SYMBOL(dma_fence_begin_signalling);
+
+/**
+ * dma_fence_end_signalling - end a critical DMA fence signalling section
+ * @cookie: opaque cookie from dma_fence_begin_signalling()
+ *
+ * Closes a critical section annotation opened by dma_fence_begin_signalling().
+ */
+void dma_fence_end_signalling(bool cookie)
+{
+        if (cookie)
+                return;
+
+        lock_release(&dma_fence_lockdep_map, _RET_IP_);
+}
+EXPORT_SYMBOL(dma_fence_end_signalling);
+
+void __dma_fence_might_wait(void)
+{
+        bool tmp;
+
+        tmp = lock_is_held_type(&dma_fence_lockdep_map, 1);
+        if (tmp)
+                lock_release(&dma_fence_lockdep_map, _THIS_IP_);
+        lock_map_acquire(&dma_fence_lockdep_map);
+        lock_map_release(&dma_fence_lockdep_map);
+        if (tmp)
+                lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _THIS_IP_);
+}
+#endif
+
+
+/**
+ * dma_fence_signal_timestamp_locked - signal completion of a fence
+ * @fence: the fence to signal
+ * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain
+ *
+ * Signal completion for software callbacks on a fence; this will unblock
+ * dma_fence_wait() calls and run all the callbacks added with
+ * dma_fence_add_callback(). Can be called multiple times, but since a fence
+ * can only go from the unsignaled to the signaled state and not back, it will
+ * only be effective the first time. Set the timestamp provided as the fence
+ * signal timestamp.
+ *
+ * Unlike dma_fence_signal_timestamp(), this function must be called with
+ * &dma_fence.lock held.
+ *
+ * Returns 0 on success and a negative error value when @fence has been
+ * signalled already.
+ */
+int dma_fence_signal_timestamp_locked(struct dma_fence *fence,
+                                      ktime_t timestamp)
+{
+        struct dma_fence_cb *cur, *tmp;
+        struct list_head cb_list;
+
+        lockdep_assert_held(fence->lock);
+
+        if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+                                      &fence->flags)))
+                return -EINVAL;
+
+        /* Stash the cb_list before replacing it with the timestamp */
+        list_replace(&fence->cb_list, &cb_list);
+
+        fence->timestamp = timestamp;
+        set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
+        trace_dma_fence_signaled(fence);
+
+        list_for_each_entry_safe(cur, tmp, &cb_list, node) {
+                INIT_LIST_HEAD(&cur->node);
+                cur->func(fence, cur);
+        }
+
+        return 0;
+}
+EXPORT_SYMBOL(dma_fence_signal_timestamp_locked);
+
+/**
+ * dma_fence_signal_timestamp - signal completion of a fence
+ * @fence: the fence to signal
+ * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain
+ *
+ * Signal completion for software callbacks on a fence; this will unblock
+ * dma_fence_wait() calls and run all the callbacks added with
+ * dma_fence_add_callback(). Can be called multiple times, but since a fence
+ * can only go from the unsignaled to the signaled state and not back, it will
+ * only be effective the first time. Set the timestamp provided as the fence
+ * signal timestamp.
+ *
+ * Returns 0 on success and a negative error value when @fence has been
+ * signalled already.
+ */
+int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp)
+{
+        unsigned long flags;
+        int ret;
+
+        if (!fence)
+                return -EINVAL;
+
+        spin_lock_irqsave(fence->lock, flags);
+        ret = dma_fence_signal_timestamp_locked(fence, timestamp);
+        spin_unlock_irqrestore(fence->lock, flags);
+
+        return ret;
+}
+EXPORT_SYMBOL(dma_fence_signal_timestamp);

 /**
  * dma_fence_signal_locked - signal completion of a fence
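A rough sketch of how the signalling annotations and dma_fence_signal_timestamp() added in this hunk might be used together, in a hypothetical job-completion worker (the foo_* names and the hardware timestamp field are assumptions, not part of this patch):

#include <linux/dma-fence.h>
#include <linux/ktime.h>
#include <linux/workqueue.h>

/* Hypothetical per-job state; only the fields used here are shown. */
struct foo_job {
        struct work_struct done_work;
        struct dma_fence *done_fence;
        u64 hw_timestamp_ns;    /* completion time reported by the hardware */
};

static void foo_job_done_worker(struct work_struct *work)
{
        struct foo_job *job = container_of(work, struct foo_job, done_work);
        bool cookie;

        /*
         * Everything up to dma_fence_end_signalling() is part of the fence
         * signalling critical section: it must not wait on other fences,
         * take dma_resv locks or allocate with GFP_KERNEL, and lockdep can
         * now verify that.
         */
        cookie = dma_fence_begin_signalling();

        /* Signal with the hardware-provided timestamp instead of ktime_get(). */
        dma_fence_signal_timestamp(job->done_fence,
                                   ns_to_ktime(job->hw_timestamp_ns));

        dma_fence_end_signalling(cookie);

        dma_fence_put(job->done_fence);
}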
@@ -101,32 +406,7 @@
  */
 int dma_fence_signal_locked(struct dma_fence *fence)
 {
-        struct dma_fence_cb *cur, *tmp;
-        int ret = 0;
-
-        lockdep_assert_held(fence->lock);
-
-        if (WARN_ON(!fence))
-                return -EINVAL;
-
-        if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
-                ret = -EINVAL;
-
-                /*
-                 * we might have raced with the unlocked dma_fence_signal,
-                 * still run through all callbacks
-                 */
-        } else {
-                fence->timestamp = ktime_get();
-                set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
-                trace_dma_fence_signaled(fence);
-        }
-
-        list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
-                list_del_init(&cur->node);
-                cur->func(fence, cur);
-        }
-        return ret;
+        return dma_fence_signal_timestamp_locked(fence, ktime_get());
 }
 EXPORT_SYMBOL(dma_fence_signal_locked);

@@ -146,28 +426,21 @@
 int dma_fence_signal(struct dma_fence *fence)
 {
         unsigned long flags;
+        int ret;
+        bool tmp;

         if (!fence)
                 return -EINVAL;

-        if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-                return -EINVAL;
+        tmp = dma_fence_begin_signalling();

-        fence->timestamp = ktime_get();
-        set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
-        trace_dma_fence_signaled(fence);
+        spin_lock_irqsave(fence->lock, flags);
+        ret = dma_fence_signal_timestamp_locked(fence, ktime_get());
+        spin_unlock_irqrestore(fence->lock, flags);

-        if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) {
-                struct dma_fence_cb *cur, *tmp;
+        dma_fence_end_signalling(tmp);

-                spin_lock_irqsave(fence->lock, flags);
-                list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
-                        list_del_init(&cur->node);
-                        cur->func(fence, cur);
-                }
-                spin_unlock_irqrestore(fence->lock, flags);
-        }
-        return 0;
+        return ret;
 }
 EXPORT_SYMBOL(dma_fence_signal);

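For context, dma_fence_signal() unblocks waiters and runs the callbacks added with dma_fence_add_callback(). A minimal sketch of registering such a callback (the foo_* types are hypothetical):

#include <linux/completion.h>
#include <linux/dma-fence.h>

/* Hypothetical request that wants to know when a fence signals. */
struct foo_request {
        struct dma_fence_cb cb;
        struct completion done;
};

static void foo_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
        struct foo_request *rq = container_of(cb, struct foo_request, cb);

        complete(&rq->done);
}

static void foo_track_fence(struct foo_request *rq, struct dma_fence *fence)
{
        init_completion(&rq->done);

        /*
         * dma_fence_add_callback() returns -ENOENT if the fence is already
         * signaled; in that case the callback is not installed and we
         * complete immediately ourselves.
         */
        if (dma_fence_add_callback(fence, &rq->cb, foo_fence_cb))
                complete(&rq->done);
}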
@@ -197,6 +470,10 @@
         if (WARN_ON(timeout < 0))
                 return -EINVAL;

+        might_sleep();
+
+        __dma_fence_might_wait();
+
         trace_dma_fence_wait_start(fence);
         if (fence->ops->wait)
                 ret = fence->ops->wait(fence, intr, timeout);
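dma_fence_wait_timeout() now insists on a sleepable calling context (might_sleep()) and primes lockdep through __dma_fence_might_wait(). A caller-side sketch with an assumed one-second timeout (foo_wait_for_fence() is illustrative only):

#include <linux/dma-fence.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

/*
 * Wait up to one second for @fence from process context. The return value
 * of dma_fence_wait_timeout() is negative on error (e.g. -ERESTARTSYS when
 * interrupted), 0 on timeout, and otherwise the remaining jiffies.
 */
static int foo_wait_for_fence(struct dma_fence *fence)
{
        long ret;

        ret = dma_fence_wait_timeout(fence, true, msecs_to_jiffies(1000));
        if (ret < 0)
                return ret;             /* interrupted or other error */
        if (ret == 0)
                return -ETIMEDOUT;      /* fence did not signal in time */

        return 0;
}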
@@ -221,8 +498,26 @@

         trace_dma_fence_destroy(fence);

-        /* Failed to signal before release, could be a refcounting issue */
-        WARN_ON(!list_empty(&fence->cb_list));
+        if (WARN(!list_empty(&fence->cb_list) &&
+                 !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags),
+                 "Fence %s:%s:%llx:%llx released with pending signals!\n",
+                 fence->ops->get_driver_name(fence),
+                 fence->ops->get_timeline_name(fence),
+                 fence->context, fence->seqno)) {
+                unsigned long flags;
+
+                /*
+                 * Failed to signal before release, likely a refcounting issue.
+                 *
+                 * This should never happen, but if it does, make sure that we
+                 * don't leave chains dangling. We set the error flag first
+                 * so that the callbacks know this signal is due to an error.
+                 */
+                spin_lock_irqsave(fence->lock, flags);
+                fence->error = -EDEADLK;
+                dma_fence_signal_locked(fence);
+                spin_unlock_irqrestore(fence->lock, flags);
+        }

         if (fence->ops->release)
                 fence->ops->release(fence);
@@ -607,7 +902,7 @@
  */
 void
 dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
-               spinlock_t *lock, u64 context, unsigned seqno)
+               spinlock_t *lock, u64 context, u64 seqno)
 {
         BUG_ON(!lock);
         BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);
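With @seqno widened to u64, a per-timeline setup might look roughly as follows; struct foo_timeline and its helpers are hypothetical and only show how dma_fence_context_alloc() and dma_fence_init() fit together:

#include <linux/dma-fence.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/* Hypothetical per-timeline state. */
struct foo_timeline {
        spinlock_t lock;
        u64 context;
        u64 seqno;
};

static void foo_timeline_init(struct foo_timeline *tl)
{
        spin_lock_init(&tl->lock);
        tl->context = dma_fence_context_alloc(1);
        tl->seqno = 0;
}

/* Allocate and initialise the next fence on this timeline. */
static struct dma_fence *foo_timeline_next_fence(struct foo_timeline *tl,
                                                 const struct dma_fence_ops *ops)
{
        struct dma_fence *fence;

        fence = kzalloc(sizeof(*fence), GFP_KERNEL);
        if (!fence)
                return NULL;

        dma_fence_init(fence, ops, &tl->lock, tl->context, ++tl->seqno);
        return fence;
}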