...
+// SPDX-License-Identifier: GPL-2.0-only
/*
 * Fence mechanism for dma-buf and to allow for asynchronous dma access
 *
...
 * Authors:
 * Rob Clark <robdclark@gmail.com>
 * Maarten Lankhorst <maarten.lankhorst@canonical.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
 */

#include <linux/slab.h>
...

EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
+EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
+
+static DEFINE_SPINLOCK(dma_fence_stub_lock);
+static struct dma_fence dma_fence_stub;

/*
 * fence context counter: each execution context should have its own
...
 * context or not. One device can have multiple separate contexts,
 * and they're used if some engine can run independently of another.
 */
-static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(0);
+static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1);

/**
 * DOC: DMA fences overview
...
 *
 * - Then there's also implicit fencing, where the synchronization points are
 *   implicitly passed around as part of shared &dma_buf instances. Such
- *   implicit fences are stored in &struct reservation_object through the
+ *   implicit fences are stored in &struct dma_resv through the
 *   &dma_buf.resv pointer.
 */
+
+/**
+ * DOC: fence cross-driver contract
+ *
+ * Since &dma_fence provides a cross-driver contract, all drivers must follow
+ * the same rules:
+ *
+ * * Fences must complete in a reasonable time. Fences which represent kernels
+ *   and shaders submitted by userspace, which could run forever, must be backed
+ *   up by timeout and gpu hang recovery code. Minimally that code must prevent
+ *   further command submission and force complete all in-flight fences, e.g.
+ *   when the driver or hardware do not support gpu reset, or if the gpu reset
+ *   failed for some reason. Ideally the driver supports gpu recovery which only
+ *   affects the offending userspace context, and no other userspace
+ *   submissions. A minimal recovery sketch follows this comment block.
+ *
+ * * Drivers may have different ideas of what completion within a reasonable
+ *   time means. Some hang recovery code uses a fixed timeout, others a mix
+ *   between observing forward progress and increasingly strict timeouts.
+ *   Drivers should not try to second-guess timeout handling of fences from
+ *   other drivers.
+ *
+ * * To ensure there are no deadlocks of dma_fence_wait() against other locks,
+ *   drivers should annotate all code required to reach dma_fence_signal(),
+ *   which completes the fences, with dma_fence_begin_signalling() and
+ *   dma_fence_end_signalling().
+ *
+ * * Drivers are allowed to call dma_fence_wait() while holding dma_resv_lock().
+ *   This means any code required for fence completion cannot acquire a
+ *   &dma_resv lock. Note that this also pulls in the entire established
+ *   locking hierarchy around dma_resv_lock() and dma_resv_unlock().
+ *
+ * * Drivers are allowed to call dma_fence_wait() from their &shrinker
+ *   callbacks. This means any code required for fence completion cannot
+ *   allocate memory with GFP_KERNEL.
+ *
+ * * Drivers are allowed to call dma_fence_wait() from their &mmu_notifier
+ *   respectively &mmu_interval_notifier callbacks. This means any code required
+ *   for fence completion cannot allocate memory with GFP_NOFS or GFP_NOIO.
+ *   Only GFP_ATOMIC is permissible, which might fail.
+ *
+ * Note that only GPU drivers have a reasonable excuse for both requiring
+ * &mmu_interval_notifier and &shrinker callbacks at the same time as having to
+ * track asynchronous compute work using &dma_fence. No driver outside of
+ * drivers/gpu should ever call dma_fence_wait() in such contexts.
+ */
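As an illustration of the first rule above, here is a minimal hang-recovery sketch. It is not part of this patch; the device structure, job list and helper names are made up, and a real driver would integrate this with its scheduler and reset logic:

#include <linux/dma-fence.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_job {                         /* hypothetical per-submission bookkeeping */
        struct list_head node;
        struct dma_fence *fence;
};

struct my_device {                      /* hypothetical device state */
        spinlock_t job_lock;
        struct list_head pending_jobs;
};

/* Force-complete every in-flight fence once further submission is blocked. */
static void my_gpu_hang_recovery(struct my_device *dev)
{
        struct my_job *job, *tmp;
        LIST_HEAD(stalled);

        spin_lock(&dev->job_lock);
        list_splice_init(&dev->pending_jobs, &stalled);
        spin_unlock(&dev->job_lock);

        list_for_each_entry_safe(job, tmp, &stalled, node) {
                /* record the error before signalling, so waiters can see it */
                dma_fence_set_error(job->fence, -ETIME);
                dma_fence_signal(job->fence);
                dma_fence_put(job->fence);
                list_del(&job->node);
                kfree(job);
        }
}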
+
+static const char *dma_fence_stub_get_name(struct dma_fence *fence)
+{
+        return "stub";
+}
+
+static const struct dma_fence_ops dma_fence_stub_ops = {
+        .get_driver_name = dma_fence_stub_get_name,
+        .get_timeline_name = dma_fence_stub_get_name,
+};
+
+/**
+ * dma_fence_get_stub - return a signaled fence
+ *
+ * Return a stub fence which is already signaled.
+ */
+struct dma_fence *dma_fence_get_stub(void)
+{
+        spin_lock(&dma_fence_stub_lock);
+        if (!dma_fence_stub.ops) {
+                dma_fence_init(&dma_fence_stub,
+                               &dma_fence_stub_ops,
+                               &dma_fence_stub_lock,
+                               0, 0);
+                dma_fence_signal_locked(&dma_fence_stub);
+        }
+        spin_unlock(&dma_fence_stub_lock);
+
+        return dma_fence_get(&dma_fence_stub);
+}
+EXPORT_SYMBOL(dma_fence_get_stub);
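A hedged usage sketch (the helper and context structure below are invented for illustration, not part of this patch): code that must always hand out a fence, for example to userspace or another driver, can fall back to the stub when nothing is pending, and any wait on it returns immediately:

struct my_context {                     /* hypothetical per-context state */
        struct dma_fence *last_fence;
};

/* Return a fence for the last submission, or a pre-signaled stub. */
static struct dma_fence *my_get_out_fence(struct my_context *ctx)
{
        if (ctx->last_fence)
                return dma_fence_get(ctx->last_fence);

        return dma_fence_get_stub();
}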

/**
 * dma_fence_context_alloc - allocate an array of fence contexts
...
u64 dma_fence_context_alloc(unsigned num)
{
        WARN_ON(!num);
-       return atomic64_add_return(num, &dma_fence_context_counter) - num;
+       return atomic64_fetch_add(num, &dma_fence_context_counter);
}
EXPORT_SYMBOL(dma_fence_context_alloc);
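A minimal sketch of the intended usage, with made-up ring structures (only dma_fence_context_alloc() and dma_fence_init() are real): a driver typically allocates one context per timeline at init time and then hands out monotonically increasing sequence numbers on that context:

struct my_ring {                        /* hypothetical per-timeline state */
        spinlock_t fence_lock;
        u64 fence_context;              /* from dma_fence_context_alloc() */
        u64 next_seqno;
};

static void my_ring_init(struct my_ring *ring)
{
        spin_lock_init(&ring->fence_lock);
        ring->fence_context = dma_fence_context_alloc(1);
        ring->next_seqno = 1;
}

static void my_ring_fence_init(struct my_ring *ring, struct dma_fence *fence,
                               const struct dma_fence_ops *ops)
{
        dma_fence_init(fence, ops, &ring->fence_lock,
                       ring->fence_context, ring->next_seqno++);
}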
+
+/**
+ * DOC: fence signalling annotation
+ *
+ * Proving correctness of all the kernel code around &dma_fence through code
+ * review and testing is tricky for a few reasons:
+ *
+ * * It is a cross-driver contract, and therefore all drivers must follow the
+ *   same rules for lock nesting order, calling contexts for various functions
+ *   and anything else significant for in-kernel interfaces. But it is also
+ *   impossible to test all drivers in a single machine, hence brute-force N vs.
+ *   N testing of all combinations is impossible. Even just limiting to the
+ *   possible combinations is infeasible.
+ *
+ * * There is an enormous amount of driver code involved. For render drivers
+ *   there's the tail of command submission, after fences are published,
+ *   scheduler code, interrupt and workers to process job completion,
+ *   and timeout, gpu reset and gpu hang recovery code. Plus for integration
+ *   with core mm we have &mmu_notifier, respectively &mmu_interval_notifier,
+ *   and &shrinker. For modesetting drivers there's the commit tail functions
+ *   between when fences for an atomic modeset are published, and when the
+ *   corresponding vblank completes, including any interrupt processing and
+ *   related workers. Auditing all that code, across all drivers, is not
+ *   feasible.
+ *
+ * * Due to how many other subsystems are involved and the locking hierarchies
+ *   this pulls in there is extremely thin wiggle-room for driver-specific
+ *   differences. &dma_fence interacts with almost all of the core memory
+ *   handling through page fault handlers via &dma_resv, dma_resv_lock() and
+ *   dma_resv_unlock(). On the other side it also interacts through all
+ *   allocation sites through &mmu_notifier and &shrinker.
+ *
+ * Furthermore lockdep does not handle cross-release dependencies, which means
+ * any deadlocks between dma_fence_wait() and dma_fence_signal() can't be caught
+ * at runtime with some quick testing. The simplest example is one thread
+ * waiting on a &dma_fence while holding a lock::
+ *
+ *     lock(A);
+ *     dma_fence_wait(B);
+ *     unlock(A);
+ *
+ * while the other thread is stuck trying to acquire the same lock, which
+ * prevents it from signalling the fence the previous thread is stuck waiting
+ * on::
+ *
+ *     lock(A);
+ *     unlock(A);
+ *     dma_fence_signal(B);
+ *
+ * By manually annotating all code relevant to signalling a &dma_fence we can
+ * teach lockdep about these dependencies, which also helps with the validation
+ * headache since now lockdep can check all the rules for us::
+ *
+ *     cookie = dma_fence_begin_signalling();
+ *     lock(A);
+ *     unlock(A);
+ *     dma_fence_signal(B);
+ *     dma_fence_end_signalling(cookie);
+ *
+ * For using dma_fence_begin_signalling() and dma_fence_end_signalling() to
+ * annotate critical sections the following rules need to be observed:
+ *
+ * * All code necessary to complete a &dma_fence must be annotated, from the
+ *   point where a fence is accessible to other threads, to the point where
+ *   dma_fence_signal() is called. Un-annotated code can contain deadlock issues,
+ *   and due to the very strict rules and many corner cases it is infeasible to
+ *   catch these just with review or normal stress testing.
+ *
+ * * &struct dma_resv deserves a special note, since the readers are only
+ *   protected by rcu. This means the signalling critical section starts as soon
+ *   as the new fences are installed, even before dma_resv_unlock() is called.
+ *
+ * * The only exceptions are fast paths and opportunistic signalling code, which
+ *   call dma_fence_signal() purely as an optimization, but are not required to
+ *   guarantee completion of a &dma_fence. The usual example is a wait IOCTL
+ *   which calls dma_fence_signal(), while the mandatory completion path goes
+ *   through a hardware interrupt and possible job completion worker.
+ *
+ * * To aid composability of code, the annotations can be freely nested, as long
+ *   as the overall locking hierarchy is consistent. The annotations also work
+ *   both in interrupt and process context. Due to implementation details this
+ *   requires that callers pass an opaque cookie from
+ *   dma_fence_begin_signalling() to dma_fence_end_signalling().
+ *
+ * * Validation against the cross-driver contract is implemented by priming
+ *   lockdep with the relevant hierarchy at boot-up. This means even just
+ *   testing with a single device is enough to validate a driver, at least as
+ *   far as deadlocks with dma_fence_wait() against dma_fence_signal() are
+ *   concerned.
+ */
+#ifdef CONFIG_LOCKDEP
+static struct lockdep_map dma_fence_lockdep_map = {
+        .name = "dma_fence_map"
+};
+
+/**
+ * dma_fence_begin_signalling - begin a critical DMA fence signalling section
+ *
+ * Drivers should use this to annotate the beginning of any code section
+ * required to eventually complete &dma_fence by calling dma_fence_signal().
+ *
+ * The end of these critical sections is annotated with
+ * dma_fence_end_signalling().
+ *
+ * Returns:
+ *
+ * Opaque cookie needed by the implementation, which needs to be passed to
+ * dma_fence_end_signalling().
+ */
+bool dma_fence_begin_signalling(void)
+{
+        /* explicitly nesting ... */
+        if (lock_is_held_type(&dma_fence_lockdep_map, 1))
+                return true;
+
+        /* rely on might_sleep check for soft/hardirq locks */
+        if (in_atomic())
+                return true;
+
+        /* ... and non-recursive readlock */
+        lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _RET_IP_);
+
+        return false;
+}
+EXPORT_SYMBOL(dma_fence_begin_signalling);
+
+/**
+ * dma_fence_end_signalling - end a critical DMA fence signalling section
+ * @cookie: opaque cookie from dma_fence_begin_signalling()
+ *
+ * Closes a critical section annotation opened by dma_fence_begin_signalling().
+ */
+void dma_fence_end_signalling(bool cookie)
+{
+        if (cookie)
+                return;
+
+        lock_release(&dma_fence_lockdep_map, _RET_IP_);
+}
+EXPORT_SYMBOL(dma_fence_end_signalling);
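A hedged sketch of how a driver's mandatory completion path might use this pair of annotations; the worker, job structure and fence field are made up for illustration:

#include <linux/dma-fence.h>
#include <linux/workqueue.h>

struct my_job {                         /* hypothetical job tracking */
        struct work_struct done_work;
        struct dma_fence *fence;
};

static void my_job_done_worker(struct work_struct *work)
{
        struct my_job *job = container_of(work, struct my_job, done_work);
        bool cookie;

        cookie = dma_fence_begin_signalling();
        /*
         * Everything in here is needed to reach dma_fence_signal(), so it
         * must not allocate with GFP_KERNEL, take dma_resv_lock(), etc.
         */
        dma_fence_signal(job->fence);
        dma_fence_end_signalling(cookie);
}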
+
+void __dma_fence_might_wait(void)
+{
+        bool tmp;
+
+        tmp = lock_is_held_type(&dma_fence_lockdep_map, 1);
+        if (tmp)
+                lock_release(&dma_fence_lockdep_map, _THIS_IP_);
+        lock_map_acquire(&dma_fence_lockdep_map);
+        lock_map_release(&dma_fence_lockdep_map);
+        if (tmp)
+                lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _THIS_IP_);
+}
+#endif
+
+
+/**
+ * dma_fence_signal_timestamp_locked - signal completion of a fence
+ * @fence: the fence to signal
+ * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain
+ *
+ * Signal completion for software callbacks on a fence; this will unblock
+ * dma_fence_wait() calls and run all the callbacks added with
+ * dma_fence_add_callback(). Can be called multiple times, but since a fence
+ * can only go from the unsignaled to the signaled state and not back, it will
+ * only be effective the first time. Set the timestamp provided as the fence
+ * signal timestamp.
+ *
+ * Unlike dma_fence_signal_timestamp(), this function must be called with
+ * &dma_fence.lock held.
+ *
+ * Returns 0 on success and a negative error value when @fence has been
+ * signalled already.
+ */
+int dma_fence_signal_timestamp_locked(struct dma_fence *fence,
+                                      ktime_t timestamp)
+{
+        struct dma_fence_cb *cur, *tmp;
+        struct list_head cb_list;
+
+        lockdep_assert_held(fence->lock);
+
+        if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+                                      &fence->flags)))
+                return -EINVAL;
+
+        /* Stash the cb_list before replacing it with the timestamp */
+        list_replace(&fence->cb_list, &cb_list);
+
+        fence->timestamp = timestamp;
+        set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
+        trace_dma_fence_signaled(fence);
+
+        list_for_each_entry_safe(cur, tmp, &cb_list, node) {
+                INIT_LIST_HEAD(&cur->node);
+                cur->func(fence, cur);
+        }
+
+        return 0;
+}
+EXPORT_SYMBOL(dma_fence_signal_timestamp_locked);
+
+/**
+ * dma_fence_signal_timestamp - signal completion of a fence
+ * @fence: the fence to signal
+ * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain
+ *
+ * Signal completion for software callbacks on a fence; this will unblock
+ * dma_fence_wait() calls and run all the callbacks added with
+ * dma_fence_add_callback(). Can be called multiple times, but since a fence
+ * can only go from the unsignaled to the signaled state and not back, it will
+ * only be effective the first time. Set the timestamp provided as the fence
+ * signal timestamp.
+ *
+ * Returns 0 on success and a negative error value when @fence has been
+ * signalled already.
+ */
+int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp)
+{
+        unsigned long flags;
+        int ret;
+
+        if (!fence)
+                return -EINVAL;
+
+        spin_lock_irqsave(fence->lock, flags);
+        ret = dma_fence_signal_timestamp_locked(fence, timestamp);
+        spin_unlock_irqrestore(fence->lock, flags);
+
+        return ret;
+}
+EXPORT_SYMBOL(dma_fence_signal_timestamp);
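A hedged sketch of where the timestamp variant is useful; the interrupt handler, engine structure and the helper that converts a device-captured completion time into CLOCK_MONOTONIC are all hypothetical:

#include <linux/dma-fence.h>
#include <linux/interrupt.h>
#include <linux/ktime.h>

struct my_engine {                      /* hypothetical per-engine state */
        struct dma_fence *active_fence;
};

/* made-up helper: stand-in for reading a hardware completion timestamp */
static ktime_t my_read_completion_timestamp(struct my_engine *engine)
{
        return ktime_get();             /* a real driver would read a register */
}

static irqreturn_t my_fence_irq(int irq, void *data)
{
        struct my_engine *engine = data;

        /* use the hardware's completion time rather than "now" */
        dma_fence_signal_timestamp(engine->active_fence,
                                   my_read_completion_timestamp(engine));
        return IRQ_HANDLED;
}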

/**
 * dma_fence_signal_locked - signal completion of a fence
...
 */
int dma_fence_signal_locked(struct dma_fence *fence)
{
-        struct dma_fence_cb *cur, *tmp;
-        int ret = 0;
-
-        lockdep_assert_held(fence->lock);
-
-        if (WARN_ON(!fence))
-                return -EINVAL;
-
-        if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
-                ret = -EINVAL;
-
-                /*
-                 * we might have raced with the unlocked dma_fence_signal,
-                 * still run through all callbacks
-                 */
-        } else {
-                fence->timestamp = ktime_get();
-                set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
-                trace_dma_fence_signaled(fence);
-        }
-
-        list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
-                list_del_init(&cur->node);
-                cur->func(fence, cur);
-        }
-        return ret;
+        return dma_fence_signal_timestamp_locked(fence, ktime_get());
}
EXPORT_SYMBOL(dma_fence_signal_locked);

...
int dma_fence_signal(struct dma_fence *fence)
{
        unsigned long flags;
+        int ret;
+        bool tmp;

        if (!fence)
                return -EINVAL;

-        if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-                return -EINVAL;
+        tmp = dma_fence_begin_signalling();

-        fence->timestamp = ktime_get();
-        set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
-        trace_dma_fence_signaled(fence);
+        spin_lock_irqsave(fence->lock, flags);
+        ret = dma_fence_signal_timestamp_locked(fence, ktime_get());
+        spin_unlock_irqrestore(fence->lock, flags);

-        if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) {
-                struct dma_fence_cb *cur, *tmp;
+        dma_fence_end_signalling(tmp);

-                spin_lock_irqsave(fence->lock, flags);
-                list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
-                        list_del_init(&cur->node);
-                        cur->func(fence, cur);
-                }
-                spin_unlock_irqrestore(fence->lock, flags);
-        }
-        return 0;
+        return ret;
}
EXPORT_SYMBOL(dma_fence_signal);

...
        if (WARN_ON(timeout < 0))
                return -EINVAL;

+        might_sleep();
+
+        __dma_fence_might_wait();
+
        trace_dma_fence_wait_start(fence);
        if (fence->ops->wait)
                ret = fence->ops->wait(fence, intr, timeout);
...

        trace_dma_fence_destroy(fence);

-        /* Failed to signal before release, could be a refcounting issue */
-        WARN_ON(!list_empty(&fence->cb_list));
+        if (WARN(!list_empty(&fence->cb_list) &&
+                 !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags),
+                 "Fence %s:%s:%llx:%llx released with pending signals!\n",
+                 fence->ops->get_driver_name(fence),
+                 fence->ops->get_timeline_name(fence),
+                 fence->context, fence->seqno)) {
+                unsigned long flags;
+
+                /*
+                 * Failed to signal before release, likely a refcounting issue.
+                 *
+                 * This should never happen, but if it does make sure that we
+                 * don't leave chains dangling. We set the error flag first
+                 * so that the callbacks know this signal is due to an error.
+                 */
+                spin_lock_irqsave(fence->lock, flags);
+                fence->error = -EDEADLK;
+                dma_fence_signal_locked(fence);
+                spin_unlock_irqrestore(fence->lock, flags);
+        }

        if (fence->ops->release)
                fence->ops->release(fence);
...
 */
void
dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
-               spinlock_t *lock, u64 context, unsigned seqno)
+               spinlock_t *lock, u64 context, u64 seqno)
{
        BUG_ON(!lock);
        BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);