[...]
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Fence mechanism for dma-buf and to allow for asynchronous dma access
  *
[...]
  * Authors:
  * Rob Clark <robdclark@gmail.com>
  * Maarten Lankhorst <maarten.lankhorst@canonical.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
  */
 
 #include <linux/slab.h>
[...]
 
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
+EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
+
+static DEFINE_SPINLOCK(dma_fence_stub_lock);
+static struct dma_fence dma_fence_stub;
 
 /*
  * fence context counter: each execution context should have its own
[...]
  * context or not. One device can have multiple separate contexts,
  * and they're used if some engine can run independently of another.
  */
-static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(0);
+static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1);
 
 /**
  * DOC: DMA fences overview
[...]
  *
  * - Then there's also implicit fencing, where the synchronization points are
  *   implicitly passed around as part of shared &dma_buf instances. Such
- *   implicit fences are stored in &struct reservation_object through the
+ *   implicit fences are stored in &struct dma_resv through the
  *   &dma_buf.resv pointer.
  */
+
+/**
+ * DOC: fence cross-driver contract
+ *
+ * Since &dma_fence provides a cross-driver contract, all drivers must follow
+ * the same rules:
+ *
+ * * Fences must complete in a reasonable time. Fences which represent kernels
+ *   and shaders submitted by userspace, which could run forever, must be backed
+ *   up by timeout and gpu hang recovery code. Minimally that code must prevent
+ *   further command submission and force complete all in-flight fences, e.g.
+ *   when the driver or hardware do not support gpu reset, or if the gpu reset
+ *   failed for some reason. Ideally the driver supports gpu recovery which only
+ *   affects the offending userspace context, and no other userspace
+ *   submissions.
+ *
+ * * Drivers may have different ideas of what completion within a reasonable
+ *   time means. Some hang recovery code uses a fixed timeout, others a mix
+ *   between observing forward progress and increasingly strict timeouts.
+ *   Drivers should not try to second guess timeout handling of fences from
+ *   other drivers.
+ *
+ * * To ensure there's no deadlocks of dma_fence_wait() against other locks
+ *   drivers should annotate all code required to reach dma_fence_signal(),
+ *   which completes the fences, with dma_fence_begin_signalling() and
+ *   dma_fence_end_signalling().
+ *
+ * * Drivers are allowed to call dma_fence_wait() while holding dma_resv_lock().
+ *   This means any code required for fence completion cannot acquire a
+ *   &dma_resv lock. Note that this also pulls in the entire established
+ *   locking hierarchy around dma_resv_lock() and dma_resv_unlock().
+ *
+ * * Drivers are allowed to call dma_fence_wait() from their &shrinker
+ *   callbacks. This means any code required for fence completion cannot
+ *   allocate memory with GFP_KERNEL.
+ *
+ * * Drivers are allowed to call dma_fence_wait() from their &mmu_notifier
+ *   respectively &mmu_interval_notifier callbacks. This means any code required
+ *   for fence completion cannot allocate memory with GFP_NOFS or GFP_NOIO.
+ *   Only GFP_ATOMIC is permissible, which might fail.
+ *
+ * Note that only GPU drivers have a reasonable excuse for both requiring
+ * &mmu_interval_notifier and &shrinker callbacks at the same time as having to
+ * track asynchronous compute work using &dma_fence. No driver outside of
+ * drivers/gpu should ever call dma_fence_wait() in such contexts.
+ */
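To make the "fences must complete" rule concrete, here is a minimal sketch, not part of this patch, of hang-recovery code force-completing in-flight fences after submission has been blocked. Only dma_fence_set_error(), dma_fence_signal() and dma_fence_put() are real APIs; the example_* names and the job bookkeeping are hypothetical:

#include <linux/dma-fence.h>
#include <linux/errno.h>
#include <linux/list.h>

/* Hypothetical per-ring bookkeeping, purely for illustration. */
struct example_job {
        struct dma_fence *fence;
        struct list_head node;
};

/*
 * Force-complete every in-flight fence, e.g. when gpu reset is
 * unsupported or has failed. Assumes further command submission
 * has already been blocked by the caller.
 */
static void example_force_complete_all(struct list_head *in_flight)
{
        struct example_job *job, *tmp;

        list_for_each_entry_safe(job, tmp, in_flight, node) {
                /* Let waiters distinguish this from normal completion. */
                dma_fence_set_error(job->fence, -ETIMEDOUT);
                dma_fence_signal(job->fence);
                list_del_init(&job->node);
                dma_fence_put(job->fence);
        }
}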
+
+static const char *dma_fence_stub_get_name(struct dma_fence *fence)
+{
+        return "stub";
+}
+
+static const struct dma_fence_ops dma_fence_stub_ops = {
+        .get_driver_name = dma_fence_stub_get_name,
+        .get_timeline_name = dma_fence_stub_get_name,
+};
+
+/**
+ * dma_fence_get_stub - return a signaled fence
+ *
+ * Return a stub fence which is already signaled.
+ */
+struct dma_fence *dma_fence_get_stub(void)
+{
+        spin_lock(&dma_fence_stub_lock);
+        if (!dma_fence_stub.ops) {
+                dma_fence_init(&dma_fence_stub,
+                               &dma_fence_stub_ops,
+                               &dma_fence_stub_lock,
+                               0, 0);
+                dma_fence_signal_locked(&dma_fence_stub);
+        }
+        spin_unlock(&dma_fence_stub_lock);
+
+        return dma_fence_get(&dma_fence_stub);
+}
+EXPORT_SYMBOL(dma_fence_get_stub);
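A plausible consumer of the new stub, as a sketch with a hypothetical example_ctx; only dma_fence_get_stub() and dma_fence_get() are real. Handing back an already-signaled fence when there is no dependency lets callers treat "no dependency" and "real dependency" uniformly:

#include <linux/dma-fence.h>

/* Hypothetical context tracking the last submitted fence. */
struct example_ctx {
        struct dma_fence *last_fence;
};

static struct dma_fence *example_get_dependency(struct example_ctx *ctx)
{
        if (!ctx->last_fence)
                return dma_fence_get_stub(); /* signaled, but refcounted */

        return dma_fence_get(ctx->last_fence);
}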
 
 /**
  * dma_fence_context_alloc - allocate an array of fence contexts
[...]
 u64 dma_fence_context_alloc(unsigned num)
 {
         WARN_ON(!num);
-        return atomic64_add_return(num, &dma_fence_context_counter) - num;
+        return atomic64_fetch_add(num, &dma_fence_context_counter);
 }
 EXPORT_SYMBOL(dma_fence_context_alloc);
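atomic64_fetch_add() returns the pre-add value, so this is the same arithmetic as atomic64_add_return(num) - num, just in one step; and since the stub fence above is initialized with context 0, starting the counter at 1 appears to keep context 0 reserved for it. For context, here is a sketch, with hypothetical example_* names, of how a driver typically consumes this API: one context per engine, with a monotonically increasing seqno per context:

#include <linux/dma-fence.h>
#include <linux/spinlock.h>

/* Hypothetical per-engine fence state. */
struct example_engine {
        u64 fence_context;      /* from dma_fence_context_alloc() */
        u64 next_seqno;         /* serialized by the driver's submit path */
        spinlock_t fence_lock;
};

static void example_engine_fence_init(struct example_engine *engine)
{
        engine->fence_context = dma_fence_context_alloc(1);
        engine->next_seqno = 0;
        spin_lock_init(&engine->fence_lock);
}

/* Fences on the same context are ordered by their seqno. */
static void example_fence_attach(struct example_engine *engine,
                                 struct dma_fence *fence,
                                 const struct dma_fence_ops *ops)
{
        dma_fence_init(fence, ops, &engine->fence_lock,
                       engine->fence_context, ++engine->next_seqno);
}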
+
+/**
+ * DOC: fence signalling annotation
+ *
+ * Proving correctness of all the kernel code around &dma_fence through code
+ * review and testing is tricky for a few reasons:
+ *
+ * * It is a cross-driver contract, and therefore all drivers must follow the
+ *   same rules for lock nesting order, calling contexts for various functions
+ *   and anything else significant for in-kernel interfaces. But it is also
+ *   impossible to test all drivers in a single machine, hence brute-force N vs.
+ *   N testing of all combinations is impossible. Even just limiting to the
+ *   possible combinations is infeasible.
+ *
+ * * There is an enormous amount of driver code involved. For render drivers
+ *   there's the tail of command submission, after fences are published,
+ *   scheduler code, interrupt and workers to process job completion,
+ *   and timeout, gpu reset and gpu hang recovery code. Plus for integration
+ *   with core mm we have &mmu_notifier, respectively &mmu_interval_notifier,
+ *   and &shrinker. For modesetting drivers there's the commit tail functions
+ *   between when fences for an atomic modeset are published, and when the
+ *   corresponding vblank completes, including any interrupt processing and
+ *   related workers. Auditing all that code, across all drivers, is not
+ *   feasible.
+ *
+ * * Due to how many other subsystems are involved and the locking hierarchies
+ *   this pulls in there is extremely thin wiggle-room for driver-specific
+ *   differences. &dma_fence interacts with almost all of the core memory
+ *   handling through page fault handlers via &dma_resv, dma_resv_lock() and
+ *   dma_resv_unlock(). On the other side it also interacts through all
+ *   allocation sites through &mmu_notifier and &shrinker.
+ *
+ * Furthermore lockdep does not handle cross-release dependencies, which means
+ * any deadlocks between dma_fence_wait() and dma_fence_signal() can't be caught
+ * at runtime with some quick testing. The simplest example is one thread
+ * waiting on a &dma_fence while holding a lock::
+ *
+ *    lock(A);
+ *    dma_fence_wait(B);
+ *    unlock(A);
+ *
+ * while the other thread is stuck trying to acquire the same lock, which
+ * prevents it from signalling the fence the previous thread is stuck waiting
+ * on::
+ *
+ *    lock(A);
+ *    unlock(A);
+ *    dma_fence_signal(B);
+ *
+ * By manually annotating all code relevant to signalling a &dma_fence we can
+ * teach lockdep about these dependencies, which also helps with the validation
+ * headache since now lockdep can check all the rules for us::
+ *
+ *    cookie = dma_fence_begin_signalling();
+ *    lock(A);
+ *    unlock(A);
+ *    dma_fence_signal(B);
+ *    dma_fence_end_signalling(cookie);
+ *
+ * For using dma_fence_begin_signalling() and dma_fence_end_signalling() to
+ * annotate critical sections the following rules need to be observed:
+ *
+ * * All code necessary to complete a &dma_fence must be annotated, from the
+ *   point where a fence is accessible to other threads, to the point where
+ *   dma_fence_signal() is called. Un-annotated code can contain deadlock issues,
+ *   and due to the very strict rules and many corner cases it is infeasible to
+ *   catch these just with review or normal stress testing.
+ *
+ * * &struct dma_resv deserves a special note, since the readers are only
+ *   protected by rcu. This means the signalling critical section starts as soon
+ *   as the new fences are installed, even before dma_resv_unlock() is called.
+ *
+ * * The only exception are fast paths and opportunistic signalling code, which
+ *   calls dma_fence_signal() purely as an optimization, but is not required to
+ *   guarantee completion of a &dma_fence. The usual example is a wait IOCTL
+ *   which calls dma_fence_signal(), while the mandatory completion path goes
+ *   through a hardware interrupt and possible job completion worker.
+ *
+ * * To aid composability of code, the annotations can be freely nested, as long
+ *   as the overall locking hierarchy is consistent. The annotations also work
+ *   both in interrupt and process context. Due to implementation details this
+ *   requires that callers pass an opaque cookie from
+ *   dma_fence_begin_signalling() to dma_fence_end_signalling().
+ *
+ * * Validation against the cross driver contract is implemented by priming
+ *   lockdep with the relevant hierarchy at boot-up. This means even just
+ *   testing with a single device is enough to validate a driver, at least as
+ *   far as deadlocks with dma_fence_wait() against dma_fence_signal() are
+ *   concerned.
+ */
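As a concrete illustration of these rules, a sketch of a job-completion worker annotated for lockdep; the example_job structure and worker are hypothetical, while dma_fence_begin_signalling()/dma_fence_end_signalling() are the API added below:

#include <linux/dma-fence.h>
#include <linux/workqueue.h>

struct example_job {
        struct dma_fence *fence;
        struct work_struct done_work;
};

/* Part of the mandatory completion path, so it must be annotated. */
static void example_job_done_worker(struct work_struct *work)
{
        struct example_job *job =
                container_of(work, struct example_job, done_work);
        bool cookie;

        cookie = dma_fence_begin_signalling();
        /* any lock taken here teaches lockdep a signalling dependency */
        dma_fence_signal(job->fence);
        dma_fence_end_signalling(cookie);
}

Note that dma_fence_signal() itself opens a nested annotation (see its new body further down), which is fine since the annotations nest freely.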
+#ifdef CONFIG_LOCKDEP
+static struct lockdep_map dma_fence_lockdep_map = {
+        .name = "dma_fence_map"
+};
+
+/**
+ * dma_fence_begin_signalling - begin a critical DMA fence signalling section
+ *
+ * Drivers should use this to annotate the beginning of any code section
+ * required to eventually complete &dma_fence by calling dma_fence_signal().
+ *
+ * The end of these critical sections is annotated with
+ * dma_fence_end_signalling().
+ *
+ * Returns:
+ *
+ * Opaque cookie needed by the implementation, which needs to be passed to
+ * dma_fence_end_signalling().
+ */
+bool dma_fence_begin_signalling(void)
+{
+        /* explicitly nesting ... */
+        if (lock_is_held_type(&dma_fence_lockdep_map, 1))
+                return true;
+
+        /* rely on might_sleep check for soft/hardirq locks */
+        if (in_atomic())
+                return true;
+
+        /* ... and non-recursive readlock */
+        lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _RET_IP_);
+
+        return false;
+}
+EXPORT_SYMBOL(dma_fence_begin_signalling);
+
+/**
+ * dma_fence_end_signalling - end a critical DMA fence signalling section
+ * @cookie: opaque cookie from dma_fence_begin_signalling()
+ *
+ * Closes a critical section annotation opened by dma_fence_begin_signalling().
+ */
+void dma_fence_end_signalling(bool cookie)
+{
+        if (cookie)
+                return;
+
+        lock_release(&dma_fence_lockdep_map, _RET_IP_);
+}
+EXPORT_SYMBOL(dma_fence_end_signalling);
+
+void __dma_fence_might_wait(void)
+{
+        bool tmp;
+
+        tmp = lock_is_held_type(&dma_fence_lockdep_map, 1);
+        if (tmp)
+                lock_release(&dma_fence_lockdep_map, _THIS_IP_);
+        lock_map_acquire(&dma_fence_lockdep_map);
+        lock_map_release(&dma_fence_lockdep_map);
+        if (tmp)
+                lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _THIS_IP_);
+}
+#endif
+
+
+/**
+ * dma_fence_signal_timestamp_locked - signal completion of a fence
+ * @fence: the fence to signal
+ * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain
+ *
+ * Signal completion for software callbacks on a fence; this will unblock
+ * dma_fence_wait() calls and run all the callbacks added with
+ * dma_fence_add_callback(). Can be called multiple times, but since a fence
+ * can only go from the unsignaled to the signaled state and not back, it will
+ * only be effective the first time. Sets the provided timestamp as the fence
+ * signal timestamp.
+ *
+ * Unlike dma_fence_signal_timestamp(), this function must be called with
+ * &dma_fence.lock held.
+ *
+ * Returns 0 on success and a negative error value when @fence has been
+ * signalled already.
+ */
+int dma_fence_signal_timestamp_locked(struct dma_fence *fence,
+                                      ktime_t timestamp)
+{
+        struct dma_fence_cb *cur, *tmp;
+        struct list_head cb_list;
+
+        lockdep_assert_held(fence->lock);
+
+        if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+                                      &fence->flags)))
+                return -EINVAL;
+
+        /* Stash the cb_list before replacing it with the timestamp */
+        list_replace(&fence->cb_list, &cb_list);
+
+        fence->timestamp = timestamp;
+        set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
+        trace_dma_fence_signaled(fence);
+
+        list_for_each_entry_safe(cur, tmp, &cb_list, node) {
+                INIT_LIST_HEAD(&cur->node);
+                cur->func(fence, cur);
+        }
+
+        return 0;
+}
+EXPORT_SYMBOL(dma_fence_signal_timestamp_locked);
+
+/**
+ * dma_fence_signal_timestamp - signal completion of a fence
+ * @fence: the fence to signal
+ * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain
+ *
+ * Signal completion for software callbacks on a fence; this will unblock
+ * dma_fence_wait() calls and run all the callbacks added with
+ * dma_fence_add_callback(). Can be called multiple times, but since a fence
+ * can only go from the unsignaled to the signaled state and not back, it will
+ * only be effective the first time. Sets the provided timestamp as the fence
+ * signal timestamp.
+ *
+ * Returns 0 on success and a negative error value when @fence has been
+ * signalled already.
+ */
+int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp)
+{
+        unsigned long flags;
+        int ret;
+
+        if (!fence)
+                return -EINVAL;
+
+        spin_lock_irqsave(fence->lock, flags);
+        ret = dma_fence_signal_timestamp_locked(fence, timestamp);
+        spin_unlock_irqrestore(fence->lock, flags);
+
+        return ret;
+}
+EXPORT_SYMBOL(dma_fence_signal_timestamp);
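A possible consumer, sketched with hypothetical names: hardware that latches the completion time lets the driver report a more accurate timestamp than ktime_get() taken at interrupt time, assuming the value is already in the CLOCK_MONOTONIC domain as the documentation requires. dma_fence_signal_timestamp() and ns_to_ktime() are real APIs:

#include <linux/dma-fence.h>
#include <linux/ktime.h>

/*
 * Hypothetical irq bottom half: hw_completion_ns is assumed to already
 * be CLOCK_MONOTONIC nanoseconds, e.g. converted from a hardware clock.
 */
static void example_complete_with_hw_time(struct dma_fence *fence,
                                          u64 hw_completion_ns)
{
        dma_fence_signal_timestamp(fence, ns_to_ktime(hw_completion_ns));
}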
 
 /**
  * dma_fence_signal_locked - signal completion of a fence
[...]
  */
 int dma_fence_signal_locked(struct dma_fence *fence)
 {
-        struct dma_fence_cb *cur, *tmp;
-        int ret = 0;
-
-        lockdep_assert_held(fence->lock);
-
-        if (WARN_ON(!fence))
-                return -EINVAL;
-
-        if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
-                ret = -EINVAL;
-
-                /*
-                 * we might have raced with the unlocked dma_fence_signal,
-                 * still run through all callbacks
-                 */
-        } else {
-                fence->timestamp = ktime_get();
-                set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
-                trace_dma_fence_signaled(fence);
-        }
-
-        list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
-                list_del_init(&cur->node);
-                cur->func(fence, cur);
-        }
-        return ret;
+        return dma_fence_signal_timestamp_locked(fence, ktime_get());
 }
 EXPORT_SYMBOL(dma_fence_signal_locked);
 
[...]
 int dma_fence_signal(struct dma_fence *fence)
 {
         unsigned long flags;
+        int ret;
+        bool tmp;
 
         if (!fence)
                 return -EINVAL;
 
-        if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-                return -EINVAL;
+        tmp = dma_fence_begin_signalling();
 
-        fence->timestamp = ktime_get();
-        set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
-        trace_dma_fence_signaled(fence);
+        spin_lock_irqsave(fence->lock, flags);
+        ret = dma_fence_signal_timestamp_locked(fence, ktime_get());
+        spin_unlock_irqrestore(fence->lock, flags);
 
-        if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) {
-                struct dma_fence_cb *cur, *tmp;
+        dma_fence_end_signalling(tmp);
 
-                spin_lock_irqsave(fence->lock, flags);
-                list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
-                        list_del_init(&cur->node);
-                        cur->func(fence, cur);
-                }
-                spin_unlock_irqrestore(fence->lock, flags);
-        }
-        return 0;
+        return ret;
 }
 EXPORT_SYMBOL(dma_fence_signal);
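For reference (not in the patch), the callback side that dma_fence_signal() drives. dma_fence_add_callback() is the real API; it returns -ENOENT when the fence already signaled, in which case the callback will never run. Callbacks are invoked with &dma_fence.lock held, so they must not sleep. The completion-based waiter below is hypothetical:

#include <linux/completion.h>
#include <linux/dma-fence.h>

struct example_waiter {
        struct dma_fence_cb cb;
        struct completion done;
};

/* Runs under fence->lock from the signalling path, so keep it short. */
static void example_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
        struct example_waiter *w = container_of(cb, struct example_waiter, cb);

        complete(&w->done);
}

static int example_arm_waiter(struct dma_fence *fence, struct example_waiter *w)
{
        init_completion(&w->done);

        /* -ENOENT means the fence already signaled; treat as done. */
        return dma_fence_add_callback(fence, &w->cb, example_fence_cb);
}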
 
[...]
         if (WARN_ON(timeout < 0))
                 return -EINVAL;
 
+        might_sleep();
+
+        __dma_fence_might_wait();
+
         trace_dma_fence_wait_start(fence);
         if (fence->ops->wait)
                 ret = fence->ops->wait(fence, intr, timeout);
[...]
 
         trace_dma_fence_destroy(fence);
 
-        /* Failed to signal before release, could be a refcounting issue */
-        WARN_ON(!list_empty(&fence->cb_list));
+        if (WARN(!list_empty(&fence->cb_list) &&
+                 !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags),
+                 "Fence %s:%s:%llx:%llx released with pending signals!\n",
+                 fence->ops->get_driver_name(fence),
+                 fence->ops->get_timeline_name(fence),
+                 fence->context, fence->seqno)) {
+                unsigned long flags;
+
+                /*
+                 * Failed to signal before release, likely a refcounting issue.
+                 *
+                 * This should never happen, but if it does make sure that we
+                 * don't leave chains dangling. We set the error flag first
+                 * so that the callbacks know this signal is due to an error.
+                 */
+                spin_lock_irqsave(fence->lock, flags);
+                fence->error = -EDEADLK;
+                dma_fence_signal_locked(fence);
+                spin_unlock_irqrestore(fence->lock, flags);
+        }
 
         if (fence->ops->release)
                 fence->ops->release(fence);
[...]
  */
 void
 dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
-               spinlock_t *lock, u64 context, unsigned seqno)
+               spinlock_t *lock, u64 context, u64 seqno)
 {
         BUG_ON(!lock);
         BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);
|---|