2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/drivers/gpu/drm/i915/i915_active.c
@@ -96,8 +96,7 @@
 static void debug_active_activate(struct i915_active *ref)
 {
         lockdep_assert_held(&ref->tree_lock);
-        if (!atomic_read(&ref->count)) /* before the first inc */
-                debug_object_activate(ref, &active_debug_desc);
+        debug_object_activate(ref, &active_debug_desc);
 }
 
 static void debug_active_deactivate(struct i915_active *ref)
....@@ -432,8 +431,7 @@
432431 * we can use it to substitute for the pending idle-barrer
433432 * request that we want to emit on the kernel_context.
434433 */
435
- __active_del_barrier(ref, node_from_active(active));
436
- return true;
434
+ return __active_del_barrier(ref, node_from_active(active));
437435 }
438436
439437 int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
@@ -446,18 +444,24 @@
         if (err)
                 return err;
 
-        active = active_instance(ref, idx);
-        if (!active) {
-                err = -ENOMEM;
-                goto out;
-        }
+        do {
+                active = active_instance(ref, idx);
+                if (!active) {
+                        err = -ENOMEM;
+                        goto out;
+                }
 
-        if (replace_barrier(ref, active)) {
-                RCU_INIT_POINTER(active->fence, NULL);
-                atomic_dec(&ref->count);
-        }
-        if (!__i915_active_fence_set(active, fence))
+                if (replace_barrier(ref, active)) {
+                        RCU_INIT_POINTER(active->fence, NULL);
+                        atomic_dec(&ref->count);
+                }
+        } while (unlikely(is_barrier(active)));
+
+        fence = __i915_active_fence_set(active, fence);
+        if (!fence)
                 __i915_active_acquire(ref);
+        else
+                dma_fence_put(fence);
 
 out:
         i915_active_release(ref);
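
The do/while above exists because replace_barrier() can lose a race: __active_del_barrier() fails when the engine consumes the idle barrier first, leaving the node still marked as a barrier, so i915_active_ref() has to look the slot up again until it holds a normal node. Below is a minimal single-threaded C11 sketch of that retry shape only; the slot, BARRIER_BIT and the helper names are invented for illustration and are not part of the driver.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Invented model: one tracking slot whose low bit marks it as an idle barrier. */
#define BARRIER_BIT 1ul

static atomic_ulong slot = 4ul | BARRIER_BIT; /* starts out as a barrier */

static bool is_barrier_value(unsigned long v)
{
        return v & BARRIER_BIT;
}

/* replace_barrier() analogue: claim the barrier unless a consumer got there first. */
static bool try_replace_barrier(unsigned long v)
{
        return atomic_compare_exchange_strong(&slot, &v, v & ~BARRIER_BIT);
}

int main(void)
{
        unsigned long v;

        /* Shape of the new i915_active_ref() loop: look the slot up, try to
         * claim it, and start over while what we hold is still a barrier. */
        do {
                v = atomic_load(&slot);
                if (is_barrier_value(v) && try_replace_barrier(v))
                        v &= ~BARRIER_BIT;
        } while (is_barrier_value(v));

        printf("claimed slot value: %#lx\n", v);
        return 0;
}
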
@@ -476,13 +480,9 @@
                 return NULL;
         }
 
-        rcu_read_lock();
         prev = __i915_active_fence_set(active, fence);
-        if (prev)
-                prev = dma_fence_get_rcu(prev);
-        else
+        if (!prev)
                 __i915_active_acquire(ref);
-        rcu_read_unlock();
 
         return prev;
 }
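
Both call sites above now follow the same ownership rule: __i915_active_fence_set() returns either NULL (the slot was idle, so the tracker takes an extra activity count) or a fence reference that the caller owns and must eventually dma_fence_put(), either right away (i915_active_ref) or after handing it on to its own caller (i915_active_set_exclusive). A compilable userspace sketch of that convention follows; struct fence, struct tracker and the helpers are invented stand-ins, not the driver API, and the slot here keeps a counted reference purely to keep the model self-contained.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Invented stand-ins for dma_fence and i915_active. */
struct fence {
        atomic_int refcount;
        int seqno;
};

struct tracker {
        atomic_int count;           /* activity count (i915_active.count analogue) */
        struct fence *_Atomic last; /* last fence recorded on this timeline */
};

static struct fence *fence_get(struct fence *f)
{
        if (f)
                atomic_fetch_add(&f->refcount, 1);
        return f;
}

static void fence_put(struct fence *f)
{
        if (f && atomic_fetch_sub(&f->refcount, 1) == 1)
                free(f);
}

/*
 * Record @f as the last fence and hand an owned reference to the previous
 * one (or NULL) back to the caller, mirroring the new contract of
 * __i915_active_fence_set(): a non-NULL return must be put by the caller.
 */
static struct fence *tracker_set_fence(struct tracker *t, struct fence *f)
{
        return atomic_exchange(&t->last, fence_get(f));
}

/* Caller pattern from i915_active_ref(): acquire on first use, put otherwise. */
static void tracker_ref(struct tracker *t, struct fence *f)
{
        struct fence *prev = tracker_set_fence(t, f);

        if (!prev)
                atomic_fetch_add(&t->count, 1); /* __i915_active_acquire() analogue */
        else
                fence_put(prev);
}

static struct fence *fence_create(int seqno)
{
        struct fence *f = calloc(1, sizeof(*f));

        atomic_init(&f->refcount, 1);
        f->seqno = seqno;
        return f;
}

int main(void)
{
        struct tracker t = { 0 };
        struct fence *a = fence_create(1), *b = fence_create(2);

        tracker_ref(&t, a); /* first fence: the tracker becomes active */
        tracker_ref(&t, b); /* second fence: the previous reference is dropped */

        printf("activity count: %d\n", atomic_load(&t.count)); /* prints 1 */

        fence_put(atomic_exchange(&t.last, NULL)); /* drop the slot's reference */
        fence_put(a);
        fence_put(b);
        return 0;
}
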
@@ -1049,10 +1049,11 @@
  *
  * Records the new @fence as the last active fence along its timeline in
  * this active tracker, moving the tracking callbacks from the previous
- * fence onto this one. Returns the previous fence (if not already completed),
- * which the caller must ensure is executed before the new fence. To ensure
- * that the order of fences within the timeline of the i915_active_fence is
- * understood, it should be locked by the caller.
+ * fence onto this one. Gets and returns a reference to the previous fence
+ * (if not already completed), which the caller must put after making sure
+ * that it is executed before the new fence. To ensure that the order of
+ * fences within the timeline of the i915_active_fence is understood, it
+ * should be locked by the caller.
  */
 struct dma_fence *
 __i915_active_fence_set(struct i915_active_fence *active,
@@ -1061,7 +1062,23 @@
         struct dma_fence *prev;
         unsigned long flags;
 
-        if (fence == rcu_access_pointer(active->fence))
+        /*
+         * In case of fences embedded in i915_requests, their memory is
+         * SLAB_TYPESAFE_BY_RCU, so it can be reused right after release
+         * by new requests. Then, there is a risk of passing back a pointer
+         * to a new, completely unrelated fence that reuses the same memory
+         * while tracked under a different active tracker. Combined with i915
+         * perf open/close operations that build await dependencies between
+         * engine kernel context requests and user requests from different
+         * timelines, this can lead to dependency loops and infinite waits.
+         *
+         * As a countermeasure, we try to get a reference to the active->fence
+         * first, so if we succeed and pass it back to our user then it is not
+         * released and potentially reused by an unrelated request before the
+         * user has a chance to set up an await dependency on it.
+         */
+        prev = i915_active_fence_get(active);
+        if (fence == prev)
                 return fence;
 
         GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
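
The new comment describes the hazard: i915_request memory comes from a SLAB_TYPESAFE_BY_RCU cache, so a bare pointer loaded from active->fence may, by the time it is used, name a recycled, unrelated request. i915_active_fence_get() defends against that by acquiring a reference that can only succeed while the fence is still alive (in the driver it wraps dma_fence_get_rcu_safe() under rcu_read_lock()). Here is a rough userspace analogue of that acquire-or-give-up step; it models kref_get_unless_zero()-style acquisition only, not the RCU machinery, and the names are invented.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Invented model of a refcounted fence whose memory may be recycled. */
struct fence {
        atomic_int refcount; /* 0 means released and eligible for reuse */
};

/*
 * kref_get_unless_zero()-style acquisition: succeed only while the object
 * is still alive, so the caller is never handed a recycled object.
 */
static bool fence_tryget(struct fence *f)
{
        int old = atomic_load(&f->refcount);

        while (old > 0) {
                if (atomic_compare_exchange_weak(&f->refcount, &old, old + 1))
                        return true;
                /* old was reloaded by the failed compare-exchange; try again */
        }
        return false;
}

static void fence_put(struct fence *f)
{
        atomic_fetch_sub(&f->refcount, 1);
}

int main(void)
{
        struct fence f;

        atomic_init(&f.refcount, 2);

        if (fence_tryget(&f)) /* still alive: a reference is handed to us */
                printf("got reference, count=%d\n", atomic_load(&f.refcount));

        fence_put(&f);
        fence_put(&f);
        fence_put(&f); /* last reference gone; real code would free/recycle here */

        /* From now on the pointer may name recycled memory, so refuse it. */
        printf("tryget after release: %s\n", fence_tryget(&f) ? "yes" : "no");
        return 0;
}
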
@@ -1070,27 +1087,56 @@
          * Consider that we have two threads arriving (A and B), with
          * C already resident as the active->fence.
          *
-         * A does the xchg first, and so it sees C or NULL depending
-         * on the timing of the interrupt handler. If it is NULL, the
-         * previous fence must have been signaled and we know that
-         * we are first on the timeline. If it is still present,
-         * we acquire the lock on that fence and serialise with the interrupt
-         * handler, in the process removing it from any future interrupt
-         * callback. A will then wait on C before executing (if present).
-         *
-         * As B is second, it sees A as the previous fence and so waits for
-         * it to complete its transition and takes over the occupancy for
-         * itself -- remembering that it needs to wait on A before executing.
+         * Both A and B have got a reference to C or NULL, depending on the
+         * timing of the interrupt handler. Let's assume that if A has got C
+         * then it has locked C first (before B).
          *
          * Note the strong ordering of the timeline also provides consistent
          * nesting rules for the fence->lock; the inner lock is always the
          * older lock.
          */
         spin_lock_irqsave(fence->lock, flags);
-        prev = xchg(__active_fence_slot(active), fence);
-        if (prev) {
-                GEM_BUG_ON(prev == fence);
+        if (prev)
                 spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+
+        /*
+         * A does the cmpxchg first, and so it sees C or NULL, as before, or
+         * something else, depending on the timing of other threads and/or
+         * interrupt handler. If not the same as before then A unlocks C if
+         * applicable and retries, starting from an attempt to get a new
+         * active->fence. Meanwhile, B follows the same path as A.
+         * Once A succeeds with cmpxchg, B fails again, retries, gets A from
+         * active->fence, locks it as soon as A completes, and possibly
+         * succeeds with cmpxchg.
+         */
+        while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) {
+                if (prev) {
+                        spin_unlock(prev->lock);
+                        dma_fence_put(prev);
+                }
+                spin_unlock_irqrestore(fence->lock, flags);
+
+                prev = i915_active_fence_get(active);
+                GEM_BUG_ON(prev == fence);
+
+                spin_lock_irqsave(fence->lock, flags);
+                if (prev)
+                        spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+        }
+
+        /*
+         * If prev is NULL then the previous fence must have been signaled
+         * and we know that we are first on the timeline. If it is still
+         * present then, having the lock on that fence already acquired, we
+         * serialise with the interrupt handler, in the process of removing it
+         * from any future interrupt callback. A will then wait on C before
+         * executing (if present).
+         *
+         * As B is second, it sees A as the previous fence and so waits for
+         * it to complete its transition and takes over the occupancy for
+         * itself -- remembering that it needs to wait on A before executing.
+         */
+        if (prev) {
                 __list_del_entry(&active->cb.node);
                 spin_unlock(prev->lock); /* serialise with prev->cb_list */
         }
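
The replacement for the old unconditional xchg() is a snapshot-and-compare loop: take a referenced snapshot of active->fence, take the locks, then publish the new fence only if the slot still holds that snapshot; on a mismatch, drop the stale lock and reference and start again. Below is a condensed userspace model of the reference handling in that loop (the fence spinlocks are deliberately left out); struct fence, slot_set() and the other names are invented for illustration.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Invented refcounted fence and a slot publishing the most recent one. */
struct fence {
        atomic_int refcount;
        int seqno;
};

static struct fence *fence_get(struct fence *f)
{
        if (f)
                atomic_fetch_add(&f->refcount, 1);
        return f;
}

static void fence_put(struct fence *f)
{
        if (f && atomic_fetch_sub(&f->refcount, 1) == 1)
                free(f);
}

/* The slot stores a borrowed pointer, as active->fence does in the driver. */
static struct fence *_Atomic slot;

/*
 * Shape of the new __i915_active_fence_set() loop: snapshot the slot with a
 * reference held, then swap in @fence only if the slot still holds that
 * snapshot; otherwise drop the stale reference and retry. The reference to
 * the previous fence is returned to the caller, which must put it.
 */
static struct fence *slot_set(struct fence *fence)
{
        struct fence *snap = fence_get(atomic_load(&slot));
        struct fence *expected = snap;

        while (!atomic_compare_exchange_strong(&slot, &expected, fence)) {
                /* The slot changed under us: start over with a fresh snapshot. */
                fence_put(snap);
                snap = fence_get(atomic_load(&slot));
                expected = snap;
        }
        return snap; /* owned reference to the previous fence, may be NULL */
}

int main(void)
{
        struct fence *a = calloc(1, sizeof(*a)), *b = calloc(1, sizeof(*b));
        struct fence *prev;

        atomic_init(&a->refcount, 1);
        atomic_init(&b->refcount, 1);
        a->seqno = 1;
        b->seqno = 2;

        prev = slot_set(a); /* empty slot: nothing for the caller to wait on */
        printf("first prev: %p\n", (void *)prev);

        prev = slot_set(b); /* the caller would await @prev, then put it */
        printf("second prev seqno: %d\n", prev->seqno);
        fence_put(prev);

        fence_put(a);
        fence_put(b);
        return 0;
}
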
@@ -1107,11 +1153,7 @@
         int err = 0;
 
         /* Must maintain timeline ordering wrt previous active requests */
-        rcu_read_lock();
         fence = __i915_active_fence_set(active, &rq->fence);
-        if (fence) /* but the previous fence may not belong to that timeline! */
-                fence = dma_fence_get_rcu(fence);
-        rcu_read_unlock();
         if (fence) {
                 err = i915_request_await_dma_fence(rq, fence);
                 dma_fence_put(fence);