2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/drivers/gpu/drm/i915/i915_active.c
@@ -96,8 +96,7 @@
 static void debug_active_activate(struct i915_active *ref)
 {
         lockdep_assert_held(&ref->tree_lock);
-        if (!atomic_read(&ref->count)) /* before the first inc */
-                debug_object_activate(ref, &active_debug_desc);
+        debug_object_activate(ref, &active_debug_desc);
 }
 
 static void debug_active_deactivate(struct i915_active *ref)
....@@ -432,8 +431,7 @@
432431 * we can use it to substitute for the pending idle-barrer
433432 * request that we want to emit on the kernel_context.
434433 */
435
- __active_del_barrier(ref, node_from_active(active));
436
- return true;
434
+ return __active_del_barrier(ref, node_from_active(active));
437435 }
438436
439437 int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
@@ -446,18 +444,24 @@
         if (err)
                 return err;
 
-        active = active_instance(ref, idx);
-        if (!active) {
-                err = -ENOMEM;
-                goto out;
-        }
+        do {
+                active = active_instance(ref, idx);
+                if (!active) {
+                        err = -ENOMEM;
+                        goto out;
+                }
 
-        if (replace_barrier(ref, active)) {
-                RCU_INIT_POINTER(active->fence, NULL);
-                atomic_dec(&ref->count);
-        }
-        if (!__i915_active_fence_set(active, fence))
+                if (replace_barrier(ref, active)) {
+                        RCU_INIT_POINTER(active->fence, NULL);
+                        atomic_dec(&ref->count);
+                }
+        } while (unlikely(is_barrier(active)));
+
+        fence = __i915_active_fence_set(active, fence);
+        if (!fence)
                 __i915_active_acquire(ref);
+        else
+                dma_fence_put(fence);
 
 out:
         i915_active_release(ref);
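
The do/while above exists because replace_barrier() can lose a race: __active_del_barrier() fails when the engine consumes the idle barrier first, leaving the node still marked as a barrier, so i915_active_ref() has to look the slot up again until it holds a normal node. Below is a minimal single-threaded C11 sketch of that retry shape only; the slot, BARRIER_BIT and the helper names are invented for illustration and are not part of the driver.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Invented model: one tracking slot whose low bit marks it as an idle barrier. */
#define BARRIER_BIT 1ul

static atomic_ulong slot = 4ul | BARRIER_BIT; /* starts out as a barrier */

static bool is_barrier_value(unsigned long v)
{
        return v & BARRIER_BIT;
}

/* replace_barrier() analogue: claim the barrier unless a consumer got there first. */
static bool try_replace_barrier(unsigned long v)
{
        return atomic_compare_exchange_strong(&slot, &v, v & ~BARRIER_BIT);
}

int main(void)
{
        unsigned long v;

        /* Shape of the new i915_active_ref() loop: look the slot up, try to
         * claim it, and start over while what we hold is still a barrier. */
        do {
                v = atomic_load(&slot);
                if (is_barrier_value(v) && try_replace_barrier(v))
                        v &= ~BARRIER_BIT;
        } while (is_barrier_value(v));

        printf("claimed slot value: %#lx\n", v);
        return 0;
}
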
@@ -476,13 +480,9 @@
                 return NULL;
         }
 
-        rcu_read_lock();
         prev = __i915_active_fence_set(active, fence);
-        if (prev)
-                prev = dma_fence_get_rcu(prev);
-        else
+        if (!prev)
                 __i915_active_acquire(ref);
-        rcu_read_unlock();
 
         return prev;
 }
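
Both call sites above now follow the same ownership rule: __i915_active_fence_set() returns either NULL (the slot was idle, so the tracker takes an extra activity count) or a fence reference that the caller owns and must eventually dma_fence_put(), either right away (i915_active_ref) or after handing it on to its own caller (i915_active_set_exclusive). A compilable userspace sketch of that convention follows; struct fence, struct tracker and the helpers are invented stand-ins, not the driver API, and the slot here keeps a counted reference purely to keep the model self-contained.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Invented stand-ins for dma_fence and i915_active. */
struct fence {
        atomic_int refcount;
        int seqno;
};

struct tracker {
        atomic_int count;           /* activity count (i915_active.count analogue) */
        struct fence *_Atomic last; /* last fence recorded on this timeline */
};

static struct fence *fence_get(struct fence *f)
{
        if (f)
                atomic_fetch_add(&f->refcount, 1);
        return f;
}

static void fence_put(struct fence *f)
{
        if (f && atomic_fetch_sub(&f->refcount, 1) == 1)
                free(f);
}

/*
 * Record @f as the last fence and hand an owned reference to the previous
 * one (or NULL) back to the caller, mirroring the new contract of
 * __i915_active_fence_set(): a non-NULL return must be put by the caller.
 */
static struct fence *tracker_set_fence(struct tracker *t, struct fence *f)
{
        return atomic_exchange(&t->last, fence_get(f));
}

/* Caller pattern from i915_active_ref(): acquire on first use, put otherwise. */
static void tracker_ref(struct tracker *t, struct fence *f)
{
        struct fence *prev = tracker_set_fence(t, f);

        if (!prev)
                atomic_fetch_add(&t->count, 1); /* __i915_active_acquire() analogue */
        else
                fence_put(prev);
}

static struct fence *fence_create(int seqno)
{
        struct fence *f = calloc(1, sizeof(*f));

        atomic_init(&f->refcount, 1);
        f->seqno = seqno;
        return f;
}

int main(void)
{
        struct tracker t = { 0 };
        struct fence *a = fence_create(1), *b = fence_create(2);

        tracker_ref(&t, a); /* first fence: the tracker becomes active */
        tracker_ref(&t, b); /* second fence: the previous reference is dropped */

        printf("activity count: %d\n", atomic_load(&t.count)); /* prints 1 */

        fence_put(atomic_exchange(&t.last, NULL)); /* drop the slot's reference */
        fence_put(a);
        fence_put(b);
        return 0;
}
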
@@ -1049,10 +1049,11 @@
  *
  * Records the new @fence as the last active fence along its timeline in
  * this active tracker, moving the tracking callbacks from the previous
- * fence onto this one. Returns the previous fence (if not already completed),
- * which the caller must ensure is executed before the new fence. To ensure
- * that the order of fences within the timeline of the i915_active_fence is
- * understood, it should be locked by the caller.
+ * fence onto this one. Gets and returns a reference to the previous fence
+ * (if not already completed), which the caller must put after making sure
+ * that it is executed before the new fence. To ensure that the order of
+ * fences within the timeline of the i915_active_fence is understood, it
+ * should be locked by the caller.
  */
 struct dma_fence *
 __i915_active_fence_set(struct i915_active_fence *active,
@@ -1061,7 +1062,23 @@
         struct dma_fence *prev;
         unsigned long flags;
 
-        if (fence == rcu_access_pointer(active->fence))
+        /*
+         * In case of fences embedded in i915_requests, their memory is
+         * SLAB_TYPESAFE_BY_RCU, so it can be reused right after release
+         * by new requests. Then, there is a risk of passing back a pointer
+         * to a new, completely unrelated fence that reuses the same memory
+         * while tracked under a different active tracker. Combined with i915
+         * perf open/close operations that build await dependencies between
+         * engine kernel context requests and user requests from different
+         * timelines, this can lead to dependency loops and infinite waits.
+         *
+         * As a countermeasure, we try to get a reference to the active->fence
+         * first, so if we succeed and pass it back to our user then it is not
+         * released and potentially reused by an unrelated request before the
+         * user has a chance to set up an await dependency on it.
+         */
+        prev = i915_active_fence_get(active);
+        if (fence == prev)
                 return fence;
 
         GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
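
The new comment describes the hazard: i915_request memory comes from a SLAB_TYPESAFE_BY_RCU cache, so a bare pointer loaded from active->fence may, by the time it is used, name a recycled, unrelated request. i915_active_fence_get() defends against that by acquiring a reference that can only succeed while the fence is still alive (in the driver it wraps dma_fence_get_rcu_safe() under rcu_read_lock()). Here is a rough userspace analogue of that acquire-or-give-up step; it models kref_get_unless_zero()-style acquisition only, not the RCU machinery, and the names are invented.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Invented model of a refcounted fence whose memory may be recycled. */
struct fence {
        atomic_int refcount; /* 0 means released and eligible for reuse */
};

/*
 * kref_get_unless_zero()-style acquisition: succeed only while the object
 * is still alive, so the caller is never handed a recycled object.
 */
static bool fence_tryget(struct fence *f)
{
        int old = atomic_load(&f->refcount);

        while (old > 0) {
                if (atomic_compare_exchange_weak(&f->refcount, &old, old + 1))
                        return true;
                /* old was reloaded by the failed compare-exchange; try again */
        }
        return false;
}

static void fence_put(struct fence *f)
{
        atomic_fetch_sub(&f->refcount, 1);
}

int main(void)
{
        struct fence f;

        atomic_init(&f.refcount, 2);

        if (fence_tryget(&f)) /* still alive: a reference is handed to us */
                printf("got reference, count=%d\n", atomic_load(&f.refcount));

        fence_put(&f);
        fence_put(&f);
        fence_put(&f); /* last reference gone; real code would free/recycle here */

        /* From now on the pointer may name recycled memory, so refuse it. */
        printf("tryget after release: %s\n", fence_tryget(&f) ? "yes" : "no");
        return 0;
}
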
@@ -1070,27 +1087,56 @@
          * Consider that we have two threads arriving (A and B), with
          * C already resident as the active->fence.
          *
-         * A does the xchg first, and so it sees C or NULL depending
-         * on the timing of the interrupt handler. If it is NULL, the
-         * previous fence must have been signaled and we know that
-         * we are first on the timeline. If it is still present,
-         * we acquire the lock on that fence and serialise with the interrupt
-         * handler, in the process removing it from any future interrupt
-         * callback. A will then wait on C before executing (if present).
-         *
-         * As B is second, it sees A as the previous fence and so waits for
-         * it to complete its transition and takes over the occupancy for
-         * itself -- remembering that it needs to wait on A before executing.
+         * Both A and B have got a reference to C or NULL, depending on the
+         * timing of the interrupt handler. Let's assume that if A has got C
+         * then it has locked C first (before B).
          *
          * Note the strong ordering of the timeline also provides consistent
          * nesting rules for the fence->lock; the inner lock is always the
          * older lock.
          */
         spin_lock_irqsave(fence->lock, flags);
-        prev = xchg(__active_fence_slot(active), fence);
-        if (prev) {
-                GEM_BUG_ON(prev == fence);
+        if (prev)
                 spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+
+        /*
+         * A does the cmpxchg first, and so it sees C or NULL, as before, or
+         * something else, depending on the timing of other threads and/or
+         * interrupt handler. If not the same as before then A unlocks C if
+         * applicable and retries, starting from an attempt to get a new
+         * active->fence. Meanwhile, B follows the same path as A.
+         * Once A succeeds with cmpxchg, B fails again, retries, gets A from
+         * active->fence, locks it as soon as A completes, and possibly
+         * succeeds with cmpxchg.
+         */
+        while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) {
+                if (prev) {
+                        spin_unlock(prev->lock);
+                        dma_fence_put(prev);
+                }
+                spin_unlock_irqrestore(fence->lock, flags);
+
+                prev = i915_active_fence_get(active);
+                GEM_BUG_ON(prev == fence);
+
+                spin_lock_irqsave(fence->lock, flags);
+                if (prev)
+                        spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+        }
+
+        /*
+         * If prev is NULL then the previous fence must have been signaled
+         * and we know that we are first on the timeline. If it is still
+         * present then, having the lock on that fence already acquired, we
+         * serialise with the interrupt handler, in the process of removing it
+         * from any future interrupt callback. A will then wait on C before
+         * executing (if present).
+         *
+         * As B is second, it sees A as the previous fence and so waits for
+         * it to complete its transition and takes over the occupancy for
+         * itself -- remembering that it needs to wait on A before executing.
+         */
+        if (prev) {
                 __list_del_entry(&active->cb.node);
                 spin_unlock(prev->lock); /* serialise with prev->cb_list */
         }
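
The replacement for the old unconditional xchg() is a snapshot-and-compare loop: take a referenced snapshot of active->fence, take the locks, then publish the new fence only if the slot still holds that snapshot; on a mismatch, drop the stale lock and reference and start again. Below is a condensed userspace model of the reference handling in that loop (the fence spinlocks are deliberately left out); struct fence, slot_set() and the other names are invented for illustration.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Invented refcounted fence and a slot publishing the most recent one. */
struct fence {
        atomic_int refcount;
        int seqno;
};

static struct fence *fence_get(struct fence *f)
{
        if (f)
                atomic_fetch_add(&f->refcount, 1);
        return f;
}

static void fence_put(struct fence *f)
{
        if (f && atomic_fetch_sub(&f->refcount, 1) == 1)
                free(f);
}

/* The slot stores a borrowed pointer, as active->fence does in the driver. */
static struct fence *_Atomic slot;

/*
 * Shape of the new __i915_active_fence_set() loop: snapshot the slot with a
 * reference held, then swap in @fence only if the slot still holds that
 * snapshot; otherwise drop the stale reference and retry. The reference to
 * the previous fence is returned to the caller, which must put it.
 */
static struct fence *slot_set(struct fence *fence)
{
        struct fence *snap = fence_get(atomic_load(&slot));
        struct fence *expected = snap;

        while (!atomic_compare_exchange_strong(&slot, &expected, fence)) {
                /* The slot changed under us: start over with a fresh snapshot. */
                fence_put(snap);
                snap = fence_get(atomic_load(&slot));
                expected = snap;
        }
        return snap; /* owned reference to the previous fence, may be NULL */
}

int main(void)
{
        struct fence *a = calloc(1, sizeof(*a)), *b = calloc(1, sizeof(*b));
        struct fence *prev;

        atomic_init(&a->refcount, 1);
        atomic_init(&b->refcount, 1);
        a->seqno = 1;
        b->seqno = 2;

        prev = slot_set(a); /* empty slot: nothing for the caller to wait on */
        printf("first prev: %p\n", (void *)prev);

        prev = slot_set(b); /* the caller would await @prev, then put it */
        printf("second prev seqno: %d\n", prev->seqno);
        fence_put(prev);

        fence_put(a);
        fence_put(b);
        return 0;
}
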
@@ -1107,11 +1153,7 @@
         int err = 0;
 
         /* Must maintain timeline ordering wrt previous active requests */
-        rcu_read_lock();
         fence = __i915_active_fence_set(active, &rq->fence);
-        if (fence) /* but the previous fence may not belong to that timeline! */
-                fence = dma_fence_get_rcu(fence);
-        rcu_read_unlock();
         if (fence) {
                 err = i915_request_await_dma_fence(rq, fence);
                 dma_fence_put(fence);