| .. | .. |
|---|
| 22 | 22 | * |
|---|
| 23 | 23 | */ |
|---|
| 24 | 24 | |
|---|
| 25 | | -#include <linux/prefetch.h> |
|---|
| 26 | 25 | #include <linux/dma-fence-array.h> |
|---|
| 26 | +#include <linux/dma-fence-chain.h> |
|---|
| 27 | +#include <linux/irq_work.h> |
|---|
| 28 | +#include <linux/prefetch.h> |
|---|
| 27 | 29 | #include <linux/sched.h> |
|---|
| 28 | 30 | #include <linux/sched/clock.h> |
|---|
| 29 | 31 | #include <linux/sched/signal.h> |
|---|
| 30 | 32 | |
|---|
| 33 | +#include "gem/i915_gem_context.h" |
|---|
| 34 | +#include "gt/intel_breadcrumbs.h" |
|---|
| 35 | +#include "gt/intel_context.h" |
|---|
| 36 | +#include "gt/intel_ring.h" |
|---|
| 37 | +#include "gt/intel_rps.h" |
|---|
| 38 | + |
|---|
| 39 | +#include "i915_active.h" |
|---|
| 31 | 40 | #include "i915_drv.h" |
|---|
| 41 | +#include "i915_globals.h" |
|---|
| 42 | +#include "i915_trace.h" |
|---|
| 43 | +#include "intel_pm.h" |
|---|
| 44 | + |
|---|
| 45 | +struct execute_cb { |
|---|
| 46 | + struct irq_work work; |
|---|
| 47 | + struct i915_sw_fence *fence; |
|---|
| 48 | + void (*hook)(struct i915_request *rq, struct dma_fence *signal); |
|---|
| 49 | + struct i915_request *signal; |
|---|
| 50 | +}; |
|---|
| 51 | + |
|---|
| 52 | +static struct i915_global_request { |
|---|
| 53 | + struct i915_global base; |
|---|
| 54 | + struct kmem_cache *slab_requests; |
|---|
| 55 | + struct kmem_cache *slab_execute_cbs; |
|---|
| 56 | +} global; |
|---|
| 32 | 57 | |
|---|
| 33 | 58 | static const char *i915_fence_get_driver_name(struct dma_fence *fence) |
|---|
| 34 | 59 | { |
|---|
| 35 | | - return "i915"; |
|---|
| 60 | + return dev_name(to_request(fence)->engine->i915->drm.dev); |
|---|
| 36 | 61 | } |
|---|
| 37 | 62 | |
|---|
| 38 | 63 | static const char *i915_fence_get_timeline_name(struct dma_fence *fence) |
|---|
| 39 | 64 | { |
|---|
| 65 | + const struct i915_gem_context *ctx; |
|---|
| 66 | + |
|---|
| 40 | 67 | /* |
|---|
| 41 | 68 | * The timeline struct (as part of the ppgtt underneath a context) |
|---|
| 42 | 69 | * may be freed when the request is no longer in use by the GPU. |
|---|
| .. | .. |
|---|
| 49 | 76 | if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) |
|---|
| 50 | 77 | return "signaled"; |
|---|
| 51 | 78 | |
|---|
| 52 | | - return to_request(fence)->timeline->name; |
|---|
| 79 | + ctx = i915_request_gem_context(to_request(fence)); |
|---|
| 80 | + if (!ctx) |
|---|
| 81 | + return "[" DRIVER_NAME "]"; |
|---|
| 82 | + |
|---|
| 83 | + return ctx->name; |
|---|
| 53 | 84 | } |
|---|
| 54 | 85 | |
|---|
| 55 | 86 | static bool i915_fence_signaled(struct dma_fence *fence) |
|---|
| .. | .. |
|---|
| 59 | 90 | |
|---|
| 60 | 91 | static bool i915_fence_enable_signaling(struct dma_fence *fence) |
|---|
| 61 | 92 | { |
|---|
| 62 | | - return intel_engine_enable_signaling(to_request(fence), true); |
|---|
| 93 | + return i915_request_enable_breadcrumb(to_request(fence)); |
|---|
| 63 | 94 | } |
|---|
| 64 | 95 | |
|---|
| 65 | 96 | static signed long i915_fence_wait(struct dma_fence *fence, |
|---|
| 66 | 97 | bool interruptible, |
|---|
| 67 | 98 | signed long timeout) |
|---|
| 68 | 99 | { |
|---|
| 69 | | - return i915_request_wait(to_request(fence), interruptible, timeout); |
|---|
| 100 | + return i915_request_wait(to_request(fence), |
|---|
| 101 | + interruptible | I915_WAIT_PRIORITY, |
|---|
| 102 | + timeout); |
|---|
| 103 | +} |
|---|
| 104 | + |
|---|
| 105 | +struct kmem_cache *i915_request_slab_cache(void) |
|---|
| 106 | +{ |
|---|
| 107 | + return global.slab_requests; |
|---|
| 70 | 108 | } |
|---|
| 71 | 109 | |
|---|
| 72 | 110 | static void i915_fence_release(struct dma_fence *fence) |
|---|
| .. | .. |
|---|
| 81 | 119 | * caught trying to reuse dead objects. |
|---|
| 82 | 120 | */ |
|---|
| 83 | 121 | i915_sw_fence_fini(&rq->submit); |
|---|
| 122 | + i915_sw_fence_fini(&rq->semaphore); |
|---|
| 84 | 123 | |
|---|
| 85 | | - kmem_cache_free(rq->i915->requests, rq); |
|---|
| 124 | + /* |
|---|
| 125 | + * Keep one request on each engine for reserved use under mempressure |
|---|
| 126 | + * |
|---|
| 127 | + * We do not hold a reference to the engine here and so have to be |
|---|
| 128 | + * very careful in what rq->engine we poke. The virtual engine is |
|---|
| 129 | + * referenced via the rq->context and we released that ref during |
|---|
| 130 | + * i915_request_retire(), ergo we must not dereference a virtual |
|---|
| 131 | + * engine here. Not that we would want to, as the only consumer of |
|---|
| 132 | + * the reserved engine->request_pool is the power management parking, |
|---|
| 133 | + * which must-not-fail, and that is only run on the physical engines. |
|---|
| 134 | + * |
|---|
| 135 | + * Since the request must have been executed to have completed, |
|---|
| 136 | + * we know that it will have been processed by the HW and will |
|---|
| 137 | + * not be unsubmitted again, so rq->engine and rq->execution_mask |
|---|
| 138 | + * at this point is stable. rq->execution_mask will be a single |
|---|
| 139 | + * bit if the last and _only_ engine it could execute on was a |
|---|
| 140 | + * physical engine, if it's multiple bits then it started on and |
|---|
| 141 | + * could still be on a virtual engine. Thus if the mask is not a |
|---|
| 142 | + * power-of-two we assume that rq->engine may still be a virtual |
|---|
| 143 | + * engine and so a dangling invalid pointer that we cannot dereference |
|---|
| 144 | + * |
|---|
| 145 | + * For example, consider the flow of a bonded request through a virtual |
|---|
| 146 | + * engine. The request is created with a wide engine mask (all engines |
|---|
| 147 | + * that we might execute on). On processing the bond, the request mask |
|---|
| 148 | + * is reduced to one or more engines. If the request is subsequently |
|---|
| 149 | + * bound to a single engine, it will then be constrained to only |
|---|
| 150 | + * execute on that engine and never returned to the virtual engine |
|---|
| 151 | + * after timeslicing away, see __unwind_incomplete_requests(). Thus we |
|---|
| 152 | + * know that if the rq->execution_mask is a single bit, rq->engine |
|---|
| 153 | + * can be a physical engine with the exact corresponding mask. |
|---|
| 154 | + */ |
|---|
| 155 | + if (is_power_of_2(rq->execution_mask) && |
|---|
| 156 | + !cmpxchg(&rq->engine->request_pool, NULL, rq)) |
|---|
| 157 | + return; |
|---|
| 158 | + |
|---|
| 159 | + kmem_cache_free(global.slab_requests, rq); |
|---|
| 86 | 160 | } |
|---|
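The `engine->request_pool` kept by `i915_fence_release()` above is a single-slot, lock-free cache: the slot is claimed with `cmpxchg(NULL, rq)` on free and drained with `xchg(rsvd, NULL)` in `request_alloc_slow()` further down. A minimal userspace sketch of the same pattern, using C11 atomics rather than the kernel primitives (the `obj` and `slot` names are illustrative, not driver API):

```c
#include <stdatomic.h>
#include <stdlib.h>

struct obj { int payload; };

/* Analogous to engine->request_pool: holds at most one spare object. */
static _Atomic(struct obj *) slot;

static void obj_free(struct obj *o)
{
	struct obj *expected = NULL;

	/* Donate the object if the slot is empty (the cmpxchg in the diff). */
	if (atomic_compare_exchange_strong(&slot, &expected, o))
		return;

	free(o);
}

static struct obj *obj_alloc_reserved(void)
{
	/* Drain the slot, if anything is there (the xchg in request_alloc_slow). */
	return atomic_exchange(&slot, NULL);
}
```

The reserve exists so that must-not-fail paths (engine parking, per the comment above) always have one request to fall back on under memory pressure.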
| 87 | 161 | |
|---|
| 88 | 162 | const struct dma_fence_ops i915_fence_ops = { |
|---|
| .. | .. |
|---|
| 94 | 168 | .release = i915_fence_release, |
|---|
| 95 | 169 | }; |
|---|
| 96 | 170 | |
|---|
| 97 | | -static inline void |
|---|
| 98 | | -i915_request_remove_from_client(struct i915_request *request) |
|---|
| 171 | +static void irq_execute_cb(struct irq_work *wrk) |
|---|
| 99 | 172 | { |
|---|
| 100 | | - struct drm_i915_file_private *file_priv; |
|---|
| 173 | + struct execute_cb *cb = container_of(wrk, typeof(*cb), work); |
|---|
| 101 | 174 | |
|---|
| 102 | | - file_priv = request->file_priv; |
|---|
| 103 | | - if (!file_priv) |
|---|
| 175 | + i915_sw_fence_complete(cb->fence); |
|---|
| 176 | + kmem_cache_free(global.slab_execute_cbs, cb); |
|---|
| 177 | +} |
|---|
| 178 | + |
|---|
| 179 | +static void irq_execute_cb_hook(struct irq_work *wrk) |
|---|
| 180 | +{ |
|---|
| 181 | + struct execute_cb *cb = container_of(wrk, typeof(*cb), work); |
|---|
| 182 | + |
|---|
| 183 | + cb->hook(container_of(cb->fence, struct i915_request, submit), |
|---|
| 184 | + &cb->signal->fence); |
|---|
| 185 | + i915_request_put(cb->signal); |
|---|
| 186 | + |
|---|
| 187 | + irq_execute_cb(wrk); |
|---|
| 188 | +} |
|---|
| 189 | + |
|---|
| 190 | +static __always_inline void |
|---|
| 191 | +__notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk)) |
|---|
| 192 | +{ |
|---|
| 193 | + struct execute_cb *cb, *cn; |
|---|
| 194 | + |
|---|
| 195 | + if (llist_empty(&rq->execute_cb)) |
|---|
| 104 | 196 | return; |
|---|
| 105 | 197 | |
|---|
| 106 | | - spin_lock(&file_priv->mm.lock); |
|---|
| 107 | | - if (request->file_priv) { |
|---|
| 108 | | - list_del(&request->client_link); |
|---|
| 109 | | - request->file_priv = NULL; |
|---|
| 198 | + llist_for_each_entry_safe(cb, cn, |
|---|
| 199 | + llist_del_all(&rq->execute_cb), |
|---|
| 200 | + work.llnode) |
|---|
| 201 | + fn(&cb->work); |
|---|
| 202 | +} |
|---|
| 203 | + |
|---|
| 204 | +static void __notify_execute_cb_irq(struct i915_request *rq) |
|---|
| 205 | +{ |
|---|
| 206 | + __notify_execute_cb(rq, irq_work_queue); |
|---|
| 207 | +} |
|---|
| 208 | + |
|---|
| 209 | +static bool irq_work_imm(struct irq_work *wrk) |
|---|
| 210 | +{ |
|---|
| 211 | + wrk->func(wrk); |
|---|
| 212 | + return false; |
|---|
| 213 | +} |
|---|
| 214 | + |
|---|
| 215 | +static void __notify_execute_cb_imm(struct i915_request *rq) |
|---|
| 216 | +{ |
|---|
| 217 | + __notify_execute_cb(rq, irq_work_imm); |
|---|
| 218 | +} |
|---|
| 219 | + |
|---|
| 220 | +static void free_capture_list(struct i915_request *request) |
|---|
| 221 | +{ |
|---|
| 222 | + struct i915_capture_list *capture; |
|---|
| 223 | + |
|---|
| 224 | + capture = fetch_and_zero(&request->capture_list); |
|---|
| 225 | + while (capture) { |
|---|
| 226 | + struct i915_capture_list *next = capture->next; |
|---|
| 227 | + |
|---|
| 228 | + kfree(capture); |
|---|
| 229 | + capture = next; |
|---|
| 110 | 230 | } |
|---|
| 111 | | - spin_unlock(&file_priv->mm.lock); |
|---|
| 112 | 231 | } |
|---|
| 113 | 232 | |
|---|
| 114 | | -static struct i915_dependency * |
|---|
| 115 | | -i915_dependency_alloc(struct drm_i915_private *i915) |
|---|
| 233 | +static void __i915_request_fill(struct i915_request *rq, u8 val) |
|---|
| 116 | 234 | { |
|---|
| 117 | | - return kmem_cache_alloc(i915->dependencies, GFP_KERNEL); |
|---|
| 235 | + void *vaddr = rq->ring->vaddr; |
|---|
| 236 | + u32 head; |
|---|
| 237 | + |
|---|
| 238 | + head = rq->infix; |
|---|
| 239 | + if (rq->postfix < head) { |
|---|
| 240 | + memset(vaddr + head, val, rq->ring->size - head); |
|---|
| 241 | + head = 0; |
|---|
| 242 | + } |
|---|
| 243 | + memset(vaddr + head, val, rq->postfix - head); |
|---|
| 118 | 244 | } |
|---|
| 119 | 245 | |
|---|
| 120 | | -static void |
|---|
| 121 | | -i915_dependency_free(struct drm_i915_private *i915, |
|---|
| 122 | | - struct i915_dependency *dep) |
|---|
| 246 | +static void remove_from_engine(struct i915_request *rq) |
|---|
| 123 | 247 | { |
|---|
| 124 | | - kmem_cache_free(i915->dependencies, dep); |
|---|
| 125 | | -} |
|---|
| 126 | | - |
|---|
| 127 | | -static void |
|---|
| 128 | | -__i915_sched_node_add_dependency(struct i915_sched_node *node, |
|---|
| 129 | | - struct i915_sched_node *signal, |
|---|
| 130 | | - struct i915_dependency *dep, |
|---|
| 131 | | - unsigned long flags) |
|---|
| 132 | | -{ |
|---|
| 133 | | - INIT_LIST_HEAD(&dep->dfs_link); |
|---|
| 134 | | - list_add(&dep->wait_link, &signal->waiters_list); |
|---|
| 135 | | - list_add(&dep->signal_link, &node->signalers_list); |
|---|
| 136 | | - dep->signaler = signal; |
|---|
| 137 | | - dep->flags = flags; |
|---|
| 138 | | -} |
|---|
| 139 | | - |
|---|
| 140 | | -static int |
|---|
| 141 | | -i915_sched_node_add_dependency(struct drm_i915_private *i915, |
|---|
| 142 | | - struct i915_sched_node *node, |
|---|
| 143 | | - struct i915_sched_node *signal) |
|---|
| 144 | | -{ |
|---|
| 145 | | - struct i915_dependency *dep; |
|---|
| 146 | | - |
|---|
| 147 | | - dep = i915_dependency_alloc(i915); |
|---|
| 148 | | - if (!dep) |
|---|
| 149 | | - return -ENOMEM; |
|---|
| 150 | | - |
|---|
| 151 | | - __i915_sched_node_add_dependency(node, signal, dep, |
|---|
| 152 | | - I915_DEPENDENCY_ALLOC); |
|---|
| 153 | | - return 0; |
|---|
| 154 | | -} |
|---|
| 155 | | - |
|---|
| 156 | | -static void |
|---|
| 157 | | -i915_sched_node_fini(struct drm_i915_private *i915, |
|---|
| 158 | | - struct i915_sched_node *node) |
|---|
| 159 | | -{ |
|---|
| 160 | | - struct i915_dependency *dep, *tmp; |
|---|
| 161 | | - |
|---|
| 162 | | - GEM_BUG_ON(!list_empty(&node->link)); |
|---|
| 248 | + struct intel_engine_cs *engine, *locked; |
|---|
| 163 | 249 | |
|---|
| 164 | 250 | /* |
|---|
| 165 | | - * Everyone we depended upon (the fences we wait to be signaled) |
|---|
| 166 | | - * should retire before us and remove themselves from our list. |
|---|
| 167 | | - * However, retirement is run independently on each timeline and |
|---|
| 168 | | - * so we may be called out-of-order. |
|---|
| 251 | + * Virtual engines complicate acquiring the engine timeline lock, |
|---|
| 252 | + * as their rq->engine pointer is not stable until under that |
|---|
| 253 | + * engine lock. The simple ploy we use is to take the lock then |
|---|
| 254 | + * check that the rq still belongs to the newly locked engine. |
|---|
| 169 | 255 | */ |
|---|
| 170 | | - list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { |
|---|
| 171 | | - GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler)); |
|---|
| 172 | | - GEM_BUG_ON(!list_empty(&dep->dfs_link)); |
|---|
| 173 | | - |
|---|
| 174 | | - list_del(&dep->wait_link); |
|---|
| 175 | | - if (dep->flags & I915_DEPENDENCY_ALLOC) |
|---|
| 176 | | - i915_dependency_free(i915, dep); |
|---|
| 256 | + locked = READ_ONCE(rq->engine); |
|---|
| 257 | + spin_lock_irq(&locked->active.lock); |
|---|
| 258 | + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { |
|---|
| 259 | + spin_unlock(&locked->active.lock); |
|---|
| 260 | + spin_lock(&engine->active.lock); |
|---|
| 261 | + locked = engine; |
|---|
| 177 | 262 | } |
|---|
| 263 | + list_del_init(&rq->sched.link); |
|---|
| 178 | 264 | |
|---|
| 179 | | - /* Remove ourselves from everyone who depends upon us */ |
|---|
| 180 | | - list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { |
|---|
| 181 | | - GEM_BUG_ON(dep->signaler != node); |
|---|
| 182 | | - GEM_BUG_ON(!list_empty(&dep->dfs_link)); |
|---|
| 265 | + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); |
|---|
| 266 | + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); |
|---|
| 183 | 267 | |
|---|
| 184 | | - list_del(&dep->signal_link); |
|---|
| 185 | | - if (dep->flags & I915_DEPENDENCY_ALLOC) |
|---|
| 186 | | - i915_dependency_free(i915, dep); |
|---|
| 187 | | - } |
|---|
| 268 | + /* Prevent further __await_execution() registering a cb, then flush */ |
|---|
| 269 | + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); |
|---|
| 270 | + |
|---|
| 271 | + spin_unlock_irq(&locked->active.lock); |
|---|
| 272 | + |
|---|
| 273 | + __notify_execute_cb_imm(rq); |
|---|
| 188 | 274 | } |
|---|
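`remove_from_engine()` has to lock whichever engine the request currently claims, but `rq->engine` is only stable while that engine's lock is held, so it loops: lock the candidate, re-read the pointer, and move on if it changed underneath. A stripped-down sketch of that retry loop with pthread mutexes (the types and helper name are illustrative):

```c
#include <pthread.h>
#include <stdatomic.h>

struct engine { pthread_mutex_t lock; };

struct request { _Atomic(struct engine *) engine; };

/*
 * Lock the engine that currently owns @rq. Another thread may rewrite
 * rq->engine until we hold the matching lock, so keep retrying until the
 * pointer is stable under the lock we took.
 */
static struct engine *lock_owning_engine(struct request *rq)
{
	struct engine *locked, *owner;

	locked = atomic_load(&rq->engine);
	pthread_mutex_lock(&locked->lock);
	while ((owner = atomic_load(&rq->engine)) != locked) {
		pthread_mutex_unlock(&locked->lock);
		pthread_mutex_lock(&owner->lock);
		locked = owner;
	}

	return locked;	/* caller releases locked->lock when done */
}
```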
| 189 | 275 | |
|---|
| 190 | | -static void |
|---|
| 191 | | -i915_sched_node_init(struct i915_sched_node *node) |
|---|
| 276 | +bool i915_request_retire(struct i915_request *rq) |
|---|
| 192 | 277 | { |
|---|
| 193 | | - INIT_LIST_HEAD(&node->signalers_list); |
|---|
| 194 | | - INIT_LIST_HEAD(&node->waiters_list); |
|---|
| 195 | | - INIT_LIST_HEAD(&node->link); |
|---|
| 196 | | - node->attr.priority = I915_PRIORITY_INVALID; |
|---|
| 197 | | -} |
|---|
| 278 | + if (!i915_request_completed(rq)) |
|---|
| 279 | + return false; |
|---|
| 198 | 280 | |
|---|
| 199 | | -static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) |
|---|
| 200 | | -{ |
|---|
| 201 | | - struct intel_engine_cs *engine; |
|---|
| 202 | | - struct i915_timeline *timeline; |
|---|
| 203 | | - enum intel_engine_id id; |
|---|
| 204 | | - int ret; |
|---|
| 281 | + RQ_TRACE(rq, "\n"); |
|---|
| 205 | 282 | |
|---|
| 206 | | - /* Carefully retire all requests without writing to the rings */ |
|---|
| 207 | | - ret = i915_gem_wait_for_idle(i915, |
|---|
| 208 | | - I915_WAIT_INTERRUPTIBLE | |
|---|
| 209 | | - I915_WAIT_LOCKED, |
|---|
| 210 | | - MAX_SCHEDULE_TIMEOUT); |
|---|
| 211 | | - if (ret) |
|---|
| 212 | | - return ret; |
|---|
| 213 | | - |
|---|
| 214 | | - GEM_BUG_ON(i915->gt.active_requests); |
|---|
| 215 | | - |
|---|
| 216 | | - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ |
|---|
| 217 | | - for_each_engine(engine, i915, id) { |
|---|
| 218 | | - GEM_TRACE("%s seqno %d (current %d) -> %d\n", |
|---|
| 219 | | - engine->name, |
|---|
| 220 | | - engine->timeline.seqno, |
|---|
| 221 | | - intel_engine_get_seqno(engine), |
|---|
| 222 | | - seqno); |
|---|
| 223 | | - |
|---|
| 224 | | - if (!i915_seqno_passed(seqno, engine->timeline.seqno)) { |
|---|
| 225 | | - /* Flush any waiters before we reuse the seqno */ |
|---|
| 226 | | - intel_engine_disarm_breadcrumbs(engine); |
|---|
| 227 | | - intel_engine_init_hangcheck(engine); |
|---|
| 228 | | - GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals)); |
|---|
| 229 | | - } |
|---|
| 230 | | - |
|---|
| 231 | | - /* Check we are idle before we fiddle with hw state! */ |
|---|
| 232 | | - GEM_BUG_ON(!intel_engine_is_idle(engine)); |
|---|
| 233 | | - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request)); |
|---|
| 234 | | - |
|---|
| 235 | | - /* Finally reset hw state */ |
|---|
| 236 | | - intel_engine_init_global_seqno(engine, seqno); |
|---|
| 237 | | - engine->timeline.seqno = seqno; |
|---|
| 238 | | - } |
|---|
| 239 | | - |
|---|
| 240 | | - list_for_each_entry(timeline, &i915->gt.timelines, link) |
|---|
| 241 | | - memset(timeline->global_sync, 0, sizeof(timeline->global_sync)); |
|---|
| 242 | | - |
|---|
| 243 | | - i915->gt.request_serial = seqno; |
|---|
| 244 | | - |
|---|
| 245 | | - return 0; |
|---|
| 246 | | -} |
|---|
| 247 | | - |
|---|
| 248 | | -int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) |
|---|
| 249 | | -{ |
|---|
| 250 | | - struct drm_i915_private *i915 = to_i915(dev); |
|---|
| 251 | | - |
|---|
| 252 | | - lockdep_assert_held(&i915->drm.struct_mutex); |
|---|
| 253 | | - |
|---|
| 254 | | - if (seqno == 0) |
|---|
| 255 | | - return -EINVAL; |
|---|
| 256 | | - |
|---|
| 257 | | - /* HWS page needs to be set less than what we will inject to ring */ |
|---|
| 258 | | - return reset_all_global_seqno(i915, seqno - 1); |
|---|
| 259 | | -} |
|---|
| 260 | | - |
|---|
| 261 | | -static int reserve_gt(struct drm_i915_private *i915) |
|---|
| 262 | | -{ |
|---|
| 263 | | - int ret; |
|---|
| 264 | | - |
|---|
| 265 | | - /* |
|---|
| 266 | | - * Reservation is fine until we may need to wrap around |
|---|
| 267 | | - * |
|---|
| 268 | | - * By incrementing the serial for every request, we know that no |
|---|
| 269 | | - * individual engine may exceed that serial (as each is reset to 0 |
|---|
| 270 | | - * on any wrap). This protects even the most pessimistic of migrations |
|---|
| 271 | | - * of every request from all engines onto just one. |
|---|
| 272 | | - */ |
|---|
| 273 | | - while (unlikely(++i915->gt.request_serial == 0)) { |
|---|
| 274 | | - ret = reset_all_global_seqno(i915, 0); |
|---|
| 275 | | - if (ret) { |
|---|
| 276 | | - i915->gt.request_serial--; |
|---|
| 277 | | - return ret; |
|---|
| 278 | | - } |
|---|
| 279 | | - } |
|---|
| 280 | | - |
|---|
| 281 | | - if (!i915->gt.active_requests++) |
|---|
| 282 | | - i915_gem_unpark(i915); |
|---|
| 283 | | - |
|---|
| 284 | | - return 0; |
|---|
| 285 | | -} |
|---|
| 286 | | - |
|---|
| 287 | | -static void unreserve_gt(struct drm_i915_private *i915) |
|---|
| 288 | | -{ |
|---|
| 289 | | - GEM_BUG_ON(!i915->gt.active_requests); |
|---|
| 290 | | - if (!--i915->gt.active_requests) |
|---|
| 291 | | - i915_gem_park(i915); |
|---|
| 292 | | -} |
|---|
| 293 | | - |
|---|
| 294 | | -void i915_gem_retire_noop(struct i915_gem_active *active, |
|---|
| 295 | | - struct i915_request *request) |
|---|
| 296 | | -{ |
|---|
| 297 | | - /* Space left intentionally blank */ |
|---|
| 298 | | -} |
|---|
| 299 | | - |
|---|
| 300 | | -static void advance_ring(struct i915_request *request) |
|---|
| 301 | | -{ |
|---|
| 302 | | - struct intel_ring *ring = request->ring; |
|---|
| 303 | | - unsigned int tail; |
|---|
| 283 | + GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); |
|---|
| 284 | + trace_i915_request_retire(rq); |
|---|
| 285 | + i915_request_mark_complete(rq); |
|---|
| 304 | 286 | |
|---|
| 305 | 287 | /* |
|---|
| 306 | 288 | * We know the GPU must have read the request to have |
|---|
| .. | .. |
|---|
| 311 | 293 | * Note this requires that we are always called in request |
|---|
| 312 | 294 | * completion order. |
|---|
| 313 | 295 | */ |
|---|
| 314 | | - GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list)); |
|---|
| 315 | | - if (list_is_last(&request->ring_link, &ring->request_list)) { |
|---|
| 316 | | - /* |
|---|
| 317 | | - * We may race here with execlists resubmitting this request |
|---|
| 318 | | - * as we retire it. The resubmission will move the ring->tail |
|---|
| 319 | | - * forwards (to request->wa_tail). We either read the |
|---|
| 320 | | - * current value that was written to hw, or the value that |
|---|
| 321 | | - * is just about to be. Either works, if we miss the last two |
|---|
| 322 | | - * noops - they are safe to be replayed on a reset. |
|---|
| 323 | | - */ |
|---|
| 324 | | - GEM_TRACE("marking %s as inactive\n", ring->timeline->name); |
|---|
| 325 | | - tail = READ_ONCE(request->tail); |
|---|
| 326 | | - list_del(&ring->active_link); |
|---|
| 327 | | - } else { |
|---|
| 328 | | - tail = request->postfix; |
|---|
| 329 | | - } |
|---|
| 330 | | - list_del_init(&request->ring_link); |
|---|
| 296 | + GEM_BUG_ON(!list_is_first(&rq->link, |
|---|
| 297 | + &i915_request_timeline(rq)->requests)); |
|---|
| 298 | + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) |
|---|
| 299 | + /* Poison before we release our space in the ring */ |
|---|
| 300 | + __i915_request_fill(rq, POISON_FREE); |
|---|
| 301 | + rq->ring->head = rq->postfix; |
|---|
| 331 | 302 | |
|---|
| 332 | | - ring->head = tail; |
|---|
| 333 | | -} |
|---|
| 334 | | - |
|---|
| 335 | | -static void free_capture_list(struct i915_request *request) |
|---|
| 336 | | -{ |
|---|
| 337 | | - struct i915_capture_list *capture; |
|---|
| 338 | | - |
|---|
| 339 | | - capture = request->capture_list; |
|---|
| 340 | | - while (capture) { |
|---|
| 341 | | - struct i915_capture_list *next = capture->next; |
|---|
| 342 | | - |
|---|
| 343 | | - kfree(capture); |
|---|
| 344 | | - capture = next; |
|---|
| 345 | | - } |
|---|
| 346 | | -} |
|---|
| 347 | | - |
|---|
| 348 | | -static void __retire_engine_request(struct intel_engine_cs *engine, |
|---|
| 349 | | - struct i915_request *rq) |
|---|
| 350 | | -{ |
|---|
| 351 | | - GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n", |
|---|
| 352 | | - __func__, engine->name, |
|---|
| 353 | | - rq->fence.context, rq->fence.seqno, |
|---|
| 354 | | - rq->global_seqno, |
|---|
| 355 | | - intel_engine_get_seqno(engine)); |
|---|
| 356 | | - |
|---|
| 357 | | - GEM_BUG_ON(!i915_request_completed(rq)); |
|---|
| 358 | | - |
|---|
| 359 | | - spin_lock_irq(&engine->timeline.lock); |
|---|
| 360 | | - GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests)); |
|---|
| 361 | | - list_del_init(&rq->link); |
|---|
| 362 | | - spin_unlock(&engine->timeline.lock); |
|---|
| 363 | | - |
|---|
| 364 | | - spin_lock(&rq->lock); |
|---|
| 365 | | - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) |
|---|
| 303 | + if (!i915_request_signaled(rq)) { |
|---|
| 304 | + spin_lock_irq(&rq->lock); |
|---|
| 366 | 305 | dma_fence_signal_locked(&rq->fence); |
|---|
| 367 | | - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) |
|---|
| 368 | | - intel_engine_cancel_signaling(rq); |
|---|
| 369 | | - if (rq->waitboost) { |
|---|
| 370 | | - GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); |
|---|
| 371 | | - atomic_dec(&rq->i915->gt_pm.rps.num_waiters); |
|---|
| 306 | + spin_unlock_irq(&rq->lock); |
|---|
| 372 | 307 | } |
|---|
| 373 | | - spin_unlock_irq(&rq->lock); |
|---|
| 308 | + |
|---|
| 309 | + if (i915_request_has_waitboost(rq)) { |
|---|
| 310 | + GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters)); |
|---|
| 311 | + atomic_dec(&rq->engine->gt->rps.num_waiters); |
|---|
| 312 | + } |
|---|
| 374 | 313 | |
|---|
| 375 | 314 | /* |
|---|
| 376 | | - * The backing object for the context is done after switching to the |
|---|
| 377 | | - * *next* context. Therefore we cannot retire the previous context until |
|---|
| 378 | | - * the next context has already started running. However, since we |
|---|
| 379 | | - * cannot take the required locks at i915_request_submit() we |
|---|
| 380 | | - * defer the unpinning of the active context to now, retirement of |
|---|
| 381 | | - * the subsequent request. |
|---|
| 382 | | - */ |
|---|
| 383 | | - if (engine->last_retired_context) |
|---|
| 384 | | - intel_context_unpin(engine->last_retired_context); |
|---|
| 385 | | - engine->last_retired_context = rq->hw_context; |
|---|
| 386 | | -} |
|---|
| 387 | | - |
|---|
| 388 | | -static void __retire_engine_upto(struct intel_engine_cs *engine, |
|---|
| 389 | | - struct i915_request *rq) |
|---|
| 390 | | -{ |
|---|
| 391 | | - struct i915_request *tmp; |
|---|
| 392 | | - |
|---|
| 393 | | - if (list_empty(&rq->link)) |
|---|
| 394 | | - return; |
|---|
| 395 | | - |
|---|
| 396 | | - do { |
|---|
| 397 | | - tmp = list_first_entry(&engine->timeline.requests, |
|---|
| 398 | | - typeof(*tmp), link); |
|---|
| 399 | | - |
|---|
| 400 | | - GEM_BUG_ON(tmp->engine != engine); |
|---|
| 401 | | - __retire_engine_request(engine, tmp); |
|---|
| 402 | | - } while (tmp != rq); |
|---|
| 403 | | -} |
|---|
| 404 | | - |
|---|
| 405 | | -static void i915_request_retire(struct i915_request *request) |
|---|
| 406 | | -{ |
|---|
| 407 | | - struct i915_gem_active *active, *next; |
|---|
| 408 | | - |
|---|
| 409 | | - GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", |
|---|
| 410 | | - request->engine->name, |
|---|
| 411 | | - request->fence.context, request->fence.seqno, |
|---|
| 412 | | - request->global_seqno, |
|---|
| 413 | | - intel_engine_get_seqno(request->engine)); |
|---|
| 414 | | - |
|---|
| 415 | | - lockdep_assert_held(&request->i915->drm.struct_mutex); |
|---|
| 416 | | - GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); |
|---|
| 417 | | - GEM_BUG_ON(!i915_request_completed(request)); |
|---|
| 418 | | - |
|---|
| 419 | | - trace_i915_request_retire(request); |
|---|
| 420 | | - |
|---|
| 421 | | - advance_ring(request); |
|---|
| 422 | | - free_capture_list(request); |
|---|
| 423 | | - |
|---|
| 424 | | - /* |
|---|
| 425 | | - * Walk through the active list, calling retire on each. This allows |
|---|
| 426 | | - * objects to track their GPU activity and mark themselves as idle |
|---|
| 427 | | - * when their *last* active request is completed (updating state |
|---|
| 428 | | - * tracking lists for eviction, active references for GEM, etc). |
|---|
| 315 | + * We only loosely track inflight requests across preemption, |
|---|
| 316 | + * and so we may find ourselves attempting to retire a _completed_ |
|---|
| 317 | + * request that we have removed from the HW and put back on a run |
|---|
| 318 | + * queue. |
|---|
| 429 | 319 | * |
|---|
| 430 | | - * As the ->retire() may free the node, we decouple it first and |
|---|
| 431 | | - * pass along the auxiliary information (to avoid dereferencing |
|---|
| 432 | | - * the node after the callback). |
|---|
| 320 | + * As we set I915_FENCE_FLAG_ACTIVE on the request, this should be |
|---|
| 321 | + * after removing the breadcrumb and signaling it, so that we do not |
|---|
| 322 | + * inadvertently attach the breadcrumb to a completed request. |
|---|
| 433 | 323 | */ |
|---|
| 434 | | - list_for_each_entry_safe(active, next, &request->active_list, link) { |
|---|
| 435 | | - /* |
|---|
| 436 | | - * In microbenchmarks or focusing upon time inside the kernel, |
|---|
| 437 | | - * we may spend an inordinate amount of time simply handling |
|---|
| 438 | | - * the retirement of requests and processing their callbacks. |
|---|
| 439 | | - * Of which, this loop itself is particularly hot due to the |
|---|
| 440 | | - * cache misses when jumping around the list of i915_gem_active. |
|---|
| 441 | | - * So we try to keep this loop as streamlined as possible and |
|---|
| 442 | | - * also prefetch the next i915_gem_active to try and hide |
|---|
| 443 | | - * the likely cache miss. |
|---|
| 444 | | - */ |
|---|
| 445 | | - prefetchw(next); |
|---|
| 324 | + remove_from_engine(rq); |
|---|
| 325 | + GEM_BUG_ON(!llist_empty(&rq->execute_cb)); |
|---|
| 446 | 326 | |
|---|
| 447 | | - INIT_LIST_HEAD(&active->link); |
|---|
| 448 | | - RCU_INIT_POINTER(active->request, NULL); |
|---|
| 327 | + __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */ |
|---|
| 449 | 328 | |
|---|
| 450 | | - active->retire(active, request); |
|---|
| 451 | | - } |
|---|
| 329 | + intel_context_exit(rq->context); |
|---|
| 330 | + intel_context_unpin(rq->context); |
|---|
| 452 | 331 | |
|---|
| 453 | | - i915_request_remove_from_client(request); |
|---|
| 332 | + free_capture_list(rq); |
|---|
| 333 | + i915_sched_node_fini(&rq->sched); |
|---|
| 334 | + i915_request_put(rq); |
|---|
| 454 | 335 | |
|---|
| 455 | | - /* Retirement decays the ban score as it is a sign of ctx progress */ |
|---|
| 456 | | - atomic_dec_if_positive(&request->gem_context->ban_score); |
|---|
| 457 | | - intel_context_unpin(request->hw_context); |
|---|
| 458 | | - |
|---|
| 459 | | - __retire_engine_upto(request->engine, request); |
|---|
| 460 | | - |
|---|
| 461 | | - unreserve_gt(request->i915); |
|---|
| 462 | | - |
|---|
| 463 | | - i915_sched_node_fini(request->i915, &request->sched); |
|---|
| 464 | | - i915_request_put(request); |
|---|
| 336 | + return true; |
|---|
| 465 | 337 | } |
|---|
| 466 | 338 | |
|---|
| 467 | 339 | void i915_request_retire_upto(struct i915_request *rq) |
|---|
| 468 | 340 | { |
|---|
| 469 | | - struct intel_ring *ring = rq->ring; |
|---|
| 341 | + struct intel_timeline * const tl = i915_request_timeline(rq); |
|---|
| 470 | 342 | struct i915_request *tmp; |
|---|
| 471 | 343 | |
|---|
| 472 | | - GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", |
|---|
| 473 | | - rq->engine->name, |
|---|
| 474 | | - rq->fence.context, rq->fence.seqno, |
|---|
| 475 | | - rq->global_seqno, |
|---|
| 476 | | - intel_engine_get_seqno(rq->engine)); |
|---|
| 344 | + RQ_TRACE(rq, "\n"); |
|---|
| 477 | 345 | |
|---|
| 478 | | - lockdep_assert_held(&rq->i915->drm.struct_mutex); |
|---|
| 479 | 346 | GEM_BUG_ON(!i915_request_completed(rq)); |
|---|
| 480 | 347 | |
|---|
| 481 | | - if (list_empty(&rq->ring_link)) |
|---|
| 348 | + do { |
|---|
| 349 | + tmp = list_first_entry(&tl->requests, typeof(*tmp), link); |
|---|
| 350 | + } while (i915_request_retire(tmp) && tmp != rq); |
|---|
| 351 | +} |
|---|
| 352 | + |
|---|
| 353 | +static struct i915_request * const * |
|---|
| 354 | +__engine_active(struct intel_engine_cs *engine) |
|---|
| 355 | +{ |
|---|
| 356 | + return READ_ONCE(engine->execlists.active); |
|---|
| 357 | +} |
|---|
| 358 | + |
|---|
| 359 | +static bool __request_in_flight(const struct i915_request *signal) |
|---|
| 360 | +{ |
|---|
| 361 | + struct i915_request * const *port, *rq; |
|---|
| 362 | + bool inflight = false; |
|---|
| 363 | + |
|---|
| 364 | + if (!i915_request_is_ready(signal)) |
|---|
| 365 | + return false; |
|---|
| 366 | + |
|---|
| 367 | + /* |
|---|
| 368 | + * Even if we have unwound the request, it may still be on |
|---|
| 369 | + * the GPU (preempt-to-busy). If that request is inside an |
|---|
| 370 | + * unpreemptible critical section, it will not be removed. Some |
|---|
| 371 | + * GPU functions may even be stuck waiting for the paired request |
|---|
| 372 | + * (__await_execution) to be submitted and cannot be preempted |
|---|
| 373 | + * until the bond is executing. |
|---|
| 374 | + * |
|---|
| 375 | + * As we know that there are always preemption points between |
|---|
| 376 | + * requests, we know that only the currently executing request |
|---|
| 377 | + * may be still active even though we have cleared the flag. |
|---|
| 378 | + * However, we can't rely on our tracking of ELSP[0] to know |
|---|
| 379 | + * which request is currently active and so may be stuck, as |
|---|
| 380 | + * the tracking may be an event behind. Instead assume that |
|---|
| 381 | + * if the context is still inflight, then it is still active |
|---|
| 382 | + * even if the active flag has been cleared. |
|---|
| 383 | + * |
|---|
| 384 | + * To further complicate matters, if there is a pending promotion, the HW |
|---|
| 385 | + * may either perform a context switch to the second inflight execlists, |
|---|
| 386 | + * or it may switch to the pending set of execlists. In the case of the |
|---|
| 387 | + * latter, it may send the ACK and we process the event copying the |
|---|
| 388 | + * pending[] over top of inflight[], _overwriting_ our *active. Since |
|---|
| 390 | + * this implies the HW is arbitrating and not stuck in *active, we do |
|---|
| 390 | + * not worry about complete accuracy, but we do require no read/write |
|---|
| 391 | + * tearing of the pointer [the read of the pointer must be valid, even |
|---|
| 392 | + * as the array is being overwritten, for which we require the writes |
|---|
| 393 | + * to avoid tearing.] |
|---|
| 394 | + * |
|---|
| 395 | + * Note that the read of *execlists->active may race with the promotion |
|---|
| 396 | + * of execlists->pending[] to execlists->inflight[], overwriting |
|---|
| 397 | + * the value at *execlists->active. This is fine. The promotion implies |
|---|
| 398 | + * that we received an ACK from the HW, and so the context is not |
|---|
| 399 | + * stuck -- if we do not see ourselves in *active, the inflight status |
|---|
| 400 | + * is valid. If instead we see ourselves being copied into *active, |
|---|
| 401 | + * we are inflight and may signal the callback. |
|---|
| 402 | + */ |
|---|
| 403 | + if (!intel_context_inflight(signal->context)) |
|---|
| 404 | + return false; |
|---|
| 405 | + |
|---|
| 406 | + rcu_read_lock(); |
|---|
| 407 | + for (port = __engine_active(signal->engine); |
|---|
| 408 | + (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */ |
|---|
| 409 | + port++) { |
|---|
| 410 | + if (rq->context == signal->context) { |
|---|
| 411 | + inflight = i915_seqno_passed(rq->fence.seqno, |
|---|
| 412 | + signal->fence.seqno); |
|---|
| 413 | + break; |
|---|
| 414 | + } |
|---|
| 415 | + } |
|---|
| 416 | + rcu_read_unlock(); |
|---|
| 417 | + |
|---|
| 418 | + return inflight; |
|---|
| 419 | +} |
|---|
| 420 | + |
|---|
| 421 | +static int |
|---|
| 422 | +__await_execution(struct i915_request *rq, |
|---|
| 423 | + struct i915_request *signal, |
|---|
| 424 | + void (*hook)(struct i915_request *rq, |
|---|
| 425 | + struct dma_fence *signal), |
|---|
| 426 | + gfp_t gfp) |
|---|
| 427 | +{ |
|---|
| 428 | + struct execute_cb *cb; |
|---|
| 429 | + |
|---|
| 430 | + if (i915_request_is_active(signal)) { |
|---|
| 431 | + if (hook) |
|---|
| 432 | + hook(rq, &signal->fence); |
|---|
| 433 | + return 0; |
|---|
| 434 | + } |
|---|
| 435 | + |
|---|
| 436 | + cb = kmem_cache_alloc(global.slab_execute_cbs, gfp); |
|---|
| 437 | + if (!cb) |
|---|
| 438 | + return -ENOMEM; |
|---|
| 439 | + |
|---|
| 440 | + cb->fence = &rq->submit; |
|---|
| 441 | + i915_sw_fence_await(cb->fence); |
|---|
| 442 | + init_irq_work(&cb->work, irq_execute_cb); |
|---|
| 443 | + |
|---|
| 444 | + if (hook) { |
|---|
| 445 | + cb->hook = hook; |
|---|
| 446 | + cb->signal = i915_request_get(signal); |
|---|
| 447 | + cb->work.func = irq_execute_cb_hook; |
|---|
| 448 | + } |
|---|
| 449 | + |
|---|
| 450 | + /* |
|---|
| 451 | + * Register the callback first, then see if the signaler is already |
|---|
| 452 | + * active. This ensures that if we race with the |
|---|
| 453 | + * __notify_execute_cb from i915_request_submit() and we are not |
|---|
| 454 | + * included in that list, we get a second bite of the cherry and |
|---|
| 455 | + * execute it ourselves. After this point, a future |
|---|
| 456 | + * i915_request_submit() will notify us. |
|---|
| 457 | + * |
|---|
| 458 | + * In i915_request_retire() we set the ACTIVE bit on a completed |
|---|
| 459 | + * request (then flush the execute_cb). So by registering the |
|---|
| 460 | + * callback first, then checking the ACTIVE bit, we serialise with |
|---|
| 461 | + * the completed/retired request. |
|---|
| 462 | + */ |
|---|
| 463 | + if (llist_add(&cb->work.llnode, &signal->execute_cb)) { |
|---|
| 464 | + if (i915_request_is_active(signal) || |
|---|
| 465 | + __request_in_flight(signal)) |
|---|
| 466 | + __notify_execute_cb_imm(signal); |
|---|
| 467 | + } |
|---|
| 468 | + |
|---|
| 469 | + return 0; |
|---|
| 470 | +} |
|---|
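`__await_execution()` closes the usual publish/check race by adding the callback to `signal->execute_cb` first and only then re-checking whether the signaler is already active; if it is, the waiter flushes the list itself instead of relying on a notification that may already have fired. The same ordering, reduced to a flag plus a lock-free list in C11 atomics (all names here are illustrative):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct cb { struct cb *next; void (*fn)(struct cb *cb); };

static _Atomic(struct cb *) callbacks;	/* stands in for rq->execute_cb */
static atomic_bool active;		/* stands in for I915_FENCE_FLAG_ACTIVE */

static void run_callbacks(void)
{
	struct cb *list = atomic_exchange(&callbacks, NULL);

	while (list) {
		struct cb *next = list->next;

		list->fn(list);
		list = next;
	}
}

static void add_callback(struct cb *cb)
{
	/* 1. Publish the callback... */
	cb->next = atomic_load(&callbacks);
	while (!atomic_compare_exchange_weak(&callbacks, &cb->next, cb))
		;

	/*
	 * 2. ...then check the flag. If the producer flipped it before our
	 * publish, its flush may have missed us, so run the list ourselves.
	 */
	if (atomic_load(&active))
		run_callbacks();
}
```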
| 471 | + |
|---|
| 472 | +static bool fatal_error(int error) |
|---|
| 473 | +{ |
|---|
| 474 | + switch (error) { |
|---|
| 475 | + case 0: /* not an error! */ |
|---|
| 476 | + case -EAGAIN: /* innocent victim of a GT reset (__i915_request_reset) */ |
|---|
| 477 | + case -ETIMEDOUT: /* waiting for Godot (timer_i915_sw_fence_wake) */ |
|---|
| 478 | + return false; |
|---|
| 479 | + default: |
|---|
| 480 | + return true; |
|---|
| 481 | + } |
|---|
| 482 | +} |
|---|
| 483 | + |
|---|
| 484 | +void __i915_request_skip(struct i915_request *rq) |
|---|
| 485 | +{ |
|---|
| 486 | + GEM_BUG_ON(!fatal_error(rq->fence.error)); |
|---|
| 487 | + |
|---|
| 488 | + if (rq->infix == rq->postfix) |
|---|
| 482 | 489 | return; |
|---|
| 483 | 490 | |
|---|
| 491 | + /* |
|---|
| 492 | + * As this request likely depends on state from the lost |
|---|
| 493 | + * context, clear out all the user operations leaving the |
|---|
| 494 | + * breadcrumb at the end (so we get the fence notifications). |
|---|
| 495 | + */ |
|---|
| 496 | + __i915_request_fill(rq, 0); |
|---|
| 497 | + rq->infix = rq->postfix; |
|---|
| 498 | +} |
|---|
| 499 | + |
|---|
| 500 | +void i915_request_set_error_once(struct i915_request *rq, int error) |
|---|
| 501 | +{ |
|---|
| 502 | + int old; |
|---|
| 503 | + |
|---|
| 504 | + GEM_BUG_ON(!IS_ERR_VALUE((long)error)); |
|---|
| 505 | + |
|---|
| 506 | + if (i915_request_signaled(rq)) |
|---|
| 507 | + return; |
|---|
| 508 | + |
|---|
| 509 | + old = READ_ONCE(rq->fence.error); |
|---|
| 484 | 510 | do { |
|---|
| 485 | | - tmp = list_first_entry(&ring->request_list, |
|---|
| 486 | | - typeof(*tmp), ring_link); |
|---|
| 487 | | - |
|---|
| 488 | | - i915_request_retire(tmp); |
|---|
| 489 | | - } while (tmp != rq); |
|---|
| 511 | + if (fatal_error(old)) |
|---|
| 512 | + return; |
|---|
| 513 | + } while (!try_cmpxchg(&rq->fence.error, &old, error)); |
|---|
| 490 | 514 | } |
|---|
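`i915_request_set_error_once()` is a small compare-and-swap loop: benign errors (0, -EAGAIN, -ETIMEDOUT) may be upgraded, but the first fatal error sticks. The same "set once unless already fatal" idiom in C11 atomics, with the fatal test mirroring `fatal_error()` above (the `atomic_int` slot is illustrative):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <errno.h>

static bool fatal(int error)
{
	switch (error) {
	case 0:			/* not an error */
	case -EAGAIN:		/* transient, e.g. a reset victim */
	case -ETIMEDOUT:	/* transient wait failure */
		return false;
	default:
		return true;
	}
}

static void set_error_once(atomic_int *slot, int error)
{
	int old = atomic_load(slot);

	/* The first fatal error wins; later callers observe it and bail. */
	do {
		if (fatal(old))
			return;
	} while (!atomic_compare_exchange_weak(slot, &old, error));
}
```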
| 491 | 515 | |
|---|
| 492 | | -static u32 timeline_get_seqno(struct i915_timeline *tl) |
|---|
| 493 | | -{ |
|---|
| 494 | | - return ++tl->seqno; |
|---|
| 495 | | -} |
|---|
| 496 | | - |
|---|
| 497 | | -static void move_to_timeline(struct i915_request *request, |
|---|
| 498 | | - struct i915_timeline *timeline) |
|---|
| 499 | | -{ |
|---|
| 500 | | - GEM_BUG_ON(request->timeline == &request->engine->timeline); |
|---|
| 501 | | - lockdep_assert_held(&request->engine->timeline.lock); |
|---|
| 502 | | - |
|---|
| 503 | | - spin_lock(&request->timeline->lock); |
|---|
| 504 | | - list_move_tail(&request->link, &timeline->requests); |
|---|
| 505 | | - spin_unlock(&request->timeline->lock); |
|---|
| 506 | | -} |
|---|
| 507 | | - |
|---|
| 508 | | -void __i915_request_submit(struct i915_request *request) |
|---|
| 516 | +bool __i915_request_submit(struct i915_request *request) |
|---|
| 509 | 517 | { |
|---|
| 510 | 518 | struct intel_engine_cs *engine = request->engine; |
|---|
| 511 | | - u32 seqno; |
|---|
| 519 | + bool result = false; |
|---|
| 512 | 520 | |
|---|
| 513 | | - GEM_TRACE("%s fence %llx:%d -> global=%d, current %d\n", |
|---|
| 514 | | - engine->name, |
|---|
| 515 | | - request->fence.context, request->fence.seqno, |
|---|
| 516 | | - engine->timeline.seqno + 1, |
|---|
| 517 | | - intel_engine_get_seqno(engine)); |
|---|
| 521 | + RQ_TRACE(request, "\n"); |
|---|
| 518 | 522 | |
|---|
| 519 | 523 | GEM_BUG_ON(!irqs_disabled()); |
|---|
| 520 | | - lockdep_assert_held(&engine->timeline.lock); |
|---|
| 524 | + lockdep_assert_held(&engine->active.lock); |
|---|
| 521 | 525 | |
|---|
| 522 | | - GEM_BUG_ON(request->global_seqno); |
|---|
| 526 | + /* |
|---|
| 527 | + * With the advent of preempt-to-busy, we frequently encounter |
|---|
| 528 | + * requests that we have unsubmitted from HW, but left running |
|---|
| 529 | + * until the next ack and so have completed in the meantime. On |
|---|
| 530 | + * resubmission of that completed request, we can skip |
|---|
| 531 | + * updating the payload, and execlists can even skip submitting |
|---|
| 532 | + * the request. |
|---|
| 533 | + * |
|---|
| 534 | + * We must remove the request from the caller's priority queue, |
|---|
| 535 | + * and the caller must only call us when the request is in their |
|---|
| 536 | + * priority queue, under the active.lock. This ensures that the |
|---|
| 537 | + * request has *not* yet been retired and we can safely move |
|---|
| 538 | + * the request into the engine->active.list where it will be |
|---|
| 539 | + * dropped upon retiring. (Otherwise if we resubmit a *retired* |
|---|
| 540 | + * request, this would be a horrible use-after-free.) |
|---|
| 541 | + */ |
|---|
| 542 | + if (i915_request_completed(request)) |
|---|
| 543 | + goto xfer; |
|---|
| 523 | 544 | |
|---|
| 524 | | - seqno = timeline_get_seqno(&engine->timeline); |
|---|
| 525 | | - GEM_BUG_ON(!seqno); |
|---|
| 526 | | - GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); |
|---|
| 545 | + if (unlikely(intel_context_is_closed(request->context) && |
|---|
| 546 | + !intel_engine_has_heartbeat(engine))) |
|---|
| 547 | + intel_context_set_banned(request->context); |
|---|
| 527 | 548 | |
|---|
| 528 | | - /* We may be recursing from the signal callback of another i915 fence */ |
|---|
| 529 | | - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); |
|---|
| 530 | | - request->global_seqno = seqno; |
|---|
| 531 | | - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) |
|---|
| 532 | | - intel_engine_enable_signaling(request, false); |
|---|
| 533 | | - spin_unlock(&request->lock); |
|---|
| 549 | + if (unlikely(intel_context_is_banned(request->context))) |
|---|
| 550 | + i915_request_set_error_once(request, -EIO); |
|---|
| 534 | 551 | |
|---|
| 535 | | - engine->emit_breadcrumb(request, |
|---|
| 536 | | - request->ring->vaddr + request->postfix); |
|---|
| 552 | + if (unlikely(fatal_error(request->fence.error))) |
|---|
| 553 | + __i915_request_skip(request); |
|---|
| 537 | 554 | |
|---|
| 538 | | - /* Transfer from per-context onto the global per-engine timeline */ |
|---|
| 539 | | - move_to_timeline(request, &engine->timeline); |
|---|
| 555 | + /* |
|---|
| 556 | + * Are we using semaphores when the gpu is already saturated? |
|---|
| 557 | + * |
|---|
| 558 | + * Using semaphores incurs a cost in having the GPU poll a |
|---|
| 559 | + * memory location, busywaiting for it to change. The continual |
|---|
| 560 | + * memory reads can have a noticeable impact on the rest of the |
|---|
| 561 | + * system with the extra bus traffic, stalling the cpu as it too |
|---|
| 562 | + * tries to access memory across the bus (perf stat -e bus-cycles). |
|---|
| 563 | + * |
|---|
| 564 | + * If we installed a semaphore on this request and we only submit |
|---|
| 565 | + * the request after the signaler completed, that indicates the |
|---|
| 566 | + * system is overloaded and using semaphores at this time only |
|---|
| 567 | + * increases the amount of work we are doing. If so, we disable |
|---|
| 568 | + * further use of semaphores until we are idle again, whence we |
|---|
| 569 | + * optimistically try again. |
|---|
| 570 | + */ |
|---|
| 571 | + if (request->sched.semaphores && |
|---|
| 572 | + i915_sw_fence_signaled(&request->semaphore)) |
|---|
| 573 | + engine->saturated |= request->sched.semaphores; |
|---|
| 574 | + |
|---|
| 575 | + engine->emit_fini_breadcrumb(request, |
|---|
| 576 | + request->ring->vaddr + request->postfix); |
|---|
| 540 | 577 | |
|---|
| 541 | 578 | trace_i915_request_execute(request); |
|---|
| 579 | + engine->serial++; |
|---|
| 580 | + result = true; |
|---|
| 542 | 581 | |
|---|
| 543 | | - wake_up_all(&request->execute); |
|---|
| 582 | +xfer: |
|---|
| 583 | + if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) { |
|---|
| 584 | + list_move_tail(&request->sched.link, &engine->active.requests); |
|---|
| 585 | + clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); |
|---|
| 586 | + } |
|---|
| 587 | + |
|---|
| 588 | + /* |
|---|
| 589 | + * XXX Rollback bonded-execution on __i915_request_unsubmit()? |
|---|
| 590 | + * |
|---|
| 591 | + * In the future, perhaps when we have an active time-slicing scheduler, |
|---|
| 592 | + * it will be interesting to unsubmit parallel execution and remove |
|---|
| 593 | + * busywaits from the GPU until their master is restarted. This is |
|---|
| 594 | + * quite hairy, we have to carefully rollback the fence and do a |
|---|
| 595 | + * preempt-to-idle cycle on the target engine, all the while the |
|---|
| 596 | + * master execute_cb may refire. |
|---|
| 597 | + */ |
|---|
| 598 | + __notify_execute_cb_irq(request); |
|---|
| 599 | + |
|---|
| 600 | + /* We may be recursing from the signal callback of another i915 fence */ |
|---|
| 601 | + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) |
|---|
| 602 | + i915_request_enable_breadcrumb(request); |
|---|
| 603 | + |
|---|
| 604 | + return result; |
|---|
| 544 | 605 | } |
|---|
| 545 | 606 | |
|---|
| 546 | 607 | void i915_request_submit(struct i915_request *request) |
|---|
| .. | .. |
|---|
| 549 | 610 | unsigned long flags; |
|---|
| 550 | 611 | |
|---|
| 551 | 612 | /* Will be called from irq-context when using foreign fences. */ |
|---|
| 552 | | - spin_lock_irqsave(&engine->timeline.lock, flags); |
|---|
| 613 | + spin_lock_irqsave(&engine->active.lock, flags); |
|---|
| 553 | 614 | |
|---|
| 554 | 615 | __i915_request_submit(request); |
|---|
| 555 | 616 | |
|---|
| 556 | | - spin_unlock_irqrestore(&engine->timeline.lock, flags); |
|---|
| 617 | + spin_unlock_irqrestore(&engine->active.lock, flags); |
|---|
| 557 | 618 | } |
|---|
| 558 | 619 | |
|---|
| 559 | 620 | void __i915_request_unsubmit(struct i915_request *request) |
|---|
| 560 | 621 | { |
|---|
| 561 | 622 | struct intel_engine_cs *engine = request->engine; |
|---|
| 562 | 623 | |
|---|
| 563 | | - GEM_TRACE("%s fence %llx:%d <- global=%d, current %d\n", |
|---|
| 564 | | - engine->name, |
|---|
| 565 | | - request->fence.context, request->fence.seqno, |
|---|
| 566 | | - request->global_seqno, |
|---|
| 567 | | - intel_engine_get_seqno(engine)); |
|---|
| 568 | | - |
|---|
| 569 | | - GEM_BUG_ON(!irqs_disabled()); |
|---|
| 570 | | - lockdep_assert_held(&engine->timeline.lock); |
|---|
| 571 | | - |
|---|
| 572 | 624 | /* |
|---|
| 573 | 625 | * Only unwind in reverse order, required so that the per-context list |
|---|
| 574 | 626 | * is kept in seqno/ring order. |
|---|
| 575 | 627 | */ |
|---|
| 576 | | - GEM_BUG_ON(!request->global_seqno); |
|---|
| 577 | | - GEM_BUG_ON(request->global_seqno != engine->timeline.seqno); |
|---|
| 578 | | - GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), |
|---|
| 579 | | - request->global_seqno)); |
|---|
| 580 | | - engine->timeline.seqno--; |
|---|
| 628 | + RQ_TRACE(request, "\n"); |
|---|
| 581 | 629 | |
|---|
| 582 | | - /* We may be recursing from the signal callback of another i915 fence */ |
|---|
| 583 | | - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); |
|---|
| 584 | | - request->global_seqno = 0; |
|---|
| 630 | + GEM_BUG_ON(!irqs_disabled()); |
|---|
| 631 | + lockdep_assert_held(&engine->active.lock); |
|---|
| 632 | + |
|---|
| 633 | + /* |
|---|
| 634 | + * Before we remove this breadcrumb from the signal list, we have |
|---|
| 635 | + * to ensure that a concurrent dma_fence_enable_signaling() does not |
|---|
| 636 | + * attach itself. We first mark the request as no longer active and |
|---|
| 637 | + * make sure that is visible to other cores, and then remove the |
|---|
| 638 | + * breadcrumb if attached. |
|---|
| 639 | + */ |
|---|
| 640 | + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); |
|---|
| 641 | + clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); |
|---|
| 585 | 642 | if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) |
|---|
| 586 | | - intel_engine_cancel_signaling(request); |
|---|
| 587 | | - spin_unlock(&request->lock); |
|---|
| 643 | + i915_request_cancel_breadcrumb(request); |
|---|
| 588 | 644 | |
|---|
| 589 | | - /* Transfer back from the global per-engine timeline to per-context */ |
|---|
| 590 | | - move_to_timeline(request, request->timeline); |
|---|
| 645 | + /* We've already spun, don't charge on resubmitting. */ |
|---|
| 646 | + if (request->sched.semaphores && i915_request_started(request)) |
|---|
| 647 | + request->sched.semaphores = 0; |
|---|
| 591 | 648 | |
|---|
| 592 | 649 | /* |
|---|
| 593 | 650 | * We don't need to wake_up any waiters on request->execute, they |
|---|
| .. | .. |
|---|
| 604 | 661 | unsigned long flags; |
|---|
| 605 | 662 | |
|---|
| 606 | 663 | /* Will be called from irq-context when using foreign fences. */ |
|---|
| 607 | | - spin_lock_irqsave(&engine->timeline.lock, flags); |
|---|
| 664 | + spin_lock_irqsave(&engine->active.lock, flags); |
|---|
| 608 | 665 | |
|---|
| 609 | 666 | __i915_request_unsubmit(request); |
|---|
| 610 | 667 | |
|---|
| 611 | | - spin_unlock_irqrestore(&engine->timeline.lock, flags); |
|---|
| 668 | + spin_unlock_irqrestore(&engine->active.lock, flags); |
|---|
| 612 | 669 | } |
|---|
| 613 | 670 | |
|---|
| 614 | 671 | static int __i915_sw_fence_call |
|---|
| .. | .. |
|---|
| 620 | 677 | switch (state) { |
|---|
| 621 | 678 | case FENCE_COMPLETE: |
|---|
| 622 | 679 | trace_i915_request_submit(request); |
|---|
| 680 | + |
|---|
| 681 | + if (unlikely(fence->error)) |
|---|
| 682 | + i915_request_set_error_once(request, fence->error); |
|---|
| 683 | + |
|---|
| 623 | 684 | /* |
|---|
| 624 | 685 | * We need to serialize use of the submit_request() callback |
|---|
| 625 | 686 | * with its hotplugging performed during an emergency |
|---|
| .. | .. |
|---|
| 641 | 702 | return NOTIFY_DONE; |
|---|
| 642 | 703 | } |
|---|
| 643 | 704 | |
|---|
| 644 | | -/** |
|---|
| 645 | | - * i915_request_alloc - allocate a request structure |
|---|
| 646 | | - * |
|---|
| 647 | | - * @engine: engine that we wish to issue the request on. |
|---|
| 648 | | - * @ctx: context that the request will be associated with. |
|---|
| 649 | | - * |
|---|
| 650 | | - * Returns a pointer to the allocated request if successful, |
|---|
| 651 | | - * or an error code if not. |
|---|
| 652 | | - */ |
|---|
| 653 | | -struct i915_request * |
|---|
| 654 | | -i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) |
|---|
| 705 | +static int __i915_sw_fence_call |
|---|
| 706 | +semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) |
|---|
| 655 | 707 | { |
|---|
| 656 | | - struct drm_i915_private *i915 = engine->i915; |
|---|
| 708 | + struct i915_request *rq = container_of(fence, typeof(*rq), semaphore); |
|---|
| 709 | + |
|---|
| 710 | + switch (state) { |
|---|
| 711 | + case FENCE_COMPLETE: |
|---|
| 712 | + break; |
|---|
| 713 | + |
|---|
| 714 | + case FENCE_FREE: |
|---|
| 715 | + i915_request_put(rq); |
|---|
| 716 | + break; |
|---|
| 717 | + } |
|---|
| 718 | + |
|---|
| 719 | + return NOTIFY_DONE; |
|---|
| 720 | +} |
|---|
| 721 | + |
|---|
| 722 | +static void retire_requests(struct intel_timeline *tl) |
|---|
| 723 | +{ |
|---|
| 724 | + struct i915_request *rq, *rn; |
|---|
| 725 | + |
|---|
| 726 | + list_for_each_entry_safe(rq, rn, &tl->requests, link) |
|---|
| 727 | + if (!i915_request_retire(rq)) |
|---|
| 728 | + break; |
|---|
| 729 | +} |
|---|
| 730 | + |
|---|
| 731 | +static noinline struct i915_request * |
|---|
| 732 | +request_alloc_slow(struct intel_timeline *tl, |
|---|
| 733 | + struct i915_request **rsvd, |
|---|
| 734 | + gfp_t gfp) |
|---|
| 735 | +{ |
|---|
| 657 | 736 | struct i915_request *rq; |
|---|
| 658 | | - struct intel_context *ce; |
|---|
| 659 | | - int ret; |
|---|
| 660 | 737 | |
|---|
| 661 | | - lockdep_assert_held(&i915->drm.struct_mutex); |
|---|
| 738 | + /* If we cannot wait, dip into our reserves */ |
|---|
| 739 | + if (!gfpflags_allow_blocking(gfp)) { |
|---|
| 740 | + rq = xchg(rsvd, NULL); |
|---|
| 741 | + if (!rq) /* Use the normal failure path for one final WARN */ |
|---|
| 742 | + goto out; |
|---|
| 662 | 743 | |
|---|
| 663 | | - /* |
|---|
| 664 | | - * Preempt contexts are reserved for exclusive use to inject a |
|---|
| 665 | | - * preemption context switch. They are never to be used for any trivial |
|---|
| 666 | | - * request! |
|---|
| 667 | | - */ |
|---|
| 668 | | - GEM_BUG_ON(ctx == i915->preempt_context); |
|---|
| 744 | + return rq; |
|---|
| 745 | + } |
|---|
| 669 | 746 | |
|---|
| 670 | | - /* |
|---|
| 671 | | - * ABI: Before userspace accesses the GPU (e.g. execbuffer), report |
|---|
| 672 | | - * EIO if the GPU is already wedged. |
|---|
| 673 | | - */ |
|---|
| 674 | | - if (i915_terminally_wedged(&i915->gpu_error)) |
|---|
| 675 | | - return ERR_PTR(-EIO); |
|---|
| 676 | | - |
|---|
| 677 | | - /* |
|---|
| 678 | | - * Pinning the contexts may generate requests in order to acquire |
|---|
| 679 | | - * GGTT space, so do this first before we reserve a seqno for |
|---|
| 680 | | - * ourselves. |
|---|
| 681 | | - */ |
|---|
| 682 | | - ce = intel_context_pin(ctx, engine); |
|---|
| 683 | | - if (IS_ERR(ce)) |
|---|
| 684 | | - return ERR_CAST(ce); |
|---|
| 685 | | - |
|---|
| 686 | | - ret = reserve_gt(i915); |
|---|
| 687 | | - if (ret) |
|---|
| 688 | | - goto err_unpin; |
|---|
| 689 | | - |
|---|
| 690 | | - ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST); |
|---|
| 691 | | - if (ret) |
|---|
| 692 | | - goto err_unreserve; |
|---|
| 747 | + if (list_empty(&tl->requests)) |
|---|
| 748 | + goto out; |
|---|
| 693 | 749 | |
|---|
| 694 | 750 | /* Move our oldest request to the slab-cache (if not in use!) */ |
|---|
| 695 | | - rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); |
|---|
| 696 | | - if (!list_is_last(&rq->ring_link, &ce->ring->request_list) && |
|---|
| 697 | | - i915_request_completed(rq)) |
|---|
| 698 | | - i915_request_retire(rq); |
|---|
| 751 | + rq = list_first_entry(&tl->requests, typeof(*rq), link); |
|---|
| 752 | + i915_request_retire(rq); |
|---|
| 753 | + |
|---|
| 754 | + rq = kmem_cache_alloc(global.slab_requests, |
|---|
| 755 | + gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); |
|---|
| 756 | + if (rq) |
|---|
| 757 | + return rq; |
|---|
| 758 | + |
|---|
| 759 | + /* Ratelimit ourselves to prevent oom from malicious clients */ |
|---|
| 760 | + rq = list_last_entry(&tl->requests, typeof(*rq), link); |
|---|
| 761 | + cond_synchronize_rcu(rq->rcustate); |
|---|
| 762 | + |
|---|
| 763 | + /* Retire our old requests in the hope that we free some */ |
|---|
| 764 | + retire_requests(tl); |
|---|
| 765 | + |
|---|
| 766 | +out: |
|---|
| 767 | + return kmem_cache_alloc(global.slab_requests, gfp); |
|---|
| 768 | +} |
|---|
| 769 | + |
|---|
| 770 | +static void __i915_request_ctor(void *arg) |
|---|
| 771 | +{ |
|---|
| 772 | + struct i915_request *rq = arg; |
|---|
| 773 | + |
|---|
| 774 | + spin_lock_init(&rq->lock); |
|---|
| 775 | + i915_sched_node_init(&rq->sched); |
|---|
| 776 | + i915_sw_fence_init(&rq->submit, submit_notify); |
|---|
| 777 | + i915_sw_fence_init(&rq->semaphore, semaphore_notify); |
|---|
| 778 | + |
|---|
| 779 | + rq->capture_list = NULL; |
|---|
| 780 | + |
|---|
| 781 | + init_llist_head(&rq->execute_cb); |
|---|
| 782 | +} |
|---|
| 783 | + |
|---|
| 784 | +struct i915_request * |
|---|
| 785 | +__i915_request_create(struct intel_context *ce, gfp_t gfp) |
|---|
| 786 | +{ |
|---|
| 787 | + struct intel_timeline *tl = ce->timeline; |
|---|
| 788 | + struct i915_request *rq; |
|---|
| 789 | + u32 seqno; |
|---|
| 790 | + int ret; |
|---|
| 791 | + |
|---|
| 792 | + might_sleep_if(gfpflags_allow_blocking(gfp)); |
|---|
| 793 | + |
|---|
| 794 | + /* Check that the caller provided an already pinned context */ |
|---|
| 795 | + __intel_context_pin(ce); |
|---|
| 699 | 796 | |
|---|
| 700 | 797 | /* |
|---|
| 701 | 798 | * Beware: Dragons be flying overhead. |
|---|
| .. | .. |
|---|
| 703 | 800 | * We use RCU to look up requests in flight. The lookups may |
|---|
| 704 | 801 | * race with the request being allocated from the slab freelist. |
|---|
| 705 | 802 | * That is, the request we are writing to here may be in the process
|---|
| 706 | | - * of being read by __i915_gem_active_get_rcu(). As such, |
|---|
| 803 | + * of being read by __i915_active_request_get_rcu(). As such, |
|---|
| 707 | 804 | * we have to be very careful when overwriting the contents. During |
|---|
| 708 | 805 | * the RCU lookup, we chase the request->engine pointer,
|---|
| 709 | 806 | * read the request->global_seqno and increment the reference count. |
|---|
| .. | .. |
|---|
| 726 | 823 | * |
|---|
| 727 | 824 | * Do not use kmem_cache_zalloc() here! |
|---|
| 728 | 825 | */ |
|---|
| 729 | | - rq = kmem_cache_alloc(i915->requests, |
|---|
| 730 | | - GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); |
|---|
| 826 | + rq = kmem_cache_alloc(global.slab_requests, |
|---|
| 827 | + gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); |
|---|
| 731 | 828 | if (unlikely(!rq)) { |
|---|
| 732 | | - /* Ratelimit ourselves to prevent oom from malicious clients */ |
|---|
| 733 | | - ret = i915_gem_wait_for_idle(i915, |
|---|
| 734 | | - I915_WAIT_LOCKED | |
|---|
| 735 | | - I915_WAIT_INTERRUPTIBLE, |
|---|
| 736 | | - MAX_SCHEDULE_TIMEOUT); |
|---|
| 737 | | - if (ret) |
|---|
| 738 | | - goto err_unreserve; |
|---|
| 739 | | - |
|---|
| 740 | | - /* |
|---|
| 741 | | - * We've forced the client to stall and catch up with whatever |
|---|
| 742 | | - * backlog there might have been. As we are assuming that we |
|---|
| 743 | | - * caused the mempressure, now is an opportune time to |
|---|
| 744 | | - * recover as much memory from the request pool as is possible. |
|---|
| 745 | | - * Having already penalized the client to stall, we spend |
|---|
| 746 | | - * a little extra time to re-optimise page allocation. |
|---|
| 747 | | - */ |
|---|
| 748 | | - kmem_cache_shrink(i915->requests); |
|---|
| 749 | | - rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */ |
|---|
| 750 | | - |
|---|
| 751 | | - rq = kmem_cache_alloc(i915->requests, GFP_KERNEL); |
|---|
| 829 | + rq = request_alloc_slow(tl, &ce->engine->request_pool, gfp); |
|---|
| 752 | 830 | if (!rq) { |
|---|
| 753 | 831 | ret = -ENOMEM; |
|---|
| 754 | 832 | goto err_unreserve; |
|---|
| 755 | 833 | } |
|---|
| 756 | 834 | } |
|---|
| 757 | 835 | |
|---|
| 758 | | - INIT_LIST_HEAD(&rq->active_list); |
|---|
| 759 | | - rq->i915 = i915; |
|---|
| 760 | | - rq->engine = engine; |
|---|
| 761 | | - rq->gem_context = ctx; |
|---|
| 762 | | - rq->hw_context = ce; |
|---|
| 836 | + rq->context = ce; |
|---|
| 837 | + rq->engine = ce->engine; |
|---|
| 763 | 838 | rq->ring = ce->ring; |
|---|
| 764 | | - rq->timeline = ce->ring->timeline; |
|---|
| 765 | | - GEM_BUG_ON(rq->timeline == &engine->timeline); |
|---|
| 839 | + rq->execution_mask = ce->engine->mask; |
|---|
| 766 | 840 | |
|---|
| 767 | | - spin_lock_init(&rq->lock); |
|---|
| 768 | | - dma_fence_init(&rq->fence, |
|---|
| 769 | | - &i915_fence_ops, |
|---|
| 770 | | - &rq->lock, |
|---|
| 771 | | - rq->timeline->fence_context, |
|---|
| 772 | | - timeline_get_seqno(rq->timeline)); |
|---|
| 841 | + ret = intel_timeline_get_seqno(tl, rq, &seqno); |
|---|
| 842 | + if (ret) |
|---|
| 843 | + goto err_free; |
|---|
| 844 | + |
|---|
| 845 | + dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, |
|---|
| 846 | + tl->fence_context, seqno); |
|---|
| 847 | + |
|---|
| 848 | + RCU_INIT_POINTER(rq->timeline, tl); |
|---|
| 849 | + RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); |
|---|
| 850 | + rq->hwsp_seqno = tl->hwsp_seqno; |
|---|
| 851 | + GEM_BUG_ON(i915_request_completed(rq)); |
|---|
| 852 | + |
|---|
| 853 | + rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ |
|---|
| 773 | 854 | |
|---|
| 774 | 855 | /* We bump the ref for the fence chain */ |
|---|
| 775 | | - i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); |
|---|
| 776 | | - init_waitqueue_head(&rq->execute); |
|---|
| 856 | + i915_sw_fence_reinit(&i915_request_get(rq)->submit); |
|---|
| 857 | + i915_sw_fence_reinit(&i915_request_get(rq)->semaphore); |
|---|
| 777 | 858 | |
|---|
| 778 | | - i915_sched_node_init(&rq->sched); |
|---|
| 859 | + i915_sched_node_reinit(&rq->sched); |
|---|
| 779 | 860 | |
|---|
| 780 | | - /* No zalloc, must clear what we need by hand */ |
|---|
| 781 | | - rq->global_seqno = 0; |
|---|
| 782 | | - rq->signaling.wait.seqno = 0; |
|---|
| 783 | | - rq->file_priv = NULL; |
|---|
| 861 | + /* No zalloc, everything must be cleared after use */ |
|---|
| 784 | 862 | rq->batch = NULL; |
|---|
| 785 | | - rq->capture_list = NULL; |
|---|
| 786 | | - rq->waitboost = false; |
|---|
| 863 | + GEM_BUG_ON(rq->capture_list); |
|---|
| 864 | + GEM_BUG_ON(!llist_empty(&rq->execute_cb)); |
|---|
| 787 | 865 | |
|---|
| 788 | 866 | /* |
|---|
| 789 | 867 | * Reserve space in the ring buffer for all the commands required to |
|---|
| .. | .. |
|---|
| 791 | 869 | * i915_request_add() call can't fail. Note that the reserve may need |
|---|
| 792 | 870 | * to be redone if the request is not actually submitted straight |
|---|
| 793 | 871 | * away, e.g. because a GPU scheduler has deferred it. |
|---|
| 872 | + * |
|---|
| 873 | + * Note that due to how we add reserved_space to intel_ring_begin() |
|---|
| 874 | + * we need to double our request to ensure that if we need to wrap |
|---|
| 875 | + * around inside i915_request_add() there is sufficient space at |
|---|
| 876 | + * the beginning of the ring as well. |
|---|
| 794 | 877 | */ |
|---|
| 795 | | - rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; |
|---|
| 796 | | - GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz); |
|---|
| 878 | + rq->reserved_space = |
|---|
| 879 | + 2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32); |
|---|
| 797 | 880 | |
|---|
| 798 | 881 | /* |
|---|
| 799 | 882 | * Record the position of the start of the request so that |
|---|
| .. | .. |
|---|
| 803 | 886 | */ |
|---|
| 804 | 887 | rq->head = rq->ring->emit; |
|---|
| 805 | 888 | |
|---|
| 806 | | - /* Unconditionally invalidate GPU caches and TLBs. */ |
|---|
| 807 | | - ret = engine->emit_flush(rq, EMIT_INVALIDATE); |
|---|
| 889 | + ret = rq->engine->request_alloc(rq); |
|---|
| 808 | 890 | if (ret) |
|---|
| 809 | 891 | goto err_unwind; |
|---|
| 810 | | - |
|---|
| 811 | | - ret = engine->request_alloc(rq); |
|---|
| 812 | | - if (ret) |
|---|
| 813 | | - goto err_unwind; |
|---|
| 814 | | - |
|---|
| 815 | | - /* Keep a second pin for the dual retirement along engine and ring */ |
|---|
| 816 | | - __intel_context_pin(ce); |
|---|
| 817 | 892 | |
|---|
| 818 | 893 | rq->infix = rq->ring->emit; /* end of header; start of user payload */ |
|---|
| 819 | 894 | |
|---|
| 820 | | - /* Check that we didn't interrupt ourselves with a new request */ |
|---|
| 821 | | - GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); |
|---|
| 895 | + intel_context_mark_active(ce); |
|---|
| 896 | + list_add_tail_rcu(&rq->link, &tl->requests); |
|---|
| 897 | + |
|---|
| 822 | 898 | return rq; |
|---|
| 823 | 899 | |
|---|
| 824 | 900 | err_unwind: |
|---|
| 825 | 901 | ce->ring->emit = rq->head; |
|---|
| 826 | 902 | |
|---|
| 827 | 903 | /* Make sure we didn't add ourselves to external state before freeing */ |
|---|
| 828 | | - GEM_BUG_ON(!list_empty(&rq->active_list)); |
|---|
| 829 | 904 | GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); |
|---|
| 830 | 905 | GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); |
|---|
| 831 | 906 | |
|---|
| 832 | | - kmem_cache_free(i915->requests, rq); |
|---|
| 907 | +err_free: |
|---|
| 908 | + kmem_cache_free(global.slab_requests, rq); |
|---|
| 833 | 909 | err_unreserve: |
|---|
| 834 | | - unreserve_gt(i915); |
|---|
| 835 | | -err_unpin: |
|---|
| 836 | 910 | intel_context_unpin(ce); |
|---|
| 837 | 911 | return ERR_PTR(ret); |
|---|
| 912 | +} |
|---|
| 913 | + |
|---|
| 914 | +struct i915_request * |
|---|
| 915 | +i915_request_create(struct intel_context *ce) |
|---|
| 916 | +{ |
|---|
| 917 | + struct i915_request *rq; |
|---|
| 918 | + struct intel_timeline *tl; |
|---|
| 919 | + |
|---|
| 920 | + tl = intel_context_timeline_lock(ce); |
|---|
| 921 | + if (IS_ERR(tl)) |
|---|
| 922 | + return ERR_CAST(tl); |
|---|
| 923 | + |
|---|
| 924 | + /* Move our oldest request to the slab-cache (if not in use!) */ |
|---|
| 925 | + rq = list_first_entry(&tl->requests, typeof(*rq), link); |
|---|
| 926 | + if (!list_is_last(&rq->link, &tl->requests)) |
|---|
| 927 | + i915_request_retire(rq); |
|---|
| 928 | + |
|---|
| 929 | + intel_context_enter(ce); |
|---|
| 930 | + rq = __i915_request_create(ce, GFP_KERNEL); |
|---|
| 931 | + intel_context_exit(ce); /* active reference transferred to request */ |
|---|
| 932 | + if (IS_ERR(rq)) |
|---|
| 933 | + goto err_unlock; |
|---|
| 934 | + |
|---|
| 935 | + /* Check that we do not interrupt ourselves with a new request */ |
|---|
| 936 | + rq->cookie = lockdep_pin_lock(&tl->mutex); |
|---|
| 937 | + |
|---|
| 938 | + return rq; |
|---|
| 939 | + |
|---|
| 940 | +err_unlock: |
|---|
| 941 | + intel_context_timeline_unlock(tl); |
|---|
| 942 | + return rq; |
|---|
| 943 | +} |
|---|
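For callers, the pairing of i915_request_create() and i915_request_add() is the whole lifecycle: create returns with the context's timeline mutex held (lockdep-pinned via rq->cookie), the caller emits its payload into the ring, and add commits the request and drops the mutex. Below is a minimal sketch of that calling sequence, assuming the caller already owns a pinned intel_context; the helper name and the NOOP payload are illustrative only.

```c
/*
 * Minimal sketch of the expected calling sequence around
 * i915_request_create()/i915_request_add(). 'ce' is assumed to be a
 * pinned intel_context owned by the caller; the payload here is just
 * a pair of NOOPs for illustration.
 */
static int example_emit_nops(struct intel_context *ce)
{
	struct i915_request *rq;
	u32 *cs;

	rq = i915_request_create(ce); /* returns with ce->timeline->mutex held */
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 2); /* payload goes between create and add */
	if (IS_ERR(cs)) {
		i915_request_add(rq); /* must still commit to drop the mutex */
		return PTR_ERR(cs);
	}

	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	i915_request_add(rq); /* commits, queues and unlocks the timeline */
	return 0;
}
```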
| 944 | + |
|---|
| 945 | +static int |
|---|
| 946 | +i915_request_await_start(struct i915_request *rq, struct i915_request *signal) |
|---|
| 947 | +{ |
|---|
| 948 | + struct dma_fence *fence; |
|---|
| 949 | + int err; |
|---|
| 950 | + |
|---|
| 951 | + if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline)) |
|---|
| 952 | + return 0; |
|---|
| 953 | + |
|---|
| 954 | + if (i915_request_started(signal)) |
|---|
| 955 | + return 0; |
|---|
| 956 | + |
|---|
| 957 | + fence = NULL; |
|---|
| 958 | + rcu_read_lock(); |
|---|
| 959 | + spin_lock_irq(&signal->lock); |
|---|
| 960 | + do { |
|---|
| 961 | + struct list_head *pos = READ_ONCE(signal->link.prev); |
|---|
| 962 | + struct i915_request *prev; |
|---|
| 963 | + |
|---|
| 964 | + /* Confirm signal has not been retired, the link is valid */ |
|---|
| 965 | + if (unlikely(i915_request_started(signal))) |
|---|
| 966 | + break; |
|---|
| 967 | + |
|---|
| 968 | + /* Is signal the earliest request on its timeline? */ |
|---|
| 969 | + if (pos == &rcu_dereference(signal->timeline)->requests) |
|---|
| 970 | + break; |
|---|
| 971 | + |
|---|
| 972 | + /* |
|---|
| 973 | + * Peek at the request before us in the timeline. That |
|---|
| 974 | + * request will only be valid before it is retired, so |
|---|
| 975 | + * after acquiring a reference to it, confirm that it is |
|---|
| 976 | + * still part of the signaler's timeline. |
|---|
| 977 | + */ |
|---|
| 978 | + prev = list_entry(pos, typeof(*prev), link); |
|---|
| 979 | + if (!i915_request_get_rcu(prev)) |
|---|
| 980 | + break; |
|---|
| 981 | + |
|---|
| 982 | + /* After the strong barrier, confirm prev is still attached */ |
|---|
| 983 | + if (unlikely(READ_ONCE(prev->link.next) != &signal->link)) { |
|---|
| 984 | + i915_request_put(prev); |
|---|
| 985 | + break; |
|---|
| 986 | + } |
|---|
| 987 | + |
|---|
| 988 | + fence = &prev->fence; |
|---|
| 989 | + } while (0); |
|---|
| 990 | + spin_unlock_irq(&signal->lock); |
|---|
| 991 | + rcu_read_unlock(); |
|---|
| 992 | + if (!fence) |
|---|
| 993 | + return 0; |
|---|
| 994 | + |
|---|
| 995 | + err = 0; |
|---|
| 996 | + if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence)) |
|---|
| 997 | + err = i915_sw_fence_await_dma_fence(&rq->submit, |
|---|
| 998 | + fence, 0, |
|---|
| 999 | + I915_FENCE_GFP); |
|---|
| 1000 | + dma_fence_put(fence); |
|---|
| 1001 | + |
|---|
| 1002 | + return err; |
|---|
| 1003 | +} |
|---|
| 1004 | + |
|---|
| 1005 | +static intel_engine_mask_t |
|---|
| 1006 | +already_busywaiting(struct i915_request *rq) |
|---|
| 1007 | +{ |
|---|
| 1008 | + /* |
|---|
| 1009 | + * Polling a semaphore causes bus traffic, delaying other users of |
|---|
| 1010 | + * both the GPU and CPU. We want to limit the impact on others, |
|---|
| 1011 | + * while taking advantage of early submission to reduce GPU |
|---|
| 1012 | + * latency. Therefore we restrict ourselves to not using more |
|---|
| 1013 | + * than one semaphore from each source, and not using a semaphore |
|---|
| 1014 | + * if we have detected the engine is saturated (i.e. would not be |
|---|
| 1015 | + * submitted early and cause bus traffic reading an already passed |
|---|
| 1016 | + * semaphore). |
|---|
| 1017 | + * |
|---|
| 1018 | + * See the are-we-too-late? check in __i915_request_submit(). |
|---|
| 1019 | + */ |
|---|
| 1020 | + return rq->sched.semaphores | READ_ONCE(rq->engine->saturated); |
|---|
| 1021 | +} |
|---|
| 1022 | + |
|---|
| 1023 | +static int |
|---|
| 1024 | +__emit_semaphore_wait(struct i915_request *to, |
|---|
| 1025 | + struct i915_request *from, |
|---|
| 1026 | + u32 seqno) |
|---|
| 1027 | +{ |
|---|
| 1028 | + const int has_token = INTEL_GEN(to->engine->i915) >= 12; |
|---|
| 1029 | + u32 hwsp_offset; |
|---|
| 1030 | + int len, err; |
|---|
| 1031 | + u32 *cs; |
|---|
| 1032 | + |
|---|
| 1033 | + GEM_BUG_ON(INTEL_GEN(to->engine->i915) < 8); |
|---|
| 1034 | + GEM_BUG_ON(i915_request_has_initial_breadcrumb(to)); |
|---|
| 1035 | + |
|---|
| 1036 | + /* We need to pin the signaler's HWSP until we are finished reading. */ |
|---|
| 1037 | + err = intel_timeline_read_hwsp(from, to, &hwsp_offset); |
|---|
| 1038 | + if (err) |
|---|
| 1039 | + return err; |
|---|
| 1040 | + |
|---|
| 1041 | + len = 4; |
|---|
| 1042 | + if (has_token) |
|---|
| 1043 | + len += 2; |
|---|
| 1044 | + |
|---|
| 1045 | + cs = intel_ring_begin(to, len); |
|---|
| 1046 | + if (IS_ERR(cs)) |
|---|
| 1047 | + return PTR_ERR(cs); |
|---|
| 1048 | + |
|---|
| 1049 | + /* |
|---|
| 1050 | + * Using greater-than-or-equal here means we have to worry |
|---|
| 1051 | + * about seqno wraparound. To side step that issue, we swap |
|---|
| 1052 | + * the timeline HWSP upon wrapping, so that everyone listening |
|---|
| 1053 | + * for the old (pre-wrap) values does not see much smaller
|---|
| 1054 | + * (post-wrap) values than they were expecting (and so wait |
|---|
| 1055 | + * forever). |
|---|
| 1056 | + */ |
|---|
| 1057 | + *cs++ = (MI_SEMAPHORE_WAIT | |
|---|
| 1058 | + MI_SEMAPHORE_GLOBAL_GTT | |
|---|
| 1059 | + MI_SEMAPHORE_POLL | |
|---|
| 1060 | + MI_SEMAPHORE_SAD_GTE_SDD) + |
|---|
| 1061 | + has_token; |
|---|
| 1062 | + *cs++ = seqno; |
|---|
| 1063 | + *cs++ = hwsp_offset; |
|---|
| 1064 | + *cs++ = 0; |
|---|
| 1065 | + if (has_token) { |
|---|
| 1066 | + *cs++ = 0; |
|---|
| 1067 | + *cs++ = MI_NOOP; |
|---|
| 1068 | + } |
|---|
| 1069 | + |
|---|
| 1070 | + intel_ring_advance(to, cs); |
|---|
| 1071 | + return 0; |
|---|
| 1072 | +} |
|---|
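On the CPU side the driver avoids the wraparound hazard described in the comment above by comparing seqnos through a signed difference (this is how i915_seqno_passed() is defined); the hardware's greater-than-or-equal semaphore poll has no such trick available, hence the HWSP swap on wrap. For reference, a sketch of the wraparound-safe test (the helper name is illustrative):

```c
/*
 * Wraparound-safe seqno ordering test, as used by the CPU-side helpers
 * (i915_seqno_passed() is defined this way). A plain 'seq1 >= seq2'
 * misorders requests once the u32 seqno wraps, which is why the HW's
 * MI_SEMAPHORE_SAD_GTE_SDD poll needs the HWSP swap described above.
 */
static inline bool example_seqno_passed(u32 seq1, u32 seq2)
{
	return (s32)(seq1 - seq2) >= 0;
}
```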
| 1073 | + |
|---|
| 1074 | +static int |
|---|
| 1075 | +emit_semaphore_wait(struct i915_request *to, |
|---|
| 1076 | + struct i915_request *from, |
|---|
| 1077 | + gfp_t gfp) |
|---|
| 1078 | +{ |
|---|
| 1079 | + const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask; |
|---|
| 1080 | + struct i915_sw_fence *wait = &to->submit; |
|---|
| 1081 | + |
|---|
| 1082 | + if (!intel_context_use_semaphores(to->context)) |
|---|
| 1083 | + goto await_fence; |
|---|
| 1084 | + |
|---|
| 1085 | + if (i915_request_has_initial_breadcrumb(to)) |
|---|
| 1086 | + goto await_fence; |
|---|
| 1087 | + |
|---|
| 1088 | + if (!rcu_access_pointer(from->hwsp_cacheline)) |
|---|
| 1089 | + goto await_fence; |
|---|
| 1090 | + |
|---|
| 1091 | + /* |
|---|
| 1092 | + * If this or its dependents are waiting on an external fence |
|---|
| 1093 | + * that may fail catastrophically, then we want to avoid using |
|---|
| 1094 | + * semaphores as they bypass the fence signaling metadata, and we
|---|
| 1095 | + * lose the fence->error propagation. |
|---|
| 1096 | + */ |
|---|
| 1097 | + if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN) |
|---|
| 1098 | + goto await_fence; |
|---|
| 1099 | + |
|---|
| 1100 | + /* Just emit the first semaphore we see as request space is limited. */ |
|---|
| 1101 | + if (already_busywaiting(to) & mask) |
|---|
| 1102 | + goto await_fence; |
|---|
| 1103 | + |
|---|
| 1104 | + if (i915_request_await_start(to, from) < 0) |
|---|
| 1105 | + goto await_fence; |
|---|
| 1106 | + |
|---|
| 1107 | + /* Only submit our spinner after the signaler is running! */ |
|---|
| 1108 | + if (__await_execution(to, from, NULL, gfp)) |
|---|
| 1109 | + goto await_fence; |
|---|
| 1110 | + |
|---|
| 1111 | + if (__emit_semaphore_wait(to, from, from->fence.seqno)) |
|---|
| 1112 | + goto await_fence; |
|---|
| 1113 | + |
|---|
| 1114 | + to->sched.semaphores |= mask; |
|---|
| 1115 | + wait = &to->semaphore; |
|---|
| 1116 | + |
|---|
| 1117 | +await_fence: |
|---|
| 1118 | + return i915_sw_fence_await_dma_fence(wait, |
|---|
| 1119 | + &from->fence, 0, |
|---|
| 1120 | + I915_FENCE_GFP); |
|---|
| 1121 | +} |
|---|
| 1122 | + |
|---|
| 1123 | +static bool intel_timeline_sync_has_start(struct intel_timeline *tl, |
|---|
| 1124 | + struct dma_fence *fence) |
|---|
| 1125 | +{ |
|---|
| 1126 | + return __intel_timeline_sync_is_later(tl, |
|---|
| 1127 | + fence->context, |
|---|
| 1128 | + fence->seqno - 1); |
|---|
| 1129 | +} |
|---|
| 1130 | + |
|---|
| 1131 | +static int intel_timeline_sync_set_start(struct intel_timeline *tl, |
|---|
| 1132 | + const struct dma_fence *fence) |
|---|
| 1133 | +{ |
|---|
| 1134 | + return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1); |
|---|
| 1135 | +} |
|---|
| 1136 | + |
|---|
| 1137 | +static int |
|---|
| 1138 | +__i915_request_await_execution(struct i915_request *to, |
|---|
| 1139 | + struct i915_request *from, |
|---|
| 1140 | + void (*hook)(struct i915_request *rq, |
|---|
| 1141 | + struct dma_fence *signal)) |
|---|
| 1142 | +{ |
|---|
| 1143 | + int err; |
|---|
| 1144 | + |
|---|
| 1145 | + GEM_BUG_ON(intel_context_is_barrier(from->context)); |
|---|
| 1146 | + |
|---|
| 1147 | + /* Submit both requests at the same time */ |
|---|
| 1148 | + err = __await_execution(to, from, hook, I915_FENCE_GFP); |
|---|
| 1149 | + if (err) |
|---|
| 1150 | + return err; |
|---|
| 1151 | + |
|---|
| 1152 | + /* Squash repeated dependencies to the same timelines */
|---|
| 1153 | + if (intel_timeline_sync_has_start(i915_request_timeline(to), |
|---|
| 1154 | + &from->fence)) |
|---|
| 1155 | + return 0; |
|---|
| 1156 | + |
|---|
| 1157 | + /* |
|---|
| 1158 | + * Wait until the start of this request. |
|---|
| 1159 | + * |
|---|
| 1160 | + * The execution cb fires when we submit the request to HW. But in |
|---|
| 1161 | + * many cases this may be long before the request itself is ready to |
|---|
| 1162 | + * run (consider that we submit 2 requests for the same context, where |
|---|
| 1163 | + * the request of interest is behind an indefinite spinner). So we hook |
|---|
| 1164 | + * up to both to reduce our queues and keep the execution lag minimised |
|---|
| 1165 | + * in the worst case, though we hope that the await_start is elided. |
|---|
| 1166 | + */ |
|---|
| 1167 | + err = i915_request_await_start(to, from); |
|---|
| 1168 | + if (err < 0) |
|---|
| 1169 | + return err; |
|---|
| 1170 | + |
|---|
| 1171 | + /* |
|---|
| 1172 | + * Ensure both start together [after all semaphores in signal] |
|---|
| 1173 | + * |
|---|
| 1174 | + * Now that we are queued to the HW at roughly the same time (thanks |
|---|
| 1175 | + * to the execute cb) and are ready to run at roughly the same time |
|---|
| 1176 | + * (thanks to the await start), our signaler may still be indefinitely |
|---|
| 1177 | + * delayed by waiting on a semaphore from a remote engine. If our |
|---|
| 1178 | + * signaler depends on a semaphore, so indirectly do we, and we do not |
|---|
| 1179 | + * want to start our payload until our signaler also starts theirs. |
|---|
| 1180 | + * So we wait. |
|---|
| 1181 | + * |
|---|
| 1182 | + * However, there is also a second condition for which we need to wait |
|---|
| 1183 | + * for the precise start of the signaler. Consider that the signaler |
|---|
| 1184 | + * was submitted in a chain of requests following another context |
|---|
| 1185 | + * (with just an ordinary intra-engine fence dependency between the |
|---|
| 1186 | + * two). In this case the signaler is queued to HW, but not for |
|---|
| 1187 | + * immediate execution, and so we must wait until it reaches the |
|---|
| 1188 | + * active slot. |
|---|
| 1189 | + */ |
|---|
| 1190 | + if (intel_engine_has_semaphores(to->engine) && |
|---|
| 1191 | + !i915_request_has_initial_breadcrumb(to)) { |
|---|
| 1192 | + err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); |
|---|
| 1193 | + if (err < 0) |
|---|
| 1194 | + return err; |
|---|
| 1195 | + } |
|---|
| 1196 | + |
|---|
| 1197 | + /* Couple the dependency tree for PI on this exposed to->fence */ |
|---|
| 1198 | + if (to->engine->schedule) { |
|---|
| 1199 | + err = i915_sched_node_add_dependency(&to->sched, |
|---|
| 1200 | + &from->sched, |
|---|
| 1201 | + I915_DEPENDENCY_WEAK); |
|---|
| 1202 | + if (err < 0) |
|---|
| 1203 | + return err; |
|---|
| 1204 | + } |
|---|
| 1205 | + |
|---|
| 1206 | + return intel_timeline_sync_set_start(i915_request_timeline(to), |
|---|
| 1207 | + &from->fence); |
|---|
| 1208 | +} |
|---|
| 1209 | + |
|---|
| 1210 | +static void mark_external(struct i915_request *rq) |
|---|
| 1211 | +{ |
|---|
| 1212 | + /* |
|---|
| 1213 | + * The downside of using semaphores is that we lose metadata passing |
|---|
| 1214 | + * along the signaling chain. This is particularly nasty when we |
|---|
| 1215 | + * need to pass along a fatal error such as EFAULT or EDEADLK. For |
|---|
| 1216 | + * fatal errors we want to scrub the request before it is executed, |
|---|
| 1217 | + * which means that we cannot preload the request onto HW and have |
|---|
| 1218 | + * it wait upon a semaphore. |
|---|
| 1219 | + */ |
|---|
| 1220 | + rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN; |
|---|
| 1221 | +} |
|---|
| 1222 | + |
|---|
| 1223 | +static int |
|---|
| 1224 | +__i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) |
|---|
| 1225 | +{ |
|---|
| 1226 | + mark_external(rq); |
|---|
| 1227 | + return i915_sw_fence_await_dma_fence(&rq->submit, fence, |
|---|
| 1228 | + i915_fence_context_timeout(rq->engine->i915, |
|---|
| 1229 | + fence->context), |
|---|
| 1230 | + I915_FENCE_GFP); |
|---|
| 1231 | +} |
|---|
| 1232 | + |
|---|
| 1233 | +static int |
|---|
| 1234 | +i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) |
|---|
| 1235 | +{ |
|---|
| 1236 | + struct dma_fence *iter; |
|---|
| 1237 | + int err = 0; |
|---|
| 1238 | + |
|---|
| 1239 | + if (!to_dma_fence_chain(fence)) |
|---|
| 1240 | + return __i915_request_await_external(rq, fence); |
|---|
| 1241 | + |
|---|
| 1242 | + dma_fence_chain_for_each(iter, fence) { |
|---|
| 1243 | + struct dma_fence_chain *chain = to_dma_fence_chain(iter); |
|---|
| 1244 | + |
|---|
| 1245 | + if (!dma_fence_is_i915(chain->fence)) { |
|---|
| 1246 | + err = __i915_request_await_external(rq, iter); |
|---|
| 1247 | + break; |
|---|
| 1248 | + } |
|---|
| 1249 | + |
|---|
| 1250 | + err = i915_request_await_dma_fence(rq, chain->fence); |
|---|
| 1251 | + if (err < 0) |
|---|
| 1252 | + break; |
|---|
| 1253 | + } |
|---|
| 1254 | + |
|---|
| 1255 | + dma_fence_put(iter); |
|---|
| 1256 | + return err; |
|---|
| 1257 | +} |
|---|
| 1258 | + |
|---|
| 1259 | +int |
|---|
| 1260 | +i915_request_await_execution(struct i915_request *rq, |
|---|
| 1261 | + struct dma_fence *fence, |
|---|
| 1262 | + void (*hook)(struct i915_request *rq, |
|---|
| 1263 | + struct dma_fence *signal)) |
|---|
| 1264 | +{ |
|---|
| 1265 | + struct dma_fence **child = &fence; |
|---|
| 1266 | + unsigned int nchild = 1; |
|---|
| 1267 | + int ret; |
|---|
| 1268 | + |
|---|
| 1269 | + if (dma_fence_is_array(fence)) { |
|---|
| 1270 | + struct dma_fence_array *array = to_dma_fence_array(fence); |
|---|
| 1271 | + |
|---|
| 1272 | + /* XXX Error for signal-on-any fence arrays */ |
|---|
| 1273 | + |
|---|
| 1274 | + child = array->fences; |
|---|
| 1275 | + nchild = array->num_fences; |
|---|
| 1276 | + GEM_BUG_ON(!nchild); |
|---|
| 1277 | + } |
|---|
| 1278 | + |
|---|
| 1279 | + do { |
|---|
| 1280 | + fence = *child++; |
|---|
| 1281 | + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) |
|---|
| 1282 | + continue; |
|---|
| 1283 | + |
|---|
| 1284 | + if (fence->context == rq->fence.context) |
|---|
| 1285 | + continue; |
|---|
| 1286 | + |
|---|
| 1287 | + /* |
|---|
| 1288 | + * We don't squash repeated fence dependencies here as we |
|---|
| 1289 | + * want to run our callback in all cases. |
|---|
| 1290 | + */ |
|---|
| 1291 | + |
|---|
| 1292 | + if (dma_fence_is_i915(fence)) |
|---|
| 1293 | + ret = __i915_request_await_execution(rq, |
|---|
| 1294 | + to_request(fence), |
|---|
| 1295 | + hook); |
|---|
| 1296 | + else |
|---|
| 1297 | + ret = i915_request_await_external(rq, fence); |
|---|
| 1298 | + if (ret < 0) |
|---|
| 1299 | + return ret; |
|---|
| 1300 | + } while (--nchild); |
|---|
| 1301 | + |
|---|
| 1302 | + return 0; |
|---|
| 1303 | +} |
|---|
| 1304 | + |
|---|
| 1305 | +static int |
|---|
| 1306 | +await_request_submit(struct i915_request *to, struct i915_request *from) |
|---|
| 1307 | +{ |
|---|
| 1308 | + /* |
|---|
| 1309 | + * If we are waiting on a virtual engine, then it may be |
|---|
| 1310 | + * constrained to execute on a single engine *prior* to submission. |
|---|
| 1311 | + * When it is submitted, it will be first submitted to the virtual |
|---|
| 1312 | + * engine and then passed to the physical engine. We cannot allow |
|---|
| 1313 | + * the waiter to be submitted immediately to the physical engine |
|---|
| 1314 | + * as it may then bypass the virtual request. |
|---|
| 1315 | + */ |
|---|
| 1316 | + if (to->engine == READ_ONCE(from->engine)) |
|---|
| 1317 | + return i915_sw_fence_await_sw_fence_gfp(&to->submit, |
|---|
| 1318 | + &from->submit, |
|---|
| 1319 | + I915_FENCE_GFP); |
|---|
| 1320 | + else |
|---|
| 1321 | + return __i915_request_await_execution(to, from, NULL); |
|---|
| 838 | 1322 | } |
|---|
| 839 | 1323 | |
|---|
| 840 | 1324 | static int |
|---|
| .. | .. |
|---|
| 845 | 1329 | GEM_BUG_ON(to == from); |
|---|
| 846 | 1330 | GEM_BUG_ON(to->timeline == from->timeline); |
|---|
| 847 | 1331 | |
|---|
| 848 | | - if (i915_request_completed(from)) |
|---|
| 1332 | + if (i915_request_completed(from)) { |
|---|
| 1333 | + i915_sw_fence_set_error_once(&to->submit, from->fence.error); |
|---|
| 849 | 1334 | return 0; |
|---|
| 1335 | + } |
|---|
| 850 | 1336 | |
|---|
| 851 | 1337 | if (to->engine->schedule) { |
|---|
| 852 | | - ret = i915_sched_node_add_dependency(to->i915, |
|---|
| 853 | | - &to->sched, |
|---|
| 854 | | - &from->sched); |
|---|
| 1338 | + ret = i915_sched_node_add_dependency(&to->sched, |
|---|
| 1339 | + &from->sched, |
|---|
| 1340 | + I915_DEPENDENCY_EXTERNAL); |
|---|
| 855 | 1341 | if (ret < 0) |
|---|
| 856 | 1342 | return ret; |
|---|
| 857 | 1343 | } |
|---|
| 858 | 1344 | |
|---|
| 859 | | - if (to->engine == from->engine) { |
|---|
| 860 | | - ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, |
|---|
| 861 | | - &from->submit, |
|---|
| 862 | | - I915_FENCE_GFP); |
|---|
| 863 | | - return ret < 0 ? ret : 0; |
|---|
| 864 | | - } |
|---|
| 1345 | + if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask))) |
|---|
| 1346 | + ret = await_request_submit(to, from); |
|---|
| 1347 | + else |
|---|
| 1348 | + ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); |
|---|
| 1349 | + if (ret < 0) |
|---|
| 1350 | + return ret; |
|---|
| 865 | 1351 | |
|---|
| 866 | | - if (to->engine->semaphore.sync_to) { |
|---|
| 867 | | - u32 seqno; |
|---|
| 868 | | - |
|---|
| 869 | | - GEM_BUG_ON(!from->engine->semaphore.signal); |
|---|
| 870 | | - |
|---|
| 871 | | - seqno = i915_request_global_seqno(from); |
|---|
| 872 | | - if (!seqno) |
|---|
| 873 | | - goto await_dma_fence; |
|---|
| 874 | | - |
|---|
| 875 | | - if (seqno <= to->timeline->global_sync[from->engine->id]) |
|---|
| 876 | | - return 0; |
|---|
| 877 | | - |
|---|
| 878 | | - trace_i915_gem_ring_sync_to(to, from); |
|---|
| 879 | | - ret = to->engine->semaphore.sync_to(to, from); |
|---|
| 880 | | - if (ret) |
|---|
| 881 | | - return ret; |
|---|
| 882 | | - |
|---|
| 883 | | - to->timeline->global_sync[from->engine->id] = seqno; |
|---|
| 884 | | - return 0; |
|---|
| 885 | | - } |
|---|
| 886 | | - |
|---|
| 887 | | -await_dma_fence: |
|---|
| 888 | | - ret = i915_sw_fence_await_dma_fence(&to->submit, |
|---|
| 889 | | - &from->fence, 0, |
|---|
| 890 | | - I915_FENCE_GFP); |
|---|
| 891 | | - return ret < 0 ? ret : 0; |
|---|
| 1352 | + return 0; |
|---|
| 892 | 1353 | } |
|---|
| 893 | 1354 | |
|---|
| 894 | 1355 | int |
|---|
| .. | .. |
|---|
| 928 | 1389 | continue; |
|---|
| 929 | 1390 | |
|---|
| 930 | 1391 | /* Squash repeated waits to the same timelines */ |
|---|
| 931 | | - if (fence->context != rq->i915->mm.unordered_timeline && |
|---|
| 932 | | - i915_timeline_sync_is_later(rq->timeline, fence)) |
|---|
| 1392 | + if (fence->context && |
|---|
| 1393 | + intel_timeline_sync_is_later(i915_request_timeline(rq), |
|---|
| 1394 | + fence)) |
|---|
| 933 | 1395 | continue; |
|---|
| 934 | 1396 | |
|---|
| 935 | 1397 | if (dma_fence_is_i915(fence)) |
|---|
| 936 | 1398 | ret = i915_request_await_request(rq, to_request(fence)); |
|---|
| 937 | 1399 | else |
|---|
| 938 | | - ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, |
|---|
| 939 | | - I915_FENCE_TIMEOUT, |
|---|
| 940 | | - I915_FENCE_GFP); |
|---|
| 1400 | + ret = i915_request_await_external(rq, fence); |
|---|
| 941 | 1401 | if (ret < 0) |
|---|
| 942 | 1402 | return ret; |
|---|
| 943 | 1403 | |
|---|
| 944 | 1404 | /* Record the latest fence used against each timeline */ |
|---|
| 945 | | - if (fence->context != rq->i915->mm.unordered_timeline) |
|---|
| 946 | | - i915_timeline_sync_set(rq->timeline, fence); |
|---|
| 1405 | + if (fence->context) |
|---|
| 1406 | + intel_timeline_sync_set(i915_request_timeline(rq), |
|---|
| 1407 | + fence); |
|---|
| 947 | 1408 | } while (--nchild); |
|---|
| 948 | 1409 | |
|---|
| 949 | 1410 | return 0; |
|---|
| .. | .. |
|---|
| 981 | 1442 | struct dma_fence **shared; |
|---|
| 982 | 1443 | unsigned int count, i; |
|---|
| 983 | 1444 | |
|---|
| 984 | | - ret = reservation_object_get_fences_rcu(obj->resv, |
|---|
| 1445 | + ret = dma_resv_get_fences_rcu(obj->base.resv, |
|---|
| 985 | 1446 | &excl, &count, &shared); |
|---|
| 986 | 1447 | if (ret) |
|---|
| 987 | 1448 | return ret; |
|---|
| .. | .. |
|---|
| 998 | 1459 | dma_fence_put(shared[i]); |
|---|
| 999 | 1460 | kfree(shared); |
|---|
| 1000 | 1461 | } else { |
|---|
| 1001 | | - excl = reservation_object_get_excl_rcu(obj->resv); |
|---|
| 1462 | + excl = dma_resv_get_excl_rcu(obj->base.resv); |
|---|
| 1002 | 1463 | } |
|---|
| 1003 | 1464 | |
|---|
| 1004 | 1465 | if (excl) { |
|---|
| .. | .. |
|---|
| 1011 | 1472 | return ret; |
|---|
| 1012 | 1473 | } |
|---|
| 1013 | 1474 | |
|---|
| 1014 | | -void i915_request_skip(struct i915_request *rq, int error) |
|---|
| 1475 | +static struct i915_request * |
|---|
| 1476 | +__i915_request_add_to_timeline(struct i915_request *rq) |
|---|
| 1015 | 1477 | { |
|---|
| 1016 | | - void *vaddr = rq->ring->vaddr; |
|---|
| 1017 | | - u32 head; |
|---|
| 1018 | | - |
|---|
| 1019 | | - GEM_BUG_ON(!IS_ERR_VALUE((long)error)); |
|---|
| 1020 | | - dma_fence_set_error(&rq->fence, error); |
|---|
| 1478 | + struct intel_timeline *timeline = i915_request_timeline(rq); |
|---|
| 1479 | + struct i915_request *prev; |
|---|
| 1021 | 1480 | |
|---|
| 1022 | 1481 | /* |
|---|
| 1023 | | - * As this request likely depends on state from the lost |
|---|
| 1024 | | - * context, clear out all the user operations leaving the |
|---|
| 1025 | | - * breadcrumb at the end (so we get the fence notifications). |
|---|
| 1482 | + * Dependency tracking and request ordering along the timeline |
|---|
| 1483 | + * is special cased so that we can eliminate redundant ordering |
|---|
| 1484 | + * operations while building the request (we know that the timeline |
|---|
| 1485 | + * itself is ordered, and here we guarantee it). |
|---|
| 1486 | + * |
|---|
| 1487 | + * As we know we will need to emit tracking along the timeline, |
|---|
| 1488 | + * we embed the hooks into our request struct -- at the cost of |
|---|
| 1489 | + * having to have specialised no-allocation interfaces (which will |
|---|
| 1490 | + * be beneficial elsewhere). |
|---|
| 1491 | + * |
|---|
| 1492 | + * A second benefit to open-coding i915_request_await_request is |
|---|
| 1493 | + * that we can apply a slight variant of the rules specialised |
|---|
| 1494 | + * for timelines that jump between engines (such as virtual engines). |
|---|
| 1495 | + * If we consider the case of a virtual engine, we must emit a dma-fence
|---|
| 1496 | + * to prevent scheduling of the second request until the first is |
|---|
| 1497 | + * complete (to maximise our greedy late load balancing) and this |
|---|
| 1498 | + * precludes optimising to use semaphore serialisation of a single
|---|
| 1499 | + * timeline across engines. |
|---|
| 1026 | 1500 | */ |
|---|
| 1027 | | - head = rq->infix; |
|---|
| 1028 | | - if (rq->postfix < head) { |
|---|
| 1029 | | - memset(vaddr + head, 0, rq->ring->size - head); |
|---|
| 1030 | | - head = 0; |
|---|
| 1501 | + prev = to_request(__i915_active_fence_set(&timeline->last_request, |
|---|
| 1502 | + &rq->fence)); |
|---|
| 1503 | + if (prev && !i915_request_completed(prev)) { |
|---|
| 1504 | + /* |
|---|
| 1505 | + * The requests are supposed to be kept in order. However, |
|---|
| 1506 | + * we need to be wary in case the timeline->last_request |
|---|
| 1507 | + * is used as a barrier for external modification to this |
|---|
| 1508 | + * context. |
|---|
| 1509 | + */ |
|---|
| 1510 | + GEM_BUG_ON(prev->context == rq->context && |
|---|
| 1511 | + i915_seqno_passed(prev->fence.seqno, |
|---|
| 1512 | + rq->fence.seqno)); |
|---|
| 1513 | + |
|---|
| 1514 | + if (is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) |
|---|
| 1515 | + i915_sw_fence_await_sw_fence(&rq->submit, |
|---|
| 1516 | + &prev->submit, |
|---|
| 1517 | + &rq->submitq); |
|---|
| 1518 | + else |
|---|
| 1519 | + __i915_sw_fence_await_dma_fence(&rq->submit, |
|---|
| 1520 | + &prev->fence, |
|---|
| 1521 | + &rq->dmaq); |
|---|
| 1522 | + if (rq->engine->schedule) |
|---|
| 1523 | + __i915_sched_node_add_dependency(&rq->sched, |
|---|
| 1524 | + &prev->sched, |
|---|
| 1525 | + &rq->dep, |
|---|
| 1526 | + 0); |
|---|
| 1031 | 1527 | } |
|---|
| 1032 | | - memset(vaddr + head, 0, rq->postfix - head); |
|---|
| 1528 | + |
|---|
| 1529 | + /* |
|---|
| 1530 | + * Make sure that no request gazumped us - if it was allocated after |
|---|
| 1531 | + * our i915_request_alloc() and called __i915_request_add() before |
|---|
| 1532 | + * us, the timeline will hold its seqno which is later than ours. |
|---|
| 1533 | + */ |
|---|
| 1534 | + GEM_BUG_ON(timeline->seqno != rq->fence.seqno); |
|---|
| 1535 | + |
|---|
| 1536 | + return prev; |
|---|
| 1033 | 1537 | } |
|---|
| 1034 | 1538 | |
|---|
| 1035 | 1539 | /* |
|---|
| .. | .. |
|---|
| 1037 | 1541 | * request is not being tracked for completion but the work itself is |
|---|
| 1038 | 1542 | * going to happen on the hardware. This would be a Bad Thing(tm). |
|---|
| 1039 | 1543 | */ |
|---|
| 1040 | | -void i915_request_add(struct i915_request *request) |
|---|
| 1544 | +struct i915_request *__i915_request_commit(struct i915_request *rq) |
|---|
| 1041 | 1545 | { |
|---|
| 1042 | | - struct intel_engine_cs *engine = request->engine; |
|---|
| 1043 | | - struct i915_timeline *timeline = request->timeline; |
|---|
| 1044 | | - struct intel_ring *ring = request->ring; |
|---|
| 1045 | | - struct i915_request *prev; |
|---|
| 1546 | + struct intel_engine_cs *engine = rq->engine; |
|---|
| 1547 | + struct intel_ring *ring = rq->ring; |
|---|
| 1046 | 1548 | u32 *cs; |
|---|
| 1047 | 1549 | |
|---|
| 1048 | | - GEM_TRACE("%s fence %llx:%d\n", |
|---|
| 1049 | | - engine->name, request->fence.context, request->fence.seqno); |
|---|
| 1050 | | - |
|---|
| 1051 | | - lockdep_assert_held(&request->i915->drm.struct_mutex); |
|---|
| 1052 | | - trace_i915_request_add(request); |
|---|
| 1053 | | - |
|---|
| 1054 | | - /* |
|---|
| 1055 | | - * Make sure that no request gazumped us - if it was allocated after |
|---|
| 1056 | | - * our i915_request_alloc() and called __i915_request_add() before |
|---|
| 1057 | | - * us, the timeline will hold its seqno which is later than ours. |
|---|
| 1058 | | - */ |
|---|
| 1059 | | - GEM_BUG_ON(timeline->seqno != request->fence.seqno); |
|---|
| 1550 | + RQ_TRACE(rq, "\n"); |
|---|
| 1060 | 1551 | |
|---|
| 1061 | 1552 | /* |
|---|
| 1062 | 1553 | * To ensure that this call will not fail, space for its emissions |
|---|
| 1063 | 1554 | * should already have been reserved in the ring buffer. Let the ring |
|---|
| 1064 | 1555 | * know that it is time to use that space up. |
|---|
| 1065 | 1556 | */ |
|---|
| 1066 | | - request->reserved_space = 0; |
|---|
| 1067 | | - engine->emit_flush(request, EMIT_FLUSH); |
|---|
| 1557 | + GEM_BUG_ON(rq->reserved_space > ring->space); |
|---|
| 1558 | + rq->reserved_space = 0; |
|---|
| 1559 | + rq->emitted_jiffies = jiffies; |
|---|
| 1068 | 1560 | |
|---|
| 1069 | 1561 | /* |
|---|
| 1070 | 1562 | * Record the position of the start of the breadcrumb so that |
|---|
| .. | .. |
|---|
| 1072 | 1564 | * GPU processing the request, we never over-estimate the |
|---|
| 1073 | 1565 | * position of the ring's HEAD. |
|---|
| 1074 | 1566 | */ |
|---|
| 1075 | | - cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); |
|---|
| 1567 | + cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw); |
|---|
| 1076 | 1568 | GEM_BUG_ON(IS_ERR(cs)); |
|---|
| 1077 | | - request->postfix = intel_ring_offset(request, cs); |
|---|
| 1569 | + rq->postfix = intel_ring_offset(rq, cs); |
|---|
| 1078 | 1570 | |
|---|
| 1079 | | - /* |
|---|
| 1080 | | - * Seal the request and mark it as pending execution. Note that |
|---|
| 1081 | | - * we may inspect this state, without holding any locks, during |
|---|
| 1082 | | - * hangcheck. Hence we apply the barrier to ensure that we do not |
|---|
| 1083 | | - * see a more recent value in the hws than we are tracking. |
|---|
| 1084 | | - */ |
|---|
| 1571 | + return __i915_request_add_to_timeline(rq); |
|---|
| 1572 | +} |
|---|
| 1085 | 1573 | |
|---|
| 1086 | | - prev = i915_gem_active_raw(&timeline->last_request, |
|---|
| 1087 | | - &request->i915->drm.struct_mutex); |
|---|
| 1088 | | - if (prev && !i915_request_completed(prev)) { |
|---|
| 1089 | | - i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, |
|---|
| 1090 | | - &request->submitq); |
|---|
| 1091 | | - if (engine->schedule) |
|---|
| 1092 | | - __i915_sched_node_add_dependency(&request->sched, |
|---|
| 1093 | | - &prev->sched, |
|---|
| 1094 | | - &request->dep, |
|---|
| 1095 | | - 0); |
|---|
| 1096 | | - } |
|---|
| 1097 | | - |
|---|
| 1098 | | - spin_lock_irq(&timeline->lock); |
|---|
| 1099 | | - list_add_tail(&request->link, &timeline->requests); |
|---|
| 1100 | | - spin_unlock_irq(&timeline->lock); |
|---|
| 1101 | | - |
|---|
| 1102 | | - GEM_BUG_ON(timeline->seqno != request->fence.seqno); |
|---|
| 1103 | | - i915_gem_active_set(&timeline->last_request, request); |
|---|
| 1104 | | - |
|---|
| 1105 | | - list_add_tail(&request->ring_link, &ring->request_list); |
|---|
| 1106 | | - if (list_is_first(&request->ring_link, &ring->request_list)) { |
|---|
| 1107 | | - GEM_TRACE("marking %s as active\n", ring->timeline->name); |
|---|
| 1108 | | - list_add(&ring->active_link, &request->i915->gt.active_rings); |
|---|
| 1109 | | - } |
|---|
| 1110 | | - request->emitted_jiffies = jiffies; |
|---|
| 1111 | | - |
|---|
| 1574 | +void __i915_request_queue(struct i915_request *rq, |
|---|
| 1575 | + const struct i915_sched_attr *attr) |
|---|
| 1576 | +{ |
|---|
| 1112 | 1577 | /* |
|---|
| 1113 | 1578 | * Let the backend know a new request has arrived that may need |
|---|
| 1114 | 1579 | * to adjust the existing execution schedule due to a high priority |
|---|
| .. | .. |
|---|
| 1120 | 1585 | * decide whether to preempt the entire chain so that it is ready to |
|---|
| 1121 | 1586 | * run at the earliest possible convenience. |
|---|
| 1122 | 1587 | */ |
|---|
| 1123 | | - local_bh_disable(); |
|---|
| 1124 | | - rcu_read_lock(); /* RCU serialisation for set-wedged protection */ |
|---|
| 1125 | | - if (engine->schedule) |
|---|
| 1126 | | - engine->schedule(request, &request->gem_context->sched); |
|---|
| 1127 | | - rcu_read_unlock(); |
|---|
| 1128 | | - i915_sw_fence_commit(&request->submit); |
|---|
| 1129 | | - local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ |
|---|
| 1130 | | - |
|---|
| 1131 | | - /* |
|---|
| 1132 | | - * In typical scenarios, we do not expect the previous request on |
|---|
| 1133 | | - * the timeline to be still tracked by timeline->last_request if it |
|---|
| 1134 | | - * has been completed. If the completed request is still here, that |
|---|
| 1135 | | - * implies that request retirement is a long way behind submission, |
|---|
| 1136 | | - * suggesting that we haven't been retiring frequently enough from |
|---|
| 1137 | | - * the combination of retire-before-alloc, waiters and the background |
|---|
| 1138 | | - * retirement worker. So if the last request on this timeline was |
|---|
| 1139 | | - * already completed, do a catch up pass, flushing the retirement queue |
|---|
| 1140 | | - * up to this client. Since we have now moved the heaviest operations |
|---|
| 1141 | | - * during retirement onto secondary workers, such as freeing objects |
|---|
| 1142 | | - * or contexts, retiring a bunch of requests is mostly list management |
|---|
| 1143 | | - * (and cache misses), and so we should not be overly penalizing this |
|---|
| 1144 | | - * client by performing excess work, though we may still performing |
|---|
| 1145 | | - * work on behalf of others -- but instead we should benefit from |
|---|
| 1146 | | - * improved resource management. (Well, that's the theory at least.) |
|---|
| 1147 | | - */ |
|---|
| 1148 | | - if (prev && i915_request_completed(prev)) |
|---|
| 1149 | | - i915_request_retire_upto(prev); |
|---|
| 1588 | + if (attr && rq->engine->schedule) |
|---|
| 1589 | + rq->engine->schedule(rq, attr); |
|---|
| 1590 | + i915_sw_fence_commit(&rq->semaphore); |
|---|
| 1591 | + i915_sw_fence_commit(&rq->submit); |
|---|
| 1150 | 1592 | } |
|---|
| 1151 | 1593 | |
|---|
| 1152 | | -static unsigned long local_clock_us(unsigned int *cpu) |
|---|
| 1594 | +void i915_request_add(struct i915_request *rq) |
|---|
| 1595 | +{ |
|---|
| 1596 | + struct intel_timeline * const tl = i915_request_timeline(rq); |
|---|
| 1597 | + struct i915_sched_attr attr = {}; |
|---|
| 1598 | + struct i915_gem_context *ctx; |
|---|
| 1599 | + |
|---|
| 1600 | + lockdep_assert_held(&tl->mutex); |
|---|
| 1601 | + lockdep_unpin_lock(&tl->mutex, rq->cookie); |
|---|
| 1602 | + |
|---|
| 1603 | + trace_i915_request_add(rq); |
|---|
| 1604 | + __i915_request_commit(rq); |
|---|
| 1605 | + |
|---|
| 1606 | + /* XXX placeholder for selftests */ |
|---|
| 1607 | + rcu_read_lock(); |
|---|
| 1608 | + ctx = rcu_dereference(rq->context->gem_context); |
|---|
| 1609 | + if (ctx) |
|---|
| 1610 | + attr = ctx->sched; |
|---|
| 1611 | + rcu_read_unlock(); |
|---|
| 1612 | + |
|---|
| 1613 | + __i915_request_queue(rq, &attr); |
|---|
| 1614 | + |
|---|
| 1615 | + mutex_unlock(&tl->mutex); |
|---|
| 1616 | +} |
|---|
| 1617 | + |
|---|
| 1618 | +static unsigned long local_clock_ns(unsigned int *cpu) |
|---|
| 1153 | 1619 | { |
|---|
| 1154 | 1620 | unsigned long t; |
|---|
| 1155 | 1621 | |
|---|
| .. | .. |
|---|
| 1166 | 1632 | * stop busywaiting, see busywait_stop(). |
|---|
| 1167 | 1633 | */ |
|---|
| 1168 | 1634 | *cpu = get_cpu(); |
|---|
| 1169 | | - t = local_clock() >> 10; |
|---|
| 1635 | + t = local_clock(); |
|---|
| 1170 | 1636 | put_cpu(); |
|---|
| 1171 | 1637 | |
|---|
| 1172 | 1638 | return t; |
|---|
| .. | .. |
|---|
| 1176 | 1642 | { |
|---|
| 1177 | 1643 | unsigned int this_cpu; |
|---|
| 1178 | 1644 | |
|---|
| 1179 | | - if (time_after(local_clock_us(&this_cpu), timeout)) |
|---|
| 1645 | + if (time_after(local_clock_ns(&this_cpu), timeout)) |
|---|
| 1180 | 1646 | return true; |
|---|
| 1181 | 1647 | |
|---|
| 1182 | 1648 | return this_cpu != cpu; |
|---|
| 1183 | 1649 | } |
|---|
| 1184 | 1650 | |
|---|
| 1185 | | -static bool __i915_spin_request(const struct i915_request *rq, |
|---|
| 1186 | | - u32 seqno, int state, unsigned long timeout_us) |
|---|
| 1651 | +static bool __i915_spin_request(struct i915_request * const rq, int state) |
|---|
| 1187 | 1652 | { |
|---|
| 1188 | | - struct intel_engine_cs *engine = rq->engine; |
|---|
| 1189 | | - unsigned int irq, cpu; |
|---|
| 1190 | | - |
|---|
| 1191 | | - GEM_BUG_ON(!seqno); |
|---|
| 1653 | + unsigned long timeout_ns; |
|---|
| 1654 | + unsigned int cpu; |
|---|
| 1192 | 1655 | |
|---|
| 1193 | 1656 | /* |
|---|
| 1194 | 1657 | * Only wait for the request if we know it is likely to complete. |
|---|
| .. | .. |
|---|
| 1196 | 1659 | * We don't track the timestamps around requests, nor the average |
|---|
| 1197 | 1660 | * request length, so we do not have a good indicator that this |
|---|
| 1198 | 1661 | * request will complete within the timeout. What we do know is the |
|---|
| 1199 | | - * order in which requests are executed by the engine and so we can |
|---|
| 1200 | | - * tell if the request has started. If the request hasn't started yet, |
|---|
| 1201 | | - * it is a fair assumption that it will not complete within our |
|---|
| 1202 | | - * relatively short timeout. |
|---|
| 1662 | + * order in which requests are executed by the context and so we can |
|---|
| 1663 | + * tell if the request has been started. If the request is not even |
|---|
| 1664 | + * running yet, it is a fair assumption that it will not complete |
|---|
| 1665 | + * within our relatively short timeout. |
|---|
| 1203 | 1666 | */ |
|---|
| 1204 | | - if (!i915_seqno_passed(intel_engine_get_seqno(engine), seqno - 1)) |
|---|
| 1667 | + if (!i915_request_is_running(rq)) |
|---|
| 1205 | 1668 | return false; |
|---|
| 1206 | 1669 | |
|---|
| 1207 | 1670 | /* |
|---|
| .. | .. |
|---|
| 1215 | 1678 | * takes to sleep on a request, on the order of a microsecond. |
|---|
| 1216 | 1679 | */ |
|---|
| 1217 | 1680 | |
|---|
| 1218 | | - irq = READ_ONCE(engine->breadcrumbs.irq_count); |
|---|
| 1219 | | - timeout_us += local_clock_us(&cpu); |
|---|
| 1681 | + timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns); |
|---|
| 1682 | + timeout_ns += local_clock_ns(&cpu); |
|---|
| 1220 | 1683 | do { |
|---|
| 1221 | | - if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno)) |
|---|
| 1222 | | - return seqno == i915_request_global_seqno(rq); |
|---|
| 1223 | | - |
|---|
| 1224 | | - /* |
|---|
| 1225 | | - * Seqno are meant to be ordered *before* the interrupt. If |
|---|
| 1226 | | - * we see an interrupt without a corresponding seqno advance, |
|---|
| 1227 | | - * assume we won't see one in the near future but require |
|---|
| 1228 | | - * the engine->seqno_barrier() to fixup coherency. |
|---|
| 1229 | | - */ |
|---|
| 1230 | | - if (READ_ONCE(engine->breadcrumbs.irq_count) != irq) |
|---|
| 1231 | | - break; |
|---|
| 1684 | + if (dma_fence_is_signaled(&rq->fence)) |
|---|
| 1685 | + return true; |
|---|
| 1232 | 1686 | |
|---|
| 1233 | 1687 | if (signal_pending_state(state, current)) |
|---|
| 1234 | 1688 | break; |
|---|
| 1235 | 1689 | |
|---|
| 1236 | | - if (busywait_stop(timeout_us, cpu)) |
|---|
| 1690 | + if (busywait_stop(timeout_ns, cpu)) |
|---|
| 1237 | 1691 | break; |
|---|
| 1238 | 1692 | |
|---|
| 1239 | 1693 | cpu_relax(); |
|---|
| .. | .. |
|---|
| 1242 | 1696 | return false; |
|---|
| 1243 | 1697 | } |
|---|
| 1244 | 1698 | |
|---|
| 1245 | | -static bool __i915_wait_request_check_and_reset(struct i915_request *request) |
|---|
| 1699 | +struct request_wait { |
|---|
| 1700 | + struct dma_fence_cb cb; |
|---|
| 1701 | + struct task_struct *tsk; |
|---|
| 1702 | +}; |
|---|
| 1703 | + |
|---|
| 1704 | +static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb) |
|---|
| 1246 | 1705 | { |
|---|
| 1247 | | - struct i915_gpu_error *error = &request->i915->gpu_error; |
|---|
| 1706 | + struct request_wait *wait = container_of(cb, typeof(*wait), cb); |
|---|
| 1248 | 1707 | |
|---|
| 1249 | | - if (likely(!i915_reset_handoff(error))) |
|---|
| 1250 | | - return false; |
|---|
| 1251 | | - |
|---|
| 1252 | | - __set_current_state(TASK_RUNNING); |
|---|
| 1253 | | - i915_reset(request->i915, error->stalled_mask, error->reason); |
|---|
| 1254 | | - return true; |
|---|
| 1708 | + wake_up_process(fetch_and_zero(&wait->tsk)); |
|---|
| 1255 | 1709 | } |
|---|
| 1256 | 1710 | |
|---|
| 1257 | 1711 | /** |
|---|
| .. | .. |
|---|
| 1264 | 1718 | * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an |
|---|
| 1265 | 1719 | * unbounded wait). |
|---|
| 1266 | 1720 | * |
|---|
| 1267 | | - * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED |
|---|
| 1268 | | - * in via the flags, and vice versa if the struct_mutex is not held, the caller |
|---|
| 1269 | | - * must not specify that the wait is locked. |
|---|
| 1270 | | - * |
|---|
| 1271 | 1721 | * Returns the remaining time (in jiffies) if the request completed, which may |
|---|
| 1272 | 1722 | * be zero or -ETIME if the request is unfinished after the timeout expires. |
|---|
| 1273 | 1723 | * May return -EINTR is called with I915_WAIT_INTERRUPTIBLE and a signal is |
|---|
| .. | .. |
|---|
| 1279 | 1729 | { |
|---|
| 1280 | 1730 | const int state = flags & I915_WAIT_INTERRUPTIBLE ? |
|---|
| 1281 | 1731 | TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; |
|---|
| 1282 | | - wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue; |
|---|
| 1283 | | - DEFINE_WAIT_FUNC(reset, default_wake_function); |
|---|
| 1284 | | - DEFINE_WAIT_FUNC(exec, default_wake_function); |
|---|
| 1285 | | - struct intel_wait wait; |
|---|
| 1732 | + struct request_wait wait; |
|---|
| 1286 | 1733 | |
|---|
| 1287 | 1734 | might_sleep(); |
|---|
| 1288 | | -#if IS_ENABLED(CONFIG_LOCKDEP) |
|---|
| 1289 | | - GEM_BUG_ON(debug_locks && |
|---|
| 1290 | | - !!lockdep_is_held(&rq->i915->drm.struct_mutex) != |
|---|
| 1291 | | - !!(flags & I915_WAIT_LOCKED)); |
|---|
| 1292 | | -#endif |
|---|
| 1293 | 1735 | GEM_BUG_ON(timeout < 0); |
|---|
| 1294 | 1736 | |
|---|
| 1295 | | - if (i915_request_completed(rq)) |
|---|
| 1737 | + if (dma_fence_is_signaled(&rq->fence)) |
|---|
| 1296 | 1738 | return timeout; |
|---|
| 1297 | 1739 | |
|---|
| 1298 | 1740 | if (!timeout) |
|---|
| .. | .. |
|---|
| 1300 | 1742 | |
|---|
| 1301 | 1743 | trace_i915_request_wait_begin(rq, flags); |
|---|
| 1302 | 1744 | |
|---|
| 1303 | | - add_wait_queue(&rq->execute, &exec); |
|---|
| 1304 | | - if (flags & I915_WAIT_LOCKED) |
|---|
| 1305 | | - add_wait_queue(errq, &reset); |
|---|
| 1745 | + /* |
|---|
| 1746 | + * We must never wait on the GPU while holding a lock as we |
|---|
| 1747 | + * may need to perform a GPU reset. So while we don't need to |
|---|
| 1748 | + * serialise wait/reset with an explicit lock, we do want |
|---|
| 1749 | + * lockdep to detect potential dependency cycles. |
|---|
| 1750 | + */ |
|---|
| 1751 | + mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_); |
|---|
| 1306 | 1752 | |
|---|
| 1307 | | - intel_wait_init(&wait); |
|---|
| 1753 | + /* |
|---|
| 1754 | + * Optimistic spin before touching IRQs. |
|---|
| 1755 | + * |
|---|
| 1756 | + * We may use a rather large value here to offset the penalty of |
|---|
| 1757 | + * switching away from the active task. Frequently, the client will |
|---|
| 1758 | + * wait upon an old swapbuffer to throttle itself to remain within a |
|---|
| 1759 | + * frame of the gpu. If the client is running in lockstep with the gpu, |
|---|
| 1760 | + * then it should not be waiting long at all, and a sleep now will incur |
|---|
| 1761 | + * extra scheduler latency in producing the next frame. To try to |
|---|
| 1762 | + * avoid adding the cost of enabling/disabling the interrupt to the |
|---|
| 1763 | + * short wait, we first spin to see if the request would have completed |
|---|
| 1764 | + * in the time taken to setup the interrupt. |
|---|
| 1765 | + * |
|---|
| 1766 | + * We need upto 5us to enable the irq, and upto 20us to hide the |
|---|
| 1767 | + * scheduler latency of a context switch, ignoring the secondary |
|---|
| 1768 | + * impacts from a context switch such as cache eviction. |
|---|
| 1769 | + * |
|---|
| 1770 | + * The scheme used for low-latency IO is called "hybrid interrupt |
|---|
| 1771 | + * polling". The suggestion there is to sleep until just before you |
|---|
| 1772 | + * expect to be woken by the device interrupt and then poll for its |
|---|
| 1773 | + * completion. That requires having a good predictor for the request |
|---|
| 1774 | + * duration, which we currently lack. |
|---|
| 1775 | + */ |
|---|
| 1776 | + if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) && |
|---|
| 1777 | + __i915_spin_request(rq, state)) |
|---|
| 1778 | + goto out; |
|---|
| 1308 | 1779 | |
|---|
| 1309 | | -restart: |
|---|
| 1310 | | - do { |
|---|
| 1311 | | - set_current_state(state); |
|---|
| 1312 | | - if (intel_wait_update_request(&wait, rq)) |
|---|
| 1313 | | - break; |
|---|
| 1780 | + /* |
|---|
| 1781 | + * This client is about to stall waiting for the GPU. In many cases |
|---|
| 1782 | + * this is undesirable and limits the throughput of the system, as |
|---|
| 1783 | + * many clients cannot continue processing user input/output whilst |
|---|
| 1784 | + * blocked. RPS autotuning may take tens of milliseconds to respond |
|---|
| 1785 | + * to the GPU load and thus incurs additional latency for the client. |
|---|
| 1786 | + * We can circumvent that by promoting the GPU frequency to maximum |
|---|
| 1787 | + * before we sleep. This makes the GPU throttle up much more quickly |
|---|
| 1788 | + * (good for benchmarks and user experience, e.g. window animations), |
|---|
| 1789 | + * but at a cost of spending more power processing the workload |
|---|
| 1790 | + * (bad for battery). |
|---|
| 1791 | + */ |
|---|
| 1792 | + if (flags & I915_WAIT_PRIORITY && !i915_request_started(rq)) |
|---|
| 1793 | + intel_rps_boost(rq); |
|---|
| 1314 | 1794 | |
|---|
| 1315 | | - if (flags & I915_WAIT_LOCKED && |
|---|
| 1316 | | - __i915_wait_request_check_and_reset(rq)) |
|---|
| 1317 | | - continue; |
|---|
| 1795 | + wait.tsk = current; |
|---|
| 1796 | + if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake)) |
|---|
| 1797 | + goto out; |
|---|
| 1318 | 1798 | |
|---|
| 1319 | | - if (signal_pending_state(state, current)) { |
|---|
| 1320 | | - timeout = -ERESTARTSYS; |
|---|
| 1321 | | - goto complete; |
|---|
| 1322 | | - } |
|---|
| 1323 | | - |
|---|
| 1324 | | - if (!timeout) { |
|---|
| 1325 | | - timeout = -ETIME; |
|---|
| 1326 | | - goto complete; |
|---|
| 1327 | | - } |
|---|
| 1328 | | - |
|---|
| 1329 | | - timeout = io_schedule_timeout(timeout); |
|---|
| 1330 | | - } while (1); |
|---|
| 1331 | | - |
|---|
| 1332 | | - GEM_BUG_ON(!intel_wait_has_seqno(&wait)); |
|---|
| 1333 | | - GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); |
|---|
| 1334 | | - |
|---|
| 1335 | | - /* Optimistic short spin before touching IRQs */ |
|---|
| 1336 | | - if (__i915_spin_request(rq, wait.seqno, state, 5)) |
|---|
| 1337 | | - goto complete; |
|---|
| 1338 | | - |
|---|
| 1339 | | - set_current_state(state); |
|---|
| 1340 | | - if (intel_engine_add_wait(rq->engine, &wait)) |
|---|
| 1341 | | - /* |
|---|
| 1342 | | - * In order to check that we haven't missed the interrupt |
|---|
| 1343 | | - * as we enabled it, we need to kick ourselves to do a |
|---|
| 1344 | | - * coherent check on the seqno before we sleep. |
|---|
| 1345 | | - */ |
|---|
| 1346 | | - goto wakeup; |
|---|
| 1347 | | - |
|---|
| 1348 | | - if (flags & I915_WAIT_LOCKED) |
|---|
| 1349 | | - __i915_wait_request_check_and_reset(rq); |
|---|
| 1799 | + /* |
|---|
| 1800 | + * Flush the submission tasklet, but only if it may help this request. |
|---|
| 1801 | + * |
|---|
| 1802 | + * We sometimes experience some latency between the HW interrupts and |
|---|
| 1803 | + * tasklet execution (mostly due to ksoftirqd latency, but it can also |
|---|
| 1804 | + * be due to lazy CS events), so let's run the tasklet manually if there |
|---|
| 1805 | + * is a chance it may submit this request. If the request is not ready |
|---|
| 1806 | + * to run, as it is waiting for other fences to be signaled, flushing |
|---|
| 1807 | + * the tasklet is busy work without any advantage for this client. |
|---|
| 1808 | + * |
|---|
| 1809 | + * If the HW is being lazy, this is the last chance before we go to |
|---|
| 1810 | + * sleep to catch any pending events. We will check periodically in |
|---|
| 1811 | + * the heartbeat to flush the submission tasklets as a last resort |
|---|
| 1812 | + * for unhappy HW. |
|---|
| 1813 | + */ |
|---|
| 1814 | + if (i915_request_is_ready(rq)) |
|---|
| 1815 | + intel_engine_flush_submission(rq->engine); |
|---|
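
The same two primitives can be combined in a test or debug path outside this function. The sketch below is hypothetical (the helper name and the microsecond budget are invented, and `udelay()` needs `<linux/delay.h>`), but it only uses calls that appear in the driver.

```c
/* Hypothetical debug helper: kick the submission tasklet once if the
 * request can actually be submitted, then poll the fence briefly.
 * wait_us is an illustrative budget, not a driver constant.
 */
static bool kick_and_poll(struct i915_request *rq, unsigned int wait_us)
{
	if (i915_request_is_ready(rq))
		intel_engine_flush_submission(rq->engine);

	while (wait_us--) {
		if (dma_fence_is_signaled(&rq->fence))
			return true;
		udelay(1);
	}

	return false;
}
```
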
| 1350 | 1816 | |
|---|
| 1351 | 1817 | for (;;) { |
|---|
| 1818 | + set_current_state(state); |
|---|
| 1819 | + |
|---|
| 1820 | + if (dma_fence_is_signaled(&rq->fence)) |
|---|
| 1821 | + break; |
|---|
| 1822 | + |
|---|
| 1352 | 1823 | if (signal_pending_state(state, current)) { |
|---|
| 1353 | 1824 | timeout = -ERESTARTSYS; |
|---|
| 1354 | 1825 | break; |
|---|
| .. | .. |
|---|
| 1360 | 1831 | } |
|---|
| 1361 | 1832 | |
|---|
| 1362 | 1833 | timeout = io_schedule_timeout(timeout); |
|---|
| 1363 | | - |
|---|
| 1364 | | - if (intel_wait_complete(&wait) && |
|---|
| 1365 | | - intel_wait_check_request(&wait, rq)) |
|---|
| 1366 | | - break; |
|---|
| 1367 | | - |
|---|
| 1368 | | - set_current_state(state); |
|---|
| 1369 | | - |
|---|
| 1370 | | -wakeup: |
|---|
| 1371 | | - /* |
|---|
| 1372 | | - * Carefully check if the request is complete, giving time |
|---|
| 1373 | | - * for the seqno to be visible following the interrupt. |
|---|
| 1374 | | - * We also have to check in case we are kicked by the GPU |
|---|
| 1375 | | - * reset in order to drop the struct_mutex. |
|---|
| 1376 | | - */ |
|---|
| 1377 | | - if (__i915_request_irq_complete(rq)) |
|---|
| 1378 | | - break; |
|---|
| 1379 | | - |
|---|
| 1380 | | - /* |
|---|
| 1381 | | - * If the GPU is hung, and we hold the lock, reset the GPU |
|---|
| 1382 | | - * and then check for completion. On a full reset, the engine's |
|---|
| 1383 | | - * HW seqno will be advanced past us and we are complete. |
|---|
| 1384 | | - * If we do a partial reset, we have to wait for the GPU to |
|---|
| 1385 | | - * resume and update the breadcrumb. |
|---|
| 1386 | | - * |
|---|
| 1387 | | - * If we don't hold the mutex, we can just wait for the worker |
|---|
| 1388 | | - * to come along and update the breadcrumb (either directly |
|---|
| 1389 | | - * itself, or indirectly by recovering the GPU). |
|---|
| 1390 | | - */ |
|---|
| 1391 | | - if (flags & I915_WAIT_LOCKED && |
|---|
| 1392 | | - __i915_wait_request_check_and_reset(rq)) |
|---|
| 1393 | | - continue; |
|---|
| 1394 | | - |
|---|
| 1395 | | - /* Only spin if we know the GPU is processing this request */ |
|---|
| 1396 | | - if (__i915_spin_request(rq, wait.seqno, state, 2)) |
|---|
| 1397 | | - break; |
|---|
| 1398 | | - |
|---|
| 1399 | | - if (!intel_wait_check_request(&wait, rq)) { |
|---|
| 1400 | | - intel_engine_remove_wait(rq->engine, &wait); |
|---|
| 1401 | | - goto restart; |
|---|
| 1402 | | - } |
|---|
| 1403 | 1834 | } |
|---|
| 1404 | | - |
|---|
| 1405 | | - intel_engine_remove_wait(rq->engine, &wait); |
|---|
| 1406 | | -complete: |
|---|
| 1407 | 1835 | __set_current_state(TASK_RUNNING); |
|---|
| 1408 | | - if (flags & I915_WAIT_LOCKED) |
|---|
| 1409 | | - remove_wait_queue(errq, &reset); |
|---|
| 1410 | | - remove_wait_queue(&rq->execute, &exec); |
|---|
| 1836 | + |
|---|
| 1837 | + if (READ_ONCE(wait.tsk)) |
|---|
| 1838 | + dma_fence_remove_callback(&rq->fence, &wait.cb); |
|---|
| 1839 | + GEM_BUG_ON(!list_empty(&wait.cb.node)); |
|---|
| 1840 | + |
|---|
| 1841 | +out: |
|---|
| 1842 | + mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_); |
|---|
| 1411 | 1843 | trace_i915_request_wait_end(rq); |
|---|
| 1412 | | - |
|---|
| 1413 | 1844 | return timeout; |
|---|
| 1414 | | -} |
|---|
| 1415 | | - |
|---|
| 1416 | | -static void ring_retire_requests(struct intel_ring *ring) |
|---|
| 1417 | | -{ |
|---|
| 1418 | | - struct i915_request *request, *next; |
|---|
| 1419 | | - |
|---|
| 1420 | | - list_for_each_entry_safe(request, next, |
|---|
| 1421 | | - &ring->request_list, ring_link) { |
|---|
| 1422 | | - if (!i915_request_completed(request)) |
|---|
| 1423 | | - break; |
|---|
| 1424 | | - |
|---|
| 1425 | | - i915_request_retire(request); |
|---|
| 1426 | | - } |
|---|
| 1427 | | -} |
|---|
| 1428 | | - |
|---|
| 1429 | | -void i915_retire_requests(struct drm_i915_private *i915) |
|---|
| 1430 | | -{ |
|---|
| 1431 | | - struct intel_ring *ring, *tmp; |
|---|
| 1432 | | - |
|---|
| 1433 | | - lockdep_assert_held(&i915->drm.struct_mutex); |
|---|
| 1434 | | - |
|---|
| 1435 | | - if (!i915->gt.active_requests) |
|---|
| 1436 | | - return; |
|---|
| 1437 | | - |
|---|
| 1438 | | - list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link) |
|---|
| 1439 | | - ring_retire_requests(ring); |
|---|
| 1440 | 1845 | } |
|---|
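
From the caller's side, the return value encodes the three exits of the loop above: the remaining jiffies on success, -ETIME when the budget runs out, and -ERESTARTSYS when a signal interrupts an interruptible wait. A sketch of typical handling, with an arbitrary 20 ms budget:

```c
/* Illustrative caller; the 20 ms budget is an arbitrary example. */
static int sync_to_request(struct i915_request *rq)
{
	long ret;

	ret = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
				msecs_to_jiffies(20));
	if (ret == -ETIME)
		return -EBUSY;	/* still running: report busy, retry later */
	if (ret < 0)
		return ret;	/* -ERESTARTSYS: let the syscall restart */

	return 0;		/* fence signalled within the budget */
}
```
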
| 1441 | 1846 | |
|---|
| 1442 | 1847 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
|---|
| 1443 | 1848 | #include "selftests/mock_request.c" |
|---|
| 1444 | 1849 | #include "selftests/i915_request.c" |
|---|
| 1445 | 1850 | #endif |
|---|
| 1851 | + |
|---|
| 1852 | +static void i915_global_request_shrink(void) |
|---|
| 1853 | +{ |
|---|
| 1854 | + kmem_cache_shrink(global.slab_execute_cbs); |
|---|
| 1855 | + kmem_cache_shrink(global.slab_requests); |
|---|
| 1856 | +} |
|---|
| 1857 | + |
|---|
| 1858 | +static void i915_global_request_exit(void) |
|---|
| 1859 | +{ |
|---|
| 1860 | + kmem_cache_destroy(global.slab_execute_cbs); |
|---|
| 1861 | + kmem_cache_destroy(global.slab_requests); |
|---|
| 1862 | +} |
|---|
| 1863 | + |
|---|
| 1864 | +static struct i915_global_request global = { { |
|---|
| 1865 | + .shrink = i915_global_request_shrink, |
|---|
| 1866 | + .exit = i915_global_request_exit, |
|---|
| 1867 | +} }; |
|---|
| 1868 | + |
|---|
| 1869 | +int __init i915_global_request_init(void) |
|---|
| 1870 | +{ |
|---|
| 1871 | + global.slab_requests = |
|---|
| 1872 | + kmem_cache_create("i915_request", |
|---|
| 1873 | + sizeof(struct i915_request), |
|---|
| 1874 | + __alignof__(struct i915_request), |
|---|
| 1875 | + SLAB_HWCACHE_ALIGN | |
|---|
| 1876 | + SLAB_RECLAIM_ACCOUNT | |
|---|
| 1877 | + SLAB_TYPESAFE_BY_RCU, |
|---|
| 1878 | + __i915_request_ctor); |
|---|
| 1879 | + if (!global.slab_requests) |
|---|
| 1880 | + return -ENOMEM; |
|---|
| 1881 | + |
|---|
| 1882 | + global.slab_execute_cbs = KMEM_CACHE(execute_cb, |
|---|
| 1883 | + SLAB_HWCACHE_ALIGN | |
|---|
| 1884 | + SLAB_RECLAIM_ACCOUNT | |
|---|
| 1885 | + SLAB_TYPESAFE_BY_RCU); |
|---|
| 1886 | + if (!global.slab_execute_cbs) |
|---|
| 1887 | + goto err_requests; |
|---|
| 1888 | + |
|---|
| 1889 | + i915_global_register(&global.base); |
|---|
| 1890 | + return 0; |
|---|
| 1891 | + |
|---|
| 1892 | +err_requests: |
|---|
| 1893 | + kmem_cache_destroy(global.slab_requests); |
|---|
| 1894 | + return -ENOMEM; |
|---|
| 1895 | +} |
|---|
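
Both caches are created with SLAB_TYPESAFE_BY_RCU, so their memory may be recycled into a new object within an RCU grace period rather than returned to the page allocator. Lockless readers therefore have to take a reference and then revalidate that the object is still the one they looked up. A minimal sketch of that rule, assuming the caller already holds an RCU-protected pointer and knows the seqno it expects (the helper name is hypothetical):

```c
/* Hypothetical helper showing the SLAB_TYPESAFE_BY_RCU revalidation
 * rule: acquire a reference under RCU, then confirm the memory was
 * not recycled into a different request while we were looking.
 */
static struct i915_request *
get_request_if_seqno(struct i915_request *rq, u64 expected_seqno)
{
	rcu_read_lock();
	rq = i915_request_get_rcu(rq);	/* fails if the refcount hit zero */
	if (rq && rq->fence.seqno != expected_seqno) {
		i915_request_put(rq);	/* the slab reused this memory */
		rq = NULL;
	}
	rcu_read_unlock();

	return rq;
}
```

This reuse is also why the request cache installs a constructor (__i915_request_ctor, defined earlier in the file) for the fields RCU readers may touch, rather than re-initialising them on every allocation.
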