hc
2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/drivers/gpu/drm/i915/i915_request.c
....@@ -22,21 +22,48 @@
2222 *
2323 */
2424
25
-#include <linux/prefetch.h>
2625 #include <linux/dma-fence-array.h>
26
+#include <linux/dma-fence-chain.h>
27
+#include <linux/irq_work.h>
28
+#include <linux/prefetch.h>
2729 #include <linux/sched.h>
2830 #include <linux/sched/clock.h>
2931 #include <linux/sched/signal.h>
3032
33
+#include "gem/i915_gem_context.h"
34
+#include "gt/intel_breadcrumbs.h"
35
+#include "gt/intel_context.h"
36
+#include "gt/intel_ring.h"
37
+#include "gt/intel_rps.h"
38
+
39
+#include "i915_active.h"
3140 #include "i915_drv.h"
41
+#include "i915_globals.h"
42
+#include "i915_trace.h"
43
+#include "intel_pm.h"
44
+
45
+struct execute_cb {
46
+ struct irq_work work;
47
+ struct i915_sw_fence *fence;
48
+ void (*hook)(struct i915_request *rq, struct dma_fence *signal);
49
+ struct i915_request *signal;
50
+};
51
+
52
+static struct i915_global_request {
53
+ struct i915_global base;
54
+ struct kmem_cache *slab_requests;
55
+ struct kmem_cache *slab_execute_cbs;
56
+} global;
3257
3358 static const char *i915_fence_get_driver_name(struct dma_fence *fence)
3459 {
35
- return "i915";
60
+ return dev_name(to_request(fence)->engine->i915->drm.dev);
3661 }
3762
3863 static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
3964 {
65
+ const struct i915_gem_context *ctx;
66
+
4067 /*
4168 * The timeline struct (as part of the ppgtt underneath a context)
4269 * may be freed when the request is no longer in use by the GPU.
....@@ -49,7 +76,11 @@
4976 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
5077 return "signaled";
5178
52
- return to_request(fence)->timeline->name;
79
+ ctx = i915_request_gem_context(to_request(fence));
80
+ if (!ctx)
81
+ return "[" DRIVER_NAME "]";
82
+
83
+ return ctx->name;
5384 }
5485
5586 static bool i915_fence_signaled(struct dma_fence *fence)
....@@ -59,14 +90,21 @@
5990
6091 static bool i915_fence_enable_signaling(struct dma_fence *fence)
6192 {
62
- return intel_engine_enable_signaling(to_request(fence), true);
93
+ return i915_request_enable_breadcrumb(to_request(fence));
6394 }
6495
6596 static signed long i915_fence_wait(struct dma_fence *fence,
6697 bool interruptible,
6798 signed long timeout)
6899 {
69
- return i915_request_wait(to_request(fence), interruptible, timeout);
100
+ return i915_request_wait(to_request(fence),
101
+ interruptible | I915_WAIT_PRIORITY,
102
+ timeout);
103
+}
104
+
105
+struct kmem_cache *i915_request_slab_cache(void)
106
+{
107
+ return global.slab_requests;
70108 }
71109
72110 static void i915_fence_release(struct dma_fence *fence)
....@@ -81,8 +119,44 @@
81119 * caught trying to reuse dead objects.
82120 */
83121 i915_sw_fence_fini(&rq->submit);
122
+ i915_sw_fence_fini(&rq->semaphore);
84123
85
- kmem_cache_free(rq->i915->requests, rq);
124
+ /*
125
+ * Keep one request on each engine for reserved use under mempressure
126
+ *
127
+ * We do not hold a reference to the engine here and so have to be
128
+ * very careful in what rq->engine we poke. The virtual engine is
129
+ * referenced via the rq->context and we released that ref during
130
+ * i915_request_retire(), ergo we must not dereference a virtual
131
+ * engine here. Not that we would want to, as the only consumer of
132
+ * the reserved engine->request_pool is the power management parking,
133
+ * which must-not-fail, and that is only run on the physical engines.
134
+ *
135
+ * Since the request must have been executed to have completed,
136
+ * we know that it will have been processed by the HW and will
137
+ * not be unsubmitted again, so rq->engine and rq->execution_mask
138
+ * at this point are stable. rq->execution_mask will be a single
139
+ * bit if the last and _only_ engine it could execute on was a
140
+ * physical engine; if it's multiple bits then it started on and
141
+ * could still be on a virtual engine. Thus if the mask is not a
142
+ * power-of-two we assume that rq->engine may still be a virtual
143
+ * engine and so may be a dangling invalid pointer that we cannot dereference.
144
+ *
145
+ * For example, consider the flow of a bonded request through a virtual
146
+ * engine. The request is created with a wide engine mask (all engines
147
+ * that we might execute on). On processing the bond, the request mask
148
+ * is reduced to one or more engines. If the request is subsequently
149
+ * bound to a single engine, it will then be constrained to only
150
+ * execute on that engine and never be returned to the virtual engine
151
+ * after timeslicing away, see __unwind_incomplete_requests(). Thus we
152
+ * know that if the rq->execution_mask is a single bit, rq->engine
153
+ * can be a physical engine with the exact corresponding mask.
154
+ */
155
+ if (is_power_of_2(rq->execution_mask) &&
156
+ !cmpxchg(&rq->engine->request_pool, NULL, rq))
157
+ return;
158
+
159
+ kmem_cache_free(global.slab_requests, rq);
86160 }
87161
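The release path above implements a single-slot per-engine reserve: a completed request may be parked in engine->request_pool with cmpxchg() instead of being freed, so the must-not-fail power-management parking always has one request available under memory pressure; request_alloc_slow(), later in this patch, consumes it with xchg(). A minimal sketch of that reserve pattern, with illustrative names (struct obj, struct obj_pool, reserve_put/reserve_get are not part of the patch):

struct obj_pool {
	struct obj *reserved;	/* NULL while the slot is empty */
};

/* On release: park the object if the slot is free, else really free it. */
static bool reserve_put(struct obj_pool *pool, struct obj *o)
{
	return cmpxchg(&pool->reserved, NULL, o) == NULL;
}

/* On allocation failure: take whatever is parked, emptying the slot. */
static struct obj *reserve_get(struct obj_pool *pool)
{
	return xchg(&pool->reserved, NULL);
}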
88162 const struct dma_fence_ops i915_fence_ops = {
....@@ -94,213 +168,121 @@
94168 .release = i915_fence_release,
95169 };
96170
97
-static inline void
98
-i915_request_remove_from_client(struct i915_request *request)
171
+static void irq_execute_cb(struct irq_work *wrk)
99172 {
100
- struct drm_i915_file_private *file_priv;
173
+ struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
101174
102
- file_priv = request->file_priv;
103
- if (!file_priv)
175
+ i915_sw_fence_complete(cb->fence);
176
+ kmem_cache_free(global.slab_execute_cbs, cb);
177
+}
178
+
179
+static void irq_execute_cb_hook(struct irq_work *wrk)
180
+{
181
+ struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
182
+
183
+ cb->hook(container_of(cb->fence, struct i915_request, submit),
184
+ &cb->signal->fence);
185
+ i915_request_put(cb->signal);
186
+
187
+ irq_execute_cb(wrk);
188
+}
189
+
190
+static __always_inline void
191
+__notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk))
192
+{
193
+ struct execute_cb *cb, *cn;
194
+
195
+ if (llist_empty(&rq->execute_cb))
104196 return;
105197
106
- spin_lock(&file_priv->mm.lock);
107
- if (request->file_priv) {
108
- list_del(&request->client_link);
109
- request->file_priv = NULL;
198
+ llist_for_each_entry_safe(cb, cn,
199
+ llist_del_all(&rq->execute_cb),
200
+ work.llnode)
201
+ fn(&cb->work);
202
+}
203
+
204
+static void __notify_execute_cb_irq(struct i915_request *rq)
205
+{
206
+ __notify_execute_cb(rq, irq_work_queue);
207
+}
208
+
209
+static bool irq_work_imm(struct irq_work *wrk)
210
+{
211
+ wrk->func(wrk);
212
+ return false;
213
+}
214
+
215
+static void __notify_execute_cb_imm(struct i915_request *rq)
216
+{
217
+ __notify_execute_cb(rq, irq_work_imm);
218
+}
219
+
220
+static void free_capture_list(struct i915_request *request)
221
+{
222
+ struct i915_capture_list *capture;
223
+
224
+ capture = fetch_and_zero(&request->capture_list);
225
+ while (capture) {
226
+ struct i915_capture_list *next = capture->next;
227
+
228
+ kfree(capture);
229
+ capture = next;
110230 }
111
- spin_unlock(&file_priv->mm.lock);
112231 }
113232
114
-static struct i915_dependency *
115
-i915_dependency_alloc(struct drm_i915_private *i915)
233
+static void __i915_request_fill(struct i915_request *rq, u8 val)
116234 {
117
- return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
235
+ void *vaddr = rq->ring->vaddr;
236
+ u32 head;
237
+
238
+ head = rq->infix;
239
+ if (rq->postfix < head) {
240
+ memset(vaddr + head, val, rq->ring->size - head);
241
+ head = 0;
242
+ }
243
+ memset(vaddr + head, val, rq->postfix - head);
118244 }
119245
120
-static void
121
-i915_dependency_free(struct drm_i915_private *i915,
122
- struct i915_dependency *dep)
246
+static void remove_from_engine(struct i915_request *rq)
123247 {
124
- kmem_cache_free(i915->dependencies, dep);
125
-}
126
-
127
-static void
128
-__i915_sched_node_add_dependency(struct i915_sched_node *node,
129
- struct i915_sched_node *signal,
130
- struct i915_dependency *dep,
131
- unsigned long flags)
132
-{
133
- INIT_LIST_HEAD(&dep->dfs_link);
134
- list_add(&dep->wait_link, &signal->waiters_list);
135
- list_add(&dep->signal_link, &node->signalers_list);
136
- dep->signaler = signal;
137
- dep->flags = flags;
138
-}
139
-
140
-static int
141
-i915_sched_node_add_dependency(struct drm_i915_private *i915,
142
- struct i915_sched_node *node,
143
- struct i915_sched_node *signal)
144
-{
145
- struct i915_dependency *dep;
146
-
147
- dep = i915_dependency_alloc(i915);
148
- if (!dep)
149
- return -ENOMEM;
150
-
151
- __i915_sched_node_add_dependency(node, signal, dep,
152
- I915_DEPENDENCY_ALLOC);
153
- return 0;
154
-}
155
-
156
-static void
157
-i915_sched_node_fini(struct drm_i915_private *i915,
158
- struct i915_sched_node *node)
159
-{
160
- struct i915_dependency *dep, *tmp;
161
-
162
- GEM_BUG_ON(!list_empty(&node->link));
248
+ struct intel_engine_cs *engine, *locked;
163249
164250 /*
165
- * Everyone we depended upon (the fences we wait to be signaled)
166
- * should retire before us and remove themselves from our list.
167
- * However, retirement is run independently on each timeline and
168
- * so we may be called out-of-order.
251
+ * Virtual engines complicate acquiring the engine timeline lock,
252
+ * as their rq->engine pointer is not stable until under that
253
+ * engine lock. The simple ploy we use is to take the lock then
254
+ * check that the rq still belongs to the newly locked engine.
169255 */
170
- list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
171
- GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler));
172
- GEM_BUG_ON(!list_empty(&dep->dfs_link));
173
-
174
- list_del(&dep->wait_link);
175
- if (dep->flags & I915_DEPENDENCY_ALLOC)
176
- i915_dependency_free(i915, dep);
256
+ locked = READ_ONCE(rq->engine);
257
+ spin_lock_irq(&locked->active.lock);
258
+ while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
259
+ spin_unlock(&locked->active.lock);
260
+ spin_lock(&engine->active.lock);
261
+ locked = engine;
177262 }
263
+ list_del_init(&rq->sched.link);
178264
179
- /* Remove ourselves from everyone who depends upon us */
180
- list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
181
- GEM_BUG_ON(dep->signaler != node);
182
- GEM_BUG_ON(!list_empty(&dep->dfs_link));
265
+ clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
266
+ clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
183267
184
- list_del(&dep->signal_link);
185
- if (dep->flags & I915_DEPENDENCY_ALLOC)
186
- i915_dependency_free(i915, dep);
187
- }
268
+ /* Prevent further __await_execution() registering a cb, then flush */
269
+ set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
270
+
271
+ spin_unlock_irq(&locked->active.lock);
272
+
273
+ __notify_execute_cb_imm(rq);
188274 }
189275
190
-static void
191
-i915_sched_node_init(struct i915_sched_node *node)
276
+bool i915_request_retire(struct i915_request *rq)
192277 {
193
- INIT_LIST_HEAD(&node->signalers_list);
194
- INIT_LIST_HEAD(&node->waiters_list);
195
- INIT_LIST_HEAD(&node->link);
196
- node->attr.priority = I915_PRIORITY_INVALID;
197
-}
278
+ if (!i915_request_completed(rq))
279
+ return false;
198280
199
-static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
200
-{
201
- struct intel_engine_cs *engine;
202
- struct i915_timeline *timeline;
203
- enum intel_engine_id id;
204
- int ret;
281
+ RQ_TRACE(rq, "\n");
205282
206
- /* Carefully retire all requests without writing to the rings */
207
- ret = i915_gem_wait_for_idle(i915,
208
- I915_WAIT_INTERRUPTIBLE |
209
- I915_WAIT_LOCKED,
210
- MAX_SCHEDULE_TIMEOUT);
211
- if (ret)
212
- return ret;
213
-
214
- GEM_BUG_ON(i915->gt.active_requests);
215
-
216
- /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
217
- for_each_engine(engine, i915, id) {
218
- GEM_TRACE("%s seqno %d (current %d) -> %d\n",
219
- engine->name,
220
- engine->timeline.seqno,
221
- intel_engine_get_seqno(engine),
222
- seqno);
223
-
224
- if (!i915_seqno_passed(seqno, engine->timeline.seqno)) {
225
- /* Flush any waiters before we reuse the seqno */
226
- intel_engine_disarm_breadcrumbs(engine);
227
- intel_engine_init_hangcheck(engine);
228
- GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals));
229
- }
230
-
231
- /* Check we are idle before we fiddle with hw state! */
232
- GEM_BUG_ON(!intel_engine_is_idle(engine));
233
- GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request));
234
-
235
- /* Finally reset hw state */
236
- intel_engine_init_global_seqno(engine, seqno);
237
- engine->timeline.seqno = seqno;
238
- }
239
-
240
- list_for_each_entry(timeline, &i915->gt.timelines, link)
241
- memset(timeline->global_sync, 0, sizeof(timeline->global_sync));
242
-
243
- i915->gt.request_serial = seqno;
244
-
245
- return 0;
246
-}
247
-
248
-int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
249
-{
250
- struct drm_i915_private *i915 = to_i915(dev);
251
-
252
- lockdep_assert_held(&i915->drm.struct_mutex);
253
-
254
- if (seqno == 0)
255
- return -EINVAL;
256
-
257
- /* HWS page needs to be set less than what we will inject to ring */
258
- return reset_all_global_seqno(i915, seqno - 1);
259
-}
260
-
261
-static int reserve_gt(struct drm_i915_private *i915)
262
-{
263
- int ret;
264
-
265
- /*
266
- * Reservation is fine until we may need to wrap around
267
- *
268
- * By incrementing the serial for every request, we know that no
269
- * individual engine may exceed that serial (as each is reset to 0
270
- * on any wrap). This protects even the most pessimistic of migrations
271
- * of every request from all engines onto just one.
272
- */
273
- while (unlikely(++i915->gt.request_serial == 0)) {
274
- ret = reset_all_global_seqno(i915, 0);
275
- if (ret) {
276
- i915->gt.request_serial--;
277
- return ret;
278
- }
279
- }
280
-
281
- if (!i915->gt.active_requests++)
282
- i915_gem_unpark(i915);
283
-
284
- return 0;
285
-}
286
-
287
-static void unreserve_gt(struct drm_i915_private *i915)
288
-{
289
- GEM_BUG_ON(!i915->gt.active_requests);
290
- if (!--i915->gt.active_requests)
291
- i915_gem_park(i915);
292
-}
293
-
294
-void i915_gem_retire_noop(struct i915_gem_active *active,
295
- struct i915_request *request)
296
-{
297
- /* Space left intentionally blank */
298
-}
299
-
300
-static void advance_ring(struct i915_request *request)
301
-{
302
- struct intel_ring *ring = request->ring;
303
- unsigned int tail;
283
+ GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
284
+ trace_i915_request_retire(rq);
285
+ i915_request_mark_complete(rq);
304286
305287 /*
306288 * We know the GPU must have read the request to have
....@@ -311,240 +293,315 @@
311293 * Note this requires that we are always called in request
312294 * completion order.
313295 */
314
- GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list));
315
- if (list_is_last(&request->ring_link, &ring->request_list)) {
316
- /*
317
- * We may race here with execlists resubmitting this request
318
- * as we retire it. The resubmission will move the ring->tail
319
- * forwards (to request->wa_tail). We either read the
320
- * current value that was written to hw, or the value that
321
- * is just about to be. Either works, if we miss the last two
322
- * noops - they are safe to be replayed on a reset.
323
- */
324
- GEM_TRACE("marking %s as inactive\n", ring->timeline->name);
325
- tail = READ_ONCE(request->tail);
326
- list_del(&ring->active_link);
327
- } else {
328
- tail = request->postfix;
329
- }
330
- list_del_init(&request->ring_link);
296
+ GEM_BUG_ON(!list_is_first(&rq->link,
297
+ &i915_request_timeline(rq)->requests));
298
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
299
+ /* Poison before we release our space in the ring */
300
+ __i915_request_fill(rq, POISON_FREE);
301
+ rq->ring->head = rq->postfix;
331302
332
- ring->head = tail;
333
-}
334
-
335
-static void free_capture_list(struct i915_request *request)
336
-{
337
- struct i915_capture_list *capture;
338
-
339
- capture = request->capture_list;
340
- while (capture) {
341
- struct i915_capture_list *next = capture->next;
342
-
343
- kfree(capture);
344
- capture = next;
345
- }
346
-}
347
-
348
-static void __retire_engine_request(struct intel_engine_cs *engine,
349
- struct i915_request *rq)
350
-{
351
- GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n",
352
- __func__, engine->name,
353
- rq->fence.context, rq->fence.seqno,
354
- rq->global_seqno,
355
- intel_engine_get_seqno(engine));
356
-
357
- GEM_BUG_ON(!i915_request_completed(rq));
358
-
359
- local_irq_disable();
360
-
361
- spin_lock(&engine->timeline.lock);
362
- GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests));
363
- list_del_init(&rq->link);
364
- spin_unlock(&engine->timeline.lock);
365
-
366
- spin_lock(&rq->lock);
367
- if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
303
+ if (!i915_request_signaled(rq)) {
304
+ spin_lock_irq(&rq->lock);
368305 dma_fence_signal_locked(&rq->fence);
369
- if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
370
- intel_engine_cancel_signaling(rq);
371
- if (rq->waitboost) {
372
- GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
373
- atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
306
+ spin_unlock_irq(&rq->lock);
374307 }
375
- spin_unlock(&rq->lock);
376308
377
- local_irq_enable();
309
+ if (i915_request_has_waitboost(rq)) {
310
+ GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
311
+ atomic_dec(&rq->engine->gt->rps.num_waiters);
312
+ }
378313
379314 /*
380
- * The backing object for the context is done after switching to the
381
- * *next* context. Therefore we cannot retire the previous context until
382
- * the next context has already started running. However, since we
383
- * cannot take the required locks at i915_request_submit() we
384
- * defer the unpinning of the active context to now, retirement of
385
- * the subsequent request.
386
- */
387
- if (engine->last_retired_context)
388
- intel_context_unpin(engine->last_retired_context);
389
- engine->last_retired_context = rq->hw_context;
390
-}
391
-
392
-static void __retire_engine_upto(struct intel_engine_cs *engine,
393
- struct i915_request *rq)
394
-{
395
- struct i915_request *tmp;
396
-
397
- if (list_empty(&rq->link))
398
- return;
399
-
400
- do {
401
- tmp = list_first_entry(&engine->timeline.requests,
402
- typeof(*tmp), link);
403
-
404
- GEM_BUG_ON(tmp->engine != engine);
405
- __retire_engine_request(engine, tmp);
406
- } while (tmp != rq);
407
-}
408
-
409
-static void i915_request_retire(struct i915_request *request)
410
-{
411
- struct i915_gem_active *active, *next;
412
-
413
- GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n",
414
- request->engine->name,
415
- request->fence.context, request->fence.seqno,
416
- request->global_seqno,
417
- intel_engine_get_seqno(request->engine));
418
-
419
- lockdep_assert_held(&request->i915->drm.struct_mutex);
420
- GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
421
- GEM_BUG_ON(!i915_request_completed(request));
422
-
423
- trace_i915_request_retire(request);
424
-
425
- advance_ring(request);
426
- free_capture_list(request);
427
-
428
- /*
429
- * Walk through the active list, calling retire on each. This allows
430
- * objects to track their GPU activity and mark themselves as idle
431
- * when their *last* active request is completed (updating state
432
- * tracking lists for eviction, active references for GEM, etc).
315
+ * We only loosely track inflight requests across preemption,
316
+ * and so we may find ourselves attempting to retire a _completed_
317
+ * request that we have removed from the HW and put back on a run
318
+ * queue.
433319 *
434
- * As the ->retire() may free the node, we decouple it first and
435
- * pass along the auxiliary information (to avoid dereferencing
436
- * the node after the callback).
320
+ * As we set I915_FENCE_FLAG_ACTIVE on the request, this should be
321
+ * after removing the breadcrumb and signaling it, so that we do not
322
+ * inadvertently attach the breadcrumb to a completed request.
437323 */
438
- list_for_each_entry_safe(active, next, &request->active_list, link) {
439
- /*
440
- * In microbenchmarks or focusing upon time inside the kernel,
441
- * we may spend an inordinate amount of time simply handling
442
- * the retirement of requests and processing their callbacks.
443
- * Of which, this loop itself is particularly hot due to the
444
- * cache misses when jumping around the list of i915_gem_active.
445
- * So we try to keep this loop as streamlined as possible and
446
- * also prefetch the next i915_gem_active to try and hide
447
- * the likely cache miss.
448
- */
449
- prefetchw(next);
324
+ remove_from_engine(rq);
325
+ GEM_BUG_ON(!llist_empty(&rq->execute_cb));
450326
451
- INIT_LIST_HEAD(&active->link);
452
- RCU_INIT_POINTER(active->request, NULL);
327
+ __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
453328
454
- active->retire(active, request);
455
- }
329
+ intel_context_exit(rq->context);
330
+ intel_context_unpin(rq->context);
456331
457
- i915_request_remove_from_client(request);
332
+ free_capture_list(rq);
333
+ i915_sched_node_fini(&rq->sched);
334
+ i915_request_put(rq);
458335
459
- /* Retirement decays the ban score as it is a sign of ctx progress */
460
- atomic_dec_if_positive(&request->gem_context->ban_score);
461
- intel_context_unpin(request->hw_context);
462
-
463
- __retire_engine_upto(request->engine, request);
464
-
465
- unreserve_gt(request->i915);
466
-
467
- i915_sched_node_fini(request->i915, &request->sched);
468
- i915_request_put(request);
336
+ return true;
469337 }
470338
471339 void i915_request_retire_upto(struct i915_request *rq)
472340 {
473
- struct intel_ring *ring = rq->ring;
341
+ struct intel_timeline * const tl = i915_request_timeline(rq);
474342 struct i915_request *tmp;
475343
476
- GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n",
477
- rq->engine->name,
478
- rq->fence.context, rq->fence.seqno,
479
- rq->global_seqno,
480
- intel_engine_get_seqno(rq->engine));
344
+ RQ_TRACE(rq, "\n");
481345
482
- lockdep_assert_held(&rq->i915->drm.struct_mutex);
483346 GEM_BUG_ON(!i915_request_completed(rq));
484347
485
- if (list_empty(&rq->ring_link))
348
+ do {
349
+ tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
350
+ } while (i915_request_retire(tmp) && tmp != rq);
351
+}
352
+
353
+static struct i915_request * const *
354
+__engine_active(struct intel_engine_cs *engine)
355
+{
356
+ return READ_ONCE(engine->execlists.active);
357
+}
358
+
359
+static bool __request_in_flight(const struct i915_request *signal)
360
+{
361
+ struct i915_request * const *port, *rq;
362
+ bool inflight = false;
363
+
364
+ if (!i915_request_is_ready(signal))
365
+ return false;
366
+
367
+ /*
368
+ * Even if we have unwound the request, it may still be on
369
+ * the GPU (preempt-to-busy). If that request is inside an
370
+ * unpreemptible critical section, it will not be removed. Some
371
+ * GPU functions may even be stuck waiting for the paired request
372
+ * (__await_execution) to be submitted and cannot be preempted
373
+ * until the bond is executing.
374
+ *
375
+ * As we know that there are always preemption points between
376
+ * requests, we know that only the currently executing request
377
+ * may be still active even though we have cleared the flag.
378
+ * However, we can't rely on our tracking of ELSP[0] to know
379
+ * which request is currently active and so maybe stuck, as
380
+ * the tracking maybe an event behind. Instead assume that
381
+ * if the context is still inflight, then it is still active
382
+ * even if the active flag has been cleared.
383
+ *
384
+ * To further complicate matters, if there a pending promotion, the HW
385
+ * may either perform a context switch to the second inflight execlists,
386
+ * or it may switch to the pending set of execlists. In the case of the
387
+ * latter, it may send the ACK and we process the event copying the
388
+ * pending[] over top of inflight[], _overwriting_ our *active. Since
389
+ * this implies the HW is arbitrating and not struck in *active, we do
390
+ * not worry about complete accuracy, but we do require no read/write
391
+ * tearing of the pointer [the read of the pointer must be valid, even
392
+ * as the array is being overwritten, for which we require the writes
393
+ * to avoid tearing.]
394
+ *
395
+ * Note that the read of *execlists->active may race with the promotion
396
+ * of execlists->pending[] to execlists->inflight[], overwritting
397
+ * the value at *execlists->active. This is fine. The promotion implies
398
+ * that we received an ACK from the HW, and so the context is not
399
+ * stuck -- if we do not see ourselves in *active, the inflight status
400
+ * is valid. If instead we see ourselves being copied into *active,
401
+ * we are inflight and may signal the callback.
402
+ */
403
+ if (!intel_context_inflight(signal->context))
404
+ return false;
405
+
406
+ rcu_read_lock();
407
+ for (port = __engine_active(signal->engine);
408
+ (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */
409
+ port++) {
410
+ if (rq->context == signal->context) {
411
+ inflight = i915_seqno_passed(rq->fence.seqno,
412
+ signal->fence.seqno);
413
+ break;
414
+ }
415
+ }
416
+ rcu_read_unlock();
417
+
418
+ return inflight;
419
+}
420
+
421
+static int
422
+__await_execution(struct i915_request *rq,
423
+ struct i915_request *signal,
424
+ void (*hook)(struct i915_request *rq,
425
+ struct dma_fence *signal),
426
+ gfp_t gfp)
427
+{
428
+ struct execute_cb *cb;
429
+
430
+ if (i915_request_is_active(signal)) {
431
+ if (hook)
432
+ hook(rq, &signal->fence);
433
+ return 0;
434
+ }
435
+
436
+ cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
437
+ if (!cb)
438
+ return -ENOMEM;
439
+
440
+ cb->fence = &rq->submit;
441
+ i915_sw_fence_await(cb->fence);
442
+ init_irq_work(&cb->work, irq_execute_cb);
443
+
444
+ if (hook) {
445
+ cb->hook = hook;
446
+ cb->signal = i915_request_get(signal);
447
+ cb->work.func = irq_execute_cb_hook;
448
+ }
449
+
450
+ /*
451
+ * Register the callback first, then see if the signaler is already
452
+ * active. This ensures that if we race with the
453
+ * __notify_execute_cb from i915_request_submit() and we are not
454
+ * included in that list, we get a second bite of the cherry and
455
+ * execute it ourselves. After this point, a future
456
+ * i915_request_submit() will notify us.
457
+ *
458
+ * In i915_request_retire() we set the ACTIVE bit on a completed
459
+ * request (then flush the execute_cb). So by registering the
460
+ * callback first, then checking the ACTIVE bit, we serialise with
461
+ * the completed/retired request.
462
+ */
463
+ if (llist_add(&cb->work.llnode, &signal->execute_cb)) {
464
+ if (i915_request_is_active(signal) ||
465
+ __request_in_flight(signal))
466
+ __notify_execute_cb_imm(signal);
467
+ }
468
+
469
+ return 0;
470
+}
471
+
472
+static bool fatal_error(int error)
473
+{
474
+ switch (error) {
475
+ case 0: /* not an error! */
476
+ case -EAGAIN: /* innocent victim of a GT reset (__i915_request_reset) */
477
+ case -ETIMEDOUT: /* waiting for Godot (timer_i915_sw_fence_wake) */
478
+ return false;
479
+ default:
480
+ return true;
481
+ }
482
+}
483
+
484
+void __i915_request_skip(struct i915_request *rq)
485
+{
486
+ GEM_BUG_ON(!fatal_error(rq->fence.error));
487
+
488
+ if (rq->infix == rq->postfix)
486489 return;
487490
491
+ /*
492
+ * As this request likely depends on state from the lost
493
+ * context, clear out all the user operations leaving the
494
+ * breadcrumb at the end (so we get the fence notifications).
495
+ */
496
+ __i915_request_fill(rq, 0);
497
+ rq->infix = rq->postfix;
498
+}
499
+
500
+void i915_request_set_error_once(struct i915_request *rq, int error)
501
+{
502
+ int old;
503
+
504
+ GEM_BUG_ON(!IS_ERR_VALUE((long)error));
505
+
506
+ if (i915_request_signaled(rq))
507
+ return;
508
+
509
+ old = READ_ONCE(rq->fence.error);
488510 do {
489
- tmp = list_first_entry(&ring->request_list,
490
- typeof(*tmp), ring_link);
491
-
492
- i915_request_retire(tmp);
493
- } while (tmp != rq);
511
+ if (fatal_error(old))
512
+ return;
513
+ } while (!try_cmpxchg(&rq->fence.error, &old, error));
494514 }
495515
496
-static u32 timeline_get_seqno(struct i915_timeline *tl)
497
-{
498
- return ++tl->seqno;
499
-}
500
-
501
-static void move_to_timeline(struct i915_request *request,
502
- struct i915_timeline *timeline)
503
-{
504
- GEM_BUG_ON(request->timeline == &request->engine->timeline);
505
- lockdep_assert_held(&request->engine->timeline.lock);
506
-
507
- spin_lock(&request->timeline->lock);
508
- list_move_tail(&request->link, &timeline->requests);
509
- spin_unlock(&request->timeline->lock);
510
-}
511
-
512
-void __i915_request_submit(struct i915_request *request)
516
+bool __i915_request_submit(struct i915_request *request)
513517 {
514518 struct intel_engine_cs *engine = request->engine;
515
- u32 seqno;
519
+ bool result = false;
516520
517
- GEM_TRACE("%s fence %llx:%d -> global=%d, current %d\n",
518
- engine->name,
519
- request->fence.context, request->fence.seqno,
520
- engine->timeline.seqno + 1,
521
- intel_engine_get_seqno(engine));
521
+ RQ_TRACE(request, "\n");
522522
523523 GEM_BUG_ON(!irqs_disabled());
524
- lockdep_assert_held(&engine->timeline.lock);
524
+ lockdep_assert_held(&engine->active.lock);
525525
526
- GEM_BUG_ON(request->global_seqno);
526
+ /*
527
+ * With the advent of preempt-to-busy, we frequently encounter
528
+ * requests that we have unsubmitted from HW, but left running
529
+ * until the next ack and so have completed in the meantime. On
530
+ * resubmission of that completed request, we can skip
531
+ * updating the payload, and execlists can even skip submitting
532
+ * the request.
533
+ *
534
+ * We must remove the request from the caller's priority queue,
535
+ * and the caller must only call us when the request is in their
536
+ * priority queue, under the active.lock. This ensures that the
537
+ * request has *not* yet been retired and we can safely move
538
+ * the request into the engine->active.list where it will be
539
+ * dropped upon retiring. (Otherwise if we resubmit a *retired*
540
+ * request, this would be a horrible use-after-free.)
541
+ */
542
+ if (i915_request_completed(request))
543
+ goto xfer;
527544
528
- seqno = timeline_get_seqno(&engine->timeline);
529
- GEM_BUG_ON(!seqno);
530
- GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
545
+ if (unlikely(intel_context_is_closed(request->context) &&
546
+ !intel_engine_has_heartbeat(engine)))
547
+ intel_context_set_banned(request->context);
531548
532
- /* We may be recursing from the signal callback of another i915 fence */
533
- spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
534
- request->global_seqno = seqno;
535
- if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
536
- intel_engine_enable_signaling(request, false);
537
- spin_unlock(&request->lock);
549
+ if (unlikely(intel_context_is_banned(request->context)))
550
+ i915_request_set_error_once(request, -EIO);
538551
539
- engine->emit_breadcrumb(request,
540
- request->ring->vaddr + request->postfix);
552
+ if (unlikely(fatal_error(request->fence.error)))
553
+ __i915_request_skip(request);
541554
542
- /* Transfer from per-context onto the global per-engine timeline */
543
- move_to_timeline(request, &engine->timeline);
555
+ /*
556
+ * Are we using semaphores when the gpu is already saturated?
557
+ *
558
+ * Using semaphores incurs a cost in having the GPU poll a
559
+ * memory location, busywaiting for it to change. The continual
560
+ * memory reads can have a noticeable impact on the rest of the
561
+ * system with the extra bus traffic, stalling the cpu as it too
562
+ * tries to access memory across the bus (perf stat -e bus-cycles).
563
+ *
564
+ * If we installed a semaphore on this request and we only submit
565
+ * the request after the signaler completed, that indicates the
566
+ * system is overloaded and using semaphores at this time only
567
+ * increases the amount of work we are doing. If so, we disable
568
+ * further use of semaphores until we are idle again, whence we
569
+ * optimistically try again.
570
+ */
571
+ if (request->sched.semaphores &&
572
+ i915_sw_fence_signaled(&request->semaphore))
573
+ engine->saturated |= request->sched.semaphores;
574
+
575
+ engine->emit_fini_breadcrumb(request,
576
+ request->ring->vaddr + request->postfix);
544577
545578 trace_i915_request_execute(request);
579
+ engine->serial++;
580
+ result = true;
546581
547
- wake_up_all(&request->execute);
582
+xfer:
583
+ if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) {
584
+ list_move_tail(&request->sched.link, &engine->active.requests);
585
+ clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
586
+ }
587
+
588
+ /*
589
+ * XXX Rollback bonded-execution on __i915_request_unsubmit()?
590
+ *
591
+ * In the future, perhaps when we have an active time-slicing scheduler,
592
+ * it will be interesting to unsubmit parallel execution and remove
593
+ * busywaits from the GPU until their master is restarted. This is
594
+ * quite hairy, we have to carefully rollback the fence and do a
595
+ * preempt-to-idle cycle on the target engine, all the while the
596
+ * master execute_cb may refire.
597
+ */
598
+ __notify_execute_cb_irq(request);
599
+
600
+ /* We may be recursing from the signal callback of another i915 fence */
601
+ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
602
+ i915_request_enable_breadcrumb(request);
603
+
604
+ return result;
548605 }
549606
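__i915_request_submit() now returns whether there is still a payload to execute: a request that completed while unsubmitted (preempt-to-busy) only needs to be moved back onto engine->active.requests, which the xfer: path above already does. A hypothetical caller sketch, since the backend that consumes the return value is not part of this hunk (backend_queue() is an invented stand-in):

static void resubmit_request(struct intel_engine_cs *engine,
			     struct i915_request *rq)
{
	unsigned long flags;

	spin_lock_irqsave(&engine->active.lock, flags);
	if (__i915_request_submit(rq))
		backend_queue(engine, rq);	/* illustrative: hand off to HW */
	spin_unlock_irqrestore(&engine->active.lock, flags);
}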
550607 void i915_request_submit(struct i915_request *request)
....@@ -553,45 +610,41 @@
553610 unsigned long flags;
554611
555612 /* Will be called from irq-context when using foreign fences. */
556
- spin_lock_irqsave(&engine->timeline.lock, flags);
613
+ spin_lock_irqsave(&engine->active.lock, flags);
557614
558615 __i915_request_submit(request);
559616
560
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
617
+ spin_unlock_irqrestore(&engine->active.lock, flags);
561618 }
562619
563620 void __i915_request_unsubmit(struct i915_request *request)
564621 {
565622 struct intel_engine_cs *engine = request->engine;
566623
567
- GEM_TRACE("%s fence %llx:%d <- global=%d, current %d\n",
568
- engine->name,
569
- request->fence.context, request->fence.seqno,
570
- request->global_seqno,
571
- intel_engine_get_seqno(engine));
572
-
573
- GEM_BUG_ON(!irqs_disabled());
574
- lockdep_assert_held(&engine->timeline.lock);
575
-
576624 /*
577625 * Only unwind in reverse order, required so that the per-context list
578626 * is kept in seqno/ring order.
579627 */
580
- GEM_BUG_ON(!request->global_seqno);
581
- GEM_BUG_ON(request->global_seqno != engine->timeline.seqno);
582
- GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine),
583
- request->global_seqno));
584
- engine->timeline.seqno--;
628
+ RQ_TRACE(request, "\n");
585629
586
- /* We may be recursing from the signal callback of another i915 fence */
587
- spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
588
- request->global_seqno = 0;
630
+ GEM_BUG_ON(!irqs_disabled());
631
+ lockdep_assert_held(&engine->active.lock);
632
+
633
+ /*
634
+ * Before we remove this breadcrumb from the signal list, we have
635
+ * to ensure that a concurrent dma_fence_enable_signaling() does not
636
+ * attach itself. We first mark the request as no longer active and
637
+ * make sure that is visible to other cores, and then remove the
638
+ * breadcrumb if attached.
639
+ */
640
+ GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
641
+ clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
589642 if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
590
- intel_engine_cancel_signaling(request);
591
- spin_unlock(&request->lock);
643
+ i915_request_cancel_breadcrumb(request);
592644
593
- /* Transfer back from the global per-engine timeline to per-context */
594
- move_to_timeline(request, request->timeline);
645
+ /* We've already spun, don't charge on resubmitting. */
646
+ if (request->sched.semaphores && i915_request_started(request))
647
+ request->sched.semaphores = 0;
595648
596649 /*
597650 * We don't need to wake_up any waiters on request->execute, they
....@@ -608,11 +661,11 @@
608661 unsigned long flags;
609662
610663 /* Will be called from irq-context when using foreign fences. */
611
- spin_lock_irqsave(&engine->timeline.lock, flags);
664
+ spin_lock_irqsave(&engine->active.lock, flags);
612665
613666 __i915_request_unsubmit(request);
614667
615
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
668
+ spin_unlock_irqrestore(&engine->active.lock, flags);
616669 }
617670
618671 static int __i915_sw_fence_call
....@@ -624,6 +677,10 @@
624677 switch (state) {
625678 case FENCE_COMPLETE:
626679 trace_i915_request_submit(request);
680
+
681
+ if (unlikely(fence->error))
682
+ i915_request_set_error_once(request, fence->error);
683
+
627684 /*
628685 * We need to serialize use of the submit_request() callback
629686 * with its hotplugging performed during an emergency
....@@ -645,61 +702,97 @@
645702 return NOTIFY_DONE;
646703 }
647704
648
-/**
649
- * i915_request_alloc - allocate a request structure
650
- *
651
- * @engine: engine that we wish to issue the request on.
652
- * @ctx: context that the request will be associated with.
653
- *
654
- * Returns a pointer to the allocated request if successful,
655
- * or an error code if not.
656
- */
657
-struct i915_request *
658
-i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
705
+static int __i915_sw_fence_call
706
+semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
659707 {
660
- struct drm_i915_private *i915 = engine->i915;
708
+ struct i915_request *rq = container_of(fence, typeof(*rq), semaphore);
709
+
710
+ switch (state) {
711
+ case FENCE_COMPLETE:
712
+ break;
713
+
714
+ case FENCE_FREE:
715
+ i915_request_put(rq);
716
+ break;
717
+ }
718
+
719
+ return NOTIFY_DONE;
720
+}
721
+
722
+static void retire_requests(struct intel_timeline *tl)
723
+{
724
+ struct i915_request *rq, *rn;
725
+
726
+ list_for_each_entry_safe(rq, rn, &tl->requests, link)
727
+ if (!i915_request_retire(rq))
728
+ break;
729
+}
730
+
731
+static noinline struct i915_request *
732
+request_alloc_slow(struct intel_timeline *tl,
733
+ struct i915_request **rsvd,
734
+ gfp_t gfp)
735
+{
661736 struct i915_request *rq;
662
- struct intel_context *ce;
663
- int ret;
664737
665
- lockdep_assert_held(&i915->drm.struct_mutex);
738
+ /* If we cannot wait, dip into our reserves */
739
+ if (!gfpflags_allow_blocking(gfp)) {
740
+ rq = xchg(rsvd, NULL);
741
+ if (!rq) /* Use the normal failure path for one final WARN */
742
+ goto out;
666743
667
- /*
668
- * Preempt contexts are reserved for exclusive use to inject a
669
- * preemption context switch. They are never to be used for any trivial
670
- * request!
671
- */
672
- GEM_BUG_ON(ctx == i915->preempt_context);
744
+ return rq;
745
+ }
673746
674
- /*
675
- * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
676
- * EIO if the GPU is already wedged.
677
- */
678
- if (i915_terminally_wedged(&i915->gpu_error))
679
- return ERR_PTR(-EIO);
680
-
681
- /*
682
- * Pinning the contexts may generate requests in order to acquire
683
- * GGTT space, so do this first before we reserve a seqno for
684
- * ourselves.
685
- */
686
- ce = intel_context_pin(ctx, engine);
687
- if (IS_ERR(ce))
688
- return ERR_CAST(ce);
689
-
690
- ret = reserve_gt(i915);
691
- if (ret)
692
- goto err_unpin;
693
-
694
- ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
695
- if (ret)
696
- goto err_unreserve;
747
+ if (list_empty(&tl->requests))
748
+ goto out;
697749
698750 /* Move our oldest request to the slab-cache (if not in use!) */
699
- rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
700
- if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
701
- i915_request_completed(rq))
702
- i915_request_retire(rq);
751
+ rq = list_first_entry(&tl->requests, typeof(*rq), link);
752
+ i915_request_retire(rq);
753
+
754
+ rq = kmem_cache_alloc(global.slab_requests,
755
+ gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
756
+ if (rq)
757
+ return rq;
758
+
759
+ /* Ratelimit ourselves to prevent oom from malicious clients */
760
+ rq = list_last_entry(&tl->requests, typeof(*rq), link);
761
+ cond_synchronize_rcu(rq->rcustate);
762
+
763
+ /* Retire our old requests in the hope that we free some */
764
+ retire_requests(tl);
765
+
766
+out:
767
+ return kmem_cache_alloc(global.slab_requests, gfp);
768
+}
769
+
770
+static void __i915_request_ctor(void *arg)
771
+{
772
+ struct i915_request *rq = arg;
773
+
774
+ spin_lock_init(&rq->lock);
775
+ i915_sched_node_init(&rq->sched);
776
+ i915_sw_fence_init(&rq->submit, submit_notify);
777
+ i915_sw_fence_init(&rq->semaphore, semaphore_notify);
778
+
779
+ rq->capture_list = NULL;
780
+
781
+ init_llist_head(&rq->execute_cb);
782
+}
783
+
784
+struct i915_request *
785
+__i915_request_create(struct intel_context *ce, gfp_t gfp)
786
+{
787
+ struct intel_timeline *tl = ce->timeline;
788
+ struct i915_request *rq;
789
+ u32 seqno;
790
+ int ret;
791
+
792
+ might_sleep_if(gfpflags_allow_blocking(gfp));
793
+
794
+ /* Check that the caller provided an already pinned context */
795
+ __intel_context_pin(ce);
703796
704797 /*
705798 * Beware: Dragons be flying overhead.
....@@ -707,7 +800,7 @@
707800 * We use RCU to look up requests in flight. The lookups may
708801 * race with the request being allocated from the slab freelist.
709802 * That is the request we are writing to here, may be in the process
710
- * of being read by __i915_gem_active_get_rcu(). As such,
803
+ * of being read by __i915_active_request_get_rcu(). As such,
711804 * we have to be very careful when overwriting the contents. During
712805 * the RCU lookup, we chase the request->engine pointer,
713806 * read the request->global_seqno and increment the reference count.
....@@ -730,64 +823,45 @@
730823 *
731824 * Do not use kmem_cache_zalloc() here!
732825 */
733
- rq = kmem_cache_alloc(i915->requests,
734
- GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
826
+ rq = kmem_cache_alloc(global.slab_requests,
827
+ gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
735828 if (unlikely(!rq)) {
736
- /* Ratelimit ourselves to prevent oom from malicious clients */
737
- ret = i915_gem_wait_for_idle(i915,
738
- I915_WAIT_LOCKED |
739
- I915_WAIT_INTERRUPTIBLE,
740
- MAX_SCHEDULE_TIMEOUT);
741
- if (ret)
742
- goto err_unreserve;
743
-
744
- /*
745
- * We've forced the client to stall and catch up with whatever
746
- * backlog there might have been. As we are assuming that we
747
- * caused the mempressure, now is an opportune time to
748
- * recover as much memory from the request pool as is possible.
749
- * Having already penalized the client to stall, we spend
750
- * a little extra time to re-optimise page allocation.
751
- */
752
- kmem_cache_shrink(i915->requests);
753
- rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */
754
-
755
- rq = kmem_cache_alloc(i915->requests, GFP_KERNEL);
829
+ rq = request_alloc_slow(tl, &ce->engine->request_pool, gfp);
756830 if (!rq) {
757831 ret = -ENOMEM;
758832 goto err_unreserve;
759833 }
760834 }
761835
762
- INIT_LIST_HEAD(&rq->active_list);
763
- rq->i915 = i915;
764
- rq->engine = engine;
765
- rq->gem_context = ctx;
766
- rq->hw_context = ce;
836
+ rq->context = ce;
837
+ rq->engine = ce->engine;
767838 rq->ring = ce->ring;
768
- rq->timeline = ce->ring->timeline;
769
- GEM_BUG_ON(rq->timeline == &engine->timeline);
839
+ rq->execution_mask = ce->engine->mask;
770840
771
- spin_lock_init(&rq->lock);
772
- dma_fence_init(&rq->fence,
773
- &i915_fence_ops,
774
- &rq->lock,
775
- rq->timeline->fence_context,
776
- timeline_get_seqno(rq->timeline));
841
+ ret = intel_timeline_get_seqno(tl, rq, &seqno);
842
+ if (ret)
843
+ goto err_free;
844
+
845
+ dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
846
+ tl->fence_context, seqno);
847
+
848
+ RCU_INIT_POINTER(rq->timeline, tl);
849
+ RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline);
850
+ rq->hwsp_seqno = tl->hwsp_seqno;
851
+ GEM_BUG_ON(i915_request_completed(rq));
852
+
853
+ rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
777854
778855 /* We bump the ref for the fence chain */
779
- i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
780
- init_waitqueue_head(&rq->execute);
856
+ i915_sw_fence_reinit(&i915_request_get(rq)->submit);
857
+ i915_sw_fence_reinit(&i915_request_get(rq)->semaphore);
781858
782
- i915_sched_node_init(&rq->sched);
859
+ i915_sched_node_reinit(&rq->sched);
783860
784
- /* No zalloc, must clear what we need by hand */
785
- rq->global_seqno = 0;
786
- rq->signaling.wait.seqno = 0;
787
- rq->file_priv = NULL;
861
+ /* No zalloc, everything must be cleared after use */
788862 rq->batch = NULL;
789
- rq->capture_list = NULL;
790
- rq->waitboost = false;
863
+ GEM_BUG_ON(rq->capture_list);
864
+ GEM_BUG_ON(!llist_empty(&rq->execute_cb));
791865
792866 /*
793867 * Reserve space in the ring buffer for all the commands required to
....@@ -795,9 +869,14 @@
795869 * i915_request_add() call can't fail. Note that the reserve may need
796870 * to be redone if the request is not actually submitted straight
797871 * away, e.g. because a GPU scheduler has deferred it.
872
+ *
873
+ * Note that due to how we add reserved_space to intel_ring_begin()
874
+ * we need to double our request to ensure that if we need to wrap
875
+ * around inside i915_request_add() there is sufficient space at
876
+ * the beginning of the ring as well.
798877 */
799
- rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
800
- GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz);
878
+ rq->reserved_space =
879
+ 2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);
801880
802881 /*
803882 * Record the position of the start of the request so that
....@@ -807,38 +886,439 @@
807886 */
808887 rq->head = rq->ring->emit;
809888
810
- /* Unconditionally invalidate GPU caches and TLBs. */
811
- ret = engine->emit_flush(rq, EMIT_INVALIDATE);
889
+ ret = rq->engine->request_alloc(rq);
812890 if (ret)
813891 goto err_unwind;
814
-
815
- ret = engine->request_alloc(rq);
816
- if (ret)
817
- goto err_unwind;
818
-
819
- /* Keep a second pin for the dual retirement along engine and ring */
820
- __intel_context_pin(ce);
821892
822893 rq->infix = rq->ring->emit; /* end of header; start of user payload */
823894
824
- /* Check that we didn't interrupt ourselves with a new request */
825
- GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
895
+ intel_context_mark_active(ce);
896
+ list_add_tail_rcu(&rq->link, &tl->requests);
897
+
826898 return rq;
827899
828900 err_unwind:
829901 ce->ring->emit = rq->head;
830902
831903 /* Make sure we didn't add ourselves to external state before freeing */
832
- GEM_BUG_ON(!list_empty(&rq->active_list));
833904 GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
834905 GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
835906
836
- kmem_cache_free(i915->requests, rq);
907
+err_free:
908
+ kmem_cache_free(global.slab_requests, rq);
837909 err_unreserve:
838
- unreserve_gt(i915);
839
-err_unpin:
840910 intel_context_unpin(ce);
841911 return ERR_PTR(ret);
912
+}
913
+
914
+struct i915_request *
915
+i915_request_create(struct intel_context *ce)
916
+{
917
+ struct i915_request *rq;
918
+ struct intel_timeline *tl;
919
+
920
+ tl = intel_context_timeline_lock(ce);
921
+ if (IS_ERR(tl))
922
+ return ERR_CAST(tl);
923
+
924
+ /* Move our oldest request to the slab-cache (if not in use!) */
925
+ rq = list_first_entry(&tl->requests, typeof(*rq), link);
926
+ if (!list_is_last(&rq->link, &tl->requests))
927
+ i915_request_retire(rq);
928
+
929
+ intel_context_enter(ce);
930
+ rq = __i915_request_create(ce, GFP_KERNEL);
931
+ intel_context_exit(ce); /* active reference transferred to request */
932
+ if (IS_ERR(rq))
933
+ goto err_unlock;
934
+
935
+ /* Check that we do not interrupt ourselves with a new request */
936
+ rq->cookie = lockdep_pin_lock(&tl->mutex);
937
+
938
+ return rq;
939
+
940
+err_unlock:
941
+ intel_context_timeline_unlock(tl);
942
+ return rq;
943
+}
944
+
945
+static int
946
+i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
947
+{
948
+ struct dma_fence *fence;
949
+ int err;
950
+
951
+ if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline))
952
+ return 0;
953
+
954
+ if (i915_request_started(signal))
955
+ return 0;
956
+
957
+ fence = NULL;
958
+ rcu_read_lock();
959
+ spin_lock_irq(&signal->lock);
960
+ do {
961
+ struct list_head *pos = READ_ONCE(signal->link.prev);
962
+ struct i915_request *prev;
963
+
964
+ /* Confirm signal has not been retired, the link is valid */
965
+ if (unlikely(i915_request_started(signal)))
966
+ break;
967
+
968
+ /* Is signal the earliest request on its timeline? */
969
+ if (pos == &rcu_dereference(signal->timeline)->requests)
970
+ break;
971
+
972
+ /*
973
+ * Peek at the request before us in the timeline. That
974
+ * request will only be valid before it is retired, so
975
+ * after acquiring a reference to it, confirm that it is
976
+ * still part of the signaler's timeline.
977
+ */
978
+ prev = list_entry(pos, typeof(*prev), link);
979
+ if (!i915_request_get_rcu(prev))
980
+ break;
981
+
982
+ /* After the strong barrier, confirm prev is still attached */
983
+ if (unlikely(READ_ONCE(prev->link.next) != &signal->link)) {
984
+ i915_request_put(prev);
985
+ break;
986
+ }
987
+
988
+ fence = &prev->fence;
989
+ } while (0);
990
+ spin_unlock_irq(&signal->lock);
991
+ rcu_read_unlock();
992
+ if (!fence)
993
+ return 0;
994
+
995
+ err = 0;
996
+ if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
997
+ err = i915_sw_fence_await_dma_fence(&rq->submit,
998
+ fence, 0,
999
+ I915_FENCE_GFP);
1000
+ dma_fence_put(fence);
1001
+
1002
+ return err;
1003
+}
1004
+
1005
+static intel_engine_mask_t
1006
+already_busywaiting(struct i915_request *rq)
1007
+{
1008
+ /*
1009
+ * Polling a semaphore causes bus traffic, delaying other users of
1010
+ * both the GPU and CPU. We want to limit the impact on others,
1011
+ * while taking advantage of early submission to reduce GPU
1012
+ * latency. Therefore we restrict ourselves to not using more
1013
+ * than one semaphore from each source, and not using a semaphore
1014
+ * if we have detected the engine is saturated (i.e. would not be
1015
+ * submitted early and cause bus traffic reading an already passed
1016
+ * semaphore).
1017
+ *
1018
+ * See the are-we-too-late? check in __i915_request_submit().
1019
+ */
1020
+ return rq->sched.semaphores | READ_ONCE(rq->engine->saturated);
1021
+}
1022
+
1023
+static int
1024
+__emit_semaphore_wait(struct i915_request *to,
1025
+ struct i915_request *from,
1026
+ u32 seqno)
1027
+{
1028
+ const int has_token = INTEL_GEN(to->engine->i915) >= 12;
1029
+ u32 hwsp_offset;
1030
+ int len, err;
1031
+ u32 *cs;
1032
+
1033
+ GEM_BUG_ON(INTEL_GEN(to->engine->i915) < 8);
1034
+ GEM_BUG_ON(i915_request_has_initial_breadcrumb(to));
1035
+
1036
+ /* We need to pin the signaler's HWSP until we are finished reading. */
1037
+ err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
1038
+ if (err)
1039
+ return err;
1040
+
1041
+ len = 4;
1042
+ if (has_token)
1043
+ len += 2;
1044
+
1045
+ cs = intel_ring_begin(to, len);
1046
+ if (IS_ERR(cs))
1047
+ return PTR_ERR(cs);
1048
+
1049
+ /*
1050
+ * Using greater-than-or-equal here means we have to worry
1051
+ * about seqno wraparound. To side step that issue, we swap
1052
+ * the timeline HWSP upon wrapping, so that everyone listening
1053
+ * for the old (pre-wrap) values do not see the much smaller
1054
+ * (post-wrap) values than they were expecting (and so wait
1055
+ * forever).
1056
+ */
1057
+ *cs++ = (MI_SEMAPHORE_WAIT |
1058
+ MI_SEMAPHORE_GLOBAL_GTT |
1059
+ MI_SEMAPHORE_POLL |
1060
+ MI_SEMAPHORE_SAD_GTE_SDD) +
1061
+ has_token;
1062
+ *cs++ = seqno;
1063
+ *cs++ = hwsp_offset;
1064
+ *cs++ = 0;
1065
+ if (has_token) {
1066
+ *cs++ = 0;
1067
+ *cs++ = MI_NOOP;
1068
+ }
1069
+
1070
+ intel_ring_advance(to, cs);
1071
+ return 0;
1072
+}
1073
+
1074
+static int
1075
+emit_semaphore_wait(struct i915_request *to,
1076
+ struct i915_request *from,
1077
+ gfp_t gfp)
1078
+{
1079
+ const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask;
1080
+ struct i915_sw_fence *wait = &to->submit;
1081
+
1082
+ if (!intel_context_use_semaphores(to->context))
1083
+ goto await_fence;
1084
+
1085
+ if (i915_request_has_initial_breadcrumb(to))
1086
+ goto await_fence;
1087
+
1088
+ if (!rcu_access_pointer(from->hwsp_cacheline))
1089
+ goto await_fence;
1090
+
1091
+ /*
1092
+ * If this or its dependents are waiting on an external fence
1093
+ * that may fail catastrophically, then we want to avoid using
1094
+ * sempahores as they bypass the fence signaling metadata, and we
1095
+ * lose the fence->error propagation.
1096
+ */
1097
+ if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN)
1098
+ goto await_fence;
1099
+
1100
+ /* Just emit the first semaphore we see as request space is limited. */
1101
+ if (already_busywaiting(to) & mask)
1102
+ goto await_fence;
1103
+
1104
+ if (i915_request_await_start(to, from) < 0)
1105
+ goto await_fence;
1106
+
1107
+ /* Only submit our spinner after the signaler is running! */
1108
+ if (__await_execution(to, from, NULL, gfp))
1109
+ goto await_fence;
1110
+
1111
+ if (__emit_semaphore_wait(to, from, from->fence.seqno))
1112
+ goto await_fence;
1113
+
1114
+ to->sched.semaphores |= mask;
1115
+ wait = &to->semaphore;
1116
+
1117
+await_fence:
1118
+ return i915_sw_fence_await_dma_fence(wait,
1119
+ &from->fence, 0,
1120
+ I915_FENCE_GFP);
1121
+}
1122
+
1123
+static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
1124
+ struct dma_fence *fence)
1125
+{
1126
+ return __intel_timeline_sync_is_later(tl,
1127
+ fence->context,
1128
+ fence->seqno - 1);
1129
+}
1130
+
1131
+static int intel_timeline_sync_set_start(struct intel_timeline *tl,
1132
+ const struct dma_fence *fence)
1133
+{
1134
+ return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
1135
+}
1136
+
1137
+static int
1138
+__i915_request_await_execution(struct i915_request *to,
1139
+ struct i915_request *from,
1140
+ void (*hook)(struct i915_request *rq,
1141
+ struct dma_fence *signal))
1142
+{
1143
+ int err;
1144
+
1145
+ GEM_BUG_ON(intel_context_is_barrier(from->context));
1146
+
1147
+ /* Submit both requests at the same time */
1148
+ err = __await_execution(to, from, hook, I915_FENCE_GFP);
1149
+ if (err)
1150
+ return err;
1151
+
1152
+ /* Squash repeated dependencies to the same timelines */
1153
+ if (intel_timeline_sync_has_start(i915_request_timeline(to),
1154
+ &from->fence))
1155
+ return 0;
1156
+
1157
+ /*
1158
+ * Wait until the start of this request.
1159
+ *
1160
+ * The execution cb fires when we submit the request to HW. But in
1161
+ * many cases this may be long before the request itself is ready to
1162
+ * run (consider that we submit 2 requests for the same context, where
1163
+ * the request of interest is behind an indefinite spinner). So we hook
1164
+ * up to both to reduce our queues and keep the execution lag minimised
1165
+ * in the worst case, though we hope that the await_start is elided.
1166
+ */
1167
+ err = i915_request_await_start(to, from);
1168
+ if (err < 0)
1169
+ return err;
1170
+
1171
+ /*
1172
+ * Ensure both start together [after all semaphores in signal]
1173
+ *
1174
+ * Now that we are queued to the HW at roughly the same time (thanks
1175
+ * to the execute cb) and are ready to run at roughly the same time
1176
+ * (thanks to the await start), our signaler may still be indefinitely
1177
+ * delayed by waiting on a semaphore from a remote engine. If our
1178
+ * signaler depends on a semaphore, so indirectly do we, and we do not
1179
+ * want to start our payload until our signaler also starts theirs.
1180
+ * So we wait.
1181
+ *
1182
+ * However, there is also a second condition for which we need to wait
1183
+ * for the precise start of the signaler. Consider that the signaler
1184
+ * was submitted in a chain of requests following another context
1185
+ * (with just an ordinary intra-engine fence dependency between the
1186
+ * two). In this case the signaler is queued to HW, but not for
1187
+ * immediate execution, and so we must wait until it reaches the
1188
+ * active slot.
1189
+ */
1190
+ if (intel_engine_has_semaphores(to->engine) &&
1191
+ !i915_request_has_initial_breadcrumb(to)) {
1192
+ err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
1193
+ if (err < 0)
1194
+ return err;
1195
+ }
1196
+
1197
+ /* Couple the dependency tree for PI on this exposed to->fence */
1198
+ if (to->engine->schedule) {
1199
+ err = i915_sched_node_add_dependency(&to->sched,
1200
+ &from->sched,
1201
+ I915_DEPENDENCY_WEAK);
1202
+ if (err < 0)
1203
+ return err;
1204
+ }
1205
+
1206
+ return intel_timeline_sync_set_start(i915_request_timeline(to),
1207
+ &from->fence);
1208
+}
1209
+
1210
+static void mark_external(struct i915_request *rq)
1211
+{
1212
+ /*
1213
+ * The downside of using semaphores is that we lose metadata passing
1214
+ * along the signaling chain. This is particularly nasty when we
1215
+ * need to pass along a fatal error such as EFAULT or EDEADLK. For
1216
+ * fatal errors we want to scrub the request before it is executed,
1217
+ * which means that we cannot preload the request onto HW and have
1218
+ * it wait upon a semaphore.
1219
+ */
1220
+ rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN;
1221
+}
1222
+
1223
+static int
1224
+__i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
1225
+{
1226
+ mark_external(rq);
1227
+ return i915_sw_fence_await_dma_fence(&rq->submit, fence,
1228
+ i915_fence_context_timeout(rq->engine->i915,
1229
+ fence->context),
1230
+ I915_FENCE_GFP);
1231
+}
1232
+
1233
+static int
1234
+i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
1235
+{
1236
+ struct dma_fence *iter;
1237
+ int err = 0;
1238
+
1239
+ if (!to_dma_fence_chain(fence))
1240
+ return __i915_request_await_external(rq, fence);
1241
+
1242
+ dma_fence_chain_for_each(iter, fence) {
1243
+ struct dma_fence_chain *chain = to_dma_fence_chain(iter);
1244
+
1245
+ if (!dma_fence_is_i915(chain->fence)) {
1246
+ err = __i915_request_await_external(rq, iter);
1247
+ break;
1248
+ }
1249
+
1250
+ err = i915_request_await_dma_fence(rq, chain->fence);
1251
+ if (err < 0)
1252
+ break;
1253
+ }
1254
+
1255
+ dma_fence_put(iter);
1256
+ return err;
1257
+}
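i915_request_await_external() above walks a dma_fence_chain and treats driver-native links differently from foreign ones. A simplified, self-contained model of that traversal (plain structs stand in for the chain; the break-on-foreign-fence behaviour is the point being shown):

#include <stdbool.h>
#include <stdio.h>

/* A stand-in for a dma_fence_chain link: each node carries one payload fence. */
struct link {
	const char *name;
	bool native;		/* payload comes from the same driver? */
	struct link *prev;	/* older links in the chain */
};

/*
 * Walk from newest to oldest. Native payloads can use the cheap
 * driver-internal wait; the first foreign payload falls back to a
 * timed external wait covering the remaining chain, and we stop.
 */
static int await_chain(struct link *chain)
{
	for (struct link *l = chain; l; l = l->prev) {
		if (!l->native) {
			printf("external wait (with timeout) on %s and older\n", l->name);
			return 0;
		}
		printf("native await on %s\n", l->name);
	}
	return 0;
}

int main(void)
{
	struct link a = { "syncobj point 1", true, NULL };
	struct link b = { "syncobj point 2", false, &a };
	struct link c = { "syncobj point 3", true, &b };

	return await_chain(&c);
}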
1258
+
1259
+int
1260
+i915_request_await_execution(struct i915_request *rq,
1261
+ struct dma_fence *fence,
1262
+ void (*hook)(struct i915_request *rq,
1263
+ struct dma_fence *signal))
1264
+{
1265
+ struct dma_fence **child = &fence;
1266
+ unsigned int nchild = 1;
1267
+ int ret;
1268
+
1269
+ if (dma_fence_is_array(fence)) {
1270
+ struct dma_fence_array *array = to_dma_fence_array(fence);
1271
+
1272
+ /* XXX Error for signal-on-any fence arrays */
1273
+
1274
+ child = array->fences;
1275
+ nchild = array->num_fences;
1276
+ GEM_BUG_ON(!nchild);
1277
+ }
1278
+
1279
+ do {
1280
+ fence = *child++;
1281
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
1282
+ continue;
1283
+
1284
+ if (fence->context == rq->fence.context)
1285
+ continue;
1286
+
1287
+ /*
1288
+ * We don't squash repeated fence dependencies here as we
1289
+ * want to run our callback in all cases.
1290
+ */
1291
+
1292
+ if (dma_fence_is_i915(fence))
1293
+ ret = __i915_request_await_execution(rq,
1294
+ to_request(fence),
1295
+ hook);
1296
+ else
1297
+ ret = i915_request_await_external(rq, fence);
1298
+ if (ret < 0)
1299
+ return ret;
1300
+ } while (--nchild);
1301
+
1302
+ return 0;
1303
+}
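i915_request_await_execution() above flattens a composite fence into its children and filters out the ones that need no wait. A toy version of just that filtering step, under the assumption that only "already signaled" and "same context as the waiter" children are skipped (struct and function names are made up for the sketch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fence {
	uint64_t context;	/* timeline identifier */
	bool signaled;
};

/*
 * Decide, per child fence, whether a wait needs to be recorded:
 * already-signaled fences and fences on the waiter's own timeline
 * are skipped.
 */
static unsigned int count_waits(const struct fence *children, unsigned int n,
				uint64_t own_context)
{
	unsigned int waits = 0;

	for (unsigned int i = 0; i < n; i++) {
		if (children[i].signaled)
			continue;
		if (children[i].context == own_context)
			continue;
		waits++;
	}

	return waits;
}

int main(void)
{
	const struct fence children[] = {
		{ .context = 1, .signaled = true },   /* skipped: already done */
		{ .context = 7, .signaled = false },  /* skipped: our own timeline */
		{ .context = 3, .signaled = false },  /* needs a wait */
	};

	printf("waits needed: %u\n", count_waits(children, 3, 7)); /* 1 */
	return 0;
}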
1304
+
1305
+static int
1306
+await_request_submit(struct i915_request *to, struct i915_request *from)
1307
+{
1308
+ /*
1309
+ * If we are waiting on a virtual engine, then it may be
1310
+ * constrained to execute on a single engine *prior* to submission.
1311
+ * When it is submitted, it will be first submitted to the virtual
1312
+ * engine and then passed to the physical engine. We cannot allow
1313
+ * the waiter to be submitted immediately to the physical engine
1314
+ * as it may then bypass the virtual request.
1315
+ */
1316
+ if (to->engine == READ_ONCE(from->engine))
1317
+ return i915_sw_fence_await_sw_fence_gfp(&to->submit,
1318
+ &from->submit,
1319
+ I915_FENCE_GFP);
1320
+ else
1321
+ return __i915_request_await_execution(to, from, NULL);
8421322 }
8431323
8441324 static int
....@@ -849,50 +1329,27 @@
8491329 GEM_BUG_ON(to == from);
8501330 GEM_BUG_ON(to->timeline == from->timeline);
8511331
852
- if (i915_request_completed(from))
1332
+ if (i915_request_completed(from)) {
1333
+ i915_sw_fence_set_error_once(&to->submit, from->fence.error);
8531334 return 0;
1335
+ }
8541336
8551337 if (to->engine->schedule) {
856
- ret = i915_sched_node_add_dependency(to->i915,
857
- &to->sched,
858
- &from->sched);
1338
+ ret = i915_sched_node_add_dependency(&to->sched,
1339
+ &from->sched,
1340
+ I915_DEPENDENCY_EXTERNAL);
8591341 if (ret < 0)
8601342 return ret;
8611343 }
8621344
863
- if (to->engine == from->engine) {
864
- ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
865
- &from->submit,
866
- I915_FENCE_GFP);
867
- return ret < 0 ? ret : 0;
868
- }
1345
+ if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
1346
+ ret = await_request_submit(to, from);
1347
+ else
1348
+ ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
1349
+ if (ret < 0)
1350
+ return ret;
8691351
870
- if (to->engine->semaphore.sync_to) {
871
- u32 seqno;
872
-
873
- GEM_BUG_ON(!from->engine->semaphore.signal);
874
-
875
- seqno = i915_request_global_seqno(from);
876
- if (!seqno)
877
- goto await_dma_fence;
878
-
879
- if (seqno <= to->timeline->global_sync[from->engine->id])
880
- return 0;
881
-
882
- trace_i915_gem_ring_sync_to(to, from);
883
- ret = to->engine->semaphore.sync_to(to, from);
884
- if (ret)
885
- return ret;
886
-
887
- to->timeline->global_sync[from->engine->id] = seqno;
888
- return 0;
889
- }
890
-
891
-await_dma_fence:
892
- ret = i915_sw_fence_await_dma_fence(&to->submit,
893
- &from->fence, 0,
894
- I915_FENCE_GFP);
895
- return ret < 0 ? ret : 0;
1352
+ return 0;
8961353 }
8971354
8981355 int
....@@ -932,22 +1389,22 @@
9321389 continue;
9331390
9341391 /* Squash repeated waits to the same timelines */
935
- if (fence->context != rq->i915->mm.unordered_timeline &&
936
- i915_timeline_sync_is_later(rq->timeline, fence))
1392
+ if (fence->context &&
1393
+ intel_timeline_sync_is_later(i915_request_timeline(rq),
1394
+ fence))
9371395 continue;
9381396
9391397 if (dma_fence_is_i915(fence))
9401398 ret = i915_request_await_request(rq, to_request(fence));
9411399 else
942
- ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
943
- I915_FENCE_TIMEOUT,
944
- I915_FENCE_GFP);
1400
+ ret = i915_request_await_external(rq, fence);
9451401 if (ret < 0)
9461402 return ret;
9471403
9481404 /* Record the latest fence used against each timeline */
949
- if (fence->context != rq->i915->mm.unordered_timeline)
950
- i915_timeline_sync_set(rq->timeline, fence);
1405
+ if (fence->context)
1406
+ intel_timeline_sync_set(i915_request_timeline(rq),
1407
+ fence);
9511408 } while (--nchild);
9521409
9531410 return 0;
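The intel_timeline_sync_is_later()/intel_timeline_sync_set() pair used above is essentially a map from fence context to the newest seqno already awaited, so repeat waits on the same timeline can be squashed. A toy version with a tiny fixed-size table (the real tracking structure is more elaborate and collision-free; this only shows the idea):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_CONTEXTS 8

/* Newest seqno already awaited, per fence context (0 = nothing yet). */
static uint32_t latest[MAX_CONTEXTS];

static bool sync_is_later(uint64_t context, uint32_t seqno)
{
	return (int32_t)(latest[context % MAX_CONTEXTS] - seqno) >= 0;
}

static void sync_set(uint64_t context, uint32_t seqno)
{
	latest[context % MAX_CONTEXTS] = seqno;
}

int main(void)
{
	/* First wait on context 3, seqno 10: must be recorded. */
	if (!sync_is_later(3, 10)) {
		printf("await context 3, seqno 10\n");
		sync_set(3, 10);
	}

	/* An older fence from the same context is now redundant. */
	if (sync_is_later(3, 8))
		printf("skip context 3, seqno 8 (already ordered)\n");

	return 0;
}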
....@@ -985,7 +1442,7 @@
9851442 struct dma_fence **shared;
9861443 unsigned int count, i;
9871444
988
- ret = reservation_object_get_fences_rcu(obj->resv,
1445
+ ret = dma_resv_get_fences_rcu(obj->base.resv,
9891446 &excl, &count, &shared);
9901447 if (ret)
9911448 return ret;
....@@ -1002,7 +1459,7 @@
10021459 dma_fence_put(shared[i]);
10031460 kfree(shared);
10041461 } else {
1005
- excl = reservation_object_get_excl_rcu(obj->resv);
1462
+ excl = dma_resv_get_excl_rcu(obj->base.resv);
10061463 }
10071464
10081465 if (excl) {
....@@ -1015,25 +1472,70 @@
10151472 return ret;
10161473 }
10171474
1018
-void i915_request_skip(struct i915_request *rq, int error)
1475
+static struct i915_request *
1476
+__i915_request_add_to_timeline(struct i915_request *rq)
10191477 {
1020
- void *vaddr = rq->ring->vaddr;
1021
- u32 head;
1022
-
1023
- GEM_BUG_ON(!IS_ERR_VALUE((long)error));
1024
- dma_fence_set_error(&rq->fence, error);
1478
+ struct intel_timeline *timeline = i915_request_timeline(rq);
1479
+ struct i915_request *prev;
10251480
10261481 /*
1027
- * As this request likely depends on state from the lost
1028
- * context, clear out all the user operations leaving the
1029
- * breadcrumb at the end (so we get the fence notifications).
1482
+ * Dependency tracking and request ordering along the timeline
1483
+ * is special cased so that we can eliminate redundant ordering
1484
+ * operations while building the request (we know that the timeline
1485
+ * itself is ordered, and here we guarantee it).
1486
+ *
1487
+ * As we know we will need to emit tracking along the timeline,
1488
+ * we embed the hooks into our request struct -- at the cost of
1489
+ * having to have specialised no-allocation interfaces (which will
1490
+ * be beneficial elsewhere).
1491
+ *
1492
+ * A second benefit to open-coding i915_request_await_request is
1493
+ * that we can apply a slight variant of the rules specialised
1494
+ * for timelines that jump between engines (such as virtual engines).
1495
+ * If we consider the case of a virtual engine, we must emit a dma-fence
1496
+ * to prevent scheduling of the second request until the first is
1497
+ * complete (to maximise our greedy late load balancing) and this
1498
+ * precludes optimising to use semaphore serialisation of a single
1499
+ * timeline across engines.
10301500 */
1031
- head = rq->infix;
1032
- if (rq->postfix < head) {
1033
- memset(vaddr + head, 0, rq->ring->size - head);
1034
- head = 0;
1501
+ prev = to_request(__i915_active_fence_set(&timeline->last_request,
1502
+ &rq->fence));
1503
+ if (prev && !i915_request_completed(prev)) {
1504
+ /*
1505
+ * The requests are supposed to be kept in order. However,
1506
+ * we need to be wary in case the timeline->last_request
1507
+ * is used as a barrier for external modification to this
1508
+ * context.
1509
+ */
1510
+ GEM_BUG_ON(prev->context == rq->context &&
1511
+ i915_seqno_passed(prev->fence.seqno,
1512
+ rq->fence.seqno));
1513
+
1514
+ if (is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask))
1515
+ i915_sw_fence_await_sw_fence(&rq->submit,
1516
+ &prev->submit,
1517
+ &rq->submitq);
1518
+ else
1519
+ __i915_sw_fence_await_dma_fence(&rq->submit,
1520
+ &prev->fence,
1521
+ &rq->dmaq);
1522
+ if (rq->engine->schedule)
1523
+ __i915_sched_node_add_dependency(&rq->sched,
1524
+ &prev->sched,
1525
+ &rq->dep,
1526
+ 0);
10351527 }
1036
- memset(vaddr + head, 0, rq->postfix - head);
1528
+ if (prev)
1529
+ i915_request_put(prev);
1530
+
1531
+ /*
1532
+ * Make sure that no request gazumped us - if it was allocated after
1533
+ * our i915_request_alloc() and called __i915_request_add() before
1534
+ * us, the timeline will hold its seqno which is later than ours.
1535
+ */
1536
+ GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
1537
+
1538
+ return prev;
10371539 }
10381540
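__i915_request_add_to_timeline() above orders a new request behind whatever was previously at the tail of its timeline: swap the "last request" pointer, and only add a dependency if that previous request has not already completed. A stripped-down, single-threaded sketch of that bookkeeping (names are illustrative, and the lockless/RCU details are deliberately omitted):

#include <stdbool.h>
#include <stdio.h>

struct request {
	int seqno;
	bool completed;
	struct request *waits_on;	/* previous request on the same timeline */
};

struct timeline {
	struct request *last;
};

/* Append rq to tl: it only needs to wait if the old tail is still in flight. */
static void add_to_timeline(struct timeline *tl, struct request *rq)
{
	struct request *prev = tl->last;

	tl->last = rq;
	if (prev && !prev->completed)
		rq->waits_on = prev;
}

int main(void)
{
	struct timeline tl = { 0 };
	struct request a = { .seqno = 1 }, b = { .seqno = 2 }, c = { .seqno = 3 };

	add_to_timeline(&tl, &a);
	add_to_timeline(&tl, &b);	/* b waits on a */
	a.completed = true;
	b.completed = true;
	add_to_timeline(&tl, &c);	/* b already done, so c waits on nothing */

	printf("b waits on %d\n", b.waits_on ? b.waits_on->seqno : 0); /* 1 */
	printf("c waits on %d\n", c.waits_on ? c.waits_on->seqno : 0); /* 0 */
	return 0;
}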
10391541 /*
....@@ -1041,34 +1543,22 @@
10411543 * request is not being tracked for completion but the work itself is
10421544 * going to happen on the hardware. This would be a Bad Thing(tm).
10431545 */
1044
-void i915_request_add(struct i915_request *request)
1546
+struct i915_request *__i915_request_commit(struct i915_request *rq)
10451547 {
1046
- struct intel_engine_cs *engine = request->engine;
1047
- struct i915_timeline *timeline = request->timeline;
1048
- struct intel_ring *ring = request->ring;
1049
- struct i915_request *prev;
1548
+ struct intel_engine_cs *engine = rq->engine;
1549
+ struct intel_ring *ring = rq->ring;
10501550 u32 *cs;
10511551
1052
- GEM_TRACE("%s fence %llx:%d\n",
1053
- engine->name, request->fence.context, request->fence.seqno);
1054
-
1055
- lockdep_assert_held(&request->i915->drm.struct_mutex);
1056
- trace_i915_request_add(request);
1057
-
1058
- /*
1059
- * Make sure that no request gazumped us - if it was allocated after
1060
- * our i915_request_alloc() and called __i915_request_add() before
1061
- * us, the timeline will hold its seqno which is later than ours.
1062
- */
1063
- GEM_BUG_ON(timeline->seqno != request->fence.seqno);
1552
+ RQ_TRACE(rq, "\n");
10641553
10651554 /*
10661555 * To ensure that this call will not fail, space for its emissions
10671556 * should already have been reserved in the ring buffer. Let the ring
10681557 * know that it is time to use that space up.
10691558 */
1070
- request->reserved_space = 0;
1071
- engine->emit_flush(request, EMIT_FLUSH);
1559
+ GEM_BUG_ON(rq->reserved_space > ring->space);
1560
+ rq->reserved_space = 0;
1561
+ rq->emitted_jiffies = jiffies;
10721562
10731563 /*
10741564 * Record the position of the start of the breadcrumb so that
....@@ -1076,43 +1566,16 @@
10761566 * GPU processing the request, we never over-estimate the
10771567 * position of the ring's HEAD.
10781568 */
1079
- cs = intel_ring_begin(request, engine->emit_breadcrumb_sz);
1569
+ cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw);
10801570 GEM_BUG_ON(IS_ERR(cs));
1081
- request->postfix = intel_ring_offset(request, cs);
1571
+ rq->postfix = intel_ring_offset(rq, cs);
10821572
1083
- /*
1084
- * Seal the request and mark it as pending execution. Note that
1085
- * we may inspect this state, without holding any locks, during
1086
- * hangcheck. Hence we apply the barrier to ensure that we do not
1087
- * see a more recent value in the hws than we are tracking.
1088
- */
1573
+ return __i915_request_add_to_timeline(rq);
1574
+}
10891575
1090
- prev = i915_gem_active_raw(&timeline->last_request,
1091
- &request->i915->drm.struct_mutex);
1092
- if (prev && !i915_request_completed(prev)) {
1093
- i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
1094
- &request->submitq);
1095
- if (engine->schedule)
1096
- __i915_sched_node_add_dependency(&request->sched,
1097
- &prev->sched,
1098
- &request->dep,
1099
- 0);
1100
- }
1101
-
1102
- spin_lock_irq(&timeline->lock);
1103
- list_add_tail(&request->link, &timeline->requests);
1104
- spin_unlock_irq(&timeline->lock);
1105
-
1106
- GEM_BUG_ON(timeline->seqno != request->fence.seqno);
1107
- i915_gem_active_set(&timeline->last_request, request);
1108
-
1109
- list_add_tail(&request->ring_link, &ring->request_list);
1110
- if (list_is_first(&request->ring_link, &ring->request_list)) {
1111
- GEM_TRACE("marking %s as active\n", ring->timeline->name);
1112
- list_add(&ring->active_link, &request->i915->gt.active_rings);
1113
- }
1114
- request->emitted_jiffies = jiffies;
1115
-
1576
+void __i915_request_queue(struct i915_request *rq,
1577
+ const struct i915_sched_attr *attr)
1578
+{
11161579 /*
11171580 * Let the backend know a new request has arrived that may need
11181581 * to adjust the existing execution schedule due to a high priority
....@@ -1124,36 +1587,37 @@
11241587 * decide whether to preempt the entire chain so that it is ready to
11251588 * run at the earliest possible convenience.
11261589 */
1127
- local_bh_disable();
1128
- rcu_read_lock(); /* RCU serialisation for set-wedged protection */
1129
- if (engine->schedule)
1130
- engine->schedule(request, &request->gem_context->sched);
1131
- rcu_read_unlock();
1132
- i915_sw_fence_commit(&request->submit);
1133
- local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
1134
-
1135
- /*
1136
- * In typical scenarios, we do not expect the previous request on
1137
- * the timeline to be still tracked by timeline->last_request if it
1138
- * has been completed. If the completed request is still here, that
1139
- * implies that request retirement is a long way behind submission,
1140
- * suggesting that we haven't been retiring frequently enough from
1141
- * the combination of retire-before-alloc, waiters and the background
1142
- * retirement worker. So if the last request on this timeline was
1143
- * already completed, do a catch up pass, flushing the retirement queue
1144
- * up to this client. Since we have now moved the heaviest operations
1145
- * during retirement onto secondary workers, such as freeing objects
1146
- * or contexts, retiring a bunch of requests is mostly list management
1147
- * (and cache misses), and so we should not be overly penalizing this
1148
- * client by performing excess work, though we may still performing
1149
- * work on behalf of others -- but instead we should benefit from
1150
- * improved resource management. (Well, that's the theory at least.)
1151
- */
1152
- if (prev && i915_request_completed(prev))
1153
- i915_request_retire_upto(prev);
1590
+ if (attr && rq->engine->schedule)
1591
+ rq->engine->schedule(rq, attr);
1592
+ i915_sw_fence_commit(&rq->semaphore);
1593
+ i915_sw_fence_commit(&rq->submit);
11541594 }
11551595
1156
-static unsigned long local_clock_us(unsigned int *cpu)
1596
+void i915_request_add(struct i915_request *rq)
1597
+{
1598
+ struct intel_timeline * const tl = i915_request_timeline(rq);
1599
+ struct i915_sched_attr attr = {};
1600
+ struct i915_gem_context *ctx;
1601
+
1602
+ lockdep_assert_held(&tl->mutex);
1603
+ lockdep_unpin_lock(&tl->mutex, rq->cookie);
1604
+
1605
+ trace_i915_request_add(rq);
1606
+ __i915_request_commit(rq);
1607
+
1608
+ /* XXX placeholder for selftests */
1609
+ rcu_read_lock();
1610
+ ctx = rcu_dereference(rq->context->gem_context);
1611
+ if (ctx)
1612
+ attr = ctx->sched;
1613
+ rcu_read_unlock();
1614
+
1615
+ __i915_request_queue(rq, &attr);
1616
+
1617
+ mutex_unlock(&tl->mutex);
1618
+}
1619
+
1620
+static unsigned long local_clock_ns(unsigned int *cpu)
11571621 {
11581622 unsigned long t;
11591623
....@@ -1170,7 +1634,7 @@
11701634 * stop busywaiting, see busywait_stop().
11711635 */
11721636 *cpu = get_cpu();
1173
- t = local_clock() >> 10;
1637
+ t = local_clock();
11741638 put_cpu();
11751639
11761640 return t;
....@@ -1180,19 +1644,16 @@
11801644 {
11811645 unsigned int this_cpu;
11821646
1183
- if (time_after(local_clock_us(&this_cpu), timeout))
1647
+ if (time_after(local_clock_ns(&this_cpu), timeout))
11841648 return true;
11851649
11861650 return this_cpu != cpu;
11871651 }
11881652
1189
-static bool __i915_spin_request(const struct i915_request *rq,
1190
- u32 seqno, int state, unsigned long timeout_us)
1653
+static bool __i915_spin_request(struct i915_request * const rq, int state)
11911654 {
1192
- struct intel_engine_cs *engine = rq->engine;
1193
- unsigned int irq, cpu;
1194
-
1195
- GEM_BUG_ON(!seqno);
1655
+ unsigned long timeout_ns;
1656
+ unsigned int cpu;
11961657
11971658 /*
11981659 * Only wait for the request if we know it is likely to complete.
....@@ -1200,12 +1661,12 @@
12001661 * We don't track the timestamps around requests, nor the average
12011662 * request length, so we do not have a good indicator that this
12021663 * request will complete within the timeout. What we do know is the
1203
- * order in which requests are executed by the engine and so we can
1204
- * tell if the request has started. If the request hasn't started yet,
1205
- * it is a fair assumption that it will not complete within our
1206
- * relatively short timeout.
1664
+ * order in which requests are executed by the context and so we can
1665
+ * tell if the request has been started. If the request is not even
1666
+ * running yet, it is a fair assumption that it will not complete
1667
+ * within our relatively short timeout.
12071668 */
1208
- if (!i915_seqno_passed(intel_engine_get_seqno(engine), seqno - 1))
1669
+ if (!i915_request_is_running(rq))
12091670 return false;
12101671
12111672 /*
....@@ -1219,25 +1680,16 @@
12191680 * takes to sleep on a request, on the order of a microsecond.
12201681 */
12211682
1222
- irq = READ_ONCE(engine->breadcrumbs.irq_count);
1223
- timeout_us += local_clock_us(&cpu);
1683
+ timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns);
1684
+ timeout_ns += local_clock_ns(&cpu);
12241685 do {
1225
- if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno))
1226
- return seqno == i915_request_global_seqno(rq);
1227
-
1228
- /*
1229
- * Seqno are meant to be ordered *before* the interrupt. If
1230
- * we see an interrupt without a corresponding seqno advance,
1231
- * assume we won't see one in the near future but require
1232
- * the engine->seqno_barrier() to fixup coherency.
1233
- */
1234
- if (READ_ONCE(engine->breadcrumbs.irq_count) != irq)
1235
- break;
1686
+ if (dma_fence_is_signaled(&rq->fence))
1687
+ return true;
12361688
12371689 if (signal_pending_state(state, current))
12381690 break;
12391691
1240
- if (busywait_stop(timeout_us, cpu))
1692
+ if (busywait_stop(timeout_ns, cpu))
12411693 break;
12421694
12431695 cpu_relax();
....@@ -1246,16 +1698,16 @@
12461698 return false;
12471699 }
12481700
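busywait_stop() and __i915_spin_request() above implement a short, bounded spin: take a time budget up front, poll the completion condition, and give up once the deadline passes (the kernel version also bails on CPU migration or a pending signal). A user-space approximation using CLOCK_MONOTONIC; the "request completed" condition is simulated by a plain flag:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Spin for at most budget_ns waiting for *done to become true. */
static bool spin_wait(const volatile bool *done, uint64_t budget_ns)
{
	uint64_t deadline = now_ns() + budget_ns;

	do {
		if (*done)
			return true;
		/* a cpu_relax() equivalent would go here on a real SMP spin */
	} while (now_ns() < deadline);

	return false;	/* caller falls back to sleeping on an interrupt */
}

int main(void)
{
	volatile bool done = false;

	/* Nothing will complete it, so this returns false after ~10us. */
	printf("completed within budget: %d\n", spin_wait(&done, 10000));
	return 0;
}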
1249
-static bool __i915_wait_request_check_and_reset(struct i915_request *request)
1701
+struct request_wait {
1702
+ struct dma_fence_cb cb;
1703
+ struct task_struct *tsk;
1704
+};
1705
+
1706
+static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
12501707 {
1251
- struct i915_gpu_error *error = &request->i915->gpu_error;
1708
+ struct request_wait *wait = container_of(cb, typeof(*wait), cb);
12521709
1253
- if (likely(!i915_reset_handoff(error)))
1254
- return false;
1255
-
1256
- __set_current_state(TASK_RUNNING);
1257
- i915_reset(request->i915, error->stalled_mask, error->reason);
1258
- return true;
1710
+ wake_up_process(fetch_and_zero(&wait->tsk));
12591711 }
12601712
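request_wait_wake() above is the completion callback: it atomically claims the waiter pointer (fetch_and_zero()) before waking it, so the wake path and the waiter's later callback removal cannot both act on the same pointer. A rough pthread analogue of that claim-then-wake pattern (the structure and thread names are made up for the sketch; build with -lpthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

struct waiter {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done;
};

/* The callback claims the waiter pointer exactly once, then wakes it. */
static _Atomic(struct waiter *) pending;

static void signal_completion(void)
{
	struct waiter *w = atomic_exchange(&pending, NULL);

	if (!w)		/* already claimed, e.g. the waiter gave up and left */
		return;

	pthread_mutex_lock(&w->lock);
	w->done = 1;
	pthread_cond_signal(&w->cond);
	pthread_mutex_unlock(&w->lock);
}

static void *irq_thread(void *arg)
{
	(void)arg;
	usleep(1000);		/* pretend the hardware takes a moment */
	signal_completion();
	return NULL;
}

int main(void)
{
	struct waiter w = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
	};
	pthread_t thr;

	atomic_store(&pending, &w);
	pthread_create(&thr, NULL, irq_thread, NULL);

	pthread_mutex_lock(&w.lock);
	while (!w.done)
		pthread_cond_wait(&w.cond, &w.lock);
	pthread_mutex_unlock(&w.lock);

	pthread_join(thr, NULL);
	printf("woken after completion\n");
	return 0;
}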
12611713 /**
....@@ -1268,10 +1720,6 @@
12681720 * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
12691721 * unbounded wait).
12701722 *
1271
- * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED
1272
- * in via the flags, and vice versa if the struct_mutex is not held, the caller
1273
- * must not specify that the wait is locked.
1274
- *
12751723 * Returns the remaining time (in jiffies) if the request completed, which may
12761724 * be zero or -ETIME if the request is unfinished after the timeout expires.
12771725 * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is
....@@ -1283,20 +1731,12 @@
12831731 {
12841732 const int state = flags & I915_WAIT_INTERRUPTIBLE ?
12851733 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
1286
- wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue;
1287
- DEFINE_WAIT_FUNC(reset, default_wake_function);
1288
- DEFINE_WAIT_FUNC(exec, default_wake_function);
1289
- struct intel_wait wait;
1734
+ struct request_wait wait;
12901735
12911736 might_sleep();
1292
-#if IS_ENABLED(CONFIG_LOCKDEP)
1293
- GEM_BUG_ON(debug_locks &&
1294
- !!lockdep_is_held(&rq->i915->drm.struct_mutex) !=
1295
- !!(flags & I915_WAIT_LOCKED));
1296
-#endif
12971737 GEM_BUG_ON(timeout < 0);
12981738
1299
- if (i915_request_completed(rq))
1739
+ if (dma_fence_is_signaled(&rq->fence))
13001740 return timeout;
13011741
13021742 if (!timeout)
....@@ -1304,55 +1744,84 @@
13041744
13051745 trace_i915_request_wait_begin(rq, flags);
13061746
1307
- add_wait_queue(&rq->execute, &exec);
1308
- if (flags & I915_WAIT_LOCKED)
1309
- add_wait_queue(errq, &reset);
1747
+ /*
1748
+ * We must never wait on the GPU while holding a lock as we
1749
+ * may need to perform a GPU reset. So while we don't need to
1750
+ * serialise wait/reset with an explicit lock, we do want
1751
+ * lockdep to detect potential dependency cycles.
1752
+ */
1753
+ mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_);
13101754
1311
- intel_wait_init(&wait);
1755
+ /*
1756
+ * Optimistic spin before touching IRQs.
1757
+ *
1758
+ * We may use a rather large value here to offset the penalty of
1759
+ * switching away from the active task. Frequently, the client will
1760
+ * wait upon an old swapbuffer to throttle itself to remain within a
1761
+ * frame of the gpu. If the client is running in lockstep with the gpu,
1762
+ * then it should not be waiting long at all, and a sleep now will incur
1763
+ * extra scheduler latency in producing the next frame. To try to
1764
+ * avoid adding the cost of enabling/disabling the interrupt to the
1765
+ * short wait, we first spin to see if the request would have completed
1766
+ * in the time taken to set up the interrupt.
1767
+ *
1768
+ * We need up to 5us to enable the irq, and up to 20us to hide the
1769
+ * scheduler latency of a context switch, ignoring the secondary
1770
+ * impacts from a context switch such as cache eviction.
1771
+ *
1772
+ * The scheme used for low-latency IO is called "hybrid interrupt
1773
+ * polling". The suggestion there is to sleep until just before you
1774
+ * expect to be woken by the device interrupt and then poll for its
1775
+ * completion. That requires having a good predictor for the request
1776
+ * duration, which we currently lack.
1777
+ */
1778
+ if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) &&
1779
+ __i915_spin_request(rq, state))
1780
+ goto out;
13121781
1313
-restart:
1314
- do {
1315
- set_current_state(state);
1316
- if (intel_wait_update_request(&wait, rq))
1317
- break;
1782
+ /*
1783
+ * This client is about to stall waiting for the GPU. In many cases
1784
+ * this is undesirable and limits the throughput of the system, as
1785
+ * many clients cannot continue processing user input/output whilst
1786
+ * blocked. RPS autotuning may take tens of milliseconds to respond
1787
+ * to the GPU load and thus incurs additional latency for the client.
1788
+ * We can circumvent that by promoting the GPU frequency to maximum
1789
+ * before we sleep. This makes the GPU throttle up much more quickly
1790
+ * (good for benchmarks and user experience, e.g. window animations),
1791
+ * but at a cost of spending more power processing the workload
1792
+ * (bad for battery).
1793
+ */
1794
+ if (flags & I915_WAIT_PRIORITY && !i915_request_started(rq))
1795
+ intel_rps_boost(rq);
13181796
1319
- if (flags & I915_WAIT_LOCKED &&
1320
- __i915_wait_request_check_and_reset(rq))
1321
- continue;
1797
+ wait.tsk = current;
1798
+ if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
1799
+ goto out;
13221800
1323
- if (signal_pending_state(state, current)) {
1324
- timeout = -ERESTARTSYS;
1325
- goto complete;
1326
- }
1327
-
1328
- if (!timeout) {
1329
- timeout = -ETIME;
1330
- goto complete;
1331
- }
1332
-
1333
- timeout = io_schedule_timeout(timeout);
1334
- } while (1);
1335
-
1336
- GEM_BUG_ON(!intel_wait_has_seqno(&wait));
1337
- GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
1338
-
1339
- /* Optimistic short spin before touching IRQs */
1340
- if (__i915_spin_request(rq, wait.seqno, state, 5))
1341
- goto complete;
1342
-
1343
- set_current_state(state);
1344
- if (intel_engine_add_wait(rq->engine, &wait))
1345
- /*
1346
- * In order to check that we haven't missed the interrupt
1347
- * as we enabled it, we need to kick ourselves to do a
1348
- * coherent check on the seqno before we sleep.
1349
- */
1350
- goto wakeup;
1351
-
1352
- if (flags & I915_WAIT_LOCKED)
1353
- __i915_wait_request_check_and_reset(rq);
1801
+ /*
1802
+ * Flush the submission tasklet, but only if it may help this request.
1803
+ *
1804
+ * We sometimes experience some latency between the HW interrupts and
1805
+ * tasklet execution (mostly due to ksoftirqd latency, but it can also
1806
+ * be due to lazy CS events), so let's run the tasklet manually if there
1807
+ * is a chance it may submit this request. If the request is not ready
1808
+ * to run, as it is waiting for other fences to be signaled, flushing
1809
+ * the tasklet is busy work without any advantage for this client.
1810
+ *
1811
+ * If the HW is being lazy, this is the last chance before we go to
1812
+ * sleep to catch any pending events. We will check periodically in
1813
+ * the heartbeat to flush the submission tasklets as a last resort
1814
+ * for unhappy HW.
1815
+ */
1816
+ if (i915_request_is_ready(rq))
1817
+ intel_engine_flush_submission(rq->engine);
13541818
13551819 for (;;) {
1820
+ set_current_state(state);
1821
+
1822
+ if (dma_fence_is_signaled(&rq->fence))
1823
+ break;
1824
+
13561825 if (signal_pending_state(state, current)) {
13571826 timeout = -ERESTARTSYS;
13581827 break;
....@@ -1364,86 +1833,65 @@
13641833 }
13651834
13661835 timeout = io_schedule_timeout(timeout);
1367
-
1368
- if (intel_wait_complete(&wait) &&
1369
- intel_wait_check_request(&wait, rq))
1370
- break;
1371
-
1372
- set_current_state(state);
1373
-
1374
-wakeup:
1375
- /*
1376
- * Carefully check if the request is complete, giving time
1377
- * for the seqno to be visible following the interrupt.
1378
- * We also have to check in case we are kicked by the GPU
1379
- * reset in order to drop the struct_mutex.
1380
- */
1381
- if (__i915_request_irq_complete(rq))
1382
- break;
1383
-
1384
- /*
1385
- * If the GPU is hung, and we hold the lock, reset the GPU
1386
- * and then check for completion. On a full reset, the engine's
1387
- * HW seqno will be advanced passed us and we are complete.
1388
- * If we do a partial reset, we have to wait for the GPU to
1389
- * resume and update the breadcrumb.
1390
- *
1391
- * If we don't hold the mutex, we can just wait for the worker
1392
- * to come along and update the breadcrumb (either directly
1393
- * itself, or indirectly by recovering the GPU).
1394
- */
1395
- if (flags & I915_WAIT_LOCKED &&
1396
- __i915_wait_request_check_and_reset(rq))
1397
- continue;
1398
-
1399
- /* Only spin if we know the GPU is processing this request */
1400
- if (__i915_spin_request(rq, wait.seqno, state, 2))
1401
- break;
1402
-
1403
- if (!intel_wait_check_request(&wait, rq)) {
1404
- intel_engine_remove_wait(rq->engine, &wait);
1405
- goto restart;
1406
- }
14071836 }
1408
-
1409
- intel_engine_remove_wait(rq->engine, &wait);
1410
-complete:
14111837 __set_current_state(TASK_RUNNING);
1412
- if (flags & I915_WAIT_LOCKED)
1413
- remove_wait_queue(errq, &reset);
1414
- remove_wait_queue(&rq->execute, &exec);
1838
+
1839
+ if (READ_ONCE(wait.tsk))
1840
+ dma_fence_remove_callback(&rq->fence, &wait.cb);
1841
+ GEM_BUG_ON(!list_empty(&wait.cb.node));
1842
+
1843
+out:
1844
+ mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
14151845 trace_i915_request_wait_end(rq);
1416
-
14171846 return timeout;
1418
-}
1419
-
1420
-static void ring_retire_requests(struct intel_ring *ring)
1421
-{
1422
- struct i915_request *request, *next;
1423
-
1424
- list_for_each_entry_safe(request, next,
1425
- &ring->request_list, ring_link) {
1426
- if (!i915_request_completed(request))
1427
- break;
1428
-
1429
- i915_request_retire(request);
1430
- }
1431
-}
1432
-
1433
-void i915_retire_requests(struct drm_i915_private *i915)
1434
-{
1435
- struct intel_ring *ring, *tmp;
1436
-
1437
- lockdep_assert_held(&i915->drm.struct_mutex);
1438
-
1439
- if (!i915->gt.active_requests)
1440
- return;
1441
-
1442
- list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link)
1443
- ring_retire_requests(ring);
14441847 }
14451848
14461849 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
14471850 #include "selftests/mock_request.c"
14481851 #include "selftests/i915_request.c"
14491852 #endif
1853
+
1854
+static void i915_global_request_shrink(void)
1855
+{
1856
+ kmem_cache_shrink(global.slab_execute_cbs);
1857
+ kmem_cache_shrink(global.slab_requests);
1858
+}
1859
+
1860
+static void i915_global_request_exit(void)
1861
+{
1862
+ kmem_cache_destroy(global.slab_execute_cbs);
1863
+ kmem_cache_destroy(global.slab_requests);
1864
+}
1865
+
1866
+static struct i915_global_request global = { {
1867
+ .shrink = i915_global_request_shrink,
1868
+ .exit = i915_global_request_exit,
1869
+} };
1870
+
1871
+int __init i915_global_request_init(void)
1872
+{
1873
+ global.slab_requests =
1874
+ kmem_cache_create("i915_request",
1875
+ sizeof(struct i915_request),
1876
+ __alignof__(struct i915_request),
1877
+ SLAB_HWCACHE_ALIGN |
1878
+ SLAB_RECLAIM_ACCOUNT |
1879
+ SLAB_TYPESAFE_BY_RCU,
1880
+ __i915_request_ctor);
1881
+ if (!global.slab_requests)
1882
+ return -ENOMEM;
1883
+
1884
+ global.slab_execute_cbs = KMEM_CACHE(execute_cb,
1885
+ SLAB_HWCACHE_ALIGN |
1886
+ SLAB_RECLAIM_ACCOUNT |
1887
+ SLAB_TYPESAFE_BY_RCU);
1888
+ if (!global.slab_execute_cbs)
1889
+ goto err_requests;
1890
+
1891
+ i915_global_register(&global.base);
1892
+ return 0;
1893
+
1894
+err_requests:
1895
+ kmem_cache_destroy(global.slab_requests);
1896
+ return -ENOMEM;
1897
+}
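The init path above follows the common pattern for bringing up several caches: create each in turn and, on failure, tear down the ones already created via a label chain. The same shape in a small user-space sketch, with plain malloc'd buffers standing in for the kmem caches:

#include <stdio.h>
#include <stdlib.h>

struct pools {
	void *requests;
	void *execute_cbs;
};

/* Create both pools, unwinding the first if the second fails. */
static int pools_init(struct pools *p)
{
	p->requests = malloc(4096);
	if (!p->requests)
		return -1;

	p->execute_cbs = malloc(4096);
	if (!p->execute_cbs)
		goto err_requests;

	return 0;

err_requests:
	free(p->requests);
	p->requests = NULL;
	return -1;
}

int main(void)
{
	struct pools p;

	if (pools_init(&p))
		return 1;

	printf("pools ready\n");
	free(p.execute_cbs);
	free(p.requests);
	return 0;
}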