2024-05-10 cde9070d9970eef1f7ec2360586c802a16230ad8
kernel/drivers/gpu/drm/i915/i915_request.h
@@ -26,34 +26,116 @@
 #define I915_REQUEST_H

 #include <linux/dma-fence.h>
+#include <linux/irq_work.h>
+#include <linux/lockdep.h>
+
+#include "gem/i915_gem_context_types.h"
+#include "gt/intel_context_types.h"
+#include "gt/intel_engine_types.h"
+#include "gt/intel_timeline_types.h"

 #include "i915_gem.h"
 #include "i915_scheduler.h"
+#include "i915_selftest.h"
 #include "i915_sw_fence.h"
-#include "i915_scheduler.h"

 #include <uapi/drm/i915_drm.h>

 struct drm_file;
 struct drm_i915_gem_object;
 struct i915_request;
-struct i915_timeline;
-
-struct intel_wait {
-        struct rb_node node;
-        struct task_struct *tsk;
-        struct i915_request *request;
-        u32 seqno;
-};
-
-struct intel_signal_node {
-        struct intel_wait wait;
-        struct list_head link;
-};

 struct i915_capture_list {
         struct i915_capture_list *next;
         struct i915_vma *vma;
+};
+
+#define RQ_TRACE(rq, fmt, ...) do {                                     \
+        const struct i915_request *rq__ = (rq);                         \
+        ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d " fmt,  \
+                     rq__->fence.context, rq__->fence.seqno,            \
+                     hwsp_seqno(rq__), ##__VA_ARGS__);                  \
+} while (0)
+
+enum {
+        /*
+         * I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW.
+         *
+         * Set by __i915_request_submit() on handing over to HW, and cleared
+         * by __i915_request_unsubmit() if we preempt this request.
+         *
+         * Finally cleared for consistency on retiring the request, when
+         * we know the HW is no longer running this request.
+         *
+         * See i915_request_is_active()
+         */
+        I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS,
+
+        /*
+         * I915_FENCE_FLAG_PQUEUE - this request is ready for execution
+         *
+         * Using the scheduler, when a request is ready for execution it is put
+         * into the priority queue, and removed from that queue when transferred
+         * to the HW runlists. We want to track its membership within the
+         * priority queue so that we can easily check before rescheduling.
+         *
+         * See i915_request_in_priority_queue()
+         */
+        I915_FENCE_FLAG_PQUEUE,
+
+        /*
+         * I915_FENCE_FLAG_HOLD - this request is currently on hold
+         *
+         * This request has been suspended, pending an ongoing investigation.
+         */
+        I915_FENCE_FLAG_HOLD,
+
+        /*
+         * I915_FENCE_FLAG_INITIAL_BREADCRUMB - this request has the initial
+         * breadcrumb that marks the end of semaphore waits and start of the
+         * user payload.
+         */
+        I915_FENCE_FLAG_INITIAL_BREADCRUMB,
+
+        /*
+         * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list
+         *
+         * Internal bookkeeping used by the breadcrumb code to track when
+         * a request is on the various signal_list.
+         */
+        I915_FENCE_FLAG_SIGNAL,
+
+        /*
+         * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted
+         *
+         * The execution of some requests should not be interrupted. This is
+         * a sensitive operation as it makes the request super important,
+         * blocking other higher priority work. Abuse of this flag will
+         * lead to quality of service issues.
+         */
+        I915_FENCE_FLAG_NOPREEMPT,
+
+        /*
+         * I915_FENCE_FLAG_SENTINEL - this request should be last in the queue
+         *
+         * A high priority sentinel request may be submitted to clear the
+         * submission queue. As it will be the only request in-flight, upon
+         * execution all other active requests will have been preempted and
+         * unsubmitted. This preemptive pulse is used to re-evaluate the
+         * in-flight requests, particularly in cases where an active context
+         * is banned and those active requests need to be cancelled.
+         */
+        I915_FENCE_FLAG_SENTINEL,
+
+        /*
+         * I915_FENCE_FLAG_BOOST - upclock the gpu for this request
+         *
+         * Some requests are more important than others! In particular, a
+         * request that the user is waiting on is typically required for
+         * interactive latency, for which we want to minimise by upclocking
+         * the GPU. Here we track such boost requests on a per-request basis.
+         */
+        I915_FENCE_FLAG_BOOST,
 };

 /**
@@ -80,9 +162,6 @@
         struct dma_fence fence;
         spinlock_t lock;

-        /** On Which ring this request was generated */
-        struct drm_i915_private *i915;
-
         /**
          * Context and ring buffer related to this request
          * Contexts are refcounted, so when this request is associated with a
@@ -93,12 +172,30 @@
          * i915_request_free() will then decrement the refcount on the
          * context.
          */
-        struct i915_gem_context *gem_context;
         struct intel_engine_cs *engine;
-        struct intel_context *hw_context;
+        struct intel_context *context;
         struct intel_ring *ring;
-        struct i915_timeline *timeline;
-        struct intel_signal_node signaling;
+        struct intel_timeline __rcu *timeline;
+
+        struct list_head signal_link;
+        struct llist_node signal_node;
+
+        /*
+         * The rcu epoch of when this request was allocated. Used to judiciously
+         * apply backpressure on future allocations to ensure that under
+         * mempressure there is sufficient RCU ticks for us to reclaim our
+         * RCU protected slabs.
+         */
+        unsigned long rcustate;
+
+        /*
+         * We pin the timeline->mutex while constructing the request to
+         * ensure that no caller accidentally drops it during construction.
+         * The timeline->mutex must be held to ensure that only this caller
+         * can use the ring and manipulate the associated timeline during
+         * construction.
+         */
+        struct pin_cookie cookie;

         /*
          * Fences for the various phases in the request's lifetime.
@@ -108,8 +205,16 @@
          * It is used by the driver to then queue the request for execution.
          */
         struct i915_sw_fence submit;
-        wait_queue_entry_t submitq;
-        wait_queue_head_t execute;
+        union {
+                wait_queue_entry_t submitq;
+                struct i915_sw_dma_fence_cb dmaq;
+                struct i915_request_duration_cb {
+                        struct dma_fence_cb cb;
+                        ktime_t emitted;
+                } duration;
+        };
+        struct llist_head execute_cb;
+        struct i915_sw_fence semaphore;

         /*
          * A list of everyone we wait upon, and everyone who waits upon us.
@@ -122,14 +227,24 @@
          */
         struct i915_sched_node sched;
         struct i915_dependency dep;
+        intel_engine_mask_t execution_mask;

-        /**
-         * GEM sequence number associated with this request on the
-         * global execution timeline. It is zero when the request is not
-         * on the HW queue (i.e. not on the engine timeline list).
-         * Its value is guarded by the timeline spinlock.
+        /*
+         * A convenience pointer to the current breadcrumb value stored in
+         * the HW status page (or our timeline's local equivalent). The full
+         * path would be rq->hw_context->ring->timeline->hwsp_seqno.
          */
-        u32 global_seqno;
+        const u32 *hwsp_seqno;
+
+        /*
+         * If we need to access the timeline's seqno for this request in
+         * another request, we need to keep a read reference to this associated
+         * cacheline, so that we do not free and recycle it before the foreign
+         * observers have completed. Hence, we keep a pointer to the cacheline
+         * inside the timeline's HWSP vma, but it is only valid while this
+         * request has not completed and guarded by the timeline mutex.
+         */
+        struct intel_timeline_cacheline __rcu *hwsp_cacheline;

         /** Position in the ring of the start of the request */
         u32 head;
@@ -164,22 +279,17 @@
          * on the active_list (of their final request).
          */
         struct i915_capture_list *capture_list;
-        struct list_head active_list;

         /** Time at which this request was emitted, in jiffies. */
         unsigned long emitted_jiffies;

-        bool waitboost;
-
-        /** engine->request_list entry for this request */
+        /** timeline->request entry for this request */
         struct list_head link;

-        /** ring->request_list entry for this request */
-        struct list_head ring_link;
-
-        struct drm_i915_file_private *file_priv;
-        /** file_priv list entry for this request */
-        struct list_head client_link;
+        I915_SELFTEST_DECLARE(struct {
+                struct list_head link;
+                unsigned long delay;
+        } mock;)
 };

 #define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
@@ -191,9 +301,21 @@
         return fence->ops == &i915_fence_ops;
 }

+struct kmem_cache *i915_request_slab_cache(void);
+
 struct i915_request * __must_check
-i915_request_alloc(struct intel_engine_cs *engine,
-                   struct i915_gem_context *ctx);
+__i915_request_create(struct intel_context *ce, gfp_t gfp);
+struct i915_request * __must_check
+i915_request_create(struct intel_context *ce);
+
+void i915_request_set_error_once(struct i915_request *rq, int error);
+void __i915_request_skip(struct i915_request *rq);
+
+struct i915_request *__i915_request_commit(struct i915_request *request);
+void __i915_request_queue(struct i915_request *rq,
+                          const struct i915_sched_attr *attr);
+
+bool i915_request_retire(struct i915_request *rq);
 void i915_request_retire_upto(struct i915_request *rq);

 static inline struct i915_request *
@@ -223,42 +345,20 @@
         dma_fence_put(&rq->fence);
 }

-/**
- * i915_request_global_seqno - report the current global seqno
- * @request - the request
- *
- * A request is assigned a global seqno only when it is on the hardware
- * execution queue. The global seqno can be used to maintain a list of
- * requests on the same engine in retirement order, for example for
- * constructing a priority queue for waiting. Prior to its execution, or
- * if it is subsequently removed in the event of preemption, its global
- * seqno is zero. As both insertion and removal from the execution queue
- * may operate in IRQ context, it is not guarded by the usual struct_mutex
- * BKL. Instead those relying on the global seqno must be prepared for its
- * value to change between reads. Only when the request is complete can
- * the global seqno be stable (due to the memory barriers on submitting
- * the commands to the hardware to write the breadcrumb, if the HWS shows
- * that it has passed the global seqno and the global seqno is unchanged
- * after the read, it is indeed complete).
- */
-static u32
-i915_request_global_seqno(const struct i915_request *request)
-{
-        return READ_ONCE(request->global_seqno);
-}
-
 int i915_request_await_object(struct i915_request *to,
                               struct drm_i915_gem_object *obj,
                               bool write);
 int i915_request_await_dma_fence(struct i915_request *rq,
                                  struct dma_fence *fence);
+int i915_request_await_execution(struct i915_request *rq,
+                                 struct dma_fence *fence,
+                                 void (*hook)(struct i915_request *rq,
+                                              struct dma_fence *signal));

 void i915_request_add(struct i915_request *rq);

-void __i915_request_submit(struct i915_request *request);
+bool __i915_request_submit(struct i915_request *request);
 void i915_request_submit(struct i915_request *request);
-
-void i915_request_skip(struct i915_request *request, int error);

 void __i915_request_unsubmit(struct i915_request *request);
 void i915_request_unsubmit(struct i915_request *request);
@@ -268,11 +368,30 @@
                        long timeout)
         __attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE BIT(0)
-#define I915_WAIT_LOCKED        BIT(1) /* struct_mutex held, handle GPU reset */
+#define I915_WAIT_PRIORITY      BIT(1) /* small priority bump for the request */
 #define I915_WAIT_ALL           BIT(2) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(3)

-static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
+static inline bool i915_request_signaled(const struct i915_request *rq)
+{
+        /* The request may live longer than its HWSP, so check flags first! */
+        return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags);
+}
+
+static inline bool i915_request_is_active(const struct i915_request *rq)
+{
+        return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
+}
+
+static inline bool i915_request_in_priority_queue(const struct i915_request *rq)
+{
+        return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+}
+
+static inline bool
+i915_request_has_initial_breadcrumb(const struct i915_request *rq)
+{
+        return test_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
+}

 /**
  * Returns true if seq1 is later than seq2.
@@ -282,428 +401,211 @@
         return (s32)(seq1 - seq2) >= 0;
 }

-static inline bool
-__i915_request_completed(const struct i915_request *rq, u32 seqno)
+static inline u32 __hwsp_seqno(const struct i915_request *rq)
 {
-        GEM_BUG_ON(!seqno);
-        return i915_seqno_passed(intel_engine_get_seqno(rq->engine), seqno) &&
-               seqno == i915_request_global_seqno(rq);
+        const u32 *hwsp = READ_ONCE(rq->hwsp_seqno);
+
+        return READ_ONCE(*hwsp);
+}
+
+/**
+ * hwsp_seqno - the current breadcrumb value in the HW status page
+ * @rq: the request, to chase the relevant HW status page
+ *
+ * The emphasis in naming here is that hwsp_seqno() is not a property of the
+ * request, but an indication of the current HW state (associated with this
+ * request). Its value will change as the GPU executes more requests.
+ *
+ * Returns the current breadcrumb value in the associated HW status page (or
+ * the local timeline's equivalent) for this request. The request itself
+ * has the associated breadcrumb value of rq->fence.seqno, when the HW
+ * status page has that breadcrumb or later, this request is complete.
+ */
+static inline u32 hwsp_seqno(const struct i915_request *rq)
+{
+        u32 seqno;
+
+        rcu_read_lock(); /* the HWSP may be freed at runtime */
+        seqno = __hwsp_seqno(rq);
+        rcu_read_unlock();
+
+        return seqno;
+}
+
+static inline bool __i915_request_has_started(const struct i915_request *rq)
+{
+        return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno - 1);
+}
+
+/**
+ * i915_request_started - check if the request has begun being executed
+ * @rq: the request
+ *
+ * If the timeline is not using initial breadcrumbs, a request is
+ * considered started if the previous request on its timeline (i.e.
+ * context) has been signaled.
+ *
+ * If the timeline is using semaphores, it will also be emitting an
+ * "initial breadcrumb" after the semaphores are complete and just before
+ * it began executing the user payload. A request can therefore be active
+ * on the HW and not yet started as it is still busywaiting on its
+ * dependencies (via HW semaphores).
+ *
+ * If the request has started, its dependencies will have been signaled
+ * (either by fences or by semaphores) and it will have begun processing
+ * the user payload.
+ *
+ * However, even if a request has started, it may have been preempted and
+ * so no longer active, or it may have already completed.
+ *
+ * See also i915_request_is_active().
+ *
+ * Returns true if the request has begun executing the user payload, or
+ * has completed:
+ */
+static inline bool i915_request_started(const struct i915_request *rq)
+{
+        bool result;
+
+        if (i915_request_signaled(rq))
+                return true;
+
+        result = true;
+        rcu_read_lock(); /* the HWSP may be freed at runtime */
+        if (likely(!i915_request_signaled(rq)))
+                /* Remember: started but may have since been preempted! */
+                result = __i915_request_has_started(rq);
+        rcu_read_unlock();
+
+        return result;
+}
+
+/**
+ * i915_request_is_running - check if the request may actually be executing
+ * @rq: the request
+ *
+ * Returns true if the request is currently submitted to hardware, has passed
+ * its start point (i.e. the context is setup and not busywaiting). Note that
+ * it may no longer be running by the time the function returns!
+ */
+static inline bool i915_request_is_running(const struct i915_request *rq)
+{
+        bool result;
+
+        if (!i915_request_is_active(rq))
+                return false;
+
+        rcu_read_lock();
+        result = __i915_request_has_started(rq) && i915_request_is_active(rq);
+        rcu_read_unlock();
+
+        return result;
+}
+
+/**
+ * i915_request_is_ready - check if the request is ready for execution
+ * @rq: the request
+ *
+ * Upon construction, the request is instructed to wait upon various
+ * signals before it is ready to be executed by the HW. That is, we do
+ * not want to start execution and read data before it is written. In practice,
+ * this is controlled with a mixture of interrupts and semaphores. Once
+ * the submit fence is completed, the backend scheduler will place the
+ * request into its queue and from there submit it for execution. So we
+ * can detect when a request is eligible for execution (and is under control
+ * of the scheduler) by querying where it is in any of the scheduler's lists.
+ *
+ * Returns true if the request is ready for execution (it may be inflight),
+ * false otherwise.
+ */
+static inline bool i915_request_is_ready(const struct i915_request *rq)
+{
+        return !list_empty(&rq->sched.link);
+}
+
+static inline bool __i915_request_is_complete(const struct i915_request *rq)
+{
+        return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
 }

 static inline bool i915_request_completed(const struct i915_request *rq)
 {
-        u32 seqno;
+        bool result;

-        seqno = i915_request_global_seqno(rq);
-        if (!seqno)
-                return false;
+        if (i915_request_signaled(rq))
+                return true;

-        return __i915_request_completed(rq, seqno);
-}
-
-static inline bool i915_request_started(const struct i915_request *rq)
-{
-        u32 seqno;
-
-        seqno = i915_request_global_seqno(rq);
-        if (!seqno)
-                return false;
-
-        return i915_seqno_passed(intel_engine_get_seqno(rq->engine),
-                                 seqno - 1);
-}
-
-static inline bool i915_sched_node_signaled(const struct i915_sched_node *node)
-{
-        const struct i915_request *rq =
-                container_of(node, const struct i915_request, sched);
-
-        return i915_request_completed(rq);
-}
-
-void i915_retire_requests(struct drm_i915_private *i915);
-
-/*
- * We treat requests as fences. This is not be to confused with our
- * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
- * We use the fences to synchronize access from the CPU with activity on the
- * GPU, for example, we should not rewrite an object's PTE whilst the GPU
- * is reading them. We also track fences at a higher level to provide
- * implicit synchronisation around GEM objects, e.g. set-domain will wait
- * for outstanding GPU rendering before marking the object ready for CPU
- * access, or a pageflip will wait until the GPU is complete before showing
- * the frame on the scanout.
- *
- * In order to use a fence, the object must track the fence it needs to
- * serialise with. For example, GEM objects want to track both read and
- * write access so that we can perform concurrent read operations between
- * the CPU and GPU engines, as well as waiting for all rendering to
- * complete, or waiting for the last GPU user of a "fence register". The
- * object then embeds a #i915_gem_active to track the most recent (in
- * retirement order) request relevant for the desired mode of access.
- * The #i915_gem_active is updated with i915_gem_active_set() to track the
- * most recent fence request, typically this is done as part of
- * i915_vma_move_to_active().
- *
- * When the #i915_gem_active completes (is retired), it will
- * signal its completion to the owner through a callback as well as mark
- * itself as idle (i915_gem_active.request == NULL). The owner
- * can then perform any action, such as delayed freeing of an active
- * resource including itself.
- */
-struct i915_gem_active;
-
-typedef void (*i915_gem_retire_fn)(struct i915_gem_active *,
-                                   struct i915_request *);
-
-struct i915_gem_active {
-        struct i915_request __rcu *request;
-        struct list_head link;
-        i915_gem_retire_fn retire;
-};
-
-void i915_gem_retire_noop(struct i915_gem_active *,
-                          struct i915_request *request);
-
-/**
- * init_request_active - prepares the activity tracker for use
- * @active - the active tracker
- * @func - a callback when then the tracker is retired (becomes idle),
- *         can be NULL
- *
- * init_request_active() prepares the embedded @active struct for use as
- * an activity tracker, that is for tracking the last known active request
- * associated with it. When the last request becomes idle, when it is retired
- * after completion, the optional callback @func is invoked.
- */
-static inline void
-init_request_active(struct i915_gem_active *active,
-                    i915_gem_retire_fn retire)
-{
-        RCU_INIT_POINTER(active->request, NULL);
-        INIT_LIST_HEAD(&active->link);
-        active->retire = retire ?: i915_gem_retire_noop;
-}
-
-/**
- * i915_gem_active_set - updates the tracker to watch the current request
- * @active - the active tracker
- * @request - the request to watch
- *
- * i915_gem_active_set() watches the given @request for completion. Whilst
- * that @request is busy, the @active reports busy. When that @request is
- * retired, the @active tracker is updated to report idle.
- */
-static inline void
-i915_gem_active_set(struct i915_gem_active *active,
-                    struct i915_request *request)
-{
-        list_move(&active->link, &request->active_list);
-        rcu_assign_pointer(active->request, request);
-}
-
-/**
- * i915_gem_active_set_retire_fn - updates the retirement callback
- * @active - the active tracker
- * @fn - the routine called when the request is retired
- * @mutex - struct_mutex used to guard retirements
- *
- * i915_gem_active_set_retire_fn() updates the function pointer that
- * is called when the final request associated with the @active tracker
- * is retired.
- */
-static inline void
-i915_gem_active_set_retire_fn(struct i915_gem_active *active,
-                              i915_gem_retire_fn fn,
-                              struct mutex *mutex)
-{
-        lockdep_assert_held(mutex);
-        active->retire = fn ?: i915_gem_retire_noop;
-}
-
-static inline struct i915_request *
-__i915_gem_active_peek(const struct i915_gem_active *active)
-{
-        /*
-         * Inside the error capture (running with the driver in an unknown
-         * state), we want to bend the rules slightly (a lot).
-         *
-         * Work is in progress to make it safer, in the meantime this keeps
-         * the known issue from spamming the logs.
-         */
-        return rcu_dereference_protected(active->request, 1);
-}
-
-/**
- * i915_gem_active_raw - return the active request
- * @active - the active tracker
- *
- * i915_gem_active_raw() returns the current request being tracked, or NULL.
- * It does not obtain a reference on the request for the caller, so the caller
- * must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex)
-{
-        return rcu_dereference_protected(active->request,
-                                         lockdep_is_held(mutex));
-}
-
-/**
- * i915_gem_active_peek - report the active request being monitored
- * @active - the active tracker
- *
- * i915_gem_active_peek() returns the current request being tracked if
- * still active, or NULL. It does not obtain a reference on the request
- * for the caller, so the caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
-{
-        struct i915_request *request;
-
-        request = i915_gem_active_raw(active, mutex);
-        if (!request || i915_request_completed(request))
-                return NULL;
-
-        return request;
-}
-
-/**
- * i915_gem_active_get - return a reference to the active request
- * @active - the active tracker
- *
- * i915_gem_active_get() returns a reference to the active request, or NULL
- * if the active tracker is idle. The caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex)
-{
-        return i915_request_get(i915_gem_active_peek(active, mutex));
-}
-
-/**
- * __i915_gem_active_get_rcu - return a reference to the active request
- * @active - the active tracker
- *
- * __i915_gem_active_get() returns a reference to the active request, or NULL
- * if the active tracker is idle. The caller must hold the RCU read lock, but
- * the returned pointer is safe to use outside of RCU.
- */
-static inline struct i915_request *
-__i915_gem_active_get_rcu(const struct i915_gem_active *active)
-{
-        /*
-         * Performing a lockless retrieval of the active request is super
-         * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
-         * slab of request objects will not be freed whilst we hold the
-         * RCU read lock. It does not guarantee that the request itself
-         * will not be freed and then *reused*. Viz,
-         *
-         * Thread A                     Thread B
-         *
-         * rq = active.request
-         *                              retire(rq) -> free(rq);
-         *                              (rq is now first on the slab freelist)
-         *                              active.request = NULL
-         *
-         *                              rq = new submission on a new object
-         * ref(rq)
-         *
-         * To prevent the request from being reused whilst the caller
-         * uses it, we take a reference like normal. Whilst acquiring
-         * the reference we check that it is not in a destroyed state
-         * (refcnt == 0). That prevents the request being reallocated
-         * whilst the caller holds on to it. To check that the request
-         * was not reallocated as we acquired the reference we have to
-         * check that our request remains the active request across
-         * the lookup, in the same manner as a seqlock. The visibility
-         * of the pointer versus the reference counting is controlled
-         * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
-         *
-         * In the middle of all that, we inspect whether the request is
-         * complete. Retiring is lazy so the request may be completed long
-         * before the active tracker is updated. Querying whether the
-         * request is complete is far cheaper (as it involves no locked
-         * instructions setting cachelines to exclusive) than acquiring
-         * the reference, so we do it first. The RCU read lock ensures the
-         * pointer dereference is valid, but does not ensure that the
-         * seqno nor HWS is the right one! However, if the request was
-         * reallocated, that means the active tracker's request was complete.
-         * If the new request is also complete, then both are and we can
-         * just report the active tracker is idle. If the new request is
-         * incomplete, then we acquire a reference on it and check that
-         * it remained the active request.
-         *
-         * It is then imperative that we do not zero the request on
-         * reallocation, so that we can chase the dangling pointers!
-         * See i915_request_alloc().
-         */
-        do {
-                struct i915_request *request;
-
-                request = rcu_dereference(active->request);
-                if (!request || i915_request_completed(request))
-                        return NULL;
-
-                /*
-                 * An especially silly compiler could decide to recompute the
-                 * result of i915_request_completed, more specifically
-                 * re-emit the load for request->fence.seqno. A race would catch
-                 * a later seqno value, which could flip the result from true to
-                 * false. Which means part of the instructions below might not
-                 * be executed, while later on instructions are executed. Due to
-                 * barriers within the refcounting the inconsistency can't reach
-                 * past the call to i915_request_get_rcu, but not executing
-                 * that while still executing i915_request_put() creates
-                 * havoc enough. Prevent this with a compiler barrier.
-                 */
-                barrier();
-
-                request = i915_request_get_rcu(request);
-
-                /*
-                 * What stops the following rcu_access_pointer() from occurring
-                 * before the above i915_request_get_rcu()? If we were
-                 * to read the value before pausing to get the reference to
-                 * the request, we may not notice a change in the active
-                 * tracker.
-                 *
-                 * The rcu_access_pointer() is a mere compiler barrier, which
-                 * means both the CPU and compiler are free to perform the
-                 * memory read without constraint. The compiler only has to
-                 * ensure that any operations after the rcu_access_pointer()
-                 * occur afterwards in program order. This means the read may
-                 * be performed earlier by an out-of-order CPU, or adventurous
-                 * compiler.
-                 *
-                 * The atomic operation at the heart of
-                 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
-                 * atomic_inc_not_zero() which is only a full memory barrier
-                 * when successful. That is, if i915_request_get_rcu()
-                 * returns the request (and so with the reference counted
-                 * incremented) then the following read for rcu_access_pointer()
-                 * must occur after the atomic operation and so confirm
-                 * that this request is the one currently being tracked.
-                 *
-                 * The corresponding write barrier is part of
-                 * rcu_assign_pointer().
-                 */
-                if (!request || request == rcu_access_pointer(active->request))
-                        return rcu_pointer_handoff(request);
-
-                i915_request_put(request);
-        } while (1);
-}
-
-/**
- * i915_gem_active_get_unlocked - return a reference to the active request
- * @active - the active tracker
- *
- * i915_gem_active_get_unlocked() returns a reference to the active request,
- * or NULL if the active tracker is idle. The reference is obtained under RCU,
- * so no locking is required by the caller.
- *
- * The reference should be freed with i915_request_put().
- */
-static inline struct i915_request *
-i915_gem_active_get_unlocked(const struct i915_gem_active *active)
-{
-        struct i915_request *request;
-
-        rcu_read_lock();
-        request = __i915_gem_active_get_rcu(active);
+        result = true;
+        rcu_read_lock(); /* the HWSP may be freed at runtime */
+        if (likely(!i915_request_signaled(rq)))
+                result = __i915_request_is_complete(rq);
         rcu_read_unlock();

-        return request;
+        return result;
 }

-/**
- * i915_gem_active_isset - report whether the active tracker is assigned
- * @active - the active tracker
- *
- * i915_gem_active_isset() returns true if the active tracker is currently
- * assigned to a request. Due to the lazy retiring, that request may be idle
- * and this may report stale information.
- */
-static inline bool
-i915_gem_active_isset(const struct i915_gem_active *active)
+static inline void i915_request_mark_complete(struct i915_request *rq)
 {
-        return rcu_access_pointer(active->request);
+        WRITE_ONCE(rq->hwsp_seqno, /* decouple from HWSP */
+                   (u32 *)&rq->fence.seqno);
 }

-/**
- * i915_gem_active_wait - waits until the request is completed
- * @active - the active request on which to wait
- * @flags - how to wait
- * @timeout - how long to wait at most
- * @rps - userspace client to charge for a waitboost
- *
- * i915_gem_active_wait() waits until the request is completed before
- * returning, without requiring any locks to be held. Note that it does not
- * retire any requests before returning.
- *
- * This function relies on RCU in order to acquire the reference to the active
- * request without holding any locks. See __i915_gem_active_get_rcu() for the
- * glory details on how that is managed. Once the reference is acquired, we
- * can then wait upon the request, and afterwards release our reference,
- * free of any locking.
- *
- * This function wraps i915_request_wait(), see it for the full details on
- * the arguments.
- *
- * Returns 0 if successful, or a negative error code.
- */
-static inline int
-i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags)
+static inline bool i915_request_has_waitboost(const struct i915_request *rq)
 {
-        struct i915_request *request;
-        long ret = 0;
-
-        request = i915_gem_active_get_unlocked(active);
-        if (request) {
-                ret = i915_request_wait(request, flags, MAX_SCHEDULE_TIMEOUT);
-                i915_request_put(request);
-        }
-
-        return ret < 0 ? ret : 0;
+        return test_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
 }

-/**
- * i915_gem_active_retire - waits until the request is retired
- * @active - the active request on which to wait
- *
- * i915_gem_active_retire() waits until the request is completed,
- * and then ensures that at least the retirement handler for this
- * @active tracker is called before returning. If the @active
- * tracker is idle, the function returns immediately.
- */
-static inline int __must_check
-i915_gem_active_retire(struct i915_gem_active *active,
-                       struct mutex *mutex)
+static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
 {
-        struct i915_request *request;
-        long ret;
-
-        request = i915_gem_active_raw(active, mutex);
-        if (!request)
-                return 0;
-
-        ret = i915_request_wait(request,
-                                I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
-                                MAX_SCHEDULE_TIMEOUT);
-        if (ret < 0)
-                return ret;
-
-        list_del_init(&active->link);
-        RCU_INIT_POINTER(active->request, NULL);
-
-        active->retire(active, request);
-
-        return 0;
+        /* Preemption should only be disabled very rarely */
+        return unlikely(test_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags));
 }

-#define for_each_active(mask, idx) \
-        for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx))
+static inline bool i915_request_has_sentinel(const struct i915_request *rq)
+{
+        return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags));
+}
+
+static inline bool i915_request_on_hold(const struct i915_request *rq)
+{
+        return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags));
+}
+
+static inline void i915_request_set_hold(struct i915_request *rq)
+{
+        set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+}
+
+static inline void i915_request_clear_hold(struct i915_request *rq)
+{
+        clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+}
+
+static inline struct intel_timeline *
+i915_request_timeline(const struct i915_request *rq)
+{
+        /* Valid only while the request is being constructed (or retired). */
+        return rcu_dereference_protected(rq->timeline,
+                lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex));
+}
+
+static inline struct i915_gem_context *
+i915_request_gem_context(const struct i915_request *rq)
+{
+        /* Valid only while the request is being constructed (or retired). */
+        return rcu_dereference_protected(rq->context->gem_context, true);
+}
+
+static inline struct intel_timeline *
+i915_request_active_timeline(const struct i915_request *rq)
+{
+        /*
+         * When in use during submission, we are protected by a guarantee that
+         * the context/timeline is pinned and must remain pinned until after
+         * this submission.
+         */
+        return rcu_dereference_protected(rq->timeline,
+                                         lockdep_is_held(&rq->engine->active.lock));
+}

 #endif /* I915_REQUEST_H */
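
Usage note: with this change, requests are constructed from a pinned intel_context rather than via i915_request_alloc(engine, ctx). The following is only a minimal sketch of the new flow, built from the functions declared in this header; it is not code from the patch. It assumes the caller already holds a pinned "ce" and optionally has an external "in_fence" to order against, and it trims most error handling.

/* Sketch only: "ce" and "in_fence" are assumed to be provided by the caller. */
static int example_sync_submit(struct intel_context *ce,
                               struct dma_fence *in_fence)
{
        struct i915_request *rq;
        long timeout;
        int err = 0;

        /* Construction holds the timeline mutex until i915_request_add(). */
        rq = i915_request_create(ce);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        if (in_fence) {
                /* Serialise this request after the external fence. */
                err = i915_request_await_dma_fence(rq, in_fence);
                if (err)
                        i915_request_set_error_once(rq, err);
        }

        /* Keep a reference so we can wait after handing it to the scheduler. */
        i915_request_get(rq);

        /* Commit the request for execution and drop the timeline mutex. */
        i915_request_add(rq);

        timeout = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
                                    MAX_SCHEDULE_TIMEOUT);
        i915_request_put(rq);

        return timeout < 0 ? timeout : err;
}

The extra reference taken before i915_request_add() is what lets the caller keep using the request (to wait on it, or poll i915_request_completed()) after ownership has passed to the scheduler, mirroring the i915_request_get()/i915_request_put() helpers above.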