2024-05-10 cde9070d9970eef1f7ec2360586c802a16230ad8
kernel/drivers/gpu/drm/i915/i915_request.h
@@ -26,34 +26,116 @@
 #define I915_REQUEST_H

 #include <linux/dma-fence.h>
+#include <linux/irq_work.h>
+#include <linux/lockdep.h>
+
+#include "gem/i915_gem_context_types.h"
+#include "gt/intel_context_types.h"
+#include "gt/intel_engine_types.h"
+#include "gt/intel_timeline_types.h"

 #include "i915_gem.h"
 #include "i915_scheduler.h"
+#include "i915_selftest.h"
 #include "i915_sw_fence.h"
-#include "i915_scheduler.h"

 #include <uapi/drm/i915_drm.h>

 struct drm_file;
 struct drm_i915_gem_object;
 struct i915_request;
-struct i915_timeline;
-
-struct intel_wait {
-        struct rb_node node;
-        struct task_struct *tsk;
-        struct i915_request *request;
-        u32 seqno;
-};
-
-struct intel_signal_node {
-        struct intel_wait wait;
-        struct list_head link;
-};

 struct i915_capture_list {
         struct i915_capture_list *next;
         struct i915_vma *vma;
+};
+
+#define RQ_TRACE(rq, fmt, ...) do {                                     \
+        const struct i915_request *rq__ = (rq);                         \
+        ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d " fmt,  \
+                     rq__->fence.context, rq__->fence.seqno,            \
+                     hwsp_seqno(rq__), ##__VA_ARGS__);                  \
+} while (0)
+
+enum {
+        /*
+         * I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW.
+         *
+         * Set by __i915_request_submit() on handing over to HW, and cleared
+         * by __i915_request_unsubmit() if we preempt this request.
+         *
+         * Finally cleared for consistency on retiring the request, when
+         * we know the HW is no longer running this request.
+         *
+         * See i915_request_is_active()
+         */
+        I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS,
+
+        /*
+         * I915_FENCE_FLAG_PQUEUE - this request is ready for execution
+         *
+         * Using the scheduler, when a request is ready for execution it is put
+         * into the priority queue, and removed from that queue when transferred
+         * to the HW runlists. We want to track its membership within the
+         * priority queue so that we can easily check before rescheduling.
+         *
+         * See i915_request_in_priority_queue()
+         */
+        I915_FENCE_FLAG_PQUEUE,
+
+        /*
+         * I915_FENCE_FLAG_HOLD - this request is currently on hold
+         *
+         * This request has been suspended, pending an ongoing investigation.
+         */
+        I915_FENCE_FLAG_HOLD,
+
+        /*
+         * I915_FENCE_FLAG_INITIAL_BREADCRUMB - this request has the initial
+         * breadcrumb that marks the end of semaphore waits and start of the
+         * user payload.
+         */
+        I915_FENCE_FLAG_INITIAL_BREADCRUMB,
+
+        /*
+         * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list
+         *
+         * Internal bookkeeping used by the breadcrumb code to track when
+         * a request is on the various signal_list.
+         */
+        I915_FENCE_FLAG_SIGNAL,
+
+        /*
+         * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted
+         *
+         * The execution of some requests should not be interrupted. This is
+         * a sensitive operation as it makes the request super important,
+         * blocking other higher priority work. Abuse of this flag will
+         * lead to quality of service issues.
+         */
+        I915_FENCE_FLAG_NOPREEMPT,
+
+        /*
+         * I915_FENCE_FLAG_SENTINEL - this request should be last in the queue
+         *
+         * A high priority sentinel request may be submitted to clear the
+         * submission queue. As it will be the only request in-flight, upon
+         * execution all other active requests will have been preempted and
+         * unsubmitted. This preemptive pulse is used to re-evaluate the
+         * in-flight requests, particularly in cases where an active context
+         * is banned and those active requests need to be cancelled.
+         */
+        I915_FENCE_FLAG_SENTINEL,
+
+        /*
+         * I915_FENCE_FLAG_BOOST - upclock the gpu for this request
+         *
+         * Some requests are more important than others! In particular, a
+         * request that the user is waiting on is typically required for
+         * interactive latency, for which we want to minimise by upclocking
+         * the GPU. Here we track such boost requests on a per-request basis.
+         */
+        I915_FENCE_FLAG_BOOST,
 };

 /**
@@ -80,9 +162,6 @@
         struct dma_fence fence;
         spinlock_t lock;

-        /** On Which ring this request was generated */
-        struct drm_i915_private *i915;
-
         /**
          * Context and ring buffer related to this request
          * Contexts are refcounted, so when this request is associated with a
@@ -93,12 +172,30 @@
          * i915_request_free() will then decrement the refcount on the
          * context.
          */
-        struct i915_gem_context *gem_context;
         struct intel_engine_cs *engine;
-        struct intel_context *hw_context;
+        struct intel_context *context;
         struct intel_ring *ring;
-        struct i915_timeline *timeline;
-        struct intel_signal_node signaling;
+        struct intel_timeline __rcu *timeline;
+
+        struct list_head signal_link;
+        struct llist_node signal_node;
+
+        /*
+         * The rcu epoch of when this request was allocated. Used to judiciously
+         * apply backpressure on future allocations to ensure that under
+         * mempressure there is sufficient RCU ticks for us to reclaim our
+         * RCU protected slabs.
+         */
+        unsigned long rcustate;
+
+        /*
+         * We pin the timeline->mutex while constructing the request to
+         * ensure that no caller accidentally drops it during construction.
+         * The timeline->mutex must be held to ensure that only this caller
+         * can use the ring and manipulate the associated timeline during
+         * construction.
+         */
+        struct pin_cookie cookie;

         /*
          * Fences for the various phases in the request's lifetime.
@@ -108,8 +205,16 @@
          * It is used by the driver to then queue the request for execution.
          */
         struct i915_sw_fence submit;
-        wait_queue_entry_t submitq;
-        wait_queue_head_t execute;
+        union {
+                wait_queue_entry_t submitq;
+                struct i915_sw_dma_fence_cb dmaq;
+                struct i915_request_duration_cb {
+                        struct dma_fence_cb cb;
+                        ktime_t emitted;
+                } duration;
+        };
+        struct llist_head execute_cb;
+        struct i915_sw_fence semaphore;

         /*
          * A list of everyone we wait upon, and everyone who waits upon us.
@@ -122,14 +227,24 @@
          */
         struct i915_sched_node sched;
         struct i915_dependency dep;
+        intel_engine_mask_t execution_mask;

-        /**
-         * GEM sequence number associated with this request on the
-         * global execution timeline. It is zero when the request is not
-         * on the HW queue (i.e. not on the engine timeline list).
-         * Its value is guarded by the timeline spinlock.
+        /*
+         * A convenience pointer to the current breadcrumb value stored in
+         * the HW status page (or our timeline's local equivalent). The full
+         * path would be rq->hw_context->ring->timeline->hwsp_seqno.
          */
-        u32 global_seqno;
+        const u32 *hwsp_seqno;
+
+        /*
+         * If we need to access the timeline's seqno for this request in
+         * another request, we need to keep a read reference to this associated
+         * cacheline, so that we do not free and recycle it before the foreign
+         * observers have completed. Hence, we keep a pointer to the cacheline
+         * inside the timeline's HWSP vma, but it is only valid while this
+         * request has not completed and guarded by the timeline mutex.
+         */
+        struct intel_timeline_cacheline __rcu *hwsp_cacheline;

         /** Position in the ring of the start of the request */
         u32 head;
@@ -164,22 +279,17 @@
          * on the active_list (of their final request).
          */
         struct i915_capture_list *capture_list;
-        struct list_head active_list;

         /** Time at which this request was emitted, in jiffies. */
         unsigned long emitted_jiffies;

-        bool waitboost;
-
-        /** engine->request_list entry for this request */
+        /** timeline->request entry for this request */
         struct list_head link;

-        /** ring->request_list entry for this request */
-        struct list_head ring_link;
-
-        struct drm_i915_file_private *file_priv;
-        /** file_priv list entry for this request */
-        struct list_head client_link;
+        I915_SELFTEST_DECLARE(struct {
+                struct list_head link;
+                unsigned long delay;
+        } mock;)
 };

 #define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
@@ -191,9 +301,21 @@
         return fence->ops == &i915_fence_ops;
 }

+struct kmem_cache *i915_request_slab_cache(void);
+
 struct i915_request * __must_check
-i915_request_alloc(struct intel_engine_cs *engine,
-                   struct i915_gem_context *ctx);
+__i915_request_create(struct intel_context *ce, gfp_t gfp);
+struct i915_request * __must_check
+i915_request_create(struct intel_context *ce);
+
+void i915_request_set_error_once(struct i915_request *rq, int error);
+void __i915_request_skip(struct i915_request *rq);
+
+struct i915_request *__i915_request_commit(struct i915_request *request);
+void __i915_request_queue(struct i915_request *rq,
+                          const struct i915_sched_attr *attr);
+
+bool i915_request_retire(struct i915_request *rq);
 void i915_request_retire_upto(struct i915_request *rq);

 static inline struct i915_request *
@@ -223,42 +345,20 @@
         dma_fence_put(&rq->fence);
 }

-/**
- * i915_request_global_seqno - report the current global seqno
- * @request - the request
- *
- * A request is assigned a global seqno only when it is on the hardware
- * execution queue. The global seqno can be used to maintain a list of
- * requests on the same engine in retirement order, for example for
- * constructing a priority queue for waiting. Prior to its execution, or
- * if it is subsequently removed in the event of preemption, its global
- * seqno is zero. As both insertion and removal from the execution queue
- * may operate in IRQ context, it is not guarded by the usual struct_mutex
- * BKL. Instead those relying on the global seqno must be prepared for its
- * value to change between reads. Only when the request is complete can
- * the global seqno be stable (due to the memory barriers on submitting
- * the commands to the hardware to write the breadcrumb, if the HWS shows
- * that it has passed the global seqno and the global seqno is unchanged
- * after the read, it is indeed complete).
- */
-static u32
-i915_request_global_seqno(const struct i915_request *request)
-{
-        return READ_ONCE(request->global_seqno);
-}
-
 int i915_request_await_object(struct i915_request *to,
                               struct drm_i915_gem_object *obj,
                               bool write);
 int i915_request_await_dma_fence(struct i915_request *rq,
                                  struct dma_fence *fence);
+int i915_request_await_execution(struct i915_request *rq,
+                                 struct dma_fence *fence,
+                                 void (*hook)(struct i915_request *rq,
+                                              struct dma_fence *signal));

 void i915_request_add(struct i915_request *rq);

-void __i915_request_submit(struct i915_request *request);
+bool __i915_request_submit(struct i915_request *request);
 void i915_request_submit(struct i915_request *request);
-
-void i915_request_skip(struct i915_request *request, int error);

 void __i915_request_unsubmit(struct i915_request *request);
 void i915_request_unsubmit(struct i915_request *request);
@@ -268,11 +368,30 @@
                        long timeout)
         __attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE BIT(0)
-#define I915_WAIT_LOCKED        BIT(1) /* struct_mutex held, handle GPU reset */
+#define I915_WAIT_PRIORITY      BIT(1) /* small priority bump for the request */
 #define I915_WAIT_ALL           BIT(2) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(3)

-static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
+static inline bool i915_request_signaled(const struct i915_request *rq)
+{
+        /* The request may live longer than its HWSP, so check flags first! */
+        return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags);
+}
+
+static inline bool i915_request_is_active(const struct i915_request *rq)
+{
+        return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
+}
+
+static inline bool i915_request_in_priority_queue(const struct i915_request *rq)
+{
+        return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+}
+
+static inline bool
+i915_request_has_initial_breadcrumb(const struct i915_request *rq)
+{
+        return test_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
+}

 /**
  * Returns true if seq1 is later than seq2.
@@ -282,428 +401,211 @@
         return (s32)(seq1 - seq2) >= 0;
 }

-static inline bool
-__i915_request_completed(const struct i915_request *rq, u32 seqno)
+static inline u32 __hwsp_seqno(const struct i915_request *rq)
 {
-        GEM_BUG_ON(!seqno);
-        return i915_seqno_passed(intel_engine_get_seqno(rq->engine), seqno) &&
-               seqno == i915_request_global_seqno(rq);
+        const u32 *hwsp = READ_ONCE(rq->hwsp_seqno);
+
+        return READ_ONCE(*hwsp);
+}
+
+/**
+ * hwsp_seqno - the current breadcrumb value in the HW status page
+ * @rq: the request, to chase the relevant HW status page
+ *
+ * The emphasis in naming here is that hwsp_seqno() is not a property of the
+ * request, but an indication of the current HW state (associated with this
+ * request). Its value will change as the GPU executes more requests.
+ *
+ * Returns the current breadcrumb value in the associated HW status page (or
+ * the local timeline's equivalent) for this request. The request itself
+ * has the associated breadcrumb value of rq->fence.seqno, when the HW
+ * status page has that breadcrumb or later, this request is complete.
+ */
+static inline u32 hwsp_seqno(const struct i915_request *rq)
+{
+        u32 seqno;
+
+        rcu_read_lock(); /* the HWSP may be freed at runtime */
+        seqno = __hwsp_seqno(rq);
+        rcu_read_unlock();
+
+        return seqno;
+}
+
+static inline bool __i915_request_has_started(const struct i915_request *rq)
+{
+        return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno - 1);
+}
+
+/**
+ * i915_request_started - check if the request has begun being executed
+ * @rq: the request
+ *
+ * If the timeline is not using initial breadcrumbs, a request is
+ * considered started if the previous request on its timeline (i.e.
+ * context) has been signaled.
+ *
+ * If the timeline is using semaphores, it will also be emitting an
+ * "initial breadcrumb" after the semaphores are complete and just before
+ * it began executing the user payload. A request can therefore be active
+ * on the HW and not yet started as it is still busywaiting on its
+ * dependencies (via HW semaphores).
+ *
+ * If the request has started, its dependencies will have been signaled
+ * (either by fences or by semaphores) and it will have begun processing
+ * the user payload.
+ *
+ * However, even if a request has started, it may have been preempted and
+ * so no longer active, or it may have already completed.
+ *
+ * See also i915_request_is_active().
+ *
+ * Returns true if the request has begun executing the user payload, or
+ * has completed:
+ */
+static inline bool i915_request_started(const struct i915_request *rq)
+{
+        bool result;
+
+        if (i915_request_signaled(rq))
+                return true;
+
+        result = true;
+        rcu_read_lock(); /* the HWSP may be freed at runtime */
+        if (likely(!i915_request_signaled(rq)))
+                /* Remember: started but may have since been preempted! */
+                result = __i915_request_has_started(rq);
+        rcu_read_unlock();
+
+        return result;
+}
+
+/**
+ * i915_request_is_running - check if the request may actually be executing
+ * @rq: the request
+ *
+ * Returns true if the request is currently submitted to hardware, has passed
+ * its start point (i.e. the context is setup and not busywaiting). Note that
+ * it may no longer be running by the time the function returns!
+ */
+static inline bool i915_request_is_running(const struct i915_request *rq)
+{
+        bool result;
+
+        if (!i915_request_is_active(rq))
+                return false;
+
+        rcu_read_lock();
+        result = __i915_request_has_started(rq) && i915_request_is_active(rq);
+        rcu_read_unlock();
+
+        return result;
+}
+
+/**
+ * i915_request_is_ready - check if the request is ready for execution
+ * @rq: the request
+ *
+ * Upon construction, the request is instructed to wait upon various
+ * signals before it is ready to be executed by the HW. That is, we do
+ * not want to start execution and read data before it is written. In practice,
+ * this is controlled with a mixture of interrupts and semaphores. Once
+ * the submit fence is completed, the backend scheduler will place the
+ * request into its queue and from there submit it for execution. So we
+ * can detect when a request is eligible for execution (and is under control
+ * of the scheduler) by querying where it is in any of the scheduler's lists.
+ *
+ * Returns true if the request is ready for execution (it may be inflight),
+ * false otherwise.
+ */
+static inline bool i915_request_is_ready(const struct i915_request *rq)
+{
+        return !list_empty(&rq->sched.link);
+}
+
+static inline bool __i915_request_is_complete(const struct i915_request *rq)
+{
+        return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
 }

 static inline bool i915_request_completed(const struct i915_request *rq)
 {
-        u32 seqno;
+        bool result;

-        seqno = i915_request_global_seqno(rq);
-        if (!seqno)
-                return false;
+        if (i915_request_signaled(rq))
+                return true;

-        return __i915_request_completed(rq, seqno);
-}
-
-static inline bool i915_request_started(const struct i915_request *rq)
-{
-        u32 seqno;
-
-        seqno = i915_request_global_seqno(rq);
-        if (!seqno)
-                return false;
-
-        return i915_seqno_passed(intel_engine_get_seqno(rq->engine),
-                                 seqno - 1);
-}
-
-static inline bool i915_sched_node_signaled(const struct i915_sched_node *node)
-{
-        const struct i915_request *rq =
-                container_of(node, const struct i915_request, sched);
-
-        return i915_request_completed(rq);
-}
-
-void i915_retire_requests(struct drm_i915_private *i915);
-
-/*
- * We treat requests as fences. This is not be to confused with our
- * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
- * We use the fences to synchronize access from the CPU with activity on the
- * GPU, for example, we should not rewrite an object's PTE whilst the GPU
- * is reading them. We also track fences at a higher level to provide
- * implicit synchronisation around GEM objects, e.g. set-domain will wait
- * for outstanding GPU rendering before marking the object ready for CPU
- * access, or a pageflip will wait until the GPU is complete before showing
- * the frame on the scanout.
- *
- * In order to use a fence, the object must track the fence it needs to
- * serialise with. For example, GEM objects want to track both read and
- * write access so that we can perform concurrent read operations between
- * the CPU and GPU engines, as well as waiting for all rendering to
- * complete, or waiting for the last GPU user of a "fence register". The
- * object then embeds a #i915_gem_active to track the most recent (in
- * retirement order) request relevant for the desired mode of access.
- * The #i915_gem_active is updated with i915_gem_active_set() to track the
- * most recent fence request, typically this is done as part of
- * i915_vma_move_to_active().
- *
- * When the #i915_gem_active completes (is retired), it will
- * signal its completion to the owner through a callback as well as mark
- * itself as idle (i915_gem_active.request == NULL). The owner
- * can then perform any action, such as delayed freeing of an active
- * resource including itself.
- */
-struct i915_gem_active;
-
-typedef void (*i915_gem_retire_fn)(struct i915_gem_active *,
-                                   struct i915_request *);
-
-struct i915_gem_active {
-        struct i915_request __rcu *request;
-        struct list_head link;
-        i915_gem_retire_fn retire;
-};
-
-void i915_gem_retire_noop(struct i915_gem_active *,
-                          struct i915_request *request);
-
-/**
- * init_request_active - prepares the activity tracker for use
- * @active - the active tracker
- * @func - a callback when then the tracker is retired (becomes idle),
- *         can be NULL
- *
- * init_request_active() prepares the embedded @active struct for use as
- * an activity tracker, that is for tracking the last known active request
- * associated with it. When the last request becomes idle, when it is retired
- * after completion, the optional callback @func is invoked.
- */
-static inline void
-init_request_active(struct i915_gem_active *active,
-                    i915_gem_retire_fn retire)
-{
-        RCU_INIT_POINTER(active->request, NULL);
-        INIT_LIST_HEAD(&active->link);
-        active->retire = retire ?: i915_gem_retire_noop;
-}
-
-/**
- * i915_gem_active_set - updates the tracker to watch the current request
- * @active - the active tracker
- * @request - the request to watch
- *
- * i915_gem_active_set() watches the given @request for completion. Whilst
- * that @request is busy, the @active reports busy. When that @request is
- * retired, the @active tracker is updated to report idle.
- */
-static inline void
-i915_gem_active_set(struct i915_gem_active *active,
-                    struct i915_request *request)
-{
-        list_move(&active->link, &request->active_list);
-        rcu_assign_pointer(active->request, request);
-}
-
-/**
- * i915_gem_active_set_retire_fn - updates the retirement callback
- * @active - the active tracker
- * @fn - the routine called when the request is retired
- * @mutex - struct_mutex used to guard retirements
- *
- * i915_gem_active_set_retire_fn() updates the function pointer that
- * is called when the final request associated with the @active tracker
- * is retired.
- */
-static inline void
-i915_gem_active_set_retire_fn(struct i915_gem_active *active,
-                              i915_gem_retire_fn fn,
-                              struct mutex *mutex)
-{
-        lockdep_assert_held(mutex);
-        active->retire = fn ?: i915_gem_retire_noop;
-}
-
-static inline struct i915_request *
-__i915_gem_active_peek(const struct i915_gem_active *active)
-{
-        /*
-         * Inside the error capture (running with the driver in an unknown
-         * state), we want to bend the rules slightly (a lot).
-         *
-         * Work is in progress to make it safer, in the meantime this keeps
-         * the known issue from spamming the logs.
-         */
-        return rcu_dereference_protected(active->request, 1);
-}
-
-/**
- * i915_gem_active_raw - return the active request
- * @active - the active tracker
- *
- * i915_gem_active_raw() returns the current request being tracked, or NULL.
- * It does not obtain a reference on the request for the caller, so the caller
- * must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex)
-{
-        return rcu_dereference_protected(active->request,
-                                         lockdep_is_held(mutex));
-}
-
-/**
- * i915_gem_active_peek - report the active request being monitored
- * @active - the active tracker
- *
- * i915_gem_active_peek() returns the current request being tracked if
- * still active, or NULL. It does not obtain a reference on the request
- * for the caller, so the caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
-{
-        struct i915_request *request;
-
-        request = i915_gem_active_raw(active, mutex);
-        if (!request || i915_request_completed(request))
-                return NULL;
-
-        return request;
-}
-
-/**
- * i915_gem_active_get - return a reference to the active request
- * @active - the active tracker
- *
- * i915_gem_active_get() returns a reference to the active request, or NULL
- * if the active tracker is idle. The caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex)
-{
-        return i915_request_get(i915_gem_active_peek(active, mutex));
-}
-
-/**
- * __i915_gem_active_get_rcu - return a reference to the active request
- * @active - the active tracker
- *
- * __i915_gem_active_get() returns a reference to the active request, or NULL
- * if the active tracker is idle. The caller must hold the RCU read lock, but
- * the returned pointer is safe to use outside of RCU.
- */
-static inline struct i915_request *
-__i915_gem_active_get_rcu(const struct i915_gem_active *active)
-{
-        /*
-         * Performing a lockless retrieval of the active request is super
-         * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
-         * slab of request objects will not be freed whilst we hold the
-         * RCU read lock. It does not guarantee that the request itself
-         * will not be freed and then *reused*. Viz,
-         *
-         * Thread A                     Thread B
-         *
-         * rq = active.request
-         *                              retire(rq) -> free(rq);
-         *                              (rq is now first on the slab freelist)
-         *                              active.request = NULL
-         *
-         *                              rq = new submission on a new object
-         * ref(rq)
-         *
-         * To prevent the request from being reused whilst the caller
-         * uses it, we take a reference like normal. Whilst acquiring
-         * the reference we check that it is not in a destroyed state
-         * (refcnt == 0). That prevents the request being reallocated
-         * whilst the caller holds on to it. To check that the request
-         * was not reallocated as we acquired the reference we have to
-         * check that our request remains the active request across
-         * the lookup, in the same manner as a seqlock. The visibility
-         * of the pointer versus the reference counting is controlled
-         * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
-         *
-         * In the middle of all that, we inspect whether the request is
-         * complete. Retiring is lazy so the request may be completed long
-         * before the active tracker is updated. Querying whether the
-         * request is complete is far cheaper (as it involves no locked
-         * instructions setting cachelines to exclusive) than acquiring
-         * the reference, so we do it first. The RCU read lock ensures the
-         * pointer dereference is valid, but does not ensure that the
-         * seqno nor HWS is the right one! However, if the request was
-         * reallocated, that means the active tracker's request was complete.
-         * If the new request is also complete, then both are and we can
-         * just report the active tracker is idle. If the new request is
-         * incomplete, then we acquire a reference on it and check that
-         * it remained the active request.
-         *
-         * It is then imperative that we do not zero the request on
-         * reallocation, so that we can chase the dangling pointers!
-         * See i915_request_alloc().
-         */
-        do {
-                struct i915_request *request;
-
-                request = rcu_dereference(active->request);
-                if (!request || i915_request_completed(request))
-                        return NULL;
-
-                /*
-                 * An especially silly compiler could decide to recompute the
-                 * result of i915_request_completed, more specifically
-                 * re-emit the load for request->fence.seqno. A race would catch
-                 * a later seqno value, which could flip the result from true to
-                 * false. Which means part of the instructions below might not
-                 * be executed, while later on instructions are executed. Due to
-                 * barriers within the refcounting the inconsistency can't reach
-                 * past the call to i915_request_get_rcu, but not executing
-                 * that while still executing i915_request_put() creates
-                 * havoc enough. Prevent this with a compiler barrier.
-                 */
-                barrier();
-
-                request = i915_request_get_rcu(request);
-
-                /*
-                 * What stops the following rcu_access_pointer() from occurring
-                 * before the above i915_request_get_rcu()? If we were
-                 * to read the value before pausing to get the reference to
-                 * the request, we may not notice a change in the active
-                 * tracker.
-                 *
-                 * The rcu_access_pointer() is a mere compiler barrier, which
-                 * means both the CPU and compiler are free to perform the
-                 * memory read without constraint. The compiler only has to
-                 * ensure that any operations after the rcu_access_pointer()
-                 * occur afterwards in program order. This means the read may
-                 * be performed earlier by an out-of-order CPU, or adventurous
-                 * compiler.
-                 *
-                 * The atomic operation at the heart of
-                 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
-                 * atomic_inc_not_zero() which is only a full memory barrier
-                 * when successful. That is, if i915_request_get_rcu()
-                 * returns the request (and so with the reference counted
-                 * incremented) then the following read for rcu_access_pointer()
-                 * must occur after the atomic operation and so confirm
-                 * that this request is the one currently being tracked.
-                 *
-                 * The corresponding write barrier is part of
-                 * rcu_assign_pointer().
-                 */
-                if (!request || request == rcu_access_pointer(active->request))
-                        return rcu_pointer_handoff(request);
-
-                i915_request_put(request);
-        } while (1);
-}
-
-/**
- * i915_gem_active_get_unlocked - return a reference to the active request
- * @active - the active tracker
- *
- * i915_gem_active_get_unlocked() returns a reference to the active request,
- * or NULL if the active tracker is idle. The reference is obtained under RCU,
- * so no locking is required by the caller.
- *
- * The reference should be freed with i915_request_put().
- */
-static inline struct i915_request *
-i915_gem_active_get_unlocked(const struct i915_gem_active *active)
-{
-        struct i915_request *request;
-
-        rcu_read_lock();
-        request = __i915_gem_active_get_rcu(active);
+        result = true;
+        rcu_read_lock(); /* the HWSP may be freed at runtime */
+        if (likely(!i915_request_signaled(rq)))
+                result = __i915_request_is_complete(rq);
         rcu_read_unlock();

-        return request;
+        return result;
 }

-/**
- * i915_gem_active_isset - report whether the active tracker is assigned
- * @active - the active tracker
- *
- * i915_gem_active_isset() returns true if the active tracker is currently
- * assigned to a request. Due to the lazy retiring, that request may be idle
- * and this may report stale information.
- */
-static inline bool
-i915_gem_active_isset(const struct i915_gem_active *active)
+static inline void i915_request_mark_complete(struct i915_request *rq)
 {
-        return rcu_access_pointer(active->request);
+        WRITE_ONCE(rq->hwsp_seqno, /* decouple from HWSP */
+                   (u32 *)&rq->fence.seqno);
 }

-/**
- * i915_gem_active_wait - waits until the request is completed
- * @active - the active request on which to wait
- * @flags - how to wait
- * @timeout - how long to wait at most
- * @rps - userspace client to charge for a waitboost
- *
- * i915_gem_active_wait() waits until the request is completed before
- * returning, without requiring any locks to be held. Note that it does not
- * retire any requests before returning.
- *
- * This function relies on RCU in order to acquire the reference to the active
- * request without holding any locks. See __i915_gem_active_get_rcu() for the
- * glory details on how that is managed. Once the reference is acquired, we
- * can then wait upon the request, and afterwards release our reference,
- * free of any locking.
- *
- * This function wraps i915_request_wait(), see it for the full details on
- * the arguments.
- *
- * Returns 0 if successful, or a negative error code.
- */
-static inline int
-i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags)
+static inline bool i915_request_has_waitboost(const struct i915_request *rq)
 {
-        struct i915_request *request;
-        long ret = 0;
-
-        request = i915_gem_active_get_unlocked(active);
-        if (request) {
-                ret = i915_request_wait(request, flags, MAX_SCHEDULE_TIMEOUT);
-                i915_request_put(request);
-        }
-
-        return ret < 0 ? ret : 0;
+        return test_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
 }

-/**
- * i915_gem_active_retire - waits until the request is retired
- * @active - the active request on which to wait
- *
- * i915_gem_active_retire() waits until the request is completed,
- * and then ensures that at least the retirement handler for this
- * @active tracker is called before returning. If the @active
- * tracker is idle, the function returns immediately.
- */
-static inline int __must_check
-i915_gem_active_retire(struct i915_gem_active *active,
-                       struct mutex *mutex)
+static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
 {
-        struct i915_request *request;
-        long ret;
-
-        request = i915_gem_active_raw(active, mutex);
-        if (!request)
-                return 0;
-
-        ret = i915_request_wait(request,
-                                I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
-                                MAX_SCHEDULE_TIMEOUT);
-        if (ret < 0)
-                return ret;
-
-        list_del_init(&active->link);
-        RCU_INIT_POINTER(active->request, NULL);
-
-        active->retire(active, request);
-
-        return 0;
+        /* Preemption should only be disabled very rarely */
+        return unlikely(test_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags));
 }

-#define for_each_active(mask, idx) \
-        for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx))
+static inline bool i915_request_has_sentinel(const struct i915_request *rq)
+{
+        return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags));
+}
+
+static inline bool i915_request_on_hold(const struct i915_request *rq)
+{
+        return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags));
+}
+
+static inline void i915_request_set_hold(struct i915_request *rq)
+{
+        set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+}
+
+static inline void i915_request_clear_hold(struct i915_request *rq)
+{
+        clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+}
+
+static inline struct intel_timeline *
+i915_request_timeline(const struct i915_request *rq)
+{
+        /* Valid only while the request is being constructed (or retired). */
+        return rcu_dereference_protected(rq->timeline,
+                lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex));
+}
+
+static inline struct i915_gem_context *
+i915_request_gem_context(const struct i915_request *rq)
+{
+        /* Valid only while the request is being constructed (or retired). */
+        return rcu_dereference_protected(rq->context->gem_context, true);
+}
+
+static inline struct intel_timeline *
+i915_request_active_timeline(const struct i915_request *rq)
+{
+        /*
+         * When in use during submission, we are protected by a guarantee that
+         * the context/timeline is pinned and must remain pinned until after
+         * this submission.
+         */
+        return rcu_dereference_protected(rq->timeline,
+                                         lockdep_is_held(&rq->engine->active.lock));
+}

 #endif /* I915_REQUEST_H */
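
Usage note: with this change, requests are constructed from a pinned intel_context rather than via i915_request_alloc(engine, ctx). The following is only a minimal sketch of the new flow, built from the functions declared in this header; it is not code from the patch. It assumes the caller already holds a pinned "ce" and optionally has an external "in_fence" to order against, and it trims most error handling.

/* Sketch only: "ce" and "in_fence" are assumed to be provided by the caller. */
static int example_sync_submit(struct intel_context *ce,
                               struct dma_fence *in_fence)
{
        struct i915_request *rq;
        long timeout;
        int err = 0;

        /* Construction holds the timeline mutex until i915_request_add(). */
        rq = i915_request_create(ce);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        if (in_fence) {
                /* Serialise this request after the external fence. */
                err = i915_request_await_dma_fence(rq, in_fence);
                if (err)
                        i915_request_set_error_once(rq, err);
        }

        /* Keep a reference so we can wait after handing it to the scheduler. */
        i915_request_get(rq);

        /* Commit the request for execution and drop the timeline mutex. */
        i915_request_add(rq);

        timeout = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
                                    MAX_SCHEDULE_TIMEOUT);
        i915_request_put(rq);

        return timeout < 0 ? timeout : err;
}

The extra reference taken before i915_request_add() is what lets the caller keep using the request (to wait on it, or poll i915_request_completed()) after ownership has passed to the scheduler, mirroring the i915_request_get()/i915_request_put() helpers above.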