| .. | .. |
|---|
| 22 | 22 | * |
|---|
| 23 | 23 | */ |
|---|
| 24 | 24 | |
|---|
| 25 | | -#include <linux/prefetch.h> |
|---|
| 26 | 25 | #include <linux/dma-fence-array.h> |
|---|
| 26 | +#include <linux/dma-fence-chain.h> |
|---|
| 27 | +#include <linux/irq_work.h> |
|---|
| 28 | +#include <linux/prefetch.h> |
|---|
| 27 | 29 | #include <linux/sched.h> |
|---|
| 28 | 30 | #include <linux/sched/clock.h> |
|---|
| 29 | 31 | #include <linux/sched/signal.h> |
|---|
| 30 | 32 | |
|---|
| 33 | +#include "gem/i915_gem_context.h" |
|---|
| 34 | +#include "gt/intel_breadcrumbs.h" |
|---|
| 35 | +#include "gt/intel_context.h" |
|---|
| 36 | +#include "gt/intel_ring.h" |
|---|
| 37 | +#include "gt/intel_rps.h" |
|---|
| 38 | + |
|---|
| 39 | +#include "i915_active.h" |
|---|
| 31 | 40 | #include "i915_drv.h" |
|---|
| 41 | +#include "i915_globals.h" |
|---|
| 42 | +#include "i915_trace.h" |
|---|
| 43 | +#include "intel_pm.h" |
|---|
| 44 | + |
|---|
| 45 | +struct execute_cb { |
|---|
| 46 | + struct irq_work work; |
|---|
| 47 | + struct i915_sw_fence *fence; |
|---|
| 48 | + void (*hook)(struct i915_request *rq, struct dma_fence *signal); |
|---|
| 49 | + struct i915_request *signal; |
|---|
| 50 | +}; |
|---|
| 51 | + |
|---|
| 52 | +static struct i915_global_request { |
|---|
| 53 | + struct i915_global base; |
|---|
| 54 | + struct kmem_cache *slab_requests; |
|---|
| 55 | + struct kmem_cache *slab_execute_cbs; |
|---|
| 56 | +} global; |
|---|
| 32 | 57 | |
|---|
| 33 | 58 | static const char *i915_fence_get_driver_name(struct dma_fence *fence) |
|---|
| 34 | 59 | { |
|---|
| 35 | | - return "i915"; |
|---|
| 60 | + return dev_name(to_request(fence)->engine->i915->drm.dev); |
|---|
| 36 | 61 | } |
|---|
| 37 | 62 | |
|---|
| 38 | 63 | static const char *i915_fence_get_timeline_name(struct dma_fence *fence) |
|---|
| 39 | 64 | { |
|---|
| 65 | + const struct i915_gem_context *ctx; |
|---|
| 66 | + |
|---|
| 40 | 67 | /* |
|---|
| 41 | 68 | * The timeline struct (as part of the ppgtt underneath a context) |
|---|
| 42 | 69 | * may be freed when the request is no longer in use by the GPU. |
|---|
| .. | .. |
|---|
| 49 | 76 | if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) |
|---|
| 50 | 77 | return "signaled"; |
|---|
| 51 | 78 | |
|---|
| 52 | | - return to_request(fence)->timeline->name; |
|---|
| 79 | + ctx = i915_request_gem_context(to_request(fence)); |
|---|
| 80 | + if (!ctx) |
|---|
| 81 | + return "[" DRIVER_NAME "]"; |
|---|
| 82 | + |
|---|
| 83 | + return ctx->name; |
|---|
| 53 | 84 | } |
|---|
| 54 | 85 | |
|---|
| 55 | 86 | static bool i915_fence_signaled(struct dma_fence *fence) |
|---|
| .. | .. |
|---|
| 59 | 90 | |
|---|
| 60 | 91 | static bool i915_fence_enable_signaling(struct dma_fence *fence) |
|---|
| 61 | 92 | { |
|---|
| 62 | | - return intel_engine_enable_signaling(to_request(fence), true); |
|---|
| 93 | + return i915_request_enable_breadcrumb(to_request(fence)); |
|---|
| 63 | 94 | } |
|---|
| 64 | 95 | |
|---|
| 65 | 96 | static signed long i915_fence_wait(struct dma_fence *fence, |
|---|
| 66 | 97 | bool interruptible, |
|---|
| 67 | 98 | signed long timeout) |
|---|
| 68 | 99 | { |
|---|
| 69 | | - return i915_request_wait(to_request(fence), interruptible, timeout); |
|---|
| 100 | + return i915_request_wait(to_request(fence), |
|---|
| 101 | + interruptible | I915_WAIT_PRIORITY, |
|---|
| 102 | + timeout); |
|---|
| 103 | +} |
|---|
| 104 | + |
|---|
| 105 | +struct kmem_cache *i915_request_slab_cache(void) |
|---|
| 106 | +{ |
|---|
| 107 | + return global.slab_requests; |
|---|
| 70 | 108 | } |
|---|
| 71 | 109 | |
|---|
| 72 | 110 | static void i915_fence_release(struct dma_fence *fence) |
|---|
| .. | .. |
|---|
| 81 | 119 | * caught trying to reuse dead objects. |
|---|
| 82 | 120 | */ |
|---|
| 83 | 121 | i915_sw_fence_fini(&rq->submit); |
|---|
| 122 | + i915_sw_fence_fini(&rq->semaphore); |
|---|
| 84 | 123 | |
|---|
| 85 | | - kmem_cache_free(rq->i915->requests, rq); |
|---|
| 124 | + /* |
|---|
| 125 | + * Keep one request on each engine for reserved use under mempressure |
|---|
| 126 | + * |
|---|
| 127 | + * We do not hold a reference to the engine here and so have to be |
|---|
| 128 | + * very careful in what rq->engine we poke. The virtual engine is |
|---|
| 129 | + * referenced via the rq->context and we released that ref during |
|---|
| 130 | + * i915_request_retire(), ergo we must not dereference a virtual |
|---|
| 131 | + * engine here. Not that we would want to, as the only consumer of |
|---|
| 132 | + * the reserved engine->request_pool is the power management parking, |
|---|
| 133 | + * which must-not-fail, and that is only run on the physical engines. |
|---|
| 134 | + * |
|---|
| 135 | + * Since the request must have been executed to have completed, |
|---|
| 136 | + * we know that it will have been processed by the HW and will |
|---|
| 137 | + * not be unsubmitted again, so rq->engine and rq->execution_mask |
|---|
| 138 | + * at this point is stable. rq->execution_mask will be a single |
|---|
| 139 | + * bit if the last and _only_ engine it could execute on was a |
|---|
| 140 | + * physical engine, if it's multiple bits then it started on and |
|---|
| 141 | + * could still be on a virtual engine. Thus if the mask is not a |
|---|
| 142 | + * power-of-two we assume that rq->engine may still be a virtual |
|---|
| 143 | + * engine and so a dangling invalid pointer that we cannot dereference |
|---|
| 144 | + * |
|---|
| 145 | + * For example, consider the flow of a bonded request through a virtual |
|---|
| 146 | + * engine. The request is created with a wide engine mask (all engines |
|---|
| 147 | + * that we might execute on). On processing the bond, the request mask |
|---|
| 148 | + * is reduced to one or more engines. If the request is subsequently |
|---|
| 149 | + * bound to a single engine, it will then be constrained to only |
|---|
| 150 | + * execute on that engine and never returned to the virtual engine |
|---|
| 151 | + * after timeslicing away, see __unwind_incomplete_requests(). Thus we |
|---|
| 152 | + * know that if the rq->execution_mask is a single bit, rq->engine |
|---|
| 153 | + * can be a physical engine with the exact corresponding mask. |
|---|
| 154 | + */ |
|---|
| 155 | + if (is_power_of_2(rq->execution_mask) && |
|---|
| 156 | + !cmpxchg(&rq->engine->request_pool, NULL, rq)) |
|---|
| 157 | + return; |
|---|
| 158 | + |
|---|
| 159 | + kmem_cache_free(global.slab_requests, rq); |
|---|
| 86 | 160 | } |
|---|
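The `engine->request_pool` kept by `i915_fence_release()` above is a single-slot, lock-free cache: the slot is claimed with `cmpxchg(NULL, rq)` on free and drained with `xchg(rsvd, NULL)` in `request_alloc_slow()` further down. A minimal userspace sketch of the same pattern, using C11 atomics rather than the kernel primitives (the `obj` and `slot` names are illustrative, not driver API):

```c
#include <stdatomic.h>
#include <stdlib.h>

struct obj { int payload; };

/* Analogous to engine->request_pool: holds at most one spare object. */
static _Atomic(struct obj *) slot;

static void obj_free(struct obj *o)
{
	struct obj *expected = NULL;

	/* Donate the object if the slot is empty (the cmpxchg in the diff). */
	if (atomic_compare_exchange_strong(&slot, &expected, o))
		return;

	free(o);
}

static struct obj *obj_alloc_reserved(void)
{
	/* Drain the slot, if anything is there (the xchg in request_alloc_slow). */
	return atomic_exchange(&slot, NULL);
}
```

The reserve exists so that must-not-fail paths (engine parking, per the comment above) always have one request to fall back on under memory pressure.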
| 87 | 161 | |
|---|
| 88 | 162 | const struct dma_fence_ops i915_fence_ops = { |
|---|
| .. | .. |
|---|
| 94 | 168 | .release = i915_fence_release, |
|---|
| 95 | 169 | }; |
|---|
| 96 | 170 | |
|---|
| 97 | | -static inline void |
|---|
| 98 | | -i915_request_remove_from_client(struct i915_request *request) |
|---|
| 171 | +static void irq_execute_cb(struct irq_work *wrk) |
|---|
| 99 | 172 | { |
|---|
| 100 | | - struct drm_i915_file_private *file_priv; |
|---|
| 173 | + struct execute_cb *cb = container_of(wrk, typeof(*cb), work); |
|---|
| 101 | 174 | |
|---|
| 102 | | - file_priv = request->file_priv; |
|---|
| 103 | | - if (!file_priv) |
|---|
| 175 | + i915_sw_fence_complete(cb->fence); |
|---|
| 176 | + kmem_cache_free(global.slab_execute_cbs, cb); |
|---|
| 177 | +} |
|---|
| 178 | + |
|---|
| 179 | +static void irq_execute_cb_hook(struct irq_work *wrk) |
|---|
| 180 | +{ |
|---|
| 181 | + struct execute_cb *cb = container_of(wrk, typeof(*cb), work); |
|---|
| 182 | + |
|---|
| 183 | + cb->hook(container_of(cb->fence, struct i915_request, submit), |
|---|
| 184 | + &cb->signal->fence); |
|---|
| 185 | + i915_request_put(cb->signal); |
|---|
| 186 | + |
|---|
| 187 | + irq_execute_cb(wrk); |
|---|
| 188 | +} |
|---|
| 189 | + |
|---|
| 190 | +static __always_inline void |
|---|
| 191 | +__notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk)) |
|---|
| 192 | +{ |
|---|
| 193 | + struct execute_cb *cb, *cn; |
|---|
| 194 | + |
|---|
| 195 | + if (llist_empty(&rq->execute_cb)) |
|---|
| 104 | 196 | return; |
|---|
| 105 | 197 | |
|---|
| 106 | | - spin_lock(&file_priv->mm.lock); |
|---|
| 107 | | - if (request->file_priv) { |
|---|
| 108 | | - list_del(&request->client_link); |
|---|
| 109 | | - request->file_priv = NULL; |
|---|
| 198 | + llist_for_each_entry_safe(cb, cn, |
|---|
| 199 | + llist_del_all(&rq->execute_cb), |
|---|
| 200 | + work.llnode) |
|---|
| 201 | + fn(&cb->work); |
|---|
| 202 | +} |
|---|
| 203 | + |
|---|
| 204 | +static void __notify_execute_cb_irq(struct i915_request *rq) |
|---|
| 205 | +{ |
|---|
| 206 | + __notify_execute_cb(rq, irq_work_queue); |
|---|
| 207 | +} |
|---|
| 208 | + |
|---|
| 209 | +static bool irq_work_imm(struct irq_work *wrk) |
|---|
| 210 | +{ |
|---|
| 211 | + wrk->func(wrk); |
|---|
| 212 | + return false; |
|---|
| 213 | +} |
|---|
| 214 | + |
|---|
| 215 | +static void __notify_execute_cb_imm(struct i915_request *rq) |
|---|
| 216 | +{ |
|---|
| 217 | + __notify_execute_cb(rq, irq_work_imm); |
|---|
| 218 | +} |
|---|
| 219 | + |
|---|
| 220 | +static void free_capture_list(struct i915_request *request) |
|---|
| 221 | +{ |
|---|
| 222 | + struct i915_capture_list *capture; |
|---|
| 223 | + |
|---|
| 224 | + capture = fetch_and_zero(&request->capture_list); |
|---|
| 225 | + while (capture) { |
|---|
| 226 | + struct i915_capture_list *next = capture->next; |
|---|
| 227 | + |
|---|
| 228 | + kfree(capture); |
|---|
| 229 | + capture = next; |
|---|
| 110 | 230 | } |
|---|
| 111 | | - spin_unlock(&file_priv->mm.lock); |
|---|
| 112 | 231 | } |
|---|
| 113 | 232 | |
|---|
| 114 | | -static struct i915_dependency * |
|---|
| 115 | | -i915_dependency_alloc(struct drm_i915_private *i915) |
|---|
| 233 | +static void __i915_request_fill(struct i915_request *rq, u8 val) |
|---|
| 116 | 234 | { |
|---|
| 117 | | - return kmem_cache_alloc(i915->dependencies, GFP_KERNEL); |
|---|
| 235 | + void *vaddr = rq->ring->vaddr; |
|---|
| 236 | + u32 head; |
|---|
| 237 | + |
|---|
| 238 | + head = rq->infix; |
|---|
| 239 | + if (rq->postfix < head) { |
|---|
| 240 | + memset(vaddr + head, val, rq->ring->size - head); |
|---|
| 241 | + head = 0; |
|---|
| 242 | + } |
|---|
| 243 | + memset(vaddr + head, val, rq->postfix - head); |
|---|
| 118 | 244 | } |
|---|
| 119 | 245 | |
|---|
| 120 | | -static void |
|---|
| 121 | | -i915_dependency_free(struct drm_i915_private *i915, |
|---|
| 122 | | - struct i915_dependency *dep) |
|---|
| 246 | +static void remove_from_engine(struct i915_request *rq) |
|---|
| 123 | 247 | { |
|---|
| 124 | | - kmem_cache_free(i915->dependencies, dep); |
|---|
| 125 | | -} |
|---|
| 126 | | - |
|---|
| 127 | | -static void |
|---|
| 128 | | -__i915_sched_node_add_dependency(struct i915_sched_node *node, |
|---|
| 129 | | - struct i915_sched_node *signal, |
|---|
| 130 | | - struct i915_dependency *dep, |
|---|
| 131 | | - unsigned long flags) |
|---|
| 132 | | -{ |
|---|
| 133 | | - INIT_LIST_HEAD(&dep->dfs_link); |
|---|
| 134 | | - list_add(&dep->wait_link, &signal->waiters_list); |
|---|
| 135 | | - list_add(&dep->signal_link, &node->signalers_list); |
|---|
| 136 | | - dep->signaler = signal; |
|---|
| 137 | | - dep->flags = flags; |
|---|
| 138 | | -} |
|---|
| 139 | | - |
|---|
| 140 | | -static int |
|---|
| 141 | | -i915_sched_node_add_dependency(struct drm_i915_private *i915, |
|---|
| 142 | | - struct i915_sched_node *node, |
|---|
| 143 | | - struct i915_sched_node *signal) |
|---|
| 144 | | -{ |
|---|
| 145 | | - struct i915_dependency *dep; |
|---|
| 146 | | - |
|---|
| 147 | | - dep = i915_dependency_alloc(i915); |
|---|
| 148 | | - if (!dep) |
|---|
| 149 | | - return -ENOMEM; |
|---|
| 150 | | - |
|---|
| 151 | | - __i915_sched_node_add_dependency(node, signal, dep, |
|---|
| 152 | | - I915_DEPENDENCY_ALLOC); |
|---|
| 153 | | - return 0; |
|---|
| 154 | | -} |
|---|
| 155 | | - |
|---|
| 156 | | -static void |
|---|
| 157 | | -i915_sched_node_fini(struct drm_i915_private *i915, |
|---|
| 158 | | - struct i915_sched_node *node) |
|---|
| 159 | | -{ |
|---|
| 160 | | - struct i915_dependency *dep, *tmp; |
|---|
| 161 | | - |
|---|
| 162 | | - GEM_BUG_ON(!list_empty(&node->link)); |
|---|
| 248 | + struct intel_engine_cs *engine, *locked; |
|---|
| 163 | 249 | |
|---|
| 164 | 250 | /* |
|---|
| 165 | | - * Everyone we depended upon (the fences we wait to be signaled) |
|---|
| 166 | | - * should retire before us and remove themselves from our list. |
|---|
| 167 | | - * However, retirement is run independently on each timeline and |
|---|
| 168 | | - * so we may be called out-of-order. |
|---|
| 251 | + * Virtual engines complicate acquiring the engine timeline lock, |
|---|
| 252 | + * as their rq->engine pointer is not stable until under that |
|---|
| 253 | + * engine lock. The simple ploy we use is to take the lock then |
|---|
| 254 | + * check that the rq still belongs to the newly locked engine. |
|---|
| 169 | 255 | */ |
|---|
| 170 | | - list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { |
|---|
| 171 | | - GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler)); |
|---|
| 172 | | - GEM_BUG_ON(!list_empty(&dep->dfs_link)); |
|---|
| 173 | | - |
|---|
| 174 | | - list_del(&dep->wait_link); |
|---|
| 175 | | - if (dep->flags & I915_DEPENDENCY_ALLOC) |
|---|
| 176 | | - i915_dependency_free(i915, dep); |
|---|
| 256 | + locked = READ_ONCE(rq->engine); |
|---|
| 257 | + spin_lock_irq(&locked->active.lock); |
|---|
| 258 | + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { |
|---|
| 259 | + spin_unlock(&locked->active.lock); |
|---|
| 260 | + spin_lock(&engine->active.lock); |
|---|
| 261 | + locked = engine; |
|---|
| 177 | 262 | } |
|---|
| 263 | + list_del_init(&rq->sched.link); |
|---|
| 178 | 264 | |
|---|
| 179 | | - /* Remove ourselves from everyone who depends upon us */ |
|---|
| 180 | | - list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { |
|---|
| 181 | | - GEM_BUG_ON(dep->signaler != node); |
|---|
| 182 | | - GEM_BUG_ON(!list_empty(&dep->dfs_link)); |
|---|
| 265 | + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); |
|---|
| 266 | + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); |
|---|
| 183 | 267 | |
|---|
| 184 | | - list_del(&dep->signal_link); |
|---|
| 185 | | - if (dep->flags & I915_DEPENDENCY_ALLOC) |
|---|
| 186 | | - i915_dependency_free(i915, dep); |
|---|
| 187 | | - } |
|---|
| 268 | + /* Prevent further __await_execution() registering a cb, then flush */ |
|---|
| 269 | + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); |
|---|
| 270 | + |
|---|
| 271 | + spin_unlock_irq(&locked->active.lock); |
|---|
| 272 | + |
|---|
| 273 | + __notify_execute_cb_imm(rq); |
|---|
| 188 | 274 | } |
|---|
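`remove_from_engine()` has to lock whichever engine the request currently claims, but `rq->engine` is only stable while that engine's lock is held, so it loops: lock the candidate, re-read the pointer, and move on if it changed underneath. A stripped-down sketch of that retry loop with pthread mutexes (the types and helper name are illustrative):

```c
#include <pthread.h>
#include <stdatomic.h>

struct engine { pthread_mutex_t lock; };

struct request { _Atomic(struct engine *) engine; };

/*
 * Lock the engine that currently owns @rq. Another thread may rewrite
 * rq->engine until we hold the matching lock, so keep retrying until the
 * pointer is stable under the lock we took.
 */
static struct engine *lock_owning_engine(struct request *rq)
{
	struct engine *locked, *owner;

	locked = atomic_load(&rq->engine);
	pthread_mutex_lock(&locked->lock);
	while ((owner = atomic_load(&rq->engine)) != locked) {
		pthread_mutex_unlock(&locked->lock);
		pthread_mutex_lock(&owner->lock);
		locked = owner;
	}

	return locked;	/* caller releases locked->lock when done */
}
```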
| 189 | 275 | |
|---|
| 190 | | -static void |
|---|
| 191 | | -i915_sched_node_init(struct i915_sched_node *node) |
|---|
| 276 | +bool i915_request_retire(struct i915_request *rq) |
|---|
| 192 | 277 | { |
|---|
| 193 | | - INIT_LIST_HEAD(&node->signalers_list); |
|---|
| 194 | | - INIT_LIST_HEAD(&node->waiters_list); |
|---|
| 195 | | - INIT_LIST_HEAD(&node->link); |
|---|
| 196 | | - node->attr.priority = I915_PRIORITY_INVALID; |
|---|
| 197 | | -} |
|---|
| 278 | + if (!i915_request_completed(rq)) |
|---|
| 279 | + return false; |
|---|
| 198 | 280 | |
|---|
| 199 | | -static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) |
|---|
| 200 | | -{ |
|---|
| 201 | | - struct intel_engine_cs *engine; |
|---|
| 202 | | - struct i915_timeline *timeline; |
|---|
| 203 | | - enum intel_engine_id id; |
|---|
| 204 | | - int ret; |
|---|
| 281 | + RQ_TRACE(rq, "\n"); |
|---|
| 205 | 282 | |
|---|
| 206 | | - /* Carefully retire all requests without writing to the rings */ |
|---|
| 207 | | - ret = i915_gem_wait_for_idle(i915, |
|---|
| 208 | | - I915_WAIT_INTERRUPTIBLE | |
|---|
| 209 | | - I915_WAIT_LOCKED, |
|---|
| 210 | | - MAX_SCHEDULE_TIMEOUT); |
|---|
| 211 | | - if (ret) |
|---|
| 212 | | - return ret; |
|---|
| 213 | | - |
|---|
| 214 | | - GEM_BUG_ON(i915->gt.active_requests); |
|---|
| 215 | | - |
|---|
| 216 | | - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ |
|---|
| 217 | | - for_each_engine(engine, i915, id) { |
|---|
| 218 | | - GEM_TRACE("%s seqno %d (current %d) -> %d\n", |
|---|
| 219 | | - engine->name, |
|---|
| 220 | | - engine->timeline.seqno, |
|---|
| 221 | | - intel_engine_get_seqno(engine), |
|---|
| 222 | | - seqno); |
|---|
| 223 | | - |
|---|
| 224 | | - if (!i915_seqno_passed(seqno, engine->timeline.seqno)) { |
|---|
| 225 | | - /* Flush any waiters before we reuse the seqno */ |
|---|
| 226 | | - intel_engine_disarm_breadcrumbs(engine); |
|---|
| 227 | | - intel_engine_init_hangcheck(engine); |
|---|
| 228 | | - GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals)); |
|---|
| 229 | | - } |
|---|
| 230 | | - |
|---|
| 231 | | - /* Check we are idle before we fiddle with hw state! */ |
|---|
| 232 | | - GEM_BUG_ON(!intel_engine_is_idle(engine)); |
|---|
| 233 | | - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request)); |
|---|
| 234 | | - |
|---|
| 235 | | - /* Finally reset hw state */ |
|---|
| 236 | | - intel_engine_init_global_seqno(engine, seqno); |
|---|
| 237 | | - engine->timeline.seqno = seqno; |
|---|
| 238 | | - } |
|---|
| 239 | | - |
|---|
| 240 | | - list_for_each_entry(timeline, &i915->gt.timelines, link) |
|---|
| 241 | | - memset(timeline->global_sync, 0, sizeof(timeline->global_sync)); |
|---|
| 242 | | - |
|---|
| 243 | | - i915->gt.request_serial = seqno; |
|---|
| 244 | | - |
|---|
| 245 | | - return 0; |
|---|
| 246 | | -} |
|---|
| 247 | | - |
|---|
| 248 | | -int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) |
|---|
| 249 | | -{ |
|---|
| 250 | | - struct drm_i915_private *i915 = to_i915(dev); |
|---|
| 251 | | - |
|---|
| 252 | | - lockdep_assert_held(&i915->drm.struct_mutex); |
|---|
| 253 | | - |
|---|
| 254 | | - if (seqno == 0) |
|---|
| 255 | | - return -EINVAL; |
|---|
| 256 | | - |
|---|
| 257 | | - /* HWS page needs to be set less than what we will inject to ring */ |
|---|
| 258 | | - return reset_all_global_seqno(i915, seqno - 1); |
|---|
| 259 | | -} |
|---|
| 260 | | - |
|---|
| 261 | | -static int reserve_gt(struct drm_i915_private *i915) |
|---|
| 262 | | -{ |
|---|
| 263 | | - int ret; |
|---|
| 264 | | - |
|---|
| 265 | | - /* |
|---|
| 266 | | - * Reservation is fine until we may need to wrap around |
|---|
| 267 | | - * |
|---|
| 268 | | - * By incrementing the serial for every request, we know that no |
|---|
| 269 | | - * individual engine may exceed that serial (as each is reset to 0 |
|---|
| 270 | | - * on any wrap). This protects even the most pessimistic of migrations |
|---|
| 271 | | - * of every request from all engines onto just one. |
|---|
| 272 | | - */ |
|---|
| 273 | | - while (unlikely(++i915->gt.request_serial == 0)) { |
|---|
| 274 | | - ret = reset_all_global_seqno(i915, 0); |
|---|
| 275 | | - if (ret) { |
|---|
| 276 | | - i915->gt.request_serial--; |
|---|
| 277 | | - return ret; |
|---|
| 278 | | - } |
|---|
| 279 | | - } |
|---|
| 280 | | - |
|---|
| 281 | | - if (!i915->gt.active_requests++) |
|---|
| 282 | | - i915_gem_unpark(i915); |
|---|
| 283 | | - |
|---|
| 284 | | - return 0; |
|---|
| 285 | | -} |
|---|
| 286 | | - |
|---|
| 287 | | -static void unreserve_gt(struct drm_i915_private *i915) |
|---|
| 288 | | -{ |
|---|
| 289 | | - GEM_BUG_ON(!i915->gt.active_requests); |
|---|
| 290 | | - if (!--i915->gt.active_requests) |
|---|
| 291 | | - i915_gem_park(i915); |
|---|
| 292 | | -} |
|---|
| 293 | | - |
|---|
| 294 | | -void i915_gem_retire_noop(struct i915_gem_active *active, |
|---|
| 295 | | - struct i915_request *request) |
|---|
| 296 | | -{ |
|---|
| 297 | | - /* Space left intentionally blank */ |
|---|
| 298 | | -} |
|---|
| 299 | | - |
|---|
| 300 | | -static void advance_ring(struct i915_request *request) |
|---|
| 301 | | -{ |
|---|
| 302 | | - struct intel_ring *ring = request->ring; |
|---|
| 303 | | - unsigned int tail; |
|---|
| 283 | + GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); |
|---|
| 284 | + trace_i915_request_retire(rq); |
|---|
| 285 | + i915_request_mark_complete(rq); |
|---|
| 304 | 286 | |
|---|
| 305 | 287 | /* |
|---|
| 306 | 288 | * We know the GPU must have read the request to have |
|---|
| .. | .. |
|---|
| 311 | 293 | * Note this requires that we are always called in request |
|---|
| 312 | 294 | * completion order. |
|---|
| 313 | 295 | */ |
|---|
| 314 | | - GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list)); |
|---|
| 315 | | - if (list_is_last(&request->ring_link, &ring->request_list)) { |
|---|
| 316 | | - /* |
|---|
| 317 | | - * We may race here with execlists resubmitting this request |
|---|
| 318 | | - * as we retire it. The resubmission will move the ring->tail |
|---|
| 319 | | - * forwards (to request->wa_tail). We either read the |
|---|
| 320 | | - * current value that was written to hw, or the value that |
|---|
| 321 | | - * is just about to be. Either works, if we miss the last two |
|---|
| 322 | | - * noops - they are safe to be replayed on a reset. |
|---|
| 323 | | - */ |
|---|
| 324 | | - GEM_TRACE("marking %s as inactive\n", ring->timeline->name); |
|---|
| 325 | | - tail = READ_ONCE(request->tail); |
|---|
| 326 | | - list_del(&ring->active_link); |
|---|
| 327 | | - } else { |
|---|
| 328 | | - tail = request->postfix; |
|---|
| 329 | | - } |
|---|
| 330 | | - list_del_init(&request->ring_link); |
|---|
| 296 | + GEM_BUG_ON(!list_is_first(&rq->link, |
|---|
| 297 | + &i915_request_timeline(rq)->requests)); |
|---|
| 298 | + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) |
|---|
| 299 | + /* Poison before we release our space in the ring */ |
|---|
| 300 | + __i915_request_fill(rq, POISON_FREE); |
|---|
| 301 | + rq->ring->head = rq->postfix; |
|---|
| 331 | 302 | |
|---|
| 332 | | - ring->head = tail; |
|---|
| 333 | | -} |
|---|
| 334 | | - |
|---|
| 335 | | -static void free_capture_list(struct i915_request *request) |
|---|
| 336 | | -{ |
|---|
| 337 | | - struct i915_capture_list *capture; |
|---|
| 338 | | - |
|---|
| 339 | | - capture = request->capture_list; |
|---|
| 340 | | - while (capture) { |
|---|
| 341 | | - struct i915_capture_list *next = capture->next; |
|---|
| 342 | | - |
|---|
| 343 | | - kfree(capture); |
|---|
| 344 | | - capture = next; |
|---|
| 345 | | - } |
|---|
| 346 | | -} |
|---|
| 347 | | - |
|---|
| 348 | | -static void __retire_engine_request(struct intel_engine_cs *engine, |
|---|
| 349 | | - struct i915_request *rq) |
|---|
| 350 | | -{ |
|---|
| 351 | | - GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n", |
|---|
| 352 | | - __func__, engine->name, |
|---|
| 353 | | - rq->fence.context, rq->fence.seqno, |
|---|
| 354 | | - rq->global_seqno, |
|---|
| 355 | | - intel_engine_get_seqno(engine)); |
|---|
| 356 | | - |
|---|
| 357 | | - GEM_BUG_ON(!i915_request_completed(rq)); |
|---|
| 358 | | - |
|---|
| 359 | | - spin_lock_irq(&engine->timeline.lock); |
|---|
| 360 | | - GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests)); |
|---|
| 361 | | - list_del_init(&rq->link); |
|---|
| 362 | | - spin_unlock(&engine->timeline.lock); |
|---|
| 363 | | - |
|---|
| 364 | | - spin_lock(&rq->lock); |
|---|
| 365 | | - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) |
|---|
| 303 | + if (!i915_request_signaled(rq)) { |
|---|
| 304 | + spin_lock_irq(&rq->lock); |
|---|
| 366 | 305 | dma_fence_signal_locked(&rq->fence); |
|---|
| 367 | | - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) |
|---|
| 368 | | - intel_engine_cancel_signaling(rq); |
|---|
| 369 | | - if (rq->waitboost) { |
|---|
| 370 | | - GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); |
|---|
| 371 | | - atomic_dec(&rq->i915->gt_pm.rps.num_waiters); |
|---|
| 306 | + spin_unlock_irq(&rq->lock); |
|---|
| 372 | 307 | } |
|---|
| 373 | | - spin_unlock_irq(&rq->lock); |
|---|
| 308 | + |
|---|
| 309 | + if (i915_request_has_waitboost(rq)) { |
|---|
| 310 | + GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters)); |
|---|
| 311 | + atomic_dec(&rq->engine->gt->rps.num_waiters); |
|---|
| 312 | + } |
|---|
| 374 | 313 | |
|---|
| 375 | 314 | /* |
|---|
| 376 | | - * The backing object for the context is done after switching to the |
|---|
| 377 | | - * *next* context. Therefore we cannot retire the previous context until |
|---|
| 378 | | - * the next context has already started running. However, since we |
|---|
| 379 | | - * cannot take the required locks at i915_request_submit() we |
|---|
| 380 | | - * defer the unpinning of the active context to now, retirement of |
|---|
| 381 | | - * the subsequent request. |
|---|
| 382 | | - */ |
|---|
| 383 | | - if (engine->last_retired_context) |
|---|
| 384 | | - intel_context_unpin(engine->last_retired_context); |
|---|
| 385 | | - engine->last_retired_context = rq->hw_context; |
|---|
| 386 | | -} |
|---|
| 387 | | - |
|---|
| 388 | | -static void __retire_engine_upto(struct intel_engine_cs *engine, |
|---|
| 389 | | - struct i915_request *rq) |
|---|
| 390 | | -{ |
|---|
| 391 | | - struct i915_request *tmp; |
|---|
| 392 | | - |
|---|
| 393 | | - if (list_empty(&rq->link)) |
|---|
| 394 | | - return; |
|---|
| 395 | | - |
|---|
| 396 | | - do { |
|---|
| 397 | | - tmp = list_first_entry(&engine->timeline.requests, |
|---|
| 398 | | - typeof(*tmp), link); |
|---|
| 399 | | - |
|---|
| 400 | | - GEM_BUG_ON(tmp->engine != engine); |
|---|
| 401 | | - __retire_engine_request(engine, tmp); |
|---|
| 402 | | - } while (tmp != rq); |
|---|
| 403 | | -} |
|---|
| 404 | | - |
|---|
| 405 | | -static void i915_request_retire(struct i915_request *request) |
|---|
| 406 | | -{ |
|---|
| 407 | | - struct i915_gem_active *active, *next; |
|---|
| 408 | | - |
|---|
| 409 | | - GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", |
|---|
| 410 | | - request->engine->name, |
|---|
| 411 | | - request->fence.context, request->fence.seqno, |
|---|
| 412 | | - request->global_seqno, |
|---|
| 413 | | - intel_engine_get_seqno(request->engine)); |
|---|
| 414 | | - |
|---|
| 415 | | - lockdep_assert_held(&request->i915->drm.struct_mutex); |
|---|
| 416 | | - GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); |
|---|
| 417 | | - GEM_BUG_ON(!i915_request_completed(request)); |
|---|
| 418 | | - |
|---|
| 419 | | - trace_i915_request_retire(request); |
|---|
| 420 | | - |
|---|
| 421 | | - advance_ring(request); |
|---|
| 422 | | - free_capture_list(request); |
|---|
| 423 | | - |
|---|
| 424 | | - /* |
|---|
| 425 | | - * Walk through the active list, calling retire on each. This allows |
|---|
| 426 | | - * objects to track their GPU activity and mark themselves as idle |
|---|
| 427 | | - * when their *last* active request is completed (updating state |
|---|
| 428 | | - * tracking lists for eviction, active references for GEM, etc). |
|---|
| 315 | + * We only loosely track inflight requests across preemption, |
|---|
| 316 | + * and so we may find ourselves attempting to retire a _completed_ |
|---|
| 317 | + * request that we have removed from the HW and put back on a run |
|---|
| 318 | + * queue. |
|---|
| 429 | 319 | * |
|---|
| 430 | | - * As the ->retire() may free the node, we decouple it first and |
|---|
| 431 | | - * pass along the auxiliary information (to avoid dereferencing |
|---|
| 432 | | - * the node after the callback). |
|---|
| 320 | + * As we set I915_FENCE_FLAG_ACTIVE on the request, this should be |
|---|
| 321 | + * after removing the breadcrumb and signaling it, so that we do not |
|---|
| 322 | + * inadvertently attach the breadcrumb to a completed request. |
|---|
| 433 | 323 | */ |
|---|
| 434 | | - list_for_each_entry_safe(active, next, &request->active_list, link) { |
|---|
| 435 | | - /* |
|---|
| 436 | | - * In microbenchmarks or focusing upon time inside the kernel, |
|---|
| 437 | | - * we may spend an inordinate amount of time simply handling |
|---|
| 438 | | - * the retirement of requests and processing their callbacks. |
|---|
| 439 | | - * Of which, this loop itself is particularly hot due to the |
|---|
| 440 | | - * cache misses when jumping around the list of i915_gem_active. |
|---|
| 441 | | - * So we try to keep this loop as streamlined as possible and |
|---|
| 442 | | - * also prefetch the next i915_gem_active to try and hide |
|---|
| 443 | | - * the likely cache miss. |
|---|
| 444 | | - */ |
|---|
| 445 | | - prefetchw(next); |
|---|
| 324 | + remove_from_engine(rq); |
|---|
| 325 | + GEM_BUG_ON(!llist_empty(&rq->execute_cb)); |
|---|
| 446 | 326 | |
|---|
| 447 | | - INIT_LIST_HEAD(&active->link); |
|---|
| 448 | | - RCU_INIT_POINTER(active->request, NULL); |
|---|
| 327 | + __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */ |
|---|
| 449 | 328 | |
|---|
| 450 | | - active->retire(active, request); |
|---|
| 451 | | - } |
|---|
| 329 | + intel_context_exit(rq->context); |
|---|
| 330 | + intel_context_unpin(rq->context); |
|---|
| 452 | 331 | |
|---|
| 453 | | - i915_request_remove_from_client(request); |
|---|
| 332 | + free_capture_list(rq); |
|---|
| 333 | + i915_sched_node_fini(&rq->sched); |
|---|
| 334 | + i915_request_put(rq); |
|---|
| 454 | 335 | |
|---|
| 455 | | - /* Retirement decays the ban score as it is a sign of ctx progress */ |
|---|
| 456 | | - atomic_dec_if_positive(&request->gem_context->ban_score); |
|---|
| 457 | | - intel_context_unpin(request->hw_context); |
|---|
| 458 | | - |
|---|
| 459 | | - __retire_engine_upto(request->engine, request); |
|---|
| 460 | | - |
|---|
| 461 | | - unreserve_gt(request->i915); |
|---|
| 462 | | - |
|---|
| 463 | | - i915_sched_node_fini(request->i915, &request->sched); |
|---|
| 464 | | - i915_request_put(request); |
|---|
| 336 | + return true; |
|---|
| 465 | 337 | } |
|---|
| 466 | 338 | |
|---|
| 467 | 339 | void i915_request_retire_upto(struct i915_request *rq) |
|---|
| 468 | 340 | { |
|---|
| 469 | | - struct intel_ring *ring = rq->ring; |
|---|
| 341 | + struct intel_timeline * const tl = i915_request_timeline(rq); |
|---|
| 470 | 342 | struct i915_request *tmp; |
|---|
| 471 | 343 | |
|---|
| 472 | | - GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", |
|---|
| 473 | | - rq->engine->name, |
|---|
| 474 | | - rq->fence.context, rq->fence.seqno, |
|---|
| 475 | | - rq->global_seqno, |
|---|
| 476 | | - intel_engine_get_seqno(rq->engine)); |
|---|
| 344 | + RQ_TRACE(rq, "\n"); |
|---|
| 477 | 345 | |
|---|
| 478 | | - lockdep_assert_held(&rq->i915->drm.struct_mutex); |
|---|
| 479 | 346 | GEM_BUG_ON(!i915_request_completed(rq)); |
|---|
| 480 | 347 | |
|---|
| 481 | | - if (list_empty(&rq->ring_link)) |
|---|
| 348 | + do { |
|---|
| 349 | + tmp = list_first_entry(&tl->requests, typeof(*tmp), link); |
|---|
| 350 | + } while (i915_request_retire(tmp) && tmp != rq); |
|---|
| 351 | +} |
|---|
| 352 | + |
|---|
| 353 | +static struct i915_request * const * |
|---|
| 354 | +__engine_active(struct intel_engine_cs *engine) |
|---|
| 355 | +{ |
|---|
| 356 | + return READ_ONCE(engine->execlists.active); |
|---|
| 357 | +} |
|---|
| 358 | + |
|---|
| 359 | +static bool __request_in_flight(const struct i915_request *signal) |
|---|
| 360 | +{ |
|---|
| 361 | + struct i915_request * const *port, *rq; |
|---|
| 362 | + bool inflight = false; |
|---|
| 363 | + |
|---|
| 364 | + if (!i915_request_is_ready(signal)) |
|---|
| 365 | + return false; |
|---|
| 366 | + |
|---|
| 367 | + /* |
|---|
| 368 | + * Even if we have unwound the request, it may still be on |
|---|
| 369 | + * the GPU (preempt-to-busy). If that request is inside an |
|---|
| 370 | + * unpreemptible critical section, it will not be removed. Some |
|---|
| 371 | + * GPU functions may even be stuck waiting for the paired request |
|---|
| 372 | + * (__await_execution) to be submitted and cannot be preempted |
|---|
| 373 | + * until the bond is executing. |
|---|
| 374 | + * |
|---|
| 375 | + * As we know that there are always preemption points between |
|---|
| 376 | + * requests, we know that only the currently executing request |
|---|
| 377 | + * may be still active even though we have cleared the flag. |
|---|
| 378 | + * However, we can't rely on our tracking of ELSP[0] to know |
|---|
| 379 | + * which request is currently active and so may be stuck, as |
|---|
| 380 | + * the tracking may be an event behind. Instead assume that |
|---|
| 381 | + * if the context is still inflight, then it is still active |
|---|
| 382 | + * even if the active flag has been cleared. |
|---|
| 383 | + * |
|---|
| 384 | + * To further complicate matters, if there is a pending promotion, the HW |
|---|
| 385 | + * may either perform a context switch to the second inflight execlists, |
|---|
| 386 | + * or it may switch to the pending set of execlists. In the case of the |
|---|
| 387 | + * latter, it may send the ACK and we process the event copying the |
|---|
| 388 | + * pending[] over top of inflight[], _overwriting_ our *active. Since |
|---|
| 390 | + * this implies the HW is arbitrating and not stuck in *active, we do |
|---|
| 390 | + * not worry about complete accuracy, but we do require no read/write |
|---|
| 391 | + * tearing of the pointer [the read of the pointer must be valid, even |
|---|
| 392 | + * as the array is being overwritten, for which we require the writes |
|---|
| 393 | + * to avoid tearing.] |
|---|
| 394 | + * |
|---|
| 395 | + * Note that the read of *execlists->active may race with the promotion |
|---|
| 396 | + * of execlists->pending[] to execlists->inflight[], overwriting |
|---|
| 397 | + * the value at *execlists->active. This is fine. The promotion implies |
|---|
| 398 | + * that we received an ACK from the HW, and so the context is not |
|---|
| 399 | + * stuck -- if we do not see ourselves in *active, the inflight status |
|---|
| 400 | + * is valid. If instead we see ourselves being copied into *active, |
|---|
| 401 | + * we are inflight and may signal the callback. |
|---|
| 402 | + */ |
|---|
| 403 | + if (!intel_context_inflight(signal->context)) |
|---|
| 404 | + return false; |
|---|
| 405 | + |
|---|
| 406 | + rcu_read_lock(); |
|---|
| 407 | + for (port = __engine_active(signal->engine); |
|---|
| 408 | + (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */ |
|---|
| 409 | + port++) { |
|---|
| 410 | + if (rq->context == signal->context) { |
|---|
| 411 | + inflight = i915_seqno_passed(rq->fence.seqno, |
|---|
| 412 | + signal->fence.seqno); |
|---|
| 413 | + break; |
|---|
| 414 | + } |
|---|
| 415 | + } |
|---|
| 416 | + rcu_read_unlock(); |
|---|
| 417 | + |
|---|
| 418 | + return inflight; |
|---|
| 419 | +} |
|---|
| 420 | + |
|---|
| 421 | +static int |
|---|
| 422 | +__await_execution(struct i915_request *rq, |
|---|
| 423 | + struct i915_request *signal, |
|---|
| 424 | + void (*hook)(struct i915_request *rq, |
|---|
| 425 | + struct dma_fence *signal), |
|---|
| 426 | + gfp_t gfp) |
|---|
| 427 | +{ |
|---|
| 428 | + struct execute_cb *cb; |
|---|
| 429 | + |
|---|
| 430 | + if (i915_request_is_active(signal)) { |
|---|
| 431 | + if (hook) |
|---|
| 432 | + hook(rq, &signal->fence); |
|---|
| 433 | + return 0; |
|---|
| 434 | + } |
|---|
| 435 | + |
|---|
| 436 | + cb = kmem_cache_alloc(global.slab_execute_cbs, gfp); |
|---|
| 437 | + if (!cb) |
|---|
| 438 | + return -ENOMEM; |
|---|
| 439 | + |
|---|
| 440 | + cb->fence = &rq->submit; |
|---|
| 441 | + i915_sw_fence_await(cb->fence); |
|---|
| 442 | + init_irq_work(&cb->work, irq_execute_cb); |
|---|
| 443 | + |
|---|
| 444 | + if (hook) { |
|---|
| 445 | + cb->hook = hook; |
|---|
| 446 | + cb->signal = i915_request_get(signal); |
|---|
| 447 | + cb->work.func = irq_execute_cb_hook; |
|---|
| 448 | + } |
|---|
| 449 | + |
|---|
| 450 | + /* |
|---|
| 451 | + * Register the callback first, then see if the signaler is already |
|---|
| 452 | + * active. This ensures that if we race with the |
|---|
| 453 | + * __notify_execute_cb from i915_request_submit() and we are not |
|---|
| 454 | + * included in that list, we get a second bite of the cherry and |
|---|
| 455 | + * execute it ourselves. After this point, a future |
|---|
| 456 | + * i915_request_submit() will notify us. |
|---|
| 457 | + * |
|---|
| 458 | + * In i915_request_retire() we set the ACTIVE bit on a completed |
|---|
| 459 | + * request (then flush the execute_cb). So by registering the |
|---|
| 460 | + * callback first, then checking the ACTIVE bit, we serialise with |
|---|
| 461 | + * the completed/retired request. |
|---|
| 462 | + */ |
|---|
| 463 | + if (llist_add(&cb->work.llnode, &signal->execute_cb)) { |
|---|
| 464 | + if (i915_request_is_active(signal) || |
|---|
| 465 | + __request_in_flight(signal)) |
|---|
| 466 | + __notify_execute_cb_imm(signal); |
|---|
| 467 | + } |
|---|
| 468 | + |
|---|
| 469 | + return 0; |
|---|
| 470 | +} |
|---|
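`__await_execution()` closes the usual publish/check race by adding the callback to `signal->execute_cb` first and only then re-checking whether the signaler is already active; if it is, the waiter flushes the list itself instead of relying on a notification that may already have fired. The same ordering, reduced to a flag plus a lock-free list in C11 atomics (all names here are illustrative):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct cb { struct cb *next; void (*fn)(struct cb *cb); };

static _Atomic(struct cb *) callbacks;	/* stands in for rq->execute_cb */
static atomic_bool active;		/* stands in for I915_FENCE_FLAG_ACTIVE */

static void run_callbacks(void)
{
	struct cb *list = atomic_exchange(&callbacks, NULL);

	while (list) {
		struct cb *next = list->next;

		list->fn(list);
		list = next;
	}
}

static void add_callback(struct cb *cb)
{
	/* 1. Publish the callback... */
	cb->next = atomic_load(&callbacks);
	while (!atomic_compare_exchange_weak(&callbacks, &cb->next, cb))
		;

	/*
	 * 2. ...then check the flag. If the producer flipped it before our
	 * publish, its flush may have missed us, so run the list ourselves.
	 */
	if (atomic_load(&active))
		run_callbacks();
}
```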
| 471 | + |
|---|
| 472 | +static bool fatal_error(int error) |
|---|
| 473 | +{ |
|---|
| 474 | + switch (error) { |
|---|
| 475 | + case 0: /* not an error! */ |
|---|
| 476 | + case -EAGAIN: /* innocent victim of a GT reset (__i915_request_reset) */ |
|---|
| 477 | + case -ETIMEDOUT: /* waiting for Godot (timer_i915_sw_fence_wake) */ |
|---|
| 478 | + return false; |
|---|
| 479 | + default: |
|---|
| 480 | + return true; |
|---|
| 481 | + } |
|---|
| 482 | +} |
|---|
| 483 | + |
|---|
| 484 | +void __i915_request_skip(struct i915_request *rq) |
|---|
| 485 | +{ |
|---|
| 486 | + GEM_BUG_ON(!fatal_error(rq->fence.error)); |
|---|
| 487 | + |
|---|
| 488 | + if (rq->infix == rq->postfix) |
|---|
| 482 | 489 | return; |
|---|
| 483 | 490 | |
|---|
| 491 | + /* |
|---|
| 492 | + * As this request likely depends on state from the lost |
|---|
| 493 | + * context, clear out all the user operations leaving the |
|---|
| 494 | + * breadcrumb at the end (so we get the fence notifications). |
|---|
| 495 | + */ |
|---|
| 496 | + __i915_request_fill(rq, 0); |
|---|
| 497 | + rq->infix = rq->postfix; |
|---|
| 498 | +} |
|---|
| 499 | + |
|---|
| 500 | +void i915_request_set_error_once(struct i915_request *rq, int error) |
|---|
| 501 | +{ |
|---|
| 502 | + int old; |
|---|
| 503 | + |
|---|
| 504 | + GEM_BUG_ON(!IS_ERR_VALUE((long)error)); |
|---|
| 505 | + |
|---|
| 506 | + if (i915_request_signaled(rq)) |
|---|
| 507 | + return; |
|---|
| 508 | + |
|---|
| 509 | + old = READ_ONCE(rq->fence.error); |
|---|
| 484 | 510 | do { |
|---|
| 485 | | - tmp = list_first_entry(&ring->request_list, |
|---|
| 486 | | - typeof(*tmp), ring_link); |
|---|
| 487 | | - |
|---|
| 488 | | - i915_request_retire(tmp); |
|---|
| 489 | | - } while (tmp != rq); |
|---|
| 511 | + if (fatal_error(old)) |
|---|
| 512 | + return; |
|---|
| 513 | + } while (!try_cmpxchg(&rq->fence.error, &old, error)); |
|---|
| 490 | 514 | } |
|---|
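`i915_request_set_error_once()` is a small compare-and-swap loop: benign errors (0, -EAGAIN, -ETIMEDOUT) may be upgraded, but the first fatal error sticks. The same "set once unless already fatal" idiom in C11 atomics, with the fatal test mirroring `fatal_error()` above (the `atomic_int` slot is illustrative):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <errno.h>

static bool fatal(int error)
{
	switch (error) {
	case 0:			/* not an error */
	case -EAGAIN:		/* transient, e.g. a reset victim */
	case -ETIMEDOUT:	/* transient wait failure */
		return false;
	default:
		return true;
	}
}

static void set_error_once(atomic_int *slot, int error)
{
	int old = atomic_load(slot);

	/* The first fatal error wins; later callers observe it and bail. */
	do {
		if (fatal(old))
			return;
	} while (!atomic_compare_exchange_weak(slot, &old, error));
}
```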
| 491 | 515 | |
|---|
| 492 | | -static u32 timeline_get_seqno(struct i915_timeline *tl) |
|---|
| 493 | | -{ |
|---|
| 494 | | - return ++tl->seqno; |
|---|
| 495 | | -} |
|---|
| 496 | | - |
|---|
| 497 | | -static void move_to_timeline(struct i915_request *request, |
|---|
| 498 | | - struct i915_timeline *timeline) |
|---|
| 499 | | -{ |
|---|
| 500 | | - GEM_BUG_ON(request->timeline == &request->engine->timeline); |
|---|
| 501 | | - lockdep_assert_held(&request->engine->timeline.lock); |
|---|
| 502 | | - |
|---|
| 503 | | - spin_lock(&request->timeline->lock); |
|---|
| 504 | | - list_move_tail(&request->link, &timeline->requests); |
|---|
| 505 | | - spin_unlock(&request->timeline->lock); |
|---|
| 506 | | -} |
|---|
| 507 | | - |
|---|
| 508 | | -void __i915_request_submit(struct i915_request *request) |
|---|
| 516 | +bool __i915_request_submit(struct i915_request *request) |
|---|
| 509 | 517 | { |
|---|
| 510 | 518 | struct intel_engine_cs *engine = request->engine; |
|---|
| 511 | | - u32 seqno; |
|---|
| 519 | + bool result = false; |
|---|
| 512 | 520 | |
|---|
| 513 | | - GEM_TRACE("%s fence %llx:%d -> global=%d, current %d\n", |
|---|
| 514 | | - engine->name, |
|---|
| 515 | | - request->fence.context, request->fence.seqno, |
|---|
| 516 | | - engine->timeline.seqno + 1, |
|---|
| 517 | | - intel_engine_get_seqno(engine)); |
|---|
| 521 | + RQ_TRACE(request, "\n"); |
|---|
| 518 | 522 | |
|---|
| 519 | 523 | GEM_BUG_ON(!irqs_disabled()); |
|---|
| 520 | | - lockdep_assert_held(&engine->timeline.lock); |
|---|
| 524 | + lockdep_assert_held(&engine->active.lock); |
|---|
| 521 | 525 | |
|---|
| 522 | | - GEM_BUG_ON(request->global_seqno); |
|---|
| 526 | + /* |
|---|
| 527 | + * With the advent of preempt-to-busy, we frequently encounter |
|---|
| 528 | + * requests that we have unsubmitted from HW, but left running |
|---|
| 529 | + * until the next ack and so have completed in the meantime. On |
|---|
| 530 | + * resubmission of that completed request, we can skip |
|---|
| 531 | + * updating the payload, and execlists can even skip submitting |
|---|
| 532 | + * the request. |
|---|
| 533 | + * |
|---|
| 534 | + * We must remove the request from the caller's priority queue, |
|---|
| 535 | + * and the caller must only call us when the request is in their |
|---|
| 536 | + * priority queue, under the active.lock. This ensures that the |
|---|
| 537 | + * request has *not* yet been retired and we can safely move |
|---|
| 538 | + * the request into the engine->active.list where it will be |
|---|
| 539 | + * dropped upon retiring. (Otherwise if we resubmit a *retired* |
|---|
| 540 | + * request, this would be a horrible use-after-free.) |
|---|
| 541 | + */ |
|---|
| 542 | + if (i915_request_completed(request)) |
|---|
| 543 | + goto xfer; |
|---|
| 523 | 544 | |
|---|
| 524 | | - seqno = timeline_get_seqno(&engine->timeline); |
|---|
| 525 | | - GEM_BUG_ON(!seqno); |
|---|
| 526 | | - GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); |
|---|
| 545 | + if (unlikely(intel_context_is_closed(request->context) && |
|---|
| 546 | + !intel_engine_has_heartbeat(engine))) |
|---|
| 547 | + intel_context_set_banned(request->context); |
|---|
| 527 | 548 | |
|---|
| 528 | | - /* We may be recursing from the signal callback of another i915 fence */ |
|---|
| 529 | | - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); |
|---|
| 530 | | - request->global_seqno = seqno; |
|---|
| 531 | | - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) |
|---|
| 532 | | - intel_engine_enable_signaling(request, false); |
|---|
| 533 | | - spin_unlock(&request->lock); |
|---|
| 549 | + if (unlikely(intel_context_is_banned(request->context))) |
|---|
| 550 | + i915_request_set_error_once(request, -EIO); |
|---|
| 534 | 551 | |
|---|
| 535 | | - engine->emit_breadcrumb(request, |
|---|
| 536 | | - request->ring->vaddr + request->postfix); |
|---|
| 552 | + if (unlikely(fatal_error(request->fence.error))) |
|---|
| 553 | + __i915_request_skip(request); |
|---|
| 537 | 554 | |
|---|
| 538 | | - /* Transfer from per-context onto the global per-engine timeline */ |
|---|
| 539 | | - move_to_timeline(request, &engine->timeline); |
|---|
| 555 | + /* |
|---|
| 556 | + * Are we using semaphores when the gpu is already saturated? |
|---|
| 557 | + * |
|---|
| 558 | + * Using semaphores incurs a cost in having the GPU poll a |
|---|
| 559 | + * memory location, busywaiting for it to change. The continual |
|---|
| 560 | + * memory reads can have a noticeable impact on the rest of the |
|---|
| 561 | + * system with the extra bus traffic, stalling the cpu as it too |
|---|
| 562 | + * tries to access memory across the bus (perf stat -e bus-cycles). |
|---|
| 563 | + * |
|---|
| 564 | + * If we installed a semaphore on this request and we only submit |
|---|
| 565 | + * the request after the signaler completed, that indicates the |
|---|
| 566 | + * system is overloaded and using semaphores at this time only |
|---|
| 567 | + * increases the amount of work we are doing. If so, we disable |
|---|
| 568 | + * further use of semaphores until we are idle again, whence we |
|---|
| 569 | + * optimistically try again. |
|---|
| 570 | + */ |
|---|
| 571 | + if (request->sched.semaphores && |
|---|
| 572 | + i915_sw_fence_signaled(&request->semaphore)) |
|---|
| 573 | + engine->saturated |= request->sched.semaphores; |
|---|
| 574 | + |
|---|
| 575 | + engine->emit_fini_breadcrumb(request, |
|---|
| 576 | + request->ring->vaddr + request->postfix); |
|---|
| 540 | 577 | |
|---|
| 541 | 578 | trace_i915_request_execute(request); |
|---|
| 579 | + engine->serial++; |
|---|
| 580 | + result = true; |
|---|
| 542 | 581 | |
|---|
| 543 | | - wake_up_all(&request->execute); |
|---|
| 582 | +xfer: |
|---|
| 583 | + if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) { |
|---|
| 584 | + list_move_tail(&request->sched.link, &engine->active.requests); |
|---|
| 585 | + clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); |
|---|
| 586 | + } |
|---|
| 587 | + |
|---|
| 588 | + /* |
|---|
| 589 | + * XXX Rollback bonded-execution on __i915_request_unsubmit()? |
|---|
| 590 | + * |
|---|
| 591 | + * In the future, perhaps when we have an active time-slicing scheduler, |
|---|
| 592 | + * it will be interesting to unsubmit parallel execution and remove |
|---|
| 593 | + * busywaits from the GPU until their master is restarted. This is |
|---|
| 594 | + * quite hairy, we have to carefully rollback the fence and do a |
|---|
| 595 | + * preempt-to-idle cycle on the target engine, all the while the |
|---|
| 596 | + * master execute_cb may refire. |
|---|
| 597 | + */ |
|---|
| 598 | + __notify_execute_cb_irq(request); |
|---|
| 599 | + |
|---|
| 600 | + /* We may be recursing from the signal callback of another i915 fence */ |
|---|
| 601 | + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) |
|---|
| 602 | + i915_request_enable_breadcrumb(request); |
|---|
| 603 | + |
|---|
| 604 | + return result; |
|---|
| 544 | 605 | } |
|---|
| 545 | 606 | |
|---|
| 546 | 607 | void i915_request_submit(struct i915_request *request) |
|---|
| .. | .. |
|---|
| 549 | 610 | unsigned long flags; |
|---|
| 550 | 611 | |
|---|
| 551 | 612 | /* Will be called from irq-context when using foreign fences. */ |
|---|
| 552 | | - spin_lock_irqsave(&engine->timeline.lock, flags); |
|---|
| 613 | + spin_lock_irqsave(&engine->active.lock, flags); |
|---|
| 553 | 614 | |
|---|
| 554 | 615 | __i915_request_submit(request); |
|---|
| 555 | 616 | |
|---|
| 556 | | - spin_unlock_irqrestore(&engine->timeline.lock, flags); |
|---|
| 617 | + spin_unlock_irqrestore(&engine->active.lock, flags); |
|---|
| 557 | 618 | } |
|---|
| 558 | 619 | |
|---|
| 559 | 620 | void __i915_request_unsubmit(struct i915_request *request) |
|---|
| 560 | 621 | { |
|---|
| 561 | 622 | struct intel_engine_cs *engine = request->engine; |
|---|
| 562 | 623 | |
|---|
| 563 | | - GEM_TRACE("%s fence %llx:%d <- global=%d, current %d\n", |
|---|
| 564 | | - engine->name, |
|---|
| 565 | | - request->fence.context, request->fence.seqno, |
|---|
| 566 | | - request->global_seqno, |
|---|
| 567 | | - intel_engine_get_seqno(engine)); |
|---|
| 568 | | - |
|---|
| 569 | | - GEM_BUG_ON(!irqs_disabled()); |
|---|
| 570 | | - lockdep_assert_held(&engine->timeline.lock); |
|---|
| 571 | | - |
|---|
| 572 | 624 | /* |
|---|
| 573 | 625 | * Only unwind in reverse order, required so that the per-context list |
|---|
| 574 | 626 | * is kept in seqno/ring order. |
|---|
| 575 | 627 | */ |
|---|
| 576 | | - GEM_BUG_ON(!request->global_seqno); |
|---|
| 577 | | - GEM_BUG_ON(request->global_seqno != engine->timeline.seqno); |
|---|
| 578 | | - GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), |
|---|
| 579 | | - request->global_seqno)); |
|---|
| 580 | | - engine->timeline.seqno--; |
|---|
| 628 | + RQ_TRACE(request, "\n"); |
|---|
| 581 | 629 | |
|---|
| 582 | | - /* We may be recursing from the signal callback of another i915 fence */ |
|---|
| 583 | | - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); |
|---|
| 584 | | - request->global_seqno = 0; |
|---|
| 630 | + GEM_BUG_ON(!irqs_disabled()); |
|---|
| 631 | + lockdep_assert_held(&engine->active.lock); |
|---|
| 632 | + |
|---|
| 633 | + /* |
|---|
| 634 | + * Before we remove this breadcrumb from the signal list, we have |
|---|
| 635 | + * to ensure that a concurrent dma_fence_enable_signaling() does not |
|---|
| 636 | + * attach itself. We first mark the request as no longer active and |
|---|
| 637 | + * make sure that is visible to other cores, and then remove the |
|---|
| 638 | + * breadcrumb if attached. |
|---|
| 639 | + */ |
|---|
| 640 | + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); |
|---|
| 641 | + clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); |
|---|
| 585 | 642 | if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) |
|---|
| 586 | | - intel_engine_cancel_signaling(request); |
|---|
| 587 | | - spin_unlock(&request->lock); |
|---|
| 643 | + i915_request_cancel_breadcrumb(request); |
|---|
| 588 | 644 | |
|---|
| 589 | | - /* Transfer back from the global per-engine timeline to per-context */ |
|---|
| 590 | | - move_to_timeline(request, request->timeline); |
|---|
| 645 | + /* We've already spun, don't charge on resubmitting. */ |
|---|
| 646 | + if (request->sched.semaphores && i915_request_started(request)) |
|---|
| 647 | + request->sched.semaphores = 0; |
|---|
| 591 | 648 | |
|---|
| 592 | 649 | /* |
|---|
| 593 | 650 | * We don't need to wake_up any waiters on request->execute, they |
|---|
| .. | .. |
|---|
| 604 | 661 | unsigned long flags; |
|---|
| 605 | 662 | |
|---|
| 606 | 663 | /* Will be called from irq-context when using foreign fences. */ |
|---|
| 607 | | - spin_lock_irqsave(&engine->timeline.lock, flags); |
|---|
| 664 | + spin_lock_irqsave(&engine->active.lock, flags); |
|---|
| 608 | 665 | |
|---|
| 609 | 666 | __i915_request_unsubmit(request); |
|---|
| 610 | 667 | |
|---|
| 611 | | - spin_unlock_irqrestore(&engine->timeline.lock, flags); |
|---|
| 668 | + spin_unlock_irqrestore(&engine->active.lock, flags); |
|---|
| 612 | 669 | } |
|---|
| 613 | 670 | |
|---|
| 614 | 671 | static int __i915_sw_fence_call |
|---|
| .. | .. |
|---|
| 620 | 677 | switch (state) { |
|---|
| 621 | 678 | case FENCE_COMPLETE: |
|---|
| 622 | 679 | trace_i915_request_submit(request); |
|---|
| 680 | + |
|---|
| 681 | + if (unlikely(fence->error)) |
|---|
| 682 | + i915_request_set_error_once(request, fence->error); |
|---|
| 683 | + |
|---|
| 623 | 684 | /* |
|---|
| 624 | 685 | * We need to serialize use of the submit_request() callback |
|---|
| 625 | 686 | * with its hotplugging performed during an emergency |
|---|
| .. | .. |
|---|
| 641 | 702 | return NOTIFY_DONE; |
|---|
| 642 | 703 | } |
|---|
| 643 | 704 | |
|---|
| 644 | | -/** |
|---|
| 645 | | - * i915_request_alloc - allocate a request structure |
|---|
| 646 | | - * |
|---|
| 647 | | - * @engine: engine that we wish to issue the request on. |
|---|
| 648 | | - * @ctx: context that the request will be associated with. |
|---|
| 649 | | - * |
|---|
| 650 | | - * Returns a pointer to the allocated request if successful, |
|---|
| 651 | | - * or an error code if not. |
|---|
| 652 | | - */ |
|---|
| 653 | | -struct i915_request * |
|---|
| 654 | | -i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) |
|---|
| 705 | +static int __i915_sw_fence_call |
|---|
| 706 | +semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) |
|---|
| 655 | 707 | { |
|---|
| 656 | | - struct drm_i915_private *i915 = engine->i915; |
|---|
| 708 | + struct i915_request *rq = container_of(fence, typeof(*rq), semaphore); |
|---|
| 709 | + |
|---|
| 710 | + switch (state) { |
|---|
| 711 | + case FENCE_COMPLETE: |
|---|
| 712 | + break; |
|---|
| 713 | + |
|---|
| 714 | + case FENCE_FREE: |
|---|
| 715 | + i915_request_put(rq); |
|---|
| 716 | + break; |
|---|
| 717 | + } |
|---|
| 718 | + |
|---|
| 719 | + return NOTIFY_DONE; |
|---|
| 720 | +} |
|---|
| 721 | + |
|---|
| 722 | +static void retire_requests(struct intel_timeline *tl) |
|---|
| 723 | +{ |
|---|
| 724 | + struct i915_request *rq, *rn; |
|---|
| 725 | + |
|---|
| 726 | + list_for_each_entry_safe(rq, rn, &tl->requests, link) |
|---|
| 727 | + if (!i915_request_retire(rq)) |
|---|
| 728 | + break; |
|---|
| 729 | +} |
|---|
| 730 | + |
|---|
| 731 | +static noinline struct i915_request * |
|---|
| 732 | +request_alloc_slow(struct intel_timeline *tl, |
|---|
| 733 | + struct i915_request **rsvd, |
|---|
| 734 | + gfp_t gfp) |
|---|
| 735 | +{ |
|---|
| 657 | 736 | struct i915_request *rq; |
|---|
| 658 | | - struct intel_context *ce; |
|---|
| 659 | | - int ret; |
|---|
| 660 | 737 | |
|---|
| 661 | | - lockdep_assert_held(&i915->drm.struct_mutex); |
|---|
| 738 | + /* If we cannot wait, dip into our reserves */ |
|---|
| 739 | + if (!gfpflags_allow_blocking(gfp)) { |
|---|
| 740 | + rq = xchg(rsvd, NULL); |
|---|
| 741 | + if (!rq) /* Use the normal failure path for one final WARN */ |
|---|
| 742 | + goto out; |
|---|
| 662 | 743 | |
|---|
| 663 | | - /* |
|---|
| 664 | | - * Preempt contexts are reserved for exclusive use to inject a |
|---|
| 665 | | - * preemption context switch. They are never to be used for any trivial |
|---|
| 666 | | - * request! |
|---|
| 667 | | - */ |
|---|
| 668 | | - GEM_BUG_ON(ctx == i915->preempt_context); |
|---|
| 744 | + return rq; |
|---|
| 745 | + } |
|---|
| 669 | 746 | |
|---|
| 670 | | - /* |
|---|
| 671 | | - * ABI: Before userspace accesses the GPU (e.g. execbuffer), report |
|---|
| 672 | | - * EIO if the GPU is already wedged. |
|---|
| 673 | | - */ |
|---|
| 674 | | - if (i915_terminally_wedged(&i915->gpu_error)) |
|---|
| 675 | | - return ERR_PTR(-EIO); |
|---|
| 676 | | - |
|---|
| 677 | | - /* |
|---|
| 678 | | - * Pinning the contexts may generate requests in order to acquire |
|---|
| 679 | | - * GGTT space, so do this first before we reserve a seqno for |
|---|
| 680 | | - * ourselves. |
|---|
| 681 | | - */ |
|---|
| 682 | | - ce = intel_context_pin(ctx, engine); |
|---|
| 683 | | - if (IS_ERR(ce)) |
|---|
| 684 | | - return ERR_CAST(ce); |
|---|
| 685 | | - |
|---|
| 686 | | - ret = reserve_gt(i915); |
|---|
| 687 | | - if (ret) |
|---|
| 688 | | - goto err_unpin; |
|---|
| 689 | | - |
|---|
| 690 | | - ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST); |
|---|
| 691 | | - if (ret) |
|---|
| 692 | | - goto err_unreserve; |
|---|
| 747 | + if (list_empty(&tl->requests)) |
|---|
| 748 | + goto out; |
|---|
| 693 | 749 | |
|---|
| 694 | 750 | /* Move our oldest request to the slab-cache (if not in use!) */ |
|---|
| 695 | | - rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); |
|---|
| 696 | | - if (!list_is_last(&rq->ring_link, &ce->ring->request_list) && |
|---|
| 697 | | - i915_request_completed(rq)) |
|---|
| 698 | | - i915_request_retire(rq); |
|---|
| 751 | + rq = list_first_entry(&tl->requests, typeof(*rq), link); |
|---|
| 752 | + i915_request_retire(rq); |
|---|
| 753 | + |
|---|
| 754 | + rq = kmem_cache_alloc(global.slab_requests, |
|---|
| 755 | + gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); |
|---|
| 756 | + if (rq) |
|---|
| 757 | + return rq; |
|---|
| 758 | + |
|---|
| 759 | + /* Ratelimit ourselves to prevent oom from malicious clients */ |
|---|
| 760 | + rq = list_last_entry(&tl->requests, typeof(*rq), link); |
|---|
| 761 | + cond_synchronize_rcu(rq->rcustate); |
|---|
| 762 | + |
|---|
| 763 | + /* Retire our old requests in the hope that we free some */ |
|---|
| 764 | + retire_requests(tl); |
|---|
| 765 | + |
|---|
| 766 | +out: |
|---|
| 767 | + return kmem_cache_alloc(global.slab_requests, gfp); |
|---|
| 768 | +} |
|---|
| 769 | + |
|---|
| 770 | +static void __i915_request_ctor(void *arg) |
|---|
| 771 | +{ |
|---|
| 772 | + struct i915_request *rq = arg; |
|---|
| 773 | + |
|---|
| 774 | + spin_lock_init(&rq->lock); |
|---|
| 775 | + i915_sched_node_init(&rq->sched); |
|---|
| 776 | + i915_sw_fence_init(&rq->submit, submit_notify); |
|---|
| 777 | + i915_sw_fence_init(&rq->semaphore, semaphore_notify); |
|---|
| 778 | + |
|---|
| 779 | + rq->capture_list = NULL; |
|---|
| 780 | + |
|---|
| 781 | + init_llist_head(&rq->execute_cb); |
|---|
| 782 | +} |
|---|
| 783 | + |
|---|
| 784 | +struct i915_request * |
|---|
| 785 | +__i915_request_create(struct intel_context *ce, gfp_t gfp) |
|---|
| 786 | +{ |
|---|
| 787 | + struct intel_timeline *tl = ce->timeline; |
|---|
| 788 | + struct i915_request *rq; |
|---|
| 789 | + u32 seqno; |
|---|
| 790 | + int ret; |
|---|
| 791 | + |
|---|
| 792 | + might_sleep_if(gfpflags_allow_blocking(gfp)); |
|---|
| 793 | + |
|---|
| 794 | + /* Check that the caller provided an already pinned context */ |
|---|
| 795 | + __intel_context_pin(ce); |
|---|
| 699 | 796 | |
|---|
| 700 | 797 | /* |
|---|
| 701 | 798 | * Beware: Dragons be flying overhead. |
|---|
| .. | .. |
|---|
| 703 | 800 | * We use RCU to look up requests in flight. The lookups may |
|---|
| 704 | 801 | * race with the request being allocated from the slab freelist. |
|---|
| 705 | 802 | * That is, the request we are writing to here may be in the process
|---|
| 706 | | - * of being read by __i915_gem_active_get_rcu(). As such, |
|---|
| 803 | + * of being read by __i915_active_request_get_rcu(). As such, |
|---|
| 707 | 804 | * we have to be very careful when overwriting the contents. During |
|---|
| 708 | 805 | * the RCU lookup, we chase the request->engine pointer,
|---|
| 709 | 806 | * read the request->global_seqno and increment the reference count. |
|---|
| .. | .. |
|---|
| 726 | 823 | * |
|---|
| 727 | 824 | * Do not use kmem_cache_zalloc() here! |
|---|
| 728 | 825 | */ |
|---|
| 729 | | - rq = kmem_cache_alloc(i915->requests, |
|---|
| 730 | | - GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); |
|---|
| 826 | + rq = kmem_cache_alloc(global.slab_requests, |
|---|
| 827 | + gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); |
|---|
| 731 | 828 | if (unlikely(!rq)) { |
|---|
| 732 | | - /* Ratelimit ourselves to prevent oom from malicious clients */ |
|---|
| 733 | | - ret = i915_gem_wait_for_idle(i915, |
|---|
| 734 | | - I915_WAIT_LOCKED | |
|---|
| 735 | | - I915_WAIT_INTERRUPTIBLE, |
|---|
| 736 | | - MAX_SCHEDULE_TIMEOUT); |
|---|
| 737 | | - if (ret) |
|---|
| 738 | | - goto err_unreserve; |
|---|
| 739 | | - |
|---|
| 740 | | - /* |
|---|
| 741 | | - * We've forced the client to stall and catch up with whatever |
|---|
| 742 | | - * backlog there might have been. As we are assuming that we |
|---|
| 743 | | - * caused the mempressure, now is an opportune time to |
|---|
| 744 | | - * recover as much memory from the request pool as is possible. |
|---|
| 745 | | - * Having already penalized the client to stall, we spend |
|---|
| 746 | | - * a little extra time to re-optimise page allocation. |
|---|
| 747 | | - */ |
|---|
| 748 | | - kmem_cache_shrink(i915->requests); |
|---|
| 749 | | - rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */ |
|---|
| 750 | | - |
|---|
| 751 | | - rq = kmem_cache_alloc(i915->requests, GFP_KERNEL); |
|---|
| 829 | + rq = request_alloc_slow(tl, &ce->engine->request_pool, gfp); |
|---|
| 752 | 830 | if (!rq) { |
|---|
| 753 | 831 | ret = -ENOMEM; |
|---|
| 754 | 832 | goto err_unreserve; |
|---|
| 755 | 833 | } |
|---|
| 756 | 834 | } |
|---|
| 757 | 835 | |
|---|
| 758 | | - INIT_LIST_HEAD(&rq->active_list); |
|---|
| 759 | | - rq->i915 = i915; |
|---|
| 760 | | - rq->engine = engine; |
|---|
| 761 | | - rq->gem_context = ctx; |
|---|
| 762 | | - rq->hw_context = ce; |
|---|
| 836 | + rq->context = ce; |
|---|
| 837 | + rq->engine = ce->engine; |
|---|
| 763 | 838 | rq->ring = ce->ring; |
|---|
| 764 | | - rq->timeline = ce->ring->timeline; |
|---|
| 765 | | - GEM_BUG_ON(rq->timeline == &engine->timeline); |
|---|
| 839 | + rq->execution_mask = ce->engine->mask; |
|---|
| 766 | 840 | |
|---|
| 767 | | - spin_lock_init(&rq->lock); |
|---|
| 768 | | - dma_fence_init(&rq->fence, |
|---|
| 769 | | - &i915_fence_ops, |
|---|
| 770 | | - &rq->lock, |
|---|
| 771 | | - rq->timeline->fence_context, |
|---|
| 772 | | - timeline_get_seqno(rq->timeline)); |
|---|
| 841 | + ret = intel_timeline_get_seqno(tl, rq, &seqno); |
|---|
| 842 | + if (ret) |
|---|
| 843 | + goto err_free; |
|---|
| 844 | + |
|---|
| 845 | + dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, |
|---|
| 846 | + tl->fence_context, seqno); |
|---|
| 847 | + |
|---|
| 848 | + RCU_INIT_POINTER(rq->timeline, tl); |
|---|
| 849 | + RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); |
|---|
| 850 | + rq->hwsp_seqno = tl->hwsp_seqno; |
|---|
| 851 | + GEM_BUG_ON(i915_request_completed(rq)); |
|---|
| 852 | + |
|---|
| 853 | + rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ |
|---|
| 773 | 854 | |
|---|
| 774 | 855 | /* We bump the ref for the fence chain */ |
|---|
| 775 | | - i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); |
|---|
| 776 | | - init_waitqueue_head(&rq->execute); |
|---|
| 856 | + i915_sw_fence_reinit(&i915_request_get(rq)->submit); |
|---|
| 857 | + i915_sw_fence_reinit(&i915_request_get(rq)->semaphore); |
|---|
| 777 | 858 | |
|---|
| 778 | | - i915_sched_node_init(&rq->sched); |
|---|
| 859 | + i915_sched_node_reinit(&rq->sched); |
|---|
| 779 | 860 | |
|---|
| 780 | | - /* No zalloc, must clear what we need by hand */ |
|---|
| 781 | | - rq->global_seqno = 0; |
|---|
| 782 | | - rq->signaling.wait.seqno = 0; |
|---|
| 783 | | - rq->file_priv = NULL; |
|---|
| 861 | + /* No zalloc, everything must be cleared after use */ |
|---|
| 784 | 862 | rq->batch = NULL; |
|---|
| 785 | | - rq->capture_list = NULL; |
|---|
| 786 | | - rq->waitboost = false; |
|---|
| 863 | + GEM_BUG_ON(rq->capture_list); |
|---|
| 864 | + GEM_BUG_ON(!llist_empty(&rq->execute_cb)); |
|---|
| 787 | 865 | |
|---|
| 788 | 866 | /* |
|---|
| 789 | 867 | * Reserve space in the ring buffer for all the commands required to |
|---|
| .. | .. |
|---|
| 791 | 869 | * i915_request_add() call can't fail. Note that the reserve may need |
|---|
| 792 | 870 | * to be redone if the request is not actually submitted straight |
|---|
| 793 | 871 | * away, e.g. because a GPU scheduler has deferred it. |
|---|
| 872 | + * |
|---|
| 873 | + * Note that due to how we add reserved_space to intel_ring_begin() |
|---|
| 874 | + * we need to double our request to ensure that if we need to wrap |
|---|
| 875 | + * around inside i915_request_add() there is sufficient space at |
|---|
| 876 | + * the beginning of the ring as well. |
|---|
| 794 | 877 | */ |
|---|
| 795 | | - rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; |
|---|
| 796 | | - GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz); |
|---|
| 878 | + rq->reserved_space = |
|---|
| 879 | + 2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32); |
|---|
| 797 | 880 | |
|---|
| 798 | 881 | /* |
|---|
| 799 | 882 | * Record the position of the start of the request so that |
|---|
| .. | .. |
|---|
| 803 | 886 | */ |
|---|
| 804 | 887 | rq->head = rq->ring->emit; |
|---|
| 805 | 888 | |
|---|
| 806 | | - /* Unconditionally invalidate GPU caches and TLBs. */ |
|---|
| 807 | | - ret = engine->emit_flush(rq, EMIT_INVALIDATE); |
|---|
| 889 | + ret = rq->engine->request_alloc(rq); |
|---|
| 808 | 890 | if (ret) |
|---|
| 809 | 891 | goto err_unwind; |
|---|
| 810 | | - |
|---|
| 811 | | - ret = engine->request_alloc(rq); |
|---|
| 812 | | - if (ret) |
|---|
| 813 | | - goto err_unwind; |
|---|
| 814 | | - |
|---|
| 815 | | - /* Keep a second pin for the dual retirement along engine and ring */ |
|---|
| 816 | | - __intel_context_pin(ce); |
|---|
| 817 | 892 | |
|---|
| 818 | 893 | rq->infix = rq->ring->emit; /* end of header; start of user payload */ |
|---|
| 819 | 894 | |
|---|
| 820 | | - /* Check that we didn't interrupt ourselves with a new request */ |
|---|
| 821 | | - GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); |
|---|
| 895 | + intel_context_mark_active(ce); |
|---|
| 896 | + list_add_tail_rcu(&rq->link, &tl->requests); |
|---|
| 897 | + |
|---|
| 822 | 898 | return rq; |
|---|
| 823 | 899 | |
|---|
| 824 | 900 | err_unwind: |
|---|
| 825 | 901 | ce->ring->emit = rq->head; |
|---|
| 826 | 902 | |
|---|
| 827 | 903 | /* Make sure we didn't add ourselves to external state before freeing */ |
|---|
| 828 | | - GEM_BUG_ON(!list_empty(&rq->active_list)); |
|---|
| 829 | 904 | GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); |
|---|
| 830 | 905 | GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); |
|---|
| 831 | 906 | |
|---|
| 832 | | - kmem_cache_free(i915->requests, rq); |
|---|
| 907 | +err_free: |
|---|
| 908 | + kmem_cache_free(global.slab_requests, rq); |
|---|
| 833 | 909 | err_unreserve: |
|---|
| 834 | | - unreserve_gt(i915); |
|---|
| 835 | | -err_unpin: |
|---|
| 836 | 910 | intel_context_unpin(ce); |
|---|
| 837 | 911 | return ERR_PTR(ret); |
|---|
| 912 | +} |
|---|
| 913 | + |
|---|
| 914 | +struct i915_request * |
|---|
| 915 | +i915_request_create(struct intel_context *ce) |
|---|
| 916 | +{ |
|---|
| 917 | + struct i915_request *rq; |
|---|
| 918 | + struct intel_timeline *tl; |
|---|
| 919 | + |
|---|
| 920 | + tl = intel_context_timeline_lock(ce); |
|---|
| 921 | + if (IS_ERR(tl)) |
|---|
| 922 | + return ERR_CAST(tl); |
|---|
| 923 | + |
|---|
| 924 | + /* Move our oldest request to the slab-cache (if not in use!) */ |
|---|
| 925 | + rq = list_first_entry(&tl->requests, typeof(*rq), link); |
|---|
| 926 | + if (!list_is_last(&rq->link, &tl->requests)) |
|---|
| 927 | + i915_request_retire(rq); |
|---|
| 928 | + |
|---|
| 929 | + intel_context_enter(ce); |
|---|
| 930 | + rq = __i915_request_create(ce, GFP_KERNEL); |
|---|
| 931 | + intel_context_exit(ce); /* active reference transferred to request */ |
|---|
| 932 | + if (IS_ERR(rq)) |
|---|
| 933 | + goto err_unlock; |
|---|
| 934 | + |
|---|
| 935 | + /* Check that we do not interrupt ourselves with a new request */ |
|---|
| 936 | + rq->cookie = lockdep_pin_lock(&tl->mutex); |
|---|
| 937 | + |
|---|
| 938 | + return rq; |
|---|
| 939 | + |
|---|
| 940 | +err_unlock: |
|---|
| 941 | + intel_context_timeline_unlock(tl); |
|---|
| 942 | + return rq; |
|---|
| 943 | +} |
|---|
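For callers, the pairing of i915_request_create() and i915_request_add() is the whole lifecycle: create returns with the context's timeline mutex held (lockdep-pinned via rq->cookie), the caller emits its payload into the ring, and add commits the request and drops the mutex. Below is a minimal sketch of that calling sequence, assuming the caller already owns a pinned intel_context; the helper name and the NOOP payload are illustrative only.

```c
/*
 * Minimal sketch of the expected calling sequence around
 * i915_request_create()/i915_request_add(). 'ce' is assumed to be a
 * pinned intel_context owned by the caller; the payload here is just
 * a pair of NOOPs for illustration.
 */
static int example_emit_nops(struct intel_context *ce)
{
	struct i915_request *rq;
	u32 *cs;

	rq = i915_request_create(ce); /* returns with ce->timeline->mutex held */
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 2); /* payload goes between create and add */
	if (IS_ERR(cs)) {
		i915_request_add(rq); /* must still commit to drop the mutex */
		return PTR_ERR(cs);
	}

	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	i915_request_add(rq); /* commits, queues and unlocks the timeline */
	return 0;
}
```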
| 944 | + |
|---|
| 945 | +static int |
|---|
| 946 | +i915_request_await_start(struct i915_request *rq, struct i915_request *signal) |
|---|
| 947 | +{ |
|---|
| 948 | + struct dma_fence *fence; |
|---|
| 949 | + int err; |
|---|
| 950 | + |
|---|
| 951 | + if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline)) |
|---|
| 952 | + return 0; |
|---|
| 953 | + |
|---|
| 954 | + if (i915_request_started(signal)) |
|---|
| 955 | + return 0; |
|---|
| 956 | + |
|---|
| 957 | + fence = NULL; |
|---|
| 958 | + rcu_read_lock(); |
|---|
| 959 | + spin_lock_irq(&signal->lock); |
|---|
| 960 | + do { |
|---|
| 961 | + struct list_head *pos = READ_ONCE(signal->link.prev); |
|---|
| 962 | + struct i915_request *prev; |
|---|
| 963 | + |
|---|
| 964 | + /* Confirm signal has not been retired, the link is valid */ |
|---|
| 965 | + if (unlikely(i915_request_started(signal))) |
|---|
| 966 | + break; |
|---|
| 967 | + |
|---|
| 968 | + /* Is signal the earliest request on its timeline? */ |
|---|
| 969 | + if (pos == &rcu_dereference(signal->timeline)->requests) |
|---|
| 970 | + break; |
|---|
| 971 | + |
|---|
| 972 | + /* |
|---|
| 973 | + * Peek at the request before us in the timeline. That |
|---|
| 974 | + * request will only be valid before it is retired, so |
|---|
| 975 | + * after acquiring a reference to it, confirm that it is |
|---|
| 976 | + * still part of the signaler's timeline. |
|---|
| 977 | + */ |
|---|
| 978 | + prev = list_entry(pos, typeof(*prev), link); |
|---|
| 979 | + if (!i915_request_get_rcu(prev)) |
|---|
| 980 | + break; |
|---|
| 981 | + |
|---|
| 982 | + /* After the strong barrier, confirm prev is still attached */ |
|---|
| 983 | + if (unlikely(READ_ONCE(prev->link.next) != &signal->link)) { |
|---|
| 984 | + i915_request_put(prev); |
|---|
| 985 | + break; |
|---|
| 986 | + } |
|---|
| 987 | + |
|---|
| 988 | + fence = &prev->fence; |
|---|
| 989 | + } while (0); |
|---|
| 990 | + spin_unlock_irq(&signal->lock); |
|---|
| 991 | + rcu_read_unlock(); |
|---|
| 992 | + if (!fence) |
|---|
| 993 | + return 0; |
|---|
| 994 | + |
|---|
| 995 | + err = 0; |
|---|
| 996 | + if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence)) |
|---|
| 997 | + err = i915_sw_fence_await_dma_fence(&rq->submit, |
|---|
| 998 | + fence, 0, |
|---|
| 999 | + I915_FENCE_GFP); |
|---|
| 1000 | + dma_fence_put(fence); |
|---|
| 1001 | + |
|---|
| 1002 | + return err; |
|---|
| 1003 | +} |
|---|
| 1004 | + |
|---|
| 1005 | +static intel_engine_mask_t |
|---|
| 1006 | +already_busywaiting(struct i915_request *rq) |
|---|
| 1007 | +{ |
|---|
| 1008 | + /* |
|---|
| 1009 | + * Polling a semaphore causes bus traffic, delaying other users of |
|---|
| 1010 | + * both the GPU and CPU. We want to limit the impact on others, |
|---|
| 1011 | + * while taking advantage of early submission to reduce GPU |
|---|
| 1012 | + * latency. Therefore we restrict ourselves to not using more |
|---|
| 1013 | + * than one semaphore from each source, and not using a semaphore |
|---|
| 1014 | + * if we have detected the engine is saturated (i.e. would not be |
|---|
| 1015 | + * submitted early and cause bus traffic reading an already passed |
|---|
| 1016 | + * semaphore). |
|---|
| 1017 | + * |
|---|
| 1018 | + * See the are-we-too-late? check in __i915_request_submit(). |
|---|
| 1019 | + */ |
|---|
| 1020 | + return rq->sched.semaphores | READ_ONCE(rq->engine->saturated); |
|---|
| 1021 | +} |
|---|
| 1022 | + |
|---|
| 1023 | +static int |
|---|
| 1024 | +__emit_semaphore_wait(struct i915_request *to, |
|---|
| 1025 | + struct i915_request *from, |
|---|
| 1026 | + u32 seqno) |
|---|
| 1027 | +{ |
|---|
| 1028 | + const int has_token = INTEL_GEN(to->engine->i915) >= 12; |
|---|
| 1029 | + u32 hwsp_offset; |
|---|
| 1030 | + int len, err; |
|---|
| 1031 | + u32 *cs; |
|---|
| 1032 | + |
|---|
| 1033 | + GEM_BUG_ON(INTEL_GEN(to->engine->i915) < 8); |
|---|
| 1034 | + GEM_BUG_ON(i915_request_has_initial_breadcrumb(to)); |
|---|
| 1035 | + |
|---|
| 1036 | + /* We need to pin the signaler's HWSP until we are finished reading. */ |
|---|
| 1037 | + err = intel_timeline_read_hwsp(from, to, &hwsp_offset); |
|---|
| 1038 | + if (err) |
|---|
| 1039 | + return err; |
|---|
| 1040 | + |
|---|
| 1041 | + len = 4; |
|---|
| 1042 | + if (has_token) |
|---|
| 1043 | + len += 2; |
|---|
| 1044 | + |
|---|
| 1045 | + cs = intel_ring_begin(to, len); |
|---|
| 1046 | + if (IS_ERR(cs)) |
|---|
| 1047 | + return PTR_ERR(cs); |
|---|
| 1048 | + |
|---|
| 1049 | + /* |
|---|
| 1050 | + * Using greater-than-or-equal here means we have to worry |
|---|
| 1051 | + * about seqno wraparound. To side step that issue, we swap |
|---|
| 1052 | + * the timeline HWSP upon wrapping, so that everyone listening |
|---|
| 1053 | + * for the old (pre-wrap) values does not see much smaller
|---|
| 1054 | + * (post-wrap) values than they were expecting (and so wait |
|---|
| 1055 | + * forever). |
|---|
| 1056 | + */ |
|---|
| 1057 | + *cs++ = (MI_SEMAPHORE_WAIT | |
|---|
| 1058 | + MI_SEMAPHORE_GLOBAL_GTT | |
|---|
| 1059 | + MI_SEMAPHORE_POLL | |
|---|
| 1060 | + MI_SEMAPHORE_SAD_GTE_SDD) + |
|---|
| 1061 | + has_token; |
|---|
| 1062 | + *cs++ = seqno; |
|---|
| 1063 | + *cs++ = hwsp_offset; |
|---|
| 1064 | + *cs++ = 0; |
|---|
| 1065 | + if (has_token) { |
|---|
| 1066 | + *cs++ = 0; |
|---|
| 1067 | + *cs++ = MI_NOOP; |
|---|
| 1068 | + } |
|---|
| 1069 | + |
|---|
| 1070 | + intel_ring_advance(to, cs); |
|---|
| 1071 | + return 0; |
|---|
| 1072 | +} |
|---|
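On the CPU side the driver avoids the wraparound hazard described in the comment above by comparing seqnos through a signed difference (this is how i915_seqno_passed() is defined); the hardware's greater-than-or-equal semaphore poll has no such trick available, hence the HWSP swap on wrap. For reference, a sketch of the wraparound-safe test (the helper name is illustrative):

```c
/*
 * Wraparound-safe seqno ordering test, as used by the CPU-side helpers
 * (i915_seqno_passed() is defined this way). A plain 'seq1 >= seq2'
 * misorders requests once the u32 seqno wraps, which is why the HW's
 * MI_SEMAPHORE_SAD_GTE_SDD poll needs the HWSP swap described above.
 */
static inline bool example_seqno_passed(u32 seq1, u32 seq2)
{
	return (s32)(seq1 - seq2) >= 0;
}
```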
| 1073 | + |
|---|
| 1074 | +static int |
|---|
| 1075 | +emit_semaphore_wait(struct i915_request *to, |
|---|
| 1076 | + struct i915_request *from, |
|---|
| 1077 | + gfp_t gfp) |
|---|
| 1078 | +{ |
|---|
| 1079 | + const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask; |
|---|
| 1080 | + struct i915_sw_fence *wait = &to->submit; |
|---|
| 1081 | + |
|---|
| 1082 | + if (!intel_context_use_semaphores(to->context)) |
|---|
| 1083 | + goto await_fence; |
|---|
| 1084 | + |
|---|
| 1085 | + if (i915_request_has_initial_breadcrumb(to)) |
|---|
| 1086 | + goto await_fence; |
|---|
| 1087 | + |
|---|
| 1088 | + if (!rcu_access_pointer(from->hwsp_cacheline)) |
|---|
| 1089 | + goto await_fence; |
|---|
| 1090 | + |
|---|
| 1091 | + /* |
|---|
| 1092 | + * If this or its dependents are waiting on an external fence |
|---|
| 1093 | + * that may fail catastrophically, then we want to avoid using |
|---|
| 1094 | + * semaphores as they bypass the fence signaling metadata, and we
|---|
| 1095 | + * lose the fence->error propagation. |
|---|
| 1096 | + */ |
|---|
| 1097 | + if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN) |
|---|
| 1098 | + goto await_fence; |
|---|
| 1099 | + |
|---|
| 1100 | + /* Just emit the first semaphore we see as request space is limited. */ |
|---|
| 1101 | + if (already_busywaiting(to) & mask) |
|---|
| 1102 | + goto await_fence; |
|---|
| 1103 | + |
|---|
| 1104 | + if (i915_request_await_start(to, from) < 0) |
|---|
| 1105 | + goto await_fence; |
|---|
| 1106 | + |
|---|
| 1107 | + /* Only submit our spinner after the signaler is running! */ |
|---|
| 1108 | + if (__await_execution(to, from, NULL, gfp)) |
|---|
| 1109 | + goto await_fence; |
|---|
| 1110 | + |
|---|
| 1111 | + if (__emit_semaphore_wait(to, from, from->fence.seqno)) |
|---|
| 1112 | + goto await_fence; |
|---|
| 1113 | + |
|---|
| 1114 | + to->sched.semaphores |= mask; |
|---|
| 1115 | + wait = &to->semaphore; |
|---|
| 1116 | + |
|---|
| 1117 | +await_fence: |
|---|
| 1118 | + return i915_sw_fence_await_dma_fence(wait, |
|---|
| 1119 | + &from->fence, 0, |
|---|
| 1120 | + I915_FENCE_GFP); |
|---|
| 1121 | +} |
|---|
| 1122 | + |
|---|
| 1123 | +static bool intel_timeline_sync_has_start(struct intel_timeline *tl, |
|---|
| 1124 | + struct dma_fence *fence) |
|---|
| 1125 | +{ |
|---|
| 1126 | + return __intel_timeline_sync_is_later(tl, |
|---|
| 1127 | + fence->context, |
|---|
| 1128 | + fence->seqno - 1); |
|---|
| 1129 | +} |
|---|
| 1130 | + |
|---|
| 1131 | +static int intel_timeline_sync_set_start(struct intel_timeline *tl, |
|---|
| 1132 | + const struct dma_fence *fence) |
|---|
| 1133 | +{ |
|---|
| 1134 | + return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1); |
|---|
| 1135 | +} |
|---|
| 1136 | + |
|---|
| 1137 | +static int |
|---|
| 1138 | +__i915_request_await_execution(struct i915_request *to, |
|---|
| 1139 | + struct i915_request *from, |
|---|
| 1140 | + void (*hook)(struct i915_request *rq, |
|---|
| 1141 | + struct dma_fence *signal)) |
|---|
| 1142 | +{ |
|---|
| 1143 | + int err; |
|---|
| 1144 | + |
|---|
| 1145 | + GEM_BUG_ON(intel_context_is_barrier(from->context)); |
|---|
| 1146 | + |
|---|
| 1147 | + /* Submit both requests at the same time */ |
|---|
| 1148 | + err = __await_execution(to, from, hook, I915_FENCE_GFP); |
|---|
| 1149 | + if (err) |
|---|
| 1150 | + return err; |
|---|
| 1151 | + |
|---|
| 1152 | + /* Squash repeated dependencies to the same timelines */
|---|
| 1153 | + if (intel_timeline_sync_has_start(i915_request_timeline(to), |
|---|
| 1154 | + &from->fence)) |
|---|
| 1155 | + return 0; |
|---|
| 1156 | + |
|---|
| 1157 | + /* |
|---|
| 1158 | + * Wait until the start of this request. |
|---|
| 1159 | + * |
|---|
| 1160 | + * The execution cb fires when we submit the request to HW. But in |
|---|
| 1161 | + * many cases this may be long before the request itself is ready to |
|---|
| 1162 | + * run (consider that we submit 2 requests for the same context, where |
|---|
| 1163 | + * the request of interest is behind an indefinite spinner). So we hook |
|---|
| 1164 | + * up to both to reduce our queues and keep the execution lag minimised |
|---|
| 1165 | + * in the worst case, though we hope that the await_start is elided. |
|---|
| 1166 | + */ |
|---|
| 1167 | + err = i915_request_await_start(to, from); |
|---|
| 1168 | + if (err < 0) |
|---|
| 1169 | + return err; |
|---|
| 1170 | + |
|---|
| 1171 | + /* |
|---|
| 1172 | + * Ensure both start together [after all semaphores in signal] |
|---|
| 1173 | + * |
|---|
| 1174 | + * Now that we are queued to the HW at roughly the same time (thanks |
|---|
| 1175 | + * to the execute cb) and are ready to run at roughly the same time |
|---|
| 1176 | + * (thanks to the await start), our signaler may still be indefinitely |
|---|
| 1177 | + * delayed by waiting on a semaphore from a remote engine. If our |
|---|
| 1178 | + * signaler depends on a semaphore, so indirectly do we, and we do not |
|---|
| 1179 | + * want to start our payload until our signaler also starts theirs. |
|---|
| 1180 | + * So we wait. |
|---|
| 1181 | + * |
|---|
| 1182 | + * However, there is also a second condition for which we need to wait |
|---|
| 1183 | + * for the precise start of the signaler. Consider that the signaler |
|---|
| 1184 | + * was submitted in a chain of requests following another context |
|---|
| 1185 | + * (with just an ordinary intra-engine fence dependency between the |
|---|
| 1186 | + * two). In this case the signaler is queued to HW, but not for |
|---|
| 1187 | + * immediate execution, and so we must wait until it reaches the |
|---|
| 1188 | + * active slot. |
|---|
| 1189 | + */ |
|---|
| 1190 | + if (intel_engine_has_semaphores(to->engine) && |
|---|
| 1191 | + !i915_request_has_initial_breadcrumb(to)) { |
|---|
| 1192 | + err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); |
|---|
| 1193 | + if (err < 0) |
|---|
| 1194 | + return err; |
|---|
| 1195 | + } |
|---|
| 1196 | + |
|---|
| 1197 | + /* Couple the dependency tree for PI on this exposed to->fence */ |
|---|
| 1198 | + if (to->engine->schedule) { |
|---|
| 1199 | + err = i915_sched_node_add_dependency(&to->sched, |
|---|
| 1200 | + &from->sched, |
|---|
| 1201 | + I915_DEPENDENCY_WEAK); |
|---|
| 1202 | + if (err < 0) |
|---|
| 1203 | + return err; |
|---|
| 1204 | + } |
|---|
| 1205 | + |
|---|
| 1206 | + return intel_timeline_sync_set_start(i915_request_timeline(to), |
|---|
| 1207 | + &from->fence); |
|---|
| 1208 | +} |
|---|
| 1209 | + |
|---|
| 1210 | +static void mark_external(struct i915_request *rq) |
|---|
| 1211 | +{ |
|---|
| 1212 | + /* |
|---|
| 1213 | + * The downside of using semaphores is that we lose metadata passing |
|---|
| 1214 | + * along the signaling chain. This is particularly nasty when we |
|---|
| 1215 | + * need to pass along a fatal error such as EFAULT or EDEADLK. For |
|---|
| 1216 | + * fatal errors we want to scrub the request before it is executed, |
|---|
| 1217 | + * which means that we cannot preload the request onto HW and have |
|---|
| 1218 | + * it wait upon a semaphore. |
|---|
| 1219 | + */ |
|---|
| 1220 | + rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN; |
|---|
| 1221 | +} |
|---|
| 1222 | + |
|---|
| 1223 | +static int |
|---|
| 1224 | +__i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) |
|---|
| 1225 | +{ |
|---|
| 1226 | + mark_external(rq); |
|---|
| 1227 | + return i915_sw_fence_await_dma_fence(&rq->submit, fence, |
|---|
| 1228 | + i915_fence_context_timeout(rq->engine->i915, |
|---|
| 1229 | + fence->context), |
|---|
| 1230 | + I915_FENCE_GFP); |
|---|
| 1231 | +} |
|---|
| 1232 | + |
|---|
| 1233 | +static int |
|---|
| 1234 | +i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) |
|---|
| 1235 | +{ |
|---|
| 1236 | + struct dma_fence *iter; |
|---|
| 1237 | + int err = 0; |
|---|
| 1238 | + |
|---|
| 1239 | + if (!to_dma_fence_chain(fence)) |
|---|
| 1240 | + return __i915_request_await_external(rq, fence); |
|---|
| 1241 | + |
|---|
| 1242 | + dma_fence_chain_for_each(iter, fence) { |
|---|
| 1243 | + struct dma_fence_chain *chain = to_dma_fence_chain(iter); |
|---|
| 1244 | + |
|---|
| 1245 | + if (!dma_fence_is_i915(chain->fence)) { |
|---|
| 1246 | + err = __i915_request_await_external(rq, iter); |
|---|
| 1247 | + break; |
|---|
| 1248 | + } |
|---|
| 1249 | + |
|---|
| 1250 | + err = i915_request_await_dma_fence(rq, chain->fence); |
|---|
| 1251 | + if (err < 0) |
|---|
| 1252 | + break; |
|---|
| 1253 | + } |
|---|
| 1254 | + |
|---|
| 1255 | + dma_fence_put(iter); |
|---|
| 1256 | + return err; |
|---|
| 1257 | +} |
|---|
| 1258 | + |
|---|
| 1259 | +int |
|---|
| 1260 | +i915_request_await_execution(struct i915_request *rq, |
|---|
| 1261 | + struct dma_fence *fence, |
|---|
| 1262 | + void (*hook)(struct i915_request *rq, |
|---|
| 1263 | + struct dma_fence *signal)) |
|---|
| 1264 | +{ |
|---|
| 1265 | + struct dma_fence **child = &fence; |
|---|
| 1266 | + unsigned int nchild = 1; |
|---|
| 1267 | + int ret; |
|---|
| 1268 | + |
|---|
| 1269 | + if (dma_fence_is_array(fence)) { |
|---|
| 1270 | + struct dma_fence_array *array = to_dma_fence_array(fence); |
|---|
| 1271 | + |
|---|
| 1272 | + /* XXX Error for signal-on-any fence arrays */ |
|---|
| 1273 | + |
|---|
| 1274 | + child = array->fences; |
|---|
| 1275 | + nchild = array->num_fences; |
|---|
| 1276 | + GEM_BUG_ON(!nchild); |
|---|
| 1277 | + } |
|---|
| 1278 | + |
|---|
| 1279 | + do { |
|---|
| 1280 | + fence = *child++; |
|---|
| 1281 | + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) |
|---|
| 1282 | + continue; |
|---|
| 1283 | + |
|---|
| 1284 | + if (fence->context == rq->fence.context) |
|---|
| 1285 | + continue; |
|---|
| 1286 | + |
|---|
| 1287 | + /* |
|---|
| 1288 | + * We don't squash repeated fence dependencies here as we |
|---|
| 1289 | + * want to run our callback in all cases. |
|---|
| 1290 | + */ |
|---|
| 1291 | + |
|---|
| 1292 | + if (dma_fence_is_i915(fence)) |
|---|
| 1293 | + ret = __i915_request_await_execution(rq, |
|---|
| 1294 | + to_request(fence), |
|---|
| 1295 | + hook); |
|---|
| 1296 | + else |
|---|
| 1297 | + ret = i915_request_await_external(rq, fence); |
|---|
| 1298 | + if (ret < 0) |
|---|
| 1299 | + return ret; |
|---|
| 1300 | + } while (--nchild); |
|---|
| 1301 | + |
|---|
| 1302 | + return 0; |
|---|
| 1303 | +} |
|---|
| 1304 | + |
|---|
| 1305 | +static int |
|---|
| 1306 | +await_request_submit(struct i915_request *to, struct i915_request *from) |
|---|
| 1307 | +{ |
|---|
| 1308 | + /* |
|---|
| 1309 | + * If we are waiting on a virtual engine, then it may be |
|---|
| 1310 | + * constrained to execute on a single engine *prior* to submission. |
|---|
| 1311 | + * When it is submitted, it will be first submitted to the virtual |
|---|
| 1312 | + * engine and then passed to the physical engine. We cannot allow |
|---|
| 1313 | + * the waiter to be submitted immediately to the physical engine |
|---|
| 1314 | + * as it may then bypass the virtual request. |
|---|
| 1315 | + */ |
|---|
| 1316 | + if (to->engine == READ_ONCE(from->engine)) |
|---|
| 1317 | + return i915_sw_fence_await_sw_fence_gfp(&to->submit, |
|---|
| 1318 | + &from->submit, |
|---|
| 1319 | + I915_FENCE_GFP); |
|---|
| 1320 | + else |
|---|
| 1321 | + return __i915_request_await_execution(to, from, NULL); |
|---|
| 838 | 1322 | } |
|---|
| 839 | 1323 | |
|---|
| 840 | 1324 | static int |
|---|
| .. | .. |
|---|
| 845 | 1329 | GEM_BUG_ON(to == from); |
|---|
| 846 | 1330 | GEM_BUG_ON(to->timeline == from->timeline); |
|---|
| 847 | 1331 | |
|---|
| 848 | | - if (i915_request_completed(from)) |
|---|
| 1332 | + if (i915_request_completed(from)) { |
|---|
| 1333 | + i915_sw_fence_set_error_once(&to->submit, from->fence.error); |
|---|
| 849 | 1334 | return 0; |
|---|
| 1335 | + } |
|---|
| 850 | 1336 | |
|---|
| 851 | 1337 | if (to->engine->schedule) { |
|---|
| 852 | | - ret = i915_sched_node_add_dependency(to->i915, |
|---|
| 853 | | - &to->sched, |
|---|
| 854 | | - &from->sched); |
|---|
| 1338 | + ret = i915_sched_node_add_dependency(&to->sched, |
|---|
| 1339 | + &from->sched, |
|---|
| 1340 | + I915_DEPENDENCY_EXTERNAL); |
|---|
| 855 | 1341 | if (ret < 0) |
|---|
| 856 | 1342 | return ret; |
|---|
| 857 | 1343 | } |
|---|
| 858 | 1344 | |
|---|
| 859 | | - if (to->engine == from->engine) { |
|---|
| 860 | | - ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, |
|---|
| 861 | | - &from->submit, |
|---|
| 862 | | - I915_FENCE_GFP); |
|---|
| 863 | | - return ret < 0 ? ret : 0; |
|---|
| 864 | | - } |
|---|
| 1345 | + if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask))) |
|---|
| 1346 | + ret = await_request_submit(to, from); |
|---|
| 1347 | + else |
|---|
| 1348 | + ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); |
|---|
| 1349 | + if (ret < 0) |
|---|
| 1350 | + return ret; |
|---|
| 865 | 1351 | |
|---|
| 866 | | - if (to->engine->semaphore.sync_to) { |
|---|
| 867 | | - u32 seqno; |
|---|
| 868 | | - |
|---|
| 869 | | - GEM_BUG_ON(!from->engine->semaphore.signal); |
|---|
| 870 | | - |
|---|
| 871 | | - seqno = i915_request_global_seqno(from); |
|---|
| 872 | | - if (!seqno) |
|---|
| 873 | | - goto await_dma_fence; |
|---|
| 874 | | - |
|---|
| 875 | | - if (seqno <= to->timeline->global_sync[from->engine->id]) |
|---|
| 876 | | - return 0; |
|---|
| 877 | | - |
|---|
| 878 | | - trace_i915_gem_ring_sync_to(to, from); |
|---|
| 879 | | - ret = to->engine->semaphore.sync_to(to, from); |
|---|
| 880 | | - if (ret) |
|---|
| 881 | | - return ret; |
|---|
| 882 | | - |
|---|
| 883 | | - to->timeline->global_sync[from->engine->id] = seqno; |
|---|
| 884 | | - return 0; |
|---|
| 885 | | - } |
|---|
| 886 | | - |
|---|
| 887 | | -await_dma_fence: |
|---|
| 888 | | - ret = i915_sw_fence_await_dma_fence(&to->submit, |
|---|
| 889 | | - &from->fence, 0, |
|---|
| 890 | | - I915_FENCE_GFP); |
|---|
| 891 | | - return ret < 0 ? ret : 0; |
|---|
| 1352 | + return 0; |
|---|
| 892 | 1353 | } |
|---|
| 893 | 1354 | |
|---|
| 894 | 1355 | int |
|---|
| .. | .. |
|---|
| 928 | 1389 | continue; |
|---|
| 929 | 1390 | |
|---|
| 930 | 1391 | /* Squash repeated waits to the same timelines */ |
|---|
| 931 | | - if (fence->context != rq->i915->mm.unordered_timeline && |
|---|
| 932 | | - i915_timeline_sync_is_later(rq->timeline, fence)) |
|---|
| 1392 | + if (fence->context && |
|---|
| 1393 | + intel_timeline_sync_is_later(i915_request_timeline(rq), |
|---|
| 1394 | + fence)) |
|---|
| 933 | 1395 | continue; |
|---|
| 934 | 1396 | |
|---|
| 935 | 1397 | if (dma_fence_is_i915(fence)) |
|---|
| 936 | 1398 | ret = i915_request_await_request(rq, to_request(fence)); |
|---|
| 937 | 1399 | else |
|---|
| 938 | | - ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, |
|---|
| 939 | | - I915_FENCE_TIMEOUT, |
|---|
| 940 | | - I915_FENCE_GFP); |
|---|
| 1400 | + ret = i915_request_await_external(rq, fence); |
|---|
| 941 | 1401 | if (ret < 0) |
|---|
| 942 | 1402 | return ret; |
|---|
| 943 | 1403 | |
|---|
| 944 | 1404 | /* Record the latest fence used against each timeline */ |
|---|
| 945 | | - if (fence->context != rq->i915->mm.unordered_timeline) |
|---|
| 946 | | - i915_timeline_sync_set(rq->timeline, fence); |
|---|
| 1405 | + if (fence->context) |
|---|
| 1406 | + intel_timeline_sync_set(i915_request_timeline(rq), |
|---|
| 1407 | + fence); |
|---|
| 947 | 1408 | } while (--nchild); |
|---|
| 948 | 1409 | |
|---|
| 949 | 1410 | return 0; |
|---|
| .. | .. |
|---|
| 981 | 1442 | struct dma_fence **shared; |
|---|
| 982 | 1443 | unsigned int count, i; |
|---|
| 983 | 1444 | |
|---|
| 984 | | - ret = reservation_object_get_fences_rcu(obj->resv, |
|---|
| 1445 | + ret = dma_resv_get_fences_rcu(obj->base.resv, |
|---|
| 985 | 1446 | &excl, &count, &shared); |
|---|
| 986 | 1447 | if (ret) |
|---|
| 987 | 1448 | return ret; |
|---|
| .. | .. |
|---|
| 998 | 1459 | dma_fence_put(shared[i]); |
|---|
| 999 | 1460 | kfree(shared); |
|---|
| 1000 | 1461 | } else { |
|---|
| 1001 | | - excl = reservation_object_get_excl_rcu(obj->resv); |
|---|
| 1462 | + excl = dma_resv_get_excl_rcu(obj->base.resv); |
|---|
| 1002 | 1463 | } |
|---|
| 1003 | 1464 | |
|---|
| 1004 | 1465 | if (excl) { |
|---|
| .. | .. |
|---|
| 1011 | 1472 | return ret; |
|---|
| 1012 | 1473 | } |
|---|
| 1013 | 1474 | |
|---|
| 1014 | | -void i915_request_skip(struct i915_request *rq, int error) |
|---|
| 1475 | +static struct i915_request * |
|---|
| 1476 | +__i915_request_add_to_timeline(struct i915_request *rq) |
|---|
| 1015 | 1477 | { |
|---|
| 1016 | | - void *vaddr = rq->ring->vaddr; |
|---|
| 1017 | | - u32 head; |
|---|
| 1018 | | - |
|---|
| 1019 | | - GEM_BUG_ON(!IS_ERR_VALUE((long)error)); |
|---|
| 1020 | | - dma_fence_set_error(&rq->fence, error); |
|---|
| 1478 | + struct intel_timeline *timeline = i915_request_timeline(rq); |
|---|
| 1479 | + struct i915_request *prev; |
|---|
| 1021 | 1480 | |
|---|
| 1022 | 1481 | /* |
|---|
| 1023 | | - * As this request likely depends on state from the lost |
|---|
| 1024 | | - * context, clear out all the user operations leaving the |
|---|
| 1025 | | - * breadcrumb at the end (so we get the fence notifications). |
|---|
| 1482 | + * Dependency tracking and request ordering along the timeline |
|---|
| 1483 | + * is special cased so that we can eliminate redundant ordering |
|---|
| 1484 | + * operations while building the request (we know that the timeline |
|---|
| 1485 | + * itself is ordered, and here we guarantee it). |
|---|
| 1486 | + * |
|---|
| 1487 | + * As we know we will need to emit tracking along the timeline, |
|---|
| 1488 | + * we embed the hooks into our request struct -- at the cost of |
|---|
| 1489 | + * having to have specialised no-allocation interfaces (which will |
|---|
| 1490 | + * be beneficial elsewhere). |
|---|
| 1491 | + * |
|---|
| 1492 | + * A second benefit to open-coding i915_request_await_request is |
|---|
| 1493 | + * that we can apply a slight variant of the rules specialised |
|---|
| 1494 | + * for timelines that jump between engines (such as virtual engines). |
|---|
| 1495 | + * If we consider the case of a virtual engine, we must emit a dma-fence
|---|
| 1496 | + * to prevent scheduling of the second request until the first is |
|---|
| 1497 | + * complete (to maximise our greedy late load balancing) and this |
|---|
| 1498 | + * precludes optimising to use semaphore serialisation of a single
|---|
| 1499 | + * timeline across engines. |
|---|
| 1026 | 1500 | */ |
|---|
| 1027 | | - head = rq->infix; |
|---|
| 1028 | | - if (rq->postfix < head) { |
|---|
| 1029 | | - memset(vaddr + head, 0, rq->ring->size - head); |
|---|
| 1030 | | - head = 0; |
|---|
| 1501 | + prev = to_request(__i915_active_fence_set(&timeline->last_request, |
|---|
| 1502 | + &rq->fence)); |
|---|
| 1503 | + if (prev && !i915_request_completed(prev)) { |
|---|
| 1504 | + /* |
|---|
| 1505 | + * The requests are supposed to be kept in order. However, |
|---|
| 1506 | + * we need to be wary in case the timeline->last_request |
|---|
| 1507 | + * is used as a barrier for external modification to this |
|---|
| 1508 | + * context. |
|---|
| 1509 | + */ |
|---|
| 1510 | + GEM_BUG_ON(prev->context == rq->context && |
|---|
| 1511 | + i915_seqno_passed(prev->fence.seqno, |
|---|
| 1512 | + rq->fence.seqno)); |
|---|
| 1513 | + |
|---|
| 1514 | + if (is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) |
|---|
| 1515 | + i915_sw_fence_await_sw_fence(&rq->submit, |
|---|
| 1516 | + &prev->submit, |
|---|
| 1517 | + &rq->submitq); |
|---|
| 1518 | + else |
|---|
| 1519 | + __i915_sw_fence_await_dma_fence(&rq->submit, |
|---|
| 1520 | + &prev->fence, |
|---|
| 1521 | + &rq->dmaq); |
|---|
| 1522 | + if (rq->engine->schedule) |
|---|
| 1523 | + __i915_sched_node_add_dependency(&rq->sched, |
|---|
| 1524 | + &prev->sched, |
|---|
| 1525 | + &rq->dep, |
|---|
| 1526 | + 0); |
|---|
| 1031 | 1527 | } |
|---|
| 1032 | | - memset(vaddr + head, 0, rq->postfix - head); |
|---|
| 1528 | + |
|---|
| 1529 | + /* |
|---|
| 1530 | + * Make sure that no request gazumped us - if it was allocated after |
|---|
| 1531 | + * our i915_request_alloc() and called __i915_request_add() before |
|---|
| 1532 | + * us, the timeline will hold its seqno which is later than ours. |
|---|
| 1533 | + */ |
|---|
| 1534 | + GEM_BUG_ON(timeline->seqno != rq->fence.seqno); |
|---|
| 1535 | + |
|---|
| 1536 | + return prev; |
|---|
| 1033 | 1537 | } |
|---|
| 1034 | 1538 | |
|---|
| 1035 | 1539 | /* |
|---|
| .. | .. |
|---|
| 1037 | 1541 | * request is not being tracked for completion but the work itself is |
|---|
| 1038 | 1542 | * going to happen on the hardware. This would be a Bad Thing(tm). |
|---|
| 1039 | 1543 | */ |
|---|
| 1040 | | -void i915_request_add(struct i915_request *request) |
|---|
| 1544 | +struct i915_request *__i915_request_commit(struct i915_request *rq) |
|---|
| 1041 | 1545 | { |
|---|
| 1042 | | - struct intel_engine_cs *engine = request->engine; |
|---|
| 1043 | | - struct i915_timeline *timeline = request->timeline; |
|---|
| 1044 | | - struct intel_ring *ring = request->ring; |
|---|
| 1045 | | - struct i915_request *prev; |
|---|
| 1546 | + struct intel_engine_cs *engine = rq->engine; |
|---|
| 1547 | + struct intel_ring *ring = rq->ring; |
|---|
| 1046 | 1548 | u32 *cs; |
|---|
| 1047 | 1549 | |
|---|
| 1048 | | - GEM_TRACE("%s fence %llx:%d\n", |
|---|
| 1049 | | - engine->name, request->fence.context, request->fence.seqno); |
|---|
| 1050 | | - |
|---|
| 1051 | | - lockdep_assert_held(&request->i915->drm.struct_mutex); |
|---|
| 1052 | | - trace_i915_request_add(request); |
|---|
| 1053 | | - |
|---|
| 1054 | | - /* |
|---|
| 1055 | | - * Make sure that no request gazumped us - if it was allocated after |
|---|
| 1056 | | - * our i915_request_alloc() and called __i915_request_add() before |
|---|
| 1057 | | - * us, the timeline will hold its seqno which is later than ours. |
|---|
| 1058 | | - */ |
|---|
| 1059 | | - GEM_BUG_ON(timeline->seqno != request->fence.seqno); |
|---|
| 1550 | + RQ_TRACE(rq, "\n"); |
|---|
| 1060 | 1551 | |
|---|
| 1061 | 1552 | /* |
|---|
| 1062 | 1553 | * To ensure that this call will not fail, space for its emissions |
|---|
| 1063 | 1554 | * should already have been reserved in the ring buffer. Let the ring |
|---|
| 1064 | 1555 | * know that it is time to use that space up. |
|---|
| 1065 | 1556 | */ |
|---|
| 1066 | | - request->reserved_space = 0; |
|---|
| 1067 | | - engine->emit_flush(request, EMIT_FLUSH); |
|---|
| 1557 | + GEM_BUG_ON(rq->reserved_space > ring->space); |
|---|
| 1558 | + rq->reserved_space = 0; |
|---|
| 1559 | + rq->emitted_jiffies = jiffies; |
|---|
| 1068 | 1560 | |
|---|
| 1069 | 1561 | /* |
|---|
| 1070 | 1562 | * Record the position of the start of the breadcrumb so that |
|---|
| .. | .. |
|---|
| 1072 | 1564 | * GPU processing the request, we never over-estimate the |
|---|
| 1073 | 1565 | * position of the ring's HEAD. |
|---|
| 1074 | 1566 | */ |
|---|
| 1075 | | - cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); |
|---|
| 1567 | + cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw); |
|---|
| 1076 | 1568 | GEM_BUG_ON(IS_ERR(cs)); |
|---|
| 1077 | | - request->postfix = intel_ring_offset(request, cs); |
|---|
| 1569 | + rq->postfix = intel_ring_offset(rq, cs); |
|---|
| 1078 | 1570 | |
|---|
| 1079 | | - /* |
|---|
| 1080 | | - * Seal the request and mark it as pending execution. Note that |
|---|
| 1081 | | - * we may inspect this state, without holding any locks, during |
|---|
| 1082 | | - * hangcheck. Hence we apply the barrier to ensure that we do not |
|---|
| 1083 | | - * see a more recent value in the hws than we are tracking. |
|---|
| 1084 | | - */ |
|---|
| 1571 | + return __i915_request_add_to_timeline(rq); |
|---|
| 1572 | +} |
|---|
| 1085 | 1573 | |
|---|
| 1086 | | - prev = i915_gem_active_raw(&timeline->last_request, |
|---|
| 1087 | | - &request->i915->drm.struct_mutex); |
|---|
| 1088 | | - if (prev && !i915_request_completed(prev)) { |
|---|
| 1089 | | - i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, |
|---|
| 1090 | | - &request->submitq); |
|---|
| 1091 | | - if (engine->schedule) |
|---|
| 1092 | | - __i915_sched_node_add_dependency(&request->sched, |
|---|
| 1093 | | - &prev->sched, |
|---|
| 1094 | | - &request->dep, |
|---|
| 1095 | | - 0); |
|---|
| 1096 | | - } |
|---|
| 1097 | | - |
|---|
| 1098 | | - spin_lock_irq(&timeline->lock); |
|---|
| 1099 | | - list_add_tail(&request->link, &timeline->requests); |
|---|
| 1100 | | - spin_unlock_irq(&timeline->lock); |
|---|
| 1101 | | - |
|---|
| 1102 | | - GEM_BUG_ON(timeline->seqno != request->fence.seqno); |
|---|
| 1103 | | - i915_gem_active_set(&timeline->last_request, request); |
|---|
| 1104 | | - |
|---|
| 1105 | | - list_add_tail(&request->ring_link, &ring->request_list); |
|---|
| 1106 | | - if (list_is_first(&request->ring_link, &ring->request_list)) { |
|---|
| 1107 | | - GEM_TRACE("marking %s as active\n", ring->timeline->name); |
|---|
| 1108 | | - list_add(&ring->active_link, &request->i915->gt.active_rings); |
|---|
| 1109 | | - } |
|---|
| 1110 | | - request->emitted_jiffies = jiffies; |
|---|
| 1111 | | - |
|---|
| 1574 | +void __i915_request_queue(struct i915_request *rq, |
|---|
| 1575 | + const struct i915_sched_attr *attr) |
|---|
| 1576 | +{ |
|---|
| 1112 | 1577 | /* |
|---|
| 1113 | 1578 | * Let the backend know a new request has arrived that may need |
|---|
| 1114 | 1579 | * to adjust the existing execution schedule due to a high priority |
|---|
| .. | .. |
|---|
| 1120 | 1585 | * decide whether to preempt the entire chain so that it is ready to |
|---|
| 1121 | 1586 | * run at the earliest possible convenience. |
|---|
| 1122 | 1587 | */ |
|---|
| 1123 | | - local_bh_disable(); |
|---|
| 1124 | | - rcu_read_lock(); /* RCU serialisation for set-wedged protection */ |
|---|
| 1125 | | - if (engine->schedule) |
|---|
| 1126 | | - engine->schedule(request, &request->gem_context->sched); |
|---|
| 1127 | | - rcu_read_unlock(); |
|---|
| 1128 | | - i915_sw_fence_commit(&request->submit); |
|---|
| 1129 | | - local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ |
|---|
| 1130 | | - |
|---|
| 1131 | | - /* |
|---|
| 1132 | | - * In typical scenarios, we do not expect the previous request on |
|---|
| 1133 | | - * the timeline to be still tracked by timeline->last_request if it |
|---|
| 1134 | | - * has been completed. If the completed request is still here, that |
|---|
| 1135 | | - * implies that request retirement is a long way behind submission, |
|---|
| 1136 | | - * suggesting that we haven't been retiring frequently enough from |
|---|
| 1137 | | - * the combination of retire-before-alloc, waiters and the background |
|---|
| 1138 | | - * retirement worker. So if the last request on this timeline was |
|---|
| 1139 | | - * already completed, do a catch up pass, flushing the retirement queue |
|---|
| 1140 | | - * up to this client. Since we have now moved the heaviest operations |
|---|
| 1141 | | - * during retirement onto secondary workers, such as freeing objects |
|---|
| 1142 | | - * or contexts, retiring a bunch of requests is mostly list management |
|---|
| 1143 | | - * (and cache misses), and so we should not be overly penalizing this |
|---|
| 1144 | | - * client by performing excess work, though we may still performing |
|---|
| 1145 | | - * work on behalf of others -- but instead we should benefit from |
|---|
| 1146 | | - * improved resource management. (Well, that's the theory at least.) |
|---|
| 1147 | | - */ |
|---|
| 1148 | | - if (prev && i915_request_completed(prev)) |
|---|
| 1149 | | - i915_request_retire_upto(prev); |
|---|
| 1588 | + if (attr && rq->engine->schedule) |
|---|
| 1589 | + rq->engine->schedule(rq, attr); |
|---|
| 1590 | + i915_sw_fence_commit(&rq->semaphore); |
|---|
| 1591 | + i915_sw_fence_commit(&rq->submit); |
|---|
| 1150 | 1592 | } |
|---|
| 1151 | 1593 | |
|---|
| 1152 | | -static unsigned long local_clock_us(unsigned int *cpu) |
|---|
| 1594 | +void i915_request_add(struct i915_request *rq) |
|---|
| 1595 | +{ |
|---|
| 1596 | + struct intel_timeline * const tl = i915_request_timeline(rq); |
|---|
| 1597 | + struct i915_sched_attr attr = {}; |
|---|
| 1598 | + struct i915_gem_context *ctx; |
|---|
| 1599 | + |
|---|
| 1600 | + lockdep_assert_held(&tl->mutex); |
|---|
| 1601 | + lockdep_unpin_lock(&tl->mutex, rq->cookie); |
|---|
| 1602 | + |
|---|
| 1603 | + trace_i915_request_add(rq); |
|---|
| 1604 | + __i915_request_commit(rq); |
|---|
| 1605 | + |
|---|
| 1606 | + /* XXX placeholder for selftests */ |
|---|
| 1607 | + rcu_read_lock(); |
|---|
| 1608 | + ctx = rcu_dereference(rq->context->gem_context); |
|---|
| 1609 | + if (ctx) |
|---|
| 1610 | + attr = ctx->sched; |
|---|
| 1611 | + rcu_read_unlock(); |
|---|
| 1612 | + |
|---|
| 1613 | + __i915_request_queue(rq, &attr); |
|---|
| 1614 | + |
|---|
| 1615 | + mutex_unlock(&tl->mutex); |
|---|
| 1616 | +} |
|---|
| 1617 | + |
|---|
| 1618 | +static unsigned long local_clock_ns(unsigned int *cpu) |
|---|
| 1153 | 1619 | { |
|---|
| 1154 | 1620 | unsigned long t; |
|---|
| 1155 | 1621 | |
|---|
| .. | .. |
|---|
| 1166 | 1632 | * stop busywaiting, see busywait_stop(). |
|---|
| 1167 | 1633 | */ |
|---|
| 1168 | 1634 | *cpu = get_cpu(); |
|---|
| 1169 | | - t = local_clock() >> 10; |
|---|
| 1635 | + t = local_clock(); |
|---|
| 1170 | 1636 | put_cpu(); |
|---|
| 1171 | 1637 | |
|---|
| 1172 | 1638 | return t; |
|---|
| .. | .. |
|---|
| 1176 | 1642 | { |
|---|
| 1177 | 1643 | unsigned int this_cpu; |
|---|
| 1178 | 1644 | |
|---|
| 1179 | | - if (time_after(local_clock_us(&this_cpu), timeout)) |
|---|
| 1645 | + if (time_after(local_clock_ns(&this_cpu), timeout)) |
|---|
| 1180 | 1646 | return true; |
|---|
| 1181 | 1647 | |
|---|
| 1182 | 1648 | return this_cpu != cpu; |
|---|
| 1183 | 1649 | } |
|---|
| 1184 | 1650 | |
|---|
| 1185 | | -static bool __i915_spin_request(const struct i915_request *rq, |
|---|
| 1186 | | - u32 seqno, int state, unsigned long timeout_us) |
|---|
| 1651 | +static bool __i915_spin_request(struct i915_request * const rq, int state) |
|---|
| 1187 | 1652 | { |
|---|
| 1188 | | - struct intel_engine_cs *engine = rq->engine; |
|---|
| 1189 | | - unsigned int irq, cpu; |
|---|
| 1190 | | - |
|---|
| 1191 | | - GEM_BUG_ON(!seqno); |
|---|
| 1653 | + unsigned long timeout_ns; |
|---|
| 1654 | + unsigned int cpu; |
|---|
| 1192 | 1655 | |
|---|
| 1193 | 1656 | /* |
|---|
| 1194 | 1657 | * Only wait for the request if we know it is likely to complete. |
|---|
| .. | .. |
|---|
| 1196 | 1659 | * We don't track the timestamps around requests, nor the average |
|---|
| 1197 | 1660 | * request length, so we do not have a good indicator that this |
|---|
| 1198 | 1661 | * request will complete within the timeout. What we do know is the |
|---|
| 1199 | | - * order in which requests are executed by the engine and so we can |
|---|
| 1200 | | - * tell if the request has started. If the request hasn't started yet, |
|---|
| 1201 | | - * it is a fair assumption that it will not complete within our |
|---|
| 1202 | | - * relatively short timeout. |
|---|
| 1662 | + * order in which requests are executed by the context and so we can |
|---|
| 1663 | + * tell if the request has been started. If the request is not even |
|---|
| 1664 | + * running yet, it is a fair assumption that it will not complete |
|---|
| 1665 | + * within our relatively short timeout. |
|---|
| 1203 | 1666 | */ |
|---|
| 1204 | | - if (!i915_seqno_passed(intel_engine_get_seqno(engine), seqno - 1)) |
|---|
| 1667 | + if (!i915_request_is_running(rq)) |
|---|
| 1205 | 1668 | return false; |
|---|
| 1206 | 1669 | |
|---|
| 1207 | 1670 | /* |
|---|
| .. | .. |
|---|
| 1215 | 1678 | * takes to sleep on a request, on the order of a microsecond. |
|---|
| 1216 | 1679 | */ |
|---|
| 1217 | 1680 | |
|---|
| 1218 | | - irq = READ_ONCE(engine->breadcrumbs.irq_count); |
|---|
| 1219 | | - timeout_us += local_clock_us(&cpu); |
|---|
| 1681 | + timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns); |
|---|
| 1682 | + timeout_ns += local_clock_ns(&cpu); |
|---|
| 1220 | 1683 | do { |
|---|
| 1221 | | - if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno)) |
|---|
| 1222 | | - return seqno == i915_request_global_seqno(rq); |
|---|
| 1223 | | - |
|---|
| 1224 | | - /* |
|---|
| 1225 | | - * Seqno are meant to be ordered *before* the interrupt. If |
|---|
| 1226 | | - * we see an interrupt without a corresponding seqno advance, |
|---|
| 1227 | | - * assume we won't see one in the near future but require |
|---|
| 1228 | | - * the engine->seqno_barrier() to fixup coherency. |
|---|
| 1229 | | - */ |
|---|
| 1230 | | - if (READ_ONCE(engine->breadcrumbs.irq_count) != irq) |
|---|
| 1231 | | - break; |
|---|
| 1684 | + if (dma_fence_is_signaled(&rq->fence)) |
|---|
| 1685 | + return true; |
|---|
| 1232 | 1686 | |
|---|
| 1233 | 1687 | if (signal_pending_state(state, current)) |
|---|
| 1234 | 1688 | break; |
|---|
| 1235 | 1689 | |
|---|
| 1236 | | - if (busywait_stop(timeout_us, cpu)) |
|---|
| 1690 | + if (busywait_stop(timeout_ns, cpu)) |
|---|
| 1237 | 1691 | break; |
|---|
| 1238 | 1692 | |
|---|
| 1239 | 1693 | cpu_relax(); |
|---|
| .. | .. |
|---|
| 1242 | 1696 | return false; |
|---|
| 1243 | 1697 | } |
|---|
| 1244 | 1698 | |
|---|
| 1245 | | -static bool __i915_wait_request_check_and_reset(struct i915_request *request) |
|---|
| 1699 | +struct request_wait { |
|---|
| 1700 | + struct dma_fence_cb cb; |
|---|
| 1701 | + struct task_struct *tsk; |
|---|
| 1702 | +}; |
|---|
| 1703 | + |
|---|
| 1704 | +static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb) |
|---|
| 1246 | 1705 | { |
|---|
| 1247 | | - struct i915_gpu_error *error = &request->i915->gpu_error; |
|---|
| 1706 | + struct request_wait *wait = container_of(cb, typeof(*wait), cb); |
|---|
| 1248 | 1707 | |
|---|
| 1249 | | - if (likely(!i915_reset_handoff(error))) |
|---|
| 1250 | | - return false; |
|---|
| 1251 | | - |
|---|
| 1252 | | - __set_current_state(TASK_RUNNING); |
|---|
| 1253 | | - i915_reset(request->i915, error->stalled_mask, error->reason); |
|---|
| 1254 | | - return true; |
|---|
| 1708 | + wake_up_process(fetch_and_zero(&wait->tsk)); |
|---|
| 1255 | 1709 | } |
|---|
| 1256 | 1710 | |
|---|
| 1257 | 1711 | /** |
|---|
| .. | .. |
|---|
| 1264 | 1718 | * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an |
|---|
| 1265 | 1719 | * unbounded wait). |
|---|
| 1266 | 1720 | * |
|---|
| 1267 | | - * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED |
|---|
| 1268 | | - * in via the flags, and vice versa if the struct_mutex is not held, the caller |
|---|
| 1269 | | - * must not specify that the wait is locked. |
|---|
| 1270 | | - * |
|---|
| 1271 | 1721 | * Returns the remaining time (in jiffies) if the request completed, which may |
|---|
| 1272 | 1722 | * be zero or -ETIME if the request is unfinished after the timeout expires. |
|---|
| 1273 | 1723 | * May return -EINTR is called with I915_WAIT_INTERRUPTIBLE and a signal is |
|---|
| .. | .. |
|---|
| 1279 | 1729 | { |
|---|
| 1280 | 1730 | const int state = flags & I915_WAIT_INTERRUPTIBLE ? |
|---|
| 1281 | 1731 | TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; |
|---|
| 1282 | | - wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue; |
|---|
| 1283 | | - DEFINE_WAIT_FUNC(reset, default_wake_function); |
|---|
| 1284 | | - DEFINE_WAIT_FUNC(exec, default_wake_function); |
|---|
| 1285 | | - struct intel_wait wait; |
|---|
| 1732 | + struct request_wait wait; |
|---|
| 1286 | 1733 | |
|---|
| 1287 | 1734 | might_sleep(); |
|---|
| 1288 | | -#if IS_ENABLED(CONFIG_LOCKDEP) |
|---|
| 1289 | | - GEM_BUG_ON(debug_locks && |
|---|
| 1290 | | - !!lockdep_is_held(&rq->i915->drm.struct_mutex) != |
|---|
| 1291 | | - !!(flags & I915_WAIT_LOCKED)); |
|---|
| 1292 | | -#endif |
|---|
| 1293 | 1735 | GEM_BUG_ON(timeout < 0); |
|---|
| 1294 | 1736 | |
|---|
| 1295 | | - if (i915_request_completed(rq)) |
|---|
| 1737 | + if (dma_fence_is_signaled(&rq->fence)) |
|---|
| 1296 | 1738 | return timeout; |
|---|
| 1297 | 1739 | |
|---|
| 1298 | 1740 | if (!timeout) |
|---|
| .. | .. |
|---|
| 1300 | 1742 | |
|---|
| 1301 | 1743 | trace_i915_request_wait_begin(rq, flags); |
|---|
| 1302 | 1744 | |
|---|
| 1303 | | - add_wait_queue(&rq->execute, &exec); |
|---|
| 1304 | | - if (flags & I915_WAIT_LOCKED) |
|---|
| 1305 | | - add_wait_queue(errq, &reset); |
|---|
| 1745 | + /* |
|---|
| 1746 | + * We must never wait on the GPU while holding a lock as we |
|---|
| 1747 | + * may need to perform a GPU reset. So while we don't need to |
|---|
| 1748 | + * serialise wait/reset with an explicit lock, we do want |
|---|
| 1749 | + * lockdep to detect potential dependency cycles. |
|---|
| 1750 | + */ |
|---|
| 1751 | + mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_); |
|---|
| 1306 | 1752 | |
|---|
| 1307 | | - intel_wait_init(&wait); |
|---|
| 1753 | + /* |
|---|
| 1754 | + * Optimistic spin before touching IRQs. |
|---|
| 1755 | + * |
|---|
| 1756 | + * We may use a rather large value here to offset the penalty of |
|---|
| 1757 | + * switching away from the active task. Frequently, the client will |
|---|
| 1758 | + * wait upon an old swapbuffer to throttle itself to remain within a |
|---|
| 1759 | + * frame of the gpu. If the client is running in lockstep with the gpu, |
|---|
| 1760 | + * then it should not be waiting long at all, and a sleep now will incur |
|---|
| 1761 | + * extra scheduler latency in producing the next frame. To try to |
|---|
| 1762 | + * avoid adding the cost of enabling/disabling the interrupt to the |
|---|
| 1763 | + * short wait, we first spin to see if the request would have completed |
|---|
| 1764 | + * in the time taken to setup the interrupt. |
|---|
| 1765 | + * |
|---|
| 1766 | + * We need upto 5us to enable the irq, and upto 20us to hide the |
|---|
| 1767 | + * scheduler latency of a context switch, ignoring the secondary |
|---|
| 1768 | + * impacts from a context switch such as cache eviction. |
|---|
| 1769 | + * |
|---|
| 1770 | + * The scheme used for low-latency IO is called "hybrid interrupt |
|---|
| 1771 | + * polling". The suggestion there is to sleep until just before you |
|---|
| 1772 | + * expect to be woken by the device interrupt and then poll for its |
|---|
| 1773 | + * completion. That requires having a good predictor for the request |
|---|
| 1774 | + * duration, which we currently lack. |
|---|
| 1775 | + */ |
|---|
| 1776 | + if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) && |
|---|
| 1777 | + __i915_spin_request(rq, state)) |
|---|
| 1778 | + goto out; |
|---|
| 1308 | 1779 | |
|---|
| 1309 | | -restart: |
|---|
| 1310 | | - do { |
|---|
| 1311 | | - set_current_state(state); |
|---|
| 1312 | | - if (intel_wait_update_request(&wait, rq)) |
|---|
| 1313 | | - break; |
|---|
| 1780 | + /* |
|---|
| 1781 | + * This client is about to stall waiting for the GPU. In many cases |
|---|
| 1782 | + * this is undesirable and limits the throughput of the system, as |
|---|
| 1783 | + * many clients cannot continue processing user input/output whilst |
|---|
| 1784 | + * blocked. RPS autotuning may take tens of milliseconds to respond |
|---|
| 1785 | + * to the GPU load and thus incurs additional latency for the client. |
|---|
| 1786 | + * We can circumvent that by promoting the GPU frequency to maximum |
|---|
| 1787 | + * before we sleep. This makes the GPU throttle up much more quickly |
|---|
| 1788 | + * (good for benchmarks and user experience, e.g. window animations), |
|---|
| 1789 | + * but at a cost of spending more power processing the workload |
|---|
| 1790 | + * (bad for battery). |
|---|
| 1791 | + */ |
|---|
| 1792 | + if (flags & I915_WAIT_PRIORITY && !i915_request_started(rq)) |
|---|
| 1793 | + intel_rps_boost(rq); |
|---|
| 1314 | 1794 | |
|---|
| 1315 | | - if (flags & I915_WAIT_LOCKED && |
|---|
| 1316 | | - __i915_wait_request_check_and_reset(rq)) |
|---|
| 1317 | | - continue; |
|---|
| 1795 | + wait.tsk = current; |
|---|
| 1796 | + if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake)) |
|---|
| 1797 | + goto out; |
|---|
| 1318 | 1798 | |
|---|
| 1319 | | - if (signal_pending_state(state, current)) { |
|---|
| 1320 | | - timeout = -ERESTARTSYS; |
|---|
| 1321 | | - goto complete; |
|---|
| 1322 | | - } |
|---|
| 1323 | | - |
|---|
| 1324 | | - if (!timeout) { |
|---|
| 1325 | | - timeout = -ETIME; |
|---|
| 1326 | | - goto complete; |
|---|
| 1327 | | - } |
|---|
| 1328 | | - |
|---|
| 1329 | | - timeout = io_schedule_timeout(timeout); |
|---|
| 1330 | | - } while (1); |
|---|
| 1331 | | - |
|---|
| 1332 | | - GEM_BUG_ON(!intel_wait_has_seqno(&wait)); |
|---|
| 1333 | | - GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); |
|---|
| 1334 | | - |
|---|
| 1335 | | - /* Optimistic short spin before touching IRQs */ |
|---|
| 1336 | | - if (__i915_spin_request(rq, wait.seqno, state, 5)) |
|---|
| 1337 | | - goto complete; |
|---|
| 1338 | | - |
|---|
| 1339 | | - set_current_state(state); |
|---|
| 1340 | | - if (intel_engine_add_wait(rq->engine, &wait)) |
|---|
| 1341 | | - /* |
|---|
| 1342 | | - * In order to check that we haven't missed the interrupt |
|---|
| 1343 | | - * as we enabled it, we need to kick ourselves to do a |
|---|
| 1344 | | - * coherent check on the seqno before we sleep. |
|---|
| 1345 | | - */ |
|---|
| 1346 | | - goto wakeup; |
|---|
| 1347 | | - |
|---|
| 1348 | | - if (flags & I915_WAIT_LOCKED) |
|---|
| 1349 | | - __i915_wait_request_check_and_reset(rq); |
|---|
| 1799 | + /* |
|---|
| 1800 | + * Flush the submission tasklet, but only if it may help this request. |
|---|
| 1801 | + * |
|---|
| 1802 | + * We sometimes experience some latency between the HW interrupts and |
|---|
| 1803 | + * tasklet execution (mostly due to ksoftirqd latency, but it can also |
|---|
| 1804 | + * be due to lazy CS events), so let's run the tasklet manually if there |
|---|
| 1805 | + * is a chance it may submit this request. If the request is not ready |
|---|
| 1806 | + * to run, as it is waiting for other fences to be signaled, flushing |
|---|
| 1807 | + * the tasklet is busy work without any advantage for this client. |
|---|
| 1808 | + * |
|---|
| 1809 | + * If the HW is being lazy, this is the last chance before we go to |
|---|
| 1810 | + * sleep to catch any pending events. We will check periodically in |
|---|
| 1811 | + * the heartbeat to flush the submission tasklets as a last resort |
|---|
| 1812 | + * for unhappy HW. |
|---|
| 1813 | + */ |
|---|
| 1814 | + if (i915_request_is_ready(rq)) |
|---|
| 1815 | + intel_engine_flush_submission(rq->engine); |
|---|
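
The same two primitives can be combined in a test or debug path outside this function. The sketch below is hypothetical (the helper name and the microsecond budget are invented, and `udelay()` needs `<linux/delay.h>`), but it only uses calls that appear in the driver.

```c
/* Hypothetical debug helper: kick the submission tasklet once if the
 * request can actually be submitted, then poll the fence briefly.
 * wait_us is an illustrative budget, not a driver constant.
 */
static bool kick_and_poll(struct i915_request *rq, unsigned int wait_us)
{
	if (i915_request_is_ready(rq))
		intel_engine_flush_submission(rq->engine);

	while (wait_us--) {
		if (dma_fence_is_signaled(&rq->fence))
			return true;
		udelay(1);
	}

	return false;
}
```
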
| 1350 | 1816 | |
|---|
| 1351 | 1817 | for (;;) { |
|---|
| 1818 | + set_current_state(state); |
|---|
| 1819 | + |
|---|
| 1820 | + if (dma_fence_is_signaled(&rq->fence)) |
|---|
| 1821 | + break; |
|---|
| 1822 | + |
|---|
| 1352 | 1823 | if (signal_pending_state(state, current)) { |
|---|
| 1353 | 1824 | timeout = -ERESTARTSYS; |
|---|
| 1354 | 1825 | break; |
|---|
| .. | .. |
|---|
| 1360 | 1831 | } |
|---|
| 1361 | 1832 | |
|---|
| 1362 | 1833 | timeout = io_schedule_timeout(timeout); |
|---|
| 1363 | | - |
|---|
| 1364 | | - if (intel_wait_complete(&wait) && |
|---|
| 1365 | | - intel_wait_check_request(&wait, rq)) |
|---|
| 1366 | | - break; |
|---|
| 1367 | | - |
|---|
| 1368 | | - set_current_state(state); |
|---|
| 1369 | | - |
|---|
| 1370 | | -wakeup: |
|---|
| 1371 | | - /* |
|---|
| 1372 | | - * Carefully check if the request is complete, giving time |
|---|
| 1373 | | - * for the seqno to be visible following the interrupt. |
|---|
| 1374 | | - * We also have to check in case we are kicked by the GPU |
|---|
| 1375 | | - * reset in order to drop the struct_mutex. |
|---|
| 1376 | | - */ |
|---|
| 1377 | | - if (__i915_request_irq_complete(rq)) |
|---|
| 1378 | | - break; |
|---|
| 1379 | | - |
|---|
| 1380 | | - /* |
|---|
| 1381 | | - * If the GPU is hung, and we hold the lock, reset the GPU |
|---|
| 1382 | | - * and then check for completion. On a full reset, the engine's |
|---|
| 1383 | | - * HW seqno will be advanced past us and we are complete. |
|---|
| 1384 | | - * If we do a partial reset, we have to wait for the GPU to |
|---|
| 1385 | | - * resume and update the breadcrumb. |
|---|
| 1386 | | - * |
|---|
| 1387 | | - * If we don't hold the mutex, we can just wait for the worker |
|---|
| 1388 | | - * to come along and update the breadcrumb (either directly |
|---|
| 1389 | | - * itself, or indirectly by recovering the GPU). |
|---|
| 1390 | | - */ |
|---|
| 1391 | | - if (flags & I915_WAIT_LOCKED && |
|---|
| 1392 | | - __i915_wait_request_check_and_reset(rq)) |
|---|
| 1393 | | - continue; |
|---|
| 1394 | | - |
|---|
| 1395 | | - /* Only spin if we know the GPU is processing this request */ |
|---|
| 1396 | | - if (__i915_spin_request(rq, wait.seqno, state, 2)) |
|---|
| 1397 | | - break; |
|---|
| 1398 | | - |
|---|
| 1399 | | - if (!intel_wait_check_request(&wait, rq)) { |
|---|
| 1400 | | - intel_engine_remove_wait(rq->engine, &wait); |
|---|
| 1401 | | - goto restart; |
|---|
| 1402 | | - } |
|---|
| 1403 | 1834 | } |
|---|
| 1404 | | - |
|---|
| 1405 | | - intel_engine_remove_wait(rq->engine, &wait); |
|---|
| 1406 | | -complete: |
|---|
| 1407 | 1835 | __set_current_state(TASK_RUNNING); |
|---|
| 1408 | | - if (flags & I915_WAIT_LOCKED) |
|---|
| 1409 | | - remove_wait_queue(errq, &reset); |
|---|
| 1410 | | - remove_wait_queue(&rq->execute, &exec); |
|---|
| 1836 | + |
|---|
| 1837 | + if (READ_ONCE(wait.tsk)) |
|---|
| 1838 | + dma_fence_remove_callback(&rq->fence, &wait.cb); |
|---|
| 1839 | + GEM_BUG_ON(!list_empty(&wait.cb.node)); |
|---|
| 1840 | + |
|---|
| 1841 | +out: |
|---|
| 1842 | + mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_); |
|---|
| 1411 | 1843 | trace_i915_request_wait_end(rq); |
|---|
| 1412 | | - |
|---|
| 1413 | 1844 | return timeout; |
|---|
| 1414 | | -} |
|---|
| 1415 | | - |
|---|
| 1416 | | -static void ring_retire_requests(struct intel_ring *ring) |
|---|
| 1417 | | -{ |
|---|
| 1418 | | - struct i915_request *request, *next; |
|---|
| 1419 | | - |
|---|
| 1420 | | - list_for_each_entry_safe(request, next, |
|---|
| 1421 | | - &ring->request_list, ring_link) { |
|---|
| 1422 | | - if (!i915_request_completed(request)) |
|---|
| 1423 | | - break; |
|---|
| 1424 | | - |
|---|
| 1425 | | - i915_request_retire(request); |
|---|
| 1426 | | - } |
|---|
| 1427 | | -} |
|---|
| 1428 | | - |
|---|
| 1429 | | -void i915_retire_requests(struct drm_i915_private *i915) |
|---|
| 1430 | | -{ |
|---|
| 1431 | | - struct intel_ring *ring, *tmp; |
|---|
| 1432 | | - |
|---|
| 1433 | | - lockdep_assert_held(&i915->drm.struct_mutex); |
|---|
| 1434 | | - |
|---|
| 1435 | | - if (!i915->gt.active_requests) |
|---|
| 1436 | | - return; |
|---|
| 1437 | | - |
|---|
| 1438 | | - list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link) |
|---|
| 1439 | | - ring_retire_requests(ring); |
|---|
| 1440 | 1845 | } |
|---|
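
From the caller's side, the return value encodes the three exits of the loop above: the remaining jiffies on success, -ETIME when the budget runs out, and -ERESTARTSYS when a signal interrupts an interruptible wait. A sketch of typical handling, with an arbitrary 20 ms budget:

```c
/* Illustrative caller; the 20 ms budget is an arbitrary example. */
static int sync_to_request(struct i915_request *rq)
{
	long ret;

	ret = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
				msecs_to_jiffies(20));
	if (ret == -ETIME)
		return -EBUSY;	/* still running: report busy, retry later */
	if (ret < 0)
		return ret;	/* -ERESTARTSYS: let the syscall restart */

	return 0;		/* fence signalled within the budget */
}
```
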
| 1441 | 1846 | |
|---|
| 1442 | 1847 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
|---|
| 1443 | 1848 | #include "selftests/mock_request.c" |
|---|
| 1444 | 1849 | #include "selftests/i915_request.c" |
|---|
| 1445 | 1850 | #endif |
|---|
| 1851 | + |
|---|
| 1852 | +static void i915_global_request_shrink(void) |
|---|
| 1853 | +{ |
|---|
| 1854 | + kmem_cache_shrink(global.slab_execute_cbs); |
|---|
| 1855 | + kmem_cache_shrink(global.slab_requests); |
|---|
| 1856 | +} |
|---|
| 1857 | + |
|---|
| 1858 | +static void i915_global_request_exit(void) |
|---|
| 1859 | +{ |
|---|
| 1860 | + kmem_cache_destroy(global.slab_execute_cbs); |
|---|
| 1861 | + kmem_cache_destroy(global.slab_requests); |
|---|
| 1862 | +} |
|---|
| 1863 | + |
|---|
| 1864 | +static struct i915_global_request global = { { |
|---|
| 1865 | + .shrink = i915_global_request_shrink, |
|---|
| 1866 | + .exit = i915_global_request_exit, |
|---|
| 1867 | +} }; |
|---|
| 1868 | + |
|---|
| 1869 | +int __init i915_global_request_init(void) |
|---|
| 1870 | +{ |
|---|
| 1871 | + global.slab_requests = |
|---|
| 1872 | + kmem_cache_create("i915_request", |
|---|
| 1873 | + sizeof(struct i915_request), |
|---|
| 1874 | + __alignof__(struct i915_request), |
|---|
| 1875 | + SLAB_HWCACHE_ALIGN | |
|---|
| 1876 | + SLAB_RECLAIM_ACCOUNT | |
|---|
| 1877 | + SLAB_TYPESAFE_BY_RCU, |
|---|
| 1878 | + __i915_request_ctor); |
|---|
| 1879 | + if (!global.slab_requests) |
|---|
| 1880 | + return -ENOMEM; |
|---|
| 1881 | + |
|---|
| 1882 | + global.slab_execute_cbs = KMEM_CACHE(execute_cb, |
|---|
| 1883 | + SLAB_HWCACHE_ALIGN | |
|---|
| 1884 | + SLAB_RECLAIM_ACCOUNT | |
|---|
| 1885 | + SLAB_TYPESAFE_BY_RCU); |
|---|
| 1886 | + if (!global.slab_execute_cbs) |
|---|
| 1887 | + goto err_requests; |
|---|
| 1888 | + |
|---|
| 1889 | + i915_global_register(&global.base); |
|---|
| 1890 | + return 0; |
|---|
| 1891 | + |
|---|
| 1892 | +err_requests: |
|---|
| 1893 | + kmem_cache_destroy(global.slab_requests); |
|---|
| 1894 | + return -ENOMEM; |
|---|
| 1895 | +} |
|---|
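
Both caches are created with SLAB_TYPESAFE_BY_RCU, so their memory may be recycled into a new object within an RCU grace period rather than returned to the page allocator. Lockless readers therefore have to take a reference and then revalidate that the object is still the one they looked up. A minimal sketch of that rule, assuming the caller already holds an RCU-protected pointer and knows the seqno it expects (the helper name is hypothetical):

```c
/* Hypothetical helper showing the SLAB_TYPESAFE_BY_RCU revalidation
 * rule: acquire a reference under RCU, then confirm the memory was
 * not recycled into a different request while we were looking.
 */
static struct i915_request *
get_request_if_seqno(struct i915_request *rq, u64 expected_seqno)
{
	rcu_read_lock();
	rq = i915_request_get_rcu(rq);	/* fails if the refcount hit zero */
	if (rq && rq->fence.seqno != expected_seqno) {
		i915_request_put(rq);	/* the slab reused this memory */
		rq = NULL;
	}
	rcu_read_unlock();

	return rq;
}
```

This reuse is also why the request cache installs a constructor (__i915_request_ctor, defined earlier in the file) for the fields RCU readers may touch, rather than re-initialising them on every allocation.
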