.. | ..
23 | 23 | */
24 | 24 |
25 | 25 | #include <linux/prime_numbers.h>
| 26 | +#include <linux/pm_qos.h>
| 27 | +#include <linux/sort.h>
26 | 28 |
27 | | -#include "../i915_selftest.h"
| 29 | +#include "gem/i915_gem_pm.h"
| 30 | +#include "gem/selftests/mock_context.h"
28 | 31 |
29 | | -#include "mock_context.h"
| 32 | +#include "gt/intel_engine_heartbeat.h"
| 33 | +#include "gt/intel_engine_pm.h"
| 34 | +#include "gt/intel_engine_user.h"
| 35 | +#include "gt/intel_gt.h"
| 36 | +#include "gt/intel_gt_requests.h"
| 37 | +#include "gt/selftest_engine_heartbeat.h"
| 38 | +
| 39 | +#include "i915_random.h"
| 40 | +#include "i915_selftest.h"
| 41 | +#include "igt_flush_test.h"
| 42 | +#include "igt_live_test.h"
| 43 | +#include "igt_spinner.h"
| 44 | +#include "lib_sw_fence.h"
| 45 | +
| 46 | +#include "mock_drm.h"
30 | 47 | #include "mock_gem_device.h"
| 48 | +
| 49 | +static unsigned int num_uabi_engines(struct drm_i915_private *i915)
| 50 | +{
| 51 | + struct intel_engine_cs *engine;
| 52 | + unsigned int count;
| 53 | +
| 54 | + count = 0;
| 55 | + for_each_uabi_engine(engine, i915)
| 56 | + count++;
| 57 | +
| 58 | + return count;
| 59 | +}
| 60 | +
| 61 | +static struct intel_engine_cs *rcs0(struct drm_i915_private *i915)
| 62 | +{
| 63 | + return intel_engine_lookup_user(i915, I915_ENGINE_CLASS_RENDER, 0);
| 64 | +}
31 | 65 |
32 | 66 | static int igt_add_request(void *arg)
33 | 67 | {
34 | 68 | struct drm_i915_private *i915 = arg;
35 | 69 | struct i915_request *request;
36 | | - int err = -ENOMEM;
37 | 70 |
38 | 71 | /* Basic preliminary test to create a request and let it loose! */
39 | 72 |
40 | | - mutex_lock(&i915->drm.struct_mutex);
41 | | - request = mock_request(i915->engine[RCS],
42 | | - i915->kernel_context,
43 | | - HZ / 10);
| 73 | + request = mock_request(rcs0(i915)->kernel_context, HZ / 10);
44 | 74 | if (!request)
45 | | - goto out_unlock;
| 75 | + return -ENOMEM;
46 | 76 |
47 | 77 | i915_request_add(request);
48 | 78 |
49 | | - err = 0;
50 | | -out_unlock:
51 | | - mutex_unlock(&i915->drm.struct_mutex);
52 | | - return err;
| 79 | + return 0;
53 | 80 | }
54 | 81 |
55 | 82 | static int igt_wait_request(void *arg)
.. | ..
61 | 88 |
62 | 89 | /* Submit a request, then wait upon it */
63 | 90 |
64 | | - mutex_lock(&i915->drm.struct_mutex);
65 | | - request = mock_request(i915->engine[RCS], i915->kernel_context, T);
66 | | - if (!request) {
67 | | - err = -ENOMEM;
68 | | - goto out_unlock;
69 | | - }
| 91 | + request = mock_request(rcs0(i915)->kernel_context, T);
| 92 | + if (!request)
| 93 | + return -ENOMEM;
70 | 94 |
71 | | - if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
| 95 | + i915_request_get(request);
| 96 | +
| 97 | + if (i915_request_wait(request, 0, 0) != -ETIME) {
72 | 98 | pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
73 | | - goto out_unlock;
| 99 | + goto out_request;
74 | 100 | }
75 | 101 |
76 | | - if (i915_request_wait(request, I915_WAIT_LOCKED, T) != -ETIME) {
| 102 | + if (i915_request_wait(request, 0, T) != -ETIME) {
77 | 103 | pr_err("request wait succeeded (expected timeout before submit!)\n");
78 | | - goto out_unlock;
| 104 | + goto out_request;
79 | 105 | }
80 | 106 |
81 | 107 | if (i915_request_completed(request)) {
82 | 108 | pr_err("request completed before submit!!\n");
83 | | - goto out_unlock;
| 109 | + goto out_request;
84 | 110 | }
85 | 111 |
86 | 112 | i915_request_add(request);
87 | 113 |
88 | | - if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
| 114 | + if (i915_request_wait(request, 0, 0) != -ETIME) {
89 | 115 | pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
90 | | - goto out_unlock;
| 116 | + goto out_request;
91 | 117 | }
92 | 118 |
93 | 119 | if (i915_request_completed(request)) {
94 | 120 | pr_err("request completed immediately!\n");
95 | | - goto out_unlock;
| 121 | + goto out_request;
96 | 122 | }
97 | 123 |
98 | | - if (i915_request_wait(request, I915_WAIT_LOCKED, T / 2) != -ETIME) {
| 124 | + if (i915_request_wait(request, 0, T / 2) != -ETIME) {
99 | 125 | pr_err("request wait succeeded (expected timeout!)\n");
100 | | - goto out_unlock;
| 126 | + goto out_request;
101 | 127 | }
102 | 128 |
103 | | - if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
| 129 | + if (i915_request_wait(request, 0, T) == -ETIME) {
104 | 130 | pr_err("request wait timed out!\n");
105 | | - goto out_unlock;
| 131 | + goto out_request;
106 | 132 | }
107 | 133 |
108 | 134 | if (!i915_request_completed(request)) {
109 | 135 | pr_err("request not complete after waiting!\n");
110 | | - goto out_unlock;
| 136 | + goto out_request;
111 | 137 | }
112 | 138 |
113 | | - if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
| 139 | + if (i915_request_wait(request, 0, T) == -ETIME) {
114 | 140 | pr_err("request wait timed out when already complete!\n");
115 | | - goto out_unlock;
| 141 | + goto out_request;
116 | 142 | }
117 | 143 |
118 | 144 | err = 0;
119 | | -out_unlock:
| 145 | +out_request:
| 146 | + i915_request_put(request);
120 | 147 | mock_device_flush(i915);
121 | | - mutex_unlock(&i915->drm.struct_mutex);
122 | 148 | return err;
123 | 149 | }
124 | 150 |
.. | ..
131 | 157 |
132 | 158 | /* Submit a request, treat it as a fence and wait upon it */
133 | 159 |
134 | | - mutex_lock(&i915->drm.struct_mutex);
135 | | - request = mock_request(i915->engine[RCS], i915->kernel_context, T);
136 | | - if (!request) {
137 | | - err = -ENOMEM;
138 | | - goto out_locked;
139 | | - }
140 | | - mutex_unlock(&i915->drm.struct_mutex); /* safe as we are single user */
| 160 | + request = mock_request(rcs0(i915)->kernel_context, T);
| 161 | + if (!request)
| 162 | + return -ENOMEM;
141 | 163 |
142 | 164 | if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
143 | 165 | pr_err("fence wait success before submit (expected timeout)!\n");
144 | | - goto out_device;
| 166 | + goto out;
145 | 167 | }
146 | 168 |
147 | | - mutex_lock(&i915->drm.struct_mutex);
148 | 169 | i915_request_add(request);
149 | | - mutex_unlock(&i915->drm.struct_mutex);
150 | 170 |
151 | 171 | if (dma_fence_is_signaled(&request->fence)) {
152 | 172 | pr_err("fence signaled immediately!\n");
153 | | - goto out_device;
| 173 | + goto out;
154 | 174 | }
155 | 175 |
156 | 176 | if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
157 | 177 | pr_err("fence wait success after submit (expected timeout)!\n");
158 | | - goto out_device;
| 178 | + goto out;
159 | 179 | }
160 | 180 |
161 | 181 | if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
162 | 182 | pr_err("fence wait timed out (expected success)!\n");
163 | | - goto out_device;
| 183 | + goto out;
164 | 184 | }
165 | 185 |
166 | 186 | if (!dma_fence_is_signaled(&request->fence)) {
167 | 187 | pr_err("fence unsignaled after waiting!\n");
168 | | - goto out_device;
| 188 | + goto out;
169 | 189 | }
170 | 190 |
171 | 191 | if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
172 | 192 | pr_err("fence wait timed out when complete (expected success)!\n");
173 | | - goto out_device;
| 193 | + goto out;
174 | 194 | }
175 | 195 |
176 | 196 | err = 0;
177 | | -out_device:
178 | | - mutex_lock(&i915->drm.struct_mutex);
179 | | -out_locked:
| 197 | +out:
180 | 198 | mock_device_flush(i915);
181 | | - mutex_unlock(&i915->drm.struct_mutex);
182 | 199 | return err;
183 | 200 | }
184 | 201 |
.. | ..
187 | 204 | struct drm_i915_private *i915 = arg;
188 | 205 | struct i915_request *request, *vip;
189 | 206 | struct i915_gem_context *ctx[2];
| 207 | + struct intel_context *ce;
190 | 208 | int err = -EINVAL;
191 | 209 |
192 | | - mutex_lock(&i915->drm.struct_mutex);
193 | 210 | ctx[0] = mock_context(i915, "A");
194 | | - request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ);
| 211 | +
| 212 | + ce = i915_gem_context_get_engine(ctx[0], RCS0);
| 213 | + GEM_BUG_ON(IS_ERR(ce));
| 214 | + request = mock_request(ce, 2 * HZ);
| 215 | + intel_context_put(ce);
195 | 216 | if (!request) {
196 | 217 | err = -ENOMEM;
197 | 218 | goto err_context_0;
.. | ..
201 | 222 | i915_request_add(request);
202 | 223 |
203 | 224 | ctx[1] = mock_context(i915, "B");
204 | | - vip = mock_request(i915->engine[RCS], ctx[1], 0);
| 225 | +
| 226 | + ce = i915_gem_context_get_engine(ctx[1], RCS0);
| 227 | + GEM_BUG_ON(IS_ERR(ce));
| 228 | + vip = mock_request(ce, 0);
| 229 | + intel_context_put(ce);
205 | 230 | if (!vip) {
206 | 231 | err = -ENOMEM;
207 | 232 | goto err_context_1;
.. | ..
219 | 244 | request->engine->submit_request(request);
220 | 245 | rcu_read_unlock();
221 | 246 |
222 | | - mutex_unlock(&i915->drm.struct_mutex);
223 | 247 |
224 | 248 | if (i915_request_wait(vip, 0, HZ) == -ETIME) {
225 | | - pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n",
226 | | - vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS]));
| 249 | + pr_err("timed out waiting for high priority request\n");
227 | 250 | goto err;
228 | 251 | }
229 | 252 |
.. | ..
235 | 258 | err = 0;
236 | 259 | err:
237 | 260 | i915_request_put(vip);
238 | | - mutex_lock(&i915->drm.struct_mutex);
239 | 261 | err_context_1:
240 | 262 | mock_context_close(ctx[1]);
241 | 263 | i915_request_put(request);
242 | 264 | err_context_0:
243 | 265 | mock_context_close(ctx[0]);
244 | 266 | mock_device_flush(i915);
245 | | - mutex_unlock(&i915->drm.struct_mutex);
246 | 267 | return err;
| 268 | +}
| 269 | +
---|
| 270 | +struct smoketest { |
---|
| 271 | + struct intel_engine_cs *engine; |
---|
| 272 | + struct i915_gem_context **contexts; |
---|
| 273 | + atomic_long_t num_waits, num_fences; |
---|
| 274 | + int ncontexts, max_batch; |
---|
| 275 | + struct i915_request *(*request_alloc)(struct intel_context *ce); |
---|
| 276 | +}; |
---|
| 277 | + |
---|
| 278 | +static struct i915_request * |
---|
| 279 | +__mock_request_alloc(struct intel_context *ce) |
---|
| 280 | +{ |
---|
| 281 | + return mock_request(ce, 0); |
---|
| 282 | +} |
---|
| 283 | + |
---|
| 284 | +static struct i915_request * |
---|
| 285 | +__live_request_alloc(struct intel_context *ce) |
---|
| 286 | +{ |
---|
| 287 | + return intel_context_create_request(ce); |
---|
| 288 | +} |
---|
| 289 | + |
---|
| 290 | +static int __igt_breadcrumbs_smoketest(void *arg) |
---|
| 291 | +{ |
---|
| 292 | + struct smoketest *t = arg; |
---|
| 293 | + const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; |
---|
| 294 | + const unsigned int total = 4 * t->ncontexts + 1; |
---|
| 295 | + unsigned int num_waits = 0, num_fences = 0; |
---|
| 296 | + struct i915_request **requests; |
---|
| 297 | + I915_RND_STATE(prng); |
---|
| 298 | + unsigned int *order; |
---|
| 299 | + int err = 0; |
---|
| 300 | + |
---|
| 301 | + /* |
---|
| 302 | + * A very simple test to catch the most egregious of list handling bugs. |
---|
| 303 | + * |
---|
| 304 | + * At its heart, we simply create oodles of requests running across |
---|
| 305 | + * multiple kthreads and enable signaling on them, for the sole purpose |
---|
| 306 | + * of stressing our breadcrumb handling. The only inspection we do is |
---|
| 307 | + * that the fences were marked as signaled. |
---|
| 308 | + */ |
---|
| 309 | + |
---|
| 310 | + requests = kcalloc(total, sizeof(*requests), GFP_KERNEL); |
---|
| 311 | + if (!requests) |
---|
| 312 | + return -ENOMEM; |
---|
| 313 | + |
---|
| 314 | + order = i915_random_order(total, &prng); |
---|
| 315 | + if (!order) { |
---|
| 316 | + err = -ENOMEM; |
---|
| 317 | + goto out_requests; |
---|
| 318 | + } |
---|
| 319 | + |
---|
| 320 | + while (!kthread_should_stop()) { |
---|
| 321 | + struct i915_sw_fence *submit, *wait; |
---|
| 322 | + unsigned int n, count; |
---|
| 323 | + |
---|
| 324 | + submit = heap_fence_create(GFP_KERNEL); |
---|
| 325 | + if (!submit) { |
---|
| 326 | + err = -ENOMEM; |
---|
| 327 | + break; |
---|
| 328 | + } |
---|
| 329 | + |
---|
| 330 | + wait = heap_fence_create(GFP_KERNEL); |
---|
| 331 | + if (!wait) { |
---|
| 332 | + i915_sw_fence_commit(submit); |
---|
| 333 | + heap_fence_put(submit); |
---|
| 334 | + err = -ENOMEM; |
---|
| 335 | + break; |
---|
| 336 | + } |
---|
| 337 | + |
---|
| 338 | + i915_random_reorder(order, total, &prng); |
---|
| 339 | + count = 1 + i915_prandom_u32_max_state(max_batch, &prng); |
---|
| 340 | + |
---|
| 341 | + for (n = 0; n < count; n++) { |
---|
| 342 | + struct i915_gem_context *ctx = |
---|
| 343 | + t->contexts[order[n] % t->ncontexts]; |
---|
| 344 | + struct i915_request *rq; |
---|
| 345 | + struct intel_context *ce; |
---|
| 346 | + |
---|
| 347 | + ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx); |
---|
| 348 | + GEM_BUG_ON(IS_ERR(ce)); |
---|
| 349 | + rq = t->request_alloc(ce); |
---|
| 350 | + intel_context_put(ce); |
---|
| 351 | + if (IS_ERR(rq)) { |
---|
| 352 | + err = PTR_ERR(rq); |
---|
| 353 | + count = n; |
---|
| 354 | + break; |
---|
| 355 | + } |
---|
| 356 | + |
---|
| 357 | + err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, |
---|
| 358 | + submit, |
---|
| 359 | + GFP_KERNEL); |
---|
| 360 | + |
---|
| 361 | + requests[n] = i915_request_get(rq); |
---|
| 362 | + i915_request_add(rq); |
---|
| 363 | + |
---|
| 364 | + if (err >= 0) |
---|
| 365 | + err = i915_sw_fence_await_dma_fence(wait, |
---|
| 366 | + &rq->fence, |
---|
| 367 | + 0, |
---|
| 368 | + GFP_KERNEL); |
---|
| 369 | + |
---|
| 370 | + if (err < 0) { |
---|
| 371 | + i915_request_put(rq); |
---|
| 372 | + count = n; |
---|
| 373 | + break; |
---|
| 374 | + } |
---|
| 375 | + } |
---|
| 376 | + |
---|
| 377 | + i915_sw_fence_commit(submit); |
---|
| 378 | + i915_sw_fence_commit(wait); |
---|
| 379 | + |
---|
| 380 | + if (!wait_event_timeout(wait->wait, |
---|
| 381 | + i915_sw_fence_done(wait), |
---|
| 382 | + 5 * HZ)) { |
---|
| 383 | + struct i915_request *rq = requests[count - 1]; |
---|
| 384 | + |
---|
| 385 | + pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n", |
---|
| 386 | + atomic_read(&wait->pending), count, |
---|
| 387 | + rq->fence.context, rq->fence.seqno, |
---|
| 388 | + t->engine->name); |
---|
| 389 | + GEM_TRACE_DUMP(); |
---|
| 390 | + |
---|
| 391 | + intel_gt_set_wedged(t->engine->gt); |
---|
| 392 | + GEM_BUG_ON(!i915_request_completed(rq)); |
---|
| 393 | + i915_sw_fence_wait(wait); |
---|
| 394 | + err = -EIO; |
---|
| 395 | + } |
---|
| 396 | + |
---|
| 397 | + for (n = 0; n < count; n++) { |
---|
| 398 | + struct i915_request *rq = requests[n]; |
---|
| 399 | + |
---|
| 400 | + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, |
---|
| 401 | + &rq->fence.flags)) { |
---|
| 402 | + pr_err("%llu:%llu was not signaled!\n", |
---|
| 403 | + rq->fence.context, rq->fence.seqno); |
---|
| 404 | + err = -EINVAL; |
---|
| 405 | + } |
---|
| 406 | + |
---|
| 407 | + i915_request_put(rq); |
---|
| 408 | + } |
---|
| 409 | + |
---|
| 410 | + heap_fence_put(wait); |
---|
| 411 | + heap_fence_put(submit); |
---|
| 412 | + |
---|
| 413 | + if (err < 0) |
---|
| 414 | + break; |
---|
| 415 | + |
---|
| 416 | + num_fences += count; |
---|
| 417 | + num_waits++; |
---|
| 418 | + |
---|
| 419 | + cond_resched(); |
---|
| 420 | + } |
---|
| 421 | + |
---|
| 422 | + atomic_long_add(num_fences, &t->num_fences); |
---|
| 423 | + atomic_long_add(num_waits, &t->num_waits); |
---|
| 424 | + |
---|
| 425 | + kfree(order); |
---|
| 426 | +out_requests: |
---|
| 427 | + kfree(requests); |
---|
| 428 | + return err; |
---|
| 429 | +} |
---|
| 430 | + |
---|
| 431 | +static int mock_breadcrumbs_smoketest(void *arg) |
---|
| 432 | +{ |
---|
| 433 | + struct drm_i915_private *i915 = arg; |
---|
| 434 | + struct smoketest t = { |
---|
| 435 | + .engine = rcs0(i915), |
---|
| 436 | + .ncontexts = 1024, |
---|
| 437 | + .max_batch = 1024, |
---|
| 438 | + .request_alloc = __mock_request_alloc |
---|
| 439 | + }; |
---|
| 440 | + unsigned int ncpus = num_online_cpus(); |
---|
| 441 | + struct task_struct **threads; |
---|
| 442 | + unsigned int n; |
---|
| 443 | + int ret = 0; |
---|
| 444 | + |
---|
| 445 | + /* |
---|
| 446 | + * Smoketest our breadcrumb/signal handling for requests across multiple |
---|
| 447 | + * threads. A very simple test to only catch the most egregious of bugs. |
---|
| 448 | + * See __igt_breadcrumbs_smoketest(); |
---|
| 449 | + */ |
---|
| 450 | + |
---|
| 451 | + threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL); |
---|
| 452 | + if (!threads) |
---|
| 453 | + return -ENOMEM; |
---|
| 454 | + |
---|
| 455 | + t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); |
---|
| 456 | + if (!t.contexts) { |
---|
| 457 | + ret = -ENOMEM; |
---|
| 458 | + goto out_threads; |
---|
| 459 | + } |
---|
| 460 | + |
---|
| 461 | + for (n = 0; n < t.ncontexts; n++) { |
---|
| 462 | + t.contexts[n] = mock_context(t.engine->i915, "mock"); |
---|
| 463 | + if (!t.contexts[n]) { |
---|
| 464 | + ret = -ENOMEM; |
---|
| 465 | + goto out_contexts; |
---|
| 466 | + } |
---|
| 467 | + } |
---|
| 468 | + |
---|
| 469 | + for (n = 0; n < ncpus; n++) { |
---|
| 470 | + threads[n] = kthread_run(__igt_breadcrumbs_smoketest, |
---|
| 471 | + &t, "igt/%d", n); |
---|
| 472 | + if (IS_ERR(threads[n])) { |
---|
| 473 | + ret = PTR_ERR(threads[n]); |
---|
| 474 | + ncpus = n; |
---|
| 475 | + break; |
---|
| 476 | + } |
---|
| 477 | + |
---|
| 478 | + get_task_struct(threads[n]); |
---|
| 479 | + } |
---|
| 480 | + |
---|
| 481 | + yield(); /* start all threads before we begin */ |
---|
| 482 | + msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); |
---|
| 483 | + |
---|
| 484 | + for (n = 0; n < ncpus; n++) { |
---|
| 485 | + int err; |
---|
| 486 | + |
---|
| 487 | + err = kthread_stop(threads[n]); |
---|
| 488 | + if (err < 0 && !ret) |
---|
| 489 | + ret = err; |
---|
| 490 | + |
---|
| 491 | + put_task_struct(threads[n]); |
---|
| 492 | + } |
---|
| 493 | + pr_info("Completed %lu waits for %lu fences across %d cpus\n", |
---|
| 494 | + atomic_long_read(&t.num_waits), |
---|
| 495 | + atomic_long_read(&t.num_fences), |
---|
| 496 | + ncpus); |
---|
| 497 | + |
---|
| 498 | +out_contexts: |
---|
| 499 | + for (n = 0; n < t.ncontexts; n++) { |
---|
| 500 | + if (!t.contexts[n]) |
---|
| 501 | + break; |
---|
| 502 | + mock_context_close(t.contexts[n]); |
---|
| 503 | + } |
---|
| 504 | + kfree(t.contexts); |
---|
| 505 | +out_threads: |
---|
| 506 | + kfree(threads); |
---|
| 507 | + return ret; |
---|
247 | 508 | } |
---|
248 | 509 | |
---|
249 | 510 | int i915_request_mock_selftests(void)
.. | ..
253 | 514 | SUBTEST(igt_wait_request),
254 | 515 | SUBTEST(igt_fence_wait),
255 | 516 | SUBTEST(igt_request_rewind),
| 517 | + SUBTEST(mock_breadcrumbs_smoketest),
256 | 518 | };
257 | 519 | struct drm_i915_private *i915;
258 | | - int err;
| 520 | + intel_wakeref_t wakeref;
| 521 | + int err = 0;
259 | 522 |
260 | 523 | i915 = mock_gem_device();
261 | 524 | if (!i915)
262 | 525 | return -ENOMEM;
263 | 526 |
264 | | - err = i915_subtests(tests, i915);
265 | | - drm_dev_put(&i915->drm);
| 527 | + with_intel_runtime_pm(&i915->runtime_pm, wakeref)
| 528 | + err = i915_subtests(tests, i915);
| 529 | +
| 530 | + mock_destroy_device(i915);
266 | 531 |
267 | 532 | return err;
268 | | -}
---|
269 | | - |
---|
270 | | -struct live_test { |
---|
271 | | - struct drm_i915_private *i915; |
---|
272 | | - const char *func; |
---|
273 | | - const char *name; |
---|
274 | | - |
---|
275 | | - unsigned int reset_count; |
---|
276 | | -}; |
---|
277 | | - |
---|
278 | | -static int begin_live_test(struct live_test *t, |
---|
279 | | - struct drm_i915_private *i915, |
---|
280 | | - const char *func, |
---|
281 | | - const char *name) |
---|
282 | | -{ |
---|
283 | | - int err; |
---|
284 | | - |
---|
285 | | - t->i915 = i915; |
---|
286 | | - t->func = func; |
---|
287 | | - t->name = name; |
---|
288 | | - |
---|
289 | | - err = i915_gem_wait_for_idle(i915, |
---|
290 | | - I915_WAIT_LOCKED, |
---|
291 | | - MAX_SCHEDULE_TIMEOUT); |
---|
292 | | - if (err) { |
---|
293 | | - pr_err("%s(%s): failed to idle before, with err=%d!", |
---|
294 | | - func, name, err); |
---|
295 | | - return err; |
---|
296 | | - } |
---|
297 | | - |
---|
298 | | - i915->gpu_error.missed_irq_rings = 0; |
---|
299 | | - t->reset_count = i915_reset_count(&i915->gpu_error); |
---|
300 | | - |
---|
301 | | - return 0; |
---|
302 | | -} |
---|
303 | | - |
---|
304 | | -static int end_live_test(struct live_test *t) |
---|
305 | | -{ |
---|
306 | | - struct drm_i915_private *i915 = t->i915; |
---|
307 | | - |
---|
308 | | - i915_retire_requests(i915); |
---|
309 | | - |
---|
310 | | - if (wait_for(intel_engines_are_idle(i915), 10)) { |
---|
311 | | - pr_err("%s(%s): GPU not idle\n", t->func, t->name); |
---|
312 | | - return -EIO; |
---|
313 | | - } |
---|
314 | | - |
---|
315 | | - if (t->reset_count != i915_reset_count(&i915->gpu_error)) { |
---|
316 | | - pr_err("%s(%s): GPU was reset %d times!\n", |
---|
317 | | - t->func, t->name, |
---|
318 | | - i915_reset_count(&i915->gpu_error) - t->reset_count); |
---|
319 | | - return -EIO; |
---|
320 | | - } |
---|
321 | | - |
---|
322 | | - if (i915->gpu_error.missed_irq_rings) { |
---|
323 | | - pr_err("%s(%s): Missed interrupts on engines %lx\n", |
---|
324 | | - t->func, t->name, i915->gpu_error.missed_irq_rings); |
---|
325 | | - return -EIO; |
---|
326 | | - } |
---|
327 | | - |
---|
328 | | - return 0; |
---|
329 | 533 | } |
---|
330 | 534 | |
---|
331 | 535 | static int live_nop_request(void *arg) |
---|
332 | 536 | { |
---|
333 | 537 | struct drm_i915_private *i915 = arg; |
---|
334 | 538 | struct intel_engine_cs *engine; |
---|
335 | | - struct live_test t; |
---|
336 | | - unsigned int id; |
---|
| 539 | + struct igt_live_test t; |
---|
337 | 540 | int err = -ENODEV; |
---|
338 | 541 | |
---|
339 | | - /* Submit various sized batches of empty requests, to each engine |
---|
| 542 | + /* |
---|
| 543 | + * Submit various sized batches of empty requests, to each engine |
---|
340 | 544 | * (individually), and wait for the batch to complete. We can check |
---|
341 | 545 | * the overhead of submitting requests to the hardware. |
---|
342 | 546 | */ |
---|
343 | 547 | |
---|
344 | | - mutex_lock(&i915->drm.struct_mutex); |
---|
345 | | - |
---|
346 | | - for_each_engine(engine, i915, id) { |
---|
347 | | - struct i915_request *request = NULL; |
---|
| 548 | + for_each_uabi_engine(engine, i915) { |
---|
348 | 549 | unsigned long n, prime; |
---|
349 | 550 | IGT_TIMEOUT(end_time); |
---|
350 | 551 | ktime_t times[2] = {}; |
---|
351 | 552 | |
---|
352 | | - err = begin_live_test(&t, i915, __func__, engine->name); |
---|
| 553 | + err = igt_live_test_begin(&t, i915, __func__, engine->name); |
---|
353 | 554 | if (err) |
---|
354 | | - goto out_unlock; |
---|
| 555 | + return err; |
---|
355 | 556 | |
---|
| 557 | + intel_engine_pm_get(engine); |
---|
356 | 558 | for_each_prime_number_from(prime, 1, 8192) { |
---|
| 559 | + struct i915_request *request = NULL; |
---|
| 560 | + |
---|
357 | 561 | times[1] = ktime_get_raw(); |
---|
358 | 562 | |
---|
359 | 563 | for (n = 0; n < prime; n++) { |
---|
360 | | - request = i915_request_alloc(engine, |
---|
361 | | - i915->kernel_context); |
---|
362 | | - if (IS_ERR(request)) { |
---|
363 | | - err = PTR_ERR(request); |
---|
364 | | - goto out_unlock; |
---|
365 | | - } |
---|
| 564 | + i915_request_put(request); |
---|
| 565 | + request = i915_request_create(engine->kernel_context); |
---|
| 566 | + if (IS_ERR(request)) |
---|
| 567 | + return PTR_ERR(request); |
---|
366 | 568 | |
---|
367 | | - /* This space is left intentionally blank. |
---|
| 569 | + /* |
---|
| 570 | + * This space is left intentionally blank. |
---|
368 | 571 | * |
---|
369 | 572 | * We do not actually want to perform any |
---|
370 | 573 | * action with this request, we just want |
---|
.. | .. |
---|
377 | 580 | * for latency. |
---|
378 | 581 | */ |
---|
379 | 582 | |
---|
| 583 | + i915_request_get(request); |
---|
380 | 584 | i915_request_add(request); |
---|
381 | 585 | } |
---|
382 | | - i915_request_wait(request, |
---|
383 | | - I915_WAIT_LOCKED, |
---|
384 | | - MAX_SCHEDULE_TIMEOUT); |
---|
| 586 | + i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); |
---|
| 587 | + i915_request_put(request); |
---|
385 | 588 | |
---|
386 | 589 | times[1] = ktime_sub(ktime_get_raw(), times[1]); |
---|
387 | 590 | if (prime == 1) |
---|
.. | .. |
---|
390 | 593 | if (__igt_timeout(end_time, NULL)) |
---|
391 | 594 | break; |
---|
392 | 595 | } |
---|
| 596 | + intel_engine_pm_put(engine); |
---|
393 | 597 | |
---|
394 | | - err = end_live_test(&t); |
---|
| 598 | + err = igt_live_test_end(&t); |
---|
395 | 599 | if (err) |
---|
396 | | - goto out_unlock; |
---|
| 600 | + return err; |
---|
397 | 601 | |
---|
398 | 602 | pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", |
---|
399 | 603 | engine->name, |
---|
.. | .. |
---|
401 | 605 | prime, div64_u64(ktime_to_ns(times[1]), prime)); |
---|
402 | 606 | } |
---|
403 | 607 | |
---|
404 | | -out_unlock: |
---|
405 | | - mutex_unlock(&i915->drm.struct_mutex); |
---|
406 | 608 | return err; |
---|
407 | 609 | } |
---|
408 | 610 | |
---|
.. | .. |
---|
424 | 626 | } |
---|
425 | 627 | |
---|
426 | 628 | *cmd = MI_BATCH_BUFFER_END; |
---|
427 | | - i915_gem_chipset_flush(i915); |
---|
428 | 629 | |
---|
| 630 | + __i915_gem_object_flush_map(obj, 0, 64); |
---|
429 | 631 | i915_gem_object_unpin_map(obj); |
---|
430 | 632 | |
---|
431 | | - err = i915_gem_object_set_to_gtt_domain(obj, false); |
---|
432 | | - if (err) |
---|
433 | | - goto err; |
---|
| 633 | + intel_gt_chipset_flush(&i915->gt); |
---|
434 | 634 | |
---|
435 | 635 | vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); |
---|
436 | 636 | if (IS_ERR(vma)) { |
---|
.. | .. |
---|
442 | 642 | if (err) |
---|
443 | 643 | goto err; |
---|
444 | 644 | |
---|
| 645 | + /* Force the wait now to avoid including it in the benchmark */ |
---|
| 646 | + err = i915_vma_sync(vma); |
---|
| 647 | + if (err) |
---|
| 648 | + goto err_pin; |
---|
| 649 | + |
---|
445 | 650 | return vma; |
---|
446 | 651 | |
---|
| 652 | +err_pin: |
---|
| 653 | + i915_vma_unpin(vma); |
---|
447 | 654 | err: |
---|
448 | 655 | i915_gem_object_put(obj); |
---|
449 | 656 | return ERR_PTR(err); |
---|
.. | .. |
---|
456 | 663 | struct i915_request *request; |
---|
457 | 664 | int err; |
---|
458 | 665 | |
---|
459 | | - request = i915_request_alloc(engine, engine->i915->kernel_context); |
---|
| 666 | + request = i915_request_create(engine->kernel_context); |
---|
460 | 667 | if (IS_ERR(request)) |
---|
461 | 668 | return request; |
---|
462 | 669 | |
---|
.. | .. |
---|
467 | 674 | if (err) |
---|
468 | 675 | goto out_request; |
---|
469 | 676 | |
---|
| 677 | + i915_request_get(request); |
---|
470 | 678 | out_request: |
---|
471 | 679 | i915_request_add(request); |
---|
472 | 680 | return err ? ERR_PTR(err) : request; |
---|
.. | .. |
---|
476 | 684 | { |
---|
477 | 685 | struct drm_i915_private *i915 = arg; |
---|
478 | 686 | struct intel_engine_cs *engine; |
---|
479 | | - struct live_test t; |
---|
| 687 | + struct igt_live_test t; |
---|
480 | 688 | struct i915_vma *batch; |
---|
481 | | - unsigned int id; |
---|
482 | 689 | int err = 0; |
---|
483 | 690 | |
---|
484 | | - /* Submit various sized batches of empty requests, to each engine |
---|
| 691 | + /* |
---|
| 692 | + * Submit various sized batches of empty requests, to each engine |
---|
485 | 693 | * (individually), and wait for the batch to complete. We can check |
---|
486 | 694 | * the overhead of submitting requests to the hardware. |
---|
487 | 695 | */ |
---|
488 | 696 | |
---|
489 | | - mutex_lock(&i915->drm.struct_mutex); |
---|
490 | | - |
---|
491 | 697 | batch = empty_batch(i915); |
---|
492 | | - if (IS_ERR(batch)) { |
---|
493 | | - err = PTR_ERR(batch); |
---|
494 | | - goto out_unlock; |
---|
495 | | - } |
---|
| 698 | + if (IS_ERR(batch)) |
---|
| 699 | + return PTR_ERR(batch); |
---|
496 | 700 | |
---|
497 | | - for_each_engine(engine, i915, id) { |
---|
| 701 | + for_each_uabi_engine(engine, i915) { |
---|
498 | 702 | IGT_TIMEOUT(end_time); |
---|
499 | 703 | struct i915_request *request; |
---|
500 | 704 | unsigned long n, prime; |
---|
501 | 705 | ktime_t times[2] = {}; |
---|
502 | 706 | |
---|
503 | | - err = begin_live_test(&t, i915, __func__, engine->name); |
---|
| 707 | + err = igt_live_test_begin(&t, i915, __func__, engine->name); |
---|
504 | 708 | if (err) |
---|
505 | 709 | goto out_batch; |
---|
| 710 | + |
---|
| 711 | + intel_engine_pm_get(engine); |
---|
506 | 712 | |
---|
507 | 713 | /* Warmup / preload */ |
---|
508 | 714 | request = empty_request(engine, batch); |
---|
509 | 715 | if (IS_ERR(request)) { |
---|
510 | 716 | err = PTR_ERR(request); |
---|
| 717 | + intel_engine_pm_put(engine); |
---|
511 | 718 | goto out_batch; |
---|
512 | 719 | } |
---|
513 | | - i915_request_wait(request, |
---|
514 | | - I915_WAIT_LOCKED, |
---|
515 | | - MAX_SCHEDULE_TIMEOUT); |
---|
| 720 | + i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); |
---|
516 | 721 | |
---|
517 | 722 | for_each_prime_number_from(prime, 1, 8192) { |
---|
518 | 723 | times[1] = ktime_get_raw(); |
---|
519 | 724 | |
---|
520 | 725 | for (n = 0; n < prime; n++) { |
---|
| 726 | + i915_request_put(request); |
---|
521 | 727 | request = empty_request(engine, batch); |
---|
522 | 728 | if (IS_ERR(request)) { |
---|
523 | 729 | err = PTR_ERR(request); |
---|
| 730 | + intel_engine_pm_put(engine); |
---|
524 | 731 | goto out_batch; |
---|
525 | 732 | } |
---|
526 | 733 | } |
---|
527 | | - i915_request_wait(request, |
---|
528 | | - I915_WAIT_LOCKED, |
---|
529 | | - MAX_SCHEDULE_TIMEOUT); |
---|
| 734 | + i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); |
---|
530 | 735 | |
---|
531 | 736 | times[1] = ktime_sub(ktime_get_raw(), times[1]); |
---|
532 | 737 | if (prime == 1) |
---|
.. | .. |
---|
535 | 740 | if (__igt_timeout(end_time, NULL)) |
---|
536 | 741 | break; |
---|
537 | 742 | } |
---|
| 743 | + i915_request_put(request); |
---|
| 744 | + intel_engine_pm_put(engine); |
---|
538 | 745 | |
---|
539 | | - err = end_live_test(&t); |
---|
| 746 | + err = igt_live_test_end(&t); |
---|
540 | 747 | if (err) |
---|
541 | 748 | goto out_batch; |
---|
542 | 749 | |
---|
.. | .. |
---|
549 | 756 | out_batch: |
---|
550 | 757 | i915_vma_unpin(batch); |
---|
551 | 758 | i915_vma_put(batch); |
---|
552 | | -out_unlock: |
---|
553 | | - mutex_unlock(&i915->drm.struct_mutex); |
---|
554 | 759 | return err; |
---|
555 | 760 | } |
---|
556 | 761 | |
---|
557 | 762 | static struct i915_vma *recursive_batch(struct drm_i915_private *i915) |
---|
558 | 763 | { |
---|
559 | | - struct i915_gem_context *ctx = i915->kernel_context; |
---|
560 | | - struct i915_address_space *vm = |
---|
561 | | - ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; |
---|
562 | 764 | struct drm_i915_gem_object *obj; |
---|
563 | 765 | const int gen = INTEL_GEN(i915); |
---|
564 | 766 | struct i915_vma *vma; |
---|
.. | .. |
---|
569 | 771 | if (IS_ERR(obj)) |
---|
570 | 772 | return ERR_CAST(obj); |
---|
571 | 773 | |
---|
572 | | - vma = i915_vma_instance(obj, vm, NULL); |
---|
| 774 | + vma = i915_vma_instance(obj, i915->gt.vm, NULL); |
---|
573 | 775 | if (IS_ERR(vma)) { |
---|
574 | 776 | err = PTR_ERR(vma); |
---|
575 | 777 | goto err; |
---|
576 | 778 | } |
---|
577 | 779 | |
---|
578 | 780 | err = i915_vma_pin(vma, 0, 0, PIN_USER); |
---|
579 | | - if (err) |
---|
580 | | - goto err; |
---|
581 | | - |
---|
582 | | - err = i915_gem_object_set_to_wc_domain(obj, true); |
---|
583 | 781 | if (err) |
---|
584 | 782 | goto err; |
---|
585 | 783 | |
---|
.. | .. |
---|
601 | 799 | *cmd++ = lower_32_bits(vma->node.start); |
---|
602 | 800 | } |
---|
603 | 801 | *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ |
---|
604 | | - i915_gem_chipset_flush(i915); |
---|
605 | 802 | |
---|
| 803 | + __i915_gem_object_flush_map(obj, 0, 64); |
---|
606 | 804 | i915_gem_object_unpin_map(obj); |
---|
| 805 | + |
---|
| 806 | + intel_gt_chipset_flush(&i915->gt); |
---|
607 | 807 | |
---|
608 | 808 | return vma; |
---|
609 | 809 | |
---|
.. | .. |
---|
621 | 821 | return PTR_ERR(cmd); |
---|
622 | 822 | |
---|
623 | 823 | *cmd = MI_BATCH_BUFFER_END; |
---|
624 | | - i915_gem_chipset_flush(batch->vm->i915); |
---|
625 | 824 | |
---|
| 825 | + __i915_gem_object_flush_map(batch->obj, 0, sizeof(*cmd)); |
---|
626 | 826 | i915_gem_object_unpin_map(batch->obj); |
---|
| 827 | + |
---|
| 828 | + intel_gt_chipset_flush(batch->vm->gt); |
---|
627 | 829 | |
---|
628 | 830 | return 0; |
---|
629 | 831 | } |
---|
.. | .. |
---|
631 | 833 | static int live_all_engines(void *arg) |
---|
632 | 834 | { |
---|
633 | 835 | struct drm_i915_private *i915 = arg; |
---|
| 836 | + const unsigned int nengines = num_uabi_engines(i915); |
---|
634 | 837 | struct intel_engine_cs *engine; |
---|
635 | | - struct i915_request *request[I915_NUM_ENGINES]; |
---|
| 838 | + struct i915_request **request; |
---|
| 839 | + struct igt_live_test t; |
---|
636 | 840 | struct i915_vma *batch; |
---|
637 | | - struct live_test t; |
---|
638 | | - unsigned int id; |
---|
| 841 | + unsigned int idx; |
---|
639 | 842 | int err; |
---|
640 | 843 | |
---|
641 | | - /* Check we can submit requests to all engines simultaneously. We |
---|
| 844 | + /* |
---|
| 845 | + * Check we can submit requests to all engines simultaneously. We |
---|
642 | 846 | * send a recursive batch to each engine - checking that we don't |
---|
643 | 847 | * block doing so, and that they don't complete too soon. |
---|
644 | 848 | */ |
---|
645 | 849 | |
---|
646 | | - mutex_lock(&i915->drm.struct_mutex); |
---|
| 850 | + request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); |
---|
| 851 | + if (!request) |
---|
| 852 | + return -ENOMEM; |
---|
647 | 853 | |
---|
648 | | - err = begin_live_test(&t, i915, __func__, ""); |
---|
| 854 | + err = igt_live_test_begin(&t, i915, __func__, ""); |
---|
649 | 855 | if (err) |
---|
650 | | - goto out_unlock; |
---|
| 856 | + goto out_free; |
---|
651 | 857 | |
---|
652 | 858 | batch = recursive_batch(i915); |
---|
653 | 859 | if (IS_ERR(batch)) { |
---|
654 | 860 | err = PTR_ERR(batch); |
---|
655 | 861 | pr_err("%s: Unable to create batch, err=%d\n", __func__, err); |
---|
656 | | - goto out_unlock; |
---|
| 862 | + goto out_free; |
---|
657 | 863 | } |
---|
658 | 864 | |
---|
659 | | - for_each_engine(engine, i915, id) { |
---|
660 | | - request[id] = i915_request_alloc(engine, i915->kernel_context); |
---|
661 | | - if (IS_ERR(request[id])) { |
---|
662 | | - err = PTR_ERR(request[id]); |
---|
| 865 | + i915_vma_lock(batch); |
---|
| 866 | + |
---|
| 867 | + idx = 0; |
---|
| 868 | + for_each_uabi_engine(engine, i915) { |
---|
| 869 | + request[idx] = intel_engine_create_kernel_request(engine); |
---|
| 870 | + if (IS_ERR(request[idx])) { |
---|
| 871 | + err = PTR_ERR(request[idx]); |
---|
663 | 872 | pr_err("%s: Request allocation failed with err=%d\n", |
---|
664 | 873 | __func__, err); |
---|
665 | 874 | goto out_request; |
---|
666 | 875 | } |
---|
667 | 876 | |
---|
668 | | - err = engine->emit_bb_start(request[id], |
---|
| 877 | + err = i915_request_await_object(request[idx], batch->obj, 0); |
---|
| 878 | + if (err == 0) |
---|
| 879 | + err = i915_vma_move_to_active(batch, request[idx], 0); |
---|
| 880 | + GEM_BUG_ON(err); |
---|
| 881 | + |
---|
| 882 | + err = engine->emit_bb_start(request[idx], |
---|
669 | 883 | batch->node.start, |
---|
670 | 884 | batch->node.size, |
---|
671 | 885 | 0); |
---|
672 | 886 | GEM_BUG_ON(err); |
---|
673 | | - request[id]->batch = batch; |
---|
| 887 | + request[idx]->batch = batch; |
---|
674 | 888 | |
---|
675 | | - if (!i915_gem_object_has_active_reference(batch->obj)) { |
---|
676 | | - i915_gem_object_get(batch->obj); |
---|
677 | | - i915_gem_object_set_active_reference(batch->obj); |
---|
678 | | - } |
---|
679 | | - |
---|
680 | | - err = i915_vma_move_to_active(batch, request[id], 0); |
---|
681 | | - GEM_BUG_ON(err); |
---|
682 | | - |
---|
683 | | - i915_request_get(request[id]); |
---|
684 | | - i915_request_add(request[id]); |
---|
| 889 | + i915_request_get(request[idx]); |
---|
| 890 | + i915_request_add(request[idx]); |
---|
| 891 | + idx++; |
---|
685 | 892 | } |
---|
686 | 893 | |
---|
687 | | - for_each_engine(engine, i915, id) { |
---|
688 | | - if (i915_request_completed(request[id])) { |
---|
| 894 | + i915_vma_unlock(batch); |
---|
| 895 | + |
---|
| 896 | + idx = 0; |
---|
| 897 | + for_each_uabi_engine(engine, i915) { |
---|
| 898 | + if (i915_request_completed(request[idx])) { |
---|
689 | 899 | pr_err("%s(%s): request completed too early!\n", |
---|
690 | 900 | __func__, engine->name); |
---|
691 | 901 | err = -EINVAL; |
---|
692 | 902 | goto out_request; |
---|
693 | 903 | } |
---|
| 904 | + idx++; |
---|
694 | 905 | } |
---|
695 | 906 | |
---|
696 | 907 | err = recursive_batch_resolve(batch); |
---|
.. | .. |
---|
699 | 910 | goto out_request; |
---|
700 | 911 | } |
---|
701 | 912 | |
---|
702 | | - for_each_engine(engine, i915, id) { |
---|
| 913 | + idx = 0; |
---|
| 914 | + for_each_uabi_engine(engine, i915) { |
---|
703 | 915 | long timeout; |
---|
704 | 916 | |
---|
705 | | - timeout = i915_request_wait(request[id], |
---|
706 | | - I915_WAIT_LOCKED, |
---|
| 917 | + timeout = i915_request_wait(request[idx], 0, |
---|
707 | 918 | MAX_SCHEDULE_TIMEOUT); |
---|
708 | 919 | if (timeout < 0) { |
---|
709 | 920 | err = timeout; |
---|
.. | .. |
---|
712 | 923 | goto out_request; |
---|
713 | 924 | } |
---|
714 | 925 | |
---|
715 | | - GEM_BUG_ON(!i915_request_completed(request[id])); |
---|
716 | | - i915_request_put(request[id]); |
---|
717 | | - request[id] = NULL; |
---|
| 926 | + GEM_BUG_ON(!i915_request_completed(request[idx])); |
---|
| 927 | + i915_request_put(request[idx]); |
---|
| 928 | + request[idx] = NULL; |
---|
| 929 | + idx++; |
---|
718 | 930 | } |
---|
719 | 931 | |
---|
720 | | - err = end_live_test(&t); |
---|
| 932 | + err = igt_live_test_end(&t); |
---|
721 | 933 | |
---|
722 | 934 | out_request: |
---|
723 | | - for_each_engine(engine, i915, id) |
---|
724 | | - if (request[id]) |
---|
725 | | - i915_request_put(request[id]); |
---|
| 935 | + idx = 0; |
---|
| 936 | + for_each_uabi_engine(engine, i915) { |
---|
| 937 | + if (request[idx]) |
---|
| 938 | + i915_request_put(request[idx]); |
---|
| 939 | + idx++; |
---|
| 940 | + } |
---|
726 | 941 | i915_vma_unpin(batch); |
---|
727 | 942 | i915_vma_put(batch); |
---|
728 | | -out_unlock: |
---|
729 | | - mutex_unlock(&i915->drm.struct_mutex); |
---|
| 943 | +out_free: |
---|
| 944 | + kfree(request); |
---|
730 | 945 | return err; |
---|
731 | 946 | } |
---|
732 | 947 | |
---|
733 | 948 | static int live_sequential_engines(void *arg) |
---|
734 | 949 | { |
---|
735 | 950 | struct drm_i915_private *i915 = arg; |
---|
736 | | - struct i915_request *request[I915_NUM_ENGINES] = {}; |
---|
| 951 | + const unsigned int nengines = num_uabi_engines(i915); |
---|
| 952 | + struct i915_request **request; |
---|
737 | 953 | struct i915_request *prev = NULL; |
---|
738 | 954 | struct intel_engine_cs *engine; |
---|
739 | | - struct live_test t; |
---|
740 | | - unsigned int id; |
---|
| 955 | + struct igt_live_test t; |
---|
| 956 | + unsigned int idx; |
---|
741 | 957 | int err; |
---|
742 | 958 | |
---|
743 | | - /* Check we can submit requests to all engines sequentially, such |
---|
| 959 | + /* |
---|
| 960 | + * Check we can submit requests to all engines sequentially, such |
---|
744 | 961 | * that each successive request waits for the earlier ones. This |
---|
745 | 962 | * tests that we don't execute requests out of order, even though |
---|
746 | 963 | * they are running on independent engines. |
---|
747 | 964 | */ |
---|
748 | 965 | |
---|
749 | | - mutex_lock(&i915->drm.struct_mutex); |
---|
| 966 | + request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); |
---|
| 967 | + if (!request) |
---|
| 968 | + return -ENOMEM; |
---|
750 | 969 | |
---|
751 | | - err = begin_live_test(&t, i915, __func__, ""); |
---|
| 970 | + err = igt_live_test_begin(&t, i915, __func__, ""); |
---|
752 | 971 | if (err) |
---|
753 | | - goto out_unlock; |
---|
| 972 | + goto out_free; |
---|
754 | 973 | |
---|
755 | | - for_each_engine(engine, i915, id) { |
---|
| 974 | + idx = 0; |
---|
| 975 | + for_each_uabi_engine(engine, i915) { |
---|
756 | 976 | struct i915_vma *batch; |
---|
757 | 977 | |
---|
758 | 978 | batch = recursive_batch(i915); |
---|
.. | .. |
---|
760 | 980 | err = PTR_ERR(batch); |
---|
761 | 981 | pr_err("%s: Unable to create batch for %s, err=%d\n", |
---|
762 | 982 | __func__, engine->name, err); |
---|
| 983 | + goto out_free; |
---|
| 984 | + } |
---|
| 985 | + |
---|
| 986 | + i915_vma_lock(batch); |
---|
| 987 | + request[idx] = intel_engine_create_kernel_request(engine); |
---|
| 988 | + if (IS_ERR(request[idx])) { |
---|
| 989 | + err = PTR_ERR(request[idx]); |
---|
| 990 | + pr_err("%s: Request allocation failed for %s with err=%d\n", |
---|
| 991 | + __func__, engine->name, err); |
---|
763 | 992 | goto out_unlock; |
---|
764 | 993 | } |
---|
765 | 994 | |
---|
766 | | - request[id] = i915_request_alloc(engine, i915->kernel_context); |
---|
767 | | - if (IS_ERR(request[id])) { |
---|
768 | | - err = PTR_ERR(request[id]); |
---|
769 | | - pr_err("%s: Request allocation failed for %s with err=%d\n", |
---|
770 | | - __func__, engine->name, err); |
---|
771 | | - goto out_request; |
---|
772 | | - } |
---|
773 | | - |
---|
774 | 995 | if (prev) { |
---|
775 | | - err = i915_request_await_dma_fence(request[id], |
---|
| 996 | + err = i915_request_await_dma_fence(request[idx], |
---|
776 | 997 | &prev->fence); |
---|
777 | 998 | if (err) { |
---|
778 | | - i915_request_add(request[id]); |
---|
| 999 | + i915_request_add(request[idx]); |
---|
779 | 1000 | pr_err("%s: Request await failed for %s with err=%d\n", |
---|
780 | 1001 | __func__, engine->name, err); |
---|
781 | | - goto out_request; |
---|
| 1002 | + goto out_unlock; |
---|
782 | 1003 | } |
---|
783 | 1004 | } |
---|
784 | 1005 | |
---|
785 | | - err = engine->emit_bb_start(request[id], |
---|
| 1006 | + err = i915_request_await_object(request[idx], |
---|
| 1007 | + batch->obj, false); |
---|
| 1008 | + if (err == 0) |
---|
| 1009 | + err = i915_vma_move_to_active(batch, request[idx], 0); |
---|
| 1010 | + GEM_BUG_ON(err); |
---|
| 1011 | + |
---|
| 1012 | + err = engine->emit_bb_start(request[idx], |
---|
786 | 1013 | batch->node.start, |
---|
787 | 1014 | batch->node.size, |
---|
788 | 1015 | 0); |
---|
789 | 1016 | GEM_BUG_ON(err); |
---|
790 | | - request[id]->batch = batch; |
---|
| 1017 | + request[idx]->batch = batch; |
---|
791 | 1018 | |
---|
792 | | - err = i915_vma_move_to_active(batch, request[id], 0); |
---|
793 | | - GEM_BUG_ON(err); |
---|
| 1019 | + i915_request_get(request[idx]); |
---|
| 1020 | + i915_request_add(request[idx]); |
---|
794 | 1021 | |
---|
795 | | - i915_gem_object_set_active_reference(batch->obj); |
---|
796 | | - i915_vma_get(batch); |
---|
| 1022 | + prev = request[idx]; |
---|
| 1023 | + idx++; |
---|
797 | 1024 | |
---|
798 | | - i915_request_get(request[id]); |
---|
799 | | - i915_request_add(request[id]); |
---|
800 | | - |
---|
801 | | - prev = request[id]; |
---|
| 1025 | +out_unlock: |
---|
| 1026 | + i915_vma_unlock(batch); |
---|
| 1027 | + if (err) |
---|
| 1028 | + goto out_request; |
---|
802 | 1029 | } |
---|
803 | 1030 | |
---|
804 | | - for_each_engine(engine, i915, id) { |
---|
| 1031 | + idx = 0; |
---|
| 1032 | + for_each_uabi_engine(engine, i915) { |
---|
805 | 1033 | long timeout; |
---|
806 | 1034 | |
---|
807 | | - if (i915_request_completed(request[id])) { |
---|
| 1035 | + if (i915_request_completed(request[idx])) { |
---|
808 | 1036 | pr_err("%s(%s): request completed too early!\n", |
---|
809 | 1037 | __func__, engine->name); |
---|
810 | 1038 | err = -EINVAL; |
---|
811 | 1039 | goto out_request; |
---|
812 | 1040 | } |
---|
813 | 1041 | |
---|
814 | | - err = recursive_batch_resolve(request[id]->batch); |
---|
| 1042 | + err = recursive_batch_resolve(request[idx]->batch); |
---|
815 | 1043 | if (err) { |
---|
816 | 1044 | pr_err("%s: failed to resolve batch, err=%d\n", |
---|
817 | 1045 | __func__, err); |
---|
818 | 1046 | goto out_request; |
---|
819 | 1047 | } |
---|
820 | 1048 | |
---|
821 | | - timeout = i915_request_wait(request[id], |
---|
822 | | - I915_WAIT_LOCKED, |
---|
| 1049 | + timeout = i915_request_wait(request[idx], 0, |
---|
823 | 1050 | MAX_SCHEDULE_TIMEOUT); |
---|
824 | 1051 | if (timeout < 0) { |
---|
825 | 1052 | err = timeout; |
---|
.. | .. |
---|
828 | 1055 | goto out_request; |
---|
829 | 1056 | } |
---|
830 | 1057 | |
---|
831 | | - GEM_BUG_ON(!i915_request_completed(request[id])); |
---|
| 1058 | + GEM_BUG_ON(!i915_request_completed(request[idx])); |
---|
| 1059 | + idx++; |
---|
832 | 1060 | } |
---|
833 | 1061 | |
---|
834 | | - err = end_live_test(&t); |
---|
| 1062 | + err = igt_live_test_end(&t); |
---|
835 | 1063 | |
---|
836 | 1064 | out_request: |
---|
837 | | - for_each_engine(engine, i915, id) { |
---|
| 1065 | + idx = 0; |
---|
| 1066 | + for_each_uabi_engine(engine, i915) { |
---|
838 | 1067 | u32 *cmd; |
---|
839 | 1068 | |
---|
840 | | - if (!request[id]) |
---|
| 1069 | + if (!request[idx]) |
---|
841 | 1070 | break; |
---|
842 | 1071 | |
---|
843 | | - cmd = i915_gem_object_pin_map(request[id]->batch->obj, |
---|
| 1072 | + cmd = i915_gem_object_pin_map(request[idx]->batch->obj, |
---|
844 | 1073 | I915_MAP_WC); |
---|
845 | 1074 | if (!IS_ERR(cmd)) { |
---|
846 | 1075 | *cmd = MI_BATCH_BUFFER_END; |
---|
847 | | - i915_gem_chipset_flush(i915); |
---|
848 | 1076 | |
---|
849 | | - i915_gem_object_unpin_map(request[id]->batch->obj); |
---|
| 1077 | + __i915_gem_object_flush_map(request[idx]->batch->obj, |
---|
| 1078 | + 0, sizeof(*cmd)); |
---|
| 1079 | + i915_gem_object_unpin_map(request[idx]->batch->obj); |
---|
| 1080 | + |
---|
| 1081 | + intel_gt_chipset_flush(engine->gt); |
---|
850 | 1082 | } |
---|
851 | 1083 | |
---|
852 | | - i915_vma_put(request[id]->batch); |
---|
853 | | - i915_request_put(request[id]); |
---|
| 1084 | + i915_vma_put(request[idx]->batch); |
---|
| 1085 | + i915_request_put(request[idx]); |
---|
| 1086 | + idx++; |
---|
854 | 1087 | } |
---|
855 | | -out_unlock: |
---|
856 | | - mutex_unlock(&i915->drm.struct_mutex); |
---|
| 1088 | +out_free: |
---|
| 1089 | + kfree(request); |
---|
857 | 1090 | return err; |
---|
| 1091 | +} |
---|
| 1092 | + |
---|
| 1093 | +static int __live_parallel_engine1(void *arg) |
---|
| 1094 | +{ |
---|
| 1095 | + struct intel_engine_cs *engine = arg; |
---|
| 1096 | + IGT_TIMEOUT(end_time); |
---|
| 1097 | + unsigned long count; |
---|
| 1098 | + int err = 0; |
---|
| 1099 | + |
---|
| 1100 | + count = 0; |
---|
| 1101 | + intel_engine_pm_get(engine); |
---|
| 1102 | + do { |
---|
| 1103 | + struct i915_request *rq; |
---|
| 1104 | + |
---|
| 1105 | + rq = i915_request_create(engine->kernel_context); |
---|
| 1106 | + if (IS_ERR(rq)) { |
---|
| 1107 | + err = PTR_ERR(rq); |
---|
| 1108 | + break; |
---|
| 1109 | + } |
---|
| 1110 | + |
---|
| 1111 | + i915_request_get(rq); |
---|
| 1112 | + i915_request_add(rq); |
---|
| 1113 | + |
---|
| 1114 | + err = 0; |
---|
| 1115 | + if (i915_request_wait(rq, 0, HZ / 5) < 0) |
---|
| 1116 | + err = -ETIME; |
---|
| 1117 | + i915_request_put(rq); |
---|
| 1118 | + if (err) |
---|
| 1119 | + break; |
---|
| 1120 | + |
---|
| 1121 | + count++; |
---|
| 1122 | + } while (!__igt_timeout(end_time, NULL)); |
---|
| 1123 | + intel_engine_pm_put(engine); |
---|
| 1124 | + |
---|
| 1125 | + pr_info("%s: %lu request + sync\n", engine->name, count); |
---|
| 1126 | + return err; |
---|
| 1127 | +} |
---|
| 1128 | + |
---|
| 1129 | +static int __live_parallel_engineN(void *arg) |
---|
| 1130 | +{ |
---|
| 1131 | + struct intel_engine_cs *engine = arg; |
---|
| 1132 | + IGT_TIMEOUT(end_time); |
---|
| 1133 | + unsigned long count; |
---|
| 1134 | + int err = 0; |
---|
| 1135 | + |
---|
| 1136 | + count = 0; |
---|
| 1137 | + intel_engine_pm_get(engine); |
---|
| 1138 | + do { |
---|
| 1139 | + struct i915_request *rq; |
---|
| 1140 | + |
---|
| 1141 | + rq = i915_request_create(engine->kernel_context); |
---|
| 1142 | + if (IS_ERR(rq)) { |
---|
| 1143 | + err = PTR_ERR(rq); |
---|
| 1144 | + break; |
---|
| 1145 | + } |
---|
| 1146 | + |
---|
| 1147 | + i915_request_add(rq); |
---|
| 1148 | + count++; |
---|
| 1149 | + } while (!__igt_timeout(end_time, NULL)); |
---|
| 1150 | + intel_engine_pm_put(engine); |
---|
| 1151 | + |
---|
| 1152 | + pr_info("%s: %lu requests\n", engine->name, count); |
---|
| 1153 | + return err; |
---|
| 1154 | +} |
---|
| 1155 | + |
---|
| 1156 | +static bool wake_all(struct drm_i915_private *i915) |
---|
| 1157 | +{ |
---|
| 1158 | + if (atomic_dec_and_test(&i915->selftest.counter)) { |
---|
| 1159 | + wake_up_var(&i915->selftest.counter); |
---|
| 1160 | + return true; |
---|
| 1161 | + } |
---|
| 1162 | + |
---|
| 1163 | + return false; |
---|
| 1164 | +} |
---|
| 1165 | + |
---|
| 1166 | +static int wait_for_all(struct drm_i915_private *i915) |
---|
| 1167 | +{ |
---|
| 1168 | + if (wake_all(i915)) |
---|
| 1169 | + return 0; |
---|
| 1170 | + |
---|
| 1171 | + if (wait_var_event_timeout(&i915->selftest.counter, |
---|
| 1172 | + !atomic_read(&i915->selftest.counter), |
---|
| 1173 | + i915_selftest.timeout_jiffies)) |
---|
| 1174 | + return 0; |
---|
| 1175 | + |
---|
| 1176 | + return -ETIME; |
---|
| 1177 | +} |
---|
| 1178 | + |
---|
| 1179 | +static int __live_parallel_spin(void *arg) |
---|
| 1180 | +{ |
---|
| 1181 | + struct intel_engine_cs *engine = arg; |
---|
| 1182 | + struct igt_spinner spin; |
---|
| 1183 | + struct i915_request *rq; |
---|
| 1184 | + int err = 0; |
---|
| 1185 | + |
---|
| 1186 | + /* |
---|
| 1187 | + * Create a spinner running for eternity on each engine. If a second |
---|
| 1188 | + * spinner is incorrectly placed on the same engine, it will not be |
---|
| 1189 | + * able to start in time. |
---|
| 1190 | + */ |
---|
| 1191 | + |
---|
| 1192 | + if (igt_spinner_init(&spin, engine->gt)) { |
---|
| 1193 | + wake_all(engine->i915); |
---|
| 1194 | + return -ENOMEM; |
---|
| 1195 | + } |
---|
| 1196 | + |
---|
| 1197 | + intel_engine_pm_get(engine); |
---|
| 1198 | + rq = igt_spinner_create_request(&spin, |
---|
| 1199 | + engine->kernel_context, |
---|
| 1200 | + MI_NOOP); /* no preemption */ |
---|
| 1201 | + intel_engine_pm_put(engine); |
---|
| 1202 | + if (IS_ERR(rq)) { |
---|
| 1203 | + err = PTR_ERR(rq); |
---|
| 1204 | + if (err == -ENODEV) |
---|
| 1205 | + err = 0; |
---|
| 1206 | + wake_all(engine->i915); |
---|
| 1207 | + goto out_spin; |
---|
| 1208 | + } |
---|
| 1209 | + |
---|
| 1210 | + i915_request_get(rq); |
---|
| 1211 | + i915_request_add(rq); |
---|
| 1212 | + if (igt_wait_for_spinner(&spin, rq)) { |
---|
| 1213 | + /* Occupy this engine for the whole test */ |
---|
| 1214 | + err = wait_for_all(engine->i915); |
---|
| 1215 | + } else { |
---|
| 1216 | + pr_err("Failed to start spinner on %s\n", engine->name); |
---|
| 1217 | + err = -EINVAL; |
---|
| 1218 | + } |
---|
| 1219 | + igt_spinner_end(&spin); |
---|
| 1220 | + |
---|
| 1221 | + if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) |
---|
| 1222 | + err = -EIO; |
---|
| 1223 | + i915_request_put(rq); |
---|
| 1224 | + |
---|
| 1225 | +out_spin: |
---|
| 1226 | + igt_spinner_fini(&spin); |
---|
| 1227 | + return err; |
---|
| 1228 | +} |
---|
| 1229 | + |
---|
| 1230 | +static int live_parallel_engines(void *arg) |
---|
| 1231 | +{ |
---|
| 1232 | + struct drm_i915_private *i915 = arg; |
---|
| 1233 | + static int (* const func[])(void *arg) = { |
---|
| 1234 | + __live_parallel_engine1, |
---|
| 1235 | + __live_parallel_engineN, |
---|
| 1236 | + __live_parallel_spin, |
---|
| 1237 | + NULL, |
---|
| 1238 | + }; |
---|
| 1239 | + const unsigned int nengines = num_uabi_engines(i915); |
---|
| 1240 | + struct intel_engine_cs *engine; |
---|
| 1241 | + int (* const *fn)(void *arg); |
---|
| 1242 | + struct task_struct **tsk; |
---|
| 1243 | + int err = 0; |
---|
| 1244 | + |
---|
| 1245 | + /* |
---|
| 1246 | + * Check we can submit requests to all engines concurrently. This |
---|
| 1247 | + * tests that we load up the system maximally. |
---|
| 1248 | + */ |
---|
| 1249 | + |
---|
| 1250 | + tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL); |
---|
| 1251 | + if (!tsk) |
---|
| 1252 | + return -ENOMEM; |
---|
| 1253 | + |
---|
| 1254 | + for (fn = func; !err && *fn; fn++) { |
---|
| 1255 | + char name[KSYM_NAME_LEN]; |
---|
| 1256 | + struct igt_live_test t; |
---|
| 1257 | + unsigned int idx; |
---|
| 1258 | + |
---|
| 1259 | + snprintf(name, sizeof(name), "%ps", *fn); |
---|
| 1260 | + err = igt_live_test_begin(&t, i915, __func__, name); |
---|
| 1261 | + if (err) |
---|
| 1262 | + break; |
---|
| 1263 | + |
---|
| 1264 | + atomic_set(&i915->selftest.counter, nengines); |
---|
| 1265 | + |
---|
| 1266 | + idx = 0; |
---|
| 1267 | + for_each_uabi_engine(engine, i915) { |
---|
| 1268 | + tsk[idx] = kthread_run(*fn, engine, |
---|
| 1269 | + "igt/parallel:%s", |
---|
| 1270 | + engine->name); |
---|
| 1271 | + if (IS_ERR(tsk[idx])) { |
---|
| 1272 | + err = PTR_ERR(tsk[idx]); |
---|
| 1273 | + break; |
---|
| 1274 | + } |
---|
| 1275 | + get_task_struct(tsk[idx++]); |
---|
| 1276 | + } |
---|
| 1277 | + |
---|
| 1278 | + yield(); /* start all threads before we kthread_stop() */ |
---|
| 1279 | + |
---|
| 1280 | + idx = 0; |
---|
| 1281 | + for_each_uabi_engine(engine, i915) { |
---|
| 1282 | + int status; |
---|
| 1283 | + |
---|
| 1284 | + if (IS_ERR(tsk[idx])) |
---|
| 1285 | + break; |
---|
| 1286 | + |
---|
| 1287 | + status = kthread_stop(tsk[idx]); |
---|
| 1288 | + if (status && !err) |
---|
| 1289 | + err = status; |
---|
| 1290 | + |
---|
| 1291 | + put_task_struct(tsk[idx++]); |
---|
| 1292 | + } |
---|
| 1293 | + |
---|
| 1294 | + if (igt_live_test_end(&t)) |
---|
| 1295 | + err = -EIO; |
---|
| 1296 | + } |
---|
| 1297 | + |
---|
| 1298 | + kfree(tsk); |
---|
| 1299 | + return err; |
---|
| 1300 | +} |
---|
| 1301 | + |
---|
| 1302 | +static int |
---|
| 1303 | +max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) |
---|
| 1304 | +{ |
---|
| 1305 | + struct i915_request *rq; |
---|
| 1306 | + int ret; |
---|
| 1307 | + |
---|
| 1308 | + /* |
---|
| 1309 | + * Before execlists, all contexts share the same ringbuffer. With |
---|
| 1310 | + * execlists, each context/engine has a separate ringbuffer and |
---|
| 1311 | + * for the purposes of this test, inexhaustible. |
---|
| 1312 | + * |
---|
| 1313 | + * For the global ringbuffer though, we have to be very careful |
---|
| 1314 | + * that we do not wrap while preventing the execution of requests |
---|
| 1315 | + * with an unsignaled fence. |
---|
| 1316 | + */ |
---|
| 1317 | + if (HAS_EXECLISTS(ctx->i915)) |
---|
| 1318 | + return INT_MAX; |
---|
| 1319 | + |
---|
| 1320 | + rq = igt_request_alloc(ctx, engine); |
---|
| 1321 | + if (IS_ERR(rq)) { |
---|
| 1322 | + ret = PTR_ERR(rq); |
---|
| 1323 | + } else { |
---|
| 1324 | + int sz; |
---|
| 1325 | + |
---|
| 1326 | + ret = rq->ring->size - rq->reserved_space; |
---|
| 1327 | + i915_request_add(rq); |
---|
| 1328 | + |
---|
| 1329 | + sz = rq->ring->emit - rq->head; |
---|
| 1330 | + if (sz < 0) |
---|
| 1331 | + sz += rq->ring->size; |
---|
| 1332 | + ret /= sz; |
---|
| 1333 | + ret /= 2; /* leave half spare, in case of emergency! */ |
---|
| 1334 | + } |
---|
| 1335 | + |
---|
| 1336 | + return ret; |
---|
| 1337 | +} |
---|
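
The estimate above is plain arithmetic: take the usable ring space (ring size minus the reserved tail), divide by the size of one sample request, and keep half of that as the per-ring limit. A minimal userspace sketch of the same calculation, using invented example sizes rather than values read from a real ring:

#include <stdio.h>

/* Example values only; a real ring reports these at runtime. */
#define RING_SIZE      (16 * 1024)	/* bytes in the legacy ringbuffer */
#define RESERVED_SPACE 256		/* bytes kept back for the final breadcrumb */
#define REQUEST_BYTES  192		/* bytes one sample request emitted */

int main(void)
{
	int usable = RING_SIZE - RESERVED_SPACE;
	int max_batches = usable / REQUEST_BYTES / 2; /* leave half spare */

	printf("max batches per ring: %d\n", max_batches);
	return 0;
}
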
| 1338 | + |
---|
| 1339 | +static int live_breadcrumbs_smoketest(void *arg) |
---|
| 1340 | +{ |
---|
| 1341 | + struct drm_i915_private *i915 = arg; |
---|
| 1342 | + const unsigned int nengines = num_uabi_engines(i915); |
---|
| 1343 | + const unsigned int ncpus = num_online_cpus(); |
---|
| 1344 | + unsigned long num_waits, num_fences; |
---|
| 1345 | + struct intel_engine_cs *engine; |
---|
| 1346 | + struct task_struct **threads; |
---|
| 1347 | + struct igt_live_test live; |
---|
| 1348 | + intel_wakeref_t wakeref; |
---|
| 1349 | + struct smoketest *smoke; |
---|
| 1350 | + unsigned int n, idx; |
---|
| 1351 | + struct file *file; |
---|
| 1352 | + int ret = 0; |
---|
| 1353 | + |
---|
| 1354 | + /* |
---|
| 1355 | + * Smoketest our breadcrumb/signal handling for requests across multiple |
---|
| 1356 | + * threads. A very simple test to only catch the most egregious of bugs. |
---|
| 1357 | + * See __igt_breadcrumbs_smoketest(); |
---|
| 1358 | + * |
---|
| 1359 | + * On real hardware this time. |
---|
| 1360 | + */ |
---|
| 1361 | + |
---|
| 1362 | + wakeref = intel_runtime_pm_get(&i915->runtime_pm); |
---|
| 1363 | + |
---|
| 1364 | + file = mock_file(i915); |
---|
| 1365 | + if (IS_ERR(file)) { |
---|
| 1366 | + ret = PTR_ERR(file); |
---|
| 1367 | + goto out_rpm; |
---|
| 1368 | + } |
---|
| 1369 | + |
---|
| 1370 | + smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL); |
---|
| 1371 | + if (!smoke) { |
---|
| 1372 | + ret = -ENOMEM; |
---|
| 1373 | + goto out_file; |
---|
| 1374 | + } |
---|
| 1375 | + |
---|
| 1376 | + threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL); |
---|
| 1377 | + if (!threads) { |
---|
| 1378 | + ret = -ENOMEM; |
---|
| 1379 | + goto out_smoke; |
---|
| 1380 | + } |
---|
| 1381 | + |
---|
| 1382 | + smoke[0].request_alloc = __live_request_alloc; |
---|
| 1383 | + smoke[0].ncontexts = 64; |
---|
| 1384 | + smoke[0].contexts = kcalloc(smoke[0].ncontexts, |
---|
| 1385 | + sizeof(*smoke[0].contexts), |
---|
| 1386 | + GFP_KERNEL); |
---|
| 1387 | + if (!smoke[0].contexts) { |
---|
| 1388 | + ret = -ENOMEM; |
---|
| 1389 | + goto out_threads; |
---|
| 1390 | + } |
---|
| 1391 | + |
---|
| 1392 | + for (n = 0; n < smoke[0].ncontexts; n++) { |
---|
| 1393 | + smoke[0].contexts[n] = live_context(i915, file); |
---|
| 1394 | + if (IS_ERR(smoke[0].contexts[n])) { |
---|
| 1395 | + ret = PTR_ERR(smoke[0].contexts[n]); |
---|
| 1396 | + goto out_contexts; |
---|
| 1397 | + } |
---|
| 1398 | + } |
---|
| 1399 | + |
---|
| 1400 | + ret = igt_live_test_begin(&live, i915, __func__, ""); |
---|
| 1401 | + if (ret) |
---|
| 1402 | + goto out_contexts; |
---|
| 1403 | + |
---|
| 1404 | + idx = 0; |
---|
| 1405 | + for_each_uabi_engine(engine, i915) { |
---|
| 1406 | + smoke[idx] = smoke[0]; |
---|
| 1407 | + smoke[idx].engine = engine; |
---|
| 1408 | + smoke[idx].max_batch = |
---|
| 1409 | + max_batches(smoke[0].contexts[0], engine); |
---|
| 1410 | + if (smoke[idx].max_batch < 0) { |
---|
| 1411 | + ret = smoke[idx].max_batch; |
---|
| 1412 | + goto out_flush; |
---|
| 1413 | + } |
---|
| 1414 | + /* One ring interleaved between requests from all cpus */ |
---|
| 1415 | + smoke[idx].max_batch /= num_online_cpus() + 1; |
---|
| 1416 | + pr_debug("Limiting batches to %d requests on %s\n", |
---|
| 1417 | + smoke[idx].max_batch, engine->name); |
---|
| 1418 | + |
---|
| 1419 | + for (n = 0; n < ncpus; n++) { |
---|
| 1420 | + struct task_struct *tsk; |
---|
| 1421 | + |
---|
| 1422 | + tsk = kthread_run(__igt_breadcrumbs_smoketest, |
---|
| 1423 | + &smoke[idx], "igt/%d.%d", idx, n); |
---|
| 1424 | + if (IS_ERR(tsk)) { |
---|
| 1425 | + ret = PTR_ERR(tsk); |
---|
| 1426 | + goto out_flush; |
---|
| 1427 | + } |
---|
| 1428 | + |
---|
| 1429 | + get_task_struct(tsk); |
---|
| 1430 | + threads[idx * ncpus + n] = tsk; |
---|
| 1431 | + } |
---|
| 1432 | + |
---|
| 1433 | + idx++; |
---|
| 1434 | + } |
---|
| 1435 | + |
---|
| 1436 | + yield(); /* start all threads before we begin */ |
---|
| 1437 | + msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); |
---|
| 1438 | + |
---|
| 1439 | +out_flush: |
---|
| 1440 | + idx = 0; |
---|
| 1441 | + num_waits = 0; |
---|
| 1442 | + num_fences = 0; |
---|
| 1443 | + for_each_uabi_engine(engine, i915) { |
---|
| 1444 | + for (n = 0; n < ncpus; n++) { |
---|
| 1445 | + struct task_struct *tsk = threads[idx * ncpus + n]; |
---|
| 1446 | + int err; |
---|
| 1447 | + |
---|
| 1448 | + if (!tsk) |
---|
| 1449 | + continue; |
---|
| 1450 | + |
---|
| 1451 | + err = kthread_stop(tsk); |
---|
| 1452 | + if (err < 0 && !ret) |
---|
| 1453 | + ret = err; |
---|
| 1454 | + |
---|
| 1455 | + put_task_struct(tsk); |
---|
| 1456 | + } |
---|
| 1457 | + |
---|
| 1458 | + num_waits += atomic_long_read(&smoke[idx].num_waits); |
---|
| 1459 | + num_fences += atomic_long_read(&smoke[idx].num_fences); |
---|
| 1460 | + idx++; |
---|
| 1461 | + } |
---|
| 1462 | + pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", |
---|
| 1463 | + num_waits, num_fences, idx, ncpus); |
---|
| 1464 | + |
---|
| 1465 | + ret = igt_live_test_end(&live) ?: ret; |
---|
| 1466 | +out_contexts: |
---|
| 1467 | + kfree(smoke[0].contexts); |
---|
| 1468 | +out_threads: |
---|
| 1469 | + kfree(threads); |
---|
| 1470 | +out_smoke: |
---|
| 1471 | + kfree(smoke); |
---|
| 1472 | +out_file: |
---|
| 1473 | + fput(file); |
---|
| 1474 | +out_rpm: |
---|
| 1475 | + intel_runtime_pm_put(&i915->runtime_pm, wakeref); |
---|
| 1476 | + |
---|
| 1477 | + return ret; |
---|
858 | 1478 | } |
---|
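
The smoketest above starts one worker per online CPU for every uabi engine and keeps them in a flat array, indexed row-major as threads[engine_idx * ncpus + cpu_idx]; the same indexing is reused when stopping the workers and summing their counters. A tiny sketch of that layout with made-up sizes:

#include <stdio.h>

int main(void)
{
	const unsigned int nengines = 3, ncpus = 4;	/* example sizes only */
	unsigned int idx, n;

	/* Row-major layout: one contiguous row of ncpus workers per engine. */
	for (idx = 0; idx < nengines; idx++)
		for (n = 0; n < ncpus; n++)
			printf("engine %u, worker %u -> slot %u\n",
			       idx, n, idx * ncpus + n);

	return 0;
}
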
859 | 1479 | |
---|
860 | 1480 | int i915_request_live_selftests(struct drm_i915_private *i915) |
---|
.. | .. |
---|
863 | 1483 | SUBTEST(live_nop_request), |
---|
864 | 1484 | SUBTEST(live_all_engines), |
---|
865 | 1485 | SUBTEST(live_sequential_engines), |
---|
| 1486 | + SUBTEST(live_parallel_engines), |
---|
866 | 1487 | SUBTEST(live_empty_request), |
---|
| 1488 | + SUBTEST(live_breadcrumbs_smoketest), |
---|
867 | 1489 | }; |
---|
868 | 1490 | |
---|
869 | | - if (i915_terminally_wedged(&i915->gpu_error)) |
---|
| 1491 | + if (intel_gt_is_wedged(&i915->gt)) |
---|
| 1492 | + return 0; |
---|
| 1493 | + |
---|
| 1494 | + return i915_subtests(tests, i915); |
---|
| 1495 | +} |
---|
| 1496 | + |
---|
| 1497 | +static int switch_to_kernel_sync(struct intel_context *ce, int err) |
---|
| 1498 | +{ |
---|
| 1499 | + struct i915_request *rq; |
---|
| 1500 | + struct dma_fence *fence; |
---|
| 1501 | + |
---|
| 1502 | + rq = intel_engine_create_kernel_request(ce->engine); |
---|
| 1503 | + if (IS_ERR(rq)) |
---|
| 1504 | + return PTR_ERR(rq); |
---|
| 1505 | + |
---|
| 1506 | + fence = i915_active_fence_get(&ce->timeline->last_request); |
---|
| 1507 | + if (fence) { |
---|
| 1508 | + i915_request_await_dma_fence(rq, fence); |
---|
| 1509 | + dma_fence_put(fence); |
---|
| 1510 | + } |
---|
| 1511 | + |
---|
| 1512 | + rq = i915_request_get(rq); |
---|
| 1513 | + i915_request_add(rq); |
---|
| 1514 | + if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err) |
---|
| 1515 | + err = -ETIME; |
---|
| 1516 | + i915_request_put(rq); |
---|
| 1517 | + |
---|
| 1518 | + while (!err && !intel_engine_is_idle(ce->engine)) |
---|
| 1519 | + intel_engine_flush_submission(ce->engine); |
---|
| 1520 | + |
---|
| 1521 | + return err; |
---|
| 1522 | +} |
---|
| 1523 | + |
---|
| 1524 | +struct perf_stats { |
---|
| 1525 | + struct intel_engine_cs *engine; |
---|
| 1526 | + unsigned long count; |
---|
| 1527 | + ktime_t time; |
---|
| 1528 | + ktime_t busy; |
---|
| 1529 | + u64 runtime; |
---|
| 1530 | +}; |
---|
| 1531 | + |
---|
| 1532 | +struct perf_series { |
---|
| 1533 | + struct drm_i915_private *i915; |
---|
| 1534 | + unsigned int nengines; |
---|
| 1535 | + struct intel_context *ce[]; |
---|
| 1536 | +}; |
---|
| 1537 | + |
---|
| 1538 | +static int cmp_u32(const void *A, const void *B) |
---|
| 1539 | +{ |
---|
| 1540 | + const u32 *a = A, *b = B; |
---|
| 1541 | + |
---|
| 1542 | + return *a - *b; |
---|
| 1543 | +} |
---|
| 1544 | + |
---|
| 1545 | +static u32 trifilter(u32 *a) |
---|
| 1546 | +{ |
---|
| 1547 | + u64 sum; |
---|
| 1548 | + |
---|
| 1549 | +#define TF_COUNT 5 |
---|
| 1550 | + sort(a, TF_COUNT, sizeof(*a), cmp_u32, NULL); |
---|
| 1551 | + |
---|
| 1552 | + sum = mul_u32_u32(a[2], 2); |
---|
| 1553 | + sum += a[1]; |
---|
| 1554 | + sum += a[3]; |
---|
| 1555 | + |
---|
| 1556 | + GEM_BUG_ON(sum > U32_MAX); |
---|
| 1557 | + return sum; |
---|
| 1558 | +#define TF_BIAS 2 |
---|
| 1559 | +} |
---|
| 1560 | + |
---|
| 1561 | +static u64 cycles_to_ns(struct intel_engine_cs *engine, u32 cycles) |
---|
| 1562 | +{ |
---|
| 1563 | + u64 ns = i915_cs_timestamp_ticks_to_ns(engine->i915, cycles); |
---|
| 1564 | + |
---|
| 1565 | + return DIV_ROUND_CLOSEST(ns, 1 << TF_BIAS); |
---|
| 1566 | +} |
---|
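
trifilter() sorts five samples and keeps only the middle three, with the median weighted twice, so the returned sum is four times the filtered value; cycles_to_ns() strips that 2^TF_BIAS scaling while converting ticks to time. A standalone sketch of the filter (the tick-to-nanosecond ratio is hardware-specific and omitted here, and the comparator is written wraparound-safe for the sketch):

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#define TF_COUNT 5
#define TF_BIAS  2

static int cmp_u32(const void *A, const void *B)
{
	uint32_t a = *(const uint32_t *)A, b = *(const uint32_t *)B;

	return (a > b) - (a < b);
}

/* Weighted median of five: a[1] + 2 * a[2] + a[3], i.e. 4x the filtered value. */
static uint32_t trifilter(uint32_t *a)
{
	qsort(a, TF_COUNT, sizeof(*a), cmp_u32);
	return a[1] + 2 * a[2] + a[3];
}

int main(void)
{
	uint32_t samples[TF_COUNT] = { 120, 90, 3000, 110, 95 }; /* one outlier */
	uint32_t sum = trifilter(samples);

	/* Remove the 2^TF_BIAS scaling to recover the filtered sample. */
	printf("filtered latency: %u cycles\n", sum >> TF_BIAS);
	return 0;
}
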
| 1567 | + |
---|
| 1568 | +static u32 *emit_timestamp_store(u32 *cs, struct intel_context *ce, u32 offset) |
---|
| 1569 | +{ |
---|
| 1570 | + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; |
---|
| 1571 | + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP((ce->engine->mmio_base))); |
---|
| 1572 | + *cs++ = offset; |
---|
| 1573 | + *cs++ = 0; |
---|
| 1574 | + |
---|
| 1575 | + return cs; |
---|
| 1576 | +} |
---|
| 1577 | + |
---|
| 1578 | +static u32 *emit_store_dw(u32 *cs, u32 offset, u32 value) |
---|
| 1579 | +{ |
---|
| 1580 | + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; |
---|
| 1581 | + *cs++ = offset; |
---|
| 1582 | + *cs++ = 0; |
---|
| 1583 | + *cs++ = value; |
---|
| 1584 | + |
---|
| 1585 | + return cs; |
---|
| 1586 | +} |
---|
| 1587 | + |
---|
| 1588 | +static u32 *emit_semaphore_poll(u32 *cs, u32 mode, u32 value, u32 offset) |
---|
| 1589 | +{ |
---|
| 1590 | + *cs++ = MI_SEMAPHORE_WAIT | |
---|
| 1591 | + MI_SEMAPHORE_GLOBAL_GTT | |
---|
| 1592 | + MI_SEMAPHORE_POLL | |
---|
| 1593 | + mode; |
---|
| 1594 | + *cs++ = value; |
---|
| 1595 | + *cs++ = offset; |
---|
| 1596 | + *cs++ = 0; |
---|
| 1597 | + |
---|
| 1598 | + return cs; |
---|
| 1599 | +} |
---|
| 1600 | + |
---|
| 1601 | +static u32 *emit_semaphore_poll_until(u32 *cs, u32 offset, u32 value) |
---|
| 1602 | +{ |
---|
| 1603 | + return emit_semaphore_poll(cs, MI_SEMAPHORE_SAD_EQ_SDD, value, offset); |
---|
| 1604 | +} |
---|
| 1605 | + |
---|
| 1606 | +static void semaphore_set(u32 *sema, u32 value) |
---|
| 1607 | +{ |
---|
| 1608 | + WRITE_ONCE(*sema, value); |
---|
| 1609 | + wmb(); /* flush the update to the cache, and beyond */ |
---|
| 1610 | +} |
---|
| 1611 | + |
---|
| 1612 | +static u32 *hwsp_scratch(const struct intel_context *ce) |
---|
| 1613 | +{ |
---|
| 1614 | + return memset32(ce->engine->status_page.addr + 1000, 0, 21); |
---|
| 1615 | +} |
---|
| 1616 | + |
---|
| 1617 | +static u32 hwsp_offset(const struct intel_context *ce, u32 *dw) |
---|
| 1618 | +{ |
---|
| 1619 | + return (i915_ggtt_offset(ce->engine->status_page.vma) + |
---|
| 1620 | + offset_in_page(dw)); |
---|
| 1621 | +} |
---|
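
hwsp_scratch() carves a small scratch window out of the engine's hardware status page, starting at dword index 1000 and clearing 21 dwords, and hwsp_offset() turns a CPU pointer into that window back into a GGTT address by adding the byte offset within the page to the page's GGTT base. A trivial sketch of the address arithmetic, with an invented GGTT base purely for illustration:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096u

int main(void)
{
	uint32_t ggtt_base = 0x00180000;	/* hypothetical status page base */
	uint32_t dw_index  = 1000;		/* first scratch dword */
	uint32_t byte_off  = dw_index * (uint32_t)sizeof(uint32_t); /* 4000, still within the page */

	printf("scratch GGTT offset: 0x%08x\n",
	       ggtt_base + (byte_off & (PAGE_SIZE - 1)));
	return 0;
}
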
| 1622 | + |
---|
| 1623 | +static int measure_semaphore_response(struct intel_context *ce) |
---|
| 1624 | +{ |
---|
| 1625 | + u32 *sema = hwsp_scratch(ce); |
---|
| 1626 | + const u32 offset = hwsp_offset(ce, sema); |
---|
| 1627 | + u32 elapsed[TF_COUNT], cycles; |
---|
| 1628 | + struct i915_request *rq; |
---|
| 1629 | + u32 *cs; |
---|
| 1630 | + int err; |
---|
| 1631 | + int i; |
---|
| 1632 | + |
---|
| 1633 | + /* |
---|
| 1634 | + * Measure how many cycles it takes for the HW to detect the change |
---|
| 1635 | + * in a semaphore value. |
---|
| 1636 | + * |
---|
| 1637 | + * A: read CS_TIMESTAMP from CPU |
---|
| 1638 | + * poke semaphore |
---|
| 1639 | + * B: read CS_TIMESTAMP on GPU |
---|
| 1640 | + * |
---|
| 1641 | + * Semaphore latency: B - A |
---|
| 1642 | + */ |
---|
| 1643 | + |
---|
| 1644 | + semaphore_set(sema, -1); |
---|
| 1645 | + |
---|
| 1646 | + rq = i915_request_create(ce); |
---|
| 1647 | + if (IS_ERR(rq)) |
---|
| 1648 | + return PTR_ERR(rq); |
---|
| 1649 | + |
---|
| 1650 | + cs = intel_ring_begin(rq, 4 + 12 * ARRAY_SIZE(elapsed)); |
---|
| 1651 | + if (IS_ERR(cs)) { |
---|
| 1652 | + i915_request_add(rq); |
---|
| 1653 | + err = PTR_ERR(cs); |
---|
| 1654 | + goto err; |
---|
| 1655 | + } |
---|
| 1656 | + |
---|
| 1657 | + cs = emit_store_dw(cs, offset, 0); |
---|
| 1658 | + for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { |
---|
| 1659 | + cs = emit_semaphore_poll_until(cs, offset, i); |
---|
| 1660 | + cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); |
---|
| 1661 | + cs = emit_store_dw(cs, offset, 0); |
---|
| 1662 | + } |
---|
| 1663 | + |
---|
| 1664 | + intel_ring_advance(rq, cs); |
---|
| 1665 | + i915_request_add(rq); |
---|
| 1666 | + |
---|
| 1667 | + if (wait_for(READ_ONCE(*sema) == 0, 50)) { |
---|
| 1668 | + err = -EIO; |
---|
| 1669 | + goto err; |
---|
| 1670 | + } |
---|
| 1671 | + |
---|
| 1672 | + for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { |
---|
| 1673 | + preempt_disable(); |
---|
| 1674 | + cycles = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); |
---|
| 1675 | + semaphore_set(sema, i); |
---|
| 1676 | + preempt_enable(); |
---|
| 1677 | + |
---|
| 1678 | + if (wait_for(READ_ONCE(*sema) == 0, 50)) { |
---|
| 1679 | + err = -EIO; |
---|
| 1680 | + goto err; |
---|
| 1681 | + } |
---|
| 1682 | + |
---|
| 1683 | + elapsed[i - 1] = sema[i] - cycles; |
---|
| 1684 | + } |
---|
| 1685 | + |
---|
| 1686 | + cycles = trifilter(elapsed); |
---|
| 1687 | + pr_info("%s: semaphore response %d cycles, %lluns\n", |
---|
| 1688 | + ce->engine->name, cycles >> TF_BIAS, |
---|
| 1689 | + cycles_to_ns(ce->engine, cycles)); |
---|
| 1690 | + |
---|
| 1691 | + return intel_gt_wait_for_idle(ce->engine->gt, HZ); |
---|
| 1692 | + |
---|
| 1693 | +err: |
---|
| 1694 | + intel_gt_set_wedged(ce->engine->gt); |
---|
| 1695 | + return err; |
---|
| 1696 | +} |
---|
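
Each GPU timestamp lands in the scratch dword the semaphore poll was watching, so the per-sample latency is recovered as sema[i] minus the CPU-side timestamp taken just before the poke. Because both reads come from the same free-running 32-bit RING_TIMESTAMP counter, plain unsigned subtraction stays correct even if the counter wraps between them; a small sketch of that property (the example values are invented):

#include <stdio.h>
#include <stdint.h>

/* Unsigned 32-bit subtraction yields the right delta across a counter wrap. */
static uint32_t delta(uint32_t before, uint32_t after)
{
	return after - before;
}

int main(void)
{
	uint32_t cpu_ts = 0xfffffff0;	/* read just before poking the semaphore */
	uint32_t gpu_ts = 0x00000020;	/* stored by the GPU once the poll completed */

	printf("semaphore response: %u cycles\n", delta(cpu_ts, gpu_ts)); /* 48 */
	return 0;
}
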
| 1697 | + |
---|
| 1698 | +static int measure_idle_dispatch(struct intel_context *ce) |
---|
| 1699 | +{ |
---|
| 1700 | + u32 *sema = hwsp_scratch(ce); |
---|
| 1701 | + const u32 offset = hwsp_offset(ce, sema); |
---|
| 1702 | + u32 elapsed[TF_COUNT], cycles; |
---|
| 1703 | + u32 *cs; |
---|
| 1704 | + int err; |
---|
| 1705 | + int i; |
---|
| 1706 | + |
---|
| 1707 | + /* |
---|
| 1708 | + * Measure how long it takes for us to submit a request while the |
---|
| 1709 | + * engine is idle but resting in our context. |
---|
| 1710 | + * |
---|
| 1711 | + * A: read CS_TIMESTAMP from CPU |
---|
| 1712 | + * submit request |
---|
| 1713 | + * B: read CS_TIMESTAMP on GPU |
---|
| 1714 | + * |
---|
| 1715 | + * Submission latency: B - A |
---|
| 1716 | + */ |
---|
| 1717 | + |
---|
| 1718 | + for (i = 0; i < ARRAY_SIZE(elapsed); i++) { |
---|
| 1719 | + struct i915_request *rq; |
---|
| 1720 | + |
---|
| 1721 | + err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); |
---|
| 1722 | + if (err) |
---|
| 1723 | + return err; |
---|
| 1724 | + |
---|
| 1725 | + rq = i915_request_create(ce); |
---|
| 1726 | + if (IS_ERR(rq)) { |
---|
| 1727 | + err = PTR_ERR(rq); |
---|
| 1728 | + goto err; |
---|
| 1729 | + } |
---|
| 1730 | + |
---|
| 1731 | + cs = intel_ring_begin(rq, 4); |
---|
| 1732 | + if (IS_ERR(cs)) { |
---|
| 1733 | + i915_request_add(rq); |
---|
| 1734 | + err = PTR_ERR(cs); |
---|
| 1735 | + goto err; |
---|
| 1736 | + } |
---|
| 1737 | + |
---|
| 1738 | + cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); |
---|
| 1739 | + |
---|
| 1740 | + intel_ring_advance(rq, cs); |
---|
| 1741 | + |
---|
| 1742 | + preempt_disable(); |
---|
| 1743 | + local_bh_disable(); |
---|
| 1744 | + elapsed[i] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); |
---|
| 1745 | + i915_request_add(rq); |
---|
| 1746 | + local_bh_enable(); |
---|
| 1747 | + preempt_enable(); |
---|
| 1748 | + } |
---|
| 1749 | + |
---|
| 1750 | + err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); |
---|
| 1751 | + if (err) |
---|
| 1752 | + goto err; |
---|
| 1753 | + |
---|
| 1754 | + for (i = 0; i < ARRAY_SIZE(elapsed); i++) |
---|
| 1755 | + elapsed[i] = sema[i] - elapsed[i]; |
---|
| 1756 | + |
---|
| 1757 | + cycles = trifilter(elapsed); |
---|
| 1758 | + pr_info("%s: idle dispatch latency %d cycles, %lluns\n", |
---|
| 1759 | + ce->engine->name, cycles >> TF_BIAS, |
---|
| 1760 | + cycles_to_ns(ce->engine, cycles)); |
---|
| 1761 | + |
---|
| 1762 | + return intel_gt_wait_for_idle(ce->engine->gt, HZ); |
---|
| 1763 | + |
---|
| 1764 | +err: |
---|
| 1765 | + intel_gt_set_wedged(ce->engine->gt); |
---|
| 1766 | + return err; |
---|
| 1767 | +} |
---|
| 1768 | + |
---|
| 1769 | +static int measure_busy_dispatch(struct intel_context *ce) |
---|
| 1770 | +{ |
---|
| 1771 | + u32 *sema = hwsp_scratch(ce); |
---|
| 1772 | + const u32 offset = hwsp_offset(ce, sema); |
---|
| 1773 | + u32 elapsed[TF_COUNT + 1], cycles; |
---|
| 1774 | + u32 *cs; |
---|
| 1775 | + int err; |
---|
| 1776 | + int i; |
---|
| 1777 | + |
---|
| 1778 | + /* |
---|
| 1779 | + * Measure how long it takes for us to submit a request while the |
---|
| 1780 | + * engine is busy, polling on a semaphore in our context. With |
---|
| 1781 | + * direct submission, this will include the cost of a lite restore. |
---|
| 1782 | + * |
---|
| 1783 | + * A: read CS_TIMESTAMP from CPU |
---|
| 1784 | + * submit request |
---|
| 1785 | + * B: read CS_TIMESTAMP on GPU |
---|
| 1786 | + * |
---|
| 1787 | + * Submission latency: B - A |
---|
| 1788 | + */ |
---|
| 1789 | + |
---|
| 1790 | + for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { |
---|
| 1791 | + struct i915_request *rq; |
---|
| 1792 | + |
---|
| 1793 | + rq = i915_request_create(ce); |
---|
| 1794 | + if (IS_ERR(rq)) { |
---|
| 1795 | + err = PTR_ERR(rq); |
---|
| 1796 | + goto err; |
---|
| 1797 | + } |
---|
| 1798 | + |
---|
| 1799 | + cs = intel_ring_begin(rq, 12); |
---|
| 1800 | + if (IS_ERR(cs)) { |
---|
| 1801 | + i915_request_add(rq); |
---|
| 1802 | + err = PTR_ERR(cs); |
---|
| 1803 | + goto err; |
---|
| 1804 | + } |
---|
| 1805 | + |
---|
| 1806 | + cs = emit_store_dw(cs, offset + i * sizeof(u32), -1); |
---|
| 1807 | + cs = emit_semaphore_poll_until(cs, offset, i); |
---|
| 1808 | + cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); |
---|
| 1809 | + |
---|
| 1810 | + intel_ring_advance(rq, cs); |
---|
| 1811 | + |
---|
| 1812 | + if (i > 1 && wait_for(READ_ONCE(sema[i - 1]), 500)) { |
---|
| 1813 | + err = -EIO; |
---|
| 1814 | + goto err; |
---|
| 1815 | + } |
---|
| 1816 | + |
---|
| 1817 | + preempt_disable(); |
---|
| 1818 | + local_bh_disable(); |
---|
| 1819 | + elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); |
---|
| 1820 | + i915_request_add(rq); |
---|
| 1821 | + local_bh_enable(); |
---|
| 1822 | + semaphore_set(sema, i - 1); |
---|
| 1823 | + preempt_enable(); |
---|
| 1824 | + } |
---|
| 1825 | + |
---|
| 1826 | + wait_for(READ_ONCE(sema[i - 1]), 500); |
---|
| 1827 | + semaphore_set(sema, i - 1); |
---|
| 1828 | + |
---|
| 1829 | + for (i = 1; i <= TF_COUNT; i++) { |
---|
| 1830 | + GEM_BUG_ON(sema[i] == -1); |
---|
| 1831 | + elapsed[i - 1] = sema[i] - elapsed[i]; |
---|
| 1832 | + } |
---|
| 1833 | + |
---|
| 1834 | + cycles = trifilter(elapsed); |
---|
| 1835 | + pr_info("%s: busy dispatch latency %d cycles, %lluns\n", |
---|
| 1836 | + ce->engine->name, cycles >> TF_BIAS, |
---|
| 1837 | + cycles_to_ns(ce->engine, cycles)); |
---|
| 1838 | + |
---|
| 1839 | + return intel_gt_wait_for_idle(ce->engine->gt, HZ); |
---|
| 1840 | + |
---|
| 1841 | +err: |
---|
| 1842 | + intel_gt_set_wedged(ce->engine->gt); |
---|
| 1843 | + return err; |
---|
| 1844 | +} |
---|
| 1845 | + |
---|
| 1846 | +static int plug(struct intel_engine_cs *engine, u32 *sema, u32 mode, int value) |
---|
| 1847 | +{ |
---|
| 1848 | + const u32 offset = |
---|
| 1849 | + i915_ggtt_offset(engine->status_page.vma) + |
---|
| 1850 | + offset_in_page(sema); |
---|
| 1851 | + struct i915_request *rq; |
---|
| 1852 | + u32 *cs; |
---|
| 1853 | + |
---|
| 1854 | + rq = i915_request_create(engine->kernel_context); |
---|
| 1855 | + if (IS_ERR(rq)) |
---|
| 1856 | + return PTR_ERR(rq); |
---|
| 1857 | + |
---|
| 1858 | + cs = intel_ring_begin(rq, 4); |
---|
| 1859 | + if (IS_ERR(cs)) { |
---|
| 1860 | + i915_request_add(rq); |
---|
| 1861 | + return PTR_ERR(cs); |
---|
| 1862 | + } |
---|
| 1863 | + |
---|
| 1864 | + cs = emit_semaphore_poll(cs, mode, value, offset); |
---|
| 1865 | + |
---|
| 1866 | + intel_ring_advance(rq, cs); |
---|
| 1867 | + i915_request_add(rq); |
---|
| 1868 | + |
---|
| 1869 | + return 0; |
---|
| 1870 | +} |
---|
| 1871 | + |
---|
| 1872 | +static int measure_inter_request(struct intel_context *ce) |
---|
| 1873 | +{ |
---|
| 1874 | + u32 *sema = hwsp_scratch(ce); |
---|
| 1875 | + const u32 offset = hwsp_offset(ce, sema); |
---|
| 1876 | + u32 elapsed[TF_COUNT + 1], cycles; |
---|
| 1877 | + struct i915_sw_fence *submit; |
---|
| 1878 | + int i, err; |
---|
| 1879 | + |
---|
| 1880 | + /* |
---|
| 1881 | + * Measure how long it takes to advance from one request into the |
---|
| 1882 | + * next. Between each request we flush the GPU caches to memory, |
---|
| 1883 | + * update the breadcrumbs, and then invalidate those caches. |
---|
| 1884 | + * We queue up all the requests to be submitted in one batch so |
---|
| 1885 | + * it should be one set of contiguous measurements. |
---|
| 1886 | + * |
---|
| 1887 | + * A: read CS_TIMESTAMP on GPU |
---|
| 1888 | + * advance request |
---|
| 1889 | + * B: read CS_TIMESTAMP on GPU |
---|
| 1890 | + * |
---|
| 1891 | + * Request latency: B - A |
---|
| 1892 | + */ |
---|
| 1893 | + |
---|
| 1894 | + err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0); |
---|
| 1895 | + if (err) |
---|
| 1896 | + return err; |
---|
| 1897 | + |
---|
| 1898 | + submit = heap_fence_create(GFP_KERNEL); |
---|
| 1899 | + if (!submit) { |
---|
| 1900 | + semaphore_set(sema, 1); |
---|
| 1901 | + return -ENOMEM; |
---|
| 1902 | + } |
---|
| 1903 | + |
---|
| 1904 | + intel_engine_flush_submission(ce->engine); |
---|
| 1905 | + for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { |
---|
| 1906 | + struct i915_request *rq; |
---|
| 1907 | + u32 *cs; |
---|
| 1908 | + |
---|
| 1909 | + rq = i915_request_create(ce); |
---|
| 1910 | + if (IS_ERR(rq)) { |
---|
| 1911 | + err = PTR_ERR(rq); |
---|
| 1912 | + goto err_submit; |
---|
| 1913 | + } |
---|
| 1914 | + |
---|
| 1915 | + err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, |
---|
| 1916 | + submit, |
---|
| 1917 | + GFP_KERNEL); |
---|
| 1918 | + if (err < 0) { |
---|
| 1919 | + i915_request_add(rq); |
---|
| 1920 | + goto err_submit; |
---|
| 1921 | + } |
---|
| 1922 | + |
---|
| 1923 | + cs = intel_ring_begin(rq, 4); |
---|
| 1924 | + if (IS_ERR(cs)) { |
---|
| 1925 | + i915_request_add(rq); |
---|
| 1926 | + err = PTR_ERR(cs); |
---|
| 1927 | + goto err_submit; |
---|
| 1928 | + } |
---|
| 1929 | + |
---|
| 1930 | + cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); |
---|
| 1931 | + |
---|
| 1932 | + intel_ring_advance(rq, cs); |
---|
| 1933 | + i915_request_add(rq); |
---|
| 1934 | + } |
---|
| 1935 | + local_bh_disable(); |
---|
| 1936 | + i915_sw_fence_commit(submit); |
---|
| 1937 | + local_bh_enable(); |
---|
| 1938 | + intel_engine_flush_submission(ce->engine); |
---|
| 1939 | + heap_fence_put(submit); |
---|
| 1940 | + |
---|
| 1941 | + semaphore_set(sema, 1); |
---|
| 1942 | + err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); |
---|
| 1943 | + if (err) |
---|
| 1944 | + goto err; |
---|
| 1945 | + |
---|
| 1946 | + for (i = 1; i <= TF_COUNT; i++) |
---|
| 1947 | + elapsed[i - 1] = sema[i + 1] - sema[i]; |
---|
| 1948 | + |
---|
| 1949 | + cycles = trifilter(elapsed); |
---|
| 1950 | + pr_info("%s: inter-request latency %d cycles, %lluns\n", |
---|
| 1951 | + ce->engine->name, cycles >> TF_BIAS, |
---|
| 1952 | + cycles_to_ns(ce->engine, cycles)); |
---|
| 1953 | + |
---|
| 1954 | + return intel_gt_wait_for_idle(ce->engine->gt, HZ); |
---|
| 1955 | + |
---|
| 1956 | +err_submit: |
---|
| 1957 | + i915_sw_fence_commit(submit); |
---|
| 1958 | + heap_fence_put(submit); |
---|
| 1959 | + semaphore_set(sema, 1); |
---|
| 1960 | +err: |
---|
| 1961 | + intel_gt_set_wedged(ce->engine->gt); |
---|
| 1962 | + return err; |
---|
| 1963 | +} |
---|
| 1964 | + |
---|
| 1965 | +static int measure_context_switch(struct intel_context *ce) |
---|
| 1966 | +{ |
---|
| 1967 | + u32 *sema = hwsp_scratch(ce); |
---|
| 1968 | + const u32 offset = hwsp_offset(ce, sema); |
---|
| 1969 | + struct i915_request *fence = NULL; |
---|
| 1970 | + u32 elapsed[TF_COUNT + 1], cycles; |
---|
| 1971 | + int i, j, err; |
---|
| 1972 | + u32 *cs; |
---|
| 1973 | + |
---|
| 1974 | + /* |
---|
| 1975 | + * Measure how long it takes to advance from one request in one |
---|
| 1976 | + * context to a request in another context. This allows us to |
---|
| 1977 | + * measure how long the context save/restore take, along with all |
---|
| 1978 | + * the inter-context setup we require. |
---|
| 1979 | + * |
---|
| 1980 | + * A: read CS_TIMESTAMP on GPU |
---|
| 1981 | + * switch context |
---|
| 1982 | + * B: read CS_TIMESTAMP on GPU |
---|
| 1983 | + * |
---|
| 1984 | + * Context switch latency: B - A |
---|
| 1985 | + */ |
---|
| 1986 | + |
---|
| 1987 | + err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0); |
---|
| 1988 | + if (err) |
---|
| 1989 | + return err; |
---|
| 1990 | + |
---|
| 1991 | + for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { |
---|
| 1992 | + struct intel_context *arr[] = { |
---|
| 1993 | + ce, ce->engine->kernel_context |
---|
| 1994 | + }; |
---|
| 1995 | + u32 addr = offset + ARRAY_SIZE(arr) * i * sizeof(u32); |
---|
| 1996 | + |
---|
| 1997 | + for (j = 0; j < ARRAY_SIZE(arr); j++) { |
---|
| 1998 | + struct i915_request *rq; |
---|
| 1999 | + |
---|
| 2000 | + rq = i915_request_create(arr[j]); |
---|
| 2001 | + if (IS_ERR(rq)) { |
---|
| 2002 | + err = PTR_ERR(rq); |
---|
| 2003 | + goto err_fence; |
---|
| 2004 | + } |
---|
| 2005 | + |
---|
| 2006 | + if (fence) { |
---|
| 2007 | + err = i915_request_await_dma_fence(rq, |
---|
| 2008 | + &fence->fence); |
---|
| 2009 | + if (err) { |
---|
| 2010 | + i915_request_add(rq); |
---|
| 2011 | + goto err_fence; |
---|
| 2012 | + } |
---|
| 2013 | + } |
---|
| 2014 | + |
---|
| 2015 | + cs = intel_ring_begin(rq, 4); |
---|
| 2016 | + if (IS_ERR(cs)) { |
---|
| 2017 | + i915_request_add(rq); |
---|
| 2018 | + err = PTR_ERR(cs); |
---|
| 2019 | + goto err_fence; |
---|
| 2020 | + } |
---|
| 2021 | + |
---|
| 2022 | + cs = emit_timestamp_store(cs, ce, addr); |
---|
| 2023 | + addr += sizeof(u32); |
---|
| 2024 | + |
---|
| 2025 | + intel_ring_advance(rq, cs); |
---|
| 2026 | + |
---|
| 2027 | + i915_request_put(fence); |
---|
| 2028 | + fence = i915_request_get(rq); |
---|
| 2029 | + |
---|
| 2030 | + i915_request_add(rq); |
---|
| 2031 | + } |
---|
| 2032 | + } |
---|
| 2033 | + i915_request_put(fence); |
---|
| 2034 | + intel_engine_flush_submission(ce->engine); |
---|
| 2035 | + |
---|
| 2036 | + semaphore_set(sema, 1); |
---|
| 2037 | + err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); |
---|
| 2038 | + if (err) |
---|
| 2039 | + goto err; |
---|
| 2040 | + |
---|
| 2041 | + for (i = 1; i <= TF_COUNT; i++) |
---|
| 2042 | + elapsed[i - 1] = sema[2 * i + 2] - sema[2 * i + 1]; |
---|
| 2043 | + |
---|
| 2044 | + cycles = trifilter(elapsed); |
---|
| 2045 | + pr_info("%s: context switch latency %d cycles, %lluns\n", |
---|
| 2046 | + ce->engine->name, cycles >> TF_BIAS, |
---|
| 2047 | + cycles_to_ns(ce->engine, cycles)); |
---|
| 2048 | + |
---|
| 2049 | + return intel_gt_wait_for_idle(ce->engine->gt, HZ); |
---|
| 2050 | + |
---|
| 2051 | +err_fence: |
---|
| 2052 | + i915_request_put(fence); |
---|
| 2053 | + semaphore_set(sema, 1); |
---|
| 2054 | +err: |
---|
| 2055 | + intel_gt_set_wedged(ce->engine->gt); |
---|
| 2056 | + return err; |
---|
| 2057 | +} |
---|
| 2058 | + |
---|
| 2059 | +static int measure_preemption(struct intel_context *ce) |
---|
| 2060 | +{ |
---|
| 2061 | + u32 *sema = hwsp_scratch(ce); |
---|
| 2062 | + const u32 offset = hwsp_offset(ce, sema); |
---|
| 2063 | + u32 elapsed[TF_COUNT], cycles; |
---|
| 2064 | + u32 *cs; |
---|
| 2065 | + int err; |
---|
| 2066 | + int i; |
---|
| 2067 | + |
---|
| 2068 | + /* |
---|
| 2069 | + * We measure two latencies while triggering preemption. The first |
---|
| 2070 | + * latency is how long it takes for us to submit a preempting request. |
---|
| 2071 | + * The second latency is how long it takes for us to return from the |
---|
| 2072 | + * preemption back to the original context. |
---|
| 2073 | + * |
---|
| 2074 | + * A: read CS_TIMESTAMP from CPU |
---|
| 2075 | + * submit preemption |
---|
| 2076 | + * B: read CS_TIMESTAMP on GPU (in preempting context) |
---|
| 2077 | + * context switch |
---|
| 2078 | + * C: read CS_TIMESTAMP on GPU (in original context) |
---|
| 2079 | + * |
---|
| 2080 | + * Preemption dispatch latency: B - A |
---|
| 2081 | + * Preemption switch latency: C - B |
---|
| 2082 | + */ |
---|
| 2083 | + |
---|
| 2084 | + if (!intel_engine_has_preemption(ce->engine)) |
---|
| 2085 | + return 0; |
---|
| 2086 | + |
---|
| 2087 | + for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { |
---|
| 2088 | + u32 addr = offset + 2 * i * sizeof(u32); |
---|
| 2089 | + struct i915_request *rq; |
---|
| 2090 | + |
---|
| 2091 | + rq = i915_request_create(ce); |
---|
| 2092 | + if (IS_ERR(rq)) { |
---|
| 2093 | + err = PTR_ERR(rq); |
---|
| 2094 | + goto err; |
---|
| 2095 | + } |
---|
| 2096 | + |
---|
| 2097 | + cs = intel_ring_begin(rq, 12); |
---|
| 2098 | + if (IS_ERR(cs)) { |
---|
| 2099 | + i915_request_add(rq); |
---|
| 2100 | + err = PTR_ERR(cs); |
---|
| 2101 | + goto err; |
---|
| 2102 | + } |
---|
| 2103 | + |
---|
| 2104 | + cs = emit_store_dw(cs, addr, -1); |
---|
| 2105 | + cs = emit_semaphore_poll_until(cs, offset, i); |
---|
| 2106 | + cs = emit_timestamp_store(cs, ce, addr + sizeof(u32)); |
---|
| 2107 | + |
---|
| 2108 | + intel_ring_advance(rq, cs); |
---|
| 2109 | + i915_request_add(rq); |
---|
| 2110 | + |
---|
| 2111 | + if (wait_for(READ_ONCE(sema[2 * i]) == -1, 500)) { |
---|
| 2112 | + err = -EIO; |
---|
| 2113 | + goto err; |
---|
| 2114 | + } |
---|
| 2115 | + |
---|
| 2116 | + rq = i915_request_create(ce->engine->kernel_context); |
---|
| 2117 | + if (IS_ERR(rq)) { |
---|
| 2118 | + err = PTR_ERR(rq); |
---|
| 2119 | + goto err; |
---|
| 2120 | + } |
---|
| 2121 | + |
---|
| 2122 | + cs = intel_ring_begin(rq, 8); |
---|
| 2123 | + if (IS_ERR(cs)) { |
---|
| 2124 | + i915_request_add(rq); |
---|
| 2125 | + err = PTR_ERR(cs); |
---|
| 2126 | + goto err; |
---|
| 2127 | + } |
---|
| 2128 | + |
---|
| 2129 | + cs = emit_timestamp_store(cs, ce, addr); |
---|
| 2130 | + cs = emit_store_dw(cs, offset, i); |
---|
| 2131 | + |
---|
| 2132 | + intel_ring_advance(rq, cs); |
---|
| 2133 | + rq->sched.attr.priority = I915_PRIORITY_BARRIER; |
---|
| 2134 | + |
---|
| 2135 | + elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); |
---|
| 2136 | + i915_request_add(rq); |
---|
| 2137 | + } |
---|
| 2138 | + |
---|
| 2139 | + if (wait_for(READ_ONCE(sema[2 * i - 2]) != -1, 500)) { |
---|
| 2140 | + err = -EIO; |
---|
| 2141 | + goto err; |
---|
| 2142 | + } |
---|
| 2143 | + |
---|
| 2144 | + for (i = 1; i <= TF_COUNT; i++) |
---|
| 2145 | + elapsed[i - 1] = sema[2 * i + 0] - elapsed[i - 1]; |
---|
| 2146 | + |
---|
| 2147 | + cycles = trifilter(elapsed); |
---|
| 2148 | + pr_info("%s: preemption dispatch latency %d cycles, %lluns\n", |
---|
| 2149 | + ce->engine->name, cycles >> TF_BIAS, |
---|
| 2150 | + cycles_to_ns(ce->engine, cycles)); |
---|
| 2151 | + |
---|
| 2152 | + for (i = 1; i <= TF_COUNT; i++) |
---|
| 2153 | + elapsed[i - 1] = sema[2 * i + 1] - sema[2 * i + 0]; |
---|
| 2154 | + |
---|
| 2155 | + cycles = trifilter(elapsed); |
---|
| 2156 | + pr_info("%s: preemption switch latency %d cycles, %lluns\n", |
---|
| 2157 | + ce->engine->name, cycles >> TF_BIAS, |
---|
| 2158 | + cycles_to_ns(ce->engine, cycles)); |
---|
| 2159 | + |
---|
| 2160 | + return intel_gt_wait_for_idle(ce->engine->gt, HZ); |
---|
| 2161 | + |
---|
| 2162 | +err: |
---|
| 2163 | + intel_gt_set_wedged(ce->engine->gt); |
---|
| 2164 | + return err; |
---|
| 2165 | +} |
---|
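
The scratch dwords interleave the two contexts' timestamps: for sample i, sema[2*i] is written by the preempting kernel-context request and sema[2*i + 1] by the original context once it resumes, while elapsed[i - 1] initially holds the CPU timestamp taken at submission. The two latencies then fall out as simple differences; a small sketch over invented values:

#include <stdio.h>
#include <stdint.h>

#define COUNT 2	/* example size only; the selftest gathers TF_COUNT samples */

int main(void)
{
	/*
	 * Interleaved layout for i >= 1:
	 *   sema[2 * i]     = timestamp from the preempting (kernel) context,
	 *   sema[2 * i + 1] = timestamp once the original context resumes.
	 */
	uint32_t sema[2 * (COUNT + 1)] = { 0, 0, 1000, 1350, 2000, 2480 };
	uint32_t cpu[COUNT] = { 940, 1910 };	/* CPU stamps taken at submission */
	unsigned int i;

	for (i = 1; i <= COUNT; i++)
		printf("sample %u: dispatch %u cycles, switch %u cycles\n",
		       i, sema[2 * i] - cpu[i - 1],
		       sema[2 * i + 1] - sema[2 * i]);

	return 0;
}
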
| 2166 | + |
---|
| 2167 | +struct signal_cb { |
---|
| 2168 | + struct dma_fence_cb base; |
---|
| 2169 | + bool seen; |
---|
| 2170 | +}; |
---|
| 2171 | + |
---|
| 2172 | +static void signal_cb(struct dma_fence *fence, struct dma_fence_cb *cb) |
---|
| 2173 | +{ |
---|
| 2174 | + struct signal_cb *s = container_of(cb, typeof(*s), base); |
---|
| 2175 | + |
---|
| 2176 | + smp_store_mb(s->seen, true); /* be safe, be strong */ |
---|
| 2177 | +} |
---|
| 2178 | + |
---|
| 2179 | +static int measure_completion(struct intel_context *ce) |
---|
| 2180 | +{ |
---|
| 2181 | + u32 *sema = hwsp_scratch(ce); |
---|
| 2182 | + const u32 offset = hwsp_offset(ce, sema); |
---|
| 2183 | + u32 elapsed[TF_COUNT], cycles; |
---|
| 2184 | + u32 *cs; |
---|
| 2185 | + int err; |
---|
| 2186 | + int i; |
---|
| 2187 | + |
---|
| 2188 | + /* |
---|
| 2189 | + * Measure how long it takes for the signal (interrupt) to be |
---|
| 2190 | + * sent from the GPU and processed by the CPU. |
---|
| 2191 | + * |
---|
| 2192 | + * A: read CS_TIMESTAMP on GPU |
---|
| 2193 | + * signal |
---|
| 2194 | + * B: read CS_TIMESTAMP from CPU |
---|
| 2195 | + * |
---|
| 2196 | + * Completion latency: B - A |
---|
| 2197 | + */ |
---|
| 2198 | + |
---|
| 2199 | + for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { |
---|
| 2200 | + struct signal_cb cb = { .seen = false }; |
---|
| 2201 | + struct i915_request *rq; |
---|
| 2202 | + |
---|
| 2203 | + rq = i915_request_create(ce); |
---|
| 2204 | + if (IS_ERR(rq)) { |
---|
| 2205 | + err = PTR_ERR(rq); |
---|
| 2206 | + goto err; |
---|
| 2207 | + } |
---|
| 2208 | + |
---|
| 2209 | + cs = intel_ring_begin(rq, 12); |
---|
| 2210 | + if (IS_ERR(cs)) { |
---|
| 2211 | + i915_request_add(rq); |
---|
| 2212 | + err = PTR_ERR(cs); |
---|
| 2213 | + goto err; |
---|
| 2214 | + } |
---|
| 2215 | + |
---|
| 2216 | + cs = emit_store_dw(cs, offset + i * sizeof(u32), -1); |
---|
| 2217 | + cs = emit_semaphore_poll_until(cs, offset, i); |
---|
| 2218 | + cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); |
---|
| 2219 | + |
---|
| 2220 | + intel_ring_advance(rq, cs); |
---|
| 2221 | + |
---|
| 2222 | + dma_fence_add_callback(&rq->fence, &cb.base, signal_cb); |
---|
| 2223 | + |
---|
| 2224 | + local_bh_disable(); |
---|
| 2225 | + i915_request_add(rq); |
---|
| 2226 | + local_bh_enable(); |
---|
| 2227 | + |
---|
| 2228 | + if (wait_for(READ_ONCE(sema[i]) == -1, 50)) { |
---|
| 2229 | + err = -EIO; |
---|
| 2230 | + goto err; |
---|
| 2231 | + } |
---|
| 2232 | + |
---|
| 2233 | + preempt_disable(); |
---|
| 2234 | + semaphore_set(sema, i); |
---|
| 2235 | + while (!READ_ONCE(cb.seen)) |
---|
| 2236 | + cpu_relax(); |
---|
| 2237 | + |
---|
| 2238 | + elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); |
---|
| 2239 | + preempt_enable(); |
---|
| 2240 | + } |
---|
| 2241 | + |
---|
| 2242 | + err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); |
---|
| 2243 | + if (err) |
---|
| 2244 | + goto err; |
---|
| 2245 | + |
---|
| 2246 | + for (i = 0; i < ARRAY_SIZE(elapsed); i++) { |
---|
| 2247 | + GEM_BUG_ON(sema[i + 1] == -1); |
---|
| 2248 | + elapsed[i] = elapsed[i] - sema[i + 1]; |
---|
| 2249 | + } |
---|
| 2250 | + |
---|
| 2251 | + cycles = trifilter(elapsed); |
---|
| 2252 | + pr_info("%s: completion latency %d cycles, %lluns\n", |
---|
| 2253 | + ce->engine->name, cycles >> TF_BIAS, |
---|
| 2254 | + cycles_to_ns(ce->engine, cycles)); |
---|
| 2255 | + |
---|
| 2256 | + return intel_gt_wait_for_idle(ce->engine->gt, HZ); |
---|
| 2257 | + |
---|
| 2258 | +err: |
---|
| 2259 | + intel_gt_set_wedged(ce->engine->gt); |
---|
| 2260 | + return err; |
---|
| 2261 | +} |
---|
| 2262 | + |
---|
| 2263 | +static void rps_pin(struct intel_gt *gt) |
---|
| 2264 | +{ |
---|
| 2265 | + /* Pin the frequency to max */ |
---|
| 2266 | + atomic_inc(&gt->rps.num_waiters); |
---|
| 2267 | + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); |
---|
| 2268 | + |
---|
| 2269 | + mutex_lock(&gt->rps.lock); |
---|
| 2270 | + intel_rps_set(&gt->rps, gt->rps.max_freq); |
---|
| 2271 | + mutex_unlock(&gt->rps.lock); |
---|
| 2272 | +} |
---|
| 2273 | + |
---|
| 2274 | +static void rps_unpin(struct intel_gt *gt) |
---|
| 2275 | +{ |
---|
| 2276 | + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); |
---|
| 2277 | + atomic_dec(&gt->rps.num_waiters); |
---|
| 2278 | +} |
---|
| 2279 | + |
---|
| 2280 | +static int perf_request_latency(void *arg) |
---|
| 2281 | +{ |
---|
| 2282 | + struct drm_i915_private *i915 = arg; |
---|
| 2283 | + struct intel_engine_cs *engine; |
---|
| 2284 | + struct pm_qos_request qos; |
---|
| 2285 | + int err = 0; |
---|
| 2286 | + |
---|
| 2287 | + if (INTEL_GEN(i915) < 8) /* per-engine CS timestamp, semaphores */ |
---|
| 2288 | + return 0; |
---|
| 2289 | + |
---|
| 2290 | + cpu_latency_qos_add_request(&qos, 0); /* disable cstates */ |
---|
| 2291 | + |
---|
| 2292 | + for_each_uabi_engine(engine, i915) { |
---|
| 2293 | + struct intel_context *ce; |
---|
| 2294 | + |
---|
| 2295 | + ce = intel_context_create(engine); |
---|
| 2296 | + if (IS_ERR(ce)) { |
---|
| 2297 | + err = PTR_ERR(ce); |
---|
| 2298 | + goto out; |
---|
| 2299 | + } |
---|
| 2300 | + |
---|
| 2301 | + err = intel_context_pin(ce); |
---|
| 2302 | + if (err) { |
---|
| 2303 | + intel_context_put(ce); |
---|
| 2304 | + goto out; |
---|
| 2305 | + } |
---|
| 2306 | + |
---|
| 2307 | + st_engine_heartbeat_disable(engine); |
---|
| 2308 | + rps_pin(engine->gt); |
---|
| 2309 | + |
---|
| 2310 | + if (err == 0) |
---|
| 2311 | + err = measure_semaphore_response(ce); |
---|
| 2312 | + if (err == 0) |
---|
| 2313 | + err = measure_idle_dispatch(ce); |
---|
| 2314 | + if (err == 0) |
---|
| 2315 | + err = measure_busy_dispatch(ce); |
---|
| 2316 | + if (err == 0) |
---|
| 2317 | + err = measure_inter_request(ce); |
---|
| 2318 | + if (err == 0) |
---|
| 2319 | + err = measure_context_switch(ce); |
---|
| 2320 | + if (err == 0) |
---|
| 2321 | + err = measure_preemption(ce); |
---|
| 2322 | + if (err == 0) |
---|
| 2323 | + err = measure_completion(ce); |
---|
| 2324 | + |
---|
| 2325 | + rps_unpin(engine->gt); |
---|
| 2326 | + st_engine_heartbeat_enable(engine); |
---|
| 2327 | + |
---|
| 2328 | + intel_context_unpin(ce); |
---|
| 2329 | + intel_context_put(ce); |
---|
| 2330 | + if (err) |
---|
| 2331 | + goto out; |
---|
| 2332 | + } |
---|
| 2333 | + |
---|
| 2334 | +out: |
---|
| 2335 | + if (igt_flush_test(i915)) |
---|
| 2336 | + err = -EIO; |
---|
| 2337 | + |
---|
| 2338 | + cpu_latency_qos_remove_request(&qos); |
---|
| 2339 | + return err; |
---|
| 2340 | +} |
---|
| 2341 | + |
---|
| 2342 | +static int s_sync0(void *arg) |
---|
| 2343 | +{ |
---|
| 2344 | + struct perf_series *ps = arg; |
---|
| 2345 | + IGT_TIMEOUT(end_time); |
---|
| 2346 | + unsigned int idx = 0; |
---|
| 2347 | + int err = 0; |
---|
| 2348 | + |
---|
| 2349 | + GEM_BUG_ON(!ps->nengines); |
---|
| 2350 | + do { |
---|
| 2351 | + struct i915_request *rq; |
---|
| 2352 | + |
---|
| 2353 | + rq = i915_request_create(ps->ce[idx]); |
---|
| 2354 | + if (IS_ERR(rq)) { |
---|
| 2355 | + err = PTR_ERR(rq); |
---|
| 2356 | + break; |
---|
| 2357 | + } |
---|
| 2358 | + |
---|
| 2359 | + i915_request_get(rq); |
---|
| 2360 | + i915_request_add(rq); |
---|
| 2361 | + |
---|
| 2362 | + if (i915_request_wait(rq, 0, HZ / 5) < 0) |
---|
| 2363 | + err = -ETIME; |
---|
| 2364 | + i915_request_put(rq); |
---|
| 2365 | + if (err) |
---|
| 2366 | + break; |
---|
| 2367 | + |
---|
| 2368 | + if (++idx == ps->nengines) |
---|
| 2369 | + idx = 0; |
---|
| 2370 | + } while (!__igt_timeout(end_time, NULL)); |
---|
| 2371 | + |
---|
| 2372 | + return err; |
---|
| 2373 | +} |
---|
| 2374 | + |
---|
| 2375 | +static int s_sync1(void *arg) |
---|
| 2376 | +{ |
---|
| 2377 | + struct perf_series *ps = arg; |
---|
| 2378 | + struct i915_request *prev = NULL; |
---|
| 2379 | + IGT_TIMEOUT(end_time); |
---|
| 2380 | + unsigned int idx = 0; |
---|
| 2381 | + int err = 0; |
---|
| 2382 | + |
---|
| 2383 | + GEM_BUG_ON(!ps->nengines); |
---|
| 2384 | + do { |
---|
| 2385 | + struct i915_request *rq; |
---|
| 2386 | + |
---|
| 2387 | + rq = i915_request_create(ps->ce[idx]); |
---|
| 2388 | + if (IS_ERR(rq)) { |
---|
| 2389 | + err = PTR_ERR(rq); |
---|
| 2390 | + break; |
---|
| 2391 | + } |
---|
| 2392 | + |
---|
| 2393 | + i915_request_get(rq); |
---|
| 2394 | + i915_request_add(rq); |
---|
| 2395 | + |
---|
| 2396 | + if (prev && i915_request_wait(prev, 0, HZ / 5) < 0) |
---|
| 2397 | + err = -ETIME; |
---|
| 2398 | + i915_request_put(prev); |
---|
| 2399 | + prev = rq; |
---|
| 2400 | + if (err) |
---|
| 2401 | + break; |
---|
| 2402 | + |
---|
| 2403 | + if (++idx == ps->nengines) |
---|
| 2404 | + idx = 0; |
---|
| 2405 | + } while (!__igt_timeout(end_time, NULL)); |
---|
| 2406 | + i915_request_put(prev); |
---|
| 2407 | + |
---|
| 2408 | + return err; |
---|
| 2409 | +} |
---|
| 2410 | + |
---|
| 2411 | +static int s_many(void *arg) |
---|
| 2412 | +{ |
---|
| 2413 | + struct perf_series *ps = arg; |
---|
| 2414 | + IGT_TIMEOUT(end_time); |
---|
| 2415 | + unsigned int idx = 0; |
---|
| 2416 | + |
---|
| 2417 | + GEM_BUG_ON(!ps->nengines); |
---|
| 2418 | + do { |
---|
| 2419 | + struct i915_request *rq; |
---|
| 2420 | + |
---|
| 2421 | + rq = i915_request_create(ps->ce[idx]); |
---|
| 2422 | + if (IS_ERR(rq)) |
---|
| 2423 | + return PTR_ERR(rq); |
---|
| 2424 | + |
---|
| 2425 | + i915_request_add(rq); |
---|
| 2426 | + |
---|
| 2427 | + if (++idx == ps->nengines) |
---|
| 2428 | + idx = 0; |
---|
| 2429 | + } while (!__igt_timeout(end_time, NULL)); |
---|
| 2430 | + |
---|
| 2431 | + return 0; |
---|
| 2432 | +} |
---|
| 2433 | + |
---|
| 2434 | +static int perf_series_engines(void *arg) |
---|
| 2435 | +{ |
---|
| 2436 | + struct drm_i915_private *i915 = arg; |
---|
| 2437 | + static int (* const func[])(void *arg) = { |
---|
| 2438 | + s_sync0, |
---|
| 2439 | + s_sync1, |
---|
| 2440 | + s_many, |
---|
| 2441 | + NULL, |
---|
| 2442 | + }; |
---|
| 2443 | + const unsigned int nengines = num_uabi_engines(i915); |
---|
| 2444 | + struct intel_engine_cs *engine; |
---|
| 2445 | + int (* const *fn)(void *arg); |
---|
| 2446 | + struct pm_qos_request qos; |
---|
| 2447 | + struct perf_stats *stats; |
---|
| 2448 | + struct perf_series *ps; |
---|
| 2449 | + unsigned int idx; |
---|
| 2450 | + int err = 0; |
---|
| 2451 | + |
---|
| 2452 | + stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL); |
---|
| 2453 | + if (!stats) |
---|
| 2454 | + return -ENOMEM; |
---|
| 2455 | + |
---|
| 2456 | + ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL); |
---|
| 2457 | + if (!ps) { |
---|
| 2458 | + kfree(stats); |
---|
| 2459 | + return -ENOMEM; |
---|
| 2460 | + } |
---|
| 2461 | + |
---|
| 2462 | + cpu_latency_qos_add_request(&qos, 0); /* disable cstates */ |
---|
| 2463 | + |
---|
| 2464 | + ps->i915 = i915; |
---|
| 2465 | + ps->nengines = nengines; |
---|
| 2466 | + |
---|
| 2467 | + idx = 0; |
---|
| 2468 | + for_each_uabi_engine(engine, i915) { |
---|
| 2469 | + struct intel_context *ce; |
---|
| 2470 | + |
---|
| 2471 | + ce = intel_context_create(engine); |
---|
| 2472 | + if (IS_ERR(ce)) { |
---|
| 2473 | + err = PTR_ERR(ce); |
---|
| 2474 | + goto out; |
---|
| 2475 | + } |
---|
| 2476 | + |
---|
| 2477 | + err = intel_context_pin(ce); |
---|
| 2478 | + if (err) { |
---|
| 2479 | + intel_context_put(ce); |
---|
| 2480 | + goto out; |
---|
| 2481 | + } |
---|
| 2482 | + |
---|
| 2483 | + ps->ce[idx++] = ce; |
---|
| 2484 | + } |
---|
| 2485 | + GEM_BUG_ON(idx != ps->nengines); |
---|
| 2486 | + |
---|
| 2487 | + for (fn = func; *fn && !err; fn++) { |
---|
| 2488 | + char name[KSYM_NAME_LEN]; |
---|
| 2489 | + struct igt_live_test t; |
---|
| 2490 | + |
---|
| 2491 | + snprintf(name, sizeof(name), "%ps", *fn); |
---|
| 2492 | + err = igt_live_test_begin(&t, i915, __func__, name); |
---|
| 2493 | + if (err) |
---|
| 2494 | + break; |
---|
| 2495 | + |
---|
| 2496 | + for (idx = 0; idx < nengines; idx++) { |
---|
| 2497 | + struct perf_stats *p = |
---|
| 2498 | + memset(&stats[idx], 0, sizeof(stats[idx])); |
---|
| 2499 | + struct intel_context *ce = ps->ce[idx]; |
---|
| 2500 | + |
---|
| 2501 | + p->engine = ps->ce[idx]->engine; |
---|
| 2502 | + intel_engine_pm_get(p->engine); |
---|
| 2503 | + |
---|
| 2504 | + if (intel_engine_supports_stats(p->engine)) |
---|
| 2505 | + p->busy = intel_engine_get_busy_time(p->engine, |
---|
| 2506 | + &p->time) + 1; |
---|
| 2507 | + else |
---|
| 2508 | + p->time = ktime_get(); |
---|
| 2509 | + p->runtime = -intel_context_get_total_runtime_ns(ce); |
---|
| 2510 | + } |
---|
| 2511 | + |
---|
| 2512 | + err = (*fn)(ps); |
---|
| 2513 | + if (igt_live_test_end(&t)) |
---|
| 2514 | + err = -EIO; |
---|
| 2515 | + |
---|
| 2516 | + for (idx = 0; idx < nengines; idx++) { |
---|
| 2517 | + struct perf_stats *p = &stats[idx]; |
---|
| 2518 | + struct intel_context *ce = ps->ce[idx]; |
---|
| 2519 | + int integer, decimal; |
---|
| 2520 | + u64 busy, dt, now; |
---|
| 2521 | + |
---|
| 2522 | + if (p->busy) |
---|
| 2523 | + p->busy = ktime_sub(intel_engine_get_busy_time(p->engine, |
---|
| 2524 | + &now), |
---|
| 2525 | + p->busy - 1); |
---|
| 2526 | + else |
---|
| 2527 | + now = ktime_get(); |
---|
| 2528 | + p->time = ktime_sub(now, p->time); |
---|
| 2529 | + |
---|
| 2530 | + err = switch_to_kernel_sync(ce, err); |
---|
| 2531 | + p->runtime += intel_context_get_total_runtime_ns(ce); |
---|
| 2532 | + intel_engine_pm_put(p->engine); |
---|
| 2533 | + |
---|
| 2534 | + busy = 100 * ktime_to_ns(p->busy); |
---|
| 2535 | + dt = ktime_to_ns(p->time); |
---|
| 2536 | + if (dt) { |
---|
| 2537 | + integer = div64_u64(busy, dt); |
---|
| 2538 | + busy -= integer * dt; |
---|
| 2539 | + decimal = div64_u64(100 * busy, dt); |
---|
| 2540 | + } else { |
---|
| 2541 | + integer = 0; |
---|
| 2542 | + decimal = 0; |
---|
| 2543 | + } |
---|
| 2544 | + |
---|
| 2545 | + pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n", |
---|
| 2546 | + name, p->engine->name, ce->timeline->seqno, |
---|
| 2547 | + integer, decimal, |
---|
| 2548 | + div_u64(p->runtime, 1000 * 1000), |
---|
| 2549 | + div_u64(ktime_to_ns(p->time), 1000 * 1000)); |
---|
| 2550 | + } |
---|
| 2551 | + } |
---|
| 2552 | + |
---|
| 2553 | +out: |
---|
| 2554 | + for (idx = 0; idx < nengines; idx++) { |
---|
| 2555 | + if (IS_ERR_OR_NULL(ps->ce[idx])) |
---|
| 2556 | + break; |
---|
| 2557 | + |
---|
| 2558 | + intel_context_unpin(ps->ce[idx]); |
---|
| 2559 | + intel_context_put(ps->ce[idx]); |
---|
| 2560 | + } |
---|
| 2561 | + kfree(ps); |
---|
| 2562 | + |
---|
| 2563 | + cpu_latency_qos_remove_request(&qos); |
---|
| 2564 | + kfree(stats); |
---|
| 2565 | + return err; |
---|
| 2566 | +} |
---|
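
The busy figure printed above is engine busy time as a percentage of wall time, computed with integer arithmetic only: the whole-percent part first, then the remainder rescaled for two decimal places. A standalone sketch of that fixed-point formatting (the sample nanosecond values are invented):

#include <stdio.h>
#include <stdint.h>

/* Print busy/dt as a percentage with two decimal places, integer math only. */
static void print_busy(uint64_t busy_ns, uint64_t dt_ns)
{
	uint64_t busy = 100 * busy_ns;
	unsigned int integer = 0, decimal = 0;

	if (dt_ns) {
		integer = busy / dt_ns;
		busy -= (uint64_t)integer * dt_ns;
		decimal = (100 * busy) / dt_ns;
	}

	printf("busy: %u.%02u%%\n", integer, decimal);
}

int main(void)
{
	print_busy(987654321ull, 1234567890ull);	/* prints "busy: 80.00%" */
	return 0;
}
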
| 2567 | + |
---|
| 2568 | +static int p_sync0(void *arg) |
---|
| 2569 | +{ |
---|
| 2570 | + struct perf_stats *p = arg; |
---|
| 2571 | + struct intel_engine_cs *engine = p->engine; |
---|
| 2572 | + struct intel_context *ce; |
---|
| 2573 | + IGT_TIMEOUT(end_time); |
---|
| 2574 | + unsigned long count; |
---|
| 2575 | + bool busy; |
---|
| 2576 | + int err = 0; |
---|
| 2577 | + |
---|
| 2578 | + ce = intel_context_create(engine); |
---|
| 2579 | + if (IS_ERR(ce)) |
---|
| 2580 | + return PTR_ERR(ce); |
---|
| 2581 | + |
---|
| 2582 | + err = intel_context_pin(ce); |
---|
| 2583 | + if (err) { |
---|
| 2584 | + intel_context_put(ce); |
---|
| 2585 | + return err; |
---|
| 2586 | + } |
---|
| 2587 | + |
---|
| 2588 | + if (intel_engine_supports_stats(engine)) { |
---|
| 2589 | + p->busy = intel_engine_get_busy_time(engine, &p->time); |
---|
| 2590 | + busy = true; |
---|
| 2591 | + } else { |
---|
| 2592 | + p->time = ktime_get(); |
---|
| 2593 | + busy = false; |
---|
| 2594 | + } |
---|
| 2595 | + |
---|
| 2596 | + count = 0; |
---|
| 2597 | + do { |
---|
| 2598 | + struct i915_request *rq; |
---|
| 2599 | + |
---|
| 2600 | + rq = i915_request_create(ce); |
---|
| 2601 | + if (IS_ERR(rq)) { |
---|
| 2602 | + err = PTR_ERR(rq); |
---|
| 2603 | + break; |
---|
| 2604 | + } |
---|
| 2605 | + |
---|
| 2606 | + i915_request_get(rq); |
---|
| 2607 | + i915_request_add(rq); |
---|
| 2608 | + |
---|
| 2609 | + err = 0; |
---|
| 2610 | + if (i915_request_wait(rq, 0, HZ / 5) < 0) |
---|
| 2611 | + err = -ETIME; |
---|
| 2612 | + i915_request_put(rq); |
---|
| 2613 | + if (err) |
---|
| 2614 | + break; |
---|
| 2615 | + |
---|
| 2616 | + count++; |
---|
| 2617 | + } while (!__igt_timeout(end_time, NULL)); |
---|
| 2618 | + |
---|
| 2619 | + if (busy) { |
---|
| 2620 | + ktime_t now; |
---|
| 2621 | + |
---|
| 2622 | + p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now), |
---|
| 2623 | + p->busy); |
---|
| 2624 | + p->time = ktime_sub(now, p->time); |
---|
| 2625 | + } else { |
---|
| 2626 | + p->time = ktime_sub(ktime_get(), p->time); |
---|
| 2627 | + } |
---|
| 2628 | + |
---|
| 2629 | + err = switch_to_kernel_sync(ce, err); |
---|
| 2630 | + p->runtime = intel_context_get_total_runtime_ns(ce); |
---|
| 2631 | + p->count = count; |
---|
| 2632 | + |
---|
| 2633 | + intel_context_unpin(ce); |
---|
| 2634 | + intel_context_put(ce); |
---|
| 2635 | + return err; |
---|
| 2636 | +} |
---|
| 2637 | + |
---|
| 2638 | +static int p_sync1(void *arg) |
---|
| 2639 | +{ |
---|
| 2640 | + struct perf_stats *p = arg; |
---|
| 2641 | + struct intel_engine_cs *engine = p->engine; |
---|
| 2642 | + struct i915_request *prev = NULL; |
---|
| 2643 | + struct intel_context *ce; |
---|
| 2644 | + IGT_TIMEOUT(end_time); |
---|
| 2645 | + unsigned long count; |
---|
| 2646 | + bool busy; |
---|
| 2647 | + int err = 0; |
---|
| 2648 | + |
---|
| 2649 | + ce = intel_context_create(engine); |
---|
| 2650 | + if (IS_ERR(ce)) |
---|
| 2651 | + return PTR_ERR(ce); |
---|
| 2652 | + |
---|
| 2653 | + err = intel_context_pin(ce); |
---|
| 2654 | + if (err) { |
---|
| 2655 | + intel_context_put(ce); |
---|
| 2656 | + return err; |
---|
| 2657 | + } |
---|
| 2658 | + |
---|
| 2659 | + if (intel_engine_supports_stats(engine)) { |
---|
| 2660 | + p->busy = intel_engine_get_busy_time(engine, &p->time); |
---|
| 2661 | + busy = true; |
---|
| 2662 | + } else { |
---|
| 2663 | + p->time = ktime_get(); |
---|
| 2664 | + busy = false; |
---|
| 2665 | + } |
---|
| 2666 | + |
---|
| 2667 | + count = 0; |
---|
| 2668 | + do { |
---|
| 2669 | + struct i915_request *rq; |
---|
| 2670 | + |
---|
| 2671 | + rq = i915_request_create(ce); |
---|
| 2672 | + if (IS_ERR(rq)) { |
---|
| 2673 | + err = PTR_ERR(rq); |
---|
| 2674 | + break; |
---|
| 2675 | + } |
---|
| 2676 | + |
---|
| 2677 | + i915_request_get(rq); |
---|
| 2678 | + i915_request_add(rq); |
---|
| 2679 | + |
---|
| 2680 | + err = 0; |
---|
| 2681 | + if (prev && i915_request_wait(prev, 0, HZ / 5) < 0) |
---|
| 2682 | + err = -ETIME; |
---|
| 2683 | + i915_request_put(prev); |
---|
| 2684 | + prev = rq; |
---|
| 2685 | + if (err) |
---|
| 2686 | + break; |
---|
| 2687 | + |
---|
| 2688 | + count++; |
---|
| 2689 | + } while (!__igt_timeout(end_time, NULL)); |
---|
| 2690 | + i915_request_put(prev); |
---|
| 2691 | + |
---|
| 2692 | + if (busy) { |
---|
| 2693 | + ktime_t now; |
---|
| 2694 | + |
---|
| 2695 | + p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now), |
---|
| 2696 | + p->busy); |
---|
| 2697 | + p->time = ktime_sub(now, p->time); |
---|
| 2698 | + } else { |
---|
| 2699 | + p->time = ktime_sub(ktime_get(), p->time); |
---|
| 2700 | + } |
---|
| 2701 | + |
---|
| 2702 | + err = switch_to_kernel_sync(ce, err); |
---|
| 2703 | + p->runtime = intel_context_get_total_runtime_ns(ce); |
---|
| 2704 | + p->count = count; |
---|
| 2705 | + |
---|
| 2706 | + intel_context_unpin(ce); |
---|
| 2707 | + intel_context_put(ce); |
---|
| 2708 | + return err; |
---|
| 2709 | +} |
---|
| 2710 | + |
---|
| 2711 | +static int p_many(void *arg) |
---|
| 2712 | +{ |
---|
| 2713 | + struct perf_stats *p = arg; |
---|
| 2714 | + struct intel_engine_cs *engine = p->engine; |
---|
| 2715 | + struct intel_context *ce; |
---|
| 2716 | + IGT_TIMEOUT(end_time); |
---|
| 2717 | + unsigned long count; |
---|
| 2718 | + int err = 0; |
---|
| 2719 | + bool busy; |
---|
| 2720 | + |
---|
| 2721 | + ce = intel_context_create(engine); |
---|
| 2722 | + if (IS_ERR(ce)) |
---|
| 2723 | + return PTR_ERR(ce); |
---|
| 2724 | + |
---|
| 2725 | + err = intel_context_pin(ce); |
---|
| 2726 | + if (err) { |
---|
| 2727 | + intel_context_put(ce); |
---|
| 2728 | + return err; |
---|
| 2729 | + } |
---|
| 2730 | + |
---|
| 2731 | + if (intel_engine_supports_stats(engine)) { |
---|
| 2732 | + p->busy = intel_engine_get_busy_time(engine, &p->time); |
---|
| 2733 | + busy = true; |
---|
| 2734 | + } else { |
---|
| 2735 | + p->time = ktime_get(); |
---|
| 2736 | + busy = false; |
---|
| 2737 | + } |
---|
| 2738 | + |
---|
| 2739 | + count = 0; |
---|
| 2740 | + do { |
---|
| 2741 | + struct i915_request *rq; |
---|
| 2742 | + |
---|
| 2743 | + rq = i915_request_create(ce); |
---|
| 2744 | + if (IS_ERR(rq)) { |
---|
| 2745 | + err = PTR_ERR(rq); |
---|
| 2746 | + break; |
---|
| 2747 | + } |
---|
| 2748 | + |
---|
| 2749 | + i915_request_add(rq); |
---|
| 2750 | + count++; |
---|
| 2751 | + } while (!__igt_timeout(end_time, NULL)); |
---|
| 2752 | + |
---|
| 2753 | + if (busy) { |
---|
| 2754 | + ktime_t now; |
---|
| 2755 | + |
---|
| 2756 | + p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now), |
---|
| 2757 | + p->busy); |
---|
| 2758 | + p->time = ktime_sub(now, p->time); |
---|
| 2759 | + } else { |
---|
| 2760 | + p->time = ktime_sub(ktime_get(), p->time); |
---|
| 2761 | + } |
---|
| 2762 | + |
---|
| 2763 | + err = switch_to_kernel_sync(ce, err); |
---|
| 2764 | + p->runtime = intel_context_get_total_runtime_ns(ce); |
---|
| 2765 | + p->count = count; |
---|
| 2766 | + |
---|
| 2767 | + intel_context_unpin(ce); |
---|
| 2768 | + intel_context_put(ce); |
---|
| 2769 | + return err; |
---|
| 2770 | +} |
---|
| 2771 | + |
---|
| 2772 | +static int perf_parallel_engines(void *arg) |
---|
| 2773 | +{ |
---|
| 2774 | + struct drm_i915_private *i915 = arg; |
---|
| 2775 | + static int (* const func[])(void *arg) = { |
---|
| 2776 | + p_sync0, |
---|
| 2777 | + p_sync1, |
---|
| 2778 | + p_many, |
---|
| 2779 | + NULL, |
---|
| 2780 | + }; |
---|
| 2781 | + const unsigned int nengines = num_uabi_engines(i915); |
---|
| 2782 | + struct intel_engine_cs *engine; |
---|
| 2783 | + int (* const *fn)(void *arg); |
---|
| 2784 | + struct pm_qos_request qos; |
---|
| 2785 | + struct { |
---|
| 2786 | + struct perf_stats p; |
---|
| 2787 | + struct task_struct *tsk; |
---|
| 2788 | + } *engines; |
---|
| 2789 | + int err = 0; |
---|
| 2790 | + |
---|
| 2791 | + engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL); |
---|
| 2792 | + if (!engines) |
---|
| 2793 | + return -ENOMEM; |
---|
| 2794 | + |
---|
| 2795 | + cpu_latency_qos_add_request(&qos, 0); |
---|
| 2796 | + |
---|
| 2797 | + for (fn = func; *fn; fn++) { |
---|
| 2798 | + char name[KSYM_NAME_LEN]; |
---|
| 2799 | + struct igt_live_test t; |
---|
| 2800 | + unsigned int idx; |
---|
| 2801 | + |
---|
| 2802 | + snprintf(name, sizeof(name), "%ps", *fn); |
---|
| 2803 | + err = igt_live_test_begin(&t, i915, __func__, name); |
---|
| 2804 | + if (err) |
---|
| 2805 | + break; |
---|
| 2806 | + |
---|
| 2807 | + atomic_set(&i915->selftest.counter, nengines); |
---|
| 2808 | + |
---|
| 2809 | + idx = 0; |
---|
| 2810 | + for_each_uabi_engine(engine, i915) { |
---|
| 2811 | + intel_engine_pm_get(engine); |
---|
| 2812 | + |
---|
| 2813 | + memset(&engines[idx].p, 0, sizeof(engines[idx].p)); |
---|
| 2814 | + engines[idx].p.engine = engine; |
---|
| 2815 | + |
---|
| 2816 | + engines[idx].tsk = kthread_run(*fn, &engines[idx].p, |
---|
| 2817 | + "igt:%s", engine->name); |
---|
| 2818 | + if (IS_ERR(engines[idx].tsk)) { |
---|
| 2819 | + err = PTR_ERR(engines[idx].tsk); |
---|
| 2820 | + intel_engine_pm_put(engine); |
---|
| 2821 | + break; |
---|
| 2822 | + } |
---|
| 2823 | + get_task_struct(engines[idx++].tsk); |
---|
| 2824 | + } |
---|
| 2825 | + |
---|
| 2826 | + yield(); /* start all threads before we kthread_stop() */ |
---|
| 2827 | + |
---|
| 2828 | + idx = 0; |
---|
| 2829 | + for_each_uabi_engine(engine, i915) { |
---|
| 2830 | + int status; |
---|
| 2831 | + |
---|
| 2832 | + if (IS_ERR(engines[idx].tsk)) |
---|
| 2833 | + break; |
---|
| 2834 | + |
---|
| 2835 | + status = kthread_stop(engines[idx].tsk); |
---|
| 2836 | + if (status && !err) |
---|
| 2837 | + err = status; |
---|
| 2838 | + |
---|
| 2839 | + intel_engine_pm_put(engine); |
---|
| 2840 | + put_task_struct(engines[idx++].tsk); |
---|
| 2841 | + } |
---|
| 2842 | + |
---|
| 2843 | + if (igt_live_test_end(&t)) |
---|
| 2844 | + err = -EIO; |
---|
| 2845 | + if (err) |
---|
| 2846 | + break; |
---|
| 2847 | + |
---|
| 2848 | + idx = 0; |
---|
| 2849 | + for_each_uabi_engine(engine, i915) { |
---|
| 2850 | + struct perf_stats *p = &engines[idx].p; |
---|
| 2851 | + u64 busy = 100 * ktime_to_ns(p->busy); |
---|
| 2852 | + u64 dt = ktime_to_ns(p->time); |
---|
| 2853 | + int integer, decimal; |
---|
| 2854 | + |
---|
| 2855 | + if (dt) { |
---|
| 2856 | + integer = div64_u64(busy, dt); |
---|
| 2857 | + busy -= integer * dt; |
---|
| 2858 | + decimal = div64_u64(100 * busy, dt); |
---|
| 2859 | + } else { |
---|
| 2860 | + integer = 0; |
---|
| 2861 | + decimal = 0; |
---|
| 2862 | + } |
---|
| 2863 | + |
---|
| 2864 | + GEM_BUG_ON(engine != p->engine); |
---|
| 2865 | + pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n", |
---|
| 2866 | + name, engine->name, p->count, integer, decimal, |
---|
| 2867 | + div_u64(p->runtime, 1000 * 1000), |
---|
| 2868 | + div_u64(ktime_to_ns(p->time), 1000 * 1000)); |
---|
| 2869 | + idx++; |
---|
| 2870 | + } |
---|
| 2871 | + } |
---|
| 2872 | + |
---|
| 2873 | + cpu_latency_qos_remove_request(&qos); |
---|
| 2874 | + kfree(engines); |
---|
| 2875 | + return err; |
---|
| 2876 | +} |
---|
| 2877 | + |
---|
| 2878 | +int i915_request_perf_selftests(struct drm_i915_private *i915) |
---|
| 2879 | +{ |
---|
| 2880 | + static const struct i915_subtest tests[] = { |
---|
| 2881 | + SUBTEST(perf_request_latency), |
---|
| 2882 | + SUBTEST(perf_series_engines), |
---|
| 2883 | + SUBTEST(perf_parallel_engines), |
---|
| 2884 | + }; |
---|
| 2885 | + |
---|
| 2886 | + if (intel_gt_is_wedged(&i915->gt)) |
---|
870 | 2887 | return 0; |
---|
871 | 2888 | |
---|
872 | 2889 | return i915_subtests(tests, i915); |
---|