@@ -195 +195 @@
 #include <linux/sizes.h>
 #include <linux/uuid.h>
 
+#include "gem/i915_gem_context.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_engine_user.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
+
 #include "i915_drv.h"
-#include "i915_oa_hsw.h"
-#include "i915_oa_bdw.h"
-#include "i915_oa_chv.h"
-#include "i915_oa_sklgt2.h"
-#include "i915_oa_sklgt3.h"
-#include "i915_oa_sklgt4.h"
-#include "i915_oa_bxt.h"
-#include "i915_oa_kblgt2.h"
-#include "i915_oa_kblgt3.h"
-#include "i915_oa_glk.h"
-#include "i915_oa_cflgt2.h"
-#include "i915_oa_cflgt3.h"
-#include "i915_oa_cnl.h"
-#include "i915_oa_icl.h"
+#include "i915_perf.h"
 
 /* HW requires this to be a power of two, between 128k and 16M, though driver
  * is currently generally designed assuming the largest 16M size is used such
@@ -229 +223 @@
  *
  * Although this can be observed explicitly while copying reports to userspace
  * by checking for a zeroed report-id field in tail reports, we want to account
- * for this earlier, as part of the oa_buffer_check to avoid lots of redundant
- * read() attempts.
+ * for this earlier, as part of the oa_buffer_check_unlocked to avoid lots of
+ * redundant read() attempts.
  *
- * In effect we define a tail pointer for reading that lags the real tail
- * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
- * time for the corresponding reports to become visible to the CPU.
- *
- * To manage this we actually track two tail pointers:
- *  1) An 'aging' tail with an associated timestamp that is tracked until we
- *     can trust the corresponding data is visible to the CPU; at which point
- *     it is considered 'aged'.
- *  2) An 'aged' tail that can be used for read()ing.
- *
- * The two separate pointers let us decouple read()s from tail pointer aging.
- *
- * The tail pointers are checked and updated at a limited rate within a hrtimer
- * callback (the same callback that is used for delivering EPOLLIN events)
- *
- * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
- * indicates that an updated tail pointer is needed.
+ * We work around this issue in oa_buffer_check_unlocked() by reading the
+ * reports in the OA buffer, starting from the tail reported by the HW, until
+ * we find a report whose first 2 dwords are not 0, meaning its previous report
+ * is completely in memory and ready to be read. Those dwords are also set to 0
+ * once read, and the whole buffer is cleared upon OA buffer initialization.
+ * The first dword is the reason for this report while the second is the
+ * timestamp, making the chances of having those 2 fields at 0 fairly unlikely.
+ * A more detailed explanation is available in oa_buffer_check_unlocked().
  *
  * Most of the implementation details for this workaround are in
  * oa_buffer_check_unlocked() and _append_oa_reports()
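
A minimal sketch of the detection scheme described above, using the OA_TAKEN() helper and OA_BUFFER_SIZE from this file (buf, hw_tail and prev_tail are hypothetical locals standing in for the stream state):

	/* Walk back from the HW tail until we reach a report whose first two
	 * dwords (reason, timestamp) are non-zero, meaning its previous
	 * report has fully landed in memory and is safe to read.
	 */
	u32 tail = hw_tail;

	while (OA_TAKEN(tail, prev_tail) >= report_size) {
		u32 *report32 = (u32 *)(buf + tail);

		if (report32[0] || report32[1])
			break;

		tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
	}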
@@ -263 +248 @@
 #define OA_TAIL_MARGIN_NSEC	100000ULL
 #define INVALID_TAIL_PTR	0xffffffff
 
-/* frequency for checking whether the OA unit has written new reports to the
- * circular OA buffer...
+/* The default frequency for checking whether the OA unit has written new
+ * reports to the circular OA buffer...
  */
-#define POLL_FREQUENCY 200
-#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
+#define DEFAULT_POLL_FREQUENCY_HZ 200
+#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
 
 /* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
-static int zero;
-static int one = 1;
 static u32 i915_perf_stream_paranoid = true;
 
 /* The maximum exponent the hardware accepts is 63 (essentially it selects one
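
For concreteness, NSEC_PER_SEC / 200 is 5,000,000 ns, so the default is one OA buffer check every 5 ms unless userspace overrides the poll period. A sketch of how such a period would typically arm the poll hrtimer (the poll_check_timer field name is an assumption about the stream state, which is not shown in this excerpt):

	/* Re-arm the poll check DEFAULT_POLL_FREQUENCY_HZ times a second. */
	hrtimer_start(&stream->poll_check_timer,
		      ns_to_ktime(DEFAULT_POLL_PERIOD_NS),
		      HRTIMER_MODE_REL_PINNED);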
@@ -288 +271 @@
 
 /* On Gen8+ automatically triggered OA reports include a 'reason' field... */
 #define OAREPORT_REASON_MASK           0x3f
+#define OAREPORT_REASON_MASK_EXTENDED  0x7f
 #define OAREPORT_REASON_SHIFT          19
 #define OAREPORT_REASON_TIMER          (1<<0)
 #define OAREPORT_REASON_CTX_SWITCH     (1<<3)
@@ -333 +317 @@
 	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
 };
 
+static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
+	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
+};
+
 #define SAMPLE_OA_REPORT      (1<<0)
 
 /**
  * struct perf_open_properties - for validated properties given to open a stream
  * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
  * @single_context: Whether a single or all gpu contexts should be monitored
+ * @hold_preemption: Whether preemption is disabled for the filtered context
  * @ctx_handle: A gem ctx handle for use with @single_context
  * @metrics_set: An ID for an OA unit metric set advertised via sysfs
  * @oa_format: An OA unit HW report format
  * @oa_periodic: Whether to enable periodic OA unit sampling
  * @oa_period_exponent: The OA unit sampling period is derived from this
+ * @engine: The engine (typically rcs0) being monitored by the OA unit
+ * @has_sseu: Whether @sseu was specified by userspace
+ * @sseu: internal SSEU configuration computed either from the userspace
+ *        specified configuration in the opening parameters or a default value
+ *        (see get_default_sseu_config())
+ * @poll_oa_period: The period in nanoseconds at which the CPU will check for
+ *                  OA data availability
  *
  * As read_properties_unlocked() enumerates and validates the properties given
  * to open a stream of metrics the configuration is built up in the structure
@@ -353 +350 @@
 	u32 sample_flags;
 
 	u64 single_context:1;
+	u64 hold_preemption:1;
 	u64 ctx_handle;
 
 	/* OA sampling state */
@@ -360 +358 @@
 	int oa_format;
 	bool oa_periodic;
 	int oa_period_exponent;
+
+	struct intel_engine_cs *engine;
+
+	bool has_sseu;
+	struct intel_sseu sseu;
+
+	u64 poll_oa_period;
 };
 
-static void free_oa_config(struct drm_i915_private *dev_priv,
-			   struct i915_oa_config *oa_config)
+struct i915_oa_config_bo {
+	struct llist_node node;
+
+	struct i915_oa_config *oa_config;
+	struct i915_vma *vma;
+};
+
+static struct ctl_table_header *sysctl_header;
+
+static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
+
+void i915_oa_config_release(struct kref *ref)
 {
-	if (!PTR_ERR(oa_config->flex_regs))
-		kfree(oa_config->flex_regs);
-	if (!PTR_ERR(oa_config->b_counter_regs))
-		kfree(oa_config->b_counter_regs);
-	if (!PTR_ERR(oa_config->mux_regs))
-		kfree(oa_config->mux_regs);
-	kfree(oa_config);
+	struct i915_oa_config *oa_config =
+		container_of(ref, typeof(*oa_config), ref);
+
+	kfree(oa_config->flex_regs);
+	kfree(oa_config->b_counter_regs);
+	kfree(oa_config->mux_regs);
+
+	kfree_rcu(oa_config, rcu);
 }
 
-static void put_oa_config(struct drm_i915_private *dev_priv,
-			  struct i915_oa_config *oa_config)
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
 {
-	if (!atomic_dec_and_test(&oa_config->ref_count))
-		return;
+	struct i915_oa_config *oa_config;
 
-	free_oa_config(dev_priv, oa_config);
+	rcu_read_lock();
+	oa_config = idr_find(&perf->metrics_idr, metrics_set);
+	if (oa_config)
+		oa_config = i915_oa_config_get(oa_config);
+	rcu_read_unlock();
+
+	return oa_config;
 }
 
-static int get_oa_config(struct drm_i915_private *dev_priv,
-			 int metrics_set,
-			 struct i915_oa_config **out_config)
+static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
 {
-	int ret;
-
-	if (metrics_set == 1) {
-		*out_config = &dev_priv->perf.oa.test_config;
-		atomic_inc(&dev_priv->perf.oa.test_config.ref_count);
-		return 0;
-	}
-
-	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
-	if (ret)
-		return ret;
-
-	*out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
-	if (!*out_config)
-		ret = -EINVAL;
-	else
-		atomic_inc(&(*out_config)->ref_count);
-
-	mutex_unlock(&dev_priv->perf.metrics_lock);
-
-	return ret;
+	i915_oa_config_put(oa_bo->oa_config);
+	i915_vma_put(oa_bo->vma);
+	kfree(oa_bo);
 }
 
-static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv)
+static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
 {
-	return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
+	struct intel_uncore *uncore = stream->uncore;
+
+	return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
+	       GEN12_OAG_OATAILPTR_MASK;
 }
 
-static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
+static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
 {
-	u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
+	struct intel_uncore *uncore = stream->uncore;
+
+	return intel_uncore_read(uncore, GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
+}
+
+static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
+{
+	struct intel_uncore *uncore = stream->uncore;
+	u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
 
 	return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
 }
 
 /**
  * oa_buffer_check_unlocked - check for data and update tail ptr state
- * @dev_priv: i915 device instance
+ * @stream: i915 stream instance
  *
  * This is either called via fops (for blocking reads in user ctx) or the poll
  * check hrtimer (atomic ctx) to check the OA buffer tail pointer and check
@@ -437 +449 @@
  * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
  *
  * Besides returning true when there is data available to read() this function
- * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp
- * and .aged_tail_idx state used for reading.
+ * also updates the tail, aging_tail and aging_timestamp in the oa_buffer
+ * object.
  *
  * Note: It's safe to read OA config state here unlocked, assuming that this is
  * only called while the stream is enabled, while the global OA configuration
@@ -446 +458 @@
  *
  * Returns: %true if the OA buffer contains data, else %false
  */
-static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
+static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
 {
-	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
+	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
+	int report_size = stream->oa_buffer.format_size;
 	unsigned long flags;
-	unsigned int aged_idx;
-	u32 head, hw_tail, aged_tail, aging_tail;
+	bool pollin;
+	u32 hw_tail;
 	u64 now;
 
 	/* We have to consider the (unlikely) possibility that read() errors
-	 * could result in an OA buffer reset which might reset the head,
-	 * tails[] and aged_tail state.
+	 * could result in an OA buffer reset which might reset the head and
+	 * tail state.
 	 */
-	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
 
-	/* NB: The head we observe here might effectively be a little out of
-	 * date (between head and tails[aged_idx].offset if there is currently
-	 * a read() in progress.
-	 */
-	head = dev_priv->perf.oa.oa_buffer.head;
-
-	aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
-	aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
-	aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;
-
-	hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
+	hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
 
 	/* The tail pointer increases in 64 byte increments,
 	 * not in report_size steps...
@@ -479 +482 @@
 
 	now = ktime_get_mono_fast_ns();
 
-	/* Update the aged tail
-	 *
-	 * Flip the tail pointer available for read()s once the aging tail is
-	 * old enough to trust that the corresponding data will be visible to
-	 * the CPU...
-	 *
-	 * Do this before updating the aging pointer in case we may be able to
-	 * immediately start aging a new pointer too (if new data has become
-	 * available) without needing to wait for a later hrtimer callback.
-	 */
-	if (aging_tail != INVALID_TAIL_PTR &&
-	    ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
-	     OA_TAIL_MARGIN_NSEC)) {
-
-		aged_idx ^= 1;
-		dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;
-
-		aged_tail = aging_tail;
-
-		/* Mark that we need a new pointer to start aging... */
-		dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
-		aging_tail = INVALID_TAIL_PTR;
-	}
-
-	/* Update the aging tail
-	 *
-	 * We throttle aging tail updates until we have a new tail that
-	 * represents >= one report more data than is already available for
-	 * reading. This ensures there will be enough data for a successful
-	 * read once this new pointer has aged and ensures we will give the new
-	 * pointer time to age.
-	 */
-	if (aging_tail == INVALID_TAIL_PTR &&
-	    (aged_tail == INVALID_TAIL_PTR ||
-	     OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
-		struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
-		u32 gtt_offset = i915_ggtt_offset(vma);
-
-		/* Be paranoid and do a bounds check on the pointer read back
-		 * from hardware, just in case some spurious hardware condition
-		 * could put the tail out of bounds...
+	if (hw_tail == stream->oa_buffer.aging_tail &&
+	    (now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC) {
+		/* If the HW tail hasn't moved since the last check and the HW
+		 * tail has been aging for long enough, declare it the new
+		 * tail.
 		 */
-		if (hw_tail >= gtt_offset &&
-		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
-			dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
-				aging_tail = hw_tail;
-			dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
-		} else {
-			DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
-				  hw_tail);
+		stream->oa_buffer.tail = stream->oa_buffer.aging_tail;
+	} else {
+		u32 head, tail, aged_tail;
+
+		/* NB: The head we observe here might effectively be a little
+		 * out of date. If a read() is in progress, the head could be
+		 * anywhere between this head and stream->oa_buffer.tail.
+		 */
+		head = stream->oa_buffer.head - gtt_offset;
+		aged_tail = stream->oa_buffer.tail - gtt_offset;
+
+		hw_tail -= gtt_offset;
+		tail = hw_tail;
+
+		/* Walk the stream backward until we find a report with dword 0
+		 * & 1 not at 0. Since the circular buffer pointers progress by
+		 * increments of 64 bytes and reports can be up to 256 bytes
+		 * long, we can't tell whether a report has fully landed in
+		 * memory before the first 2 dwords of the following report
+		 * have effectively landed.
+		 *
+		 * This is assuming that the writes of the OA unit land in
+		 * memory in the order they were written to.
+		 * If not : (╯°□°)╯︵ ┻━┻
+		 */
+		while (OA_TAKEN(tail, aged_tail) >= report_size) {
+			u32 *report32 = (void *)(stream->oa_buffer.vaddr + tail);
+
+			if (report32[0] != 0 || report32[1] != 0)
+				break;
+
+			tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
 		}
+
+		if (OA_TAKEN(hw_tail, tail) > report_size &&
+		    __ratelimit(&stream->perf->tail_pointer_race))
+			DRM_NOTE("unlanded report(s) head=0x%x "
+				 "tail=0x%x hw_tail=0x%x\n",
+				 head, tail, hw_tail);
+
+		stream->oa_buffer.tail = gtt_offset + tail;
+		stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
+		stream->oa_buffer.aging_timestamp = now;
 	}
 
-	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+	pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
+			  stream->oa_buffer.head - gtt_offset) >= report_size;
 
-	return aged_tail == INVALID_TAIL_PTR ?
-		false : OA_TAKEN(aged_tail, head) >= report_size;
+	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
+
+	return pollin;
 }
 
 /**
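
The arithmetic the function above leans on is OA_TAKEN(), defined near the top of this file (outside this excerpt) as, essentially, the byte distance between two offsets modulo the power-of-two buffer size:

	#define OA_TAKEN(tail, head)	(((tail) - (head)) & (OA_BUFFER_SIZE - 1))

	/* e.g. with the 16M buffer, tail = 0x100 and head = 0xffff40 give
	 * (0x100 - 0xffff40) & 0xffffff = 0x1c0 bytes available: the unsigned
	 * subtraction wraps correctly even when tail has wrapped past head.
	 */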
@@ -595 +598 @@
 			    size_t *offset,
 			    const u8 *report)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
-	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
+	int report_size = stream->oa_buffer.format_size;
 	struct drm_i915_perf_record_header header;
-	u32 sample_flags = stream->sample_flags;
 
 	header.type = DRM_I915_PERF_RECORD_SAMPLE;
 	header.pad = 0;
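
The byte stream this helper emits to userspace is the uapi record header immediately followed by the raw OA report. Roughly, per record (the struct name here is purely illustrative, not part of the uapi):

	struct sample_record {
		struct drm_i915_perf_record_header header;
			/* .type = DRM_I915_PERF_RECORD_SAMPLE,
			 * .size = sizeof(header) + report_size
			 */
		u8 report[];	/* raw OA report, format-dependent size */
	};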
@@ -612 +613 @@
 		return -EFAULT;
 	buf += sizeof(header);
 
-	if (sample_flags & SAMPLE_OA_REPORT) {
-		if (copy_to_user(buf, report, report_size))
-			return -EFAULT;
-	}
+	if (copy_to_user(buf, report, report_size))
+		return -EFAULT;
 
 	(*offset) += header.size;
 
@@ -647 +646 @@
 			  size_t count,
 			  size_t *offset)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
-	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
-	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
-	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
+	struct intel_uncore *uncore = stream->uncore;
+	int report_size = stream->oa_buffer.format_size;
+	u8 *oa_buf_base = stream->oa_buffer.vaddr;
+	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
 	u32 mask = (OA_BUFFER_SIZE - 1);
 	size_t start_offset = *offset;
 	unsigned long flags;
-	unsigned int aged_tail_idx;
 	u32 head, tail;
 	u32 taken;
 	int ret = 0;
 
-	if (WARN_ON(!stream->enabled))
+	if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
 		return -EIO;
 
-	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
 
-	head = dev_priv->perf.oa.oa_buffer.head;
-	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
-	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
+	head = stream->oa_buffer.head;
+	tail = stream->oa_buffer.tail;
 
-	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
-	/*
-	 * An invalid tail pointer here means we're still waiting for the poll
-	 * hrtimer callback to give us a pointer
-	 */
-	if (tail == INVALID_TAIL_PTR)
-		return -EAGAIN;
+	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
 
 	/*
 	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
@@ -691 +681 @@
 	 * only be incremented by multiples of the report size (notably also
 	 * all a power of two).
 	 */
-	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
-		      tail > OA_BUFFER_SIZE || tail % report_size,
-		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
-		      head, tail))
+	if (drm_WARN_ONCE(&uncore->i915->drm,
+			  head > OA_BUFFER_SIZE || head % report_size ||
+			  tail > OA_BUFFER_SIZE || tail % report_size,
+			  "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
+			  head, tail))
 		return -EIO;
 
 
@@ -715 +706 @@
 		 * here would imply a driver bug that would result
 		 * in an overrun.
 		 */
-		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
-			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
+		if (drm_WARN_ON(&uncore->i915->drm,
+				(OA_BUFFER_SIZE - head) < report_size)) {
+			drm_err(&uncore->i915->drm,
+				"Spurious OA head ptr: non-integral report offset\n");
 			break;
 		}
 
@@ -730 +723 @@
 		 * it to userspace...
 		 */
 		reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
-			  OAREPORT_REASON_MASK);
+			  (IS_GEN(stream->perf->i915, 12) ?
+			   OAREPORT_REASON_MASK_EXTENDED :
+			   OAREPORT_REASON_MASK));
 		if (reason == 0) {
-			if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
+			if (__ratelimit(&stream->perf->spurious_report_rs))
 				DRM_NOTE("Skipping spurious, invalid OA report\n");
 			continue;
 		}
 
-		ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask;
+		ctx_id = report32[2] & stream->specific_ctx_id_mask;
 
 		/*
 		 * Squash whatever is in the CTX_ID field if it's marked as
@@ -747 +742 @@
 		 * Note: that we don't clear the valid_ctx_bit so userspace can
 		 * understand that the ID has been squashed by the kernel.
 		 */
-		if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit))
+		if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
+		    INTEL_GEN(stream->perf->i915) <= 11)
 			ctx_id = report32[2] = INVALID_CTX_ID;
 
 		/*
@@ -781 +777 @@
 		 * switches since it's not-uncommon for periodic samples to
 		 * identify a switch before any 'context switch' report.
 		 */
-		if (!dev_priv->perf.oa.exclusive_stream->ctx ||
-		    dev_priv->perf.oa.specific_ctx_id == ctx_id ||
-		    (dev_priv->perf.oa.oa_buffer.last_ctx_id ==
-		     dev_priv->perf.oa.specific_ctx_id) ||
+		if (!stream->perf->exclusive_stream->ctx ||
+		    stream->specific_ctx_id == ctx_id ||
+		    stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
 		    reason & OAREPORT_REASON_CTX_SWITCH) {
 
 			/*
 			 * While filtering for a single context we avoid
 			 * leaking the IDs of other contexts.
 			 */
-			if (dev_priv->perf.oa.exclusive_stream->ctx &&
-			    dev_priv->perf.oa.specific_ctx_id != ctx_id) {
+			if (stream->perf->exclusive_stream->ctx &&
+			    stream->specific_ctx_id != ctx_id) {
 				report32[2] = INVALID_CTX_ID;
 			}
 
@@ -801 +796 @@
 			if (ret)
 				break;
 
-			dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id;
+			stream->oa_buffer.last_ctx_id = ctx_id;
 		}
 
 		/*
-		 * The above reason field sanity check is based on
-		 * the assumption that the OA buffer is initially
-		 * zeroed and we reset the field after copying so the
-		 * check is still meaningful once old reports start
-		 * being overwritten.
+		 * Clear out the first 2 dwords as a means to detect unlanded
+		 * reports.
 		 */
 		report32[0] = 0;
+		report32[1] = 0;
 	}
 
 	if (start_offset != *offset) {
-		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+		i915_reg_t oaheadptr;
+
+		oaheadptr = IS_GEN(stream->perf->i915, 12) ?
+			    GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;
+
+		spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
 
 		/*
 		 * We removed the gtt_offset for the copy loop above, indexing
 		 * relative to oa_buf_base so put back here...
 		 */
 		head += gtt_offset;
+		intel_uncore_write(uncore, oaheadptr,
+				   head & GEN12_OAG_OAHEADPTR_MASK);
+		stream->oa_buffer.head = head;
 
-		I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
-		dev_priv->perf.oa.oa_buffer.head = head;
-
-		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+		spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
 	}
 
 	return ret;
@@ -857 +855 @@
 			size_t count,
 			size_t *offset)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_uncore *uncore = stream->uncore;
 	u32 oastatus;
+	i915_reg_t oastatus_reg;
 	int ret;
 
-	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
+	if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr))
 		return -EIO;
 
-	oastatus = I915_READ(GEN8_OASTATUS);
+	oastatus_reg = IS_GEN(stream->perf->i915, 12) ?
+		       GEN12_OAG_OASTATUS : GEN8_OASTATUS;
+
+	oastatus = intel_uncore_read(uncore, oastatus_reg);
 
 	/*
 	 * We treat OABUFFER_OVERFLOW as a significant error:
@@ -887 +889 @@
 			return ret;
 
 		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
-			  dev_priv->perf.oa.period_exponent);
+			  stream->period_exponent);
 
-		dev_priv->perf.oa.ops.oa_disable(dev_priv);
-		dev_priv->perf.oa.ops.oa_enable(dev_priv);
+		stream->perf->ops.oa_disable(stream);
+		stream->perf->ops.oa_enable(stream);
 
 		/*
 		 * Note: .oa_enable() is expected to re-init the oabuffer and
 		 * reset GEN8_OASTATUS for us
 		 */
-		oastatus = I915_READ(GEN8_OASTATUS);
+		oastatus = intel_uncore_read(uncore, oastatus_reg);
 	}
 
 	if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
@@ -904 +906 @@
 					     DRM_I915_PERF_RECORD_OA_REPORT_LOST);
 		if (ret)
 			return ret;
-		I915_WRITE(GEN8_OASTATUS,
-			   oastatus & ~GEN8_OASTATUS_REPORT_LOST);
+
+		intel_uncore_rmw(uncore, oastatus_reg,
+				 GEN8_OASTATUS_COUNTER_OVERFLOW |
+				 GEN8_OASTATUS_REPORT_LOST,
+				 IS_GEN_RANGE(uncore->i915, 8, 10) ?
+				 (GEN8_OASTATUS_HEAD_POINTER_WRAP |
+				  GEN8_OASTATUS_TAIL_POINTER_WRAP) : 0);
 	}
 
 	return gen8_append_oa_reports(stream, buf, count, offset);
@@ -936 +943 @@
 			  size_t count,
 			  size_t *offset)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
-	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
-	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
-	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
+	struct intel_uncore *uncore = stream->uncore;
+	int report_size = stream->oa_buffer.format_size;
+	u8 *oa_buf_base = stream->oa_buffer.vaddr;
+	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
 	u32 mask = (OA_BUFFER_SIZE - 1);
 	size_t start_offset = *offset;
 	unsigned long flags;
-	unsigned int aged_tail_idx;
 	u32 head, tail;
 	u32 taken;
 	int ret = 0;
 
-	if (WARN_ON(!stream->enabled))
+	if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
 		return -EIO;
 
-	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
 
-	head = dev_priv->perf.oa.oa_buffer.head;
-	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
-	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
+	head = stream->oa_buffer.head;
+	tail = stream->oa_buffer.tail;
 
-	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
-	/* An invalid tail pointer here means we're still waiting for the poll
-	 * hrtimer callback to give us a pointer
-	 */
-	if (tail == INVALID_TAIL_PTR)
-		return -EAGAIN;
+	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
 
 	/* NB: oa_buffer.head/tail include the gtt_offset which we don't want
 	 * while indexing relative to oa_buf_base.
@@ -977 +976 @@
 	 * only be incremented by multiples of the report size (notably also
 	 * all a power of two).
 	 */
-	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
-		      tail > OA_BUFFER_SIZE || tail % report_size,
-		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
-		      head, tail))
+	if (drm_WARN_ONCE(&uncore->i915->drm,
+			  head > OA_BUFFER_SIZE || head % report_size ||
+			  tail > OA_BUFFER_SIZE || tail % report_size,
+			  "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
+			  head, tail))
 		return -EIO;
 
 
@@ -998 +998 @@
 		 * here would imply a driver bug that would result
 		 * in an overrun.
 		 */
-		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
-			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
+		if (drm_WARN_ON(&uncore->i915->drm,
+				(OA_BUFFER_SIZE - head) < report_size)) {
+			drm_err(&uncore->i915->drm,
+				"Spurious OA head ptr: non-integral report offset\n");
 			break;
 		}
 
@@ -1010 +1012 @@
 		 * copying it to userspace...
 		 */
 		if (report32[0] == 0) {
-			if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
+			if (__ratelimit(&stream->perf->spurious_report_rs))
 				DRM_NOTE("Skipping spurious, invalid OA report\n");
 			continue;
 		}
@@ -1019 +1021 @@
 		if (ret)
 			break;
 
-		/* The above report-id field sanity check is based on
-		 * the assumption that the OA buffer is initially
-		 * zeroed and we reset the field after copying so the
-		 * check is still meaningful once old reports start
-		 * being overwritten.
+		/* Clear out the first 2 dwords as a means to detect unlanded
+		 * reports.
 		 */
 		report32[0] = 0;
+		report32[1] = 0;
 	}
 
 	if (start_offset != *offset) {
-		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+		spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
 
 		/* We removed the gtt_offset for the copy loop above, indexing
 		 * relative to oa_buf_base so put back here...
 		 */
 		head += gtt_offset;
 
-		I915_WRITE(GEN7_OASTATUS2,
-			   ((head & GEN7_OASTATUS2_HEAD_MASK) |
-			    GEN7_OASTATUS2_MEM_SELECT_GGTT));
-		dev_priv->perf.oa.oa_buffer.head = head;
+		intel_uncore_write(uncore, GEN7_OASTATUS2,
+				   (head & GEN7_OASTATUS2_HEAD_MASK) |
+				   GEN7_OASTATUS2_MEM_SELECT_GGTT);
+		stream->oa_buffer.head = head;
 
-		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+		spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
 	}
 
 	return ret;
@@ -1068 +1068 @@
 			size_t count,
 			size_t *offset)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_uncore *uncore = stream->uncore;
 	u32 oastatus1;
 	int ret;
 
-	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
+	if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr))
 		return -EIO;
 
-	oastatus1 = I915_READ(GEN7_OASTATUS1);
+	oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
 
 	/* XXX: On Haswell we don't have a safe way to clear oastatus1
 	 * bits while the OA unit is enabled (while the tail pointer
 	 * may be updated asynchronously) so we ignore status bits
 	 * that have already been reported to userspace.
 	 */
-	oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1;
+	oastatus1 &= ~stream->perf->gen7_latched_oastatus1;
 
 	/* We treat OABUFFER_OVERFLOW as a significant error:
 	 *
@@ -1111 +1111 @@
 			return ret;
 
 		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
-			  dev_priv->perf.oa.period_exponent);
+			  stream->period_exponent);
 
-		dev_priv->perf.oa.ops.oa_disable(dev_priv);
-		dev_priv->perf.oa.ops.oa_enable(dev_priv);
+		stream->perf->ops.oa_disable(stream);
+		stream->perf->ops.oa_enable(stream);
 
-		oastatus1 = I915_READ(GEN7_OASTATUS1);
+		oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
 	}
 
 	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
@@ -1124 +1124 @@
 					     DRM_I915_PERF_RECORD_OA_REPORT_LOST);
 		if (ret)
 			return ret;
-		dev_priv->perf.oa.gen7_latched_oastatus1 |=
+		stream->perf->gen7_latched_oastatus1 |=
 			GEN7_OASTATUS1_REPORT_LOST;
 	}
 
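
The latch-and-mask idiom used by this gen7 path, in isolation (a sketch; latched stands in for perf->gen7_latched_oastatus1): since Haswell gives no safe way to clear OASTATUS1 while the OA unit is enabled, each status bit is reported to userspace once and then masked out of every later read:

	oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
	oastatus1 &= ~latched;		/* ignore bits already reported */
	if (oastatus1 & GEN7_OASTATUS1_REPORT_LOST)
		latched |= GEN7_OASTATUS1_REPORT_LOST; /* report once only */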
@@ -1147 +1147 @@
  */
 static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
-
 	/* We would wait indefinitely if periodic sampling is not enabled */
-	if (!dev_priv->perf.oa.periodic)
+	if (!stream->periodic)
 		return -EIO;
 
-	return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
-					oa_buffer_check_unlocked(dev_priv));
+	return wait_event_interruptible(stream->poll_wq,
+					oa_buffer_check_unlocked(stream));
 }
 
 /**
@@ -1171 +1169 @@
 			      struct file *file,
 			      poll_table *wait)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
-
-	poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
+	poll_wait(file, &stream->poll_wq, wait);
 }
 
 /**
@@ -1193 +1189 @@
 			    size_t count,
 			    size_t *offset)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
-
-	return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
+	return stream->perf->ops.read(stream, buf, count, offset);
 }
 
-static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
-					    struct i915_gem_context *ctx)
+static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
 {
-	struct intel_engine_cs *engine = i915->engine[RCS];
+	struct i915_gem_engines_iter it;
+	struct i915_gem_context *ctx = stream->ctx;
 	struct intel_context *ce;
-	int ret;
+	struct i915_gem_ww_ctx ww;
+	int err = -ENODEV;
 
-	ret = i915_mutex_lock_interruptible(&i915->drm);
-	if (ret)
-		return ERR_PTR(ret);
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		if (ce->engine != stream->engine) /* first match! */
+			continue;
 
+		err = 0;
+		break;
+	}
+	i915_gem_context_unlock_engines(ctx);
+
+	if (err)
+		return ERR_PTR(err);
+
+	i915_gem_ww_ctx_init(&ww, true);
+retry:
 	/*
 	 * As the ID is the gtt offset of the context's vma we
 	 * pin the vma to ensure the ID remains fixed.
-	 *
-	 * NB: implied RCS engine...
 	 */
-	ce = intel_context_pin(ctx, engine);
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (IS_ERR(ce))
-		return ce;
+	err = intel_context_pin_ww(ce, &ww);
+	if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(&ww);
+		if (!err)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww);
 
-	i915->perf.oa.pinned_ctx = ce;
+	if (err)
+		return ERR_PTR(err);
 
-	return ce;
+	stream->pinned_ctx = ce;
+	return stream->pinned_ctx;
 }
 
 /**
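
The pinning above follows the standard i915 ww (wound/wait) transaction pattern: on -EDEADLK the transaction backs off, dropping its locks, and retries from the top rather than deadlocking. The skeleton of that pattern, extracted from the new code:

	i915_gem_ww_ctx_init(&ww, true);	/* true = interruptible */
retry:
	err = intel_context_pin_ww(ce, &ww);	/* may return -EDEADLK */
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);	/* unwind, then wait */
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);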
@@ -1237 +1245 @@
  */
 static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *i915 = stream->dev_priv;
 	struct intel_context *ce;
 
-	ce = oa_pin_context(i915, stream->ctx);
+	ce = oa_pin_context(stream);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
-	switch (INTEL_GEN(i915)) {
+	switch (INTEL_GEN(ce->engine->i915)) {
 	case 7: {
 		/*
 		 * On Haswell we don't do any post processing of the reports
 		 * and don't need to use the mask.
 		 */
-		i915->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state);
-		i915->perf.oa.specific_ctx_id_mask = 0;
+		stream->specific_ctx_id = i915_ggtt_offset(ce->state);
+		stream->specific_ctx_id_mask = 0;
 		break;
 	}
 
 	case 8:
 	case 9:
 	case 10:
-		if (USES_GUC_SUBMISSION(i915)) {
+		if (intel_engine_in_execlists_submission_mode(ce->engine)) {
+			stream->specific_ctx_id_mask =
+				(1U << GEN8_CTX_ID_WIDTH) - 1;
+			stream->specific_ctx_id = stream->specific_ctx_id_mask;
+		} else {
 			/*
 			 * When using GuC, the context descriptor we write in
 			 * i915 is read by GuC and rewritten before it's
@@ -1269 +1280 @@
 			 * dropped by GuC. They won't be part of the context
 			 * ID in the OA reports, so squash those lower bits.
 			 */
-			i915->perf.oa.specific_ctx_id =
-				lower_32_bits(ce->lrc_desc) >> 12;
+			stream->specific_ctx_id = ce->lrc.lrca >> 12;
 
 			/*
 			 * GuC uses the top bit to signal proxy submission, so
 			 * ignore that bit.
 			 */
-			i915->perf.oa.specific_ctx_id_mask =
+			stream->specific_ctx_id_mask =
 				(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
-		} else {
-			i915->perf.oa.specific_ctx_id_mask =
-				(1U << GEN8_CTX_ID_WIDTH) - 1;
-			i915->perf.oa.specific_ctx_id =
-				upper_32_bits(ce->lrc_desc);
-			i915->perf.oa.specific_ctx_id &=
-				i915->perf.oa.specific_ctx_id_mask;
 		}
 		break;
 
-	case 11: {
-		i915->perf.oa.specific_ctx_id_mask =
-			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
-			((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
-			((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
-		i915->perf.oa.specific_ctx_id = upper_32_bits(ce->lrc_desc);
-		i915->perf.oa.specific_ctx_id &=
-			i915->perf.oa.specific_ctx_id_mask;
+	case 11:
+	case 12: {
+		stream->specific_ctx_id_mask =
+			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
+		/*
+		 * Pick an unused context id
+		 * 0 - BITS_PER_LONG are used by other contexts
+		 * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
+		 */
+		stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
 		break;
 	}
 
 	default:
-		MISSING_CASE(INTEL_GEN(i915));
+		MISSING_CASE(INTEL_GEN(ce->engine->i915));
 	}
 
-	DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
-			 i915->perf.oa.specific_ctx_id,
-			 i915->perf.oa.specific_ctx_id_mask);
+	ce->tag = stream->specific_ctx_id;
+
+	drm_dbg(&stream->perf->i915->drm,
+		"filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
+		stream->specific_ctx_id,
+		stream->specific_ctx_id_mask);
 
 	return 0;
 }
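
Given the specific_ctx_id/specific_ctx_id_mask pair chosen above, the filtering in gen8_append_oa_reports() reduces to a masked compare against the report's third dword (sketch):

	/* Does this OA report belong to the context being filtered on? */
	u32 ctx_id = report32[2] & stream->specific_ctx_id_mask;

	if (ctx_id == stream->specific_ctx_id) {
		/* generated while the filtered context was active */
	}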
@@ -1319 +1327 @@
  */
 static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
 	struct intel_context *ce;
 
-	dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
-	dev_priv->perf.oa.specific_ctx_id_mask = 0;
-
-	ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx);
+	ce = fetch_and_zero(&stream->pinned_ctx);
 	if (ce) {
-		mutex_lock(&dev_priv->drm.struct_mutex);
+		ce->tag = 0; /* recomputed on next submission after parking */
 		intel_context_unpin(ce);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
 	}
+
+	stream->specific_ctx_id = INVALID_CTX_ID;
+	stream->specific_ctx_id_mask = 0;
 }
 
 static void
-free_oa_buffer(struct drm_i915_private *i915)
+free_oa_buffer(struct i915_perf_stream *stream)
 {
-	mutex_lock(&i915->drm.struct_mutex);
+	i915_vma_unpin_and_release(&stream->oa_buffer.vma,
+				   I915_VMA_RELEASE_MAP);
 
-	i915_gem_object_unpin_map(i915->perf.oa.oa_buffer.vma->obj);
-	i915_vma_unpin(i915->perf.oa.oa_buffer.vma);
-	i915_gem_object_put(i915->perf.oa.oa_buffer.vma->obj);
+	stream->oa_buffer.vaddr = NULL;
+}
 
-	i915->perf.oa.oa_buffer.vma = NULL;
-	i915->perf.oa.oa_buffer.vaddr = NULL;
+static void
+free_oa_configs(struct i915_perf_stream *stream)
+{
+	struct i915_oa_config_bo *oa_bo, *tmp;
 
-	mutex_unlock(&i915->drm.struct_mutex);
+	i915_oa_config_put(stream->oa_config);
+	llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
+		free_oa_config_bo(oa_bo);
+}
+
+static void
+free_noa_wait(struct i915_perf_stream *stream)
+{
+	i915_vma_unpin_and_release(&stream->noa_wait, 0);
 }
 
 static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct i915_perf *perf = stream->perf;
 
-	BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);
+	BUG_ON(stream != perf->exclusive_stream);
 
 	/*
 	 * Unset exclusive_stream first, it will be checked while disabling
 	 * the metric set on gen8+.
+	 *
+	 * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
 	 */
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	dev_priv->perf.oa.exclusive_stream = NULL;
-	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	WRITE_ONCE(perf->exclusive_stream, NULL);
+	perf->ops.disable_metric_set(stream);
 
-	free_oa_buffer(dev_priv);
+	free_oa_buffer(stream);
 
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-	intel_runtime_pm_put(dev_priv);
+	intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
+	intel_engine_pm_put(stream->engine);
 
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
-	put_oa_config(dev_priv, stream->oa_config);
+	free_oa_configs(stream);
+	free_noa_wait(stream);
 
-	if (dev_priv->perf.oa.spurious_report_rs.missed) {
+	if (perf->spurious_report_rs.missed) {
 		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
-			 dev_priv->perf.oa.spurious_report_rs.missed);
+			 perf->spurious_report_rs.missed);
 	}
 }
 
-static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
+static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
 {
-	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
+	struct intel_uncore *uncore = stream->uncore;
+	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
 	unsigned long flags;
 
-	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
 
 	/* Pre-DevBDW: OABUFFER must be set with counters off,
 	 * before OASTATUS1, but after OASTATUS2
 	 */
-	I915_WRITE(GEN7_OASTATUS2,
-		   gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */
-	dev_priv->perf.oa.oa_buffer.head = gtt_offset;
+	intel_uncore_write(uncore, GEN7_OASTATUS2, /* head */
+			   gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT);
+	stream->oa_buffer.head = gtt_offset;
 
-	I915_WRITE(GEN7_OABUFFER, gtt_offset);
+	intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset);
 
-	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
+	intel_uncore_write(uncore, GEN7_OASTATUS1, /* tail */
+			   gtt_offset | OABUFFER_SIZE_16M);
 
 	/* Mark that we need updated tail pointers to read from... */
-	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
-	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
+	stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
+	stream->oa_buffer.tail = gtt_offset;
 
-	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
 
 	/* On Haswell we have to track which OASTATUS1 flags we've
 	 * already seen since they can't be cleared while periodic
 	 * sampling is enabled.
 	 */
-	dev_priv->perf.oa.gen7_latched_oastatus1 = 0;
+	stream->perf->gen7_latched_oastatus1 = 0;
 
 	/* NB: although the OA buffer will initially be allocated
 	 * zeroed via shmfs (and so this memset is redundant when
.. | .. |
---|
1420 | 1439 | * the assumption that new reports are being written to zeroed |
---|
1421 | 1440 | * memory... |
---|
1422 | 1441 | */ |
---|
1423 | | - memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); |
---|
1424 | | - |
---|
1425 | | - /* Maybe make ->pollin per-stream state if we support multiple |
---|
1426 | | - * concurrent streams in the future. |
---|
1427 | | - */ |
---|
1428 | | - dev_priv->perf.oa.pollin = false; |
---|
| 1442 | + memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE); |
---|
1429 | 1443 | } |
---|
1430 | 1444 | |
---|
1431 | | -static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) |
---|
| 1445 | +static void gen8_init_oa_buffer(struct i915_perf_stream *stream) |
---|
1432 | 1446 | { |
---|
1433 | | - u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); |
---|
| 1447 | + struct intel_uncore *uncore = stream->uncore; |
---|
| 1448 | + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); |
---|
1434 | 1449 | unsigned long flags; |
---|
1435 | 1450 | |
---|
1436 | | - spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); |
---|
| 1451 | + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); |
---|
1437 | 1452 | |
---|
1438 | | - I915_WRITE(GEN8_OASTATUS, 0); |
---|
1439 | | - I915_WRITE(GEN8_OAHEADPTR, gtt_offset); |
---|
1440 | | - dev_priv->perf.oa.oa_buffer.head = gtt_offset; |
---|
| 1453 | + intel_uncore_write(uncore, GEN8_OASTATUS, 0); |
---|
| 1454 | + intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset); |
---|
| 1455 | + stream->oa_buffer.head = gtt_offset; |
---|
1441 | 1456 | |
---|
1442 | | - I915_WRITE(GEN8_OABUFFER_UDW, 0); |
---|
| 1457 | + intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0); |
---|
1443 | 1458 | |
---|
1444 | 1459 | /* |
---|
1445 | 1460 | * PRM says: |
---|
.. | .. |
---|
1449 | 1464 | * to enable proper functionality of the overflow |
---|
1450 | 1465 | * bit." |
---|
1451 | 1466 | */ |
---|
1452 | | - I915_WRITE(GEN8_OABUFFER, gtt_offset | |
---|
| 1467 | + intel_uncore_write(uncore, GEN8_OABUFFER, gtt_offset | |
---|
1453 | 1468 | OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); |
---|
1454 | | - I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); |
---|
| 1469 | + intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); |
---|
1455 | 1470 | |
---|
1456 | 1471 | /* Mark that we need updated tail pointers to read from... */ |
---|
1457 | | - dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR; |
---|
1458 | | - dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR; |
---|
| 1472 | + stream->oa_buffer.aging_tail = INVALID_TAIL_PTR; |
---|
| 1473 | + stream->oa_buffer.tail = gtt_offset; |
---|
1459 | 1474 | |
---|
1460 | 1475 | /* |
---|
1461 | 1476 | * Reset state used to recognise context switches, affecting which |
---|
1462 | 1477 | * reports we will forward to userspace while filtering for a single |
---|
1463 | 1478 | * context. |
---|
1464 | 1479 | */ |
---|
1465 | | - dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID; |
---|
| 1480 | + stream->oa_buffer.last_ctx_id = INVALID_CTX_ID; |
---|
1466 | 1481 | |
---|
1467 | | - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); |
---|
| 1482 | + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); |
---|
1468 | 1483 | |
---|
1469 | 1484 | /* |
---|
1470 | 1485 | * NB: although the OA buffer will initially be allocated |
---|
.. | .. |
---|
1478 | 1493 | * the assumption that new reports are being written to zeroed |
---|
1479 | 1494 | * memory... |
---|
1480 | 1495 | */ |
---|
1481 | | - memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); |
---|
1482 | | - |
---|
1483 | | - /* |
---|
1484 | | - * Maybe make ->pollin per-stream state if we support multiple |
---|
1485 | | - * concurrent streams in the future. |
---|
1486 | | - */ |
---|
1487 | | - dev_priv->perf.oa.pollin = false; |
---|
| 1496 | + memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE); |
---|
1488 | 1497 | } |
---|
1489 | 1498 | |
---|
1490 | | -static int alloc_oa_buffer(struct drm_i915_private *dev_priv) |
---|
| 1499 | +static void gen12_init_oa_buffer(struct i915_perf_stream *stream) |
---|
1491 | 1500 | { |
---|
| 1501 | + struct intel_uncore *uncore = stream->uncore; |
---|
| 1502 | + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); |
---|
| 1503 | + unsigned long flags; |
---|
| 1504 | + |
---|
| 1505 | + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); |
---|
| 1506 | + |
---|
| 1507 | + intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0); |
---|
| 1508 | + intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR, |
---|
| 1509 | + gtt_offset & GEN12_OAG_OAHEADPTR_MASK); |
---|
| 1510 | + stream->oa_buffer.head = gtt_offset; |
---|
| 1511 | + |
---|
| 1512 | + /* |
---|
| 1513 | + * PRM says: |
---|
| 1514 | + * |
---|
| 1515 | + * "This MMIO must be set before the OATAILPTR |
---|
| 1516 | + * register and after the OAHEADPTR register. This is |
---|
| 1517 | + * to enable proper functionality of the overflow |
---|
| 1518 | + * bit." |
---|
| 1519 | + */ |
---|
| 1520 | + intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset | |
---|
| 1521 | + OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); |
---|
| 1522 | + intel_uncore_write(uncore, GEN12_OAG_OATAILPTR, |
---|
| 1523 | + gtt_offset & GEN12_OAG_OATAILPTR_MASK); |
---|
| 1524 | + |
---|
| 1525 | + /* Mark that we need updated tail pointers to read from... */ |
---|
| 1526 | + stream->oa_buffer.aging_tail = INVALID_TAIL_PTR; |
---|
| 1527 | + stream->oa_buffer.tail = gtt_offset; |
---|
| 1528 | + |
---|
| 1529 | + /* |
---|
| 1530 | + * Reset state used to recognise context switches, affecting which |
---|
| 1531 | + * reports we will forward to userspace while filtering for a single |
---|
| 1532 | + * context. |
---|
| 1533 | + */ |
---|
| 1534 | + stream->oa_buffer.last_ctx_id = INVALID_CTX_ID; |
---|
| 1535 | + |
---|
| 1536 | + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); |
---|
| 1537 | + |
---|
| 1538 | + /* |
---|
| 1539 | + * NB: although the OA buffer will initially be allocated |
---|
| 1540 | + * zeroed via shmfs (and so this memset is redundant when |
---|
| 1541 | + * first allocating), we may re-init the OA buffer, either |
---|
| 1542 | + * when re-enabling a stream or in error/reset paths. |
---|
| 1543 | + * |
---|
| 1544 | + * The reason we clear the buffer for each re-init is for the |
---|
| 1545 | + * sanity check in gen8_append_oa_reports() that looks at the |
---|
| 1546 | + * reason field to make sure it's non-zero which relies on |
---|
| 1547 | + * the assumption that new reports are being written to zeroed |
---|
| 1548 | + * memory... |
---|
| 1549 | + */ |
---|
| 1550 | + memset(stream->oa_buffer.vaddr, 0, |
---|
| 1551 | + stream->oa_buffer.vma->size); |
---|
| 1552 | +} |
---|
| 1553 | + |
---|
| 1554 | +static int alloc_oa_buffer(struct i915_perf_stream *stream) |
---|
| 1555 | +{ |
---|
| 1556 | + struct drm_i915_private *i915 = stream->perf->i915; |
---|
1492 | 1557 | struct drm_i915_gem_object *bo; |
---|
1493 | 1558 | struct i915_vma *vma; |
---|
1494 | 1559 | int ret; |
---|
1495 | 1560 | |
---|
1496 | | - if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma)) |
---|
| 1561 | + if (drm_WARN_ON(&i915->drm, stream->oa_buffer.vma)) |
---|
1497 | 1562 | return -ENODEV; |
---|
1498 | | - |
---|
1499 | | - ret = i915_mutex_lock_interruptible(&dev_priv->drm); |
---|
1500 | | - if (ret) |
---|
1501 | | - return ret; |
---|
1502 | 1563 | |
---|
1503 | 1564 | BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE); |
---|
1504 | 1565 | BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M); |
---|
1505 | 1566 | |
---|
1506 | | - bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE); |
---|
| 1567 | + bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE); |
---|
1507 | 1568 | if (IS_ERR(bo)) { |
---|
1508 | | - DRM_ERROR("Failed to allocate OA buffer\n"); |
---|
1509 | | - ret = PTR_ERR(bo); |
---|
1510 | | - goto unlock; |
---|
| 1569 | + drm_err(&i915->drm, "Failed to allocate OA buffer\n"); |
---|
| 1570 | + return PTR_ERR(bo); |
---|
1511 | 1571 | } |
---|
1512 | 1572 | |
---|
1513 | | - ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC); |
---|
1514 | | - if (ret) |
---|
1515 | | - goto err_unref; |
---|
| 1573 | + i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC); |
---|
1516 | 1574 | |
---|
1517 | 1575 | /* PreHSW required 512K alignment, HSW requires 16M */ |
---|
1518 | 1576 | vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0); |
---|
.. | .. |
---|
1520 | 1578 | ret = PTR_ERR(vma); |
---|
1521 | 1579 | goto err_unref; |
---|
1522 | 1580 | } |
---|
1523 | | - dev_priv->perf.oa.oa_buffer.vma = vma; |
---|
| 1581 | + stream->oa_buffer.vma = vma; |
---|
1524 | 1582 | |
---|
1525 | | - dev_priv->perf.oa.oa_buffer.vaddr = |
---|
| 1583 | + stream->oa_buffer.vaddr = |
---|
1526 | 1584 | i915_gem_object_pin_map(bo, I915_MAP_WB); |
---|
1527 | | - if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) { |
---|
1528 | | - ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr); |
---|
| 1585 | + if (IS_ERR(stream->oa_buffer.vaddr)) { |
---|
| 1586 | + ret = PTR_ERR(stream->oa_buffer.vaddr); |
---|
1529 | 1587 | goto err_unpin; |
---|
1530 | 1588 | } |
---|
1531 | 1589 | |
---|
1532 | | - dev_priv->perf.oa.ops.init_oa_buffer(dev_priv); |
---|
1533 | | - |
---|
1534 | | - DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n", |
---|
1535 | | - i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma), |
---|
1536 | | - dev_priv->perf.oa.oa_buffer.vaddr); |
---|
1537 | | - |
---|
1538 | | - goto unlock; |
---|
| 1590 | + return 0; |
---|
1539 | 1591 | |
---|
1540 | 1592 | err_unpin: |
---|
1541 | 1593 | __i915_vma_unpin(vma); |
---|
.. | .. |
---|
1543 | 1595 | err_unref: |
---|
1544 | 1596 | i915_gem_object_put(bo); |
---|
1545 | 1597 | |
---|
1546 | | - dev_priv->perf.oa.oa_buffer.vaddr = NULL; |
---|
1547 | | - dev_priv->perf.oa.oa_buffer.vma = NULL; |
---|
| 1598 | + stream->oa_buffer.vaddr = NULL; |
---|
| 1599 | + stream->oa_buffer.vma = NULL; |
---|
1548 | 1600 | |
---|
1549 | | -unlock: |
---|
1550 | | - mutex_unlock(&dev_priv->drm.struct_mutex); |
---|
1551 | 1601 | return ret; |
---|
1552 | 1602 | } |
---|
1553 | 1603 | |
---|
1554 | | -static void config_oa_regs(struct drm_i915_private *dev_priv, |
---|
1555 | | - const struct i915_oa_reg *regs, |
---|
1556 | | - u32 n_regs) |
---|
| 1604 | +static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs, |
---|
| 1605 | + bool save, i915_reg_t reg, u32 offset, |
---|
| 1606 | + u32 dword_count) |
---|
| 1607 | +{ |
---|
| 1608 | + u32 cmd; |
---|
| 1609 | + u32 d; |
---|
| 1610 | + |
---|
| 1611 | + cmd = save ? MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM; |
---|
| 1612 | + cmd |= MI_SRM_LRM_GLOBAL_GTT; |
---|
| 1613 | + if (INTEL_GEN(stream->perf->i915) >= 8) |
---|
| 1614 | + cmd++; |
---|
| 1615 | + |
---|
| 1616 | + for (d = 0; d < dword_count; d++) { |
---|
| 1617 | + *cs++ = cmd; |
---|
| 1618 | + *cs++ = i915_mmio_reg_offset(reg) + 4 * d; |
---|
| 1619 | + *cs++ = intel_gt_scratch_offset(stream->engine->gt, |
---|
| 1620 | + offset) + 4 * d; |
---|
| 1621 | + *cs++ = 0; |
---|
| 1622 | + } |
---|
| 1623 | + |
---|
| 1624 | + return cs; |
---|
| 1625 | +} |
---|
| 1626 | + |
---|
| 1627 | +static int alloc_noa_wait(struct i915_perf_stream *stream) |
---|
| 1628 | +{ |
---|
| 1629 | + struct drm_i915_private *i915 = stream->perf->i915; |
---|
| 1630 | + struct drm_i915_gem_object *bo; |
---|
| 1631 | + struct i915_vma *vma; |
---|
| 1632 | + const u64 delay_ticks = 0xffffffffffffffff - |
---|
| 1633 | + i915_cs_timestamp_ns_to_ticks(i915, atomic64_read(&stream->perf->noa_programming_delay)); |
---|
| 1634 | + const u32 base = stream->engine->mmio_base; |
---|
| 1635 | +#define CS_GPR(x) GEN8_RING_CS_GPR(base, x) |
---|
| 1636 | + u32 *batch, *ts0, *cs, *jump; |
---|
| 1637 | + int ret, i; |
---|
| 1638 | + enum { |
---|
| 1639 | + START_TS, |
---|
| 1640 | + NOW_TS, |
---|
| 1641 | + DELTA_TS, |
---|
| 1642 | + JUMP_PREDICATE, |
---|
| 1643 | + DELTA_TARGET, |
---|
| 1644 | + N_CS_GPR |
---|
| 1645 | + }; |
---|
| 1646 | + |
---|
| 1647 | + bo = i915_gem_object_create_internal(i915, 4096); |
---|
| 1648 | + if (IS_ERR(bo)) { |
---|
| 1649 | + drm_err(&i915->drm, |
---|
| 1650 | + "Failed to allocate NOA wait batchbuffer\n"); |
---|
| 1651 | + return PTR_ERR(bo); |
---|
| 1652 | + } |
---|
| 1653 | + |
---|
| 1654 | + /* |
---|
| 1655 | + * We pin the buffer in the GGTT because multiple OA config BOs will |
---|
| 1656 | + * jump to this address, so it needs to stay fixed for the lifetime |
---|
| 1657 | + * of the i915/perf stream. |
---|
| 1658 | + */ |
---|
| 1659 | + vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 0, PIN_HIGH); |
---|
| 1660 | + if (IS_ERR(vma)) { |
---|
| 1661 | + ret = PTR_ERR(vma); |
---|
| 1662 | + goto err_unref; |
---|
| 1663 | + } |
---|
| 1664 | + |
---|
| 1665 | + batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB); |
---|
| 1666 | + if (IS_ERR(batch)) { |
---|
| 1667 | + ret = PTR_ERR(batch); |
---|
| 1668 | + goto err_unpin; |
---|
| 1669 | + } |
---|
| 1670 | + |
---|
| 1671 | + /* Save registers. */ |
---|
| 1672 | + for (i = 0; i < N_CS_GPR; i++) |
---|
| 1673 | + cs = save_restore_register( |
---|
| 1674 | + stream, cs, true /* save */, CS_GPR(i), |
---|
| 1675 | + INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); |
---|
| 1676 | + cs = save_restore_register( |
---|
| 1677 | + stream, cs, true /* save */, MI_PREDICATE_RESULT_1, |
---|
| 1678 | + INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); |
---|
| 1679 | + |
---|
| 1680 | + /* First timestamp snapshot location. */ |
---|
| 1681 | + ts0 = cs; |
---|
| 1682 | + |
---|
| 1683 | + /* |
---|
| 1684 | + * Initial snapshot of the timestamp register to implement the wait. |
---|
| 1685 | + * We work with 32-bit values, so clear out the top 32 bits of the |
---|
| 1686 | + * register because the ALU works on 64 bits. |
---|
| 1687 | + */ |
---|
| 1688 | + *cs++ = MI_LOAD_REGISTER_IMM(1); |
---|
| 1689 | + *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)) + 4; |
---|
| 1690 | + *cs++ = 0; |
---|
| 1691 | + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); |
---|
| 1692 | + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base)); |
---|
| 1693 | + *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)); |
---|
| 1694 | + |
---|
| 1695 | + /* |
---|
| 1696 | + * This is the location we're going to jump back into until the |
---|
| 1697 | + * required amount of time has passed. |
---|
| 1698 | + */ |
---|
| 1699 | + jump = cs; |
---|
| 1700 | + |
---|
| 1701 | + /* |
---|
| 1702 | + * Take another snapshot of the timestamp register. Take care to clear |
---|
| 1703 | + * the top 32 bits of CS_GPR(NOW_TS) as we're using it for other |
---|
| 1704 | + * operations below. |
---|
| 1705 | + */ |
---|
| 1706 | + *cs++ = MI_LOAD_REGISTER_IMM(1); |
---|
| 1707 | + *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)) + 4; |
---|
| 1708 | + *cs++ = 0; |
---|
| 1709 | + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); |
---|
| 1710 | + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base)); |
---|
| 1711 | + *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)); |
---|
| 1712 | + |
---|
| 1713 | + /* |
---|
| 1714 | + * Compute the difference between the two timestamps and store the |
---|
| 1715 | + * result in CS_GPR(DELTA_TS). |
---|
| 1716 | + */ |
---|
| 1717 | + *cs++ = MI_MATH(5); |
---|
| 1718 | + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS)); |
---|
| 1719 | + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS)); |
---|
| 1720 | + *cs++ = MI_MATH_SUB; |
---|
| 1721 | + *cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU); |
---|
| 1722 | + *cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF); |
---|
| 1723 | + |
---|
| 1724 | + /* |
---|
| 1725 | + * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the |
---|
| 1726 | + * timestamp has rolled over the 32 bits) into the predicate register |
---|
| 1727 | + * to be used for the predicated jump. |
---|
| 1728 | + */ |
---|
| 1729 | + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); |
---|
| 1730 | + *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)); |
---|
| 1731 | + *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1); |
---|
| 1732 | + |
---|
| 1733 | + /* Restart from the beginning if we had timestamps roll over. */ |
---|
| 1734 | + *cs++ = (INTEL_GEN(i915) < 8 ? |
---|
| 1735 | + MI_BATCH_BUFFER_START : |
---|
| 1736 | + MI_BATCH_BUFFER_START_GEN8) | |
---|
| 1737 | + MI_BATCH_PREDICATE; |
---|
| 1738 | + *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4; |
---|
| 1739 | + *cs++ = 0; |
---|
| 1740 | + |
---|
| 1741 | + /* |
---|
| 1742 | + * Now take the difference between the two previous timestamps and |
---|
| 1743 | + * add it to: ((1 << 64) - 1) - delay_ticks |
---|
| 1744 | + * |
---|
| 1745 | + * When the Carry Flag contains 1, the elapsed time is longer than |
---|
| 1746 | + * the expected delay, and we can exit the wait loop. |
---|
| 1747 | + */ |
---|
| 1748 | + *cs++ = MI_LOAD_REGISTER_IMM(2); |
---|
| 1749 | + *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)); |
---|
| 1750 | + *cs++ = lower_32_bits(delay_ticks); |
---|
| 1751 | + *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)) + 4; |
---|
| 1752 | + *cs++ = upper_32_bits(delay_ticks); |
---|
| 1753 | + |
---|
| 1754 | + *cs++ = MI_MATH(4); |
---|
| 1755 | + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS)); |
---|
| 1756 | + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET)); |
---|
| 1757 | + *cs++ = MI_MATH_ADD; |
---|
| 1758 | + *cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF); |
---|
| 1759 | + |
---|
| 1760 | + *cs++ = MI_ARB_CHECK; |
---|
| 1761 | + |
---|
| 1762 | + /* |
---|
| 1763 | + * Transfer the result into the predicate register to be used for the |
---|
| 1764 | + * predicated jump. |
---|
| 1765 | + */ |
---|
| 1766 | + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); |
---|
| 1767 | + *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)); |
---|
| 1768 | + *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1); |
---|
| 1769 | + |
---|
| 1770 | + /* Predicate the jump. */ |
---|
| 1771 | + *cs++ = (INTEL_GEN(i915) < 8 ? |
---|
| 1772 | + MI_BATCH_BUFFER_START : |
---|
| 1773 | + MI_BATCH_BUFFER_START_GEN8) | |
---|
| 1774 | + MI_BATCH_PREDICATE; |
---|
| 1775 | + *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4; |
---|
| 1776 | + *cs++ = 0; |
---|
| 1777 | + |
---|
| 1778 | + /* Restore registers. */ |
---|
| 1779 | + for (i = 0; i < N_CS_GPR; i++) |
---|
| 1780 | + cs = save_restore_register( |
---|
| 1781 | + stream, cs, false /* restore */, CS_GPR(i), |
---|
| 1782 | + INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); |
---|
| 1783 | + cs = save_restore_register( |
---|
| 1784 | + stream, cs, false /* restore */, MI_PREDICATE_RESULT_1, |
---|
| 1785 | + INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); |
---|
| 1786 | + |
---|
| 1787 | + /* And return to the ring. */ |
---|
| 1788 | + *cs++ = MI_BATCH_BUFFER_END; |
---|
| 1789 | + |
---|
| 1790 | + GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch)); |
---|
| 1791 | + |
---|
| 1792 | + i915_gem_object_flush_map(bo); |
---|
| 1793 | + __i915_gem_object_release_map(bo); |
---|
| 1794 | + |
---|
| 1795 | + stream->noa_wait = vma; |
---|
| 1796 | + return 0; |
---|
| 1797 | + |
---|
| 1798 | +err_unpin: |
---|
| 1799 | + i915_vma_unpin_and_release(&vma, 0); |
---|
| 1800 | +err_unref: |
---|
| 1801 | + i915_gem_object_put(bo); |
---|
| 1802 | + return ret; |
---|
| 1803 | +} |
---|
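The arithmetic above is worth spelling out: alloc_noa_wait() preloads DELTA_TARGET with (2^64 - 1) - delay_ticks, so the MI_MATH ADD of the elapsed tick count overflows (sets the carry flag) exactly once more than delay_ticks have elapsed, and the carry-derived predicate stops the jump back to the loop head. A minimal userspace model of that test, assuming nothing beyond standard C:

```c
#include <assert.h>
#include <stdint.h>

/* Model of the MI_MATH carry test above: not kernel code, just the math. */
static int noa_wait_done(uint64_t start_ts, uint64_t now_ts,
			 uint64_t delay_ticks)
{
	uint64_t delta = now_ts - start_ts;          /* DELTA_TS */
	uint64_t target = UINT64_MAX - delay_ticks;  /* DELTA_TARGET */

	/* a 64-bit ADD carries out iff delta + target wraps past 2^64 - 1 */
	return delta > UINT64_MAX - target;          /* i.e. delta > delay_ticks */
}

int main(void)
{
	assert(!noa_wait_done(100, 150, 100)); /* 50 ticks elapsed: keep looping */
	assert(noa_wait_done(100, 250, 100));  /* 150 ticks elapsed: exit */
	return 0;
}
```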
| 1804 | + |
---|
| 1805 | +static u32 *write_cs_mi_lri(u32 *cs, |
---|
| 1806 | + const struct i915_oa_reg *reg_data, |
---|
| 1807 | + u32 n_regs) |
---|
1557 | 1808 | { |
---|
1558 | 1809 | u32 i; |
---|
1559 | 1810 | |
---|
1560 | 1811 | for (i = 0; i < n_regs; i++) { |
---|
1561 | | - const struct i915_oa_reg *reg = regs + i; |
---|
| 1812 | + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { |
---|
| 1813 | + u32 n_lri = min_t(u32, |
---|
| 1814 | + n_regs - i, |
---|
| 1815 | + MI_LOAD_REGISTER_IMM_MAX_REGS); |
---|
1562 | 1816 | |
---|
1563 | | - I915_WRITE(reg->addr, reg->value); |
---|
| 1817 | + *cs++ = MI_LOAD_REGISTER_IMM(n_lri); |
---|
| 1818 | + } |
---|
| 1819 | + *cs++ = i915_mmio_reg_offset(reg_data[i].addr); |
---|
| 1820 | + *cs++ = reg_data[i].value; |
---|
1564 | 1821 | } |
---|
| 1822 | + |
---|
| 1823 | + return cs; |
---|
1565 | 1824 | } |
---|
1566 | 1825 | |
---|
1567 | | -static int hsw_enable_metric_set(struct drm_i915_private *dev_priv, |
---|
1568 | | - const struct i915_oa_config *oa_config) |
---|
| 1826 | +static int num_lri_dwords(int num_regs) |
---|
1569 | 1827 | { |
---|
1570 | | - /* PRM: |
---|
| 1828 | + int count = 0; |
---|
| 1829 | + |
---|
| 1830 | + if (num_regs > 0) { |
---|
| 1831 | + count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS); |
---|
| 1832 | + count += num_regs * 2; |
---|
| 1833 | + } |
---|
| 1834 | + |
---|
| 1835 | + return count; |
---|
| 1836 | +} |
---|
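num_lri_dwords() mirrors the stream write_cs_mi_lri() emits: one MI_LOAD_REGISTER_IMM header per packet of up to MI_LOAD_REGISTER_IMM_MAX_REGS registers, plus an (offset, value) dword pair per register. A worked check of that sizing, assuming an illustrative packet limit of 126 registers:

```c
#include <assert.h>

#define MAX_REGS 126 /* assumed MI_LOAD_REGISTER_IMM_MAX_REGS for the example */

static int lri_dwords(int num_regs)
{
	if (num_regs <= 0)
		return 0;

	/* one header per packet + two dwords (offset, value) per register */
	return (num_regs + MAX_REGS - 1) / MAX_REGS + num_regs * 2;
}

int main(void)
{
	assert(lri_dwords(126) == 1 + 252); /* one full packet */
	assert(lri_dwords(200) == 2 + 400); /* spills into a second packet */
	return 0;
}
```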
| 1837 | + |
---|
| 1838 | +static struct i915_oa_config_bo * |
---|
| 1839 | +alloc_oa_config_buffer(struct i915_perf_stream *stream, |
---|
| 1840 | + struct i915_oa_config *oa_config) |
---|
| 1841 | +{ |
---|
| 1842 | + struct drm_i915_gem_object *obj; |
---|
| 1843 | + struct i915_oa_config_bo *oa_bo; |
---|
| 1844 | + size_t config_length = 0; |
---|
| 1845 | + u32 *cs; |
---|
| 1846 | + int err; |
---|
| 1847 | + |
---|
| 1848 | + oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL); |
---|
| 1849 | + if (!oa_bo) |
---|
| 1850 | + return ERR_PTR(-ENOMEM); |
---|
| 1851 | + |
---|
| 1852 | + config_length += num_lri_dwords(oa_config->mux_regs_len); |
---|
| 1853 | + config_length += num_lri_dwords(oa_config->b_counter_regs_len); |
---|
| 1854 | + config_length += num_lri_dwords(oa_config->flex_regs_len); |
---|
| 1855 | + config_length += 3; /* MI_BATCH_BUFFER_START */ |
---|
| 1856 | + config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE); |
---|
| 1857 | + |
---|
| 1858 | + obj = i915_gem_object_create_shmem(stream->perf->i915, config_length); |
---|
| 1859 | + if (IS_ERR(obj)) { |
---|
| 1860 | + err = PTR_ERR(obj); |
---|
| 1861 | + goto err_free; |
---|
| 1862 | + } |
---|
| 1863 | + |
---|
| 1864 | + cs = i915_gem_object_pin_map(obj, I915_MAP_WB); |
---|
| 1865 | + if (IS_ERR(cs)) { |
---|
| 1866 | + err = PTR_ERR(cs); |
---|
| 1867 | + goto err_oa_bo; |
---|
| 1868 | + } |
---|
| 1869 | + |
---|
| 1870 | + cs = write_cs_mi_lri(cs, |
---|
| 1871 | + oa_config->mux_regs, |
---|
| 1872 | + oa_config->mux_regs_len); |
---|
| 1873 | + cs = write_cs_mi_lri(cs, |
---|
| 1874 | + oa_config->b_counter_regs, |
---|
| 1875 | + oa_config->b_counter_regs_len); |
---|
| 1876 | + cs = write_cs_mi_lri(cs, |
---|
| 1877 | + oa_config->flex_regs, |
---|
| 1878 | + oa_config->flex_regs_len); |
---|
| 1879 | + |
---|
| 1880 | + /* Jump into the active wait. */ |
---|
| 1881 | + *cs++ = (INTEL_GEN(stream->perf->i915) < 8 ? |
---|
| 1882 | + MI_BATCH_BUFFER_START : |
---|
| 1883 | + MI_BATCH_BUFFER_START_GEN8); |
---|
| 1884 | + *cs++ = i915_ggtt_offset(stream->noa_wait); |
---|
| 1885 | + *cs++ = 0; |
---|
| 1886 | + |
---|
| 1887 | + i915_gem_object_flush_map(obj); |
---|
| 1888 | + __i915_gem_object_release_map(obj); |
---|
| 1889 | + |
---|
| 1890 | + oa_bo->vma = i915_vma_instance(obj, |
---|
| 1891 | + &stream->engine->gt->ggtt->vm, |
---|
| 1892 | + NULL); |
---|
| 1893 | + if (IS_ERR(oa_bo->vma)) { |
---|
| 1894 | + err = PTR_ERR(oa_bo->vma); |
---|
| 1895 | + goto err_oa_bo; |
---|
| 1896 | + } |
---|
| 1897 | + |
---|
| 1898 | + oa_bo->oa_config = i915_oa_config_get(oa_config); |
---|
| 1899 | + llist_add(&oa_bo->node, &stream->oa_config_bos); |
---|
| 1900 | + |
---|
| 1901 | + return oa_bo; |
---|
| 1902 | + |
---|
| 1903 | +err_oa_bo: |
---|
| 1904 | + i915_gem_object_put(obj); |
---|
| 1905 | +err_free: |
---|
| 1906 | + kfree(oa_bo); |
---|
| 1907 | + return ERR_PTR(err); |
---|
| 1908 | +} |
---|
| 1909 | + |
---|
| 1910 | +static struct i915_vma * |
---|
| 1911 | +get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) |
---|
| 1912 | +{ |
---|
| 1913 | + struct i915_oa_config_bo *oa_bo; |
---|
| 1914 | + |
---|
| 1915 | + /* |
---|
| 1916 | + * Look for the buffer in the already allocated BOs attached |
---|
| 1917 | + * to the stream. |
---|
| 1918 | + */ |
---|
| 1919 | + llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) { |
---|
| 1920 | + if (oa_bo->oa_config == oa_config && |
---|
| 1921 | + memcmp(oa_bo->oa_config->uuid, |
---|
| 1922 | + oa_config->uuid, |
---|
| 1923 | + sizeof(oa_config->uuid)) == 0) |
---|
| 1924 | + goto out; |
---|
| 1925 | + } |
---|
| 1926 | + |
---|
| 1927 | + oa_bo = alloc_oa_config_buffer(stream, oa_config); |
---|
| 1928 | + if (IS_ERR(oa_bo)) |
---|
| 1929 | + return ERR_CAST(oa_bo); |
---|
| 1930 | + |
---|
| 1931 | +out: |
---|
| 1932 | + return i915_vma_get(oa_bo->vma); |
---|
| 1933 | +} |
---|
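get_oa_vma() therefore builds each configuration batch at most once per stream and reuses it on later lookups, keyed by the config pointer plus its UUID. A standalone sketch of that cache-on-miss shape (all names hypothetical):

```c
#include <stddef.h>
#include <string.h>

struct cfg { char uuid[36]; };

struct cfg_node {
	struct cfg_node *next;
	const struct cfg *cfg;
};

/* Return the cached node for @key, or NULL so the caller allocates a new
 * one and pushes it onto the list, as alloc_oa_config_buffer() does.
 */
static struct cfg_node *
cfg_cache_lookup(struct cfg_node *head, const struct cfg *key)
{
	struct cfg_node *n;

	for (n = head; n; n = n->next) {
		if (n->cfg == key &&
		    !memcmp(n->cfg->uuid, key->uuid, sizeof(key->uuid)))
			return n;
	}

	return NULL;
}
```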
| 1934 | + |
---|
| 1935 | +static int |
---|
| 1936 | +emit_oa_config(struct i915_perf_stream *stream, |
---|
| 1937 | + struct i915_oa_config *oa_config, |
---|
| 1938 | + struct intel_context *ce, |
---|
| 1939 | + struct i915_active *active) |
---|
| 1940 | +{ |
---|
| 1941 | + struct i915_request *rq; |
---|
| 1942 | + struct i915_vma *vma; |
---|
| 1943 | + struct i915_gem_ww_ctx ww; |
---|
| 1944 | + int err; |
---|
| 1945 | + |
---|
| 1946 | + vma = get_oa_vma(stream, oa_config); |
---|
| 1947 | + if (IS_ERR(vma)) |
---|
| 1948 | + return PTR_ERR(vma); |
---|
| 1949 | + |
---|
| 1950 | + i915_gem_ww_ctx_init(&ww, true); |
---|
| 1951 | +retry: |
---|
| 1952 | + err = i915_gem_object_lock(vma->obj, &ww); |
---|
| 1953 | + if (err) |
---|
| 1954 | + goto err; |
---|
| 1955 | + |
---|
| 1956 | + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH); |
---|
| 1957 | + if (err) |
---|
| 1958 | + goto err; |
---|
| 1959 | + |
---|
| 1960 | + intel_engine_pm_get(ce->engine); |
---|
| 1961 | + rq = i915_request_create(ce); |
---|
| 1962 | + intel_engine_pm_put(ce->engine); |
---|
| 1963 | + if (IS_ERR(rq)) { |
---|
| 1964 | + err = PTR_ERR(rq); |
---|
| 1965 | + goto err_vma_unpin; |
---|
| 1966 | + } |
---|
| 1967 | + |
---|
| 1968 | + if (!IS_ERR_OR_NULL(active)) { |
---|
| 1969 | + /* After all individual context modifications */ |
---|
| 1970 | + err = i915_request_await_active(rq, active, |
---|
| 1971 | + I915_ACTIVE_AWAIT_ACTIVE); |
---|
| 1972 | + if (err) |
---|
| 1973 | + goto err_add_request; |
---|
| 1974 | + |
---|
| 1975 | + err = i915_active_add_request(active, rq); |
---|
| 1976 | + if (err) |
---|
| 1977 | + goto err_add_request; |
---|
| 1978 | + } |
---|
| 1979 | + |
---|
| 1980 | + err = i915_request_await_object(rq, vma->obj, 0); |
---|
| 1981 | + if (!err) |
---|
| 1982 | + err = i915_vma_move_to_active(vma, rq, 0); |
---|
| 1983 | + if (err) |
---|
| 1984 | + goto err_add_request; |
---|
| 1985 | + |
---|
| 1986 | + err = rq->engine->emit_bb_start(rq, |
---|
| 1987 | + vma->node.start, 0, |
---|
| 1988 | + I915_DISPATCH_SECURE); |
---|
| 1989 | + if (err) |
---|
| 1990 | + goto err_add_request; |
---|
| 1991 | + |
---|
| 1992 | +err_add_request: |
---|
| 1993 | + i915_request_add(rq); |
---|
| 1994 | +err_vma_unpin: |
---|
| 1995 | + i915_vma_unpin(vma); |
---|
| 1996 | +err: |
---|
| 1997 | + if (err == -EDEADLK) { |
---|
| 1998 | + err = i915_gem_ww_ctx_backoff(&ww); |
---|
| 1999 | + if (!err) |
---|
| 2000 | + goto retry; |
---|
| 2001 | + } |
---|
| 2002 | + |
---|
| 2003 | + i915_gem_ww_ctx_fini(&ww); |
---|
| 2004 | + i915_vma_put(vma); |
---|
| 2005 | + return err; |
---|
| 2006 | +} |
---|
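The retry:/err: structure in emit_oa_config() is the usual i915 ww-transaction idiom: a -EDEADLK from any lock taken under the i915_gem_ww_ctx means back off (drop the locks already held) and replay the whole sequence. A minimal model of the control flow, with the real locking replaced by a stub:

```c
#include <errno.h>
#include <stdio.h>

/* stub: pretend the first pass loses a lock-ordering race */
static int do_transaction(int attempt)
{
	return attempt == 0 ? -EDEADLK : 0;
}

int main(void)
{
	int attempt = 0;
	int err;

retry:
	err = do_transaction(attempt++);
	if (err == -EDEADLK)
		goto retry; /* i915_gem_ww_ctx_backoff() would wait here */

	printf("completed after %d attempt(s), err = %d\n", attempt, err);
	return 0;
}
```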
| 2007 | + |
---|
| 2008 | +static struct intel_context *oa_context(struct i915_perf_stream *stream) |
---|
| 2009 | +{ |
---|
| 2010 | + return stream->pinned_ctx ?: stream->engine->kernel_context; |
---|
| 2011 | +} |
---|
| 2012 | + |
---|
| 2013 | +static int |
---|
| 2014 | +hsw_enable_metric_set(struct i915_perf_stream *stream, |
---|
| 2015 | + struct i915_active *active) |
---|
| 2016 | +{ |
---|
| 2017 | + struct intel_uncore *uncore = stream->uncore; |
---|
| 2018 | + |
---|
| 2019 | + /* |
---|
| 2020 | + * PRM: |
---|
1571 | 2021 | * |
---|
1572 | 2022 | * OA unit is using “crclk” for its functionality. When trunk |
---|
1573 | 2023 | * level clock gating takes place, OA clock would be gated, |
---|
.. | .. |
---|
1576 | 2026 | * count the events from non-render domain. Unit level clock |
---|
1577 | 2027 | * gating for RCS should also be disabled. |
---|
1578 | 2028 | */ |
---|
1579 | | - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & |
---|
1580 | | - ~GEN7_DOP_CLOCK_GATE_ENABLE)); |
---|
1581 | | - I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | |
---|
1582 | | - GEN6_CSUNIT_CLOCK_GATE_DISABLE)); |
---|
| 2029 | + intel_uncore_rmw(uncore, GEN7_MISCCPCTL, |
---|
| 2030 | + GEN7_DOP_CLOCK_GATE_ENABLE, 0); |
---|
| 2031 | + intel_uncore_rmw(uncore, GEN6_UCGCTL1, |
---|
| 2032 | + 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE); |
---|
1583 | 2033 | |
---|
1584 | | - config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); |
---|
| 2034 | + return emit_oa_config(stream, |
---|
| 2035 | + stream->oa_config, oa_context(stream), |
---|
| 2036 | + active); |
---|
| 2037 | +} |
---|
1585 | 2038 | |
---|
1586 | | - /* It apparently takes a fairly long time for a new MUX |
---|
1587 | | - * configuration to be applied after these register writes. |
---|
1588 | | - * This delay duration was derived empirically based on the |
---|
1589 | | - * render_basic config but hopefully it covers the maximum |
---|
1590 | | - * configuration latency. |
---|
1591 | | - * |
---|
1592 | | - * As a fallback, the checks in _append_oa_reports() to skip |
---|
1593 | | - * invalid OA reports do also seem to work to discard reports |
---|
1594 | | - * generated before this config has completed - albeit not |
---|
1595 | | - * silently. |
---|
1596 | | - * |
---|
1597 | | - * Unfortunately this is essentially a magic number, since we |
---|
1598 | | - * don't currently know of a reliable mechanism for predicting |
---|
1599 | | - * how long the MUX config will take to apply and besides |
---|
1600 | | - * seeing invalid reports we don't know of a reliable way to |
---|
1601 | | - * explicitly check that the MUX config has landed. |
---|
1602 | | - * |
---|
1603 | | - * It's even possible we've mischaracterized the underlying |
---|
1604 | | - * problem - it just seems like the simplest explanation why |
---|
1605 | | - * a delay at this location would mitigate any invalid reports. |
---|
| 2039 | +static void hsw_disable_metric_set(struct i915_perf_stream *stream) |
---|
| 2040 | +{ |
---|
| 2041 | + struct intel_uncore *uncore = stream->uncore; |
---|
| 2042 | + |
---|
| 2043 | + intel_uncore_rmw(uncore, GEN6_UCGCTL1, |
---|
| 2044 | + GEN6_CSUNIT_CLOCK_GATE_DISABLE, 0); |
---|
| 2045 | + intel_uncore_rmw(uncore, GEN7_MISCCPCTL, |
---|
| 2046 | + 0, GEN7_DOP_CLOCK_GATE_ENABLE); |
---|
| 2047 | + |
---|
| 2048 | + intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); |
---|
| 2049 | +} |
---|
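Both helpers now lean on intel_uncore_rmw(uncore, reg, clear, set), which reads the register, clears the clear bits, ors in the set bits and writes the result back, replacing the open-coded I915_READ/I915_WRITE pairs. The semantics, modeled in plain C:

```c
#include <assert.h>
#include <stdint.h>

/* read-modify-write with separate clear and set masks */
static uint32_t rmw(uint32_t old, uint32_t clear, uint32_t set)
{
	return (old & ~clear) | set;
}

int main(void)
{
	uint32_t reg = 0xf0;

	reg = rmw(reg, 0x10, 0x00); /* drop one gate bit, keep the rest */
	assert(reg == 0xe0);
	reg = rmw(reg, 0x00, 0x10); /* restore it */
	assert(reg == 0xf0);
	return 0;
}
```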
| 2050 | + |
---|
| 2051 | +static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config, |
---|
| 2052 | + i915_reg_t reg) |
---|
| 2053 | +{ |
---|
| 2054 | + u32 mmio = i915_mmio_reg_offset(reg); |
---|
| 2055 | + int i; |
---|
| 2056 | + |
---|
| 2057 | + /* |
---|
| 2058 | + * This arbitrary default will select the 'EU FPU0 Pipeline |
---|
| 2059 | + * Active' event. In the future it's anticipated that there |
---|
| 2060 | + * will be an explicit 'No Event' we can select, but not yet... |
---|
1606 | 2061 | */ |
---|
1607 | | - usleep_range(15000, 20000); |
---|
| 2062 | + if (!oa_config) |
---|
| 2063 | + return 0; |
---|
1608 | 2064 | |
---|
1609 | | - config_oa_regs(dev_priv, oa_config->b_counter_regs, |
---|
1610 | | - oa_config->b_counter_regs_len); |
---|
| 2065 | + for (i = 0; i < oa_config->flex_regs_len; i++) { |
---|
| 2066 | + if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio) |
---|
| 2067 | + return oa_config->flex_regs[i].value; |
---|
| 2068 | + } |
---|
1611 | 2069 | |
---|
1612 | 2070 | return 0; |
---|
1613 | 2071 | } |
---|
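oa_config_flex_reg() is a plain linear scan over the config's (addr, value) pairs with a zero fallback, hoisted out of the two old copies of the flex-EU loop. Equivalent standalone form (register offsets illustrative):

```c
#include <assert.h>
#include <stdint.h>

struct oa_reg { uint32_t addr, value; };

static uint32_t flex_lookup(const struct oa_reg *regs, int n, uint32_t mmio)
{
	int i;

	for (i = 0; i < n; i++)
		if (regs[i].addr == mmio)
			return regs[i].value;

	return 0; /* default: the 'EU FPU0 Pipeline Active' event */
}

int main(void)
{
	const struct oa_reg regs[] = { { 0xe458, 0x1 } }; /* assumed offset */

	assert(flex_lookup(regs, 1, 0xe458) == 0x1);
	assert(flex_lookup(regs, 1, 0xe45c) == 0); /* not in config: 0 */
	return 0;
}
```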
1614 | | - |
---|
1615 | | -static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) |
---|
1616 | | -{ |
---|
1617 | | - I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) & |
---|
1618 | | - ~GEN6_CSUNIT_CLOCK_GATE_DISABLE)); |
---|
1619 | | - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) | |
---|
1620 | | - GEN7_DOP_CLOCK_GATE_ENABLE)); |
---|
1621 | | - |
---|
1622 | | - I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & |
---|
1623 | | - ~GT_NOA_ENABLE)); |
---|
1624 | | -} |
---|
1625 | | - |
---|
1626 | 2072 | /* |
---|
1627 | 2073 | * NB: It must always remain pointer safe to run this even if the OA unit |
---|
1628 | 2074 | * has been disabled. |
---|
.. | .. |
---|
1630 | 2076 | * It's fine to put out-of-date values into these per-context registers |
---|
1631 | 2077 | * in the case that the OA unit has been disabled. |
---|
1632 | 2078 | */ |
---|
1633 | | -static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, |
---|
1634 | | - u32 *reg_state, |
---|
1635 | | - const struct i915_oa_config *oa_config) |
---|
| 2079 | +static void |
---|
| 2080 | +gen8_update_reg_state_unlocked(const struct intel_context *ce, |
---|
| 2081 | + const struct i915_perf_stream *stream) |
---|
1636 | 2082 | { |
---|
1637 | | - struct drm_i915_private *dev_priv = ctx->i915; |
---|
1638 | | - u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; |
---|
1639 | | - u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; |
---|
| 2083 | + u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset; |
---|
| 2084 | + u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; |
---|
1640 | 2085 | /* The MMIO offsets for Flex EU registers aren't contiguous */ |
---|
1641 | | - u32 flex_mmio[] = { |
---|
1642 | | - i915_mmio_reg_offset(EU_PERF_CNTL0), |
---|
1643 | | - i915_mmio_reg_offset(EU_PERF_CNTL1), |
---|
1644 | | - i915_mmio_reg_offset(EU_PERF_CNTL2), |
---|
1645 | | - i915_mmio_reg_offset(EU_PERF_CNTL3), |
---|
1646 | | - i915_mmio_reg_offset(EU_PERF_CNTL4), |
---|
1647 | | - i915_mmio_reg_offset(EU_PERF_CNTL5), |
---|
1648 | | - i915_mmio_reg_offset(EU_PERF_CNTL6), |
---|
| 2086 | + i915_reg_t flex_regs[] = { |
---|
| 2087 | + EU_PERF_CNTL0, |
---|
| 2088 | + EU_PERF_CNTL1, |
---|
| 2089 | + EU_PERF_CNTL2, |
---|
| 2090 | + EU_PERF_CNTL3, |
---|
| 2091 | + EU_PERF_CNTL4, |
---|
| 2092 | + EU_PERF_CNTL5, |
---|
| 2093 | + EU_PERF_CNTL6, |
---|
1649 | 2094 | }; |
---|
| 2095 | + u32 *reg_state = ce->lrc_reg_state; |
---|
1650 | 2096 | int i; |
---|
1651 | 2097 | |
---|
1652 | | - reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL); |
---|
1653 | | - reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent << |
---|
1654 | | - GEN8_OA_TIMER_PERIOD_SHIFT) | |
---|
1655 | | - (dev_priv->perf.oa.periodic ? |
---|
1656 | | - GEN8_OA_TIMER_ENABLE : 0) | |
---|
1657 | | - GEN8_OA_COUNTER_RESUME; |
---|
| 2098 | + reg_state[ctx_oactxctrl + 1] = |
---|
| 2099 | + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | |
---|
| 2100 | + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | |
---|
| 2101 | + GEN8_OA_COUNTER_RESUME; |
---|
1658 | 2102 | |
---|
1659 | | - for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) { |
---|
1660 | | - u32 state_offset = ctx_flexeu0 + i * 2; |
---|
1661 | | - u32 mmio = flex_mmio[i]; |
---|
1662 | | - |
---|
1663 | | - /* |
---|
1664 | | - * This arbitrary default will select the 'EU FPU0 Pipeline |
---|
1665 | | - * Active' event. In the future it's anticipated that there |
---|
1666 | | - * will be an explicit 'No Event' we can select, but not yet... |
---|
1667 | | - */ |
---|
1668 | | - u32 value = 0; |
---|
1669 | | - |
---|
1670 | | - if (oa_config) { |
---|
1671 | | - u32 j; |
---|
1672 | | - |
---|
1673 | | - for (j = 0; j < oa_config->flex_regs_len; j++) { |
---|
1674 | | - if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) { |
---|
1675 | | - value = oa_config->flex_regs[j].value; |
---|
1676 | | - break; |
---|
1677 | | - } |
---|
1678 | | - } |
---|
1679 | | - } |
---|
1680 | | - |
---|
1681 | | - reg_state[state_offset] = mmio; |
---|
1682 | | - reg_state[state_offset+1] = value; |
---|
1683 | | - } |
---|
| 2103 | + for (i = 0; i < ARRAY_SIZE(flex_regs); i++) |
---|
| 2104 | + reg_state[ctx_flexeu0 + i * 2 + 1] = |
---|
| 2105 | + oa_config_flex_reg(stream->oa_config, flex_regs[i]); |
---|
1684 | 2106 | } |
---|
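The context image stores registers as (mmio address, value) dword pairs, which is why the rewrite above only touches offset + 1, the value slot, once the per-platform offsets are known. A tiny model of that layout, with an assumed offset and value:

```c
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t reg_state[8] = { 0 };
	const unsigned int ctx_oactxctrl = 2; /* assumed dword offset */

	/* image layout: [offset] = mmio address, [offset + 1] = value */
	reg_state[ctx_oactxctrl] = 0x2360;      /* GEN8_OACTXCONTROL mmio */
	reg_state[ctx_oactxctrl + 1] = 1u << 2; /* illustrative value */

	assert(reg_state[ctx_oactxctrl + 1] == 4);
	return 0;
}
```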
1685 | 2107 | |
---|
1686 | | -/* |
---|
1687 | | - * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This |
---|
1688 | | - * is only used by the kernel context. |
---|
1689 | | - */ |
---|
1690 | | -static int gen8_emit_oa_config(struct i915_request *rq, |
---|
1691 | | - const struct i915_oa_config *oa_config) |
---|
1692 | | -{ |
---|
1693 | | - struct drm_i915_private *dev_priv = rq->i915; |
---|
1694 | | - /* The MMIO offsets for Flex EU registers aren't contiguous */ |
---|
1695 | | - u32 flex_mmio[] = { |
---|
1696 | | - i915_mmio_reg_offset(EU_PERF_CNTL0), |
---|
1697 | | - i915_mmio_reg_offset(EU_PERF_CNTL1), |
---|
1698 | | - i915_mmio_reg_offset(EU_PERF_CNTL2), |
---|
1699 | | - i915_mmio_reg_offset(EU_PERF_CNTL3), |
---|
1700 | | - i915_mmio_reg_offset(EU_PERF_CNTL4), |
---|
1701 | | - i915_mmio_reg_offset(EU_PERF_CNTL5), |
---|
1702 | | - i915_mmio_reg_offset(EU_PERF_CNTL6), |
---|
1703 | | - }; |
---|
1704 | | - u32 *cs; |
---|
1705 | | - int i; |
---|
| 2108 | +struct flex { |
---|
| 2109 | + i915_reg_t reg; |
---|
| 2110 | + u32 offset; |
---|
| 2111 | + u32 value; |
---|
| 2112 | +}; |
---|
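Each flex entry carries enough to program a register through either path that follows: reg feeds a live MI_LOAD_REGISTER_IMM in gen8_load_flex(), while offset is the dword index of the value slot in the saved context image used by gen8_store_flex(). A hypothetical entry (mmio and image offsets are made up for illustration):

```c
typedef unsigned int reg_t; /* stand-in for i915_reg_t in this sketch */

struct flex {
	reg_t reg;           /* mmio register, for LRI emission */
	unsigned int offset; /* dword index of the value in the LRC image */
	unsigned int value;
};

/* hypothetical: a flex-EU counter at mmio 0xe458, value slot 0x2ce */
static const struct flex example = {
	.reg = 0xe458,
	.offset = 0x2ce,
	.value = 0,
};
```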
1706 | 2113 | |
---|
1707 | | - cs = intel_ring_begin(rq, ARRAY_SIZE(flex_mmio) * 2 + 4); |
---|
| 2114 | +static int |
---|
| 2115 | +gen8_store_flex(struct i915_request *rq, |
---|
| 2116 | + struct intel_context *ce, |
---|
| 2117 | + const struct flex *flex, unsigned int count) |
---|
| 2118 | +{ |
---|
| 2119 | + u32 offset; |
---|
| 2120 | + u32 *cs; |
---|
| 2121 | + |
---|
| 2122 | + cs = intel_ring_begin(rq, 4 * count); |
---|
1708 | 2123 | if (IS_ERR(cs)) |
---|
1709 | 2124 | return PTR_ERR(cs); |
---|
1710 | 2125 | |
---|
1711 | | - *cs++ = MI_LOAD_REGISTER_IMM(ARRAY_SIZE(flex_mmio) + 1); |
---|
| 2126 | + offset = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET; |
---|
| 2127 | + do { |
---|
| 2128 | + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; |
---|
| 2129 | + *cs++ = offset + flex->offset * sizeof(u32); |
---|
| 2130 | + *cs++ = 0; |
---|
| 2131 | + *cs++ = flex->value; |
---|
| 2132 | + } while (flex++, --count); |
---|
1712 | 2133 | |
---|
1713 | | - *cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL); |
---|
1714 | | - *cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | |
---|
1715 | | - (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | |
---|
1716 | | - GEN8_OA_COUNTER_RESUME; |
---|
1717 | | - |
---|
1718 | | - for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) { |
---|
1719 | | - u32 mmio = flex_mmio[i]; |
---|
1720 | | - |
---|
1721 | | - /* |
---|
1722 | | - * This arbitrary default will select the 'EU FPU0 Pipeline |
---|
1723 | | - * Active' event. In the future it's anticipated that there |
---|
1724 | | - * will be an explicit 'No Event' we can select, but not |
---|
1725 | | - * yet... |
---|
1726 | | - */ |
---|
1727 | | - u32 value = 0; |
---|
1728 | | - |
---|
1729 | | - if (oa_config) { |
---|
1730 | | - u32 j; |
---|
1731 | | - |
---|
1732 | | - for (j = 0; j < oa_config->flex_regs_len; j++) { |
---|
1733 | | - if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) { |
---|
1734 | | - value = oa_config->flex_regs[j].value; |
---|
1735 | | - break; |
---|
1736 | | - } |
---|
1737 | | - } |
---|
1738 | | - } |
---|
1739 | | - |
---|
1740 | | - *cs++ = mmio; |
---|
1741 | | - *cs++ = value; |
---|
1742 | | - } |
---|
1743 | | - |
---|
1744 | | - *cs++ = MI_NOOP; |
---|
1745 | 2134 | intel_ring_advance(rq, cs); |
---|
1746 | 2135 | |
---|
1747 | 2136 | return 0; |
---|
1748 | 2137 | } |
---|
1749 | 2138 | |
---|
1750 | | -static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv, |
---|
1751 | | - const struct i915_oa_config *oa_config) |
---|
| 2139 | +static int |
---|
| 2140 | +gen8_load_flex(struct i915_request *rq, |
---|
| 2141 | + struct intel_context *ce, |
---|
| 2142 | + const struct flex *flex, unsigned int count) |
---|
1752 | 2143 | { |
---|
1753 | | - struct intel_engine_cs *engine = dev_priv->engine[RCS]; |
---|
1754 | | - struct i915_timeline *timeline; |
---|
| 2144 | + u32 *cs; |
---|
| 2145 | + |
---|
| 2146 | + GEM_BUG_ON(!count || count > 63); |
---|
| 2147 | + |
---|
| 2148 | + cs = intel_ring_begin(rq, 2 * count + 2); |
---|
| 2149 | + if (IS_ERR(cs)) |
---|
| 2150 | + return PTR_ERR(cs); |
---|
| 2151 | + |
---|
| 2152 | + *cs++ = MI_LOAD_REGISTER_IMM(count); |
---|
| 2153 | + do { |
---|
| 2154 | + *cs++ = i915_mmio_reg_offset(flex->reg); |
---|
| 2155 | + *cs++ = flex->value; |
---|
| 2156 | + } while (flex++, --count); |
---|
| 2157 | + *cs++ = MI_NOOP; |
---|
| 2158 | + |
---|
| 2159 | + intel_ring_advance(rq, cs); |
---|
| 2160 | + |
---|
| 2161 | + return 0; |
---|
| 2162 | +} |
---|
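The stream gen8_load_flex() emits is one MI_LOAD_REGISTER_IMM packet followed by (offset, value) pairs and an MI_NOOP to keep the emission length even. Sketch of the resulting dwords, using the standard MI encoding (opcode in bits 28:23, dword length field = 2 * count - 1) and illustrative register offsets:

```c
#include <stdint.h>
#include <stdio.h>

#define MI_LOAD_REGISTER_IMM(n) ((0x22u << 23) | (2u * (n) - 1))
#define MI_NOOP                 0u

int main(void)
{
	const uint32_t cs[] = {
		MI_LOAD_REGISTER_IMM(2),
		0xe458, 0x0, /* (mmio offset, value) - offsets illustrative */
		0xe45c, 0x0,
		MI_NOOP,     /* pad to an even number of dwords */
	};
	size_t i;

	for (i = 0; i < sizeof(cs) / sizeof(cs[0]); i++)
		printf("dword %zu: 0x%08x\n", i, cs[i]);
	return 0;
}
```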
| 2163 | + |
---|
| 2164 | +static int gen8_modify_context(struct intel_context *ce, |
---|
| 2165 | + const struct flex *flex, unsigned int count) |
---|
| 2166 | +{ |
---|
1755 | 2167 | struct i915_request *rq; |
---|
1756 | | - int ret; |
---|
| 2168 | + int err; |
---|
1757 | 2169 | |
---|
1758 | | - lockdep_assert_held(&dev_priv->drm.struct_mutex); |
---|
1759 | | - |
---|
1760 | | - i915_retire_requests(dev_priv); |
---|
1761 | | - |
---|
1762 | | - rq = i915_request_alloc(engine, dev_priv->kernel_context); |
---|
| 2170 | + rq = intel_engine_create_kernel_request(ce->engine); |
---|
1763 | 2171 | if (IS_ERR(rq)) |
---|
1764 | 2172 | return PTR_ERR(rq); |
---|
1765 | 2173 | |
---|
1766 | | - ret = gen8_emit_oa_config(rq, oa_config); |
---|
1767 | | - if (ret) { |
---|
1768 | | - i915_request_add(rq); |
---|
1769 | | - return ret; |
---|
1770 | | - } |
---|
1771 | | - |
---|
1772 | | - /* Queue this switch after all other activity */ |
---|
1773 | | - list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { |
---|
1774 | | - struct i915_request *prev; |
---|
1775 | | - |
---|
1776 | | - prev = i915_gem_active_raw(&timeline->last_request, |
---|
1777 | | - &dev_priv->drm.struct_mutex); |
---|
1778 | | - if (prev) |
---|
1779 | | - i915_request_await_dma_fence(rq, &prev->fence); |
---|
1780 | | - } |
---|
| 2174 | + /* Serialise with the remote context */ |
---|
| 2175 | + err = intel_context_prepare_remote_request(ce, rq); |
---|
| 2176 | + if (err == 0) |
---|
| 2177 | + err = gen8_store_flex(rq, ce, flex, count); |
---|
1781 | 2178 | |
---|
1782 | 2179 | i915_request_add(rq); |
---|
| 2180 | + return err; |
---|
| 2181 | +} |
---|
1783 | 2182 | |
---|
1784 | | - return 0; |
---|
| 2183 | +static int |
---|
| 2184 | +gen8_modify_self(struct intel_context *ce, |
---|
| 2185 | + const struct flex *flex, unsigned int count, |
---|
| 2186 | + struct i915_active *active) |
---|
| 2187 | +{ |
---|
| 2188 | + struct i915_request *rq; |
---|
| 2189 | + int err; |
---|
| 2190 | + |
---|
| 2191 | + intel_engine_pm_get(ce->engine); |
---|
| 2192 | + rq = i915_request_create(ce); |
---|
| 2193 | + intel_engine_pm_put(ce->engine); |
---|
| 2194 | + if (IS_ERR(rq)) |
---|
| 2195 | + return PTR_ERR(rq); |
---|
| 2196 | + |
---|
| 2197 | + if (!IS_ERR_OR_NULL(active)) { |
---|
| 2198 | + err = i915_active_add_request(active, rq); |
---|
| 2199 | + if (err) |
---|
| 2200 | + goto err_add_request; |
---|
| 2201 | + } |
---|
| 2202 | + |
---|
| 2203 | + err = gen8_load_flex(rq, ce, flex, count); |
---|
| 2204 | + if (err) |
---|
| 2205 | + goto err_add_request; |
---|
| 2206 | + |
---|
| 2207 | +err_add_request: |
---|
| 2208 | + i915_request_add(rq); |
---|
| 2209 | + return err; |
---|
| 2210 | +} |
---|
| 2211 | + |
---|
| 2212 | +static int gen8_configure_context(struct i915_gem_context *ctx, |
---|
| 2213 | + struct flex *flex, unsigned int count) |
---|
| 2214 | +{ |
---|
| 2215 | + struct i915_gem_engines_iter it; |
---|
| 2216 | + struct intel_context *ce; |
---|
| 2217 | + int err = 0; |
---|
| 2218 | + |
---|
| 2219 | + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { |
---|
| 2220 | + GEM_BUG_ON(ce == ce->engine->kernel_context); |
---|
| 2221 | + |
---|
| 2222 | + if (ce->engine->class != RENDER_CLASS) |
---|
| 2223 | + continue; |
---|
| 2224 | + |
---|
| 2225 | + /* Otherwise OA settings will be set upon first use */ |
---|
| 2226 | + if (!intel_context_pin_if_active(ce)) |
---|
| 2227 | + continue; |
---|
| 2228 | + |
---|
| 2229 | + flex->value = intel_sseu_make_rpcs(ce->engine->gt, &ce->sseu); |
---|
| 2230 | + err = gen8_modify_context(ce, flex, count); |
---|
| 2231 | + |
---|
| 2232 | + intel_context_unpin(ce); |
---|
| 2233 | + if (err) |
---|
| 2234 | + break; |
---|
| 2235 | + } |
---|
| 2236 | + i915_gem_context_unlock_engines(ctx); |
---|
| 2237 | + |
---|
| 2238 | + return err; |
---|
| 2239 | +} |
---|
| 2240 | + |
---|
| 2241 | +static int gen12_configure_oar_context(struct i915_perf_stream *stream, |
---|
| 2242 | + struct i915_active *active) |
---|
| 2243 | +{ |
---|
| 2244 | + int err; |
---|
| 2245 | + struct intel_context *ce = stream->pinned_ctx; |
---|
| 2246 | + u32 format = stream->oa_buffer.format; |
---|
| 2247 | + struct flex regs_context[] = { |
---|
| 2248 | + { |
---|
| 2249 | + GEN8_OACTXCONTROL, |
---|
| 2250 | + stream->perf->ctx_oactxctrl_offset + 1, |
---|
| 2251 | + active ? GEN8_OA_COUNTER_RESUME : 0, |
---|
| 2252 | + }, |
---|
| 2253 | + }; |
---|
| 2254 | + /* Offsets in regs_lri are not used since this configuration is only |
---|
| 2255 | + * applied using LRI. Initialize the correct offsets for posterity. |
---|
| 2256 | + */ |
---|
| 2257 | +#define GEN12_OAR_OACONTROL_OFFSET 0x5B0 |
---|
| 2258 | + struct flex regs_lri[] = { |
---|
| 2259 | + { |
---|
| 2260 | + GEN12_OAR_OACONTROL, |
---|
| 2261 | + GEN12_OAR_OACONTROL_OFFSET + 1, |
---|
| 2262 | + (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | |
---|
| 2263 | + (active ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0) |
---|
| 2264 | + }, |
---|
| 2265 | + { |
---|
| 2266 | + RING_CONTEXT_CONTROL(ce->engine->mmio_base), |
---|
| 2267 | + CTX_CONTEXT_CONTROL, |
---|
| 2268 | + _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, |
---|
| 2269 | + active ? |
---|
| 2270 | + GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : |
---|
| 2271 | + 0) |
---|
| 2272 | + }, |
---|
| 2273 | + }; |
---|
| 2274 | + |
---|
| 2275 | + /* Modify the context image of the pinned context with regs_context */ |
---|
| 2276 | + err = intel_context_lock_pinned(ce); |
---|
| 2277 | + if (err) |
---|
| 2278 | + return err; |
---|
| 2279 | + |
---|
| 2280 | + err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context)); |
---|
| 2281 | + intel_context_unlock_pinned(ce); |
---|
| 2282 | + if (err) |
---|
| 2283 | + return err; |
---|
| 2284 | + |
---|
| 2285 | + /* Apply regs_lri using LRI with pinned context */ |
---|
| 2286 | + return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active); |
---|
1785 | 2287 | } |
---|
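RING_CONTEXT_CONTROL is a masked register: the top 16 bits of a write select which of the low 16 bits take effect, so unrelated bits survive, which is what the _MASKED_FIELD() above encodes. Modeling the hardware's handling of such a write:

```c
#include <assert.h>
#include <stdint.h>

#define MASKED_FIELD(mask, value) (((mask) << 16) | (value))

/* what a masked register does with an incoming write */
static uint32_t masked_write(uint32_t reg, uint32_t wr)
{
	uint32_t mask = wr >> 16;

	return (reg & ~mask) | (wr & mask);
}

int main(void)
{
	uint32_t reg = 0x00f0;

	reg = masked_write(reg, MASKED_FIELD(0x1, 0x1)); /* set bit 0 only */
	assert(reg == 0x00f1);
	reg = masked_write(reg, MASKED_FIELD(0x1, 0x0)); /* clear bit 0 only */
	assert(reg == 0x00f0);
	return 0;
}
```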
1786 | 2288 | |
---|
1787 | 2289 | /* |
---|
.. | .. |
---|
1807 | 2309 | * per-context OA state. |
---|
1808 | 2310 | * |
---|
1809 | 2311 | * Note: it's only the RCS/Render context that has any OA state. |
---|
| 2312 | + * Note: the first flex register passed must always be R_PWR_CLK_STATE |
---|
1810 | 2313 | */ |
---|
1811 | | -static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, |
---|
1812 | | - const struct i915_oa_config *oa_config) |
---|
| 2314 | +static int |
---|
| 2315 | +oa_configure_all_contexts(struct i915_perf_stream *stream, |
---|
| 2316 | + struct flex *regs, |
---|
| 2317 | + size_t num_regs, |
---|
| 2318 | + struct i915_active *active) |
---|
1813 | 2319 | { |
---|
1814 | | - struct intel_engine_cs *engine = dev_priv->engine[RCS]; |
---|
1815 | | - struct i915_gem_context *ctx; |
---|
1816 | | - int ret; |
---|
1817 | | - unsigned int wait_flags = I915_WAIT_LOCKED; |
---|
| 2320 | + struct drm_i915_private *i915 = stream->perf->i915; |
---|
| 2321 | + struct intel_engine_cs *engine; |
---|
| 2322 | + struct i915_gem_context *ctx, *cn; |
---|
| 2323 | + int err; |
---|
1818 | 2324 | |
---|
1819 | | - lockdep_assert_held(&dev_priv->drm.struct_mutex); |
---|
1820 | | - |
---|
1821 | | - /* Switch away from any user context. */ |
---|
1822 | | - ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config); |
---|
1823 | | - if (ret) |
---|
1824 | | - goto out; |
---|
| 2325 | + lockdep_assert_held(&stream->perf->lock); |
---|
1825 | 2326 | |
---|
1826 | 2327 | /* |
---|
1827 | 2328 | * The OA register config is setup through the context image. This image |
---|
.. | .. |
---|
1833 | 2334 | * this might leave small interval of time where the OA unit is |
---|
1834 | 2335 | * configured at an invalid sampling period. |
---|
1835 | 2336 | * |
---|
1836 | | - * So far the best way to work around this issue seems to be draining |
---|
1837 | | - * the GPU from any submitted work. |
---|
| 2337 | + * Note that since we emit all requests from a single ring, there |
---|
| 2338 | + * is still an implicit global barrier here that may cause a high |
---|
| 2339 | + * priority context to wait for an otherwise independent low priority |
---|
| 2340 | + * context. Contexts idle at the time of reconfiguration are not |
---|
| 2341 | + * trapped behind the barrier. |
---|
1838 | 2342 | */ |
---|
1839 | | - ret = i915_gem_wait_for_idle(dev_priv, |
---|
1840 | | - wait_flags, |
---|
1841 | | - MAX_SCHEDULE_TIMEOUT); |
---|
1842 | | - if (ret) |
---|
1843 | | - goto out; |
---|
1844 | | - |
---|
1845 | | - /* Update all contexts now that we've stalled the submission. */ |
---|
1846 | | - list_for_each_entry(ctx, &dev_priv->contexts.list, link) { |
---|
1847 | | - struct intel_context *ce = to_intel_context(ctx, engine); |
---|
1848 | | - u32 *regs; |
---|
1849 | | - |
---|
1850 | | - /* OA settings will be set upon first use */ |
---|
1851 | | - if (!ce->state) |
---|
| 2343 | + spin_lock(&i915->gem.contexts.lock); |
---|
| 2344 | + list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { |
---|
| 2345 | + if (!kref_get_unless_zero(&ctx->ref)) |
---|
1852 | 2346 | continue; |
---|
1853 | 2347 | |
---|
1854 | | - regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); |
---|
1855 | | - if (IS_ERR(regs)) { |
---|
1856 | | - ret = PTR_ERR(regs); |
---|
1857 | | - goto out; |
---|
| 2348 | + spin_unlock(&i915->gem.contexts.lock); |
---|
| 2349 | + |
---|
| 2350 | + err = gen8_configure_context(ctx, regs, num_regs); |
---|
| 2351 | + if (err) { |
---|
| 2352 | + i915_gem_context_put(ctx); |
---|
| 2353 | + return err; |
---|
1858 | 2354 | } |
---|
1859 | 2355 | |
---|
1860 | | - ce->state->obj->mm.dirty = true; |
---|
1861 | | - regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); |
---|
| 2356 | + spin_lock(&i915->gem.contexts.lock); |
---|
| 2357 | + list_safe_reset_next(ctx, cn, link); |
---|
| 2358 | + i915_gem_context_put(ctx); |
---|
| 2359 | + } |
---|
| 2360 | + spin_unlock(&i915->gem.contexts.lock); |
---|
1862 | 2361 | |
---|
1863 | | - gen8_update_reg_state_unlocked(ctx, regs, oa_config); |
---|
| 2362 | + /* |
---|
| 2363 | + * After updating all other contexts, we need to modify ourselves. |
---|
| 2364 | + * If we don't modify the kernel_context, we do not get events while |
---|
| 2365 | + * idle. |
---|
| 2366 | + */ |
---|
| 2367 | + for_each_uabi_engine(engine, i915) { |
---|
| 2368 | + struct intel_context *ce = engine->kernel_context; |
---|
1864 | 2369 | |
---|
1865 | | - i915_gem_object_unpin_map(ce->state->obj); |
---|
| 2370 | + if (engine->class != RENDER_CLASS) |
---|
| 2371 | + continue; |
---|
| 2372 | + |
---|
| 2373 | + regs[0].value = intel_sseu_make_rpcs(engine->gt, &ce->sseu); |
---|
| 2374 | + |
---|
| 2375 | + err = gen8_modify_self(ce, regs, num_regs, active); |
---|
| 2376 | + if (err) |
---|
| 2377 | + return err; |
---|
1866 | 2378 | } |
---|
1867 | 2379 | |
---|
1868 | | - out: |
---|
1869 | | - return ret; |
---|
| 2380 | + return 0; |
---|
1870 | 2381 | } |
---|
1871 | 2382 | |
---|
1872 | | -static int gen8_enable_metric_set(struct drm_i915_private *dev_priv, |
---|
1873 | | - const struct i915_oa_config *oa_config) |
---|
| 2383 | +static int |
---|
| 2384 | +gen12_configure_all_contexts(struct i915_perf_stream *stream, |
---|
| 2385 | + const struct i915_oa_config *oa_config, |
---|
| 2386 | + struct i915_active *active) |
---|
1874 | 2387 | { |
---|
| 2388 | + struct flex regs[] = { |
---|
| 2389 | + { |
---|
| 2390 | + GEN8_R_PWR_CLK_STATE, |
---|
| 2391 | + CTX_R_PWR_CLK_STATE, |
---|
| 2392 | + }, |
---|
| 2393 | + }; |
---|
| 2394 | + |
---|
| 2395 | + return oa_configure_all_contexts(stream, |
---|
| 2396 | + regs, ARRAY_SIZE(regs), |
---|
| 2397 | + active); |
---|
| 2398 | +} |
---|
| 2399 | + |
---|
| 2400 | +static int |
---|
| 2401 | +lrc_configure_all_contexts(struct i915_perf_stream *stream, |
---|
| 2402 | + const struct i915_oa_config *oa_config, |
---|
| 2403 | + struct i915_active *active) |
---|
| 2404 | +{ |
---|
| 2405 | + /* The MMIO offsets for Flex EU registers aren't contiguous */ |
---|
| 2406 | + const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; |
---|
| 2407 | +#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) |
---|
| 2408 | + struct flex regs[] = { |
---|
| 2409 | + { |
---|
| 2410 | + GEN8_R_PWR_CLK_STATE, |
---|
| 2411 | + CTX_R_PWR_CLK_STATE, |
---|
| 2412 | + }, |
---|
| 2413 | + { |
---|
| 2414 | + GEN8_OACTXCONTROL, |
---|
| 2415 | + stream->perf->ctx_oactxctrl_offset + 1, |
---|
| 2416 | + }, |
---|
| 2417 | + { EU_PERF_CNTL0, ctx_flexeuN(0) }, |
---|
| 2418 | + { EU_PERF_CNTL1, ctx_flexeuN(1) }, |
---|
| 2419 | + { EU_PERF_CNTL2, ctx_flexeuN(2) }, |
---|
| 2420 | + { EU_PERF_CNTL3, ctx_flexeuN(3) }, |
---|
| 2421 | + { EU_PERF_CNTL4, ctx_flexeuN(4) }, |
---|
| 2422 | + { EU_PERF_CNTL5, ctx_flexeuN(5) }, |
---|
| 2423 | + { EU_PERF_CNTL6, ctx_flexeuN(6) }, |
---|
| 2424 | + }; |
---|
| 2425 | +#undef ctx_flexeuN |
---|
| 2426 | + int i; |
---|
| 2427 | + |
---|
| 2428 | + regs[1].value = |
---|
| 2429 | + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | |
---|
| 2430 | + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | |
---|
| 2431 | + GEN8_OA_COUNTER_RESUME; |
---|
| 2432 | + |
---|
| 2433 | + for (i = 2; i < ARRAY_SIZE(regs); i++) |
---|
| 2434 | + regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); |
---|
| 2435 | + |
---|
| 2436 | + return oa_configure_all_contexts(stream, |
---|
| 2437 | + regs, ARRAY_SIZE(regs), |
---|
| 2438 | + active); |
---|
| 2439 | +} |
---|
| 2440 | + |
---|
| 2441 | +static int |
---|
| 2442 | +gen8_enable_metric_set(struct i915_perf_stream *stream, |
---|
| 2443 | + struct i915_active *active) |
---|
| 2444 | +{ |
---|
| 2445 | + struct intel_uncore *uncore = stream->uncore; |
---|
| 2446 | + struct i915_oa_config *oa_config = stream->oa_config; |
---|
1875 | 2447 | int ret; |
---|
1876 | 2448 | |
---|
1877 | 2449 | /* |
---|
.. | .. |
---|
1897 | 2469 | * be read back from automatically triggered reports, as part of the |
---|
1898 | 2470 | * RPT_ID field. |
---|
1899 | 2471 | */ |
---|
1900 | | - if (IS_GEN(dev_priv, 9, 11)) { |
---|
1901 | | - I915_WRITE(GEN8_OA_DEBUG, |
---|
1902 | | - _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | |
---|
1903 | | - GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); |
---|
| 2472 | + if (IS_GEN_RANGE(stream->perf->i915, 9, 11)) { |
---|
| 2473 | + intel_uncore_write(uncore, GEN8_OA_DEBUG, |
---|
| 2474 | + _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | |
---|
| 2475 | + GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); |
---|
1904 | 2476 | } |
---|
1905 | 2477 | |
---|
1906 | 2478 | /* |
---|
.. | .. |
---|
1908 | 2480 | * to make sure all slices/subslices are ON before writing to NOA |
---|
1909 | 2481 | * registers. |
---|
1910 | 2482 | */ |
---|
1911 | | - ret = gen8_configure_all_contexts(dev_priv, oa_config); |
---|
| 2483 | + ret = lrc_configure_all_contexts(stream, oa_config, active); |
---|
1912 | 2484 | if (ret) |
---|
1913 | 2485 | return ret; |
---|
1914 | 2486 | |
---|
1915 | | - config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); |
---|
1916 | | - |
---|
1917 | | - config_oa_regs(dev_priv, oa_config->b_counter_regs, |
---|
1918 | | - oa_config->b_counter_regs_len); |
---|
1919 | | - |
---|
1920 | | - return 0; |
---|
| 2487 | + return emit_oa_config(stream, |
---|
| 2488 | + stream->oa_config, oa_context(stream), |
---|
| 2489 | + active); |
---|
1921 | 2490 | } |
---|
1922 | 2491 | |
---|
1923 | | -static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) |
---|
| 2492 | +static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream) |
---|
1924 | 2493 | { |
---|
1925 | | - /* Reset all contexts' slices/subslices configurations. */ |
---|
1926 | | - gen8_configure_all_contexts(dev_priv, NULL); |
---|
1927 | | - |
---|
1928 | | - I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & |
---|
1929 | | - ~GT_NOA_ENABLE)); |
---|
| 2494 | + return _MASKED_FIELD(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS, |
---|
| 2495 | + (stream->sample_flags & SAMPLE_OA_REPORT) ? |
---|
| 2496 | + 0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); |
---|
1930 | 2497 | } |
---|
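oag_report_ctx_switches() relies on the i915 masked-register convention: the top 16 bits of the written value select which bits the write affects, and the bottom 16 bits carry the new values, so one write can flip a single bit without a read-modify-write cycle. A standalone sketch of that encoding (the bit position is a placeholder, not the real GEN12 definition):

    #include <stdio.h>
    #include <stdint.h>

    /*
     * Sketch of the encoding behind _MASKED_FIELD() / _MASKED_BIT_ENABLE():
     * high 16 bits = write-enable mask, low 16 bits = new bit values. Bits
     * whose mask bit is clear are left untouched by the hardware.
     */
    static uint32_t masked_field(uint32_t mask, uint32_t value)
    {
            return (mask << 16) | value;
    }

    int main(void)
    {
            uint32_t bit = 1u << 5; /* hypothetical ctx-switch-reports bit */

            printf("set bit:   0x%08x\n", masked_field(bit, bit));
            printf("clear bit: 0x%08x\n", masked_field(bit, 0));
            return 0;
    }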
1931 | 2498 | |
---|
1932 | | -static void gen10_disable_metric_set(struct drm_i915_private *dev_priv) |
---|
| 2499 | +static int |
---|
| 2500 | +gen12_enable_metric_set(struct i915_perf_stream *stream, |
---|
| 2501 | + struct i915_active *active) |
---|
1933 | 2502 | { |
---|
| 2503 | + struct intel_uncore *uncore = stream->uncore; |
---|
| 2504 | + struct i915_oa_config *oa_config = stream->oa_config; |
---|
| 2505 | + bool periodic = stream->periodic; |
---|
| 2506 | + u32 period_exponent = stream->period_exponent; |
---|
| 2507 | + int ret; |
---|
| 2508 | + |
---|
| 2509 | + intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, |
---|
| 2510 | + /* Disable clk ratio reports, like previous Gens. */ |
---|
| 2511 | + _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | |
---|
| 2512 | + GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) | |
---|
| 2513 | + /* |
---|
| 2514 | + * If the user didn't require OA reports, instruct |
---|
| 2515 | + * the hardware not to emit ctx switch reports. |
---|
| 2516 | + */ |
---|
| 2517 | + oag_report_ctx_switches(stream)); |
---|
| 2518 | + |
---|
| 2519 | + intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ? |
---|
| 2520 | + (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME | |
---|
| 2521 | + GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE | |
---|
| 2522 | + (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT)) |
---|
| 2523 | + : 0); |
---|
| 2524 | + |
---|
| 2525 | + /* |
---|
| 2526 | + * Update all contexts prior to writing the mux configurations, as we need |
---|
| 2527 | + * to make sure all slices/subslices are ON before writing to NOA |
---|
| 2528 | + * registers. |
---|
| 2529 | + */ |
---|
| 2530 | + ret = gen12_configure_all_contexts(stream, oa_config, active); |
---|
| 2531 | + if (ret) |
---|
| 2532 | + return ret; |
---|
| 2533 | + |
---|
| 2534 | + /* |
---|
| 2535 | + * For Gen12, performance counters are context |
---|
| 2536 | + * saved/restored. Only enable it for the context that |
---|
| 2537 | + * requested this. |
---|
| 2538 | + */ |
---|
| 2539 | + if (stream->ctx) { |
---|
| 2540 | + ret = gen12_configure_oar_context(stream, active); |
---|
| 2541 | + if (ret) |
---|
| 2542 | + return ret; |
---|
| 2543 | + } |
---|
| 2544 | + |
---|
| 2545 | + return emit_oa_config(stream, |
---|
| 2546 | + stream->oa_config, oa_context(stream), |
---|
| 2547 | + active); |
---|
| 2548 | +} |
---|
| 2549 | + |
---|
| 2550 | +static void gen8_disable_metric_set(struct i915_perf_stream *stream) |
---|
| 2551 | +{ |
---|
| 2552 | + struct intel_uncore *uncore = stream->uncore; |
---|
| 2553 | + |
---|
1934 | 2554 | /* Reset all contexts' slices/subslices configurations. */ |
---|
1935 | | - gen8_configure_all_contexts(dev_priv, NULL); |
---|
| 2555 | + lrc_configure_all_contexts(stream, NULL, NULL); |
---|
| 2556 | + |
---|
| 2557 | + intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); |
---|
| 2558 | +} |
---|
| 2559 | + |
---|
| 2560 | +static void gen10_disable_metric_set(struct i915_perf_stream *stream) |
---|
| 2561 | +{ |
---|
| 2562 | + struct intel_uncore *uncore = stream->uncore; |
---|
| 2563 | + |
---|
| 2564 | + /* Reset all contexts' slices/subslices configurations. */ |
---|
| 2565 | + lrc_configure_all_contexts(stream, NULL, NULL); |
---|
1936 | 2566 | |
---|
1937 | 2567 | /* Make sure we disable noa to save power. */ |
---|
1938 | | - I915_WRITE(RPM_CONFIG1, |
---|
1939 | | - I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE); |
---|
| 2568 | + intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); |
---|
1940 | 2569 | } |
---|
1941 | 2570 | |
---|
1942 | | -static void gen7_oa_enable(struct drm_i915_private *dev_priv) |
---|
| 2571 | +static void gen12_disable_metric_set(struct i915_perf_stream *stream) |
---|
1943 | 2572 | { |
---|
1944 | | - struct i915_gem_context *ctx = |
---|
1945 | | - dev_priv->perf.oa.exclusive_stream->ctx; |
---|
1946 | | - u32 ctx_id = dev_priv->perf.oa.specific_ctx_id; |
---|
1947 | | - bool periodic = dev_priv->perf.oa.periodic; |
---|
1948 | | - u32 period_exponent = dev_priv->perf.oa.period_exponent; |
---|
1949 | | - u32 report_format = dev_priv->perf.oa.oa_buffer.format; |
---|
| 2573 | + struct intel_uncore *uncore = stream->uncore; |
---|
| 2574 | + |
---|
| 2575 | + /* Reset all contexts' slices/subslices configurations. */ |
---|
| 2576 | + gen12_configure_all_contexts(stream, NULL, NULL); |
---|
| 2577 | + |
---|
| 2578 | + /* disable the context save/restore or OAR counters */ |
---|
| 2579 | + if (stream->ctx) |
---|
| 2580 | + gen12_configure_oar_context(stream, NULL); |
---|
| 2581 | + |
---|
| 2582 | + /* Make sure we disable noa to save power. */ |
---|
| 2583 | + intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); |
---|
| 2584 | +} |
---|
| 2585 | + |
---|
| 2586 | +static void gen7_oa_enable(struct i915_perf_stream *stream) |
---|
| 2587 | +{ |
---|
| 2588 | + struct intel_uncore *uncore = stream->uncore; |
---|
| 2589 | + struct i915_gem_context *ctx = stream->ctx; |
---|
| 2590 | + u32 ctx_id = stream->specific_ctx_id; |
---|
| 2591 | + bool periodic = stream->periodic; |
---|
| 2592 | + u32 period_exponent = stream->period_exponent; |
---|
| 2593 | + u32 report_format = stream->oa_buffer.format; |
---|
1950 | 2594 | |
---|
1951 | 2595 | /* |
---|
1952 | 2596 | * Reset buf pointers so we don't forward reports from before now. |
---|
.. | .. |
---|
1957 | 2601 | * on the assumption that certain fields are written to zeroed |
---|
1958 | 2602 | * memory, which this helps maintain. |
---|
1959 | 2603 | */ |
---|
1960 | | - gen7_init_oa_buffer(dev_priv); |
---|
| 2604 | + gen7_init_oa_buffer(stream); |
---|
1961 | 2605 | |
---|
1962 | | - I915_WRITE(GEN7_OACONTROL, |
---|
1963 | | - (ctx_id & GEN7_OACONTROL_CTX_MASK) | |
---|
1964 | | - (period_exponent << |
---|
1965 | | - GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | |
---|
1966 | | - (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | |
---|
1967 | | - (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | |
---|
1968 | | - (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) | |
---|
1969 | | - GEN7_OACONTROL_ENABLE); |
---|
| 2606 | + intel_uncore_write(uncore, GEN7_OACONTROL, |
---|
| 2607 | + (ctx_id & GEN7_OACONTROL_CTX_MASK) | |
---|
| 2608 | + (period_exponent << |
---|
| 2609 | + GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | |
---|
| 2610 | + (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | |
---|
| 2611 | + (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | |
---|
| 2612 | + (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) | |
---|
| 2613 | + GEN7_OACONTROL_ENABLE); |
---|
1970 | 2614 | } |
---|
1971 | 2615 | |
---|
1972 | | -static void gen8_oa_enable(struct drm_i915_private *dev_priv) |
---|
| 2616 | +static void gen8_oa_enable(struct i915_perf_stream *stream) |
---|
1973 | 2617 | { |
---|
1974 | | - u32 report_format = dev_priv->perf.oa.oa_buffer.format; |
---|
| 2618 | + struct intel_uncore *uncore = stream->uncore; |
---|
| 2619 | + u32 report_format = stream->oa_buffer.format; |
---|
1975 | 2620 | |
---|
1976 | 2621 | /* |
---|
1977 | 2622 | * Reset buf pointers so we don't forward reports from before now. |
---|
.. | .. |
---|
1982 | 2627 | * on the assumption that certain fields are written to zeroed |
---|
1983 | 2628 | * memory, which this helps maintain. |
---|
1984 | 2629 | */ |
---|
1985 | | - gen8_init_oa_buffer(dev_priv); |
---|
| 2630 | + gen8_init_oa_buffer(stream); |
---|
1986 | 2631 | |
---|
1987 | 2632 | /* |
---|
1988 | 2633 | * Note: we don't rely on the hardware to perform single context |
---|
1989 | 2634 | * filtering and instead filter on the cpu based on the context-id |
---|
1990 | 2635 | * field of reports |
---|
1991 | 2636 | */ |
---|
1992 | | - I915_WRITE(GEN8_OACONTROL, (report_format << |
---|
1993 | | - GEN8_OA_REPORT_FORMAT_SHIFT) | |
---|
1994 | | - GEN8_OA_COUNTER_ENABLE); |
---|
| 2637 | + intel_uncore_write(uncore, GEN8_OACONTROL, |
---|
| 2638 | + (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) | |
---|
| 2639 | + GEN8_OA_COUNTER_ENABLE); |
---|
| 2640 | +} |
---|
| 2641 | + |
---|
| 2642 | +static void gen12_oa_enable(struct i915_perf_stream *stream) |
---|
| 2643 | +{ |
---|
| 2644 | + struct intel_uncore *uncore = stream->uncore; |
---|
| 2645 | + u32 report_format = stream->oa_buffer.format; |
---|
| 2646 | + |
---|
| 2647 | + /* |
---|
| 2648 | + * If we don't want OA reports from the OA buffer, then we don't even |
---|
| 2649 | + * need to program the OAG unit. |
---|
| 2650 | + */ |
---|
| 2651 | + if (!(stream->sample_flags & SAMPLE_OA_REPORT)) |
---|
| 2652 | + return; |
---|
| 2653 | + |
---|
| 2654 | + gen12_init_oa_buffer(stream); |
---|
| 2655 | + |
---|
| 2656 | + intel_uncore_write(uncore, GEN12_OAG_OACONTROL, |
---|
| 2657 | + (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) | |
---|
| 2658 | + GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE); |
---|
1995 | 2659 | } |
---|
1996 | 2660 | |
---|
1997 | 2661 | /** |
---|
.. | .. |
---|
2005 | 2669 | */ |
---|
2006 | 2670 | static void i915_oa_stream_enable(struct i915_perf_stream *stream) |
---|
2007 | 2671 | { |
---|
2008 | | - struct drm_i915_private *dev_priv = stream->dev_priv; |
---|
| 2672 | + stream->pollin = false; |
---|
2009 | 2673 | |
---|
2010 | | - dev_priv->perf.oa.ops.oa_enable(dev_priv); |
---|
| 2674 | + stream->perf->ops.oa_enable(stream); |
---|
2011 | 2675 | |
---|
2012 | | - if (dev_priv->perf.oa.periodic) |
---|
2013 | | - hrtimer_start(&dev_priv->perf.oa.poll_check_timer, |
---|
2014 | | - ns_to_ktime(POLL_PERIOD), |
---|
| 2676 | + if (stream->sample_flags & SAMPLE_OA_REPORT) |
---|
| 2677 | + hrtimer_start(&stream->poll_check_timer, |
---|
| 2678 | + ns_to_ktime(stream->poll_oa_period), |
---|
2015 | 2679 | HRTIMER_MODE_REL_PINNED); |
---|
2016 | 2680 | } |
---|
2017 | 2681 | |
---|
2018 | | -static void gen7_oa_disable(struct drm_i915_private *dev_priv) |
---|
| 2682 | +static void gen7_oa_disable(struct i915_perf_stream *stream) |
---|
2019 | 2683 | { |
---|
2020 | | - I915_WRITE(GEN7_OACONTROL, 0); |
---|
2021 | | - if (intel_wait_for_register(dev_priv, |
---|
| 2684 | + struct intel_uncore *uncore = stream->uncore; |
---|
| 2685 | + |
---|
| 2686 | + intel_uncore_write(uncore, GEN7_OACONTROL, 0); |
---|
| 2687 | + if (intel_wait_for_register(uncore, |
---|
2022 | 2688 | GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0, |
---|
2023 | 2689 | 50)) |
---|
2024 | | - DRM_ERROR("wait for OA to be disabled timed out\n"); |
---|
| 2690 | + drm_err(&stream->perf->i915->drm, |
---|
| 2691 | + "wait for OA to be disabled timed out\n"); |
---|
2025 | 2692 | } |
---|
2026 | 2693 | |
---|
2027 | | -static void gen8_oa_disable(struct drm_i915_private *dev_priv) |
---|
| 2694 | +static void gen8_oa_disable(struct i915_perf_stream *stream) |
---|
2028 | 2695 | { |
---|
2029 | | - I915_WRITE(GEN8_OACONTROL, 0); |
---|
2030 | | - if (intel_wait_for_register(dev_priv, |
---|
| 2696 | + struct intel_uncore *uncore = stream->uncore; |
---|
| 2697 | + |
---|
| 2698 | + intel_uncore_write(uncore, GEN8_OACONTROL, 0); |
---|
| 2699 | + if (intel_wait_for_register(uncore, |
---|
2031 | 2700 | GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0, |
---|
2032 | 2701 | 50)) |
---|
2033 | | - DRM_ERROR("wait for OA to be disabled timed out\n"); |
---|
| 2702 | + drm_err(&stream->perf->i915->drm, |
---|
| 2703 | + "wait for OA to be disabled timed out\n"); |
---|
| 2704 | +} |
---|
| 2705 | + |
---|
| 2706 | +static void gen12_oa_disable(struct i915_perf_stream *stream) |
---|
| 2707 | +{ |
---|
| 2708 | + struct intel_uncore *uncore = stream->uncore; |
---|
| 2709 | + |
---|
| 2710 | + intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0); |
---|
| 2711 | + if (intel_wait_for_register(uncore, |
---|
| 2712 | + GEN12_OAG_OACONTROL, |
---|
| 2713 | + GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0, |
---|
| 2714 | + 50)) |
---|
| 2715 | + drm_err(&stream->perf->i915->drm, |
---|
| 2716 | + "wait for OA to be disabled timed out\n"); |
---|
| 2717 | + |
---|
| 2718 | + intel_uncore_write(uncore, GEN12_OA_TLB_INV_CR, 1); |
---|
| 2719 | + if (intel_wait_for_register(uncore, |
---|
| 2720 | + GEN12_OA_TLB_INV_CR, |
---|
| 2721 | + 1, 0, |
---|
| 2722 | + 50)) |
---|
| 2723 | + drm_err(&stream->perf->i915->drm, |
---|
| 2724 | + "wait for OA tlb invalidate timed out\n"); |
---|
2034 | 2725 | } |
---|
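The new OA TLB invalidation at the end of gen12_oa_disable() follows a common write-then-drain idiom: set the invalidate bit, then poll until the hardware clears it. A hedged kernel-style sketch of the same shape using the generic iopoll helper (the register offset is a placeholder; the driver itself uses intel_wait_for_register() on GEN12_OA_TLB_INV_CR):

    #include <linux/io.h>
    #include <linux/iopoll.h>

    /*
     * Write-then-drain sketch mirroring the tail of gen12_oa_disable():
     * kick the invalidation, then wait up to 50ms for the hardware to
     * clear the bit again. The 0x1000 offset is hypothetical.
     */
    static int oa_tlb_invalidate(void __iomem *regs)
    {
            u32 val;

            writel(1, regs + 0x1000);
            return readl_poll_timeout(regs + 0x1000, val, !(val & 1),
                                      10, 50 * 1000);
    }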
2035 | 2726 | |
---|
2036 | 2727 | /** |
---|
.. | .. |
---|
2043 | 2734 | */ |
---|
2044 | 2735 | static void i915_oa_stream_disable(struct i915_perf_stream *stream) |
---|
2045 | 2736 | { |
---|
2046 | | - struct drm_i915_private *dev_priv = stream->dev_priv; |
---|
| 2737 | + stream->perf->ops.oa_disable(stream); |
---|
2047 | 2738 | |
---|
2048 | | - dev_priv->perf.oa.ops.oa_disable(dev_priv); |
---|
2049 | | - |
---|
2050 | | - if (dev_priv->perf.oa.periodic) |
---|
2051 | | - hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer); |
---|
| 2739 | + if (stream->sample_flags & SAMPLE_OA_REPORT) |
---|
| 2740 | + hrtimer_cancel(&stream->poll_check_timer); |
---|
2052 | 2741 | } |
---|
2053 | 2742 | |
---|
2054 | 2743 | static const struct i915_perf_stream_ops i915_oa_stream_ops = { |
---|
.. | .. |
---|
2059 | 2748 | .poll_wait = i915_oa_poll_wait, |
---|
2060 | 2749 | .read = i915_oa_read, |
---|
2061 | 2750 | }; |
---|
| 2751 | + |
---|
| 2752 | +static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream) |
---|
| 2753 | +{ |
---|
| 2754 | + struct i915_active *active; |
---|
| 2755 | + int err; |
---|
| 2756 | + |
---|
| 2757 | + active = i915_active_create(); |
---|
| 2758 | + if (!active) |
---|
| 2759 | + return -ENOMEM; |
---|
| 2760 | + |
---|
| 2761 | + err = stream->perf->ops.enable_metric_set(stream, active); |
---|
| 2762 | + if (err == 0) |
---|
| 2763 | + __i915_active_wait(active, TASK_UNINTERRUPTIBLE); |
---|
| 2764 | + |
---|
| 2765 | + i915_active_put(active); |
---|
| 2766 | + return err; |
---|
| 2767 | +} |
---|
| 2768 | + |
---|
| 2769 | +static void |
---|
| 2770 | +get_default_sseu_config(struct intel_sseu *out_sseu, |
---|
| 2771 | + struct intel_engine_cs *engine) |
---|
| 2772 | +{ |
---|
| 2773 | + const struct sseu_dev_info *devinfo_sseu = &engine->gt->info.sseu; |
---|
| 2774 | + |
---|
| 2775 | + *out_sseu = intel_sseu_from_device_info(devinfo_sseu); |
---|
| 2776 | + |
---|
| 2777 | + if (IS_GEN(engine->i915, 11)) { |
---|
| 2778 | + /* |
---|
| 2779 | + * We only need subslice count so it doesn't matter which ones |
---|
| 2780 | + * we select - just turn off low bits in the amount of half of |
---|
| 2781 | + * all available subslices per slice. |
---|
| 2782 | + */ |
---|
| 2783 | + out_sseu->subslice_mask = |
---|
| 2784 | + ~(~0 << (hweight8(out_sseu->subslice_mask) / 2)); |
---|
| 2785 | + out_sseu->slice_mask = 0x1; |
---|
| 2786 | + } |
---|
| 2787 | +} |
---|
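The Gen11 branch above computes "half of the available subslices" purely with bit tricks: hweight8() counts the enabled subslices and ~(~0 << n) builds a mask of the low n bits. A standalone sketch of the arithmetic, substituting __builtin_popcount() for the kernel's hweight8():

    #include <stdio.h>

    /*
     * Userspace sketch of the Gen11 default-SSEU mask computation in
     * get_default_sseu_config(): keep half of the enabled subslices by
     * building a mask of the low (count / 2) bits.
     */
    int main(void)
    {
            unsigned int subslice_mask = 0xff; /* example: 8 subslices on */
            int half = __builtin_popcount(subslice_mask) / 2;
            unsigned int out = ~(~0u << half);

            printf("0x%02x -> keep %d -> 0x%02x\n", subslice_mask, half, out);
            return 0;
    }

With eight subslices enabled this yields 0x0f, i.e. the four low subslices.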
| 2788 | + |
---|
| 2789 | +static int |
---|
| 2790 | +get_sseu_config(struct intel_sseu *out_sseu, |
---|
| 2791 | + struct intel_engine_cs *engine, |
---|
| 2792 | + const struct drm_i915_gem_context_param_sseu *drm_sseu) |
---|
| 2793 | +{ |
---|
| 2794 | + if (drm_sseu->engine.engine_class != engine->uabi_class || |
---|
| 2795 | + drm_sseu->engine.engine_instance != engine->uabi_instance) |
---|
| 2796 | + return -EINVAL; |
---|
| 2797 | + |
---|
| 2798 | + return i915_gem_user_to_context_sseu(engine->gt, drm_sseu, out_sseu); |
---|
| 2799 | +} |
---|
2062 | 2800 | |
---|
2063 | 2801 | /** |
---|
2064 | 2802 | * i915_oa_stream_init - validate combined props for OA stream and init |
---|
.. | .. |
---|
2082 | 2820 | struct drm_i915_perf_open_param *param, |
---|
2083 | 2821 | struct perf_open_properties *props) |
---|
2084 | 2822 | { |
---|
2085 | | - struct drm_i915_private *dev_priv = stream->dev_priv; |
---|
| 2823 | + struct drm_i915_private *i915 = stream->perf->i915; |
---|
| 2824 | + struct i915_perf *perf = stream->perf; |
---|
2086 | 2825 | int format_size; |
---|
2087 | 2826 | int ret; |
---|
2088 | 2827 | |
---|
2089 | | - /* If the sysfs metrics/ directory wasn't registered for some |
---|
| 2828 | + if (!props->engine) { |
---|
| 2829 | + DRM_DEBUG("OA engine not specified\n"); |
---|
| 2830 | + return -EINVAL; |
---|
| 2831 | + } |
---|
| 2832 | + |
---|
| 2833 | + /* |
---|
| 2834 | + * If the sysfs metrics/ directory wasn't registered for some |
---|
2090 | 2835 | * reason then don't let userspace try their luck with config |
---|
2091 | 2836 | * IDs |
---|
2092 | 2837 | */ |
---|
2093 | | - if (!dev_priv->perf.metrics_kobj) { |
---|
| 2838 | + if (!perf->metrics_kobj) { |
---|
2094 | 2839 | DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); |
---|
2095 | 2840 | return -EINVAL; |
---|
2096 | 2841 | } |
---|
2097 | 2842 | |
---|
2098 | | - if (!(props->sample_flags & SAMPLE_OA_REPORT)) { |
---|
| 2843 | + if (!(props->sample_flags & SAMPLE_OA_REPORT) && |
---|
| 2844 | + (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) { |
---|
2099 | 2845 | DRM_DEBUG("Only OA report sampling supported\n"); |
---|
2100 | 2846 | return -EINVAL; |
---|
2101 | 2847 | } |
---|
2102 | 2848 | |
---|
2103 | | - if (!dev_priv->perf.oa.ops.init_oa_buffer) { |
---|
| 2849 | + if (!perf->ops.enable_metric_set) { |
---|
2104 | 2850 | DRM_DEBUG("OA unit not supported\n"); |
---|
2105 | 2851 | return -ENODEV; |
---|
2106 | 2852 | } |
---|
2107 | 2853 | |
---|
2108 | | - /* To avoid the complexity of having to accurately filter |
---|
| 2854 | + /* |
---|
| 2855 | + * To avoid the complexity of having to accurately filter |
---|
2109 | 2856 | * counter reports and marshal to the appropriate client |
---|
2110 | 2857 | * we currently only allow exclusive access |
---|
2111 | 2858 | */ |
---|
2112 | | - if (dev_priv->perf.oa.exclusive_stream) { |
---|
| 2859 | + if (perf->exclusive_stream) { |
---|
2113 | 2860 | DRM_DEBUG("OA unit already in use\n"); |
---|
2114 | 2861 | return -EBUSY; |
---|
2115 | 2862 | } |
---|
.. | .. |
---|
2119 | 2866 | return -EINVAL; |
---|
2120 | 2867 | } |
---|
2121 | 2868 | |
---|
2122 | | - /* We set up some ratelimit state to potentially throttle any _NOTES |
---|
2123 | | - * about spurious, invalid OA reports which we don't forward to |
---|
2124 | | - * userspace. |
---|
2125 | | - * |
---|
2126 | | - * The initialization is associated with opening the stream (not driver |
---|
2127 | | - * init) considering we print a _NOTE about any throttling when closing |
---|
2128 | | - * the stream instead of waiting until driver _fini which no one would |
---|
2129 | | - * ever see. |
---|
2130 | | - * |
---|
2131 | | - * Using the same limiting factors as printk_ratelimit() |
---|
2132 | | - */ |
---|
2133 | | - ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs, |
---|
2134 | | - 5 * HZ, 10); |
---|
2135 | | - /* Since we use a DRM_NOTE for spurious reports it would be |
---|
2136 | | - * inconsistent to let __ratelimit() automatically print a warning for |
---|
2137 | | - * throttling. |
---|
2138 | | - */ |
---|
2139 | | - ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs, |
---|
2140 | | - RATELIMIT_MSG_ON_RELEASE); |
---|
| 2869 | + stream->engine = props->engine; |
---|
| 2870 | + stream->uncore = stream->engine->gt->uncore; |
---|
2141 | 2871 | |
---|
2142 | 2872 | stream->sample_size = sizeof(struct drm_i915_perf_record_header); |
---|
2143 | 2873 | |
---|
2144 | | - format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size; |
---|
| 2874 | + format_size = perf->oa_formats[props->oa_format].size; |
---|
2145 | 2875 | |
---|
2146 | | - stream->sample_flags |= SAMPLE_OA_REPORT; |
---|
| 2876 | + stream->sample_flags = props->sample_flags; |
---|
2147 | 2877 | stream->sample_size += format_size; |
---|
2148 | 2878 | |
---|
2149 | | - dev_priv->perf.oa.oa_buffer.format_size = format_size; |
---|
2150 | | - if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0)) |
---|
| 2879 | + stream->oa_buffer.format_size = format_size; |
---|
| 2880 | + if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format_size == 0)) |
---|
2151 | 2881 | return -EINVAL; |
---|
2152 | 2882 | |
---|
2153 | | - dev_priv->perf.oa.oa_buffer.format = |
---|
2154 | | - dev_priv->perf.oa.oa_formats[props->oa_format].format; |
---|
| 2883 | + stream->hold_preemption = props->hold_preemption; |
---|
2155 | 2884 | |
---|
2156 | | - dev_priv->perf.oa.periodic = props->oa_periodic; |
---|
2157 | | - if (dev_priv->perf.oa.periodic) |
---|
2158 | | - dev_priv->perf.oa.period_exponent = props->oa_period_exponent; |
---|
| 2885 | + stream->oa_buffer.format = |
---|
| 2886 | + perf->oa_formats[props->oa_format].format; |
---|
| 2887 | + |
---|
| 2888 | + stream->periodic = props->oa_periodic; |
---|
| 2889 | + if (stream->periodic) |
---|
| 2890 | + stream->period_exponent = props->oa_period_exponent; |
---|
2159 | 2891 | |
---|
2160 | 2892 | if (stream->ctx) { |
---|
2161 | 2893 | ret = oa_get_render_ctx_id(stream); |
---|
.. | .. |
---|
2165 | 2897 | } |
---|
2166 | 2898 | } |
---|
2167 | 2899 | |
---|
2168 | | - ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config); |
---|
| 2900 | + ret = alloc_noa_wait(stream); |
---|
2169 | 2901 | if (ret) { |
---|
| 2902 | + DRM_DEBUG("Unable to allocate NOA wait batch buffer\n"); |
---|
| 2903 | + goto err_noa_wait_alloc; |
---|
| 2904 | + } |
---|
| 2905 | + |
---|
| 2906 | + stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set); |
---|
| 2907 | + if (!stream->oa_config) { |
---|
2170 | 2908 | DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); |
---|
| 2909 | + ret = -EINVAL; |
---|
2171 | 2910 | goto err_config; |
---|
2172 | 2911 | } |
---|
2173 | 2912 | |
---|
.. | .. |
---|
2183 | 2922 | * In our case we are expecting that taking pm + FORCEWAKE |
---|
2184 | 2923 | * references will effectively disable RC6. |
---|
2185 | 2924 | */ |
---|
2186 | | - intel_runtime_pm_get(dev_priv); |
---|
2187 | | - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); |
---|
| 2925 | + intel_engine_pm_get(stream->engine); |
---|
| 2926 | + intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL); |
---|
2188 | 2927 | |
---|
2189 | | - ret = alloc_oa_buffer(dev_priv); |
---|
| 2928 | + ret = alloc_oa_buffer(stream); |
---|
2190 | 2929 | if (ret) |
---|
2191 | 2930 | goto err_oa_buf_alloc; |
---|
2192 | 2931 | |
---|
2193 | | - ret = i915_mutex_lock_interruptible(&dev_priv->drm); |
---|
2194 | | - if (ret) |
---|
2195 | | - goto err_lock; |
---|
| 2932 | + stream->ops = &i915_oa_stream_ops; |
---|
2196 | 2933 | |
---|
2197 | | - ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv, |
---|
2198 | | - stream->oa_config); |
---|
| 2934 | + perf->sseu = props->sseu; |
---|
| 2935 | + WRITE_ONCE(perf->exclusive_stream, stream); |
---|
| 2936 | + |
---|
| 2937 | + ret = i915_perf_stream_enable_sync(stream); |
---|
2199 | 2938 | if (ret) { |
---|
2200 | 2939 | DRM_DEBUG("Unable to enable metric set\n"); |
---|
2201 | 2940 | goto err_enable; |
---|
2202 | 2941 | } |
---|
2203 | 2942 | |
---|
2204 | | - stream->ops = &i915_oa_stream_ops; |
---|
| 2943 | + DRM_DEBUG("opening stream oa config uuid=%s\n", |
---|
| 2944 | + stream->oa_config->uuid); |
---|
2205 | 2945 | |
---|
2206 | | - dev_priv->perf.oa.exclusive_stream = stream; |
---|
2207 | | - |
---|
2208 | | - mutex_unlock(&dev_priv->drm.struct_mutex); |
---|
| 2946 | + hrtimer_init(&stream->poll_check_timer, |
---|
| 2947 | + CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
---|
| 2948 | + stream->poll_check_timer.function = oa_poll_check_timer_cb; |
---|
| 2949 | + init_waitqueue_head(&stream->poll_wq); |
---|
| 2950 | + spin_lock_init(&stream->oa_buffer.ptr_lock); |
---|
2209 | 2951 | |
---|
2210 | 2952 | return 0; |
---|
2211 | 2953 | |
---|
2212 | 2954 | err_enable: |
---|
2213 | | - dev_priv->perf.oa.ops.disable_metric_set(dev_priv); |
---|
2214 | | - mutex_unlock(&dev_priv->drm.struct_mutex); |
---|
| 2955 | + WRITE_ONCE(perf->exclusive_stream, NULL); |
---|
| 2956 | + perf->ops.disable_metric_set(stream); |
---|
2215 | 2957 | |
---|
2216 | | -err_lock: |
---|
2217 | | - free_oa_buffer(dev_priv); |
---|
| 2958 | + free_oa_buffer(stream); |
---|
2218 | 2959 | |
---|
2219 | 2960 | err_oa_buf_alloc: |
---|
2220 | | - put_oa_config(dev_priv, stream->oa_config); |
---|
| 2961 | + free_oa_configs(stream); |
---|
2221 | 2962 | |
---|
2222 | | - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); |
---|
2223 | | - intel_runtime_pm_put(dev_priv); |
---|
| 2963 | + intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); |
---|
| 2964 | + intel_engine_pm_put(stream->engine); |
---|
2224 | 2965 | |
---|
2225 | 2966 | err_config: |
---|
| 2967 | + free_noa_wait(stream); |
---|
| 2968 | + |
---|
| 2969 | +err_noa_wait_alloc: |
---|
2226 | 2970 | if (stream->ctx) |
---|
2227 | 2971 | oa_put_render_ctx_id(stream); |
---|
2228 | 2972 | |
---|
2229 | 2973 | return ret; |
---|
2230 | 2974 | } |
---|
2231 | 2975 | |
---|
2232 | | -void i915_oa_init_reg_state(struct intel_engine_cs *engine, |
---|
2233 | | - struct i915_gem_context *ctx, |
---|
2234 | | - u32 *reg_state) |
---|
| 2976 | +void i915_oa_init_reg_state(const struct intel_context *ce, |
---|
| 2977 | + const struct intel_engine_cs *engine) |
---|
2235 | 2978 | { |
---|
2236 | 2979 | struct i915_perf_stream *stream; |
---|
2237 | 2980 | |
---|
2238 | | - if (engine->id != RCS) |
---|
| 2981 | + if (engine->class != RENDER_CLASS) |
---|
2239 | 2982 | return; |
---|
2240 | 2983 | |
---|
2241 | | - stream = engine->i915->perf.oa.exclusive_stream; |
---|
2242 | | - if (stream) |
---|
2243 | | - gen8_update_reg_state_unlocked(ctx, reg_state, stream->oa_config); |
---|
2244 | | -} |
---|
2245 | | - |
---|
2246 | | -/** |
---|
2247 | | - * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation |
---|
2248 | | - * @stream: An i915 perf stream |
---|
2249 | | - * @file: An i915 perf stream file |
---|
2250 | | - * @buf: destination buffer given by userspace |
---|
2251 | | - * @count: the number of bytes userspace wants to read |
---|
2252 | | - * @ppos: (inout) file seek position (unused) |
---|
2253 | | - * |
---|
2254 | | - * Besides wrapping &i915_perf_stream_ops->read this provides a common place to |
---|
2255 | | - * ensure that if we've successfully copied any data then reporting that takes |
---|
2256 | | - * precedence over any internal error status, so the data isn't lost. |
---|
2257 | | - * |
---|
2258 | | - * For example ret will be -ENOSPC whenever there is more buffered data than |
---|
2259 | | - * can be copied to userspace, but that's only interesting if we weren't able |
---|
2260 | | - * to copy some data because it implies the userspace buffer is too small to |
---|
2261 | | - * receive a single record (and we never split records). |
---|
2262 | | - * |
---|
2263 | | - * Another case with ret == -EFAULT is more of a grey area since it would seem |
---|
2264 | | - * like bad form for userspace to ask us to overrun its buffer, but the user |
---|
2265 | | - * knows best: |
---|
2266 | | - * |
---|
2267 | | - * http://yarchive.net/comp/linux/partial_reads_writes.html |
---|
2268 | | - * |
---|
2269 | | - * Returns: The number of bytes copied or a negative error code on failure. |
---|
2270 | | - */ |
---|
2271 | | -static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream, |
---|
2272 | | - struct file *file, |
---|
2273 | | - char __user *buf, |
---|
2274 | | - size_t count, |
---|
2275 | | - loff_t *ppos) |
---|
2276 | | -{ |
---|
2277 | | - /* Note we keep the offset (aka bytes read) separate from any |
---|
2278 | | - * error status so that the final check for whether we return |
---|
2279 | | - * the bytes read with a higher precedence than any error (see |
---|
2280 | | - * comment below) doesn't need to be handled/duplicated in |
---|
2281 | | - * stream->ops->read() implementations. |
---|
2282 | | - */ |
---|
2283 | | - size_t offset = 0; |
---|
2284 | | - int ret = stream->ops->read(stream, buf, count, &offset); |
---|
2285 | | - |
---|
2286 | | - return offset ?: (ret ?: -EAGAIN); |
---|
| 2984 | + /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */ |
---|
| 2985 | + stream = READ_ONCE(engine->i915->perf.exclusive_stream); |
---|
| 2986 | + if (stream && INTEL_GEN(stream->perf->i915) < 12) |
---|
| 2987 | + gen8_update_reg_state_unlocked(ce, stream); |
---|
2287 | 2988 | } |
---|
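Note the lock-free handoff here: the open path publishes perf.exclusive_stream with WRITE_ONCE() and this reg-state hook samples it with READ_ONCE(), with lifetime serialisation provided by lrc_configure_all_contexts() rather than perf->lock. A minimal kernel-style sketch of that publish/consume shape (all names illustrative):

    #include <linux/compiler.h>

    /* Illustrative publish/consume pair for a singleton pointer. */
    struct example_stream;
    static struct example_stream *exclusive_stream;

    static void publish(struct example_stream *s)
    {
            WRITE_ONCE(exclusive_stream, s);        /* open path */
    }

    static void consume(void)
    {
            struct example_stream *s = READ_ONCE(exclusive_stream);

            if (s) {
                    /* use s; its lifetime is guaranteed by the caller's
                     * serialisation, not by this read alone */
            }
    }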
2288 | 2989 | |
---|
2289 | 2990 | /** |
---|
.. | .. |
---|
2310 | 3011 | loff_t *ppos) |
---|
2311 | 3012 | { |
---|
2312 | 3013 | struct i915_perf_stream *stream = file->private_data; |
---|
2313 | | - struct drm_i915_private *dev_priv = stream->dev_priv; |
---|
2314 | | - ssize_t ret; |
---|
| 3014 | + struct i915_perf *perf = stream->perf; |
---|
| 3015 | + size_t offset = 0; |
---|
| 3016 | + int ret; |
---|
2315 | 3017 | |
---|
2316 | 3018 | /* To ensure it's handled consistently we simply treat all reads of a |
---|
2317 | 3019 | * disabled stream as an error. In particular it might otherwise lead |
---|
2318 | 3020 | * to a deadlock for blocking file descriptors... |
---|
2319 | 3021 | */ |
---|
2320 | | - if (!stream->enabled) |
---|
| 3022 | + if (!stream->enabled || !(stream->sample_flags & SAMPLE_OA_REPORT)) |
---|
2321 | 3023 | return -EIO; |
---|
2322 | 3024 | |
---|
2323 | 3025 | if (!(file->f_flags & O_NONBLOCK)) { |
---|
.. | .. |
---|
2333 | 3035 | if (ret) |
---|
2334 | 3036 | return ret; |
---|
2335 | 3037 | |
---|
2336 | | - mutex_lock(&dev_priv->perf.lock); |
---|
2337 | | - ret = i915_perf_read_locked(stream, file, |
---|
2338 | | - buf, count, ppos); |
---|
2339 | | - mutex_unlock(&dev_priv->perf.lock); |
---|
2340 | | - } while (ret == -EAGAIN); |
---|
| 3038 | + mutex_lock(&perf->lock); |
---|
| 3039 | + ret = stream->ops->read(stream, buf, count, &offset); |
---|
| 3040 | + mutex_unlock(&perf->lock); |
---|
| 3041 | + } while (!offset && !ret); |
---|
2341 | 3042 | } else { |
---|
2342 | | - mutex_lock(&dev_priv->perf.lock); |
---|
2343 | | - ret = i915_perf_read_locked(stream, file, buf, count, ppos); |
---|
2344 | | - mutex_unlock(&dev_priv->perf.lock); |
---|
| 3043 | + mutex_lock(&perf->lock); |
---|
| 3044 | + ret = stream->ops->read(stream, buf, count, &offset); |
---|
| 3045 | + mutex_unlock(&perf->lock); |
---|
2345 | 3046 | } |
---|
2346 | 3047 | |
---|
2347 | 3048 | /* We allow the poll checking to sometimes report false positive EPOLLIN |
---|
.. | .. |
---|
2351 | 3052 | * and read() returning -EAGAIN. Clearing the oa.pollin state here |
---|
2352 | 3053 | * effectively ensures we back off until the next hrtimer callback |
---|
2353 | 3054 | * before reporting another EPOLLIN event. |
---|
| 3055 | + * The exception to this is if ops->read() returned -ENOSPC, which means |
---|
| 3056 | + * that more OA data is available than could fit in the user-provided |
---|
| 3057 | + * buffer. In this case we want the next poll() call to not block. |
---|
2354 | 3058 | */ |
---|
2355 | | - if (ret >= 0 || ret == -EAGAIN) { |
---|
2356 | | - /* Maybe make ->pollin per-stream state if we support multiple |
---|
2357 | | - * concurrent streams in the future. |
---|
2358 | | - */ |
---|
2359 | | - dev_priv->perf.oa.pollin = false; |
---|
2360 | | - } |
---|
| 3059 | + if (ret != -ENOSPC) |
---|
| 3060 | + stream->pollin = false; |
---|
2361 | 3061 | |
---|
2362 | | - return ret; |
---|
| 3062 | + /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, ... */ |
---|
| 3063 | + return offset ?: (ret ?: -EAGAIN); |
---|
2363 | 3064 | } |
---|
2364 | 3065 | |
---|
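The final statement leans on the GNU `a ?: b` extension (a if a is non-zero, else b): bytes already copied always win over an error, and "no data, no error" becomes -EAGAIN so blocking callers retry. A standalone sketch of the precedence:

    #include <stdio.h>
    #include <errno.h>

    /* Mirror of the return expression in i915_perf_read(). */
    static long read_result(long offset, long ret)
    {
            return offset ?: (ret ?: -EAGAIN);
    }

    int main(void)
    {
            printf("%ld\n", read_result(128, -ENOSPC)); /* 128: data wins  */
            printf("%ld\n", read_result(0, -EFAULT));   /* error passed up */
            printf("%ld\n", read_result(0, 0));         /* -EAGAIN: retry  */
            return 0;
    }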
2365 | 3066 | static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) |
---|
2366 | 3067 | { |
---|
2367 | | - struct drm_i915_private *dev_priv = |
---|
2368 | | - container_of(hrtimer, typeof(*dev_priv), |
---|
2369 | | - perf.oa.poll_check_timer); |
---|
| 3068 | + struct i915_perf_stream *stream = |
---|
| 3069 | + container_of(hrtimer, typeof(*stream), poll_check_timer); |
---|
2370 | 3070 | |
---|
2371 | | - if (oa_buffer_check_unlocked(dev_priv)) { |
---|
2372 | | - dev_priv->perf.oa.pollin = true; |
---|
2373 | | - wake_up(&dev_priv->perf.oa.poll_wq); |
---|
| 3071 | + if (oa_buffer_check_unlocked(stream)) { |
---|
| 3072 | + stream->pollin = true; |
---|
| 3073 | + wake_up(&stream->poll_wq); |
---|
2374 | 3074 | } |
---|
2375 | 3075 | |
---|
2376 | | - hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD)); |
---|
| 3076 | + hrtimer_forward_now(hrtimer, |
---|
| 3077 | + ns_to_ktime(stream->poll_oa_period)); |
---|
2377 | 3078 | |
---|
2378 | 3079 | return HRTIMER_RESTART; |
---|
2379 | 3080 | } |
---|
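oa_poll_check_timer_cb() is the standard self-rearming hrtimer shape: do the periodic work, push the expiry forward by the period, and return HRTIMER_RESTART so the core requeues the timer. A minimal kernel-style sketch (the 100us period is illustrative; the driver uses the per-stream poll_oa_period):

    #include <linux/hrtimer.h>
    #include <linux/ktime.h>

    static enum hrtimer_restart example_poll_cb(struct hrtimer *t)
    {
            /* ... check for new data and wake any waiters ... */

            hrtimer_forward_now(t, ns_to_ktime(100 * NSEC_PER_USEC));
            return HRTIMER_RESTART;
    }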
2380 | 3081 | |
---|
2381 | 3082 | /** |
---|
2382 | 3083 | * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream |
---|
2383 | | - * @dev_priv: i915 device instance |
---|
2384 | 3084 | * @stream: An i915 perf stream |
---|
2385 | 3085 | * @file: An i915 perf stream file |
---|
2386 | 3086 | * @wait: poll() state table |
---|
.. | .. |
---|
2389 | 3089 | * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that |
---|
2390 | 3090 | * will be woken for new stream data. |
---|
2391 | 3091 | * |
---|
2392 | | - * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize |
---|
| 3092 | + * Note: The &perf->lock mutex has been taken to serialize |
---|
2393 | 3093 | * with any non-file-operation driver hooks. |
---|
2394 | 3094 | * |
---|
2395 | 3095 | * Returns: any poll events that are ready without sleeping |
---|
2396 | 3096 | */ |
---|
2397 | | -static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv, |
---|
2398 | | - struct i915_perf_stream *stream, |
---|
2399 | | - struct file *file, |
---|
2400 | | - poll_table *wait) |
---|
| 3097 | +static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream, |
---|
| 3098 | + struct file *file, |
---|
| 3099 | + poll_table *wait) |
---|
2401 | 3100 | { |
---|
2402 | 3101 | __poll_t events = 0; |
---|
2403 | 3102 | |
---|
.. | .. |
---|
2409 | 3108 | * the hrtimer/oa_poll_check_timer_cb to notify us when there are |
---|
2410 | 3109 | * samples to read. |
---|
2411 | 3110 | */ |
---|
2412 | | - if (dev_priv->perf.oa.pollin) |
---|
| 3111 | + if (stream->pollin) |
---|
2413 | 3112 | events |= EPOLLIN; |
---|
2414 | 3113 | |
---|
2415 | 3114 | return events; |
---|
.. | .. |
---|
2431 | 3130 | static __poll_t i915_perf_poll(struct file *file, poll_table *wait) |
---|
2432 | 3131 | { |
---|
2433 | 3132 | struct i915_perf_stream *stream = file->private_data; |
---|
2434 | | - struct drm_i915_private *dev_priv = stream->dev_priv; |
---|
| 3133 | + struct i915_perf *perf = stream->perf; |
---|
2435 | 3134 | __poll_t ret; |
---|
2436 | 3135 | |
---|
2437 | | - mutex_lock(&dev_priv->perf.lock); |
---|
2438 | | - ret = i915_perf_poll_locked(dev_priv, stream, file, wait); |
---|
2439 | | - mutex_unlock(&dev_priv->perf.lock); |
---|
| 3136 | + mutex_lock(&perf->lock); |
---|
| 3137 | + ret = i915_perf_poll_locked(stream, file, wait); |
---|
| 3138 | + mutex_unlock(&perf->lock); |
---|
2440 | 3139 | |
---|
2441 | 3140 | return ret; |
---|
2442 | 3141 | } |
---|
.. | .. |
---|
2461 | 3160 | |
---|
2462 | 3161 | if (stream->ops->enable) |
---|
2463 | 3162 | stream->ops->enable(stream); |
---|
| 3163 | + |
---|
| 3164 | + if (stream->hold_preemption) |
---|
| 3165 | + intel_context_set_nopreempt(stream->pinned_ctx); |
---|
2464 | 3166 | } |
---|
2465 | 3167 | |
---|
2466 | 3168 | /** |
---|
.. | .. |
---|
2485 | 3187 | /* Allow stream->ops->disable() to refer to this */ |
---|
2486 | 3188 | stream->enabled = false; |
---|
2487 | 3189 | |
---|
| 3190 | + if (stream->hold_preemption) |
---|
| 3191 | + intel_context_clear_nopreempt(stream->pinned_ctx); |
---|
| 3192 | + |
---|
2488 | 3193 | if (stream->ops->disable) |
---|
2489 | 3194 | stream->ops->disable(stream); |
---|
| 3195 | +} |
---|
| 3196 | + |
---|
| 3197 | +static long i915_perf_config_locked(struct i915_perf_stream *stream, |
---|
| 3198 | + unsigned long metrics_set) |
---|
| 3199 | +{ |
---|
| 3200 | + struct i915_oa_config *config; |
---|
| 3201 | + long ret = stream->oa_config->id; |
---|
| 3202 | + |
---|
| 3203 | + config = i915_perf_get_oa_config(stream->perf, metrics_set); |
---|
| 3204 | + if (!config) |
---|
| 3205 | + return -EINVAL; |
---|
| 3206 | + |
---|
| 3207 | + if (config != stream->oa_config) { |
---|
| 3208 | + int err; |
---|
| 3209 | + |
---|
| 3210 | + /* |
---|
| 3211 | + * If OA is bound to a specific context, emit the |
---|
| 3212 | + * reconfiguration inline from that context. The update |
---|
| 3213 | + * will then be ordered with respect to submission on that |
---|
| 3214 | + * context. |
---|
| 3215 | + * |
---|
| 3216 | + * When set globally, we use a low priority kernel context, |
---|
| 3217 | + * so it will effectively take effect when idle. |
---|
| 3218 | + */ |
---|
| 3219 | + err = emit_oa_config(stream, config, oa_context(stream), NULL); |
---|
| 3220 | + if (!err) |
---|
| 3221 | + config = xchg(&stream->oa_config, config); |
---|
| 3222 | + else |
---|
| 3223 | + ret = err; |
---|
| 3224 | + } |
---|
| 3225 | + |
---|
| 3226 | + i915_oa_config_put(config); |
---|
| 3227 | + |
---|
| 3228 | + return ret; |
---|
2490 | 3229 | } |
---|
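From userspace, the new I915_PERF_IOCTL_CONFIG request lets a client swap metric sets on a live stream fd without reopening it; on success the ioctl returns the id of the config that was active when the call was made. A hedged usage sketch:

    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    /* Switch the OA configuration of an already-open i915-perf stream. */
    static long switch_metric_set(int stream_fd, unsigned long metrics_set)
    {
            return ioctl(stream_fd, I915_PERF_IOCTL_CONFIG, metrics_set);
    }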
2491 | 3230 | |
---|
2492 | 3231 | /** |
---|
.. | .. |
---|
2495 | 3234 | * @cmd: the ioctl request |
---|
2496 | 3235 | * @arg: the ioctl data |
---|
2497 | 3236 | * |
---|
2498 | | - * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize |
---|
| 3237 | + * Note: The &perf->lock mutex has been taken to serialize |
---|
2499 | 3238 | * with any non-file-operation driver hooks. |
---|
2500 | 3239 | * |
---|
2501 | 3240 | * Returns: zero on success or a negative error code. Returns -EINVAL for |
---|
.. | .. |
---|
2512 | 3251 | case I915_PERF_IOCTL_DISABLE: |
---|
2513 | 3252 | i915_perf_disable_locked(stream); |
---|
2514 | 3253 | return 0; |
---|
| 3254 | + case I915_PERF_IOCTL_CONFIG: |
---|
| 3255 | + return i915_perf_config_locked(stream, arg); |
---|
2515 | 3256 | } |
---|
2516 | 3257 | |
---|
2517 | 3258 | return -EINVAL; |
---|
.. | .. |
---|
2533 | 3274 | unsigned long arg) |
---|
2534 | 3275 | { |
---|
2535 | 3276 | struct i915_perf_stream *stream = file->private_data; |
---|
2536 | | - struct drm_i915_private *dev_priv = stream->dev_priv; |
---|
| 3277 | + struct i915_perf *perf = stream->perf; |
---|
2537 | 3278 | long ret; |
---|
2538 | 3279 | |
---|
2539 | | - mutex_lock(&dev_priv->perf.lock); |
---|
| 3280 | + mutex_lock(&perf->lock); |
---|
2540 | 3281 | ret = i915_perf_ioctl_locked(stream, cmd, arg); |
---|
2541 | | - mutex_unlock(&dev_priv->perf.lock); |
---|
| 3282 | + mutex_unlock(&perf->lock); |
---|
2542 | 3283 | |
---|
2543 | 3284 | return ret; |
---|
2544 | 3285 | } |
---|
.. | .. |
---|
2550 | 3291 | * Frees all resources associated with the given i915 perf @stream, disabling |
---|
2551 | 3292 | * any associated data capture in the process. |
---|
2552 | 3293 | * |
---|
2553 | | - * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize |
---|
| 3294 | + * Note: The &perf->lock mutex has been taken to serialize |
---|
2554 | 3295 | * with any non-file-operation driver hooks. |
---|
2555 | 3296 | */ |
---|
2556 | 3297 | static void i915_perf_destroy_locked(struct i915_perf_stream *stream) |
---|
.. | .. |
---|
2560 | 3301 | |
---|
2561 | 3302 | if (stream->ops->destroy) |
---|
2562 | 3303 | stream->ops->destroy(stream); |
---|
2563 | | - |
---|
2564 | | - list_del(&stream->link); |
---|
2565 | 3304 | |
---|
2566 | 3305 | if (stream->ctx) |
---|
2567 | 3306 | i915_gem_context_put(stream->ctx); |
---|
.. | .. |
---|
2583 | 3322 | static int i915_perf_release(struct inode *inode, struct file *file) |
---|
2584 | 3323 | { |
---|
2585 | 3324 | struct i915_perf_stream *stream = file->private_data; |
---|
2586 | | - struct drm_i915_private *dev_priv = stream->dev_priv; |
---|
| 3325 | + struct i915_perf *perf = stream->perf; |
---|
2587 | 3326 | |
---|
2588 | | - mutex_lock(&dev_priv->perf.lock); |
---|
| 3327 | + mutex_lock(&perf->lock); |
---|
2589 | 3328 | i915_perf_destroy_locked(stream); |
---|
2590 | | - mutex_unlock(&dev_priv->perf.lock); |
---|
| 3329 | + mutex_unlock(&perf->lock); |
---|
| 3330 | + |
---|
| 3331 | + /* Release the reference the perf stream kept on the driver. */ |
---|
| 3332 | + drm_dev_put(&perf->i915->drm); |
---|
2591 | 3333 | |
---|
2592 | 3334 | return 0; |
---|
2593 | 3335 | } |
---|
.. | .. |
---|
2609 | 3351 | |
---|
2610 | 3352 | /** |
---|
2611 | 3353 | * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD |
---|
2612 | | - * @dev_priv: i915 device instance |
---|
| 3354 | + * @perf: i915 perf instance |
---|
2613 | 3355 | * @param: The open parameters passed to 'DRM_I915_PERF_OPEN` |
---|
2614 | 3356 | * @props: individually validated u64 property value pairs |
---|
2615 | 3357 | * @file: drm file |
---|
.. | .. |
---|
2617 | 3359 | * See i915_perf_ioctl_open() for interface details. |
---|
2618 | 3360 | * |
---|
2619 | 3361 | * Implements further stream config validation and stream initialization on |
---|
2620 | | - * behalf of i915_perf_open_ioctl() with the &drm_i915_private->perf.lock mutex |
---|
| 3362 | + * behalf of i915_perf_open_ioctl() with the &perf->lock mutex |
---|
2621 | 3363 | * taken to serialize with any non-file-operation driver hooks. |
---|
2622 | 3364 | * |
---|
2623 | 3365 | * Note: at this point the @props have only been validated in isolation and |
---|
.. | .. |
---|
2632 | 3374 | * Returns: zero on success or a negative error code. |
---|
2633 | 3375 | */ |
---|
2634 | 3376 | static int |
---|
2635 | | -i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, |
---|
| 3377 | +i915_perf_open_ioctl_locked(struct i915_perf *perf, |
---|
2636 | 3378 | struct drm_i915_perf_open_param *param, |
---|
2637 | 3379 | struct perf_open_properties *props, |
---|
2638 | 3380 | struct drm_file *file) |
---|
.. | .. |
---|
2663 | 3405 | * rest of the system, which we consider acceptable for a |
---|
2664 | 3406 | * non-privileged client. |
---|
2665 | 3407 | * |
---|
2666 | | - * For Gen8+ the OA unit no longer supports clock gating off for a |
---|
| 3408 | + * For Gen8->11 the OA unit no longer supports clock gating off for a |
---|
2667 | 3409 | * specific context and the kernel can't securely stop the counters |
---|
2668 | 3410 | * from updating as system-wide / global values. Even though we can |
---|
2669 | 3411 | * filter reports based on the included context ID we can't block |
---|
2670 | 3412 | * clients from seeing the raw / global counter values via |
---|
2671 | 3413 | * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to |
---|
2672 | 3414 | * enable the OA unit by default. |
---|
| 3415 | + * |
---|
| 3416 | + * For Gen12+ we gain a new OAR unit that only monitors the RCS on a |
---|
| 3417 | + * per context basis. So we can relax requirements there if the user |
---|
| 3418 | + * doesn't request global stream access (i.e. query-based sampling |
---|
| 3419 | + * using MI_REPORT_PERF_COUNT). |
---|
2673 | 3420 | */ |
---|
2674 | | - if (IS_HASWELL(dev_priv) && specific_ctx) |
---|
| 3421 | + if (IS_HASWELL(perf->i915) && specific_ctx) |
---|
2675 | 3422 | privileged_op = false; |
---|
| 3423 | + else if (IS_GEN(perf->i915, 12) && specific_ctx && |
---|
| 3424 | + (props->sample_flags & SAMPLE_OA_REPORT) == 0) |
---|
| 3425 | + privileged_op = false; |
---|
| 3426 | + |
---|
| 3427 | + if (props->hold_preemption) { |
---|
| 3428 | + if (!props->single_context) { |
---|
| 3429 | + DRM_DEBUG("preemption disable with no context\n"); |
---|
| 3430 | + ret = -EINVAL; |
---|
| 3431 | + goto err; |
---|
| 3432 | + } |
---|
| 3433 | + privileged_op = true; |
---|
| 3434 | + } |
---|
| 3435 | + |
---|
| 3436 | + /* |
---|
| 3437 | + * Asking for SSEU configuration is a privileged operation. |
---|
| 3438 | + */ |
---|
| 3439 | + if (props->has_sseu) |
---|
| 3440 | + privileged_op = true; |
---|
| 3441 | + else |
---|
| 3442 | + get_default_sseu_config(&props->sseu, props->engine); |
---|
2676 | 3443 | |
---|
2677 | 3444 | /* Similar to perf's kernel.perf_paranoid_cpu sysctl option |
---|
2678 | 3445 | * we check a dev.i915.perf_stream_paranoid sysctl option |
---|
2679 | 3446 | * to determine if it's ok to access system wide OA counters |
---|
2680 | | - * without CAP_SYS_ADMIN privileges. |
---|
| 3447 | + * without CAP_PERFMON or CAP_SYS_ADMIN privileges. |
---|
2681 | 3448 | */ |
---|
2682 | 3449 | if (privileged_op && |
---|
2683 | | - i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { |
---|
2684 | | - DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n"); |
---|
| 3450 | + i915_perf_stream_paranoid && !perfmon_capable()) { |
---|
| 3451 | + DRM_DEBUG("Insufficient privileges to open i915 perf stream\n"); |
---|
2685 | 3452 | ret = -EACCES; |
---|
2686 | 3453 | goto err_ctx; |
---|
2687 | 3454 | } |
---|
.. | .. |
---|
2692 | 3459 | goto err_ctx; |
---|
2693 | 3460 | } |
---|
2694 | 3461 | |
---|
2695 | | - stream->dev_priv = dev_priv; |
---|
| 3462 | + stream->perf = perf; |
---|
2696 | 3463 | stream->ctx = specific_ctx; |
---|
| 3464 | + stream->poll_oa_period = props->poll_oa_period; |
---|
2697 | 3465 | |
---|
2698 | 3466 | ret = i915_oa_stream_init(stream, param, props); |
---|
2699 | 3467 | if (ret) |
---|
.. | .. |
---|
2708 | 3476 | goto err_flags; |
---|
2709 | 3477 | } |
---|
2710 | 3478 | |
---|
2711 | | - list_add(&stream->link, &dev_priv->perf.streams); |
---|
2712 | | - |
---|
2713 | 3479 | if (param->flags & I915_PERF_FLAG_FD_CLOEXEC) |
---|
2714 | 3480 | f_flags |= O_CLOEXEC; |
---|
2715 | 3481 | if (param->flags & I915_PERF_FLAG_FD_NONBLOCK) |
---|
.. | .. |
---|
2718 | 3484 | stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags); |
---|
2719 | 3485 | if (stream_fd < 0) { |
---|
2720 | 3486 | ret = stream_fd; |
---|
2721 | | - goto err_open; |
---|
| 3487 | + goto err_flags; |
---|
2722 | 3488 | } |
---|
2723 | 3489 | |
---|
2724 | 3490 | if (!(param->flags & I915_PERF_FLAG_DISABLED)) |
---|
2725 | 3491 | i915_perf_enable_locked(stream); |
---|
2726 | 3492 | |
---|
| 3493 | + /* Take a reference on the driver that will be kept with stream_fd |
---|
| 3494 | + * until its release. |
---|
| 3495 | + */ |
---|
| 3496 | + drm_dev_get(&perf->i915->drm); |
---|
| 3497 | + |
---|
2727 | 3498 | return stream_fd; |
---|
2728 | 3499 | |
---|
2729 | | -err_open: |
---|
2730 | | - list_del(&stream->link); |
---|
2731 | 3500 | err_flags: |
---|
2732 | 3501 | if (stream->ops->destroy) |
---|
2733 | 3502 | stream->ops->destroy(stream); |
---|
.. | .. |
---|
2740 | 3509 | return ret; |
---|
2741 | 3510 | } |
---|
2742 | 3511 | |
---|
2743 | | -static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) |
---|
| 3512 | +static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) |
---|
2744 | 3513 | { |
---|
2745 | | - return div64_u64(1000000000ULL * (2ULL << exponent), |
---|
2746 | | - 1000ULL * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz); |
---|
| 3514 | + return i915_cs_timestamp_ticks_to_ns(perf->i915, 2ULL << exponent); |
---|
2747 | 3515 | } |
---|
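The OA timer period is (2 << exponent) command-streamer timestamp ticks, so each exponent step doubles the sampling period. A standalone sketch of the conversion (the 12 MHz clock is only an example; the driver derives the real frequency from the device):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t cs_freq_hz = 12000000; /* hypothetical timestamp clock */
            int exponent;

            for (exponent = 0; exponent < 4; exponent++) {
                    uint64_t ticks = 2ull << exponent;

                    printf("exponent %d -> %llu ns\n", exponent,
                           (unsigned long long)(ticks * 1000000000ull /
                                                cs_freq_hz));
            }
            return 0;
    }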
2748 | 3516 | |
---|
2749 | 3517 | /** |
---|
2750 | 3518 | * read_properties_unlocked - validate + copy userspace stream open properties |
---|
2751 | | - * @dev_priv: i915 device instance |
---|
| 3519 | + * @perf: i915 perf instance |
---|
2752 | 3520 | * @uprops: The array of u64 key value pairs given by userspace |
---|
2753 | 3521 | * @n_props: The number of key value pairs expected in @uprops |
---|
2754 | 3522 | * @props: The stream configuration built up while validating properties |
---|
.. | .. |
---|
2761 | 3529 | * we shouldn't validate or assume anything about ordering here. This doesn't |
---|
2762 | 3530 | * rule out defining new properties with ordering requirements in the future. |
---|
2763 | 3531 | */ |
---|
2764 | | -static int read_properties_unlocked(struct drm_i915_private *dev_priv, |
---|
| 3532 | +static int read_properties_unlocked(struct i915_perf *perf, |
---|
2765 | 3533 | u64 __user *uprops, |
---|
2766 | 3534 | u32 n_props, |
---|
2767 | 3535 | struct perf_open_properties *props) |
---|
2768 | 3536 | { |
---|
2769 | 3537 | u64 __user *uprop = uprops; |
---|
2770 | 3538 | u32 i; |
---|
| 3539 | + int ret; |
---|
2771 | 3540 | |
---|
2772 | 3541 | memset(props, 0, sizeof(struct perf_open_properties)); |
---|
| 3542 | + props->poll_oa_period = DEFAULT_POLL_PERIOD_NS; |
---|
2773 | 3543 | |
---|
2774 | 3544 | if (!n_props) { |
---|
2775 | 3545 | DRM_DEBUG("No i915 perf properties given\n"); |
---|
| 3546 | + return -EINVAL; |
---|
| 3547 | + } |
---|
| 3548 | + |
---|
| 3549 | + /* At the moment we only support using i915-perf on the RCS. */ |
---|
| 3550 | + props->engine = intel_engine_lookup_user(perf->i915, |
---|
| 3551 | + I915_ENGINE_CLASS_RENDER, |
---|
| 3552 | + 0); |
---|
| 3553 | + if (!props->engine) { |
---|
| 3554 | + DRM_DEBUG("No RENDER-capable engines\n"); |
---|
2776 | 3555 | return -EINVAL; |
---|
2777 | 3556 | } |
---|
2778 | 3557 | |
---|
.. | .. |
---|
2790 | 3569 | for (i = 0; i < n_props; i++) { |
---|
2791 | 3570 | u64 oa_period, oa_freq_hz; |
---|
2792 | 3571 | u64 id, value; |
---|
2793 | | - int ret; |
---|
2794 | 3572 | |
---|
2795 | 3573 | ret = get_user(id, uprop); |
---|
2796 | 3574 | if (ret) |
---|
.. | .. |
---|
2827 | 3605 | value); |
---|
2828 | 3606 | return -EINVAL; |
---|
2829 | 3607 | } |
---|
2830 | | - if (!dev_priv->perf.oa.oa_formats[value].size) { |
---|
| 3608 | + if (!perf->oa_formats[value].size) { |
---|
2831 | 3609 | DRM_DEBUG("Unsupported OA report format %llu\n", |
---|
2832 | 3610 | value); |
---|
2833 | 3611 | return -EINVAL; |
---|
.. | .. |
---|
2848 | 3626 | */ |
---|
2849 | 3627 | |
---|
2850 | 3628 | BUILD_BUG_ON(sizeof(oa_period) != 8); |
---|
2851 | | - oa_period = oa_exponent_to_ns(dev_priv, value); |
---|
| 3629 | + oa_period = oa_exponent_to_ns(perf, value); |
---|
2852 | 3630 | |
---|
2853 | 3631 | /* This check is primarily to ensure that oa_period <= |
---|
2854 | 3632 | * UINT32_MAX (before passing to do_div which only |
---|
.. | .. |
---|
2863 | 3641 | } else |
---|
2864 | 3642 | oa_freq_hz = 0; |
---|
2865 | 3643 | |
---|
2866 | | - if (oa_freq_hz > i915_oa_max_sample_rate && |
---|
2867 | | - !capable(CAP_SYS_ADMIN)) { |
---|
2868 | | - DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n", |
---|
| 3644 | + if (oa_freq_hz > i915_oa_max_sample_rate && !perfmon_capable()) { |
---|
| 3645 | + DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n", |
---|
2869 | 3646 | i915_oa_max_sample_rate); |
---|
2870 | 3647 | return -EACCES; |
---|
2871 | 3648 | } |
---|
2872 | 3649 | |
---|
2873 | 3650 | props->oa_periodic = true; |
---|
2874 | 3651 | props->oa_period_exponent = value; |
---|
| 3652 | + break; |
---|
| 3653 | + case DRM_I915_PERF_PROP_HOLD_PREEMPTION: |
---|
| 3654 | + props->hold_preemption = !!value; |
---|
| 3655 | + break; |
---|
| 3656 | + case DRM_I915_PERF_PROP_GLOBAL_SSEU: { |
---|
| 3657 | + struct drm_i915_gem_context_param_sseu user_sseu; |
---|
| 3658 | + |
---|
| 3659 | + if (copy_from_user(&user_sseu, |
---|
| 3660 | + u64_to_user_ptr(value), |
---|
| 3661 | + sizeof(user_sseu))) { |
---|
| 3662 | + DRM_DEBUG("Unable to copy global sseu parameter\n"); |
---|
| 3663 | + return -EFAULT; |
---|
| 3664 | + } |
---|
| 3665 | + |
---|
| 3666 | + ret = get_sseu_config(&props->sseu, props->engine, &user_sseu); |
---|
| 3667 | + if (ret) { |
---|
| 3668 | + DRM_DEBUG("Invalid SSEU configuration\n"); |
---|
| 3669 | + return ret; |
---|
| 3670 | + } |
---|
| 3671 | + props->has_sseu = true; |
---|
| 3672 | + break; |
---|
| 3673 | + } |
---|
| 3674 | + case DRM_I915_PERF_PROP_POLL_OA_PERIOD: |
---|
| 3675 | + if (value < 100000 /* 100us */) { |
---|
| 3676 | + DRM_DEBUG("OA availability timer too small (%lluns < 100us)\n", |
---|
| 3677 | + value); |
---|
| 3678 | + return -EINVAL; |
---|
| 3679 | + } |
---|
| 3680 | + props->poll_oa_period = value; |
---|
2875 | 3681 | break; |
---|
2876 | 3682 | case DRM_I915_PERF_PROP_MAX: |
---|
2877 | 3683 | MISSING_CASE(id); |
---|
.. | .. |
---|
2899 | 3705 | * buffered data written by the GPU besides periodic OA metrics. |
---|
2900 | 3706 | * |
---|
2901 | 3707 | * Note we copy the properties from userspace outside of the i915 perf |
---|
2902 | | - * mutex to avoid an awkward lockdep with mmap_sem. |
---|
| 3708 | + * mutex to avoid an awkward lockdep with mmap_lock. |
---|
2903 | 3709 | * |
---|
2904 | 3710 | * Most of the implementation details are handled by |
---|
2905 | | - * i915_perf_open_ioctl_locked() after taking the &drm_i915_private->perf.lock |
---|
| 3711 | + * i915_perf_open_ioctl_locked() after taking the &perf->lock |
---|
2906 | 3712 | * mutex for serializing with any non-file-operation driver hooks. |
---|
2907 | 3713 | * |
---|
2908 | 3714 | * Return: A newly opened i915 Perf stream file descriptor or negative |
---|
.. | .. |
---|
2911 | 3717 | int i915_perf_open_ioctl(struct drm_device *dev, void *data, |
---|
2912 | 3718 | struct drm_file *file) |
---|
2913 | 3719 | { |
---|
2914 | | - struct drm_i915_private *dev_priv = dev->dev_private; |
---|
| 3720 | + struct i915_perf *perf = &to_i915(dev)->perf; |
---|
2915 | 3721 | struct drm_i915_perf_open_param *param = data; |
---|
2916 | 3722 | struct perf_open_properties props; |
---|
2917 | 3723 | u32 known_open_flags; |
---|
2918 | 3724 | int ret; |
---|
2919 | 3725 | |
---|
2920 | | - if (!dev_priv->perf.initialized) { |
---|
| 3726 | + if (!perf->i915) { |
---|
2921 | 3727 | DRM_DEBUG("i915 perf interface not available for this system\n"); |
---|
2922 | 3728 | return -ENOTSUPP; |
---|
2923 | 3729 | } |
---|
.. | .. |
---|
2930 | 3736 | return -EINVAL; |
---|
2931 | 3737 | } |
---|
2932 | 3738 | |
---|
2933 | | - ret = read_properties_unlocked(dev_priv, |
---|
| 3739 | + ret = read_properties_unlocked(perf, |
---|
2934 | 3740 | u64_to_user_ptr(param->properties_ptr), |
---|
2935 | 3741 | param->num_properties, |
---|
2936 | 3742 | &props); |
---|
2937 | 3743 | if (ret) |
---|
2938 | 3744 | return ret; |
---|
2939 | 3745 | |
---|
2940 | | - mutex_lock(&dev_priv->perf.lock); |
---|
2941 | | - ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file); |
---|
2942 | | - mutex_unlock(&dev_priv->perf.lock); |
---|
| 3746 | + mutex_lock(&perf->lock); |
---|
| 3747 | + ret = i915_perf_open_ioctl_locked(perf, param, &props, file); |
---|
| 3748 | + mutex_unlock(&perf->lock); |
---|
2943 | 3749 | |
---|
2944 | 3750 | return ret; |
---|
2945 | 3751 | } |
---|
2946 | 3752 | |
---|
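Putting the uAPI pieces together, a client builds an array of (property, value) pairs and passes it through DRM_IOCTL_I915_PERF_OPEN; on success the return value is the new stream fd. A hedged userspace sketch (the metric set id, report format, exponent, and poll period are illustrative values, not recommendations):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    static int open_oa_stream(int drm_fd)
    {
            uint64_t properties[] = {
                    DRM_I915_PERF_PROP_SAMPLE_OA, 1,
                    DRM_I915_PERF_PROP_OA_METRICS_SET, 1, /* from sysfs metrics/ */
                    DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
                    DRM_I915_PERF_PROP_OA_EXPONENT, 16,
                    DRM_I915_PERF_PROP_POLL_OA_PERIOD, 500000, /* >= 100us */
            };
            struct drm_i915_perf_open_param param = {
                    .flags = I915_PERF_FLAG_FD_CLOEXEC |
                             I915_PERF_FLAG_FD_NONBLOCK,
                    .num_properties = sizeof(properties) /
                                      sizeof(properties[0]) / 2,
                    .properties_ptr = (uintptr_t)properties,
            };

            return ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
    }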
2947 | 3753 | /** |
---|
2948 | 3754 | * i915_perf_register - exposes i915-perf to userspace |
---|
2949 | | - * @dev_priv: i915 device instance |
---|
| 3755 | + * @i915: i915 device instance |
---|
2950 | 3756 | * |
---|
2951 | 3757 | * In particular OA metric sets are advertised under a sysfs metrics/ |
---|
2952 | 3758 | * directory allowing userspace to enumerate valid IDs that can be |
---|
2953 | 3759 | * used to open an i915-perf stream. |
---|
2954 | 3760 | */ |
---|
2955 | | -void i915_perf_register(struct drm_i915_private *dev_priv) |
---|
| 3761 | +void i915_perf_register(struct drm_i915_private *i915) |
---|
2956 | 3762 | { |
---|
2957 | | - int ret; |
---|
| 3763 | + struct i915_perf *perf = &i915->perf; |
---|
2958 | 3764 | |
---|
2959 | | - if (!dev_priv->perf.initialized) |
---|
| 3765 | + if (!perf->i915) |
---|
2960 | 3766 | return; |
---|
2961 | 3767 | |
---|
2962 | 3768 | /* To be sure we're synchronized with an attempted |
---|
2963 | 3769 | * i915_perf_open_ioctl(), considering that we register after |
---|
2964 | 3770 | * being exposed to userspace. |
---|
2965 | 3771 | */ |
---|
2966 | | - mutex_lock(&dev_priv->perf.lock); |
---|
| 3772 | + mutex_lock(&perf->lock); |
---|
2967 | 3773 | |
---|
2968 | | - dev_priv->perf.metrics_kobj = |
---|
| 3774 | + perf->metrics_kobj = |
---|
2969 | 3775 | kobject_create_and_add("metrics", |
---|
2970 | | - &dev_priv->drm.primary->kdev->kobj); |
---|
2971 | | - if (!dev_priv->perf.metrics_kobj) |
---|
2972 | | - goto exit; |
---|
| 3776 | + &i915->drm.primary->kdev->kobj); |
---|
2973 | 3777 | |
---|
2974 | | - sysfs_attr_init(&dev_priv->perf.oa.test_config.sysfs_metric_id.attr); |
---|
2975 | | - |
---|
2976 | | - if (IS_HASWELL(dev_priv)) { |
---|
2977 | | - i915_perf_load_test_config_hsw(dev_priv); |
---|
2978 | | - } else if (IS_BROADWELL(dev_priv)) { |
---|
2979 | | - i915_perf_load_test_config_bdw(dev_priv); |
---|
2980 | | - } else if (IS_CHERRYVIEW(dev_priv)) { |
---|
2981 | | - i915_perf_load_test_config_chv(dev_priv); |
---|
2982 | | - } else if (IS_SKYLAKE(dev_priv)) { |
---|
2983 | | - if (IS_SKL_GT2(dev_priv)) |
---|
2984 | | - i915_perf_load_test_config_sklgt2(dev_priv); |
---|
2985 | | - else if (IS_SKL_GT3(dev_priv)) |
---|
2986 | | - i915_perf_load_test_config_sklgt3(dev_priv); |
---|
2987 | | - else if (IS_SKL_GT4(dev_priv)) |
---|
2988 | | - i915_perf_load_test_config_sklgt4(dev_priv); |
---|
2989 | | - } else if (IS_BROXTON(dev_priv)) { |
---|
2990 | | - i915_perf_load_test_config_bxt(dev_priv); |
---|
2991 | | - } else if (IS_KABYLAKE(dev_priv)) { |
---|
2992 | | - if (IS_KBL_GT2(dev_priv)) |
---|
2993 | | - i915_perf_load_test_config_kblgt2(dev_priv); |
---|
2994 | | - else if (IS_KBL_GT3(dev_priv)) |
---|
2995 | | - i915_perf_load_test_config_kblgt3(dev_priv); |
---|
2996 | | - } else if (IS_GEMINILAKE(dev_priv)) { |
---|
2997 | | - i915_perf_load_test_config_glk(dev_priv); |
---|
2998 | | - } else if (IS_COFFEELAKE(dev_priv)) { |
---|
2999 | | - if (IS_CFL_GT2(dev_priv)) |
---|
3000 | | - i915_perf_load_test_config_cflgt2(dev_priv); |
---|
3001 | | - if (IS_CFL_GT3(dev_priv)) |
---|
3002 | | - i915_perf_load_test_config_cflgt3(dev_priv); |
---|
3003 | | - } else if (IS_CANNONLAKE(dev_priv)) { |
---|
3004 | | - i915_perf_load_test_config_cnl(dev_priv); |
---|
3005 | | - } else if (IS_ICELAKE(dev_priv)) { |
---|
3006 | | - i915_perf_load_test_config_icl(dev_priv); |
---|
3007 | | - } |
---|
3008 | | - |
---|
3009 | | - if (dev_priv->perf.oa.test_config.id == 0) |
---|
3010 | | - goto sysfs_error; |
---|
3011 | | - |
---|
3012 | | - ret = sysfs_create_group(dev_priv->perf.metrics_kobj, |
---|
3013 | | - &dev_priv->perf.oa.test_config.sysfs_metric); |
---|
3014 | | - if (ret) |
---|
3015 | | - goto sysfs_error; |
---|
3016 | | - |
---|
3017 | | - atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1); |
---|
3018 | | - |
---|
3019 | | - goto exit; |
---|
3020 | | - |
---|
3021 | | -sysfs_error: |
---|
3022 | | - kobject_put(dev_priv->perf.metrics_kobj); |
---|
3023 | | - dev_priv->perf.metrics_kobj = NULL; |
---|
3024 | | - |
---|
3025 | | -exit: |
---|
3026 | | - mutex_unlock(&dev_priv->perf.lock); |
---|
| 3778 | + mutex_unlock(&perf->lock); |
---|
3027 | 3779 | } |
---|
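The sysfs layout created here can be walked from userspace to map config UUIDs to the integer ids accepted by DRM_I915_PERF_PROP_OA_METRICS_SET. A small sketch, assuming the device node is card0 (pick the right card for multi-GPU systems):

    #include <dirent.h>
    #include <stdio.h>

    static void list_metric_sets(void)
    {
            const char *dir = "/sys/class/drm/card0/metrics";
            DIR *d = opendir(dir);
            struct dirent *ent;

            if (!d)
                    return;

            /*
             * Each subdirectory is named by a config UUID and holds an
             * 'id' file with the integer usable as the OA metrics set.
             */
            while ((ent = readdir(d)) != NULL) {
                    char path[512];
                    FILE *f;
                    int id;

                    if (ent->d_name[0] == '.')
                            continue;

                    snprintf(path, sizeof(path), "%s/%s/id", dir, ent->d_name);
                    f = fopen(path, "r");
                    if (f && fscanf(f, "%d", &id) == 1)
                            printf("%s -> id %d\n", ent->d_name, id);
                    if (f)
                            fclose(f);
            }
            closedir(d);
    }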
3028 | 3780 | |
---|
3029 | 3781 | /** |
---|
3030 | 3782 | * i915_perf_unregister - hide i915-perf from userspace |
---|
3031 | | - * @dev_priv: i915 device instance |
---|
| 3783 | + * @i915: i915 device instance |
---|
3032 | 3784 | * |
---|
3033 | 3785 | * i915-perf state cleanup is split up into an 'unregister' and |
---|
3034 | 3786 | * 'deinit' phase where the interface is first hidden from |
---|
3035 | 3787 | * userspace by i915_perf_unregister() before cleaning up |
---|
3036 | 3788 | * remaining state in i915_perf_fini(). |
---|
3037 | 3789 | */ |
---|
3038 | | -void i915_perf_unregister(struct drm_i915_private *dev_priv) |
---|
| 3790 | +void i915_perf_unregister(struct drm_i915_private *i915) |
---|
3039 | 3791 | { |
---|
3040 | | - if (!dev_priv->perf.metrics_kobj) |
---|
| 3792 | + struct i915_perf *perf = &i915->perf; |
---|
| 3793 | + |
---|
| 3794 | + if (!perf->metrics_kobj) |
---|
3041 | 3795 | return; |
---|
3042 | 3796 | |
---|
3043 | | - sysfs_remove_group(dev_priv->perf.metrics_kobj, |
---|
3044 | | - &dev_priv->perf.oa.test_config.sysfs_metric); |
---|
3045 | | - |
---|
3046 | | - kobject_put(dev_priv->perf.metrics_kobj); |
---|
3047 | | - dev_priv->perf.metrics_kobj = NULL; |
---|
| 3797 | + kobject_put(perf->metrics_kobj); |
---|
| 3798 | + perf->metrics_kobj = NULL; |
---|
3048 | 3799 | } |
---|
3049 | 3800 | |
---|
3050 | | -static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) |
---|
| 3801 | +static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr) |
---|
3051 | 3802 | { |
---|
3052 | 3803 | static const i915_reg_t flex_eu_regs[] = { |
---|
3053 | 3804 | EU_PERF_CNTL0, |
---|
.. | .. |
---|
3067 | 3818 | return false; |
---|
3068 | 3819 | } |
---|
3069 | 3820 | |
---|
3070 | | -static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr) |
---|
| 3821 | +#define ADDR_IN_RANGE(addr, start, end) \ |
---|
| 3822 | + ((addr) >= (start) && \ |
---|
| 3823 | + (addr) <= (end)) |
---|
| 3824 | + |
---|
| 3825 | +#define REG_IN_RANGE(addr, start, end) \ |
---|
| 3826 | + ((addr) >= i915_mmio_reg_offset(start) && \ |
---|
| 3827 | + (addr) <= i915_mmio_reg_offset(end)) |
---|
| 3828 | + |
---|
| 3829 | +#define REG_EQUAL(addr, mmio) \ |
---|
| 3830 | + ((addr) == i915_mmio_reg_offset(mmio)) |
---|
| 3831 | + |
---|
| 3832 | +static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) |
---|
3071 | 3833 | { |
---|
3072 | | - return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) && |
---|
3073 | | - addr <= i915_mmio_reg_offset(OASTARTTRIG8)) || |
---|
3074 | | - (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) && |
---|
3075 | | - addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) || |
---|
3076 | | - (addr >= i915_mmio_reg_offset(OACEC0_0) && |
---|
3077 | | - addr <= i915_mmio_reg_offset(OACEC7_1)); |
---|
| 3834 | + return REG_IN_RANGE(addr, OASTARTTRIG1, OASTARTTRIG8) || |
---|
| 3835 | + REG_IN_RANGE(addr, OAREPORTTRIG1, OAREPORTTRIG8) || |
---|
| 3836 | + REG_IN_RANGE(addr, OACEC0_0, OACEC7_1); |
---|
3078 | 3837 | } |
---|
3079 | 3838 | |
---|
3080 | | -static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) |
---|
| 3839 | +static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr) |
---|
3081 | 3840 | { |
---|
3082 | | - return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) || |
---|
3083 | | - (addr >= i915_mmio_reg_offset(MICRO_BP0_0) && |
---|
3084 | | - addr <= i915_mmio_reg_offset(NOA_WRITE)) || |
---|
3085 | | - (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) && |
---|
3086 | | - addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) || |
---|
3087 | | - (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) && |
---|
3088 | | - addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI)); |
---|
| 3841 | + return REG_EQUAL(addr, HALF_SLICE_CHICKEN2) || |
---|
| 3842 | + REG_IN_RANGE(addr, MICRO_BP0_0, NOA_WRITE) || |
---|
| 3843 | + REG_IN_RANGE(addr, OA_PERFCNT1_LO, OA_PERFCNT2_HI) || |
---|
| 3844 | + REG_IN_RANGE(addr, OA_PERFMATRIX_LO, OA_PERFMATRIX_HI); |
---|
3089 | 3845 | } |
---|
3090 | 3846 | |
---|
3091 | | -static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) |
---|
| 3847 | +static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr) |
---|
3092 | 3848 | { |
---|
3093 | | - return gen7_is_valid_mux_addr(dev_priv, addr) || |
---|
3094 | | - addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) || |
---|
3095 | | - (addr >= i915_mmio_reg_offset(RPM_CONFIG0) && |
---|
3096 | | - addr <= i915_mmio_reg_offset(NOA_CONFIG(8))); |
---|
| 3849 | + return gen7_is_valid_mux_addr(perf, addr) || |
---|
| 3850 | + REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) || |
---|
| 3851 | + REG_IN_RANGE(addr, RPM_CONFIG0, NOA_CONFIG(8)); |
---|
3097 | 3852 | } |
---|
3098 | 3853 | |
---|
3099 | | -static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) |
---|
| 3854 | +static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr) |
---|
3100 | 3855 | { |
---|
3101 | | - return gen8_is_valid_mux_addr(dev_priv, addr) || |
---|
3102 | | - (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) && |
---|
3103 | | - addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI)); |
---|
| 3856 | + return gen8_is_valid_mux_addr(perf, addr) || |
---|
| 3857 | + REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || |
---|
| 3858 | + REG_IN_RANGE(addr, OA_PERFCNT3_LO, OA_PERFCNT4_HI); |
---|
3104 | 3859 | } |
---|
3105 | 3860 | |
---|
3106 | | -static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) |
---|
| 3861 | +static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr) |
---|
3107 | 3862 | { |
---|
3108 | | - return gen7_is_valid_mux_addr(dev_priv, addr) || |
---|
3109 | | - (addr >= 0x25100 && addr <= 0x2FF90) || |
---|
3110 | | - (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) && |
---|
3111 | | - addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) || |
---|
3112 | | - addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0); |
---|
| 3863 | + return gen7_is_valid_mux_addr(perf, addr) || |
---|
| 3864 | + ADDR_IN_RANGE(addr, 0x25100, 0x2FF90) || |
---|
| 3865 | + REG_IN_RANGE(addr, HSW_MBVID2_NOA0, HSW_MBVID2_NOA9) || |
---|
| 3866 | + REG_EQUAL(addr, HSW_MBVID2_MISR0); |
---|
3113 | 3867 | } |
---|
3114 | 3868 | |
---|
3115 | | -static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) |
---|
| 3869 | +static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr) |
---|
3116 | 3870 | { |
---|
3117 | | - return gen7_is_valid_mux_addr(dev_priv, addr) || |
---|
3118 | | - (addr >= 0x182300 && addr <= 0x1823A4); |
---|
| 3871 | + return gen7_is_valid_mux_addr(perf, addr) || |
---|
| 3872 | + ADDR_IN_RANGE(addr, 0x182300, 0x1823A4); |
---|
3119 | 3873 | } |
---|
3120 | 3874 | |
---|
3121 | | -static uint32_t mask_reg_value(u32 reg, u32 val) |
---|
| 3875 | +static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) |
---|
| 3876 | +{ |
---|
| 3877 | + return REG_IN_RANGE(addr, GEN12_OAG_OASTARTTRIG1, GEN12_OAG_OASTARTTRIG8) || |
---|
| 3878 | + REG_IN_RANGE(addr, GEN12_OAG_OAREPORTTRIG1, GEN12_OAG_OAREPORTTRIG8) || |
---|
| 3879 | + REG_IN_RANGE(addr, GEN12_OAG_CEC0_0, GEN12_OAG_CEC7_1) || |
---|
| 3880 | + REG_IN_RANGE(addr, GEN12_OAG_SCEC0_0, GEN12_OAG_SCEC7_1) || |
---|
| 3881 | + REG_EQUAL(addr, GEN12_OAA_DBG_REG) || |
---|
| 3882 | + REG_EQUAL(addr, GEN12_OAG_OA_PESS) || |
---|
| 3883 | + REG_EQUAL(addr, GEN12_OAG_SPCTR_CNF); |
---|
| 3884 | +} |
---|
| 3885 | + |
---|
| 3886 | +static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) |
---|
| 3887 | +{ |
---|
| 3888 | + return REG_EQUAL(addr, NOA_WRITE) || |
---|
| 3889 | + REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || |
---|
| 3890 | + REG_EQUAL(addr, GDT_CHICKEN_BITS) || |
---|
| 3891 | + REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) || |
---|
| 3892 | + REG_EQUAL(addr, RPM_CONFIG0) || |
---|
| 3893 | + REG_EQUAL(addr, RPM_CONFIG1) || |
---|
| 3894 | + REG_IN_RANGE(addr, NOA_CONFIG(0), NOA_CONFIG(8)); |
---|
| 3895 | +} |
---|
| 3896 | + |
---|
| 3897 | +static u32 mask_reg_value(u32 reg, u32 val) |
---|
3122 | 3898 | { |
---|
3123 | 3899 | /* HALF_SLICE_CHICKEN2 is programmed with the |
---|
3124 | 3900 | * WaDisableSTUnitPowerOptimization workaround. Make sure the value |
---|
3125 | 3901 | * programmed by userspace doesn't change this. |
---|
3126 | 3902 | */ |
---|
3127 | | - if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg) |
---|
| 3903 | + if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2)) |
---|
3128 | 3904 | val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE); |
---|
3129 | 3905 | |
---|
3130 | 3906 | /* WAIT_FOR_RC6_EXIT has only one bit fulfilling the function |
---|
3131 | 3907 | * indicated by its name and a bunch of selection fields used by OA |
---|
3132 | 3908 | * configs. |
---|
3133 | 3909 | */ |
---|
3134 | | - if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg) |
---|
| 3910 | + if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT)) |
---|
3135 | 3911 | val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE); |
---|
3136 | 3912 | |
---|
3137 | 3913 | return val; |
---|
3138 | 3914 | } |
---|
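Both registers sanitized above follow the i915 masked-register convention: the upper 16 bits of a write select which of the lower 16 bits actually change, so clearing ~_MASKED_BIT_ENABLE(bit) strips the payload bit and its write-mask bit together. A paraphrased sketch of the helper (the real definition lives in i915_reg.h), taking GEN8_ST_PO_DISABLE as bit 13 purely for illustration:

    /* Paraphrase of the masked-register helper, not the driver's copy. */
    #define MASKED_BIT_ENABLE(a)    (((a) << 16) | (a))

    /*
     * With a bit-13 payload:
     *   MASKED_BIT_ENABLE(1 << 13) == 0x20002000
     *   val &= ~0x20002000;   // clears the bit and its write-mask bit
     */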
3139 | 3915 | |
---|
3140 | | -static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv, |
---|
3141 | | - bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr), |
---|
| 3916 | +static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf, |
---|
| 3917 | + bool (*is_valid)(struct i915_perf *perf, u32 addr), |
---|
3142 | 3918 | u32 __user *regs, |
---|
3143 | 3919 | u32 n_regs) |
---|
3144 | 3920 | { |
---|
.. | .. |
---|
3148 | 3924 | |
---|
3149 | 3925 | if (!n_regs) |
---|
3150 | 3926 | return NULL; |
---|
3151 | | - |
---|
3152 | | - if (!access_ok(VERIFY_READ, regs, n_regs * sizeof(u32) * 2)) |
---|
3153 | | - return ERR_PTR(-EFAULT); |
---|
3154 | 3927 | |
---|
3155 | 3928 | /* No is_valid function means we're not allowing any register to be programmed. */ |
---|
3156 | 3929 | GEM_BUG_ON(!is_valid); |
---|
.. | .. |
---|
3168 | 3941 | if (err) |
---|
3169 | 3942 | goto addr_err; |
---|
3170 | 3943 | |
---|
3171 | | - if (!is_valid(dev_priv, addr)) { |
---|
| 3944 | + if (!is_valid(perf, addr)) { |
---|
3172 | 3945 | DRM_DEBUG("Invalid oa_reg address: %X\n", addr); |
---|
3173 | 3946 | err = -EINVAL; |
---|
3174 | 3947 | goto addr_err; |
---|
.. | .. |
---|
3191 | 3964 | return ERR_PTR(err); |
---|
3192 | 3965 | } |
---|
3193 | 3966 | |
---|
3194 | | -static ssize_t show_dynamic_id(struct device *dev, |
---|
3195 | | - struct device_attribute *attr, |
---|
| 3967 | +static ssize_t show_dynamic_id(struct kobject *kobj, |
---|
| 3968 | + struct kobj_attribute *attr, |
---|
3196 | 3969 | char *buf) |
---|
3197 | 3970 | { |
---|
3198 | 3971 | struct i915_oa_config *oa_config = |
---|
.. | .. |
---|
3201 | 3974 | return sprintf(buf, "%d\n", oa_config->id); |
---|
3202 | 3975 | } |
---|
3203 | 3976 | |
---|
3204 | | -static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, |
---|
| 3977 | +static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf, |
---|
3205 | 3978 | struct i915_oa_config *oa_config) |
---|
3206 | 3979 | { |
---|
3207 | 3980 | sysfs_attr_init(&oa_config->sysfs_metric_id.attr); |
---|
.. | .. |
---|
3216 | 3989 | oa_config->sysfs_metric.name = oa_config->uuid; |
---|
3217 | 3990 | oa_config->sysfs_metric.attrs = oa_config->attrs; |
---|
3218 | 3991 | |
---|
3219 | | - return sysfs_create_group(dev_priv->perf.metrics_kobj, |
---|
| 3992 | + return sysfs_create_group(perf->metrics_kobj, |
---|
3220 | 3993 | &oa_config->sysfs_metric); |
---|
3221 | 3994 | } |
---|
3222 | 3995 | |
---|
.. | .. |
---|
3236 | 4009 | int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, |
---|
3237 | 4010 | struct drm_file *file) |
---|
3238 | 4011 | { |
---|
3239 | | - struct drm_i915_private *dev_priv = dev->dev_private; |
---|
| 4012 | + struct i915_perf *perf = &to_i915(dev)->perf; |
---|
3240 | 4013 | struct drm_i915_perf_oa_config *args = data; |
---|
3241 | 4014 | struct i915_oa_config *oa_config, *tmp; |
---|
| 4015 | + struct i915_oa_reg *regs; |
---|
3242 | 4016 | int err, id; |
---|
3243 | 4017 | |
---|
3244 | | - if (!dev_priv->perf.initialized) { |
---|
| 4018 | + if (!perf->i915) { |
---|
3245 | 4019 | DRM_DEBUG("i915 perf interface not available for this system\n"); |
---|
3246 | 4020 | return -ENOTSUPP; |
---|
3247 | 4021 | } |
---|
3248 | 4022 | |
---|
3249 | | - if (!dev_priv->perf.metrics_kobj) { |
---|
| 4023 | + if (!perf->metrics_kobj) { |
---|
3250 | 4024 | DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); |
---|
3251 | 4025 | return -EINVAL; |
---|
3252 | 4026 | } |
---|
3253 | 4027 | |
---|
3254 | | - if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { |
---|
| 4028 | + if (i915_perf_stream_paranoid && !perfmon_capable()) { |
---|
3255 | 4029 | DRM_DEBUG("Insufficient privileges to add i915 OA config\n"); |
---|
3256 | 4030 | return -EACCES; |
---|
3257 | 4031 | } |
---|
.. | .. |
---|
3269 | 4043 | return -ENOMEM; |
---|
3270 | 4044 | } |
---|
3271 | 4045 | |
---|
3272 | | - atomic_set(&oa_config->ref_count, 1); |
---|
| 4046 | + oa_config->perf = perf; |
---|
| 4047 | + kref_init(&oa_config->ref); |
---|
3273 | 4048 | |
---|
3274 | 4049 | if (!uuid_is_valid(args->uuid)) { |
---|
3275 | 4050 | DRM_DEBUG("Invalid uuid format for OA config\n"); |
---|
.. | .. |
---|
3283 | 4058 | memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid)); |
---|
3284 | 4059 | |
---|
3285 | 4060 | oa_config->mux_regs_len = args->n_mux_regs; |
---|
3286 | | - oa_config->mux_regs = |
---|
3287 | | - alloc_oa_regs(dev_priv, |
---|
3288 | | - dev_priv->perf.oa.ops.is_valid_mux_reg, |
---|
3289 | | - u64_to_user_ptr(args->mux_regs_ptr), |
---|
3290 | | - args->n_mux_regs); |
---|
| 4061 | + regs = alloc_oa_regs(perf, |
---|
| 4062 | + perf->ops.is_valid_mux_reg, |
---|
| 4063 | + u64_to_user_ptr(args->mux_regs_ptr), |
---|
| 4064 | + args->n_mux_regs); |
---|
3291 | 4065 | |
---|
3292 | | - if (IS_ERR(oa_config->mux_regs)) { |
---|
| 4066 | + if (IS_ERR(regs)) { |
---|
3293 | 4067 | DRM_DEBUG("Failed to create OA config for mux_regs\n"); |
---|
3294 | | - err = PTR_ERR(oa_config->mux_regs); |
---|
| 4068 | + err = PTR_ERR(regs); |
---|
3295 | 4069 | goto reg_err; |
---|
3296 | 4070 | } |
---|
| 4071 | + oa_config->mux_regs = regs; |
---|
3297 | 4072 | |
---|
3298 | 4073 | oa_config->b_counter_regs_len = args->n_boolean_regs; |
---|
3299 | | - oa_config->b_counter_regs = |
---|
3300 | | - alloc_oa_regs(dev_priv, |
---|
3301 | | - dev_priv->perf.oa.ops.is_valid_b_counter_reg, |
---|
3302 | | - u64_to_user_ptr(args->boolean_regs_ptr), |
---|
3303 | | - args->n_boolean_regs); |
---|
| 4074 | + regs = alloc_oa_regs(perf, |
---|
| 4075 | + perf->ops.is_valid_b_counter_reg, |
---|
| 4076 | + u64_to_user_ptr(args->boolean_regs_ptr), |
---|
| 4077 | + args->n_boolean_regs); |
---|
3304 | 4078 | |
---|
3305 | | - if (IS_ERR(oa_config->b_counter_regs)) { |
---|
| 4079 | + if (IS_ERR(regs)) { |
---|
3306 | 4080 | DRM_DEBUG("Failed to create OA config for b_counter_regs\n"); |
---|
3307 | | - err = PTR_ERR(oa_config->b_counter_regs); |
---|
| 4081 | + err = PTR_ERR(regs); |
---|
3308 | 4082 | goto reg_err; |
---|
3309 | 4083 | } |
---|
| 4084 | + oa_config->b_counter_regs = regs; |
---|
3310 | 4085 | |
---|
3311 | | - if (INTEL_GEN(dev_priv) < 8) { |
---|
| 4086 | + if (INTEL_GEN(perf->i915) < 8) { |
---|
3312 | 4087 | if (args->n_flex_regs != 0) { |
---|
3313 | 4088 | err = -EINVAL; |
---|
3314 | 4089 | goto reg_err; |
---|
3315 | 4090 | } |
---|
3316 | 4091 | } else { |
---|
3317 | 4092 | oa_config->flex_regs_len = args->n_flex_regs; |
---|
3318 | | - oa_config->flex_regs = |
---|
3319 | | - alloc_oa_regs(dev_priv, |
---|
3320 | | - dev_priv->perf.oa.ops.is_valid_flex_reg, |
---|
3321 | | - u64_to_user_ptr(args->flex_regs_ptr), |
---|
3322 | | - args->n_flex_regs); |
---|
| 4093 | + regs = alloc_oa_regs(perf, |
---|
| 4094 | + perf->ops.is_valid_flex_reg, |
---|
| 4095 | + u64_to_user_ptr(args->flex_regs_ptr), |
---|
| 4096 | + args->n_flex_regs); |
---|
3323 | 4097 | |
---|
3324 | | - if (IS_ERR(oa_config->flex_regs)) { |
---|
| 4098 | + if (IS_ERR(regs)) { |
---|
3325 | 4099 | DRM_DEBUG("Failed to create OA config for flex_regs\n"); |
---|
3326 | | - err = PTR_ERR(oa_config->flex_regs); |
---|
| 4100 | + err = PTR_ERR(regs); |
---|
3327 | 4101 | goto reg_err; |
---|
3328 | 4102 | } |
---|
| 4103 | + oa_config->flex_regs = regs; |
---|
3329 | 4104 | } |
---|
3330 | 4105 | |
---|
3331 | | - err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); |
---|
| 4106 | + err = mutex_lock_interruptible(&perf->metrics_lock); |
---|
3332 | 4107 | if (err) |
---|
3333 | 4108 | goto reg_err; |
---|
3334 | 4109 | |
---|
3335 | 4110 | /* We shouldn't have too many configs, so this iteration shouldn't be |
---|
3336 | 4111 | * too costly. |
---|
3337 | 4112 | */ |
---|
3338 | | - idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) { |
---|
| 4113 | + idr_for_each_entry(&perf->metrics_idr, tmp, id) { |
---|
3339 | 4114 | if (!strcmp(tmp->uuid, oa_config->uuid)) { |
---|
3340 | 4115 | DRM_DEBUG("OA config already exists with this uuid\n"); |
---|
3341 | 4116 | err = -EADDRINUSE; |
---|
.. | .. |
---|
3343 | 4118 | } |
---|
3344 | 4119 | } |
---|
3345 | 4120 | |
---|
3346 | | - err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config); |
---|
| 4121 | + err = create_dynamic_oa_sysfs_entry(perf, oa_config); |
---|
3347 | 4122 | if (err) { |
---|
3348 | 4123 | DRM_DEBUG("Failed to create sysfs entry for OA config\n"); |
---|
3349 | 4124 | goto sysfs_err; |
---|
3350 | 4125 | } |
---|
3351 | 4126 | |
---|
3352 | 4127 | /* Config id 0 is invalid; id 1 is reserved for the kernel-stored test config. */ |
---|
3353 | | - oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr, |
---|
| 4128 | + oa_config->id = idr_alloc(&perf->metrics_idr, |
---|
3354 | 4129 | oa_config, 2, |
---|
3355 | 4130 | 0, GFP_KERNEL); |
---|
3356 | 4131 | if (oa_config->id < 0) { |
---|
.. | .. |
---|
3359 | 4134 | goto sysfs_err; |
---|
3360 | 4135 | } |
---|
3361 | 4136 | |
---|
3362 | | - mutex_unlock(&dev_priv->perf.metrics_lock); |
---|
| 4137 | + mutex_unlock(&perf->metrics_lock); |
---|
3363 | 4138 | |
---|
3364 | 4139 | DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id); |
---|
3365 | 4140 | |
---|
3366 | 4141 | return oa_config->id; |
---|
3367 | 4142 | |
---|
3368 | 4143 | sysfs_err: |
---|
3369 | | - mutex_unlock(&dev_priv->perf.metrics_lock); |
---|
| 4144 | + mutex_unlock(&perf->metrics_lock); |
---|
3370 | 4145 | reg_err: |
---|
3371 | | - put_oa_config(dev_priv, oa_config); |
---|
| 4146 | + i915_oa_config_put(oa_config); |
---|
3372 | 4147 | DRM_DEBUG("Failed to add new OA config\n"); |
---|
3373 | 4148 | return err; |
---|
3374 | 4149 | } |
---|
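From userspace, the pair of ioctls below drives this path end to end. The UUID and the single (address, value) register pair are placeholders for illustration; real configs carry full register lists generated by OA tooling.

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <i915_drm.h>

    static int add_then_remove_config(int drm_fd)
    {
            /* Each register is an (address, value) pair of u32s. */
            uint32_t mux_regs[] = { 0x9888, 0x10800000 }; /* placeholder */
            struct drm_i915_perf_oa_config cfg;
            uint64_t id;
            int ret;

            memset(&cfg, 0, sizeof(cfg));
            memcpy(cfg.uuid, "01234567-0123-0123-0123-0123456789ab", 36);
            cfg.n_mux_regs = 1;
            cfg.mux_regs_ptr = (uintptr_t)mux_regs;

            ret = ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &cfg);
            if (ret < 0)
                    return ret;

            id = ret; /* new metrics id, also advertised under sysfs */
            return ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &id);
    }

The positive return of the add ioctl is the id allocated from the metrics_idr above, which is why ids start at 2.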
.. | .. |
---|
3387 | 4162 | int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, |
---|
3388 | 4163 | struct drm_file *file) |
---|
3389 | 4164 | { |
---|
3390 | | - struct drm_i915_private *dev_priv = dev->dev_private; |
---|
| 4165 | + struct i915_perf *perf = &to_i915(dev)->perf; |
---|
3391 | 4166 | u64 *arg = data; |
---|
3392 | 4167 | struct i915_oa_config *oa_config; |
---|
3393 | 4168 | int ret; |
---|
3394 | 4169 | |
---|
3395 | | - if (!dev_priv->perf.initialized) { |
---|
| 4170 | + if (!perf->i915) { |
---|
3396 | 4171 | DRM_DEBUG("i915 perf interface not available for this system\n"); |
---|
3397 | 4172 | return -ENOTSUPP; |
---|
3398 | 4173 | } |
---|
3399 | 4174 | |
---|
3400 | | - if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { |
---|
| 4175 | + if (i915_perf_stream_paranoid && !perfmon_capable()) { |
---|
3401 | 4176 | DRM_DEBUG("Insufficient privileges to remove i915 OA config\n"); |
---|
3402 | 4177 | return -EACCES; |
---|
3403 | 4178 | } |
---|
3404 | 4179 | |
---|
3405 | | - ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); |
---|
| 4180 | + ret = mutex_lock_interruptible(&perf->metrics_lock); |
---|
3406 | 4181 | if (ret) |
---|
3407 | | - goto lock_err; |
---|
| 4182 | + return ret; |
---|
3408 | 4183 | |
---|
3409 | | - oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg); |
---|
| 4184 | + oa_config = idr_find(&perf->metrics_idr, *arg); |
---|
3410 | 4185 | if (!oa_config) { |
---|
3411 | 4186 | DRM_DEBUG("Failed to remove unknown OA config\n"); |
---|
3412 | 4187 | ret = -ENOENT; |
---|
3413 | | - goto config_err; |
---|
| 4188 | + goto err_unlock; |
---|
3414 | 4189 | } |
---|
3415 | 4190 | |
---|
3416 | 4191 | GEM_BUG_ON(*arg != oa_config->id); |
---|
3417 | 4192 | |
---|
3418 | | - sysfs_remove_group(dev_priv->perf.metrics_kobj, |
---|
3419 | | - &oa_config->sysfs_metric); |
---|
| 4193 | + sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric); |
---|
3420 | 4194 | |
---|
3421 | | - idr_remove(&dev_priv->perf.metrics_idr, *arg); |
---|
| 4195 | + idr_remove(&perf->metrics_idr, *arg); |
---|
| 4196 | + |
---|
| 4197 | + mutex_unlock(&perf->metrics_lock); |
---|
3422 | 4198 | |
---|
3423 | 4199 | DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); |
---|
3424 | 4200 | |
---|
3425 | | - put_oa_config(dev_priv, oa_config); |
---|
| 4201 | + i915_oa_config_put(oa_config); |
---|
3426 | 4202 | |
---|
3427 | | -config_err: |
---|
3428 | | - mutex_unlock(&dev_priv->perf.metrics_lock); |
---|
3429 | | -lock_err: |
---|
| 4203 | + return 0; |
---|
| 4204 | + |
---|
| 4205 | +err_unlock: |
---|
| 4206 | + mutex_unlock(&perf->metrics_lock); |
---|
3430 | 4207 | return ret; |
---|
3431 | 4208 | } |
---|
3432 | 4209 | |
---|
.. | .. |
---|
3437 | 4214 | .maxlen = sizeof(i915_perf_stream_paranoid), |
---|
3438 | 4215 | .mode = 0644, |
---|
3439 | 4216 | .proc_handler = proc_dointvec_minmax, |
---|
3440 | | - .extra1 = &zero, |
---|
3441 | | - .extra2 = &one, |
---|
| 4217 | + .extra1 = SYSCTL_ZERO, |
---|
| 4218 | + .extra2 = SYSCTL_ONE, |
---|
3442 | 4219 | }, |
---|
3443 | 4220 | { |
---|
3444 | 4221 | .procname = "oa_max_sample_rate", |
---|
.. | .. |
---|
3446 | 4223 | .maxlen = sizeof(i915_oa_max_sample_rate), |
---|
3447 | 4224 | .mode = 0644, |
---|
3448 | 4225 | .proc_handler = proc_dointvec_minmax, |
---|
3449 | | - .extra1 = &zero, |
---|
| 4226 | + .extra1 = SYSCTL_ZERO, |
---|
3450 | 4227 | .extra2 = &oa_sample_rate_hard_limit, |
---|
3451 | 4228 | }, |
---|
3452 | 4229 | {} |
---|
.. | .. |
---|
3473 | 4250 | }; |
---|
3474 | 4251 | |
---|
3475 | 4252 | /** |
---|
3476 | | - * i915_perf_init - initialize i915-perf state on module load |
---|
3477 | | - * @dev_priv: i915 device instance |
---|
| 4253 | + * i915_perf_init - initialize i915-perf state on module bind |
---|
| 4254 | + * @i915: i915 device instance |
---|
3478 | 4255 | * |
---|
3479 | 4256 | * Initializes i915-perf state without exposing anything to userspace. |
---|
3480 | 4257 | * |
---|
3481 | 4258 | * Note: i915-perf initialization is split into an 'init' and 'register' |
---|
3482 | 4259 | * phase with the i915_perf_register() exposing state to userspace. |
---|
3483 | 4260 | */ |
---|
3484 | | -void i915_perf_init(struct drm_i915_private *dev_priv) |
---|
| 4261 | +void i915_perf_init(struct drm_i915_private *i915) |
---|
3485 | 4262 | { |
---|
3486 | | - if (IS_HASWELL(dev_priv)) { |
---|
3487 | | - dev_priv->perf.oa.ops.is_valid_b_counter_reg = |
---|
3488 | | - gen7_is_valid_b_counter_addr; |
---|
3489 | | - dev_priv->perf.oa.ops.is_valid_mux_reg = |
---|
3490 | | - hsw_is_valid_mux_addr; |
---|
3491 | | - dev_priv->perf.oa.ops.is_valid_flex_reg = NULL; |
---|
3492 | | - dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer; |
---|
3493 | | - dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set; |
---|
3494 | | - dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set; |
---|
3495 | | - dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable; |
---|
3496 | | - dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable; |
---|
3497 | | - dev_priv->perf.oa.ops.read = gen7_oa_read; |
---|
3498 | | - dev_priv->perf.oa.ops.oa_hw_tail_read = |
---|
3499 | | - gen7_oa_hw_tail_read; |
---|
| 4263 | + struct i915_perf *perf = &i915->perf; |
---|
3500 | 4264 | |
---|
3501 | | - dev_priv->perf.oa.oa_formats = hsw_oa_formats; |
---|
3502 | | - } else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { |
---|
| 4265 | + /* XXX const struct i915_perf_ops! */ |
---|
| 4266 | + |
---|
| 4267 | + if (IS_HASWELL(i915)) { |
---|
| 4268 | + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; |
---|
| 4269 | + perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr; |
---|
| 4270 | + perf->ops.is_valid_flex_reg = NULL; |
---|
| 4271 | + perf->ops.enable_metric_set = hsw_enable_metric_set; |
---|
| 4272 | + perf->ops.disable_metric_set = hsw_disable_metric_set; |
---|
| 4273 | + perf->ops.oa_enable = gen7_oa_enable; |
---|
| 4274 | + perf->ops.oa_disable = gen7_oa_disable; |
---|
| 4275 | + perf->ops.read = gen7_oa_read; |
---|
| 4276 | + perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read; |
---|
| 4277 | + |
---|
| 4278 | + perf->oa_formats = hsw_oa_formats; |
---|
| 4279 | + } else if (HAS_LOGICAL_RING_CONTEXTS(i915)) { |
---|
3503 | 4280 | /* Note that although we could theoretically also support the |
---|
3504 | 4281 | * legacy ringbuffer mode on BDW (and earlier iterations of |
---|
3505 | 4282 | * this driver, before upstreaming did this) it didn't seem |
---|
3506 | 4283 | * worth the complexity to maintain now that BDW+ enable |
---|
3507 | 4284 | * execlist mode by default. |
---|
3508 | 4285 | */ |
---|
3509 | | - dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats; |
---|
| 4286 | + perf->ops.read = gen8_oa_read; |
---|
3510 | 4287 | |
---|
3511 | | - dev_priv->perf.oa.ops.init_oa_buffer = gen8_init_oa_buffer; |
---|
3512 | | - dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable; |
---|
3513 | | - dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable; |
---|
3514 | | - dev_priv->perf.oa.ops.read = gen8_oa_read; |
---|
3515 | | - dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read; |
---|
| 4288 | + if (IS_GEN_RANGE(i915, 8, 9)) { |
---|
| 4289 | + perf->oa_formats = gen8_plus_oa_formats; |
---|
3516 | 4290 | |
---|
3517 | | - if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv)) { |
---|
3518 | | - dev_priv->perf.oa.ops.is_valid_b_counter_reg = |
---|
| 4291 | + perf->ops.is_valid_b_counter_reg = |
---|
3519 | 4292 | gen7_is_valid_b_counter_addr; |
---|
3520 | | - dev_priv->perf.oa.ops.is_valid_mux_reg = |
---|
| 4293 | + perf->ops.is_valid_mux_reg = |
---|
3521 | 4294 | gen8_is_valid_mux_addr; |
---|
3522 | | - dev_priv->perf.oa.ops.is_valid_flex_reg = |
---|
| 4295 | + perf->ops.is_valid_flex_reg = |
---|
3523 | 4296 | gen8_is_valid_flex_addr; |
---|
3524 | 4297 | |
---|
3525 | | - if (IS_CHERRYVIEW(dev_priv)) { |
---|
3526 | | - dev_priv->perf.oa.ops.is_valid_mux_reg = |
---|
| 4298 | + if (IS_CHERRYVIEW(i915)) { |
---|
| 4299 | + perf->ops.is_valid_mux_reg = |
---|
3527 | 4300 | chv_is_valid_mux_addr; |
---|
3528 | 4301 | } |
---|
3529 | 4302 | |
---|
3530 | | - dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; |
---|
3531 | | - dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set; |
---|
| 4303 | + perf->ops.oa_enable = gen8_oa_enable; |
---|
| 4304 | + perf->ops.oa_disable = gen8_oa_disable; |
---|
| 4305 | + perf->ops.enable_metric_set = gen8_enable_metric_set; |
---|
| 4306 | + perf->ops.disable_metric_set = gen8_disable_metric_set; |
---|
| 4307 | + perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; |
---|
3532 | 4308 | |
---|
3533 | | - if (IS_GEN8(dev_priv)) { |
---|
3534 | | - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; |
---|
3535 | | - dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; |
---|
| 4309 | + if (IS_GEN(i915, 8)) { |
---|
| 4310 | + perf->ctx_oactxctrl_offset = 0x120; |
---|
| 4311 | + perf->ctx_flexeu0_offset = 0x2ce; |
---|
3536 | 4312 | |
---|
3537 | | - dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); |
---|
| 4313 | + perf->gen8_valid_ctx_bit = BIT(25); |
---|
3538 | 4314 | } else { |
---|
3539 | | - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; |
---|
3540 | | - dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; |
---|
| 4315 | + perf->ctx_oactxctrl_offset = 0x128; |
---|
| 4316 | + perf->ctx_flexeu0_offset = 0x3de; |
---|
3541 | 4317 | |
---|
3542 | | - dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); |
---|
| 4318 | + perf->gen8_valid_ctx_bit = BIT(16); |
---|
3543 | 4319 | } |
---|
3544 | | - } else if (IS_GEN(dev_priv, 10, 11)) { |
---|
3545 | | - dev_priv->perf.oa.ops.is_valid_b_counter_reg = |
---|
| 4320 | + } else if (IS_GEN_RANGE(i915, 10, 11)) { |
---|
| 4321 | + perf->oa_formats = gen8_plus_oa_formats; |
---|
| 4322 | + |
---|
| 4323 | + perf->ops.is_valid_b_counter_reg = |
---|
3546 | 4324 | gen7_is_valid_b_counter_addr; |
---|
3547 | | - dev_priv->perf.oa.ops.is_valid_mux_reg = |
---|
| 4325 | + perf->ops.is_valid_mux_reg = |
---|
3548 | 4326 | gen10_is_valid_mux_addr; |
---|
3549 | | - dev_priv->perf.oa.ops.is_valid_flex_reg = |
---|
| 4327 | + perf->ops.is_valid_flex_reg = |
---|
3550 | 4328 | gen8_is_valid_flex_addr; |
---|
3551 | 4329 | |
---|
3552 | | - dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; |
---|
3553 | | - dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set; |
---|
| 4330 | + perf->ops.oa_enable = gen8_oa_enable; |
---|
| 4331 | + perf->ops.oa_disable = gen8_oa_disable; |
---|
| 4332 | + perf->ops.enable_metric_set = gen8_enable_metric_set; |
---|
| 4333 | + perf->ops.disable_metric_set = gen10_disable_metric_set; |
---|
| 4334 | + perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; |
---|
3554 | 4335 | |
---|
3555 | | - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; |
---|
3556 | | - dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; |
---|
| 4336 | + if (IS_GEN(i915, 10)) { |
---|
| 4337 | + perf->ctx_oactxctrl_offset = 0x128; |
---|
| 4338 | + perf->ctx_flexeu0_offset = 0x3de; |
---|
| 4339 | + } else { |
---|
| 4340 | + perf->ctx_oactxctrl_offset = 0x124; |
---|
| 4341 | + perf->ctx_flexeu0_offset = 0x78e; |
---|
| 4342 | + } |
---|
| 4343 | + perf->gen8_valid_ctx_bit = BIT(16); |
---|
| 4344 | + } else if (IS_GEN(i915, 12)) { |
---|
| 4345 | + perf->oa_formats = gen12_oa_formats; |
---|
3557 | 4346 | |
---|
3558 | | - dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); |
---|
| 4347 | + perf->ops.is_valid_b_counter_reg = |
---|
| 4348 | + gen12_is_valid_b_counter_addr; |
---|
| 4349 | + perf->ops.is_valid_mux_reg = |
---|
| 4350 | + gen12_is_valid_mux_addr; |
---|
| 4351 | + perf->ops.is_valid_flex_reg = |
---|
| 4352 | + gen8_is_valid_flex_addr; |
---|
| 4353 | + |
---|
| 4354 | + perf->ops.oa_enable = gen12_oa_enable; |
---|
| 4355 | + perf->ops.oa_disable = gen12_oa_disable; |
---|
| 4356 | + perf->ops.enable_metric_set = gen12_enable_metric_set; |
---|
| 4357 | + perf->ops.disable_metric_set = gen12_disable_metric_set; |
---|
| 4358 | + perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read; |
---|
| 4359 | + |
---|
| 4360 | + perf->ctx_flexeu0_offset = 0; |
---|
| 4361 | + perf->ctx_oactxctrl_offset = 0x144; |
---|
3559 | 4362 | } |
---|
3560 | 4363 | } |
---|
3561 | 4364 | |
---|
3562 | | - if (dev_priv->perf.oa.ops.enable_metric_set) { |
---|
3563 | | - hrtimer_init(&dev_priv->perf.oa.poll_check_timer, |
---|
3564 | | - CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
---|
3565 | | - dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb; |
---|
3566 | | - init_waitqueue_head(&dev_priv->perf.oa.poll_wq); |
---|
| 4365 | + if (perf->ops.enable_metric_set) { |
---|
| 4366 | + mutex_init(&perf->lock); |
---|
3567 | 4367 | |
---|
3568 | | - INIT_LIST_HEAD(&dev_priv->perf.streams); |
---|
3569 | | - mutex_init(&dev_priv->perf.lock); |
---|
3570 | | - spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); |
---|
| 4368 | + oa_sample_rate_hard_limit = |
---|
| 4369 | + RUNTIME_INFO(i915)->cs_timestamp_frequency_hz / 2; |
---|
3571 | 4370 | |
---|
3572 | | - oa_sample_rate_hard_limit = 1000 * |
---|
3573 | | - (INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz / 2); |
---|
3574 | | - dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); |
---|
| 4371 | + mutex_init(&perf->metrics_lock); |
---|
| 4372 | + idr_init(&perf->metrics_idr); |
---|
3575 | 4373 | |
---|
3576 | | - mutex_init(&dev_priv->perf.metrics_lock); |
---|
3577 | | - idr_init(&dev_priv->perf.metrics_idr); |
---|
| 4374 | + /* We set up some ratelimit state to potentially throttle any |
---|
| 4375 | + * _NOTES about spurious, invalid OA reports which we don't |
---|
| 4376 | + * forward to userspace. |
---|
| 4377 | + * |
---|
| 4378 | + * We print a _NOTE about any throttling when closing the |
---|
| 4379 | + * stream instead of waiting until driver _fini which no one |
---|
| 4380 | + * would ever see. |
---|
| 4381 | + * |
---|
| 4382 | + * Using the same limiting factors as printk_ratelimit() |
---|
| 4383 | + */ |
---|
| 4384 | + ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10); |
---|
| 4385 | + /* Since we use a DRM_NOTE for spurious reports it would be |
---|
| 4386 | + * inconsistent to let __ratelimit() automatically print a |
---|
| 4387 | + * warning for throttling. |
---|
| 4388 | + */ |
---|
| 4389 | + ratelimit_set_flags(&perf->spurious_report_rs, |
---|
| 4390 | + RATELIMIT_MSG_ON_RELEASE); |
---|
3578 | 4391 | |
---|
3579 | | - dev_priv->perf.initialized = true; |
---|
| 4392 | + ratelimit_state_init(&perf->tail_pointer_race, |
---|
| 4393 | + 5 * HZ, 10); |
---|
| 4394 | + ratelimit_set_flags(&perf->tail_pointer_race, |
---|
| 4395 | + RATELIMIT_MSG_ON_RELEASE); |
---|
| 4396 | + |
---|
| 4397 | + atomic64_set(&perf->noa_programming_delay, |
---|
| 4398 | + 500 * 1000 /* 500us */); |
---|
| 4399 | + |
---|
| 4400 | + perf->i915 = i915; |
---|
3580 | 4401 | } |
---|
3581 | 4402 | } |
---|
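As a worked example of the oa_sample_rate_hard_limit assignment above: assuming a command-streamer timestamp frequency of 12 MHz (cs_timestamp_frequency_hz == 12000000, a common value on gen9 parts), the hard limit comes out to 6,000,000 samples per second. That matches what the old kHz-based expression computed, 1000 * (12000 / 2), so the change is purely a unit cleanup, not a behavioural one.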
3582 | 4403 | |
---|
3583 | 4404 | static int destroy_config(int id, void *p, void *data) |
---|
3584 | 4405 | { |
---|
3585 | | - struct drm_i915_private *dev_priv = data; |
---|
3586 | | - struct i915_oa_config *oa_config = p; |
---|
3587 | | - |
---|
3588 | | - put_oa_config(dev_priv, oa_config); |
---|
3589 | | - |
---|
| 4406 | + i915_oa_config_put(p); |
---|
3590 | 4407 | return 0; |
---|
| 4408 | +} |
---|
| 4409 | + |
---|
| 4410 | +void i915_perf_sysctl_register(void) |
---|
| 4411 | +{ |
---|
| 4412 | + sysctl_header = register_sysctl_table(dev_root); |
---|
| 4413 | +} |
---|
| 4414 | + |
---|
| 4415 | +void i915_perf_sysctl_unregister(void) |
---|
| 4416 | +{ |
---|
| 4417 | + unregister_sysctl_table(sysctl_header); |
---|
3591 | 4418 | } |
---|
3592 | 4419 | |
---|
3593 | 4420 | /** |
---|
3594 | 4421 | * i915_perf_fini - Counter part to i915_perf_init() |
---|
3595 | | - * @dev_priv: i915 device instance |
---|
| 4422 | + * @i915: i915 device instance |
---|
3596 | 4423 | */ |
---|
3597 | | -void i915_perf_fini(struct drm_i915_private *dev_priv) |
---|
| 4424 | +void i915_perf_fini(struct drm_i915_private *i915) |
---|
3598 | 4425 | { |
---|
3599 | | - if (!dev_priv->perf.initialized) |
---|
| 4426 | + struct i915_perf *perf = &i915->perf; |
---|
| 4427 | + |
---|
| 4428 | + if (!perf->i915) |
---|
3600 | 4429 | return; |
---|
3601 | 4430 | |
---|
3602 | | - idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv); |
---|
3603 | | - idr_destroy(&dev_priv->perf.metrics_idr); |
---|
| 4431 | + idr_for_each(&perf->metrics_idr, destroy_config, perf); |
---|
| 4432 | + idr_destroy(&perf->metrics_idr); |
---|
3604 | 4433 | |
---|
3605 | | - unregister_sysctl_table(dev_priv->perf.sysctl_header); |
---|
3606 | | - |
---|
3607 | | - memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops)); |
---|
3608 | | - |
---|
3609 | | - dev_priv->perf.initialized = false; |
---|
| 4434 | + memset(&perf->ops, 0, sizeof(perf->ops)); |
---|
| 4435 | + perf->i915 = NULL; |
---|
3610 | 4436 | } |
---|
| 4437 | + |
---|
| 4438 | +/** |
---|
| 4439 | + * i915_perf_ioctl_version - Version of the i915-perf subsystem |
---|
| 4440 | + * |
---|
| 4441 | + * This version number is used by userspace to detect available features. |
---|
| 4442 | + */ |
---|
| 4443 | +int i915_perf_ioctl_version(void) |
---|
| 4444 | +{ |
---|
| 4445 | + /* |
---|
| 4446 | + * 1: Initial version |
---|
| 4447 | + * I915_PERF_IOCTL_ENABLE |
---|
| 4448 | + * I915_PERF_IOCTL_DISABLE |
---|
| 4449 | + * |
---|
| 4450 | + * 2: Added runtime modification of OA config. |
---|
| 4451 | + * I915_PERF_IOCTL_CONFIG |
---|
| 4452 | + * |
---|
| 4453 | + * 3: Add DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold |
---|
| 4454 | + * preemption on a particular context so that performance data is |
---|
| 4455 | + * accessible from a delta of MI_RPC reports without looking at the |
---|
| 4456 | + * OA buffer. |
---|
| 4457 | + * |
---|
| 4458 | + * 4: Add DRM_I915_PERF_PROP_ALLOWED_SSEU to limit what contexts can |
---|
| 4459 | + * be run for the duration of the performance recording based on |
---|
| 4460 | + * their SSEU configuration. |
---|
| 4461 | + * |
---|
| 4462 | + * 5: Add DRM_I915_PERF_PROP_POLL_OA_PERIOD parameter that controls the |
---|
| 4463 | + * interval for the hrtimer used to check for OA data. |
---|
| 4464 | + */ |
---|
| 4465 | + return 5; |
---|
| 4466 | +} |
---|
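Userspace reads this revision through the I915_PARAM_PERF_REVISION getparam, which is wired to i915_perf_ioctl_version(). A minimal sketch, assuming an already-open drm fd:

    #include <sys/ioctl.h>
    #include <i915_drm.h>

    static int perf_revision(int drm_fd)
    {
            int value = 0;
            struct drm_i915_getparam gp = {
                    .param = I915_PARAM_PERF_REVISION,
                    .value = &value,
            };

            if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp))
                    return -1;

            /* e.g. >= 5 implies DRM_I915_PERF_PROP_POLL_OA_PERIOD exists. */
            return value;
    }

A returned value of n implies every feature up to revision n is available, so callers only need a single >= comparison per feature.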
| 4467 | + |
---|
| 4468 | +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
---|
| 4469 | +#include "selftests/i915_perf.c" |
---|
| 4470 | +#endif |
---|