2024-05-10 61598093bbdd283a7edc367d900f223070ead8d2
kernel/drivers/gpu/drm/i915/i915_pmu.c
@@ -5,9 +5,18 @@
  */
 
 #include <linux/irq.h>
-#include "i915_pmu.h"
-#include "intel_ringbuffer.h"
+#include <linux/pm_runtime.h>
+
+#include "gt/intel_engine.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_engine_user.h"
+#include "gt/intel_gt_pm.h"
+#include "gt/intel_rc6.h"
+#include "gt/intel_rps.h"
+
 #include "i915_drv.h"
+#include "i915_pmu.h"
+#include "intel_pm.h"
 
 /* Frequency for the sampling timer for events which need it. */
 #define FREQUENCY 200
@@ -70,8 +79,9 @@
 	return config_enabled_bit(event->attr.config);
 }
 
-static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
+static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
 {
+	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
 	u64 enable;
 
 	/*
@@ -79,7 +89,7 @@
 	 *
 	 * We start with a bitmask of all currently enabled events.
 	 */
-	enable = i915->pmu.enable;
+	enable = pmu->enable;
 
 	/*
 	 * Mask out all the ones which do not need the timer, or in
@@ -98,10 +108,8 @@
 	/*
 	 * Also there is software busyness tracking available we do not
 	 * need the timer for I915_SAMPLE_BUSY counter.
-	 *
-	 * Use RCS as proxy for all engines.
 	 */
-	else if (intel_engine_supports_stats(i915->engine[RCS]))
+	else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
 		enable &= ~BIT(I915_SAMPLE_BUSY);
 
 	/*
@@ -110,50 +118,151 @@
 	return enable;
 }
 
-void i915_pmu_gt_parked(struct drm_i915_private *i915)
+static u64 __get_rc6(struct intel_gt *gt)
 {
-	if (!i915->pmu.base.event_init)
-		return;
+	struct drm_i915_private *i915 = gt->i915;
+	u64 val;
 
-	spin_lock_irq(&i915->pmu.lock);
-	/*
-	 * Signal sampling timer to stop if only engine events are enabled and
-	 * GPU went idle.
-	 */
-	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
-	spin_unlock_irq(&i915->pmu.lock);
+	val = intel_rc6_residency_ns(&gt->rc6,
+				     IS_VALLEYVIEW(i915) ?
+				     VLV_GT_RENDER_RC6 :
+				     GEN6_GT_GFX_RC6);
+
+	if (HAS_RC6p(i915))
+		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p);
+
+	if (HAS_RC6pp(i915))
+		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp);
+
+	return val;
 }
 
-static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
+#if IS_ENABLED(CONFIG_PM)
+
+static inline s64 ktime_since(const ktime_t kt)
 {
-	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
-		i915->pmu.timer_enabled = true;
-		i915->pmu.timer_last = ktime_get();
-		hrtimer_start_range_ns(&i915->pmu.timer,
+	return ktime_to_ns(ktime_sub(ktime_get(), kt));
+}
+
+static u64 get_rc6(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	struct i915_pmu *pmu = &i915->pmu;
+	unsigned long flags;
+	bool awake = false;
+	u64 val;
+
+	if (intel_gt_pm_get_if_awake(gt)) {
+		val = __get_rc6(gt);
+		intel_gt_pm_put_async(gt);
+		awake = true;
+	}
+
+	spin_lock_irqsave(&pmu->lock, flags);
+
+	if (awake) {
+		pmu->sample[__I915_SAMPLE_RC6].cur = val;
+	} else {
+		/*
+		 * We think we are runtime suspended.
+		 *
+		 * Report the delta from when the device was suspended to now,
+		 * on top of the last known real value, as the approximated RC6
+		 * counter value.
+		 */
+		val = ktime_since(pmu->sleep_last);
+		val += pmu->sample[__I915_SAMPLE_RC6].cur;
+	}
+
+	if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
+		val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
+	else
+		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
+
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
+	return val;
+}
+
+static void init_rc6(struct i915_pmu *pmu)
+{
+	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
+	intel_wakeref_t wakeref;
+
+	with_intel_runtime_pm(i915->gt.uncore->rpm, wakeref) {
+		pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
+		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur =
+					pmu->sample[__I915_SAMPLE_RC6].cur;
+		pmu->sleep_last = ktime_get();
+	}
+}
+
+static void park_rc6(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+
+	pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
+	pmu->sleep_last = ktime_get();
+}
+
+#else
+
+static u64 get_rc6(struct intel_gt *gt)
+{
+	return __get_rc6(gt);
+}
+
+static void init_rc6(struct i915_pmu *pmu) { }
+static void park_rc6(struct drm_i915_private *i915) {}
+
+#endif
+
+static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
+{
+	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
+		pmu->timer_enabled = true;
+		pmu->timer_last = ktime_get();
+		hrtimer_start_range_ns(&pmu->timer,
 				       ns_to_ktime(PERIOD), 0,
 				       HRTIMER_MODE_REL_PINNED);
 	}
 }
 
-void i915_pmu_gt_unparked(struct drm_i915_private *i915)
+void i915_pmu_gt_parked(struct drm_i915_private *i915)
 {
-	if (!i915->pmu.base.event_init)
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (!pmu->base.event_init)
 		return;
 
-	spin_lock_irq(&i915->pmu.lock);
+	spin_lock_irq(&pmu->lock);
+
+	park_rc6(i915);
+
+	/*
+	 * Signal sampling timer to stop if only engine events are enabled and
+	 * GPU went idle.
+	 */
+	pmu->timer_enabled = pmu_needs_timer(pmu, false);
+
+	spin_unlock_irq(&pmu->lock);
+}
+
+void i915_pmu_gt_unparked(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (!pmu->base.event_init)
+		return;
+
+	spin_lock_irq(&pmu->lock);
+
 	/*
 	 * Re-enable sampling timer when GPU goes active.
 	 */
-	__i915_pmu_maybe_start_timer(i915);
-	spin_unlock_irq(&i915->pmu.lock);
-}
+	__i915_pmu_maybe_start_timer(pmu);
 
-static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
-{
-	if (!fw)
-		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
-
-	return true;
+	spin_unlock_irq(&pmu->lock);
 }
 
 static void
@@ -162,55 +271,79 @@
 	sample->cur += val;
 }
 
-static void
-engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
+static bool exclusive_mmio_access(const struct drm_i915_private *i915)
 {
+	/*
+	 * We have to avoid concurrent mmio cache line access on gen7 or
+	 * risk a machine hang. For a fun history lesson dig out the old
+	 * userspace intel_gpu_top and run it on Ivybridge or Haswell!
+	 */
+	return IS_GEN(i915, 7);
+}
+
+static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
+{
+	struct intel_engine_pmu *pmu = &engine->pmu;
+	bool busy;
+	u32 val;
+
+	val = ENGINE_READ_FW(engine, RING_CTL);
+	if (val == 0) /* powerwell off => engine idle */
+		return;
+
+	if (val & RING_WAIT)
+		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
+	if (val & RING_WAIT_SEMAPHORE)
+		add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
+
+	/* No need to sample when busy stats are supported. */
+	if (intel_engine_supports_stats(engine))
+		return;
+
+	/*
+	 * While waiting on a semaphore or event, MI_MODE reports the
+	 * ring as idle. However, previously using the seqno, and with
+	 * execlists sampling, we account for the ring waiting as the
+	 * engine being busy. Therefore, we record the sample as being
+	 * busy if either waiting or !idle.
+	 */
+	busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
+	if (!busy) {
+		val = ENGINE_READ_FW(engine, RING_MI_MODE);
+		busy = !(val & MODE_IDLE);
+	}
+	if (busy)
+		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
+}
+
+static void
+engines_sample(struct intel_gt *gt, unsigned int period_ns)
+{
+	struct drm_i915_private *i915 = gt->i915;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	bool fw = false;
+	unsigned long flags;
 
-	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
+	if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
 		return;
 
-	if (!dev_priv->gt.awake)
+	if (!intel_gt_pm_is_awake(gt))
 		return;
 
-	if (!intel_runtime_pm_get_if_in_use(dev_priv))
-		return;
+	for_each_engine(engine, gt, id) {
+		if (!intel_engine_pm_get_if_awake(engine))
+			continue;
 
-	for_each_engine(engine, dev_priv, id) {
-		u32 current_seqno = intel_engine_get_seqno(engine);
-		u32 last_seqno = intel_engine_last_submit(engine);
-		u32 val;
-
-		val = !i915_seqno_passed(current_seqno, last_seqno);
-
-		if (val)
-			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
-				   period_ns);
-
-		if (val && (engine->pmu.enable &
-			    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
-			fw = grab_forcewake(dev_priv, fw);
-
-			val = I915_READ_FW(RING_CTL(engine->mmio_base));
+		if (exclusive_mmio_access(i915)) {
+			spin_lock_irqsave(&engine->uncore->lock, flags);
+			engine_sample(engine, period_ns);
+			spin_unlock_irqrestore(&engine->uncore->lock, flags);
 		} else {
-			val = 0;
+			engine_sample(engine, period_ns);
 		}
 
-		if (val & RING_WAIT)
-			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
-				   period_ns);
-
-		if (val & RING_WAIT_SEMAPHORE)
-			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
-				   period_ns);
+		intel_engine_pm_put_async(engine);
 	}
-
-	if (fw)
-		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-
-	intel_runtime_pm_put(dev_priv);
 }
 
 static void
@@ -219,48 +352,74 @@
 	sample->cur += mul_u32_u32(val, mul);
 }
 
-static void
-frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
+static bool frequency_sampling_enabled(struct i915_pmu *pmu)
 {
-	if (dev_priv->pmu.enable &
-	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
+	return pmu->enable &
+	       (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
+		config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY));
+}
+
+static void
+frequency_sample(struct intel_gt *gt, unsigned int period_ns)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_uncore *uncore = gt->uncore;
+	struct i915_pmu *pmu = &i915->pmu;
+	struct intel_rps *rps = &gt->rps;
+
+	if (!frequency_sampling_enabled(pmu))
+		return;
+
+	/* Report 0/0 (actual/requested) frequency while parked. */
+	if (!intel_gt_pm_get_if_awake(gt))
+		return;
+
+	if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
 		u32 val;
 
-		val = dev_priv->gt_pm.rps.cur_freq;
-		if (dev_priv->gt.awake &&
-		    intel_runtime_pm_get_if_in_use(dev_priv)) {
-			val = intel_get_cagf(dev_priv,
-					     I915_READ_NOTRACE(GEN6_RPSTAT1));
-			intel_runtime_pm_put(dev_priv);
-		}
+		/*
+		 * We take a quick peek here without using forcewake
+		 * so that we don't perturb the system under observation
+		 * (forcewake => !rc6 => increased power use). We expect
+		 * that if the read fails because it is outside of the
+		 * mmio power well, then it will return 0 -- in which
+		 * case we assume the system is running at the intended
+		 * frequency. Fortunately, the read should rarely fail!
+		 */
+		val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
+		if (val)
+			val = intel_rps_get_cagf(rps, val);
+		else
+			val = rps->cur_freq;
 
-		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
-				intel_gpu_freq(dev_priv, val),
+		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
+				intel_gpu_freq(rps, val), period_ns / 1000);
+	}
+
+	if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
+		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
+				intel_gpu_freq(rps, rps->cur_freq),
 				period_ns / 1000);
 	}
 
-	if (dev_priv->pmu.enable &
-	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
-		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ],
-				intel_gpu_freq(dev_priv,
-					       dev_priv->gt_pm.rps.cur_freq),
-				period_ns / 1000);
-	}
+	intel_gt_pm_put_async(gt);
 }
 
 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
 {
 	struct drm_i915_private *i915 =
 		container_of(hrtimer, struct drm_i915_private, pmu.timer);
+	struct i915_pmu *pmu = &i915->pmu;
+	struct intel_gt *gt = &i915->gt;
 	unsigned int period_ns;
 	ktime_t now;
 
-	if (!READ_ONCE(i915->pmu.timer_enabled))
+	if (!READ_ONCE(pmu->timer_enabled))
 		return HRTIMER_NORESTART;
 
 	now = ktime_get();
-	period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last));
-	i915->pmu.timer_last = now;
+	period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
+	pmu->timer_last = now;
 
 	/*
 	 * Strictly speaking the passed in period may not be 100% accurate for
@@ -268,8 +427,8 @@
 	 * grabbing the forcewake. However the potential error from timer call-
 	 * back delay greatly dominates this so we keep it simple.
	 */
-	engines_sample(i915, period_ns);
-	frequency_sample(i915, period_ns);
+	engines_sample(gt, period_ns);
+	frequency_sample(gt, period_ns);
 
 	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
 
@@ -292,29 +451,12 @@
 	return sum;
 }
 
-static void engine_event_destroy(struct perf_event *event)
+static void i915_pmu_event_destroy(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
-	struct intel_engine_cs *engine;
 
-	engine = intel_engine_lookup_user(i915,
-					  engine_event_class(event),
-					  engine_event_instance(event));
-	if (WARN_ON_ONCE(!engine))
-		return;
-
-	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
-	    intel_engine_supports_stats(engine))
-		intel_disable_engine_stats(engine);
-}
-
-static void i915_pmu_event_destroy(struct perf_event *event)
-{
-	WARN_ON(event->parent);
-
-	if (is_engine_event(event))
-		engine_event_destroy(event);
+	drm_WARN_ON(&i915->drm, event->parent);
 }
 
 static int
@@ -344,7 +486,7 @@
 		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
 			/* Requires a mutex for sampling! */
 			return -ENODEV;
-		/* Fall-through. */
+		fallthrough;
 	case I915_PMU_REQUESTED_FREQUENCY:
 		if (INTEL_GEN(i915) < 6)
 			return -ENODEV;
@@ -367,23 +509,13 @@
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
 	struct intel_engine_cs *engine;
-	u8 sample;
-	int ret;
 
 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
 					  engine_event_instance(event));
 	if (!engine)
 		return -ENODEV;
 
-	sample = engine_event_sample(event);
-	ret = engine_event_status(engine, sample);
-	if (ret)
-		return ret;
-
-	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
-		ret = intel_enable_engine_stats(engine);
-
-	return ret;
+	return engine_event_status(engine, engine_event_sample(event));
 }
 
 static int i915_pmu_event_init(struct perf_event *event)
@@ -422,108 +554,11 @@
 	return 0;
 }
 
-static u64 __get_rc6(struct drm_i915_private *i915)
-{
-	u64 val;
-
-	val = intel_rc6_residency_ns(i915,
-				     IS_VALLEYVIEW(i915) ?
-				     VLV_GT_RENDER_RC6 :
-				     GEN6_GT_GFX_RC6);
-
-	if (HAS_RC6p(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
-
-	if (HAS_RC6pp(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
-
-	return val;
-}
-
-static u64 get_rc6(struct drm_i915_private *i915)
-{
-#if IS_ENABLED(CONFIG_PM)
-	unsigned long flags;
-	u64 val;
-
-	if (intel_runtime_pm_get_if_in_use(i915)) {
-		val = __get_rc6(i915);
-		intel_runtime_pm_put(i915);
-
-		/*
-		 * If we are coming back from being runtime suspended we must
-		 * be careful not to report a larger value than returned
-		 * previously.
-		 */
-
-		spin_lock_irqsave(&i915->pmu.lock, flags);
-
-		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
-			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
-		} else {
-			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		}
-
-		spin_unlock_irqrestore(&i915->pmu.lock, flags);
-	} else {
-		struct pci_dev *pdev = i915->drm.pdev;
-		struct device *kdev = &pdev->dev;
-
-		/*
-		 * We are runtime suspended.
-		 *
-		 * Report the delta from when the device was suspended to now,
-		 * on top of the last known real value, as the approximated RC6
-		 * counter value.
-		 */
-		spin_lock_irqsave(&i915->pmu.lock, flags);
-		spin_lock(&kdev->power.lock);
-
-		/*
-		 * After the above branch intel_runtime_pm_get_if_in_use failed
-		 * to get the runtime PM reference we cannot assume we are in
-		 * runtime suspend since we can either: a) race with coming out
-		 * of it before we took the power.lock, or b) there are other
-		 * states than suspended which can bring us here.
-		 *
-		 * We need to double-check that we are indeed currently runtime
-		 * suspended and if not we cannot do better than report the last
-		 * known RC6 value.
-		 */
-		if (kdev->power.runtime_status == RPM_SUSPENDED) {
-			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
-				i915->pmu.suspended_jiffies_last =
-						kdev->power.suspended_jiffies;
-
-			val = kdev->power.suspended_jiffies -
-			      i915->pmu.suspended_jiffies_last;
-			val += jiffies - kdev->power.accounting_timestamp;
-
-			val = jiffies_to_nsecs(val);
-			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
-
-			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
-		} else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		} else {
-			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
-		}
-
-		spin_unlock(&kdev->power.lock);
-		spin_unlock_irqrestore(&i915->pmu.lock, flags);
-	}
-
-	return val;
-#else
-	return __get_rc6(i915);
-#endif
-}
-
 static u64 __i915_pmu_event_read(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = &i915->pmu;
 	u64 val = 0;
 
 	if (is_engine_event(event)) {
534569 engine_event_class(event),
535570 engine_event_instance(event));
536571
537
- if (WARN_ON_ONCE(!engine)) {
572
+ if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
538573 /* Do nothing */
539574 } else if (sample == I915_SAMPLE_BUSY &&
540575 intel_engine_supports_stats(engine)) {
541
- val = ktime_to_ns(intel_engine_get_busy_time(engine));
576
+ ktime_t unused;
577
+
578
+ val = ktime_to_ns(intel_engine_get_busy_time(engine,
579
+ &unused));
542580 } else {
543581 val = engine->pmu.sample[sample].cur;
544582 }
@@ -546,19 +584,19 @@
 		switch (event->attr.config) {
 		case I915_PMU_ACTUAL_FREQUENCY:
 			val =
-			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
+			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
 				   USEC_PER_SEC /* to MHz */);
 			break;
 		case I915_PMU_REQUESTED_FREQUENCY:
 			val =
-			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
+			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
 				   USEC_PER_SEC /* to MHz */);
 			break;
 		case I915_PMU_INTERRUPTS:
 			val = count_interrupts(i915);
 			break;
 		case I915_PMU_RC6_RESIDENCY:
-			val = get_rc6(i915);
+			val = get_rc6(&i915->gt);
 			break;
 		}
 	}
@@ -586,23 +624,26 @@
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
 	unsigned int bit = event_enabled_bit(event);
+	struct i915_pmu *pmu = &i915->pmu;
 	unsigned long flags;
 
-	spin_lock_irqsave(&i915->pmu.lock, flags);
+	spin_lock_irqsave(&pmu->lock, flags);
 
 	/*
 	 * Update the bitmask of enabled events and increment
 	 * the event reference counter.
 	 */
-	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
-	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
-	i915->pmu.enable |= BIT_ULL(bit);
-	i915->pmu.enable_count[bit]++;
+	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
+	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
+	GEM_BUG_ON(pmu->enable_count[bit] == ~0);
+
+	pmu->enable |= BIT_ULL(bit);
+	pmu->enable_count[bit]++;
 
 	/*
 	 * Start the sampling timer if needed and not already enabled.
 	 */
-	__i915_pmu_maybe_start_timer(i915);
+	__i915_pmu_maybe_start_timer(pmu);
 
 	/*
 	 * For per-engine events the bitmask and reference counting
@@ -615,15 +656,20 @@
 		engine = intel_engine_lookup_user(i915,
 						  engine_event_class(event),
 						  engine_event_instance(event));
-		GEM_BUG_ON(!engine);
-		engine->pmu.enable |= BIT(sample);
 
-		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
+		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
+			     I915_ENGINE_SAMPLE_COUNT);
+		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
+			     I915_ENGINE_SAMPLE_COUNT);
+		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
+		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
+
+		engine->pmu.enable |= BIT(sample);
 		engine->pmu.enable_count[sample]++;
 	}
 
-	spin_unlock_irqrestore(&i915->pmu.lock, flags);
+	spin_unlock_irqrestore(&pmu->lock, flags);
 
 	/*
 	 * Store the current counter value so we can report the correct delta
@@ -638,9 +684,10 @@
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
 	unsigned int bit = event_enabled_bit(event);
+	struct i915_pmu *pmu = &i915->pmu;
 	unsigned long flags;
 
-	spin_lock_irqsave(&i915->pmu.lock, flags);
+	spin_lock_irqsave(&pmu->lock, flags);
 
 	if (is_engine_event(event)) {
 		u8 sample = engine_event_sample(event);
@@ -649,9 +696,11 @@
 		engine = intel_engine_lookup_user(i915,
 						  engine_event_class(event),
 						  engine_event_instance(event));
-		GEM_BUG_ON(!engine);
-		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
+
+		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
+		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
 		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
+
 		/*
 		 * Decrement the reference count and clear the enabled
 		 * bitmask when the last listener on an event goes away.
@@ -660,18 +709,18 @@
 			engine->pmu.enable &= ~BIT(sample);
 	}
 
-	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
-	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
+	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
+	GEM_BUG_ON(pmu->enable_count[bit] == 0);
 	/*
 	 * Decrement the reference count and clear the enabled
 	 * bitmask when the last listener on an event goes away.
 	 */
-	if (--i915->pmu.enable_count[bit] == 0) {
-		i915->pmu.enable &= ~BIT_ULL(bit);
-		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
+	if (--pmu->enable_count[bit] == 0) {
+		pmu->enable &= ~BIT_ULL(bit);
+		pmu->timer_enabled &= pmu_needs_timer(pmu, true);
 	}
 
-	spin_unlock_irqrestore(&i915->pmu.lock, flags);
+	spin_unlock_irqrestore(&pmu->lock, flags);
 }
 
 static void i915_pmu_event_start(struct perf_event *event, int flags)
@@ -750,11 +799,6 @@
 	return sprintf(buf, "config=0x%lx\n", eattr->val);
 }
 
-static struct attribute_group i915_pmu_events_attr_group = {
-	.name = "events",
-	/* Patch in attrs at runtime. */
-};
-
 static ssize_t
 i915_pmu_get_attr_cpumask(struct device *dev,
 			  struct device_attribute *attr,
@@ -772,13 +816,6 @@
 
 static const struct attribute_group i915_pmu_cpumask_attr_group = {
 	.attrs = i915_cpumask_attrs,
-};
-
-static const struct attribute_group *i915_pmu_attr_groups[] = {
-	&i915_pmu_format_attr_group,
-	&i915_pmu_events_attr_group,
-	&i915_pmu_cpumask_attr_group,
-	NULL
 };
 
 #define __event(__config, __name, __unit) \
@@ -820,8 +857,9 @@
 }
 
 static struct attribute **
-create_event_attributes(struct drm_i915_private *i915)
+create_event_attributes(struct i915_pmu *pmu)
 {
+	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
 	static const struct {
 		u64 config;
 		const char *name;
@@ -845,7 +883,6 @@
 	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
 	struct attribute **attr = NULL, **attr_iter;
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	unsigned int i;
 
 	/* Count how many counters we will be exposing. */
@@ -854,7 +891,7 @@
 			count++;
 	}
 
-	for_each_engine(engine, i915, id) {
+	for_each_uabi_engine(engine, i915) {
 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
 			if (!engine_event_status(engine,
 						 engine_events[i].sample))
@@ -905,7 +942,7 @@
 	}
 
 	/* Initialize supported engine counters. */
-	for_each_engine(engine, i915, id) {
+	for_each_uabi_engine(engine, i915) {
 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
 			char *str;
 
@@ -922,7 +959,7 @@
 			i915_iter =
 				add_i915_attr(i915_iter, str,
 					      __I915_PMU_ENGINE(engine->uabi_class,
-								engine->instance,
+								engine->uabi_instance,
 								engine_events[i].sample));
 
 			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
@@ -935,8 +972,8 @@
 		}
 	}
 
-	i915->pmu.i915_attr = i915_attr;
-	i915->pmu.pmu_attr = pmu_attr;
+	pmu->i915_attr = i915_attr;
+	pmu->pmu_attr = pmu_attr;
 
 	return attr;
 
@@ -952,25 +989,25 @@
 	return NULL;
 }
 
-static void free_event_attributes(struct drm_i915_private *i915)
+static void free_event_attributes(struct i915_pmu *pmu)
 {
-	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;
+	struct attribute **attr_iter = pmu->events_attr_group.attrs;
 
 	for (; *attr_iter; attr_iter++)
 		kfree((*attr_iter)->name);
 
-	kfree(i915_pmu_events_attr_group.attrs);
-	kfree(i915->pmu.i915_attr);
-	kfree(i915->pmu.pmu_attr);
+	kfree(pmu->events_attr_group.attrs);
+	kfree(pmu->i915_attr);
+	kfree(pmu->pmu_attr);
 
-	i915_pmu_events_attr_group.attrs = NULL;
-	i915->pmu.i915_attr = NULL;
-	i915->pmu.pmu_attr = NULL;
+	pmu->events_attr_group.attrs = NULL;
+	pmu->i915_attr = NULL;
+	pmu->pmu_attr = NULL;
 }
 
 static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
 {
-	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
+	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
 
 	GEM_BUG_ON(!pmu->base.event_init);
 
@@ -983,7 +1020,7 @@
 
 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
 {
-	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
+	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
 	unsigned int target;
 
 	GEM_BUG_ON(!pmu->base.event_init);
@@ -1000,9 +1037,7 @@
 	return 0;
 }
 
-static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
-
-static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
+static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
 {
 	enum cpuhp_state slot;
 	int ret;
@@ -1015,82 +1050,135 @@
 		return ret;
 
 	slot = ret;
-	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
+	ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node);
 	if (ret) {
 		cpuhp_remove_multi_state(slot);
 		return ret;
 	}
 
-	cpuhp_slot = slot;
+	pmu->cpuhp.slot = slot;
 	return 0;
 }
 
-static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
+static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
 {
-	WARN_ON(cpuhp_slot == CPUHP_INVALID);
-	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
-	cpuhp_remove_multi_state(cpuhp_slot);
+	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
+
+	drm_WARN_ON(&i915->drm, pmu->cpuhp.slot == CPUHP_INVALID);
+	drm_WARN_ON(&i915->drm, cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node));
+	cpuhp_remove_multi_state(pmu->cpuhp.slot);
+	pmu->cpuhp.slot = CPUHP_INVALID;
+}
+
+static bool is_igp(struct drm_i915_private *i915)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+
+	/* IGP is 0000:00:02.0 */
+	return pci_domain_nr(pdev->bus) == 0 &&
+	       pdev->bus->number == 0 &&
+	       PCI_SLOT(pdev->devfn) == 2 &&
+	       PCI_FUNC(pdev->devfn) == 0;
 }
 
 void i915_pmu_register(struct drm_i915_private *i915)
 {
-	int ret;
+	struct i915_pmu *pmu = &i915->pmu;
+	const struct attribute_group *attr_groups[] = {
+		&i915_pmu_format_attr_group,
+		&pmu->events_attr_group,
+		&i915_pmu_cpumask_attr_group,
+		NULL
+	};
+
+	int ret = -ENOMEM;
 
 	if (INTEL_GEN(i915) <= 2) {
-		DRM_INFO("PMU not supported for this GPU.");
+		drm_info(&i915->drm, "PMU not supported for this GPU.");
 		return;
 	}
 
-	i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
-	if (!i915_pmu_events_attr_group.attrs) {
-		ret = -ENOMEM;
-		goto err;
+	spin_lock_init(&pmu->lock);
+	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	pmu->timer.function = i915_sample;
+	pmu->cpuhp.slot = CPUHP_INVALID;
+	init_rc6(pmu);
+
+	if (!is_igp(i915)) {
+		pmu->name = kasprintf(GFP_KERNEL,
+				      "i915_%s",
+				      dev_name(i915->drm.dev));
+		if (pmu->name) {
+			/* tools/perf reserves colons as special. */
+			strreplace((char *)pmu->name, ':', '_');
+		}
+	} else {
+		pmu->name = "i915";
 	}
-
-	i915->pmu.base.attr_groups = i915_pmu_attr_groups;
-	i915->pmu.base.task_ctx_nr = perf_invalid_context;
-	i915->pmu.base.event_init = i915_pmu_event_init;
-	i915->pmu.base.add = i915_pmu_event_add;
-	i915->pmu.base.del = i915_pmu_event_del;
-	i915->pmu.base.start = i915_pmu_event_start;
-	i915->pmu.base.stop = i915_pmu_event_stop;
-	i915->pmu.base.read = i915_pmu_event_read;
-	i915->pmu.base.event_idx = i915_pmu_event_event_idx;
-
-	spin_lock_init(&i915->pmu.lock);
-	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	i915->pmu.timer.function = i915_sample;
-
-	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
-	if (ret)
+	if (!pmu->name)
 		goto err;
 
-	ret = i915_pmu_register_cpuhp_state(i915);
+	pmu->events_attr_group.name = "events";
+	pmu->events_attr_group.attrs = create_event_attributes(pmu);
+	if (!pmu->events_attr_group.attrs)
+		goto err_name;
+
+	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
+					GFP_KERNEL);
+	if (!pmu->base.attr_groups)
+		goto err_attr;
+
+	pmu->base.module = THIS_MODULE;
+	pmu->base.task_ctx_nr = perf_invalid_context;
+	pmu->base.event_init = i915_pmu_event_init;
+	pmu->base.add = i915_pmu_event_add;
+	pmu->base.del = i915_pmu_event_del;
+	pmu->base.start = i915_pmu_event_start;
+	pmu->base.stop = i915_pmu_event_stop;
+	pmu->base.read = i915_pmu_event_read;
+	pmu->base.event_idx = i915_pmu_event_event_idx;
+
+	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
+	if (ret)
+		goto err_groups;
+
+	ret = i915_pmu_register_cpuhp_state(pmu);
 	if (ret)
 		goto err_unreg;
 
 	return;
 
 err_unreg:
-	perf_pmu_unregister(&i915->pmu.base);
+	perf_pmu_unregister(&pmu->base);
+err_groups:
+	kfree(pmu->base.attr_groups);
+err_attr:
+	pmu->base.event_init = NULL;
+	free_event_attributes(pmu);
+err_name:
+	if (!is_igp(i915))
+		kfree(pmu->name);
 err:
-	i915->pmu.base.event_init = NULL;
-	free_event_attributes(i915);
-	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
+	drm_notice(&i915->drm, "Failed to register PMU!\n");
 }
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
 {
-	if (!i915->pmu.base.event_init)
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (!pmu->base.event_init)
 		return;
 
-	WARN_ON(i915->pmu.enable);
+	drm_WARN_ON(&i915->drm, pmu->enable);
 
-	hrtimer_cancel(&i915->pmu.timer);
+	hrtimer_cancel(&pmu->timer);
 
-	i915_pmu_unregister_cpuhp_state(i915);
+	i915_pmu_unregister_cpuhp_state(pmu);
 
-	perf_pmu_unregister(&i915->pmu.base);
-	i915->pmu.base.event_init = NULL;
-	free_event_attributes(i915);
+	perf_pmu_unregister(&pmu->base);
+	pmu->base.event_init = NULL;
+	kfree(pmu->base.attr_groups);
+	if (!is_igp(i915))
+		kfree(pmu->name);
+	free_event_attributes(pmu);
 }
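
The counters reworked above are consumed from userspace through the perf interface. The snippet below is a minimal illustrative sketch (not part of the change itself) of reading the rc6-residency event via perf_event_open(2); it assumes the integrated-GPU case where the PMU registers under the name "i915", that the i915 uapi header providing I915_PMU_RC6_RESIDENCY is on the include path, and that the caller has sufficient privileges for system-wide events.

/*
 * Illustrative sketch only: read the i915 rc6-residency counter through
 * perf_event_open(2). Assumes the PMU is named "i915" (integrated GPU)
 * and that I915_PMU_RC6_RESIDENCY comes from the i915 uapi header.
 */
#include <drm/i915_drm.h>
#include <linux/perf_event.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_i915_event(unsigned long long config)
{
	struct perf_event_attr attr;
	FILE *f;
	int type = -1;

	/* The dynamic PMU type id is published by the perf core in sysfs. */
	f = fopen("/sys/bus/event_source/devices/i915/type", "r");
	if (!f)
		return -1;
	if (fscanf(f, "%d", &type) != 1)
		type = -1;
	fclose(f);
	if (type < 0)
		return -1;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = config;

	/* System-wide (uncore-style) event: pid == -1, one specific CPU. */
	return syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
}

int main(void)
{
	unsigned long long before, after;
	int fd = open_i915_event(I915_PMU_RC6_RESIDENCY);

	if (fd < 0)
		return EXIT_FAILURE;

	read(fd, &before, sizeof(before));
	sleep(1);
	read(fd, &after, sizeof(after));
	close(fd);

	/* RC6 residency is reported in nanoseconds. */
	printf("rc6: %llu ns over ~1s\n", after - before);
	return EXIT_SUCCESS;
}

The same counter is visible to the perf tool as i915/rc6-residency/, alongside the other events created in create_event_attributes() above.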