| .. | .. |
|---|
| 5 | 5 | */ |
|---|
| 6 | 6 | |
|---|
| 7 | 7 | #include <linux/irq.h> |
|---|
| 8 | | -#include "i915_pmu.h" |
|---|
| 9 | | -#include "intel_ringbuffer.h" |
|---|
| 8 | +#include <linux/pm_runtime.h> |
|---|
| 9 | + |
|---|
| 10 | +#include "gt/intel_engine.h" |
|---|
| 11 | +#include "gt/intel_engine_pm.h" |
|---|
| 12 | +#include "gt/intel_engine_user.h" |
|---|
| 13 | +#include "gt/intel_gt_pm.h" |
|---|
| 14 | +#include "gt/intel_rc6.h" |
|---|
| 15 | +#include "gt/intel_rps.h" |
|---|
| 16 | + |
|---|
| 10 | 17 | #include "i915_drv.h" |
|---|
| 18 | +#include "i915_pmu.h" |
|---|
| 19 | +#include "intel_pm.h" |
|---|
| 11 | 20 | |
|---|
| 12 | 21 | /* Frequency for the sampling timer for events which need it. */ |
|---|
| 13 | 22 | #define FREQUENCY 200 |
|---|
| .. | .. |
|---|
| 70 | 79 | return config_enabled_bit(event->attr.config); |
|---|
| 71 | 80 | } |
|---|
| 72 | 81 | |
|---|
| 73 | | -static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) |
|---|
| 82 | +static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) |
|---|
| 74 | 83 | { |
|---|
| 84 | + struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); |
|---|
| 75 | 85 | u64 enable; |
|---|
| 76 | 86 | |
|---|
| 77 | 87 | /* |
|---|
| .. | .. |
|---|
| 79 | 89 | * |
|---|
| 80 | 90 | * We start with a bitmask of all currently enabled events. |
|---|
| 81 | 91 | */ |
|---|
| 82 | | - enable = i915->pmu.enable; |
|---|
| 92 | + enable = pmu->enable; |
|---|
| 83 | 93 | |
|---|
| 84 | 94 | /* |
|---|
| 85 | 95 | * Mask out all the ones which do not need the timer, or in |
|---|
| .. | .. |
|---|
| 98 | 108 | /* |
|---|
| 99 | 109 | * Also there is software busyness tracking available we do not |
|---|
| 100 | 110 | * need the timer for I915_SAMPLE_BUSY counter. |
|---|
| 101 | | - * |
|---|
| 102 | | - * Use RCS as proxy for all engines. |
|---|
| 103 | 111 | */ |
|---|
| 104 | | - else if (intel_engine_supports_stats(i915->engine[RCS])) |
|---|
| 112 | + else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS) |
|---|
| 105 | 113 | enable &= ~BIT(I915_SAMPLE_BUSY); |
|---|
| 106 | 114 | |
|---|
| 107 | 115 | /* |
|---|
| .. | .. |
|---|
| 110 | 118 | return enable; |
|---|
| 111 | 119 | } |
|---|
| 112 | 120 | |
|---|
| 113 | | -void i915_pmu_gt_parked(struct drm_i915_private *i915) |
|---|
| 121 | +static u64 __get_rc6(struct intel_gt *gt) |
|---|
| 114 | 122 | { |
|---|
| 115 | | - if (!i915->pmu.base.event_init) |
|---|
| 116 | | - return; |
|---|
| 123 | + struct drm_i915_private *i915 = gt->i915; |
|---|
| 124 | + u64 val; |
|---|
| 117 | 125 | |
|---|
| 118 | | - spin_lock_irq(&i915->pmu.lock); |
|---|
| 119 | | - /* |
|---|
| 120 | | - * Signal sampling timer to stop if only engine events are enabled and |
|---|
| 121 | | - * GPU went idle. |
|---|
| 122 | | - */ |
|---|
| 123 | | - i915->pmu.timer_enabled = pmu_needs_timer(i915, false); |
|---|
| 124 | | - spin_unlock_irq(&i915->pmu.lock); |
|---|
| 126 | + val = intel_rc6_residency_ns(&gt->rc6, |
|---|
| 127 | + IS_VALLEYVIEW(i915) ? |
|---|
| 128 | + VLV_GT_RENDER_RC6 : |
|---|
| 129 | + GEN6_GT_GFX_RC6); |
|---|
| 130 | + |
|---|
| 131 | + if (HAS_RC6p(i915)) |
|---|
| 132 | + val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p); |
|---|
| 133 | + |
|---|
| 134 | + if (HAS_RC6pp(i915)) |
|---|
| 135 | + val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp); |
|---|
| 136 | + |
|---|
| 137 | + return val; |
|---|
| 125 | 138 | } |
|---|
| 126 | 139 | |
|---|
| 127 | | -static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915) |
|---|
| 140 | +#if IS_ENABLED(CONFIG_PM) |
|---|
| 141 | + |
|---|
| 142 | +static inline s64 ktime_since(const ktime_t kt) |
|---|
| 128 | 143 | { |
|---|
| 129 | | - if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) { |
|---|
| 130 | | - i915->pmu.timer_enabled = true; |
|---|
| 131 | | - i915->pmu.timer_last = ktime_get(); |
|---|
| 132 | | - hrtimer_start_range_ns(&i915->pmu.timer, |
|---|
| 144 | + return ktime_to_ns(ktime_sub(ktime_get(), kt)); |
|---|
| 145 | +} |
|---|
| 146 | + |
|---|
| 147 | +static u64 get_rc6(struct intel_gt *gt) |
|---|
| 148 | +{ |
|---|
| 149 | + struct drm_i915_private *i915 = gt->i915; |
|---|
| 150 | + struct i915_pmu *pmu = &i915->pmu; |
|---|
| 151 | + unsigned long flags; |
|---|
| 152 | + bool awake = false; |
|---|
| 153 | + u64 val; |
|---|
| 154 | + |
|---|
| 155 | + if (intel_gt_pm_get_if_awake(gt)) { |
|---|
| 156 | + val = __get_rc6(gt); |
|---|
| 157 | + intel_gt_pm_put_async(gt); |
|---|
| 158 | + awake = true; |
|---|
| 159 | + } |
|---|
| 160 | + |
|---|
| 161 | + spin_lock_irqsave(&pmu->lock, flags); |
|---|
| 162 | + |
|---|
| 163 | + if (awake) { |
|---|
| 164 | + pmu->sample[__I915_SAMPLE_RC6].cur = val; |
|---|
| 165 | + } else { |
|---|
| 166 | + /* |
|---|
| 167 | + * We think we are runtime suspended. |
|---|
| 168 | + * |
|---|
| 169 | + * Report the delta from when the device was suspended to now, |
|---|
| 170 | + * on top of the last known real value, as the approximated RC6 |
|---|
| 171 | + * counter value. |
|---|
| 172 | + */ |
|---|
| 173 | + val = ktime_since(pmu->sleep_last); |
|---|
| 174 | + val += pmu->sample[__I915_SAMPLE_RC6].cur; |
|---|
| 175 | + } |
|---|
| 176 | + |
|---|
| 177 | + if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur) |
|---|
| 178 | + val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur; |
|---|
| 179 | + else |
|---|
| 180 | + pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val; |
|---|
| 181 | + |
|---|
| 182 | + spin_unlock_irqrestore(&pmu->lock, flags); |
|---|
| 183 | + |
|---|
| 184 | + return val; |
|---|
| 185 | +} |
|---|
| 186 | + |
|---|
| 187 | +static void init_rc6(struct i915_pmu *pmu) |
|---|
| 188 | +{ |
|---|
| 189 | + struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); |
|---|
| 190 | + intel_wakeref_t wakeref; |
|---|
| 191 | + |
|---|
| 192 | + with_intel_runtime_pm(i915->gt.uncore->rpm, wakeref) { |
|---|
| 193 | + pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); |
|---|
| 194 | + pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = |
|---|
| 195 | + pmu->sample[__I915_SAMPLE_RC6].cur; |
|---|
| 196 | + pmu->sleep_last = ktime_get(); |
|---|
| 197 | + } |
|---|
| 198 | +} |
|---|
| 199 | + |
|---|
| 200 | +static void park_rc6(struct drm_i915_private *i915) |
|---|
| 201 | +{ |
|---|
| 202 | + struct i915_pmu *pmu = &i915->pmu; |
|---|
| 203 | + |
|---|
| 204 | + pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); |
|---|
| 205 | + pmu->sleep_last = ktime_get(); |
|---|
| 206 | +} |
|---|
| 207 | + |
|---|
| 208 | +#else |
|---|
| 209 | + |
|---|
| 210 | +static u64 get_rc6(struct intel_gt *gt) |
|---|
| 211 | +{ |
|---|
| 212 | + return __get_rc6(gt); |
|---|
| 213 | +} |
|---|
| 214 | + |
|---|
| 215 | +static void init_rc6(struct i915_pmu *pmu) { } |
|---|
| 216 | +static void park_rc6(struct drm_i915_private *i915) {} |
|---|
| 217 | + |
|---|
| 218 | +#endif |
|---|
| 219 | + |
|---|
| 220 | +static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) |
|---|
| 221 | +{ |
|---|
| 222 | + if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) { |
|---|
| 223 | + pmu->timer_enabled = true; |
|---|
| 224 | + pmu->timer_last = ktime_get(); |
|---|
| 225 | + hrtimer_start_range_ns(&pmu->timer, |
|---|
| 133 | 226 | ns_to_ktime(PERIOD), 0, |
|---|
| 134 | 227 | HRTIMER_MODE_REL_PINNED); |
|---|
| 135 | 228 | } |
|---|
| 136 | 229 | } |
|---|
| 137 | 230 | |
|---|
| 138 | | -void i915_pmu_gt_unparked(struct drm_i915_private *i915) |
|---|
| 231 | +void i915_pmu_gt_parked(struct drm_i915_private *i915) |
|---|
| 139 | 232 | { |
|---|
| 140 | | - if (!i915->pmu.base.event_init) |
|---|
| 233 | + struct i915_pmu *pmu = &i915->pmu; |
|---|
| 234 | + |
|---|
| 235 | + if (!pmu->base.event_init) |
|---|
| 141 | 236 | return; |
|---|
| 142 | 237 | |
|---|
| 143 | | - spin_lock_irq(&i915->pmu.lock); |
|---|
| 238 | + spin_lock_irq(&pmu->lock); |
|---|
| 239 | + |
|---|
| 240 | + park_rc6(i915); |
|---|
| 241 | + |
|---|
| 242 | + /* |
|---|
| 243 | + * Signal sampling timer to stop if only engine events are enabled and |
|---|
| 244 | + * GPU went idle. |
|---|
| 245 | + */ |
|---|
| 246 | + pmu->timer_enabled = pmu_needs_timer(pmu, false); |
|---|
| 247 | + |
|---|
| 248 | + spin_unlock_irq(&pmu->lock); |
|---|
| 249 | +} |
|---|
| 250 | + |
|---|
| 251 | +void i915_pmu_gt_unparked(struct drm_i915_private *i915) |
|---|
| 252 | +{ |
|---|
| 253 | + struct i915_pmu *pmu = &i915->pmu; |
|---|
| 254 | + |
|---|
| 255 | + if (!pmu->base.event_init) |
|---|
| 256 | + return; |
|---|
| 257 | + |
|---|
| 258 | + spin_lock_irq(&pmu->lock); |
|---|
| 259 | + |
|---|
| 144 | 260 | /* |
|---|
| 145 | 261 | * Re-enable sampling timer when GPU goes active. |
|---|
| 146 | 262 | */ |
|---|
| 147 | | - __i915_pmu_maybe_start_timer(i915); |
|---|
| 148 | | - spin_unlock_irq(&i915->pmu.lock); |
|---|
| 149 | | -} |
|---|
| 263 | + __i915_pmu_maybe_start_timer(pmu); |
|---|
| 150 | 264 | |
|---|
| 151 | | -static bool grab_forcewake(struct drm_i915_private *i915, bool fw) |
|---|
| 152 | | -{ |
|---|
| 153 | | - if (!fw) |
|---|
| 154 | | - intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); |
|---|
| 155 | | - |
|---|
| 156 | | - return true; |
|---|
| 265 | + spin_unlock_irq(&pmu->lock); |
|---|
| 157 | 266 | } |
|---|
| 158 | 267 | |
|---|
| 159 | 268 | static void |
|---|
| .. | .. |
|---|
| 162 | 271 | sample->cur += val; |
|---|
| 163 | 272 | } |
|---|
| 164 | 273 | |
|---|
| 165 | | -static void |
|---|
| 166 | | -engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) |
|---|
| 274 | +static bool exclusive_mmio_access(const struct drm_i915_private *i915) |
|---|
| 167 | 275 | { |
|---|
| 276 | + /* |
|---|
| 277 | + * We have to avoid concurrent mmio cache line access on gen7 or |
|---|
| 278 | + * risk a machine hang. For a fun history lesson dig out the old |
|---|
| 279 | + * userspace intel_gpu_top and run it on Ivybridge or Haswell! |
|---|
| 280 | + */ |
|---|
| 281 | + return IS_GEN(i915, 7); |
|---|
| 282 | +} |
|---|
| 283 | + |
|---|
| 284 | +static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns) |
|---|
| 285 | +{ |
|---|
| 286 | + struct intel_engine_pmu *pmu = &engine->pmu; |
|---|
| 287 | + bool busy; |
|---|
| 288 | + u32 val; |
|---|
| 289 | + |
|---|
| 290 | + val = ENGINE_READ_FW(engine, RING_CTL); |
|---|
| 291 | + if (val == 0) /* powerwell off => engine idle */ |
|---|
| 292 | + return; |
|---|
| 293 | + |
|---|
| 294 | + if (val & RING_WAIT) |
|---|
| 295 | + add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); |
|---|
| 296 | + if (val & RING_WAIT_SEMAPHORE) |
|---|
| 297 | + add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); |
|---|
| 298 | + |
|---|
| 299 | + /* No need to sample when busy stats are supported. */ |
|---|
| 300 | + if (intel_engine_supports_stats(engine)) |
|---|
| 301 | + return; |
|---|
| 302 | + |
|---|
| 303 | + /* |
|---|
| 304 | + * While waiting on a semaphore or event, MI_MODE reports the |
|---|
| 305 | + * ring as idle. However, previously using the seqno, and with |
|---|
| 306 | + * execlists sampling, we account for the ring waiting as the |
|---|
| 307 | + * engine being busy. Therefore, we record the sample as being |
|---|
| 308 | + * busy if either waiting or !idle. |
|---|
| 309 | + */ |
|---|
| 310 | + busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT); |
|---|
| 311 | + if (!busy) { |
|---|
| 312 | + val = ENGINE_READ_FW(engine, RING_MI_MODE); |
|---|
| 313 | + busy = !(val & MODE_IDLE); |
|---|
| 314 | + } |
|---|
| 315 | + if (busy) |
|---|
| 316 | + add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); |
|---|
| 317 | +} |
|---|
| 318 | + |
|---|
| 319 | +static void |
|---|
| 320 | +engines_sample(struct intel_gt *gt, unsigned int period_ns) |
|---|
| 321 | +{ |
|---|
| 322 | + struct drm_i915_private *i915 = gt->i915; |
|---|
| 168 | 323 | struct intel_engine_cs *engine; |
|---|
| 169 | 324 | enum intel_engine_id id; |
|---|
| 170 | | - bool fw = false; |
|---|
| 325 | + unsigned long flags; |
|---|
| 171 | 326 | |
|---|
| 172 | | - if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0) |
|---|
| 327 | + if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0) |
|---|
| 173 | 328 | return; |
|---|
| 174 | 329 | |
|---|
| 175 | | - if (!dev_priv->gt.awake) |
|---|
| 330 | + if (!intel_gt_pm_is_awake(gt)) |
|---|
| 176 | 331 | return; |
|---|
| 177 | 332 | |
|---|
| 178 | | - if (!intel_runtime_pm_get_if_in_use(dev_priv)) |
|---|
| 179 | | - return; |
|---|
| 333 | + for_each_engine(engine, gt, id) { |
|---|
| 334 | + if (!intel_engine_pm_get_if_awake(engine)) |
|---|
| 335 | + continue; |
|---|
| 180 | 336 | |
|---|
| 181 | | - for_each_engine(engine, dev_priv, id) { |
|---|
| 182 | | - u32 current_seqno = intel_engine_get_seqno(engine); |
|---|
| 183 | | - u32 last_seqno = intel_engine_last_submit(engine); |
|---|
| 184 | | - u32 val; |
|---|
| 185 | | - |
|---|
| 186 | | - val = !i915_seqno_passed(current_seqno, last_seqno); |
|---|
| 187 | | - |
|---|
| 188 | | - if (val) |
|---|
| 189 | | - add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY], |
|---|
| 190 | | - period_ns); |
|---|
| 191 | | - |
|---|
| 192 | | - if (val && (engine->pmu.enable & |
|---|
| 193 | | - (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) { |
|---|
| 194 | | - fw = grab_forcewake(dev_priv, fw); |
|---|
| 195 | | - |
|---|
| 196 | | - val = I915_READ_FW(RING_CTL(engine->mmio_base)); |
|---|
| 337 | + if (exclusive_mmio_access(i915)) { |
|---|
| 338 | + spin_lock_irqsave(&engine->uncore->lock, flags); |
|---|
| 339 | + engine_sample(engine, period_ns); |
|---|
| 340 | + spin_unlock_irqrestore(&engine->uncore->lock, flags); |
|---|
| 197 | 341 | } else { |
|---|
| 198 | | - val = 0; |
|---|
| 342 | + engine_sample(engine, period_ns); |
|---|
| 199 | 343 | } |
|---|
| 200 | 344 | |
|---|
| 201 | | - if (val & RING_WAIT) |
|---|
| 202 | | - add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT], |
|---|
| 203 | | - period_ns); |
|---|
| 204 | | - |
|---|
| 205 | | - if (val & RING_WAIT_SEMAPHORE) |
|---|
| 206 | | - add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA], |
|---|
| 207 | | - period_ns); |
|---|
| 345 | + intel_engine_pm_put_async(engine); |
|---|
| 208 | 346 | } |
|---|
| 209 | | - |
|---|
| 210 | | - if (fw) |
|---|
| 211 | | - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); |
|---|
| 212 | | - |
|---|
| 213 | | - intel_runtime_pm_put(dev_priv); |
|---|
| 214 | 347 | } |
|---|
| 215 | 348 | |
|---|
| 216 | 349 | static void |
|---|
| .. | .. |
|---|
| 219 | 352 | sample->cur += mul_u32_u32(val, mul); |
|---|
| 220 | 353 | } |
|---|
| 221 | 354 | |
|---|
| 222 | | -static void |
|---|
| 223 | | -frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) |
|---|
| 355 | +static bool frequency_sampling_enabled(struct i915_pmu *pmu) |
|---|
| 224 | 356 | { |
|---|
| 225 | | - if (dev_priv->pmu.enable & |
|---|
| 226 | | - config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { |
|---|
| 357 | + return pmu->enable & |
|---|
| 358 | + (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) | |
|---|
| 359 | + config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)); |
|---|
| 360 | +} |
|---|
| 361 | + |
|---|
| 362 | +static void |
|---|
| 363 | +frequency_sample(struct intel_gt *gt, unsigned int period_ns) |
|---|
| 364 | +{ |
|---|
| 365 | + struct drm_i915_private *i915 = gt->i915; |
|---|
| 366 | + struct intel_uncore *uncore = gt->uncore; |
|---|
| 367 | + struct i915_pmu *pmu = &i915->pmu; |
|---|
| 368 | + struct intel_rps *rps = &gt->rps; |
|---|
| 369 | + |
|---|
| 370 | + if (!frequency_sampling_enabled(pmu)) |
|---|
| 371 | + return; |
|---|
| 372 | + |
|---|
| 373 | + /* Report 0/0 (actual/requested) frequency while parked. */ |
|---|
| 374 | + if (!intel_gt_pm_get_if_awake(gt)) |
|---|
| 375 | + return; |
|---|
| 376 | + |
|---|
| 377 | + if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { |
|---|
| 227 | 378 | u32 val; |
|---|
| 228 | 379 | |
|---|
| 229 | | - val = dev_priv->gt_pm.rps.cur_freq; |
|---|
| 230 | | - if (dev_priv->gt.awake && |
|---|
| 231 | | - intel_runtime_pm_get_if_in_use(dev_priv)) { |
|---|
| 232 | | - val = intel_get_cagf(dev_priv, |
|---|
| 233 | | - I915_READ_NOTRACE(GEN6_RPSTAT1)); |
|---|
| 234 | | - intel_runtime_pm_put(dev_priv); |
|---|
| 235 | | - } |
|---|
| 380 | + /* |
|---|
| 381 | + * We take a quick peek here without using forcewake |
|---|
| 382 | + * so that we don't perturb the system under observation |
|---|
| 383 | + * (forcewake => !rc6 => increased power use). We expect |
|---|
| 384 | + * that if the read fails because it is outside of the |
|---|
| 385 | + * mmio power well, then it will return 0 -- in which |
|---|
| 386 | + * case we assume the system is running at the intended |
|---|
| 387 | + * frequency. Fortunately, the read should rarely fail! |
|---|
| 388 | + */ |
|---|
| 389 | + val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1); |
|---|
| 390 | + if (val) |
|---|
| 391 | + val = intel_rps_get_cagf(rps, val); |
|---|
| 392 | + else |
|---|
| 393 | + val = rps->cur_freq; |
|---|
| 236 | 394 | |
|---|
| 237 | | - add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT], |
|---|
| 238 | | - intel_gpu_freq(dev_priv, val), |
|---|
| 395 | + add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], |
|---|
| 396 | + intel_gpu_freq(rps, val), period_ns / 1000); |
|---|
| 397 | + } |
|---|
| 398 | + |
|---|
| 399 | + if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { |
|---|
| 400 | + add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ], |
|---|
| 401 | + intel_gpu_freq(rps, rps->cur_freq), |
|---|
| 239 | 402 | period_ns / 1000); |
|---|
| 240 | 403 | } |
|---|
| 241 | 404 | |
|---|
| 242 | | - if (dev_priv->pmu.enable & |
|---|
| 243 | | - config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { |
|---|
| 244 | | - add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], |
|---|
| 245 | | - intel_gpu_freq(dev_priv, |
|---|
| 246 | | - dev_priv->gt_pm.rps.cur_freq), |
|---|
| 247 | | - period_ns / 1000); |
|---|
| 248 | | - } |
|---|
| 405 | + intel_gt_pm_put_async(gt); |
|---|
| 249 | 406 | } |
|---|
| 250 | 407 | |
|---|
| 251 | 408 | static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) |
|---|
| 252 | 409 | { |
|---|
| 253 | 410 | struct drm_i915_private *i915 = |
|---|
| 254 | 411 | container_of(hrtimer, struct drm_i915_private, pmu.timer); |
|---|
| 412 | + struct i915_pmu *pmu = &i915->pmu; |
|---|
| 413 | + struct intel_gt *gt = &i915->gt; |
|---|
| 255 | 414 | unsigned int period_ns; |
|---|
| 256 | 415 | ktime_t now; |
|---|
| 257 | 416 | |
|---|
| 258 | | - if (!READ_ONCE(i915->pmu.timer_enabled)) |
|---|
| 417 | + if (!READ_ONCE(pmu->timer_enabled)) |
|---|
| 259 | 418 | return HRTIMER_NORESTART; |
|---|
| 260 | 419 | |
|---|
| 261 | 420 | now = ktime_get(); |
|---|
| 262 | | - period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last)); |
|---|
| 263 | | - i915->pmu.timer_last = now; |
|---|
| 421 | + period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last)); |
|---|
| 422 | + pmu->timer_last = now; |
|---|
| 264 | 423 | |
|---|
| 265 | 424 | /* |
|---|
| 266 | 425 | * Strictly speaking the passed in period may not be 100% accurate for |
|---|
| .. | .. |
|---|
| 268 | 427 | * grabbing the forcewake. However the potential error from timer call- |
|---|
| 269 | 428 | * back delay greatly dominates this so we keep it simple. |
|---|
| 270 | 429 | */ |
|---|
| 271 | | - engines_sample(i915, period_ns); |
|---|
| 272 | | - frequency_sample(i915, period_ns); |
|---|
| 430 | + engines_sample(gt, period_ns); |
|---|
| 431 | + frequency_sample(gt, period_ns); |
|---|
| 273 | 432 | |
|---|
| 274 | 433 | hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD)); |
|---|
| 275 | 434 | |
|---|
| .. | .. |
|---|
| 292 | 451 | return sum; |
|---|
| 293 | 452 | } |
|---|
| 294 | 453 | |
|---|
| 295 | | -static void engine_event_destroy(struct perf_event *event) |
|---|
| 454 | +static void i915_pmu_event_destroy(struct perf_event *event) |
|---|
| 296 | 455 | { |
|---|
| 297 | 456 | struct drm_i915_private *i915 = |
|---|
| 298 | 457 | container_of(event->pmu, typeof(*i915), pmu.base); |
|---|
| 299 | | - struct intel_engine_cs *engine; |
|---|
| 300 | 458 | |
|---|
| 301 | | - engine = intel_engine_lookup_user(i915, |
|---|
| 302 | | - engine_event_class(event), |
|---|
| 303 | | - engine_event_instance(event)); |
|---|
| 304 | | - if (WARN_ON_ONCE(!engine)) |
|---|
| 305 | | - return; |
|---|
| 306 | | - |
|---|
| 307 | | - if (engine_event_sample(event) == I915_SAMPLE_BUSY && |
|---|
| 308 | | - intel_engine_supports_stats(engine)) |
|---|
| 309 | | - intel_disable_engine_stats(engine); |
|---|
| 310 | | -} |
|---|
| 311 | | - |
|---|
| 312 | | -static void i915_pmu_event_destroy(struct perf_event *event) |
|---|
| 313 | | -{ |
|---|
| 314 | | - WARN_ON(event->parent); |
|---|
| 315 | | - |
|---|
| 316 | | - if (is_engine_event(event)) |
|---|
| 317 | | - engine_event_destroy(event); |
|---|
| 459 | + drm_WARN_ON(&i915->drm, event->parent); |
|---|
| 318 | 460 | } |
|---|
| 319 | 461 | |
|---|
| 320 | 462 | static int |
|---|
| .. | .. |
|---|
| 344 | 486 | if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) |
|---|
| 345 | 487 | /* Requires a mutex for sampling! */ |
|---|
| 346 | 488 | return -ENODEV; |
|---|
| 347 | | - /* Fall-through. */ |
|---|
| 489 | + fallthrough; |
|---|
| 348 | 490 | case I915_PMU_REQUESTED_FREQUENCY: |
|---|
| 349 | 491 | if (INTEL_GEN(i915) < 6) |
|---|
| 350 | 492 | return -ENODEV; |
|---|
| .. | .. |
|---|
| 367 | 509 | struct drm_i915_private *i915 = |
|---|
| 368 | 510 | container_of(event->pmu, typeof(*i915), pmu.base); |
|---|
| 369 | 511 | struct intel_engine_cs *engine; |
|---|
| 370 | | - u8 sample; |
|---|
| 371 | | - int ret; |
|---|
| 372 | 512 | |
|---|
| 373 | 513 | engine = intel_engine_lookup_user(i915, engine_event_class(event), |
|---|
| 374 | 514 | engine_event_instance(event)); |
|---|
| 375 | 515 | if (!engine) |
|---|
| 376 | 516 | return -ENODEV; |
|---|
| 377 | 517 | |
|---|
| 378 | | - sample = engine_event_sample(event); |
|---|
| 379 | | - ret = engine_event_status(engine, sample); |
|---|
| 380 | | - if (ret) |
|---|
| 381 | | - return ret; |
|---|
| 382 | | - |
|---|
| 383 | | - if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine)) |
|---|
| 384 | | - ret = intel_enable_engine_stats(engine); |
|---|
| 385 | | - |
|---|
| 386 | | - return ret; |
|---|
| 518 | + return engine_event_status(engine, engine_event_sample(event)); |
|---|
| 387 | 519 | } |
|---|
| 388 | 520 | |
|---|
| 389 | 521 | static int i915_pmu_event_init(struct perf_event *event) |
|---|
| .. | .. |
|---|
| 422 | 554 | return 0; |
|---|
| 423 | 555 | } |
|---|
| 424 | 556 | |
|---|
| 425 | | -static u64 __get_rc6(struct drm_i915_private *i915) |
|---|
| 426 | | -{ |
|---|
| 427 | | - u64 val; |
|---|
| 428 | | - |
|---|
| 429 | | - val = intel_rc6_residency_ns(i915, |
|---|
| 430 | | - IS_VALLEYVIEW(i915) ? |
|---|
| 431 | | - VLV_GT_RENDER_RC6 : |
|---|
| 432 | | - GEN6_GT_GFX_RC6); |
|---|
| 433 | | - |
|---|
| 434 | | - if (HAS_RC6p(i915)) |
|---|
| 435 | | - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); |
|---|
| 436 | | - |
|---|
| 437 | | - if (HAS_RC6pp(i915)) |
|---|
| 438 | | - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); |
|---|
| 439 | | - |
|---|
| 440 | | - return val; |
|---|
| 441 | | -} |
|---|
| 442 | | - |
|---|
| 443 | | -static u64 get_rc6(struct drm_i915_private *i915) |
|---|
| 444 | | -{ |
|---|
| 445 | | -#if IS_ENABLED(CONFIG_PM) |
|---|
| 446 | | - unsigned long flags; |
|---|
| 447 | | - u64 val; |
|---|
| 448 | | - |
|---|
| 449 | | - if (intel_runtime_pm_get_if_in_use(i915)) { |
|---|
| 450 | | - val = __get_rc6(i915); |
|---|
| 451 | | - intel_runtime_pm_put(i915); |
|---|
| 452 | | - |
|---|
| 453 | | - /* |
|---|
| 454 | | - * If we are coming back from being runtime suspended we must |
|---|
| 455 | | - * be careful not to report a larger value than returned |
|---|
| 456 | | - * previously. |
|---|
| 457 | | - */ |
|---|
| 458 | | - |
|---|
| 459 | | - spin_lock_irqsave(&i915->pmu.lock, flags); |
|---|
| 460 | | - |
|---|
| 461 | | - if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { |
|---|
| 462 | | - i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; |
|---|
| 463 | | - i915->pmu.sample[__I915_SAMPLE_RC6].cur = val; |
|---|
| 464 | | - } else { |
|---|
| 465 | | - val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; |
|---|
| 466 | | - } |
|---|
| 467 | | - |
|---|
| 468 | | - spin_unlock_irqrestore(&i915->pmu.lock, flags); |
|---|
| 469 | | - } else { |
|---|
| 470 | | - struct pci_dev *pdev = i915->drm.pdev; |
|---|
| 471 | | - struct device *kdev = &pdev->dev; |
|---|
| 472 | | - |
|---|
| 473 | | - /* |
|---|
| 474 | | - * We are runtime suspended. |
|---|
| 475 | | - * |
|---|
| 476 | | - * Report the delta from when the device was suspended to now, |
|---|
| 477 | | - * on top of the last known real value, as the approximated RC6 |
|---|
| 478 | | - * counter value. |
|---|
| 479 | | - */ |
|---|
| 480 | | - spin_lock_irqsave(&i915->pmu.lock, flags); |
|---|
| 481 | | - spin_lock(&kdev->power.lock); |
|---|
| 482 | | - |
|---|
| 483 | | - /* |
|---|
| 484 | | - * After the above branch intel_runtime_pm_get_if_in_use failed |
|---|
| 485 | | - * to get the runtime PM reference we cannot assume we are in |
|---|
| 486 | | - * runtime suspend since we can either: a) race with coming out |
|---|
| 487 | | - * of it before we took the power.lock, or b) there are other |
|---|
| 488 | | - * states than suspended which can bring us here. |
|---|
| 489 | | - * |
|---|
| 490 | | - * We need to double-check that we are indeed currently runtime |
|---|
| 491 | | - * suspended and if not we cannot do better than report the last |
|---|
| 492 | | - * known RC6 value. |
|---|
| 493 | | - */ |
|---|
| 494 | | - if (kdev->power.runtime_status == RPM_SUSPENDED) { |
|---|
| 495 | | - if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) |
|---|
| 496 | | - i915->pmu.suspended_jiffies_last = |
|---|
| 497 | | - kdev->power.suspended_jiffies; |
|---|
| 498 | | - |
|---|
| 499 | | - val = kdev->power.suspended_jiffies - |
|---|
| 500 | | - i915->pmu.suspended_jiffies_last; |
|---|
| 501 | | - val += jiffies - kdev->power.accounting_timestamp; |
|---|
| 502 | | - |
|---|
| 503 | | - val = jiffies_to_nsecs(val); |
|---|
| 504 | | - val += i915->pmu.sample[__I915_SAMPLE_RC6].cur; |
|---|
| 505 | | - |
|---|
| 506 | | - i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; |
|---|
| 507 | | - } else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { |
|---|
| 508 | | - val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; |
|---|
| 509 | | - } else { |
|---|
| 510 | | - val = i915->pmu.sample[__I915_SAMPLE_RC6].cur; |
|---|
| 511 | | - } |
|---|
| 512 | | - |
|---|
| 513 | | - spin_unlock(&kdev->power.lock); |
|---|
| 514 | | - spin_unlock_irqrestore(&i915->pmu.lock, flags); |
|---|
| 515 | | - } |
|---|
| 516 | | - |
|---|
| 517 | | - return val; |
|---|
| 518 | | -#else |
|---|
| 519 | | - return __get_rc6(i915); |
|---|
| 520 | | -#endif |
|---|
| 521 | | -} |
|---|
| 522 | | - |
|---|
| 523 | 557 | static u64 __i915_pmu_event_read(struct perf_event *event) |
|---|
| 524 | 558 | { |
|---|
| 525 | 559 | struct drm_i915_private *i915 = |
|---|
| 526 | 560 | container_of(event->pmu, typeof(*i915), pmu.base); |
|---|
| 561 | + struct i915_pmu *pmu = &i915->pmu; |
|---|
| 527 | 562 | u64 val = 0; |
|---|
| 528 | 563 | |
|---|
| 529 | 564 | if (is_engine_event(event)) { |
|---|
| .. | .. |
|---|
| 534 | 569 | engine_event_class(event), |
|---|
| 535 | 570 | engine_event_instance(event)); |
|---|
| 536 | 571 | |
|---|
| 537 | | - if (WARN_ON_ONCE(!engine)) { |
|---|
| 572 | + if (drm_WARN_ON_ONCE(&i915->drm, !engine)) { |
|---|
| 538 | 573 | /* Do nothing */ |
|---|
| 539 | 574 | } else if (sample == I915_SAMPLE_BUSY && |
|---|
| 540 | 575 | intel_engine_supports_stats(engine)) { |
|---|
| 541 | | - val = ktime_to_ns(intel_engine_get_busy_time(engine)); |
|---|
| 576 | + ktime_t unused; |
|---|
| 577 | + |
|---|
| 578 | + val = ktime_to_ns(intel_engine_get_busy_time(engine, |
|---|
| 579 | + &unused)); |
|---|
| 542 | 580 | } else { |
|---|
| 543 | 581 | val = engine->pmu.sample[sample].cur; |
|---|
| 544 | 582 | } |
|---|
| .. | .. |
|---|
| 546 | 584 | switch (event->attr.config) { |
|---|
| 547 | 585 | case I915_PMU_ACTUAL_FREQUENCY: |
|---|
| 548 | 586 | val = |
|---|
| 549 | | - div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur, |
|---|
| 587 | + div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur, |
|---|
| 550 | 588 | USEC_PER_SEC /* to MHz */); |
|---|
| 551 | 589 | break; |
|---|
| 552 | 590 | case I915_PMU_REQUESTED_FREQUENCY: |
|---|
| 553 | 591 | val = |
|---|
| 554 | | - div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur, |
|---|
| 592 | + div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur, |
|---|
| 555 | 593 | USEC_PER_SEC /* to MHz */); |
|---|
| 556 | 594 | break; |
|---|
| 557 | 595 | case I915_PMU_INTERRUPTS: |
|---|
| 558 | 596 | val = count_interrupts(i915); |
|---|
| 559 | 597 | break; |
|---|
| 560 | 598 | case I915_PMU_RC6_RESIDENCY: |
|---|
| 561 | | - val = get_rc6(i915); |
|---|
| 599 | + val = get_rc6(&i915->gt); |
|---|
| 562 | 600 | break; |
|---|
| 563 | 601 | } |
|---|
| 564 | 602 | } |
|---|
| .. | .. |
|---|
| 586 | 624 | struct drm_i915_private *i915 = |
|---|
| 587 | 625 | container_of(event->pmu, typeof(*i915), pmu.base); |
|---|
| 588 | 626 | unsigned int bit = event_enabled_bit(event); |
|---|
| 627 | + struct i915_pmu *pmu = &i915->pmu; |
|---|
| 589 | 628 | unsigned long flags; |
|---|
| 590 | 629 | |
|---|
| 591 | | - spin_lock_irqsave(&i915->pmu.lock, flags); |
|---|
| 630 | + spin_lock_irqsave(&pmu->lock, flags); |
|---|
| 592 | 631 | |
|---|
| 593 | 632 | /* |
|---|
| 594 | 633 | * Update the bitmask of enabled events and increment |
|---|
| 595 | 634 | * the event reference counter. |
|---|
| 596 | 635 | */ |
|---|
| 597 | | - GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); |
|---|
| 598 | | - GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0); |
|---|
| 599 | | - i915->pmu.enable |= BIT_ULL(bit); |
|---|
| 600 | | - i915->pmu.enable_count[bit]++; |
|---|
| 636 | + BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS); |
|---|
| 637 | + GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count)); |
|---|
| 638 | + GEM_BUG_ON(pmu->enable_count[bit] == ~0); |
|---|
| 639 | + |
|---|
| 640 | + pmu->enable |= BIT_ULL(bit); |
|---|
| 641 | + pmu->enable_count[bit]++; |
|---|
| 601 | 642 | |
|---|
| 602 | 643 | /* |
|---|
| 603 | 644 | * Start the sampling timer if needed and not already enabled. |
|---|
| 604 | 645 | */ |
|---|
| 605 | | - __i915_pmu_maybe_start_timer(i915); |
|---|
| 646 | + __i915_pmu_maybe_start_timer(pmu); |
|---|
| 606 | 647 | |
|---|
| 607 | 648 | /* |
|---|
| 608 | 649 | * For per-engine events the bitmask and reference counting |
|---|
| .. | .. |
|---|
| 615 | 656 | engine = intel_engine_lookup_user(i915, |
|---|
| 616 | 657 | engine_event_class(event), |
|---|
| 617 | 658 | engine_event_instance(event)); |
|---|
| 618 | | - GEM_BUG_ON(!engine); |
|---|
| 619 | | - engine->pmu.enable |= BIT(sample); |
|---|
| 620 | 659 | |
|---|
| 621 | | - GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); |
|---|
| 660 | + BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) != |
|---|
| 661 | + I915_ENGINE_SAMPLE_COUNT); |
|---|
| 662 | + BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) != |
|---|
| 663 | + I915_ENGINE_SAMPLE_COUNT); |
|---|
| 664 | + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count)); |
|---|
| 665 | + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample)); |
|---|
| 622 | 666 | GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0); |
|---|
| 667 | + |
|---|
| 668 | + engine->pmu.enable |= BIT(sample); |
|---|
| 623 | 669 | engine->pmu.enable_count[sample]++; |
|---|
| 624 | 670 | } |
|---|
| 625 | 671 | |
|---|
| 626 | | - spin_unlock_irqrestore(&i915->pmu.lock, flags); |
|---|
| 672 | + spin_unlock_irqrestore(&pmu->lock, flags); |
|---|
| 627 | 673 | |
|---|
| 628 | 674 | /* |
|---|
| 629 | 675 | * Store the current counter value so we can report the correct delta |
|---|
| .. | .. |
|---|
| 638 | 684 | struct drm_i915_private *i915 = |
|---|
| 639 | 685 | container_of(event->pmu, typeof(*i915), pmu.base); |
|---|
| 640 | 686 | unsigned int bit = event_enabled_bit(event); |
|---|
| 687 | + struct i915_pmu *pmu = &i915->pmu; |
|---|
| 641 | 688 | unsigned long flags; |
|---|
| 642 | 689 | |
|---|
| 643 | | - spin_lock_irqsave(&i915->pmu.lock, flags); |
|---|
| 690 | + spin_lock_irqsave(&pmu->lock, flags); |
|---|
| 644 | 691 | |
|---|
| 645 | 692 | if (is_engine_event(event)) { |
|---|
| 646 | 693 | u8 sample = engine_event_sample(event); |
|---|
| .. | .. |
|---|
| 649 | 696 | engine = intel_engine_lookup_user(i915, |
|---|
| 650 | 697 | engine_event_class(event), |
|---|
| 651 | 698 | engine_event_instance(event)); |
|---|
| 652 | | - GEM_BUG_ON(!engine); |
|---|
| 653 | | - GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); |
|---|
| 699 | + |
|---|
| 700 | + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count)); |
|---|
| 701 | + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample)); |
|---|
| 654 | 702 | GEM_BUG_ON(engine->pmu.enable_count[sample] == 0); |
|---|
| 703 | + |
|---|
| 655 | 704 | /* |
|---|
| 656 | 705 | * Decrement the reference count and clear the enabled |
|---|
| 657 | 706 | * bitmask when the last listener on an event goes away. |
|---|
| .. | .. |
|---|
| 660 | 709 | engine->pmu.enable &= ~BIT(sample); |
|---|
| 661 | 710 | } |
|---|
| 662 | 711 | |
|---|
| 663 | | - GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); |
|---|
| 664 | | - GEM_BUG_ON(i915->pmu.enable_count[bit] == 0); |
|---|
| 712 | + GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count)); |
|---|
| 713 | + GEM_BUG_ON(pmu->enable_count[bit] == 0); |
|---|
| 665 | 714 | /* |
|---|
| 666 | 715 | * Decrement the reference count and clear the enabled |
|---|
| 667 | 716 | * bitmask when the last listener on an event goes away. |
|---|
| 668 | 717 | */ |
|---|
| 669 | | - if (--i915->pmu.enable_count[bit] == 0) { |
|---|
| 670 | | - i915->pmu.enable &= ~BIT_ULL(bit); |
|---|
| 671 | | - i915->pmu.timer_enabled &= pmu_needs_timer(i915, true); |
|---|
| 718 | + if (--pmu->enable_count[bit] == 0) { |
|---|
| 719 | + pmu->enable &= ~BIT_ULL(bit); |
|---|
| 720 | + pmu->timer_enabled &= pmu_needs_timer(pmu, true); |
|---|
| 672 | 721 | } |
|---|
| 673 | 722 | |
|---|
| 674 | | - spin_unlock_irqrestore(&i915->pmu.lock, flags); |
|---|
| 723 | + spin_unlock_irqrestore(&pmu->lock, flags); |
|---|
| 675 | 724 | } |
|---|
| 676 | 725 | |
|---|
| 677 | 726 | static void i915_pmu_event_start(struct perf_event *event, int flags) |
|---|
| .. | .. |
|---|
| 750 | 799 | return sprintf(buf, "config=0x%lx\n", eattr->val); |
|---|
| 751 | 800 | } |
|---|
| 752 | 801 | |
|---|
| 753 | | -static struct attribute_group i915_pmu_events_attr_group = { |
|---|
| 754 | | - .name = "events", |
|---|
| 755 | | - /* Patch in attrs at runtime. */ |
|---|
| 756 | | -}; |
|---|
| 757 | | - |
|---|
| 758 | 802 | static ssize_t |
|---|
| 759 | 803 | i915_pmu_get_attr_cpumask(struct device *dev, |
|---|
| 760 | 804 | struct device_attribute *attr, |
|---|
| .. | .. |
|---|
| 772 | 816 | |
|---|
| 773 | 817 | static const struct attribute_group i915_pmu_cpumask_attr_group = { |
|---|
| 774 | 818 | .attrs = i915_cpumask_attrs, |
|---|
| 775 | | -}; |
|---|
| 776 | | - |
|---|
| 777 | | -static const struct attribute_group *i915_pmu_attr_groups[] = { |
|---|
| 778 | | - &i915_pmu_format_attr_group, |
|---|
| 779 | | - &i915_pmu_events_attr_group, |
|---|
| 780 | | - &i915_pmu_cpumask_attr_group, |
|---|
| 781 | | - NULL |
|---|
| 782 | 819 | }; |
|---|
| 783 | 820 | |
|---|
| 784 | 821 | #define __event(__config, __name, __unit) \ |
|---|
| .. | .. |
|---|
| 820 | 857 | } |
|---|
| 821 | 858 | |
|---|
| 822 | 859 | static struct attribute ** |
|---|
| 823 | | -create_event_attributes(struct drm_i915_private *i915) |
|---|
| 860 | +create_event_attributes(struct i915_pmu *pmu) |
|---|
| 824 | 861 | { |
|---|
| 862 | + struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); |
|---|
| 825 | 863 | static const struct { |
|---|
| 826 | 864 | u64 config; |
|---|
| 827 | 865 | const char *name; |
|---|
| .. | .. |
|---|
| 845 | 883 | struct i915_ext_attribute *i915_attr = NULL, *i915_iter; |
|---|
| 846 | 884 | struct attribute **attr = NULL, **attr_iter; |
|---|
| 847 | 885 | struct intel_engine_cs *engine; |
|---|
| 848 | | - enum intel_engine_id id; |
|---|
| 849 | 886 | unsigned int i; |
|---|
| 850 | 887 | |
|---|
| 851 | 888 | /* Count how many counters we will be exposing. */ |
|---|
| .. | .. |
|---|
| 854 | 891 | count++; |
|---|
| 855 | 892 | } |
|---|
| 856 | 893 | |
|---|
| 857 | | - for_each_engine(engine, i915, id) { |
|---|
| 894 | + for_each_uabi_engine(engine, i915) { |
|---|
| 858 | 895 | for (i = 0; i < ARRAY_SIZE(engine_events); i++) { |
|---|
| 859 | 896 | if (!engine_event_status(engine, |
|---|
| 860 | 897 | engine_events[i].sample)) |
|---|
| .. | .. |
|---|
| 905 | 942 | } |
|---|
| 906 | 943 | |
|---|
| 907 | 944 | /* Initialize supported engine counters. */ |
|---|
| 908 | | - for_each_engine(engine, i915, id) { |
|---|
| 945 | + for_each_uabi_engine(engine, i915) { |
|---|
| 909 | 946 | for (i = 0; i < ARRAY_SIZE(engine_events); i++) { |
|---|
| 910 | 947 | char *str; |
|---|
| 911 | 948 | |
|---|
| .. | .. |
|---|
| 922 | 959 | i915_iter = |
|---|
| 923 | 960 | add_i915_attr(i915_iter, str, |
|---|
| 924 | 961 | __I915_PMU_ENGINE(engine->uabi_class, |
|---|
| 925 | | - engine->instance, |
|---|
| 962 | + engine->uabi_instance, |
|---|
| 926 | 963 | engine_events[i].sample)); |
|---|
| 927 | 964 | |
|---|
| 928 | 965 | str = kasprintf(GFP_KERNEL, "%s-%s.unit", |
|---|
| .. | .. |
|---|
| 935 | 972 | } |
|---|
| 936 | 973 | } |
|---|
| 937 | 974 | |
|---|
| 938 | | - i915->pmu.i915_attr = i915_attr; |
|---|
| 939 | | - i915->pmu.pmu_attr = pmu_attr; |
|---|
| 975 | + pmu->i915_attr = i915_attr; |
|---|
| 976 | + pmu->pmu_attr = pmu_attr; |
|---|
| 940 | 977 | |
|---|
| 941 | 978 | return attr; |
|---|
| 942 | 979 | |
|---|
| .. | .. |
|---|
| 952 | 989 | return NULL; |
|---|
| 953 | 990 | } |
|---|
| 954 | 991 | |
|---|
| 955 | | -static void free_event_attributes(struct drm_i915_private *i915) |
|---|
| 992 | +static void free_event_attributes(struct i915_pmu *pmu) |
|---|
| 956 | 993 | { |
|---|
| 957 | | - struct attribute **attr_iter = i915_pmu_events_attr_group.attrs; |
|---|
| 994 | + struct attribute **attr_iter = pmu->events_attr_group.attrs; |
|---|
| 958 | 995 | |
|---|
| 959 | 996 | for (; *attr_iter; attr_iter++) |
|---|
| 960 | 997 | kfree((*attr_iter)->name); |
|---|
| 961 | 998 | |
|---|
| 962 | | - kfree(i915_pmu_events_attr_group.attrs); |
|---|
| 963 | | - kfree(i915->pmu.i915_attr); |
|---|
| 964 | | - kfree(i915->pmu.pmu_attr); |
|---|
| 999 | + kfree(pmu->events_attr_group.attrs); |
|---|
| 1000 | + kfree(pmu->i915_attr); |
|---|
| 1001 | + kfree(pmu->pmu_attr); |
|---|
| 965 | 1002 | |
|---|
| 966 | | - i915_pmu_events_attr_group.attrs = NULL; |
|---|
| 967 | | - i915->pmu.i915_attr = NULL; |
|---|
| 968 | | - i915->pmu.pmu_attr = NULL; |
|---|
| 1003 | + pmu->events_attr_group.attrs = NULL; |
|---|
| 1004 | + pmu->i915_attr = NULL; |
|---|
| 1005 | + pmu->pmu_attr = NULL; |
|---|
| 969 | 1006 | } |
|---|
| 970 | 1007 | |
|---|
| 971 | 1008 | static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) |
|---|
| 972 | 1009 | { |
|---|
| 973 | | - struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); |
|---|
| 1010 | + struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); |
|---|
| 974 | 1011 | |
|---|
| 975 | 1012 | GEM_BUG_ON(!pmu->base.event_init); |
|---|
| 976 | 1013 | |
|---|
| .. | .. |
|---|
| 983 | 1020 | |
|---|
| 984 | 1021 | static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) |
|---|
| 985 | 1022 | { |
|---|
| 986 | | - struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); |
|---|
| 1023 | + struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); |
|---|
| 987 | 1024 | unsigned int target; |
|---|
| 988 | 1025 | |
|---|
| 989 | 1026 | GEM_BUG_ON(!pmu->base.event_init); |
|---|
| .. | .. |
|---|
| 1000 | 1037 | return 0; |
|---|
| 1001 | 1038 | } |
|---|
| 1002 | 1039 | |
|---|
| 1003 | | -static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; |
|---|
| 1004 | | - |
|---|
| 1005 | | -static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915) |
|---|
| 1040 | +static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu) |
|---|
| 1006 | 1041 | { |
|---|
| 1007 | 1042 | enum cpuhp_state slot; |
|---|
| 1008 | 1043 | int ret; |
|---|
| .. | .. |
|---|
| 1015 | 1050 | return ret; |
|---|
| 1016 | 1051 | |
|---|
| 1017 | 1052 | slot = ret; |
|---|
| 1018 | | - ret = cpuhp_state_add_instance(slot, &i915->pmu.node); |
|---|
| 1053 | + ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node); |
|---|
| 1019 | 1054 | if (ret) { |
|---|
| 1020 | 1055 | cpuhp_remove_multi_state(slot); |
|---|
| 1021 | 1056 | return ret; |
|---|
| 1022 | 1057 | } |
|---|
| 1023 | 1058 | |
|---|
| 1024 | | - cpuhp_slot = slot; |
|---|
| 1059 | + pmu->cpuhp.slot = slot; |
|---|
| 1025 | 1060 | return 0; |
|---|
| 1026 | 1061 | } |
|---|
| 1027 | 1062 | |
|---|
| 1028 | | -static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915) |
|---|
| 1063 | +static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu) |
|---|
| 1029 | 1064 | { |
|---|
| 1030 | | - WARN_ON(cpuhp_slot == CPUHP_INVALID); |
|---|
| 1031 | | - WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node)); |
|---|
| 1032 | | - cpuhp_remove_multi_state(cpuhp_slot); |
|---|
| 1065 | + struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); |
|---|
| 1066 | + |
|---|
| 1067 | + drm_WARN_ON(&i915->drm, pmu->cpuhp.slot == CPUHP_INVALID); |
|---|
| 1068 | + drm_WARN_ON(&i915->drm, cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node)); |
|---|
| 1069 | + cpuhp_remove_multi_state(pmu->cpuhp.slot); |
|---|
| 1070 | + pmu->cpuhp.slot = CPUHP_INVALID; |
|---|
| 1071 | +} |
|---|
| 1072 | + |
|---|
| 1073 | +static bool is_igp(struct drm_i915_private *i915) |
|---|
| 1074 | +{ |
|---|
| 1075 | + struct pci_dev *pdev = i915->drm.pdev; |
|---|
| 1076 | + |
|---|
| 1077 | + /* IGP is 0000:00:02.0 */ |
|---|
| 1078 | + return pci_domain_nr(pdev->bus) == 0 && |
|---|
| 1079 | + pdev->bus->number == 0 && |
|---|
| 1080 | + PCI_SLOT(pdev->devfn) == 2 && |
|---|
| 1081 | + PCI_FUNC(pdev->devfn) == 0; |
|---|
| 1033 | 1082 | } |
|---|
| 1034 | 1083 | |
|---|
| 1035 | 1084 | void i915_pmu_register(struct drm_i915_private *i915) |
|---|
| 1036 | 1085 | { |
|---|
| 1037 | | - int ret; |
|---|
| 1086 | + struct i915_pmu *pmu = &i915->pmu; |
|---|
| 1087 | + const struct attribute_group *attr_groups[] = { |
|---|
| 1088 | + &i915_pmu_format_attr_group, |
|---|
| 1089 | + &pmu->events_attr_group, |
|---|
| 1090 | + &i915_pmu_cpumask_attr_group, |
|---|
| 1091 | + NULL |
|---|
| 1092 | + }; |
|---|
| 1093 | + |
|---|
| 1094 | + int ret = -ENOMEM; |
|---|
| 1038 | 1095 | |
|---|
| 1039 | 1096 | if (INTEL_GEN(i915) <= 2) { |
|---|
| 1040 | | - DRM_INFO("PMU not supported for this GPU."); |
|---|
| 1097 | + drm_info(&i915->drm, "PMU not supported for this GPU."); |
|---|
| 1041 | 1098 | return; |
|---|
| 1042 | 1099 | } |
|---|
| 1043 | 1100 | |
|---|
| 1044 | | - i915_pmu_events_attr_group.attrs = create_event_attributes(i915); |
|---|
| 1045 | | - if (!i915_pmu_events_attr_group.attrs) { |
|---|
| 1046 | | - ret = -ENOMEM; |
|---|
| 1047 | | - goto err; |
|---|
| 1101 | + spin_lock_init(&pmu->lock); |
|---|
| 1102 | + hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
|---|
| 1103 | + pmu->timer.function = i915_sample; |
|---|
| 1104 | + pmu->cpuhp.slot = CPUHP_INVALID; |
|---|
| 1105 | + init_rc6(pmu); |
|---|
| 1106 | + |
|---|
| 1107 | + if (!is_igp(i915)) { |
|---|
| 1108 | + pmu->name = kasprintf(GFP_KERNEL, |
|---|
| 1109 | + "i915_%s", |
|---|
| 1110 | + dev_name(i915->drm.dev)); |
|---|
| 1111 | + if (pmu->name) { |
|---|
| 1112 | + /* tools/perf reserves colons as special. */ |
|---|
| 1113 | + strreplace((char *)pmu->name, ':', '_'); |
|---|
| 1114 | + } |
|---|
| 1115 | + } else { |
|---|
| 1116 | + pmu->name = "i915"; |
|---|
| 1048 | 1117 | } |
|---|
| 1049 | | - |
|---|
| 1050 | | - i915->pmu.base.attr_groups = i915_pmu_attr_groups; |
|---|
| 1051 | | - i915->pmu.base.task_ctx_nr = perf_invalid_context; |
|---|
| 1052 | | - i915->pmu.base.event_init = i915_pmu_event_init; |
|---|
| 1053 | | - i915->pmu.base.add = i915_pmu_event_add; |
|---|
| 1054 | | - i915->pmu.base.del = i915_pmu_event_del; |
|---|
| 1055 | | - i915->pmu.base.start = i915_pmu_event_start; |
|---|
| 1056 | | - i915->pmu.base.stop = i915_pmu_event_stop; |
|---|
| 1057 | | - i915->pmu.base.read = i915_pmu_event_read; |
|---|
| 1058 | | - i915->pmu.base.event_idx = i915_pmu_event_event_idx; |
|---|
| 1059 | | - |
|---|
| 1060 | | - spin_lock_init(&i915->pmu.lock); |
|---|
| 1061 | | - hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
|---|
| 1062 | | - i915->pmu.timer.function = i915_sample; |
|---|
| 1063 | | - |
|---|
| 1064 | | - ret = perf_pmu_register(&i915->pmu.base, "i915", -1); |
|---|
| 1065 | | - if (ret) |
|---|
| 1118 | + if (!pmu->name) |
|---|
| 1066 | 1119 | goto err; |
|---|
| 1067 | 1120 | |
|---|
| 1068 | | - ret = i915_pmu_register_cpuhp_state(i915); |
|---|
| 1121 | + pmu->events_attr_group.name = "events"; |
|---|
| 1122 | + pmu->events_attr_group.attrs = create_event_attributes(pmu); |
|---|
| 1123 | + if (!pmu->events_attr_group.attrs) |
|---|
| 1124 | + goto err_name; |
|---|
| 1125 | + |
|---|
| 1126 | + pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups), |
|---|
| 1127 | + GFP_KERNEL); |
|---|
| 1128 | + if (!pmu->base.attr_groups) |
|---|
| 1129 | + goto err_attr; |
|---|
| 1130 | + |
|---|
| 1131 | + pmu->base.module = THIS_MODULE; |
|---|
| 1132 | + pmu->base.task_ctx_nr = perf_invalid_context; |
|---|
| 1133 | + pmu->base.event_init = i915_pmu_event_init; |
|---|
| 1134 | + pmu->base.add = i915_pmu_event_add; |
|---|
| 1135 | + pmu->base.del = i915_pmu_event_del; |
|---|
| 1136 | + pmu->base.start = i915_pmu_event_start; |
|---|
| 1137 | + pmu->base.stop = i915_pmu_event_stop; |
|---|
| 1138 | + pmu->base.read = i915_pmu_event_read; |
|---|
| 1139 | + pmu->base.event_idx = i915_pmu_event_event_idx; |
|---|
| 1140 | + |
|---|
| 1141 | + ret = perf_pmu_register(&pmu->base, pmu->name, -1); |
|---|
| 1142 | + if (ret) |
|---|
| 1143 | + goto err_groups; |
|---|
| 1144 | + |
|---|
| 1145 | + ret = i915_pmu_register_cpuhp_state(pmu); |
|---|
| 1069 | 1146 | if (ret) |
|---|
| 1070 | 1147 | goto err_unreg; |
|---|
| 1071 | 1148 | |
|---|
| 1072 | 1149 | return; |
|---|
| 1073 | 1150 | |
|---|
| 1074 | 1151 | err_unreg: |
|---|
| 1075 | | - perf_pmu_unregister(&i915->pmu.base); |
|---|
| 1152 | + perf_pmu_unregister(&pmu->base); |
|---|
| 1153 | +err_groups: |
|---|
| 1154 | + kfree(pmu->base.attr_groups); |
|---|
| 1155 | +err_attr: |
|---|
| 1156 | + pmu->base.event_init = NULL; |
|---|
| 1157 | + free_event_attributes(pmu); |
|---|
| 1158 | +err_name: |
|---|
| 1159 | + if (!is_igp(i915)) |
|---|
| 1160 | + kfree(pmu->name); |
|---|
| 1076 | 1161 | err: |
|---|
| 1077 | | - i915->pmu.base.event_init = NULL; |
|---|
| 1078 | | - free_event_attributes(i915); |
|---|
| 1079 | | - DRM_NOTE("Failed to register PMU! (err=%d)\n", ret); |
|---|
| 1162 | + drm_notice(&i915->drm, "Failed to register PMU!\n"); |
|---|
| 1080 | 1163 | } |
|---|
| 1081 | 1164 | |
|---|
| 1082 | 1165 | void i915_pmu_unregister(struct drm_i915_private *i915) |
|---|
| 1083 | 1166 | { |
|---|
| 1084 | | - if (!i915->pmu.base.event_init) |
|---|
| 1167 | + struct i915_pmu *pmu = &i915->pmu; |
|---|
| 1168 | + |
|---|
| 1169 | + if (!pmu->base.event_init) |
|---|
| 1085 | 1170 | return; |
|---|
| 1086 | 1171 | |
|---|
| 1087 | | - WARN_ON(i915->pmu.enable); |
|---|
| 1172 | + drm_WARN_ON(&i915->drm, pmu->enable); |
|---|
| 1088 | 1173 | |
|---|
| 1089 | | - hrtimer_cancel(&i915->pmu.timer); |
|---|
| 1174 | + hrtimer_cancel(&pmu->timer); |
|---|
| 1090 | 1175 | |
|---|
| 1091 | | - i915_pmu_unregister_cpuhp_state(i915); |
|---|
| 1176 | + i915_pmu_unregister_cpuhp_state(pmu); |
|---|
| 1092 | 1177 | |
|---|
| 1093 | | - perf_pmu_unregister(&i915->pmu.base); |
|---|
| 1094 | | - i915->pmu.base.event_init = NULL; |
|---|
| 1095 | | - free_event_attributes(i915); |
|---|
| 1178 | + perf_pmu_unregister(&pmu->base); |
|---|
| 1179 | + pmu->base.event_init = NULL; |
|---|
| 1180 | + kfree(pmu->base.attr_groups); |
|---|
| 1181 | + if (!is_igp(i915)) |
|---|
| 1182 | + kfree(pmu->name); |
|---|
| 1183 | + free_event_attributes(pmu); |
|---|
| 1096 | 1184 | } |
|---|