From 2f7c68cb55ecb7331f2381deb497c27155f32faf Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Wed, 03 Jan 2024 09:43:39 +0000 Subject: [PATCH] update kernel to 5.10.198 --- kernel/drivers/gpu/drm/i915/i915_pmu.c | 730 +++++++++++++++++++++++++++++++------------------------ 1 files changed, 409 insertions(+), 321 deletions(-) diff --git a/kernel/drivers/gpu/drm/i915/i915_pmu.c b/kernel/drivers/gpu/drm/i915/i915_pmu.c index b7fda69..3c9ac66 100644 --- a/kernel/drivers/gpu/drm/i915/i915_pmu.c +++ b/kernel/drivers/gpu/drm/i915/i915_pmu.c @@ -5,9 +5,18 @@ */ #include <linux/irq.h> -#include "i915_pmu.h" -#include "intel_ringbuffer.h" +#include <linux/pm_runtime.h> + +#include "gt/intel_engine.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_user.h" +#include "gt/intel_gt_pm.h" +#include "gt/intel_rc6.h" +#include "gt/intel_rps.h" + #include "i915_drv.h" +#include "i915_pmu.h" +#include "intel_pm.h" /* Frequency for the sampling timer for events which need it. */ #define FREQUENCY 200 @@ -70,8 +79,9 @@ return config_enabled_bit(event->attr.config); } -static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) +static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) { + struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); u64 enable; /* @@ -79,7 +89,7 @@ * * We start with a bitmask of all currently enabled events. */ - enable = i915->pmu.enable; + enable = pmu->enable; /* * Mask out all the ones which do not need the timer, or in @@ -98,10 +108,8 @@ /* * Also there is software busyness tracking available we do not * need the timer for I915_SAMPLE_BUSY counter. - * - * Use RCS as proxy for all engines. */ - else if (intel_engine_supports_stats(i915->engine[RCS])) + else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS) enable &= ~BIT(I915_SAMPLE_BUSY); /* @@ -110,50 +118,151 @@ return enable; } -void i915_pmu_gt_parked(struct drm_i915_private *i915) +static u64 __get_rc6(struct intel_gt *gt) { - if (!i915->pmu.base.event_init) - return; + struct drm_i915_private *i915 = gt->i915; + u64 val; - spin_lock_irq(&i915->pmu.lock); - /* - * Signal sampling timer to stop if only engine events are enabled and - * GPU went idle. - */ - i915->pmu.timer_enabled = pmu_needs_timer(i915, false); - spin_unlock_irq(&i915->pmu.lock); + val = intel_rc6_residency_ns(>->rc6, + IS_VALLEYVIEW(i915) ? + VLV_GT_RENDER_RC6 : + GEN6_GT_GFX_RC6); + + if (HAS_RC6p(i915)) + val += intel_rc6_residency_ns(>->rc6, GEN6_GT_GFX_RC6p); + + if (HAS_RC6pp(i915)) + val += intel_rc6_residency_ns(>->rc6, GEN6_GT_GFX_RC6pp); + + return val; } -static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915) +#if IS_ENABLED(CONFIG_PM) + +static inline s64 ktime_since(const ktime_t kt) { - if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) { - i915->pmu.timer_enabled = true; - i915->pmu.timer_last = ktime_get(); - hrtimer_start_range_ns(&i915->pmu.timer, + return ktime_to_ns(ktime_sub(ktime_get(), kt)); +} + +static u64 get_rc6(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct i915_pmu *pmu = &i915->pmu; + unsigned long flags; + bool awake = false; + u64 val; + + if (intel_gt_pm_get_if_awake(gt)) { + val = __get_rc6(gt); + intel_gt_pm_put_async(gt); + awake = true; + } + + spin_lock_irqsave(&pmu->lock, flags); + + if (awake) { + pmu->sample[__I915_SAMPLE_RC6].cur = val; + } else { + /* + * We think we are runtime suspended. + * + * Report the delta from when the device was suspended to now, + * on top of the last known real value, as the approximated RC6 + * counter value. + */ + val = ktime_since(pmu->sleep_last); + val += pmu->sample[__I915_SAMPLE_RC6].cur; + } + + if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur) + val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur; + else + pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val; + + spin_unlock_irqrestore(&pmu->lock, flags); + + return val; +} + +static void init_rc6(struct i915_pmu *pmu) +{ + struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); + intel_wakeref_t wakeref; + + with_intel_runtime_pm(i915->gt.uncore->rpm, wakeref) { + pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); + pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = + pmu->sample[__I915_SAMPLE_RC6].cur; + pmu->sleep_last = ktime_get(); + } +} + +static void park_rc6(struct drm_i915_private *i915) +{ + struct i915_pmu *pmu = &i915->pmu; + + pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); + pmu->sleep_last = ktime_get(); +} + +#else + +static u64 get_rc6(struct intel_gt *gt) +{ + return __get_rc6(gt); +} + +static void init_rc6(struct i915_pmu *pmu) { } +static void park_rc6(struct drm_i915_private *i915) {} + +#endif + +static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) +{ + if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) { + pmu->timer_enabled = true; + pmu->timer_last = ktime_get(); + hrtimer_start_range_ns(&pmu->timer, ns_to_ktime(PERIOD), 0, HRTIMER_MODE_REL_PINNED); } } -void i915_pmu_gt_unparked(struct drm_i915_private *i915) +void i915_pmu_gt_parked(struct drm_i915_private *i915) { - if (!i915->pmu.base.event_init) + struct i915_pmu *pmu = &i915->pmu; + + if (!pmu->base.event_init) return; - spin_lock_irq(&i915->pmu.lock); + spin_lock_irq(&pmu->lock); + + park_rc6(i915); + + /* + * Signal sampling timer to stop if only engine events are enabled and + * GPU went idle. + */ + pmu->timer_enabled = pmu_needs_timer(pmu, false); + + spin_unlock_irq(&pmu->lock); +} + +void i915_pmu_gt_unparked(struct drm_i915_private *i915) +{ + struct i915_pmu *pmu = &i915->pmu; + + if (!pmu->base.event_init) + return; + + spin_lock_irq(&pmu->lock); + /* * Re-enable sampling timer when GPU goes active. */ - __i915_pmu_maybe_start_timer(i915); - spin_unlock_irq(&i915->pmu.lock); -} + __i915_pmu_maybe_start_timer(pmu); -static bool grab_forcewake(struct drm_i915_private *i915, bool fw) -{ - if (!fw) - intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); - - return true; + spin_unlock_irq(&pmu->lock); } static void @@ -162,55 +271,79 @@ sample->cur += val; } -static void -engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) +static bool exclusive_mmio_access(const struct drm_i915_private *i915) { + /* + * We have to avoid concurrent mmio cache line access on gen7 or + * risk a machine hang. For a fun history lesson dig out the old + * userspace intel_gpu_top and run it on Ivybridge or Haswell! + */ + return IS_GEN(i915, 7); +} + +static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns) +{ + struct intel_engine_pmu *pmu = &engine->pmu; + bool busy; + u32 val; + + val = ENGINE_READ_FW(engine, RING_CTL); + if (val == 0) /* powerwell off => engine idle */ + return; + + if (val & RING_WAIT) + add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); + if (val & RING_WAIT_SEMAPHORE) + add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); + + /* No need to sample when busy stats are supported. */ + if (intel_engine_supports_stats(engine)) + return; + + /* + * While waiting on a semaphore or event, MI_MODE reports the + * ring as idle. However, previously using the seqno, and with + * execlists sampling, we account for the ring waiting as the + * engine being busy. Therefore, we record the sample as being + * busy if either waiting or !idle. + */ + busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT); + if (!busy) { + val = ENGINE_READ_FW(engine, RING_MI_MODE); + busy = !(val & MODE_IDLE); + } + if (busy) + add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); +} + +static void +engines_sample(struct intel_gt *gt, unsigned int period_ns) +{ + struct drm_i915_private *i915 = gt->i915; struct intel_engine_cs *engine; enum intel_engine_id id; - bool fw = false; + unsigned long flags; - if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0) + if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0) return; - if (!dev_priv->gt.awake) + if (!intel_gt_pm_is_awake(gt)) return; - if (!intel_runtime_pm_get_if_in_use(dev_priv)) - return; + for_each_engine(engine, gt, id) { + if (!intel_engine_pm_get_if_awake(engine)) + continue; - for_each_engine(engine, dev_priv, id) { - u32 current_seqno = intel_engine_get_seqno(engine); - u32 last_seqno = intel_engine_last_submit(engine); - u32 val; - - val = !i915_seqno_passed(current_seqno, last_seqno); - - if (val) - add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY], - period_ns); - - if (val && (engine->pmu.enable & - (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) { - fw = grab_forcewake(dev_priv, fw); - - val = I915_READ_FW(RING_CTL(engine->mmio_base)); + if (exclusive_mmio_access(i915)) { + spin_lock_irqsave(&engine->uncore->lock, flags); + engine_sample(engine, period_ns); + spin_unlock_irqrestore(&engine->uncore->lock, flags); } else { - val = 0; + engine_sample(engine, period_ns); } - if (val & RING_WAIT) - add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT], - period_ns); - - if (val & RING_WAIT_SEMAPHORE) - add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA], - period_ns); + intel_engine_pm_put_async(engine); } - - if (fw) - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - - intel_runtime_pm_put(dev_priv); } static void @@ -219,48 +352,74 @@ sample->cur += mul_u32_u32(val, mul); } -static void -frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) +static bool frequency_sampling_enabled(struct i915_pmu *pmu) { - if (dev_priv->pmu.enable & - config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { + return pmu->enable & + (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) | + config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)); +} + +static void +frequency_sample(struct intel_gt *gt, unsigned int period_ns) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct i915_pmu *pmu = &i915->pmu; + struct intel_rps *rps = >->rps; + + if (!frequency_sampling_enabled(pmu)) + return; + + /* Report 0/0 (actual/requested) frequency while parked. */ + if (!intel_gt_pm_get_if_awake(gt)) + return; + + if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { u32 val; - val = dev_priv->gt_pm.rps.cur_freq; - if (dev_priv->gt.awake && - intel_runtime_pm_get_if_in_use(dev_priv)) { - val = intel_get_cagf(dev_priv, - I915_READ_NOTRACE(GEN6_RPSTAT1)); - intel_runtime_pm_put(dev_priv); - } + /* + * We take a quick peek here without using forcewake + * so that we don't perturb the system under observation + * (forcewake => !rc6 => increased power use). We expect + * that if the read fails because it is outside of the + * mmio power well, then it will return 0 -- in which + * case we assume the system is running at the intended + * frequency. Fortunately, the read should rarely fail! + */ + val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1); + if (val) + val = intel_rps_get_cagf(rps, val); + else + val = rps->cur_freq; - add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT], - intel_gpu_freq(dev_priv, val), + add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], + intel_gpu_freq(rps, val), period_ns / 1000); + } + + if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { + add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ], + intel_gpu_freq(rps, rps->cur_freq), period_ns / 1000); } - if (dev_priv->pmu.enable & - config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { - add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.cur_freq), - period_ns / 1000); - } + intel_gt_pm_put_async(gt); } static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) { struct drm_i915_private *i915 = container_of(hrtimer, struct drm_i915_private, pmu.timer); + struct i915_pmu *pmu = &i915->pmu; + struct intel_gt *gt = &i915->gt; unsigned int period_ns; ktime_t now; - if (!READ_ONCE(i915->pmu.timer_enabled)) + if (!READ_ONCE(pmu->timer_enabled)) return HRTIMER_NORESTART; now = ktime_get(); - period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last)); - i915->pmu.timer_last = now; + period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last)); + pmu->timer_last = now; /* * Strictly speaking the passed in period may not be 100% accurate for @@ -268,8 +427,8 @@ * grabbing the forcewake. However the potential error from timer call- * back delay greatly dominates this so we keep it simple. */ - engines_sample(i915, period_ns); - frequency_sample(i915, period_ns); + engines_sample(gt, period_ns); + frequency_sample(gt, period_ns); hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD)); @@ -292,29 +451,12 @@ return sum; } -static void engine_event_destroy(struct perf_event *event) +static void i915_pmu_event_destroy(struct perf_event *event) { struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); - struct intel_engine_cs *engine; - engine = intel_engine_lookup_user(i915, - engine_event_class(event), - engine_event_instance(event)); - if (WARN_ON_ONCE(!engine)) - return; - - if (engine_event_sample(event) == I915_SAMPLE_BUSY && - intel_engine_supports_stats(engine)) - intel_disable_engine_stats(engine); -} - -static void i915_pmu_event_destroy(struct perf_event *event) -{ - WARN_ON(event->parent); - - if (is_engine_event(event)) - engine_event_destroy(event); + drm_WARN_ON(&i915->drm, event->parent); } static int @@ -344,7 +486,7 @@ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) /* Requires a mutex for sampling! */ return -ENODEV; - /* Fall-through. */ + fallthrough; case I915_PMU_REQUESTED_FREQUENCY: if (INTEL_GEN(i915) < 6) return -ENODEV; @@ -367,23 +509,13 @@ struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); struct intel_engine_cs *engine; - u8 sample; - int ret; engine = intel_engine_lookup_user(i915, engine_event_class(event), engine_event_instance(event)); if (!engine) return -ENODEV; - sample = engine_event_sample(event); - ret = engine_event_status(engine, sample); - if (ret) - return ret; - - if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine)) - ret = intel_enable_engine_stats(engine); - - return ret; + return engine_event_status(engine, engine_event_sample(event)); } static int i915_pmu_event_init(struct perf_event *event) @@ -422,108 +554,11 @@ return 0; } -static u64 __get_rc6(struct drm_i915_private *i915) -{ - u64 val; - - val = intel_rc6_residency_ns(i915, - IS_VALLEYVIEW(i915) ? - VLV_GT_RENDER_RC6 : - GEN6_GT_GFX_RC6); - - if (HAS_RC6p(i915)) - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); - - if (HAS_RC6pp(i915)) - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); - - return val; -} - -static u64 get_rc6(struct drm_i915_private *i915) -{ -#if IS_ENABLED(CONFIG_PM) - unsigned long flags; - u64 val; - - if (intel_runtime_pm_get_if_in_use(i915)) { - val = __get_rc6(i915); - intel_runtime_pm_put(i915); - - /* - * If we are coming back from being runtime suspended we must - * be careful not to report a larger value than returned - * previously. - */ - - spin_lock_irqsave(&i915->pmu.lock, flags); - - if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { - i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; - i915->pmu.sample[__I915_SAMPLE_RC6].cur = val; - } else { - val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; - } - - spin_unlock_irqrestore(&i915->pmu.lock, flags); - } else { - struct pci_dev *pdev = i915->drm.pdev; - struct device *kdev = &pdev->dev; - - /* - * We are runtime suspended. - * - * Report the delta from when the device was suspended to now, - * on top of the last known real value, as the approximated RC6 - * counter value. - */ - spin_lock_irqsave(&i915->pmu.lock, flags); - spin_lock(&kdev->power.lock); - - /* - * After the above branch intel_runtime_pm_get_if_in_use failed - * to get the runtime PM reference we cannot assume we are in - * runtime suspend since we can either: a) race with coming out - * of it before we took the power.lock, or b) there are other - * states than suspended which can bring us here. - * - * We need to double-check that we are indeed currently runtime - * suspended and if not we cannot do better than report the last - * known RC6 value. - */ - if (kdev->power.runtime_status == RPM_SUSPENDED) { - if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) - i915->pmu.suspended_jiffies_last = - kdev->power.suspended_jiffies; - - val = kdev->power.suspended_jiffies - - i915->pmu.suspended_jiffies_last; - val += jiffies - kdev->power.accounting_timestamp; - - val = jiffies_to_nsecs(val); - val += i915->pmu.sample[__I915_SAMPLE_RC6].cur; - - i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; - } else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { - val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; - } else { - val = i915->pmu.sample[__I915_SAMPLE_RC6].cur; - } - - spin_unlock(&kdev->power.lock); - spin_unlock_irqrestore(&i915->pmu.lock, flags); - } - - return val; -#else - return __get_rc6(i915); -#endif -} - static u64 __i915_pmu_event_read(struct perf_event *event) { struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); + struct i915_pmu *pmu = &i915->pmu; u64 val = 0; if (is_engine_event(event)) { @@ -534,11 +569,14 @@ engine_event_class(event), engine_event_instance(event)); - if (WARN_ON_ONCE(!engine)) { + if (drm_WARN_ON_ONCE(&i915->drm, !engine)) { /* Do nothing */ } else if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine)) { - val = ktime_to_ns(intel_engine_get_busy_time(engine)); + ktime_t unused; + + val = ktime_to_ns(intel_engine_get_busy_time(engine, + &unused)); } else { val = engine->pmu.sample[sample].cur; } @@ -546,19 +584,19 @@ switch (event->attr.config) { case I915_PMU_ACTUAL_FREQUENCY: val = - div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur, + div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur, USEC_PER_SEC /* to MHz */); break; case I915_PMU_REQUESTED_FREQUENCY: val = - div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur, + div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur, USEC_PER_SEC /* to MHz */); break; case I915_PMU_INTERRUPTS: val = count_interrupts(i915); break; case I915_PMU_RC6_RESIDENCY: - val = get_rc6(i915); + val = get_rc6(&i915->gt); break; } } @@ -586,23 +624,26 @@ struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); unsigned int bit = event_enabled_bit(event); + struct i915_pmu *pmu = &i915->pmu; unsigned long flags; - spin_lock_irqsave(&i915->pmu.lock, flags); + spin_lock_irqsave(&pmu->lock, flags); /* * Update the bitmask of enabled events and increment * the event reference counter. */ - GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); - GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0); - i915->pmu.enable |= BIT_ULL(bit); - i915->pmu.enable_count[bit]++; + BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS); + GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count)); + GEM_BUG_ON(pmu->enable_count[bit] == ~0); + + pmu->enable |= BIT_ULL(bit); + pmu->enable_count[bit]++; /* * Start the sampling timer if needed and not already enabled. */ - __i915_pmu_maybe_start_timer(i915); + __i915_pmu_maybe_start_timer(pmu); /* * For per-engine events the bitmask and reference counting @@ -615,15 +656,20 @@ engine = intel_engine_lookup_user(i915, engine_event_class(event), engine_event_instance(event)); - GEM_BUG_ON(!engine); - engine->pmu.enable |= BIT(sample); - GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) != + I915_ENGINE_SAMPLE_COUNT); + BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) != + I915_ENGINE_SAMPLE_COUNT); + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count)); + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample)); GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0); + + engine->pmu.enable |= BIT(sample); engine->pmu.enable_count[sample]++; } - spin_unlock_irqrestore(&i915->pmu.lock, flags); + spin_unlock_irqrestore(&pmu->lock, flags); /* * Store the current counter value so we can report the correct delta @@ -638,9 +684,10 @@ struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); unsigned int bit = event_enabled_bit(event); + struct i915_pmu *pmu = &i915->pmu; unsigned long flags; - spin_lock_irqsave(&i915->pmu.lock, flags); + spin_lock_irqsave(&pmu->lock, flags); if (is_engine_event(event)) { u8 sample = engine_event_sample(event); @@ -649,9 +696,11 @@ engine = intel_engine_lookup_user(i915, engine_event_class(event), engine_event_instance(event)); - GEM_BUG_ON(!engine); - GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count)); + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample)); GEM_BUG_ON(engine->pmu.enable_count[sample] == 0); + /* * Decrement the reference count and clear the enabled * bitmask when the last listener on an event goes away. @@ -660,18 +709,18 @@ engine->pmu.enable &= ~BIT(sample); } - GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); - GEM_BUG_ON(i915->pmu.enable_count[bit] == 0); + GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count)); + GEM_BUG_ON(pmu->enable_count[bit] == 0); /* * Decrement the reference count and clear the enabled * bitmask when the last listener on an event goes away. */ - if (--i915->pmu.enable_count[bit] == 0) { - i915->pmu.enable &= ~BIT_ULL(bit); - i915->pmu.timer_enabled &= pmu_needs_timer(i915, true); + if (--pmu->enable_count[bit] == 0) { + pmu->enable &= ~BIT_ULL(bit); + pmu->timer_enabled &= pmu_needs_timer(pmu, true); } - spin_unlock_irqrestore(&i915->pmu.lock, flags); + spin_unlock_irqrestore(&pmu->lock, flags); } static void i915_pmu_event_start(struct perf_event *event, int flags) @@ -750,11 +799,6 @@ return sprintf(buf, "config=0x%lx\n", eattr->val); } -static struct attribute_group i915_pmu_events_attr_group = { - .name = "events", - /* Patch in attrs at runtime. */ -}; - static ssize_t i915_pmu_get_attr_cpumask(struct device *dev, struct device_attribute *attr, @@ -772,13 +816,6 @@ static const struct attribute_group i915_pmu_cpumask_attr_group = { .attrs = i915_cpumask_attrs, -}; - -static const struct attribute_group *i915_pmu_attr_groups[] = { - &i915_pmu_format_attr_group, - &i915_pmu_events_attr_group, - &i915_pmu_cpumask_attr_group, - NULL }; #define __event(__config, __name, __unit) \ @@ -820,8 +857,9 @@ } static struct attribute ** -create_event_attributes(struct drm_i915_private *i915) +create_event_attributes(struct i915_pmu *pmu) { + struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); static const struct { u64 config; const char *name; @@ -845,7 +883,6 @@ struct i915_ext_attribute *i915_attr = NULL, *i915_iter; struct attribute **attr = NULL, **attr_iter; struct intel_engine_cs *engine; - enum intel_engine_id id; unsigned int i; /* Count how many counters we will be exposing. */ @@ -854,7 +891,7 @@ count++; } - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { for (i = 0; i < ARRAY_SIZE(engine_events); i++) { if (!engine_event_status(engine, engine_events[i].sample)) @@ -905,7 +942,7 @@ } /* Initialize supported engine counters. */ - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { for (i = 0; i < ARRAY_SIZE(engine_events); i++) { char *str; @@ -922,7 +959,7 @@ i915_iter = add_i915_attr(i915_iter, str, __I915_PMU_ENGINE(engine->uabi_class, - engine->instance, + engine->uabi_instance, engine_events[i].sample)); str = kasprintf(GFP_KERNEL, "%s-%s.unit", @@ -935,8 +972,8 @@ } } - i915->pmu.i915_attr = i915_attr; - i915->pmu.pmu_attr = pmu_attr; + pmu->i915_attr = i915_attr; + pmu->pmu_attr = pmu_attr; return attr; @@ -952,25 +989,25 @@ return NULL; } -static void free_event_attributes(struct drm_i915_private *i915) +static void free_event_attributes(struct i915_pmu *pmu) { - struct attribute **attr_iter = i915_pmu_events_attr_group.attrs; + struct attribute **attr_iter = pmu->events_attr_group.attrs; for (; *attr_iter; attr_iter++) kfree((*attr_iter)->name); - kfree(i915_pmu_events_attr_group.attrs); - kfree(i915->pmu.i915_attr); - kfree(i915->pmu.pmu_attr); + kfree(pmu->events_attr_group.attrs); + kfree(pmu->i915_attr); + kfree(pmu->pmu_attr); - i915_pmu_events_attr_group.attrs = NULL; - i915->pmu.i915_attr = NULL; - i915->pmu.pmu_attr = NULL; + pmu->events_attr_group.attrs = NULL; + pmu->i915_attr = NULL; + pmu->pmu_attr = NULL; } static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) { - struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); + struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); GEM_BUG_ON(!pmu->base.event_init); @@ -983,7 +1020,7 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) { - struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); + struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); unsigned int target; GEM_BUG_ON(!pmu->base.event_init); @@ -1000,9 +1037,7 @@ return 0; } -static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; - -static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915) +static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu) { enum cpuhp_state slot; int ret; @@ -1015,82 +1050,135 @@ return ret; slot = ret; - ret = cpuhp_state_add_instance(slot, &i915->pmu.node); + ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node); if (ret) { cpuhp_remove_multi_state(slot); return ret; } - cpuhp_slot = slot; + pmu->cpuhp.slot = slot; return 0; } -static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915) +static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu) { - WARN_ON(cpuhp_slot == CPUHP_INVALID); - WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node)); - cpuhp_remove_multi_state(cpuhp_slot); + struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); + + drm_WARN_ON(&i915->drm, pmu->cpuhp.slot == CPUHP_INVALID); + drm_WARN_ON(&i915->drm, cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node)); + cpuhp_remove_multi_state(pmu->cpuhp.slot); + pmu->cpuhp.slot = CPUHP_INVALID; +} + +static bool is_igp(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + /* IGP is 0000:00:02.0 */ + return pci_domain_nr(pdev->bus) == 0 && + pdev->bus->number == 0 && + PCI_SLOT(pdev->devfn) == 2 && + PCI_FUNC(pdev->devfn) == 0; } void i915_pmu_register(struct drm_i915_private *i915) { - int ret; + struct i915_pmu *pmu = &i915->pmu; + const struct attribute_group *attr_groups[] = { + &i915_pmu_format_attr_group, + &pmu->events_attr_group, + &i915_pmu_cpumask_attr_group, + NULL + }; + + int ret = -ENOMEM; if (INTEL_GEN(i915) <= 2) { - DRM_INFO("PMU not supported for this GPU."); + drm_info(&i915->drm, "PMU not supported for this GPU."); return; } - i915_pmu_events_attr_group.attrs = create_event_attributes(i915); - if (!i915_pmu_events_attr_group.attrs) { - ret = -ENOMEM; - goto err; + spin_lock_init(&pmu->lock); + hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + pmu->timer.function = i915_sample; + pmu->cpuhp.slot = CPUHP_INVALID; + init_rc6(pmu); + + if (!is_igp(i915)) { + pmu->name = kasprintf(GFP_KERNEL, + "i915_%s", + dev_name(i915->drm.dev)); + if (pmu->name) { + /* tools/perf reserves colons as special. */ + strreplace((char *)pmu->name, ':', '_'); + } + } else { + pmu->name = "i915"; } - - i915->pmu.base.attr_groups = i915_pmu_attr_groups; - i915->pmu.base.task_ctx_nr = perf_invalid_context; - i915->pmu.base.event_init = i915_pmu_event_init; - i915->pmu.base.add = i915_pmu_event_add; - i915->pmu.base.del = i915_pmu_event_del; - i915->pmu.base.start = i915_pmu_event_start; - i915->pmu.base.stop = i915_pmu_event_stop; - i915->pmu.base.read = i915_pmu_event_read; - i915->pmu.base.event_idx = i915_pmu_event_event_idx; - - spin_lock_init(&i915->pmu.lock); - hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - i915->pmu.timer.function = i915_sample; - - ret = perf_pmu_register(&i915->pmu.base, "i915", -1); - if (ret) + if (!pmu->name) goto err; - ret = i915_pmu_register_cpuhp_state(i915); + pmu->events_attr_group.name = "events"; + pmu->events_attr_group.attrs = create_event_attributes(pmu); + if (!pmu->events_attr_group.attrs) + goto err_name; + + pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups), + GFP_KERNEL); + if (!pmu->base.attr_groups) + goto err_attr; + + pmu->base.module = THIS_MODULE; + pmu->base.task_ctx_nr = perf_invalid_context; + pmu->base.event_init = i915_pmu_event_init; + pmu->base.add = i915_pmu_event_add; + pmu->base.del = i915_pmu_event_del; + pmu->base.start = i915_pmu_event_start; + pmu->base.stop = i915_pmu_event_stop; + pmu->base.read = i915_pmu_event_read; + pmu->base.event_idx = i915_pmu_event_event_idx; + + ret = perf_pmu_register(&pmu->base, pmu->name, -1); + if (ret) + goto err_groups; + + ret = i915_pmu_register_cpuhp_state(pmu); if (ret) goto err_unreg; return; err_unreg: - perf_pmu_unregister(&i915->pmu.base); + perf_pmu_unregister(&pmu->base); +err_groups: + kfree(pmu->base.attr_groups); +err_attr: + pmu->base.event_init = NULL; + free_event_attributes(pmu); +err_name: + if (!is_igp(i915)) + kfree(pmu->name); err: - i915->pmu.base.event_init = NULL; - free_event_attributes(i915); - DRM_NOTE("Failed to register PMU! (err=%d)\n", ret); + drm_notice(&i915->drm, "Failed to register PMU!\n"); } void i915_pmu_unregister(struct drm_i915_private *i915) { - if (!i915->pmu.base.event_init) + struct i915_pmu *pmu = &i915->pmu; + + if (!pmu->base.event_init) return; - WARN_ON(i915->pmu.enable); + drm_WARN_ON(&i915->drm, pmu->enable); - hrtimer_cancel(&i915->pmu.timer); + hrtimer_cancel(&pmu->timer); - i915_pmu_unregister_cpuhp_state(i915); + i915_pmu_unregister_cpuhp_state(pmu); - perf_pmu_unregister(&i915->pmu.base); - i915->pmu.base.event_init = NULL; - free_event_attributes(i915); + perf_pmu_unregister(&pmu->base); + pmu->base.event_init = NULL; + kfree(pmu->base.attr_groups); + if (!is_igp(i915)) + kfree(pmu->name); + free_event_attributes(pmu); } -- Gitblit v1.6.2