From 2f7c68cb55ecb7331f2381deb497c27155f32faf Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Wed, 03 Jan 2024 09:43:39 +0000
Subject: [PATCH] update kernel to 5.10.198

Sync drivers/gpu/drm/i915/i915_pmu.c with the 5.10.198 kernel: sampling
now operates on struct i915_pmu and struct intel_gt rather than
drm_i915_private, engine busyness is sampled per engine under engine PM
references (with gen7 mmio access serialized), RC6 residency accounting
is reworked around the intel_rc6 and runtime PM helpers, the CPU hotplug
slot is stored per PMU instance, and the PMU registers under a
per-device name on non-IGP devices.

---
 kernel/drivers/gpu/drm/i915/i915_pmu.c |  730 +++++++++++++++++++++++++++++++------------------------
 1 file changed, 409 insertions(+), 321 deletions(-)

diff --git a/kernel/drivers/gpu/drm/i915/i915_pmu.c b/kernel/drivers/gpu/drm/i915/i915_pmu.c
index b7fda69..3c9ac66 100644
--- a/kernel/drivers/gpu/drm/i915/i915_pmu.c
+++ b/kernel/drivers/gpu/drm/i915/i915_pmu.c
@@ -5,9 +5,18 @@
  */
 
 #include <linux/irq.h>
-#include "i915_pmu.h"
-#include "intel_ringbuffer.h"
+#include <linux/pm_runtime.h>
+
+#include "gt/intel_engine.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_engine_user.h"
+#include "gt/intel_gt_pm.h"
+#include "gt/intel_rc6.h"
+#include "gt/intel_rps.h"
+
 #include "i915_drv.h"
+#include "i915_pmu.h"
+#include "intel_pm.h"
 
 /* Frequency for the sampling timer for events which need it. */
 #define FREQUENCY 200
@@ -70,8 +79,9 @@
 	return config_enabled_bit(event->attr.config);
 }
 
-static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
+static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
 {
+	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
 	u64 enable;
 
 	/*
@@ -79,7 +89,7 @@
 	 *
 	 * We start with a bitmask of all currently enabled events.
 	 */
-	enable = i915->pmu.enable;
+	enable = pmu->enable;
 
 	/*
 	 * Mask out all the ones which do not need the timer, or in
@@ -98,10 +108,8 @@
 	/*
 	 * Also there is software busyness tracking available we do not
 	 * need the timer for I915_SAMPLE_BUSY counter.
-	 *
-	 * Use RCS as proxy for all engines.
 	 */
-	else if (intel_engine_supports_stats(i915->engine[RCS]))
+	else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
 		enable &= ~BIT(I915_SAMPLE_BUSY);
 
 	/*
@@ -110,50 +118,151 @@
 	return enable;
 }
 
-void i915_pmu_gt_parked(struct drm_i915_private *i915)
+static u64 __get_rc6(struct intel_gt *gt)
 {
-	if (!i915->pmu.base.event_init)
-		return;
+	struct drm_i915_private *i915 = gt->i915;
+	u64 val;
 
-	spin_lock_irq(&i915->pmu.lock);
-	/*
-	 * Signal sampling timer to stop if only engine events are enabled and
-	 * GPU went idle.
-	 */
-	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
-	spin_unlock_irq(&i915->pmu.lock);
+	val = intel_rc6_residency_ns(&gt->rc6,
+				     IS_VALLEYVIEW(i915) ?
+				     VLV_GT_RENDER_RC6 :
+				     GEN6_GT_GFX_RC6);
+
+	if (HAS_RC6p(i915))
+		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p);
+
+	if (HAS_RC6pp(i915))
+		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp);
+
+	return val;
 }
 
-static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
+#if IS_ENABLED(CONFIG_PM)
+
+static inline s64 ktime_since(const ktime_t kt)
 {
-	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
-		i915->pmu.timer_enabled = true;
-		i915->pmu.timer_last = ktime_get();
-		hrtimer_start_range_ns(&i915->pmu.timer,
+	return ktime_to_ns(ktime_sub(ktime_get(), kt));
+}
+
+static u64 get_rc6(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	struct i915_pmu *pmu = &i915->pmu;
+	unsigned long flags;
+	bool awake = false;
+	u64 val;
+
+	if (intel_gt_pm_get_if_awake(gt)) {
+		val = __get_rc6(gt);
+		intel_gt_pm_put_async(gt);
+		awake = true;
+	}
+
+	spin_lock_irqsave(&pmu->lock, flags);
+
+	if (awake) {
+		pmu->sample[__I915_SAMPLE_RC6].cur = val;
+	} else {
+		/*
+		 * We think we are runtime suspended.
+		 *
+		 * Report the delta from when the device was suspended to now,
+		 * on top of the last known real value, as the approximated RC6
+		 * counter value.
+		 */
+		val = ktime_since(pmu->sleep_last);
+		val += pmu->sample[__I915_SAMPLE_RC6].cur;
+	}
+
+	if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
+		val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
+	else
+		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
+
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
+	return val;
+}
+
+static void init_rc6(struct i915_pmu *pmu)
+{
+	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
+	intel_wakeref_t wakeref;
+
+	with_intel_runtime_pm(i915->gt.uncore->rpm, wakeref) {
+		pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
+		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur =
+					pmu->sample[__I915_SAMPLE_RC6].cur;
+		pmu->sleep_last = ktime_get();
+	}
+}
+
+static void park_rc6(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+
+	pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
+	pmu->sleep_last = ktime_get();
+}
+
+#else
+
+static u64 get_rc6(struct intel_gt *gt)
+{
+	return __get_rc6(gt);
+}
+
+static void init_rc6(struct i915_pmu *pmu) { }
+static void park_rc6(struct drm_i915_private *i915) {}
+
+#endif
+
+static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
+{
+	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
+		pmu->timer_enabled = true;
+		pmu->timer_last = ktime_get();
+		hrtimer_start_range_ns(&pmu->timer,
 				       ns_to_ktime(PERIOD), 0,
 				       HRTIMER_MODE_REL_PINNED);
 	}
 }
 
-void i915_pmu_gt_unparked(struct drm_i915_private *i915)
+void i915_pmu_gt_parked(struct drm_i915_private *i915)
 {
-	if (!i915->pmu.base.event_init)
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (!pmu->base.event_init)
 		return;
 
-	spin_lock_irq(&i915->pmu.lock);
+	spin_lock_irq(&pmu->lock);
+
+	park_rc6(i915);
+
+	/*
+	 * Signal sampling timer to stop if only engine events are enabled and
+	 * GPU went idle.
+	 */
+	pmu->timer_enabled = pmu_needs_timer(pmu, false);
+
+	spin_unlock_irq(&pmu->lock);
+}
+
+void i915_pmu_gt_unparked(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (!pmu->base.event_init)
+		return;
+
+	spin_lock_irq(&pmu->lock);
+
 	/*
 	 * Re-enable sampling timer when GPU goes active.
 	 */
-	__i915_pmu_maybe_start_timer(i915);
-	spin_unlock_irq(&i915->pmu.lock);
-}
+	__i915_pmu_maybe_start_timer(pmu);
 
-static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
-{
-	if (!fw)
-		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
-
-	return true;
+	spin_unlock_irq(&pmu->lock);
 }
 
 static void
@@ -162,55 +271,79 @@
 	sample->cur += val;
 }
 
-static void
-engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
+static bool exclusive_mmio_access(const struct drm_i915_private *i915)
 {
+	/*
+	 * We have to avoid concurrent mmio cache line access on gen7 or
+	 * risk a machine hang. For a fun history lesson dig out the old
+	 * userspace intel_gpu_top and run it on Ivybridge or Haswell!
+	 */
+	return IS_GEN(i915, 7);
+}
+
+static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
+{
+	struct intel_engine_pmu *pmu = &engine->pmu;
+	bool busy;
+	u32 val;
+
+	val = ENGINE_READ_FW(engine, RING_CTL);
+	if (val == 0) /* powerwell off => engine idle */
+		return;
+
+	if (val & RING_WAIT)
+		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
+	if (val & RING_WAIT_SEMAPHORE)
+		add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
+
+	/* No need to sample when busy stats are supported. */
+	if (intel_engine_supports_stats(engine))
+		return;
+
+	/*
+	 * While waiting on a semaphore or event, MI_MODE reports the
+	 * ring as idle. However, previously using the seqno, and with
+	 * execlists sampling, we account for the ring waiting as the
+	 * engine being busy. Therefore, we record the sample as being
+	 * busy if either waiting or !idle.
+	 */
+	busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
+	if (!busy) {
+		val = ENGINE_READ_FW(engine, RING_MI_MODE);
+		busy = !(val & MODE_IDLE);
+	}
+	if (busy)
+		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
+}
+
+static void
+engines_sample(struct intel_gt *gt, unsigned int period_ns)
+{
+	struct drm_i915_private *i915 = gt->i915;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	bool fw = false;
+	unsigned long flags;
 
-	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
+	if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
 		return;
 
-	if (!dev_priv->gt.awake)
+	if (!intel_gt_pm_is_awake(gt))
 		return;
 
-	if (!intel_runtime_pm_get_if_in_use(dev_priv))
-		return;
+	for_each_engine(engine, gt, id) {
+		if (!intel_engine_pm_get_if_awake(engine))
+			continue;
 
-	for_each_engine(engine, dev_priv, id) {
-		u32 current_seqno = intel_engine_get_seqno(engine);
-		u32 last_seqno = intel_engine_last_submit(engine);
-		u32 val;
-
-		val = !i915_seqno_passed(current_seqno, last_seqno);
-
-		if (val)
-			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
-				   period_ns);
-
-		if (val && (engine->pmu.enable &
-		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
-			fw = grab_forcewake(dev_priv, fw);
-
-			val = I915_READ_FW(RING_CTL(engine->mmio_base));
+		if (exclusive_mmio_access(i915)) {
+			spin_lock_irqsave(&engine->uncore->lock, flags);
+			engine_sample(engine, period_ns);
+			spin_unlock_irqrestore(&engine->uncore->lock, flags);
 		} else {
-			val = 0;
+			engine_sample(engine, period_ns);
 		}
 
-		if (val & RING_WAIT)
-			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
-				   period_ns);
-
-		if (val & RING_WAIT_SEMAPHORE)
-			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
-				   period_ns);
+		intel_engine_pm_put_async(engine);
 	}
-
-	if (fw)
-		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-
-	intel_runtime_pm_put(dev_priv);
 }
 
 static void
@@ -219,48 +352,74 @@
 	sample->cur += mul_u32_u32(val, mul);
 }
 
-static void
-frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
+static bool frequency_sampling_enabled(struct i915_pmu *pmu)
 {
-	if (dev_priv->pmu.enable &
-	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
+	return pmu->enable &
+	       (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
+		config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY));
+}
+
+static void
+frequency_sample(struct intel_gt *gt, unsigned int period_ns)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_uncore *uncore = gt->uncore;
+	struct i915_pmu *pmu = &i915->pmu;
+	struct intel_rps *rps = &gt->rps;
+
+	if (!frequency_sampling_enabled(pmu))
+		return;
+
+	/* Report 0/0 (actual/requested) frequency while parked. */
+	if (!intel_gt_pm_get_if_awake(gt))
+		return;
+
+	if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
 		u32 val;
 
-		val = dev_priv->gt_pm.rps.cur_freq;
-		if (dev_priv->gt.awake &&
-		    intel_runtime_pm_get_if_in_use(dev_priv)) {
-			val = intel_get_cagf(dev_priv,
-					     I915_READ_NOTRACE(GEN6_RPSTAT1));
-			intel_runtime_pm_put(dev_priv);
-		}
+		/*
+		 * We take a quick peek here without using forcewake
+		 * so that we don't perturb the system under observation
+		 * (forcewake => !rc6 => increased power use). We expect
+		 * that if the read fails because it is outside of the
+		 * mmio power well, then it will return 0 -- in which
+		 * case we assume the system is running at the intended
+		 * frequency. Fortunately, the read should rarely fail!
+		 */
+		val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
+		if (val)
+			val = intel_rps_get_cagf(rps, val);
+		else
+			val = rps->cur_freq;
 
-		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
-				intel_gpu_freq(dev_priv, val),
+		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
+				intel_gpu_freq(rps, val), period_ns / 1000);
+	}
+
+	if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
+		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
+				intel_gpu_freq(rps, rps->cur_freq),
 				period_ns / 1000);
 	}
 
-	if (dev_priv->pmu.enable &
-	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
-		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ],
-				intel_gpu_freq(dev_priv,
-					       dev_priv->gt_pm.rps.cur_freq),
-				period_ns / 1000);
-	}
+	intel_gt_pm_put_async(gt);
 }
 
 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
 {
 	struct drm_i915_private *i915 =
 		container_of(hrtimer, struct drm_i915_private, pmu.timer);
+	struct i915_pmu *pmu = &i915->pmu;
+	struct intel_gt *gt = &i915->gt;
 	unsigned int period_ns;
 	ktime_t now;
 
-	if (!READ_ONCE(i915->pmu.timer_enabled))
+	if (!READ_ONCE(pmu->timer_enabled))
 		return HRTIMER_NORESTART;
 
 	now = ktime_get();
-	period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last));
-	i915->pmu.timer_last = now;
+	period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
+	pmu->timer_last = now;
 
 	/*
 	 * Strictly speaking the passed in period may not be 100% accurate for
@@ -268,8 +427,8 @@
 	 * grabbing the forcewake. However the potential error from timer call-
 	 * back delay greatly dominates this so we keep it simple.
 	 */
-	engines_sample(i915, period_ns);
-	frequency_sample(i915, period_ns);
+	engines_sample(gt, period_ns);
+	frequency_sample(gt, period_ns);
 
 	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
 
@@ -292,29 +451,12 @@
 	return sum;
 }
 
-static void engine_event_destroy(struct perf_event *event)
+static void i915_pmu_event_destroy(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
-	struct intel_engine_cs *engine;
 
-	engine = intel_engine_lookup_user(i915,
-					  engine_event_class(event),
-					  engine_event_instance(event));
-	if (WARN_ON_ONCE(!engine))
-		return;
-
-	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
-	    intel_engine_supports_stats(engine))
-		intel_disable_engine_stats(engine);
-}
-
-static void i915_pmu_event_destroy(struct perf_event *event)
-{
-	WARN_ON(event->parent);
-
-	if (is_engine_event(event))
-		engine_event_destroy(event);
+	drm_WARN_ON(&i915->drm, event->parent);
 }
 
 static int
@@ -344,7 +486,7 @@
 		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
 			/* Requires a mutex for sampling! */
 			return -ENODEV;
-		/* Fall-through. */
+		fallthrough;
 	case I915_PMU_REQUESTED_FREQUENCY:
 		if (INTEL_GEN(i915) < 6)
 			return -ENODEV;
@@ -367,23 +509,13 @@
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
 	struct intel_engine_cs *engine;
-	u8 sample;
-	int ret;
 
 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
 					  engine_event_instance(event));
 	if (!engine)
 		return -ENODEV;
 
-	sample = engine_event_sample(event);
-	ret = engine_event_status(engine, sample);
-	if (ret)
-		return ret;
-
-	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
-		ret = intel_enable_engine_stats(engine);
-
-	return ret;
+	return engine_event_status(engine, engine_event_sample(event));
 }
 
 static int i915_pmu_event_init(struct perf_event *event)
@@ -422,108 +554,11 @@
 	return 0;
 }
 
-static u64 __get_rc6(struct drm_i915_private *i915)
-{
-	u64 val;
-
-	val = intel_rc6_residency_ns(i915,
-				     IS_VALLEYVIEW(i915) ?
-				     VLV_GT_RENDER_RC6 :
-				     GEN6_GT_GFX_RC6);
-
-	if (HAS_RC6p(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
-
-	if (HAS_RC6pp(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
-
-	return val;
-}
-
-static u64 get_rc6(struct drm_i915_private *i915)
-{
-#if IS_ENABLED(CONFIG_PM)
-	unsigned long flags;
-	u64 val;
-
-	if (intel_runtime_pm_get_if_in_use(i915)) {
-		val = __get_rc6(i915);
-		intel_runtime_pm_put(i915);
-
-		/*
-		 * If we are coming back from being runtime suspended we must
-		 * be careful not to report a larger value than returned
-		 * previously.
-		 */
-
-		spin_lock_irqsave(&i915->pmu.lock, flags);
-
-		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
-			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
-		} else {
-			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		}
-
-		spin_unlock_irqrestore(&i915->pmu.lock, flags);
-	} else {
-		struct pci_dev *pdev = i915->drm.pdev;
-		struct device *kdev = &pdev->dev;
-
-		/*
-		 * We are runtime suspended.
-		 *
-		 * Report the delta from when the device was suspended to now,
-		 * on top of the last known real value, as the approximated RC6
-		 * counter value.
-		 */
-		spin_lock_irqsave(&i915->pmu.lock, flags);
-		spin_lock(&kdev->power.lock);
-
-		/*
-		 * After the above branch intel_runtime_pm_get_if_in_use failed
-		 * to get the runtime PM reference we cannot assume we are in
-		 * runtime suspend since we can either: a) race with coming out
-		 * of it before we took the power.lock, or b) there are other
-		 * states than suspended which can bring us here.
-		 *
-		 * We need to double-check that we are indeed currently runtime
-		 * suspended and if not we cannot do better than report the last
-		 * known RC6 value.
-		 */
-		if (kdev->power.runtime_status == RPM_SUSPENDED) {
-			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
-				i915->pmu.suspended_jiffies_last =
-						  kdev->power.suspended_jiffies;
-
-			val = kdev->power.suspended_jiffies -
-			      i915->pmu.suspended_jiffies_last;
-			val += jiffies - kdev->power.accounting_timestamp;
-
-			val = jiffies_to_nsecs(val);
-			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
-
-			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
-		} else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		} else {
-			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
-		}
-
-		spin_unlock(&kdev->power.lock);
-		spin_unlock_irqrestore(&i915->pmu.lock, flags);
-	}
-
-	return val;
-#else
-	return __get_rc6(i915);
-#endif
-}
-
 static u64 __i915_pmu_event_read(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = &i915->pmu;
 	u64 val = 0;
 
 	if (is_engine_event(event)) {
@@ -534,11 +569,14 @@
 						  engine_event_class(event),
 						  engine_event_instance(event));
 
-		if (WARN_ON_ONCE(!engine)) {
+		if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
 			/* Do nothing */
 		} else if (sample == I915_SAMPLE_BUSY &&
 			   intel_engine_supports_stats(engine)) {
-			val = ktime_to_ns(intel_engine_get_busy_time(engine));
+			ktime_t unused;
+
+			val = ktime_to_ns(intel_engine_get_busy_time(engine,
+								     &unused));
 		} else {
 			val = engine->pmu.sample[sample].cur;
 		}
@@ -546,19 +584,19 @@
 		switch (event->attr.config) {
 		case I915_PMU_ACTUAL_FREQUENCY:
 			val =
-			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
+			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
 				   USEC_PER_SEC /* to MHz */);
 			break;
 		case I915_PMU_REQUESTED_FREQUENCY:
 			val =
-			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
+			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
 				   USEC_PER_SEC /* to MHz */);
 			break;
 		case I915_PMU_INTERRUPTS:
 			val = count_interrupts(i915);
 			break;
 		case I915_PMU_RC6_RESIDENCY:
-			val = get_rc6(i915);
+			val = get_rc6(&i915->gt);
 			break;
 		}
 	}
@@ -586,23 +624,26 @@
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
 	unsigned int bit = event_enabled_bit(event);
+	struct i915_pmu *pmu = &i915->pmu;
 	unsigned long flags;
 
-	spin_lock_irqsave(&i915->pmu.lock, flags);
+	spin_lock_irqsave(&pmu->lock, flags);
 
 	/*
 	 * Update the bitmask of enabled events and increment
 	 * the event reference counter.
 	 */
-	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
-	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
-	i915->pmu.enable |= BIT_ULL(bit);
-	i915->pmu.enable_count[bit]++;
+	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
+	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
+	GEM_BUG_ON(pmu->enable_count[bit] == ~0);
+
+	pmu->enable |= BIT_ULL(bit);
+	pmu->enable_count[bit]++;
 
 	/*
 	 * Start the sampling timer if needed and not already enabled.
 	 */
-	__i915_pmu_maybe_start_timer(i915);
+	__i915_pmu_maybe_start_timer(pmu);
 
 	/*
 	 * For per-engine events the bitmask and reference counting
@@ -615,15 +656,20 @@
 		engine = intel_engine_lookup_user(i915,
 						  engine_event_class(event),
 						  engine_event_instance(event));
-		GEM_BUG_ON(!engine);
-		engine->pmu.enable |= BIT(sample);
 
-		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
+		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
+			     I915_ENGINE_SAMPLE_COUNT);
+		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
+			     I915_ENGINE_SAMPLE_COUNT);
+		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
+		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
+
+		engine->pmu.enable |= BIT(sample);
 		engine->pmu.enable_count[sample]++;
 	}
 
-	spin_unlock_irqrestore(&i915->pmu.lock, flags);
+	spin_unlock_irqrestore(&pmu->lock, flags);
 
 	/*
 	 * Store the current counter value so we can report the correct delta
@@ -638,9 +684,10 @@
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
 	unsigned int bit = event_enabled_bit(event);
+	struct i915_pmu *pmu = &i915->pmu;
 	unsigned long flags;
 
-	spin_lock_irqsave(&i915->pmu.lock, flags);
+	spin_lock_irqsave(&pmu->lock, flags);
 
 	if (is_engine_event(event)) {
 		u8 sample = engine_event_sample(event);
@@ -649,9 +696,11 @@
 		engine = intel_engine_lookup_user(i915,
 						  engine_event_class(event),
 						  engine_event_instance(event));
-		GEM_BUG_ON(!engine);
-		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
+
+		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
+		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
 		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
+
 		/*
 		 * Decrement the reference count and clear the enabled
 		 * bitmask when the last listener on an event goes away.
@@ -660,18 +709,18 @@
 			engine->pmu.enable &= ~BIT(sample);
 	}
 
-	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
-	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
+	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
+	GEM_BUG_ON(pmu->enable_count[bit] == 0);
 	/*
 	 * Decrement the reference count and clear the enabled
 	 * bitmask when the last listener on an event goes away.
 	 */
-	if (--i915->pmu.enable_count[bit] == 0) {
-		i915->pmu.enable &= ~BIT_ULL(bit);
-		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
+	if (--pmu->enable_count[bit] == 0) {
+		pmu->enable &= ~BIT_ULL(bit);
+		pmu->timer_enabled &= pmu_needs_timer(pmu, true);
 	}
 
-	spin_unlock_irqrestore(&i915->pmu.lock, flags);
+	spin_unlock_irqrestore(&pmu->lock, flags);
 }
 
 static void i915_pmu_event_start(struct perf_event *event, int flags)
@@ -750,11 +799,6 @@
 	return sprintf(buf, "config=0x%lx\n", eattr->val);
 }
 
-static struct attribute_group i915_pmu_events_attr_group = {
-	.name = "events",
-	/* Patch in attrs at runtime. */
-};
-
 static ssize_t
 i915_pmu_get_attr_cpumask(struct device *dev,
 			  struct device_attribute *attr,
@@ -772,13 +816,6 @@
 
 static const struct attribute_group i915_pmu_cpumask_attr_group = {
 	.attrs = i915_cpumask_attrs,
-};
-
-static const struct attribute_group *i915_pmu_attr_groups[] = {
-	&i915_pmu_format_attr_group,
-	&i915_pmu_events_attr_group,
-	&i915_pmu_cpumask_attr_group,
-	NULL
 };
 
 #define __event(__config, __name, __unit) \
@@ -820,8 +857,9 @@
 }
 
 static struct attribute **
-create_event_attributes(struct drm_i915_private *i915)
+create_event_attributes(struct i915_pmu *pmu)
 {
+	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
 	static const struct {
 		u64 config;
 		const char *name;
@@ -845,7 +883,6 @@
 	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
 	struct attribute **attr = NULL, **attr_iter;
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	unsigned int i;
 
 	/* Count how many counters we will be exposing. */
@@ -854,7 +891,7 @@
 			count++;
 	}
 
-	for_each_engine(engine, i915, id) {
+	for_each_uabi_engine(engine, i915) {
 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
 			if (!engine_event_status(engine,
 						 engine_events[i].sample))
@@ -905,7 +942,7 @@
 	}
 
 	/* Initialize supported engine counters. */
-	for_each_engine(engine, i915, id) {
+	for_each_uabi_engine(engine, i915) {
 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
 			char *str;
 
@@ -922,7 +959,7 @@
 			i915_iter =
 				add_i915_attr(i915_iter, str,
 					      __I915_PMU_ENGINE(engine->uabi_class,
-								engine->instance,
+								engine->uabi_instance,
 								engine_events[i].sample));
 
 			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
@@ -935,8 +972,8 @@
 		}
 	}
 
-	i915->pmu.i915_attr = i915_attr;
-	i915->pmu.pmu_attr = pmu_attr;
+	pmu->i915_attr = i915_attr;
+	pmu->pmu_attr = pmu_attr;
 
 	return attr;
 
@@ -952,25 +989,25 @@
 	return NULL;
 }
 
-static void free_event_attributes(struct drm_i915_private *i915)
+static void free_event_attributes(struct i915_pmu *pmu)
 {
-	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;
+	struct attribute **attr_iter = pmu->events_attr_group.attrs;
 
 	for (; *attr_iter; attr_iter++)
 		kfree((*attr_iter)->name);
 
-	kfree(i915_pmu_events_attr_group.attrs);
-	kfree(i915->pmu.i915_attr);
-	kfree(i915->pmu.pmu_attr);
+	kfree(pmu->events_attr_group.attrs);
+	kfree(pmu->i915_attr);
+	kfree(pmu->pmu_attr);
 
-	i915_pmu_events_attr_group.attrs = NULL;
-	i915->pmu.i915_attr = NULL;
-	i915->pmu.pmu_attr = NULL;
+	pmu->events_attr_group.attrs = NULL;
+	pmu->i915_attr = NULL;
+	pmu->pmu_attr = NULL;
 }
 
 static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
 {
-	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
+	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
 
 	GEM_BUG_ON(!pmu->base.event_init);
 
@@ -983,7 +1020,7 @@
 
 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
 {
-	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
+	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
 	unsigned int target;
 
 	GEM_BUG_ON(!pmu->base.event_init);
@@ -1000,9 +1037,7 @@
 	return 0;
 }
 
-static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
-
-static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
+static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
 {
 	enum cpuhp_state slot;
 	int ret;
@@ -1015,82 +1050,135 @@
 		return ret;
 
 	slot = ret;
-	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
+	ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node);
 	if (ret) {
 		cpuhp_remove_multi_state(slot);
 		return ret;
 	}
 
-	cpuhp_slot = slot;
+	pmu->cpuhp.slot = slot;
 	return 0;
 }
 
-static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
+static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
 {
-	WARN_ON(cpuhp_slot == CPUHP_INVALID);
-	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
-	cpuhp_remove_multi_state(cpuhp_slot);
+	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
+
+	drm_WARN_ON(&i915->drm, pmu->cpuhp.slot == CPUHP_INVALID);
+	drm_WARN_ON(&i915->drm, cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node));
+	cpuhp_remove_multi_state(pmu->cpuhp.slot);
+	pmu->cpuhp.slot = CPUHP_INVALID;
+}
+
+static bool is_igp(struct drm_i915_private *i915)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+
+	/* IGP is 0000:00:02.0 */
+	return pci_domain_nr(pdev->bus) == 0 &&
+	       pdev->bus->number == 0 &&
+	       PCI_SLOT(pdev->devfn) == 2 &&
+	       PCI_FUNC(pdev->devfn) == 0;
 }
 
 void i915_pmu_register(struct drm_i915_private *i915)
 {
-	int ret;
+	struct i915_pmu *pmu = &i915->pmu;
+	const struct attribute_group *attr_groups[] = {
+		&i915_pmu_format_attr_group,
+		&pmu->events_attr_group,
+		&i915_pmu_cpumask_attr_group,
+		NULL
+	};
+
+	int ret = -ENOMEM;
 
 	if (INTEL_GEN(i915) <= 2) {
-		DRM_INFO("PMU not supported for this GPU.");
+		drm_info(&i915->drm, "PMU not supported for this GPU.");
 		return;
 	}
 
-	i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
-	if (!i915_pmu_events_attr_group.attrs) {
-		ret = -ENOMEM;
-		goto err;
+	spin_lock_init(&pmu->lock);
+	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	pmu->timer.function = i915_sample;
+	pmu->cpuhp.slot = CPUHP_INVALID;
+	init_rc6(pmu);
+
+	if (!is_igp(i915)) {
+		pmu->name = kasprintf(GFP_KERNEL,
+				      "i915_%s",
+				      dev_name(i915->drm.dev));
+		if (pmu->name) {
+			/* tools/perf reserves colons as special. */
+			strreplace((char *)pmu->name, ':', '_');
+		}
+	} else {
+		pmu->name = "i915";
 	}
-
-	i915->pmu.base.attr_groups	= i915_pmu_attr_groups;
-	i915->pmu.base.task_ctx_nr	= perf_invalid_context;
-	i915->pmu.base.event_init	= i915_pmu_event_init;
-	i915->pmu.base.add		= i915_pmu_event_add;
-	i915->pmu.base.del		= i915_pmu_event_del;
-	i915->pmu.base.start		= i915_pmu_event_start;
-	i915->pmu.base.stop		= i915_pmu_event_stop;
-	i915->pmu.base.read		= i915_pmu_event_read;
-	i915->pmu.base.event_idx	= i915_pmu_event_event_idx;
-
-	spin_lock_init(&i915->pmu.lock);
-	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	i915->pmu.timer.function = i915_sample;
-
-	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
-	if (ret)
+	if (!pmu->name)
 		goto err;
 
-	ret = i915_pmu_register_cpuhp_state(i915);
+	pmu->events_attr_group.name = "events";
+	pmu->events_attr_group.attrs = create_event_attributes(pmu);
+	if (!pmu->events_attr_group.attrs)
+		goto err_name;
+
+	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
+					GFP_KERNEL);
+	if (!pmu->base.attr_groups)
+		goto err_attr;
+
+	pmu->base.module	= THIS_MODULE;
+	pmu->base.task_ctx_nr	= perf_invalid_context;
+	pmu->base.event_init	= i915_pmu_event_init;
+	pmu->base.add		= i915_pmu_event_add;
+	pmu->base.del		= i915_pmu_event_del;
+	pmu->base.start		= i915_pmu_event_start;
+	pmu->base.stop		= i915_pmu_event_stop;
+	pmu->base.read		= i915_pmu_event_read;
+	pmu->base.event_idx	= i915_pmu_event_event_idx;
+
+	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
+	if (ret)
+		goto err_groups;
+
+	ret = i915_pmu_register_cpuhp_state(pmu);
 	if (ret)
 		goto err_unreg;
 
 	return;
 
 err_unreg:
-	perf_pmu_unregister(&i915->pmu.base);
+	perf_pmu_unregister(&pmu->base);
+err_groups:
+	kfree(pmu->base.attr_groups);
+err_attr:
+	pmu->base.event_init = NULL;
+	free_event_attributes(pmu);
+err_name:
+	if (!is_igp(i915))
+		kfree(pmu->name);
 err:
-	i915->pmu.base.event_init = NULL;
-	free_event_attributes(i915);
-	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
+	drm_notice(&i915->drm, "Failed to register PMU!\n");
 }
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
 {
-	if (!i915->pmu.base.event_init)
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (!pmu->base.event_init)
 		return;
 
-	WARN_ON(i915->pmu.enable);
+	drm_WARN_ON(&i915->drm, pmu->enable);
 
-	hrtimer_cancel(&i915->pmu.timer);
+	hrtimer_cancel(&pmu->timer);
 
-	i915_pmu_unregister_cpuhp_state(i915);
+	i915_pmu_unregister_cpuhp_state(pmu);
 
-	perf_pmu_unregister(&i915->pmu.base);
-	i915->pmu.base.event_init = NULL;
-	free_event_attributes(i915);
+	perf_pmu_unregister(&pmu->base);
+	pmu->base.event_init = NULL;
+	kfree(pmu->base.attr_groups);
+	if (!is_igp(i915))
+		kfree(pmu->name);
+	free_event_attributes(pmu);
 }

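For reference, below is a minimal userspace sketch (not part of the applied
diff) showing how counters exposed by this PMU can be read through
perf_event_open(2). It resolves the event type and config from sysfs at
runtime instead of hard-coding i915 ABI constants; the "i915" PMU name
(integrated GPU case, see is_igp() above) and the "rc6-residency" event
attribute are assumptions based on the attributes created by
create_event_attributes() in this file.

/*
 * Illustrative only -- not part of the patch. Assumes the PMU is
 * registered as "i915" and that an "rc6-residency" event attribute
 * exists, as created by create_event_attributes() above.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int read_sysfs(const char *path, const char *fmt, uint64_t *val)
{
	FILE *f = fopen(path, "r");
	int ok;

	if (!f)
		return -1;
	ok = fscanf(f, fmt, val);
	fclose(f);
	return ok == 1 ? 0 : -1;
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t type, config, count;
	int fd;

	/* Event encoding is published in sysfs by the driver code above. */
	if (read_sysfs("/sys/bus/event_source/devices/i915/type",
		       "%" SCNu64, &type) ||
	    read_sysfs("/sys/bus/event_source/devices/i915/events/rc6-residency",
		       "config=%" SCNx64, &config))
		return 1;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = config;

	/* Uncore-style PMU: open system-wide (pid == -1) on one CPU. */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0) {
		perror("perf_event_open (needs CAP_PERFMON or root)");
		return 1;
	}

	sleep(1);
	if (read(fd, &count, sizeof(count)) != sizeof(count))
		return 1;

	/* The unit string is published alongside in .../events/rc6-residency.unit. */
	printf("rc6-residency delta over ~1s: %" PRIu64 "\n", count);
	close(fd);
	return 0;
}
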
--
Gitblit v1.6.2