forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-13 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
kernel/arch/x86/events/core.c
@@ -28,6 +28,7 @@
 #include <linux/bitops.h>
 #include <linux/device.h>
 #include <linux/nospec.h>
+#include <linux/static_call.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -44,12 +45,43 @@
 #include "perf_event.h"
 
 struct x86_pmu x86_pmu __read_mostly;
+static struct pmu pmu;
 
 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
	.enabled = 1,
+	.pmu = &pmu,
 };
 
+DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
 DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
+
+/*
+ * This here uses DEFINE_STATIC_CALL_NULL() to get a static_call defined
+ * from just a typename, as opposed to an actual function.
+ */
+DEFINE_STATIC_CALL_NULL(x86_pmu_handle_irq, *x86_pmu.handle_irq);
+DEFINE_STATIC_CALL_NULL(x86_pmu_disable_all, *x86_pmu.disable_all);
+DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all);
+DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable);
+DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add);
+DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del);
+DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_schedule_events, *x86_pmu.schedule_events);
+DEFINE_STATIC_CALL_NULL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints);
+DEFINE_STATIC_CALL_NULL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_start_scheduling, *x86_pmu.start_scheduling);
+DEFINE_STATIC_CALL_NULL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling);
+DEFINE_STATIC_CALL_NULL(x86_pmu_stop_scheduling, *x86_pmu.stop_scheduling);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_sched_task, *x86_pmu.sched_task);
+DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
 
 u64 __read_mostly hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
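
For readers who have not met the static_call API before, here is a minimal in-kernel sketch of the pattern the hunk above sets up. The backend_* names are invented for illustration; only DEFINE_STATIC_CALL_NULL(), static_call_update(), static_call() and static_call_cond() are the real kernel interfaces:

#include <linux/static_call.h>

struct backend_ops {
	int  (*handle)(int arg);
	void (*flush)(void);		/* optional, may stay NULL */
};

static struct backend_ops backend;	/* filled in at init time */

/* Type each call site from the member, just like *x86_pmu.handle_irq above. */
DEFINE_STATIC_CALL_NULL(backend_handle, *backend.handle);
DEFINE_STATIC_CALL_NULL(backend_flush, *backend.flush);

static void backend_static_call_update(void)
{
	/* Patch every call site to point at the chosen implementation. */
	static_call_update(backend_handle, backend.handle);
	static_call_update(backend_flush, backend.flush);
}

static int backend_dispatch(int arg)
{
	/* A patched static_call() is a direct call, not an indirect one. */
	int ret = static_call(backend_handle)(arg);

	/* The _cond() form degrades to a NOP while the target is NULL. */
	static_call_cond(backend_flush)();
	return ret;
}

x86_pmu_static_call_update(), added further down in this diff, plays the backend_static_call_update() role for the real x86_pmu methods.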
@@ -70,11 +102,13 @@
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - x86_pmu.cntval_bits;
	u64 prev_raw_count, new_raw_count;
-	int idx = hwc->idx;
	u64 delta;
 
-	if (idx == INTEL_PMC_IDX_FIXED_BTS)
+	if (unlikely(!hwc->event_base))
		return 0;
+
+	if (unlikely(is_topdown_count(event)) && x86_pmu.update_topdown_event)
+		return x86_pmu.update_topdown_event(event);
 
	/*
	 * Careful: an NMI might modify the previous event value.
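
The `shift = 64 - x86_pmu.cntval_bits` visible in this hunk is what lets x86_perf_event_update() compute a correct delta even when the narrow hardware counter wraps between two reads. A standalone sketch of that arithmetic, assuming the usual 48-bit counter width (pmc_delta() is an invented name):

#include <stdint.h>
#include <stdio.h>

/*
 * Shift both readings up so the counter's top bit becomes the machine's
 * sign bit; the subtraction then wraps correctly, and shifting the
 * difference back down sign-extends it to 64 bits.
 */
static int64_t pmc_delta(uint64_t prev, uint64_t now, int cntval_bits)
{
	int shift = 64 - cntval_bits;

	return ((int64_t)(now << shift) - (int64_t)(prev << shift)) >> shift;
}

int main(void)
{
	/* 48-bit counter wrapped from near its top back to a small value: prints 511 */
	printf("%lld\n", (long long)pmc_delta(0xffffffffff00ULL, 0xffULL, 48));
	return 0;
}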
@@ -340,10 +374,12 @@
	if (!atomic_inc_not_zero(&pmc_refcount)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&pmc_refcount) == 0) {
-			if (!reserve_pmc_hardware())
+			if (!reserve_pmc_hardware()) {
				err = -EBUSY;
-			else
+			} else {
				reserve_ds_buffers();
+				reserve_lbr_buffers();
+			}
		}
		if (!err)
			atomic_inc(&pmc_refcount);
@@ -358,6 +394,7 @@
	if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
		release_pmc_hardware();
		release_ds_buffers();
+		release_lbr_buffers();
		mutex_unlock(&pmc_reserve_mutex);
	}
 }
@@ -565,6 +602,21 @@
			return -EINVAL;
	}
 
+	/* sample_regs_user never support XMM registers */
+	if (unlikely(event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK))
+		return -EINVAL;
+	/*
+	 * Besides the general purpose registers, XMM registers may
+	 * be collected in PEBS on some platforms, e.g. Icelake
+	 */
+	if (unlikely(event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK)) {
+		if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS))
+			return -EINVAL;
+
+		if (!event->attr.precise_ip)
+			return -EINVAL;
+	}
+
	return x86_setup_perfctr(event);
 }
 
@@ -602,6 +654,7 @@
	int idx;
 
	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
		u64 val;
 
		if (!test_bit(idx, cpuc->active_mask))
@@ -611,6 +664,8 @@
			continue;
		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(x86_pmu_config_addr(idx), val);
+		if (is_counter_pair(hwc))
+			wrmsrl(x86_pmu_config_addr(idx + 1), 0);
	}
 }
 
@@ -641,7 +696,7 @@
	cpuc->enabled = 0;
	barrier();
 
-	x86_pmu.disable_all();
+	static_call(x86_pmu_disable_all)();
 }
 
 void x86_pmu_enable_all(int added)
@@ -659,13 +714,24 @@
	}
 }
 
-static struct pmu pmu;
-
 static inline int is_x86_event(struct perf_event *event)
 {
	return event->pmu == &pmu;
 }
 
+struct pmu *x86_get_pmu(unsigned int cpu)
+{
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+	/*
+	 * All CPUs of the hybrid type have been offline.
+	 * The x86_get_pmu() should not be invoked.
+	 */
+	if (WARN_ON_ONCE(!cpuc->pmu))
+		return &pmu;
+
+	return cpuc->pmu;
+}
 /*
  * Event scheduler state:
  *
@@ -679,7 +745,7 @@
	int counter; /* counter index */
	int unassigned; /* number of events to be assigned left */
	int nr_gp; /* number of GP counters used */
-	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	u64 used;
 };
 
 /* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
@@ -736,8 +802,12 @@
	sched->saved_states--;
	sched->state = sched->saved[sched->saved_states];
 
-	/* continue with next counter: */
-	clear_bit(sched->state.counter++, sched->state.used);
+	/* this assignment didn't work out */
+	/* XXX broken vs EVENT_PAIR */
+	sched->state.used &= ~BIT_ULL(sched->state.counter);
+
+	/* try the next one */
+	sched->state.counter++;
 
	return true;
 }
@@ -762,20 +832,32 @@
	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
		idx = INTEL_PMC_IDX_FIXED;
		for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
-			if (!__test_and_set_bit(idx, sched->state.used))
-				goto done;
+			u64 mask = BIT_ULL(idx);
+
+			if (sched->state.used & mask)
+				continue;
+
+			sched->state.used |= mask;
+			goto done;
		}
	}
 
	/* Grab the first unused counter starting with idx */
	idx = sched->state.counter;
	for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
-		if (!__test_and_set_bit(idx, sched->state.used)) {
-			if (sched->state.nr_gp++ >= sched->max_gp)
-				return false;
+		u64 mask = BIT_ULL(idx);
 
-			goto done;
-		}
+		if (c->flags & PERF_X86_EVENT_PAIR)
+			mask |= mask << 1;
+
+		if (sched->state.used & mask)
+			continue;
+
+		if (sched->state.nr_gp++ >= sched->max_gp)
+			return false;
+
+		sched->state.used |= mask;
+		goto done;
	}
 
	return false;
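
The switch from an `unsigned long used[]` bitmap to a plain `u64 used` is what makes the counter-pair handling above a two-line mask operation. A small standalone sketch of the same idea (claim_counter() is an invented helper name):

#include <stdbool.h>
#include <stdint.h>

#define BIT_ULL(n)	(1ULL << (n))

/*
 * Claim counter idx, or the even/odd counter pair for large-increment
 * "Merge" events, with a single test-and-or on a 64-bit word, as
 * perf_sched_find_counter() does above.
 */
static bool claim_counter(uint64_t *used, int idx, bool pair)
{
	uint64_t mask = BIT_ULL(idx);

	if (pair)
		mask |= mask << 1;	/* also claim the adjacent counter */

	if (*used & mask)
		return false;		/* idx (or its partner) already taken */

	*used |= mask;
	return true;
}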
@@ -852,20 +934,42 @@
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
	struct event_constraint *c;
-	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	struct perf_event *e;
-	int i, wmin, wmax, unsched = 0;
+	int n0, i, wmin, wmax, unsched = 0;
	struct hw_perf_event *hwc;
+	u64 used_mask = 0;
 
-	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+	/*
+	 * Compute the number of events already present; see x86_pmu_add(),
+	 * validate_group() and x86_pmu_commit_txn(). For the former two
+	 * cpuc->n_events hasn't been updated yet, while for the latter
+	 * cpuc->n_txn contains the number of events added in the current
+	 * transaction.
+	 */
+	n0 = cpuc->n_events;
+	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
+		n0 -= cpuc->n_txn;
 
-	if (x86_pmu.start_scheduling)
-		x86_pmu.start_scheduling(cpuc);
+	static_call_cond(x86_pmu_start_scheduling)(cpuc);
 
	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
-		cpuc->event_constraint[i] = NULL;
-		c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
-		cpuc->event_constraint[i] = c;
+		c = cpuc->event_constraint[i];
+
+		/*
+		 * Previously scheduled events should have a cached constraint,
+		 * while new events should not have one.
+		 */
+		WARN_ON_ONCE((c && i >= n0) || (!c && i < n0));
+
+		/*
+		 * Request constraints for new events; or for those events that
+		 * have a dynamic constraint -- for those the constraint can
+		 * change due to external factors (sibling state, allow_tfa).
+		 */
+		if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) {
+			c = static_call(x86_pmu_get_event_constraints)(cpuc, i, cpuc->event_list[i]);
+			cpuc->event_constraint[i] = c;
+		}
 
		wmin = min(wmin, c->weight);
		wmax = max(wmax, c->weight);
@@ -875,6 +979,8 @@
	 * fastpath, try to reuse previous register
	 */
	for (i = 0; i < n; i++) {
+		u64 mask;
+
		hwc = &cpuc->event_list[i]->hw;
		c = cpuc->event_constraint[i];
 
@@ -886,11 +992,16 @@
		if (!test_bit(hwc->idx, c->idxmsk))
			break;
 
+		mask = BIT_ULL(hwc->idx);
+		if (is_counter_pair(hwc))
+			mask |= mask << 1;
+
		/* not already used */
-		if (test_bit(hwc->idx, used_mask))
+		if (used_mask & mask)
			break;
 
-		__set_bit(hwc->idx, used_mask);
+		used_mask |= mask;
+
		if (assign)
			assign[i] = hwc->idx;
	}
@@ -913,6 +1024,15 @@
		    READ_ONCE(cpuc->excl_cntrs->exclusive_present))
			gpmax /= 2;
 
+		/*
+		 * Reduce the amount of available counters to allow fitting
+		 * the extra Merge events needed by large increment events.
+		 */
+		if (x86_pmu.flags & PMU_FL_PAIR) {
+			gpmax = x86_pmu.num_counters - cpuc->n_pair;
+			WARN_ON(gpmax <= 0);
+		}
+
		unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
					     wmax, gpmax, assign);
	}
@@ -930,32 +1050,63 @@
	if (!unsched && assign) {
		for (i = 0; i < n; i++) {
			e = cpuc->event_list[i];
-			e->hw.flags |= PERF_X86_EVENT_COMMITTED;
-			if (x86_pmu.commit_scheduling)
-				x86_pmu.commit_scheduling(cpuc, i, assign[i]);
+			static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]);
		}
	} else {
-		for (i = 0; i < n; i++) {
+		for (i = n0; i < n; i++) {
			e = cpuc->event_list[i];
-			/*
-			 * do not put_constraint() on comitted events,
-			 * because they are good to go
-			 */
-			if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
-				continue;
 
			/*
			 * release events that failed scheduling
			 */
-			if (x86_pmu.put_event_constraints)
-				x86_pmu.put_event_constraints(cpuc, e);
+			static_call_cond(x86_pmu_put_event_constraints)(cpuc, e);
+
+			cpuc->event_constraint[i] = NULL;
		}
	}
 
-	if (x86_pmu.stop_scheduling)
-		x86_pmu.stop_scheduling(cpuc);
+	static_call_cond(x86_pmu_stop_scheduling)(cpuc);
 
	return unsched ? -EINVAL : 0;
+}
+
+static int add_nr_metric_event(struct cpu_hw_events *cpuc,
+			       struct perf_event *event)
+{
+	if (is_metric_event(event)) {
+		if (cpuc->n_metric == INTEL_TD_METRIC_NUM)
+			return -EINVAL;
+		cpuc->n_metric++;
+		cpuc->n_txn_metric++;
+	}
+
+	return 0;
+}
+
+static void del_nr_metric_event(struct cpu_hw_events *cpuc,
+				struct perf_event *event)
+{
+	if (is_metric_event(event))
+		cpuc->n_metric--;
+}
+
+static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
+			 int max_count, int n)
+{
+
+	if (x86_pmu.intel_cap.perf_metrics && add_nr_metric_event(cpuc, event))
+		return -EINVAL;
+
+	if (n >= max_count + cpuc->n_metric)
+		return -EINVAL;
+
+	cpuc->event_list[n] = event;
+	if (is_counter_pair(&event->hw)) {
+		cpuc->n_pair++;
+		cpuc->n_txn_pair++;
+	}
+
+	return 0;
 }
 
 /*
@@ -971,25 +1122,44 @@
 
	/* current number of events already accepted */
	n = cpuc->n_events;
+	if (!cpuc->n_events)
+		cpuc->pebs_output = 0;
+
+	if (!cpuc->is_fake && leader->attr.precise_ip) {
+		/*
+		 * For PEBS->PT, if !aux_event, the group leader (PT) went
+		 * away, the group was broken down and this singleton event
+		 * can't schedule any more.
+		 */
+		if (is_pebs_pt(leader) && !leader->aux_event)
+			return -EINVAL;
+
+		/*
+		 * pebs_output: 0: no PEBS so far, 1: PT, 2: DS
+		 */
+		if (cpuc->pebs_output &&
+		    cpuc->pebs_output != is_pebs_pt(leader) + 1)
+			return -EINVAL;
+
+		cpuc->pebs_output = is_pebs_pt(leader) + 1;
+	}
 
	if (is_x86_event(leader)) {
-		if (n >= max_count)
+		if (collect_event(cpuc, leader, max_count, n))
			return -EINVAL;
-		cpuc->event_list[n] = leader;
		n++;
	}
+
	if (!dogrp)
		return n;
 
	for_each_sibling_event(event, leader) {
-		if (!is_x86_event(event) ||
-		    event->state <= PERF_EVENT_STATE_OFF)
+		if (!is_x86_event(event) || event->state <= PERF_EVENT_STATE_OFF)
			continue;
 
-		if (n >= max_count)
+		if (collect_event(cpuc, event, max_count, n))
			return -EINVAL;
 
-		cpuc->event_list[n] = event;
		n++;
	}
	return n;
@@ -999,23 +1169,58 @@
				    struct cpu_hw_events *cpuc, int i)
 {
	struct hw_perf_event *hwc = &event->hw;
+	int idx;
 
-	hwc->idx = cpuc->assign[i];
+	idx = hwc->idx = cpuc->assign[i];
	hwc->last_cpu = smp_processor_id();
	hwc->last_tag = ++cpuc->tags[i];
 
-	if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
+	switch (hwc->idx) {
+	case INTEL_PMC_IDX_FIXED_BTS:
+	case INTEL_PMC_IDX_FIXED_VLBR:
		hwc->config_base = 0;
		hwc->event_base = 0;
-	} else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
+		break;
+
+	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
+		/* All the metric events are mapped onto the fixed counter 3. */
+		idx = INTEL_PMC_IDX_FIXED_SLOTS;
+		/* fall through */
+	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
-		hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
-	} else {
+		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
				  (idx - INTEL_PMC_IDX_FIXED);
+		hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) |
					INTEL_PMC_FIXED_RDPMC_BASE;
+		break;
+
+	default:
		hwc->config_base = x86_pmu_config_addr(hwc->idx);
		hwc->event_base = x86_pmu_event_addr(hwc->idx);
		hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
+		break;
	}
+}
+
+/**
+ * x86_perf_rdpmc_index - Return PMC counter used for event
+ * @event: the perf_event to which the PMC counter was assigned
+ *
+ * The counter assigned to this performance event may change if interrupts
+ * are enabled. This counter should thus never be used while interrupts are
+ * enabled. Before this function is used to obtain the assigned counter the
+ * event should be checked for validity using, for example,
+ * perf_event_read_local(), within the same interrupt disabled section in
+ * which this counter is planned to be used.
+ *
+ * Return: The index of the performance monitoring counter assigned to
+ * @perf_event.
+ */
+int x86_perf_rdpmc_index(struct perf_event *event)
+{
+	lockdep_assert_irqs_disabled();
+
+	return event->hw.event_base_rdpmc;
 }
 
 static inline int match_prev_assignment(struct hw_perf_event *hwc,
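
The `event_base_rdpmc` values assigned above encode how the RDPMC instruction's ECX operand is formed: general-purpose counters by raw index, fixed counters by their offset with bit 30 set (the removed line spelled that bit out as `1<<30`; the new line names it INTEL_PMC_FIXED_RDPMC_BASE). A standalone sketch of the mapping, with the kernel's constant values written out (rdpmc_ecx() is an invented name):

#include <stdint.h>

#define INTEL_PMC_IDX_FIXED		32		/* first fixed-counter index */
#define INTEL_PMC_FIXED_RDPMC_BASE	(1u << 30)	/* RDPMC "fixed counter" bit */

static uint32_t rdpmc_ecx(int idx)
{
	if (idx >= INTEL_PMC_IDX_FIXED)
		return (uint32_t)(idx - INTEL_PMC_IDX_FIXED) | INTEL_PMC_FIXED_RDPMC_BASE;

	return (uint32_t)idx;	/* general-purpose counters use the raw index */
}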
@@ -1098,7 +1303,7 @@
	cpuc->enabled = 1;
	barrier();
 
-	x86_pmu.enable_all(added);
+	static_call(x86_pmu_enable_all)(added);
 }
 
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -1114,8 +1319,12 @@
	s64 period = hwc->sample_period;
	int ret = 0, idx = hwc->idx;
 
-	if (idx == INTEL_PMC_IDX_FIXED_BTS)
+	if (unlikely(!hwc->event_base))
		return 0;
+
+	if (unlikely(is_topdown_count(event)) &&
+	    x86_pmu.set_topdown_event_period)
+		return x86_pmu.set_topdown_event_period(event);
 
	/*
	 * If we are way outside a reasonable range then just skip forward:
@@ -1156,6 +1365,13 @@
	wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
 
	/*
+	 * Sign extend the Merge event counter's upper 16 bits since
+	 * we currently declare a 48-bit counter width
+	 */
+	if (is_counter_pair(hwc))
+		wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff);
+
+	/*
	 * Due to erratum on certan cpu we need
	 * a second write to be sure the register
	 * is updated properly
@@ -1181,7 +1397,7 @@
  * Add a single event to the PMU.
  *
  * The event is added to the group of enabled events
- * but only if it can be scehduled with existing events.
+ * but only if it can be scheduled with existing events.
  */
 static int x86_pmu_add(struct perf_event *event, int flags)
 {
@@ -1212,7 +1428,7 @@
	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
		goto done_collect;
 
-	ret = x86_pmu.schedule_events(cpuc, n, assign);
+	ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
	if (ret)
		goto out;
	/*
@@ -1230,13 +1446,11 @@
	cpuc->n_added += n - n0;
	cpuc->n_txn += n - n0;
 
-	if (x86_pmu.add) {
-		/*
-		 * This is before x86_pmu_enable() will call x86_pmu_start(),
-		 * so we enable LBRs before an event needs them etc..
-		 */
-		x86_pmu.add(event);
-	}
+	/*
+	 * This is before x86_pmu_enable() will call x86_pmu_start(),
+	 * so we enable LBRs before an event needs them etc..
+	 */
+	static_call_cond(x86_pmu_add)(event);
 
	ret = 0;
 out:
@@ -1264,7 +1478,7 @@
	cpuc->events[idx] = event;
	__set_bit(idx, cpuc->active_mask);
	__set_bit(idx, cpuc->running);
-	x86_pmu.enable(event);
+	static_call(x86_pmu_enable)(event);
	perf_event_update_userpage(event);
 }
 
@@ -1334,7 +1548,7 @@
	struct hw_perf_event *hwc = &event->hw;
 
	if (test_bit(hwc->idx, cpuc->active_mask)) {
-		x86_pmu.disable(event);
+		static_call(x86_pmu_disable)(event);
		__clear_bit(hwc->idx, cpuc->active_mask);
		cpuc->events[hwc->idx] = NULL;
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
@@ -1355,11 +1569,6 @@
 {
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int i;
-
-	/*
-	 * event is descheduled
-	 */
-	event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
 
	/*
	 * If we're called during a txn, we only need to undo x86_pmu.add.
@@ -1389,26 +1598,27 @@
	if (i >= cpuc->n_events - cpuc->n_added)
		--cpuc->n_added;
 
-	if (x86_pmu.put_event_constraints)
-		x86_pmu.put_event_constraints(cpuc, event);
+	static_call_cond(x86_pmu_put_event_constraints)(cpuc, event);
 
	/* Delete the array entry. */
	while (++i < cpuc->n_events) {
		cpuc->event_list[i-1] = cpuc->event_list[i];
		cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
	}
+	cpuc->event_constraint[i-1] = NULL;
	--cpuc->n_events;
+	if (x86_pmu.intel_cap.perf_metrics)
+		del_nr_metric_event(cpuc, event);
 
	perf_event_update_userpage(event);
 
 do_del:
-	if (x86_pmu.del) {
-		/*
-		 * This is after x86_pmu_stop(); so we disable LBRs after any
-		 * event can need them etc..
-		 */
-		x86_pmu.del(event);
-	}
+
+	/*
+	 * This is after x86_pmu_stop(); so we disable LBRs after any
+	 * event can need them etc..
+	 */
+	static_call_cond(x86_pmu_del)(event);
 }
 
 int x86_pmu_handle_irq(struct pt_regs *regs)
@@ -1486,7 +1696,7 @@
		return NMI_DONE;
 
	start_clock = sched_clock();
-	ret = x86_pmu.handle_irq(regs);
+	ret = static_call(x86_pmu_handle_irq)(regs);
	finish_clock = sched_clock();
 
	perf_sample_event_took(finish_clock - start_clock);
@@ -1562,78 +1772,19 @@
 
 }
 
-static struct attribute_group x86_pmu_format_group = {
+static struct attribute_group x86_pmu_format_group __ro_after_init = {
	.name = "format",
	.attrs = NULL,
 };
 
-/*
- * Remove all undefined events (x86_pmu.event_map(id) == 0)
- * out of events_attr attributes.
- */
-static void __init filter_events(struct attribute **attrs)
-{
-	struct device_attribute *d;
-	struct perf_pmu_events_attr *pmu_attr;
-	int offset = 0;
-	int i, j;
-
-	for (i = 0; attrs[i]; i++) {
-		d = (struct device_attribute *)attrs[i];
-		pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
-		/* str trumps id */
-		if (pmu_attr->event_str)
-			continue;
-		if (x86_pmu.event_map(i + offset))
-			continue;
-
-		for (j = i; attrs[j]; j++)
-			attrs[j] = attrs[j + 1];
-
-		/* Check the shifted attr. */
-		i--;
-
-		/*
-		 * event_map() is index based, the attrs array is organized
-		 * by increasing event index. If we shift the events, then
-		 * we need to compensate for the event_map(), otherwise
-		 * we are looking up the wrong event in the map
-		 */
-		offset++;
-	}
-}
-
-/* Merge two pointer arrays */
-__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
-{
-	struct attribute **new;
-	int j, i;
-
-	for (j = 0; a[j]; j++)
-		;
-	for (i = 0; b[i]; i++)
-		j++;
-	j++;
-
-	new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL);
-	if (!new)
-		return NULL;
-
-	j = 0;
-	for (i = 0; a[i]; i++)
-		new[j++] = a[i];
-	for (i = 0; b[i]; i++)
-		new[j++] = b[i];
-	new[j] = NULL;
-
-	return new;
-}
-
 ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page)
 {
-	struct perf_pmu_events_attr *pmu_attr = \
+	struct perf_pmu_events_attr *pmu_attr =
		container_of(attr, struct perf_pmu_events_attr, attr);
-	u64 config = x86_pmu.event_map(pmu_attr->id);
+	u64 config = 0;
+
+	if (pmu_attr->id < x86_pmu.max_events)
+		config = x86_pmu.event_map(pmu_attr->id);
 
	/* string trumps id */
	if (pmu_attr->event_str)
@@ -1693,9 +1844,27 @@
	NULL,
 };
 
-static struct attribute_group x86_pmu_events_group = {
+/*
+ * Remove all undefined events (x86_pmu.event_map(id) == 0)
+ * out of events_attr attributes.
+ */
+static umode_t
+is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	if (idx >= x86_pmu.max_events)
+		return 0;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
+	/* str trumps id */
+	return pmu_attr->event_str || x86_pmu.event_map(idx) ? attr->mode : 0;
+}
+
+static struct attribute_group x86_pmu_events_group __ro_after_init = {
	.name = "events",
	.attrs = events_attr,
+	.is_visible = is_visible,
 };
 
 ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
@@ -1740,6 +1909,38 @@
 static struct attribute_group x86_pmu_attr_group;
 static struct attribute_group x86_pmu_caps_group;
 
+static void x86_pmu_static_call_update(void)
+{
+	static_call_update(x86_pmu_handle_irq, x86_pmu.handle_irq);
+	static_call_update(x86_pmu_disable_all, x86_pmu.disable_all);
+	static_call_update(x86_pmu_enable_all, x86_pmu.enable_all);
+	static_call_update(x86_pmu_enable, x86_pmu.enable);
+	static_call_update(x86_pmu_disable, x86_pmu.disable);
+
+	static_call_update(x86_pmu_add, x86_pmu.add);
+	static_call_update(x86_pmu_del, x86_pmu.del);
+	static_call_update(x86_pmu_read, x86_pmu.read);
+
+	static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events);
+	static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints);
+	static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints);
+
+	static_call_update(x86_pmu_start_scheduling, x86_pmu.start_scheduling);
+	static_call_update(x86_pmu_commit_scheduling, x86_pmu.commit_scheduling);
+	static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling);
+
+	static_call_update(x86_pmu_sched_task, x86_pmu.sched_task);
+	static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx);
+
+	static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs);
+	static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
+}
+
+static void _x86_pmu_read(struct perf_event *event)
+{
+	x86_perf_event_update(event);
+}
+
 static int __init init_hw_perf_events(void)
 {
	struct x86_pmu_quirk *quirk;
@@ -1753,6 +1954,14 @@
		break;
	case X86_VENDOR_AMD:
		err = amd_pmu_init();
+		break;
+	case X86_VENDOR_HYGON:
+		err = amd_pmu_init();
+		x86_pmu.name = "HYGON";
+		break;
+	case X86_VENDOR_ZHAOXIN:
+	case X86_VENDOR_CENTAUR:
+		err = zhaoxin_pmu_init();
		break;
	default:
		err = -ENOTSUPP;
@@ -1787,37 +1996,10 @@
 
	x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 
-	if (x86_pmu.caps_attrs) {
-		struct attribute **tmp;
-
-		tmp = merge_attr(x86_pmu_caps_group.attrs, x86_pmu.caps_attrs);
-		if (!WARN_ON(!tmp))
-			x86_pmu_caps_group.attrs = tmp;
-	}
-
-	if (x86_pmu.event_attrs)
-		x86_pmu_events_group.attrs = x86_pmu.event_attrs;
-
	if (!x86_pmu.events_sysfs_show)
		x86_pmu_events_group.attrs = &empty_attrs;
-	else
-		filter_events(x86_pmu_events_group.attrs);
 
-	if (x86_pmu.cpu_events) {
-		struct attribute **tmp;
-
-		tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
-		if (!WARN_ON(!tmp))
-			x86_pmu_events_group.attrs = tmp;
-	}
-
-	if (x86_pmu.attrs) {
-		struct attribute **tmp;
-
-		tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs);
-		if (!WARN_ON(!tmp))
-			x86_pmu_attr_group.attrs = tmp;
-	}
+	pmu.attr_update = x86_pmu.attr_update;
 
	pr_info("... version: %d\n", x86_pmu.version);
	pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
@@ -1826,6 +2008,11 @@
	pr_info("... max period: %016Lx\n", x86_pmu.max_period);
	pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
	pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
+
+	if (!x86_pmu.read)
+		x86_pmu.read = _x86_pmu_read;
+
+	x86_pmu_static_call_update();
 
	/*
	 * Install callbacks. Core will call them for each online
@@ -1863,11 +2050,9 @@
 }
 early_initcall(init_hw_perf_events);
 
-static inline void x86_pmu_read(struct perf_event *event)
+static void x86_pmu_read(struct perf_event *event)
 {
-	if (x86_pmu.read)
-		return x86_pmu.read(event);
-	x86_perf_event_update(event);
+	static_call(x86_pmu_read)(event);
 }
 
 /*
@@ -1891,6 +2076,8 @@
 
	perf_pmu_disable(pmu);
	__this_cpu_write(cpu_hw_events.n_txn, 0);
+	__this_cpu_write(cpu_hw_events.n_txn_pair, 0);
+	__this_cpu_write(cpu_hw_events.n_txn_metric, 0);
 }
 
 /*
@@ -1916,6 +2103,8 @@
	 */
	__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
	__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
+	__this_cpu_sub(cpu_hw_events.n_pair, __this_cpu_read(cpu_hw_events.n_txn_pair));
+	__this_cpu_sub(cpu_hw_events.n_metric, __this_cpu_read(cpu_hw_events.n_txn_metric));
	perf_pmu_enable(pmu);
 }
 
@@ -1944,7 +2133,7 @@
	if (!x86_pmu_initialized())
		return -EAGAIN;
 
-	ret = x86_pmu.schedule_events(cpuc, n, assign);
+	ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
	if (ret)
		return ret;
 
@@ -2004,7 +2193,7 @@
	if (IS_ERR(fake_cpuc))
		return PTR_ERR(fake_cpuc);
 
-	c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
+	c = x86_pmu.get_event_constraints(fake_cpuc, 0, event);
 
	if (!c || !c->weight)
		ret = -EINVAL;
@@ -2052,8 +2241,7 @@
	if (n < 0)
		goto out;
 
-	fake_cpuc->n_events = n;
-
+	fake_cpuc->n_events = 0;
	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
 
 out:
@@ -2106,11 +2294,6 @@
	return err;
 }
 
-static void refresh_pce(void *ignored)
-{
-	load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
-}
-
 static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
 {
	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
@@ -2123,13 +2306,13 @@
	 * userspace with CR4.PCE clear while another task is still
	 * doing on_each_cpu_mask() to propagate CR4.PCE.
	 *
-	 * For now, this can't happen because all callers hold mmap_sem
+	 * For now, this can't happen because all callers hold mmap_lock
	 * for write. If this changes, we'll need a different solution.
	 */
-	lockdep_assert_held_exclusive(&mm->mmap_sem);
+	mmap_assert_write_locked(mm);
 
	if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
-		on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
+		on_each_cpu_mask(mm_cpumask(mm), cr4_update_pce, NULL, 1);
 }
 
 static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
@@ -2139,22 +2322,20 @@
		return;
 
	if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed))
-		on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
+		on_each_cpu_mask(mm_cpumask(mm), cr4_update_pce, NULL, 1);
 }
 
 static int x86_pmu_event_idx(struct perf_event *event)
 {
-	int idx = event->hw.idx;
+	struct hw_perf_event *hwc = &event->hw;
 
-	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+	if (!(hwc->flags & PERF_X86_EVENT_RDPMC_ALLOWED))
		return 0;
 
-	if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
-		idx -= INTEL_PMC_IDX_FIXED;
-		idx |= 1 << 30;
-	}
-
-	return idx + 1;
+	if (is_metric_idx(hwc->idx))
+		return INTEL_PMC_FIXED_RDPMC_METRICS + 1;
+	else
+		return hwc->event_base_rdpmc + 1;
 }
 
 static ssize_t get_attr_rdpmc(struct device *cdev,
@@ -2181,20 +2362,25 @@
	if (x86_pmu.attr_rdpmc_broken)
		return -ENOTSUPP;
 
-	if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
+	if (val != x86_pmu.attr_rdpmc) {
		/*
-		 * Changing into or out of always available, aka
-		 * perf-event-bypassing mode. This path is extremely slow,
+		 * Changing into or out of never available or always available,
+		 * aka perf-event-bypassing mode. This path is extremely slow,
		 * but only root can trigger it, so it's okay.
		 */
+		if (val == 0)
+			static_branch_inc(&rdpmc_never_available_key);
+		else if (x86_pmu.attr_rdpmc == 0)
+			static_branch_dec(&rdpmc_never_available_key);
+
		if (val == 2)
			static_branch_inc(&rdpmc_always_available_key);
-		else
+		else if (x86_pmu.attr_rdpmc == 2)
			static_branch_dec(&rdpmc_always_available_key);
-		on_each_cpu(refresh_pce, NULL, 1);
-	}
 
-	x86_pmu.attr_rdpmc = val;
+		on_each_cpu(cr4_update_pce, NULL, 1);
+		x86_pmu.attr_rdpmc = val;
+	}
 
	return count;
 }
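
The two static keys give the three `rdpmc` sysfs modes (0 = never, 1 = per-task default, 2 = always) a branch-patchable representation. The following is only a hedged sketch of the decision the cr4_update_pce() calls above feed into; want_cr4_pce() is an invented name, it assumes the key declarations from this file are visible, and the real check lives on the CR4-update path rather than here:

#include <linux/jump_label.h>
#include <linux/atomic.h>
#include <linux/mm_types.h>

static bool want_cr4_pce(struct mm_struct *mm)
{
	if (static_branch_unlikely(&rdpmc_always_available_key))
		return true;		/* mode 2: CR4.PCE always set */
	if (static_branch_unlikely(&rdpmc_never_available_key))
		return false;		/* mode 0: CR4.PCE always clear */

	/* mode 1: follow the per-mm refcount bumped in x86_pmu_event_mapped() */
	return mm && atomic_read(&mm->context.perf_rdpmc_allowed);
}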
@@ -2206,7 +2392,7 @@
	NULL,
 };
 
-static struct attribute_group x86_pmu_attr_group = {
+static struct attribute_group x86_pmu_attr_group __ro_after_init = {
	.attrs = x86_pmu_attrs,
 };
 
@@ -2224,7 +2410,7 @@
	NULL
 };
 
-static struct attribute_group x86_pmu_caps_group = {
+static struct attribute_group x86_pmu_caps_group __ro_after_init = {
	.name = "caps",
	.attrs = x86_pmu_caps_attrs,
 };
@@ -2239,8 +2425,13 @@
 
 static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-	if (x86_pmu.sched_task)
-		x86_pmu.sched_task(ctx, sched_in);
+	static_call_cond(x86_pmu_sched_task)(ctx, sched_in);
+}
+
+static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
+				  struct perf_event_context *next)
+{
+	static_call_cond(x86_pmu_swap_task_ctx)(prev, next);
 }
 
 void perf_check_microcode(void)
@@ -2258,6 +2449,17 @@
		if (x86_pmu.limit_period(event, value) > value)
			return -EINVAL;
	}
+
+	return 0;
+}
+
+static int x86_pmu_aux_output_match(struct perf_event *event)
+{
+	if (!(pmu.capabilities & PERF_PMU_CAP_AUX_OUTPUT))
+		return 0;
+
+	if (x86_pmu.aux_output_match)
+		return x86_pmu.aux_output_match(event);
 
	return 0;
 }
@@ -2285,8 +2487,10 @@
 
	.event_idx = x86_pmu_event_idx,
	.sched_task = x86_pmu_sched_task,
-	.task_ctx_size = sizeof(struct x86_perf_task_context),
+	.swap_task_ctx = x86_pmu_swap_task_ctx,
	.check_period = x86_pmu_check_period,
+
+	.aux_output_match = x86_pmu_aux_output_match,
 };
 
 void arch_perf_update_userpage(struct perf_event *event,
@@ -2329,13 +2533,23 @@
	cyc2ns_read_end();
 }
 
+/*
+ * Determine whether the regs were taken from an irq/exception handler rather
+ * than from perf_arch_fetch_caller_regs().
+ */
+static bool perf_hw_regs(struct pt_regs *regs)
+{
+	return regs->flags & X86_EFLAGS_FIXED;
+}
+
 void
 perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
 {
+	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
	struct unwind_state state;
	unsigned long addr;
 
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+	if (guest_cbs && guest_cbs->is_in_guest()) {
		/* TODO: We don't support guest os callchain now */
		return;
	}
@@ -2343,8 +2557,12 @@
	if (perf_callchain_store(entry, regs->ip))
		return;
 
-	for (unwind_start(&state, current, regs, NULL); !unwind_done(&state);
-	     unwind_next_frame(&state)) {
+	if (perf_hw_regs(regs))
+		unwind_start(&state, current, regs, NULL);
+	else
+		unwind_start(&state, current, NULL, (void *)regs->sp);
+
+	for (; !unwind_done(&state); unwind_next_frame(&state)) {
		addr = unwind_get_return_address(&state);
		if (!addr || perf_callchain_store(entry, addr))
			return;
@@ -2395,7 +2613,7 @@
	/* 32-bit process in 64-bit kernel. */
	unsigned long ss_base, cs_base;
	struct stack_frame_ia32 frame;
-	const void __user *fp;
+	const struct stack_frame_ia32 __user *fp;
 
	if (!test_thread_flag(TIF_IA32))
		return 0;
@@ -2406,18 +2624,12 @@
	fp = compat_ptr(ss_base + regs->bp);
	pagefault_disable();
	while (entry->nr < entry->max_stack) {
-		unsigned long bytes;
-		frame.next_frame = 0;
-		frame.return_address = 0;
-
		if (!valid_user_frame(fp, sizeof(frame)))
			break;
 
-		bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
-		if (bytes != 0)
+		if (__get_user(frame.next_frame, &fp->next_frame))
			break;
-		bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4);
-		if (bytes != 0)
+		if (__get_user(frame.return_address, &fp->return_address))
			break;
 
		perf_callchain_store(entry, cs_base + frame.return_address);
@@ -2437,10 +2649,11 @@
 void
 perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
 {
+	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
	struct stack_frame frame;
-	const unsigned long __user *fp;
+	const struct stack_frame __user *fp;
 
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+	if (guest_cbs && guest_cbs->is_in_guest()) {
		/* TODO: We don't support guest os callchain now */
		return;
	}
@@ -2451,7 +2664,7 @@
	if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
		return;
 
-	fp = (unsigned long __user *)regs->bp;
+	fp = (void __user *)regs->bp;
 
	perf_callchain_store(entry, regs->ip);
 
@@ -2463,19 +2676,12 @@
 
	pagefault_disable();
	while (entry->nr < entry->max_stack) {
-		unsigned long bytes;
-
-		frame.next_frame = NULL;
-		frame.return_address = 0;
-
		if (!valid_user_frame(fp, sizeof(frame)))
			break;
 
-		bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp));
-		if (bytes != 0)
+		if (__get_user(frame.next_frame, &fp->next_frame))
			break;
-		bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp));
-		if (bytes != 0)
+		if (__get_user(frame.return_address, &fp->return_address))
			break;
 
		perf_callchain_store(entry, frame.return_address);
@@ -2524,18 +2730,21 @@
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
-		return perf_guest_cbs->get_guest_ip();
+	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+
+	if (guest_cbs && guest_cbs->is_in_guest())
+		return guest_cbs->get_guest_ip();
 
	return regs->ip + code_segment_base(regs);
 }
 
 unsigned long perf_misc_flags(struct pt_regs *regs)
 {
+	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
	int misc = 0;
 
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-		if (perf_guest_cbs->is_user_mode())
+	if (guest_cbs && guest_cbs->is_in_guest()) {
+		if (guest_cbs->is_user_mode())
			misc |= PERF_RECORD_MISC_GUEST_USER;
		else
			misc |= PERF_RECORD_MISC_GUEST_KERNEL;