forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-09 95099d4622f8cb224d94e314c7a8e0df60b13f87
kernel/drivers/irqchip/irq-gic-v3-its.c
@@ -1,31 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2013-2017 ARM Limited, All Rights Reserved.
  * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

 #include <linux/acpi.h>
 #include <linux/acpi_iort.h>
+#include <linux/bitfield.h>
 #include <linux/bitmap.h>
 #include <linux/cpu.h>
+#include <linux/crash_dump.h>
 #include <linux/delay.h>
 #include <linux/dma-iommu.h>
+#include <linux/efi.h>
 #include <linux/interrupt.h>
+#include <linux/iopoll.h>
 #include <linux/irqdomain.h>
 #include <linux/list.h>
-#include <linux/list_sort.h>
 #include <linux/log2.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/msi.h>
 #include <linux/of.h>
@@ -51,6 +44,7 @@
 #define ITS_FLAGS_WORKAROUND_CAVIUM_23144	(1ULL << 2)

 #define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING	(1 << 0)
+#define RDIST_FLAGS_RD_TABLES_PREALLOCATED	(1 << 1)

 static u32 lpi_id_bits;

@@ -63,7 +57,7 @@
 #define LPI_PROPBASE_SZ		ALIGN(BIT(LPI_NRBITS), SZ_64K)
 #define LPI_PENDBASE_SZ		ALIGN(BIT(LPI_NRBITS) / 8, SZ_64K)

-#define LPI_PROP_DEFAULT_PRIO	0xa0
+#define LPI_PROP_DEFAULT_PRIO	GICD_INT_DEF_PRI

 /*
  * Collection structure - just an ID, and a redistributor address to
@@ -102,6 +96,7 @@
	struct mutex		dev_alloc_lock;
	struct list_head	entry;
	void __iomem		*base;
+	void __iomem		*sgir_base;
	phys_addr_t		phys_base;
	struct its_cmd_block	*cmd_base;
	struct its_cmd_block	*cmd_write;
@@ -109,24 +104,36 @@
	struct its_collection	*collections;
	struct fwnode_handle	*fwnode_handle;
	u64			(*get_msi_base)(struct its_device *its_dev);
+	u64			typer;
	u64			cbaser_save;
	u32			ctlr_save;
+	u32			mpidr;
	struct list_head	its_device_list;
	u64			flags;
	unsigned long		list_nr;
-	u32			ite_size;
-	u32			device_ids;
	int			numa_node;
	unsigned int		msi_domain_flags;
	u32			pre_its_base;	/* for Socionext Synquacer */
-	bool			is_v4;
	int			vlpi_redist_offset;
 };
+
+#define is_v4(its)		(!!((its)->typer & GITS_TYPER_VLPIS))
+#define is_v4_1(its)		(!!((its)->typer & GITS_TYPER_VMAPP))
+#define device_ids(its)		(FIELD_GET(GITS_TYPER_DEVBITS, (its)->typer) + 1)

 #define ITS_ITT_ALIGN		SZ_256

 /* The maximum number of VPEID bits supported by VLPI commands */
-#define ITS_MAX_VPEID_BITS	(16)
+#define ITS_MAX_VPEID_BITS						\
+	({								\
+		int nvpeid = 16;					\
+		if (gic_rdists->has_rvpeid &&				\
+		    gic_rdists->gicd_typer2 & GICD_TYPER2_VIL)		\
+			nvpeid = 1 + (gic_rdists->gicd_typer2 &		\
+				      GICD_TYPER2_VID);			\
+									\
+		nvpeid;							\
+	})
 #define ITS_MAX_VPEID		(1 << (ITS_MAX_VPEID_BITS))

 /* Convert page order to size in bytes */
@@ -137,7 +144,7 @@
	u16			*col_map;
	irq_hw_number_t		lpi_base;
	int			nr_lpis;
-	struct mutex		vlpi_lock;
+	raw_spinlock_t		vlpi_lock;
	struct its_vm		*vm;
	struct its_vlpi_map	*vlpi_maps;
	int			nr_vlpis;
@@ -167,6 +174,13 @@
	int			next_victim;
 } vpe_proxy;

+struct cpu_lpi_count {
+	atomic_t	managed;
+	atomic_t	unmanaged;
+};
+
+static DEFINE_PER_CPU(struct cpu_lpi_count, cpu_lpi_count);
+
 static LIST_HEAD(its_nodes);
 static DEFINE_RAW_SPINLOCK(its_lock);
 static struct rdists *gic_rdists;
@@ -179,8 +193,18 @@
 static DEFINE_IDA(its_vpeid_ida);

 #define gic_data_rdist()		(raw_cpu_ptr(gic_rdists->rdist))
+#define gic_data_rdist_cpu(cpu)		(per_cpu_ptr(gic_rdists->rdist, cpu))
 #define gic_data_rdist_rd_base()	(gic_data_rdist()->rd_base)
 #define gic_data_rdist_vlpi_base()	(gic_data_rdist_rd_base() + SZ_128K)
+
+/*
+ * Skip ITSs that have no vLPIs mapped, unless we're on GICv4.1, as we
+ * always have vSGIs mapped.
+ */
+static bool require_its_list_vmovp(struct its_vm *vm, struct its_node *its)
+{
+	return (gic_rdists->has_rvpeid || vm->vlpi_count[its->list_nr]);
+}

 static u16 get_its_list(struct its_vm *vm)
 {
@@ -188,14 +212,20 @@
	unsigned long its_list = 0;

	list_for_each_entry(its, &its_nodes, entry) {
-		if (!its->is_v4)
+		if (!is_v4(its))
			continue;

-		if (vm->vlpi_count[its->list_nr])
+		if (require_its_list_vmovp(vm, its))
			__set_bit(its->list_nr, &its_list);
	}

	return (u16)its_list;
+}
+
+static inline u32 its_get_event_id(struct irq_data *d)
+{
+	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+	return d->hwirq - its_dev->event_map.lpi_base;
 }

 static struct its_collection *dev_event_to_col(struct its_device *its_dev,
@@ -204,6 +234,64 @@
	struct its_node *its = its_dev->its;

	return its->collections + its_dev->event_map.col_map[event];
+}
+
+static struct its_vlpi_map *dev_event_to_vlpi_map(struct its_device *its_dev,
+						  u32 event)
+{
+	if (WARN_ON_ONCE(event >= its_dev->event_map.nr_lpis))
+		return NULL;
+
+	return &its_dev->event_map.vlpi_maps[event];
+}
+
+static struct its_vlpi_map *get_vlpi_map(struct irq_data *d)
+{
+	if (irqd_is_forwarded_to_vcpu(d)) {
+		struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+		u32 event = its_get_event_id(d);
+
+		return dev_event_to_vlpi_map(its_dev, event);
+	}
+
+	return NULL;
+}
+
+static int vpe_to_cpuid_lock(struct its_vpe *vpe, unsigned long *flags)
+{
+	raw_spin_lock_irqsave(&vpe->vpe_lock, *flags);
+	return vpe->col_idx;
+}
+
+static void vpe_to_cpuid_unlock(struct its_vpe *vpe, unsigned long flags)
+{
+	raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);
+}
+
+static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags)
+{
+	struct its_vlpi_map *map = get_vlpi_map(d);
+	int cpu;
+
+	if (map) {
+		cpu = vpe_to_cpuid_lock(map->vpe, flags);
+	} else {
+		/* Physical LPIs are already locked via the irq_desc lock */
+		struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+		cpu = its_dev->event_map.col_map[its_get_event_id(d)];
+		/* Keep GCC quiet... */
+		*flags = 0;
+	}
+
+	return cpu;
+}
+
+static void irq_to_cpuid_unlock(struct irq_data *d, unsigned long flags)
+{
+	struct its_vlpi_map *map = get_vlpi_map(d);
+
+	if (map)
+		vpe_to_cpuid_unlock(map->vpe, flags);
 }

 static struct its_collection *valid_col(struct its_collection *col)
@@ -305,6 +393,19 @@
			u16 seq_num;
			u16 its_list;
		} its_vmovp_cmd;
+
+		struct {
+			struct its_vpe *vpe;
+		} its_invdb_cmd;
+
+		struct {
+			struct its_vpe *vpe;
+			u8 sgi;
+			u8 priority;
+			bool enable;
+			bool group;
+			bool clear;
+		} its_vsgi_cmd;
	};
 };

@@ -312,7 +413,10 @@
  * The ITS command block, which is what the ITS actually parses.
  */
 struct its_cmd_block {
-	u64	raw_cmd[4];
+	union {
+		u64	raw_cmd[4];
+		__le64	raw_cmd_le[4];
+	};
 };

 #define ITS_CMD_QUEUE_SZ		SZ_64K
@@ -418,13 +522,70 @@
	its_mask_encode(&cmd->raw_cmd[3], vpt_size, 4, 0);
 }

+static void its_encode_vconf_addr(struct its_cmd_block *cmd, u64 vconf_pa)
+{
+	its_mask_encode(&cmd->raw_cmd[0], vconf_pa >> 16, 51, 16);
+}
+
+static void its_encode_alloc(struct its_cmd_block *cmd, bool alloc)
+{
+	its_mask_encode(&cmd->raw_cmd[0], alloc, 8, 8);
+}
+
+static void its_encode_ptz(struct its_cmd_block *cmd, bool ptz)
+{
+	its_mask_encode(&cmd->raw_cmd[0], ptz, 9, 9);
+}
+
+static void its_encode_vmapp_default_db(struct its_cmd_block *cmd,
+					u32 vpe_db_lpi)
+{
+	its_mask_encode(&cmd->raw_cmd[1], vpe_db_lpi, 31, 0);
+}
+
+static void its_encode_vmovp_default_db(struct its_cmd_block *cmd,
+					u32 vpe_db_lpi)
+{
+	its_mask_encode(&cmd->raw_cmd[3], vpe_db_lpi, 31, 0);
+}
+
+static void its_encode_db(struct its_cmd_block *cmd, bool db)
+{
+	its_mask_encode(&cmd->raw_cmd[2], db, 63, 63);
+}
+
+static void its_encode_sgi_intid(struct its_cmd_block *cmd, u8 sgi)
+{
+	its_mask_encode(&cmd->raw_cmd[0], sgi, 35, 32);
+}
+
+static void its_encode_sgi_priority(struct its_cmd_block *cmd, u8 prio)
+{
+	its_mask_encode(&cmd->raw_cmd[0], prio >> 4, 23, 20);
+}
+
+static void its_encode_sgi_group(struct its_cmd_block *cmd, bool grp)
+{
+	its_mask_encode(&cmd->raw_cmd[0], grp, 10, 10);
+}
+
+static void its_encode_sgi_clear(struct its_cmd_block *cmd, bool clr)
+{
+	its_mask_encode(&cmd->raw_cmd[0], clr, 9, 9);
+}
+
+static void its_encode_sgi_enable(struct its_cmd_block *cmd, bool en)
+{
+	its_mask_encode(&cmd->raw_cmd[0], en, 8, 8);
+}
+
 static inline void its_fixup_cmd(struct its_cmd_block *cmd)
 {
	/* Let's fixup BE commands */
-	cmd->raw_cmd[0] = cpu_to_le64(cmd->raw_cmd[0]);
-	cmd->raw_cmd[1] = cpu_to_le64(cmd->raw_cmd[1]);
-	cmd->raw_cmd[2] = cpu_to_le64(cmd->raw_cmd[2]);
-	cmd->raw_cmd[3] = cpu_to_le64(cmd->raw_cmd[3]);
+	cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]);
+	cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]);
+	cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]);
+	cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]);
 }

 static struct its_collection *its_build_mapd_cmd(struct its_node *its,
@@ -601,19 +762,45 @@
					   struct its_cmd_block *cmd,
					   struct its_cmd_desc *desc)
 {
-	unsigned long vpt_addr;
+	unsigned long vpt_addr, vconf_addr;
	u64 target;
-
-	vpt_addr = virt_to_phys(page_address(desc->its_vmapp_cmd.vpe->vpt_page));
-	target = desc->its_vmapp_cmd.col->target_address + its->vlpi_redist_offset;
+	bool alloc;

	its_encode_cmd(cmd, GITS_CMD_VMAPP);
	its_encode_vpeid(cmd, desc->its_vmapp_cmd.vpe->vpe_id);
	its_encode_valid(cmd, desc->its_vmapp_cmd.valid);
+
+	if (!desc->its_vmapp_cmd.valid) {
+		if (is_v4_1(its)) {
+			alloc = !atomic_dec_return(&desc->its_vmapp_cmd.vpe->vmapp_count);
+			its_encode_alloc(cmd, alloc);
+		}
+
+		goto out;
+	}
+
+	vpt_addr = virt_to_phys(page_address(desc->its_vmapp_cmd.vpe->vpt_page));
+	target = desc->its_vmapp_cmd.col->target_address + its->vlpi_redist_offset;
+
	its_encode_target(cmd, target);
	its_encode_vpt_addr(cmd, vpt_addr);
	its_encode_vpt_size(cmd, LPI_NRBITS - 1);

+	if (!is_v4_1(its))
+		goto out;
+
+	vconf_addr = virt_to_phys(page_address(desc->its_vmapp_cmd.vpe->its_vm->vprop_page));
+
+	alloc = !atomic_fetch_inc(&desc->its_vmapp_cmd.vpe->vmapp_count);
+
+	its_encode_alloc(cmd, alloc);
+
+	/* We can only signal PTZ when alloc==1. Why do we have two bits? */
+	its_encode_ptz(cmd, alloc);
+	its_encode_vconf_addr(cmd, vconf_addr);
+	its_encode_vmapp_default_db(cmd, desc->its_vmapp_cmd.vpe->vpe_db_lpi);
+
+out:
	its_fixup_cmd(cmd);

	return valid_vpe(its, desc->its_vmapp_cmd.vpe);
@@ -625,7 +812,7 @@
 {
	u32 db;

-	if (desc->its_vmapti_cmd.db_enabled)
+	if (!is_v4_1(its) && desc->its_vmapti_cmd.db_enabled)
		db = desc->its_vmapti_cmd.vpe->vpe_db_lpi;
	else
		db = 1023;
@@ -648,7 +835,7 @@
 {
	u32 db;

-	if (desc->its_vmovi_cmd.db_enabled)
+	if (!is_v4_1(its) && desc->its_vmovi_cmd.db_enabled)
		db = desc->its_vmovi_cmd.vpe->vpe_db_lpi;
	else
		db = 1023;
@@ -678,9 +865,103 @@
	its_encode_vpeid(cmd, desc->its_vmovp_cmd.vpe->vpe_id);
	its_encode_target(cmd, target);

+	if (is_v4_1(its)) {
+		its_encode_db(cmd, true);
+		its_encode_vmovp_default_db(cmd, desc->its_vmovp_cmd.vpe->vpe_db_lpi);
+	}
+
	its_fixup_cmd(cmd);

	return valid_vpe(its, desc->its_vmovp_cmd.vpe);
+}
+
+static struct its_vpe *its_build_vinv_cmd(struct its_node *its,
+					  struct its_cmd_block *cmd,
+					  struct its_cmd_desc *desc)
+{
+	struct its_vlpi_map *map;
+
+	map = dev_event_to_vlpi_map(desc->its_inv_cmd.dev,
+				    desc->its_inv_cmd.event_id);
+
+	its_encode_cmd(cmd, GITS_CMD_INV);
+	its_encode_devid(cmd, desc->its_inv_cmd.dev->device_id);
+	its_encode_event_id(cmd, desc->its_inv_cmd.event_id);
+
+	its_fixup_cmd(cmd);
+
+	return valid_vpe(its, map->vpe);
+}
+
+static struct its_vpe *its_build_vint_cmd(struct its_node *its,
+					  struct its_cmd_block *cmd,
+					  struct its_cmd_desc *desc)
+{
+	struct its_vlpi_map *map;
+
+	map = dev_event_to_vlpi_map(desc->its_int_cmd.dev,
+				    desc->its_int_cmd.event_id);
+
+	its_encode_cmd(cmd, GITS_CMD_INT);
+	its_encode_devid(cmd, desc->its_int_cmd.dev->device_id);
+	its_encode_event_id(cmd, desc->its_int_cmd.event_id);
+
+	its_fixup_cmd(cmd);
+
+	return valid_vpe(its, map->vpe);
+}
+
+static struct its_vpe *its_build_vclear_cmd(struct its_node *its,
+					    struct its_cmd_block *cmd,
+					    struct its_cmd_desc *desc)
+{
+	struct its_vlpi_map *map;
+
+	map = dev_event_to_vlpi_map(desc->its_clear_cmd.dev,
+				    desc->its_clear_cmd.event_id);
+
+	its_encode_cmd(cmd, GITS_CMD_CLEAR);
+	its_encode_devid(cmd, desc->its_clear_cmd.dev->device_id);
+	its_encode_event_id(cmd, desc->its_clear_cmd.event_id);
+
+	its_fixup_cmd(cmd);
+
+	return valid_vpe(its, map->vpe);
+}
+
+static struct its_vpe *its_build_invdb_cmd(struct its_node *its,
+					   struct its_cmd_block *cmd,
+					   struct its_cmd_desc *desc)
+{
+	if (WARN_ON(!is_v4_1(its)))
+		return NULL;
+
+	its_encode_cmd(cmd, GITS_CMD_INVDB);
+	its_encode_vpeid(cmd, desc->its_invdb_cmd.vpe->vpe_id);
+
+	its_fixup_cmd(cmd);
+
+	return valid_vpe(its, desc->its_invdb_cmd.vpe);
+}
+
+static struct its_vpe *its_build_vsgi_cmd(struct its_node *its,
+					  struct its_cmd_block *cmd,
+					  struct its_cmd_desc *desc)
+{
+	if (WARN_ON(!is_v4_1(its)))
+		return NULL;
+
+	its_encode_cmd(cmd, GITS_CMD_VSGI);
+	its_encode_vpeid(cmd, desc->its_vsgi_cmd.vpe->vpe_id);
+	its_encode_sgi_intid(cmd, desc->its_vsgi_cmd.sgi);
+	its_encode_sgi_priority(cmd, desc->its_vsgi_cmd.priority);
+	its_encode_sgi_group(cmd, desc->its_vsgi_cmd.group);
+	its_encode_sgi_clear(cmd, desc->its_vsgi_cmd.clear);
+	its_encode_sgi_enable(cmd, desc->its_vsgi_cmd.enable);
+
+	its_fixup_cmd(cmd);
+
+	return valid_vpe(its, desc->its_vsgi_cmd.vpe);
 }

 static u64 its_cmd_ptr_to_offset(struct its_node *its,
@@ -960,7 +1241,7 @@

 static void its_send_vmapti(struct its_device *dev, u32 id)
 {
-	struct its_vlpi_map *map = &dev->event_map.vlpi_maps[id];
+	struct its_vlpi_map *map = dev_event_to_vlpi_map(dev, id);
	struct its_cmd_desc desc;

	desc.its_vmapti_cmd.vpe = map->vpe;
@@ -974,7 +1255,7 @@

 static void its_send_vmovi(struct its_device *dev, u32 id)
 {
-	struct its_vlpi_map *map = &dev->event_map.vlpi_maps[id];
+	struct its_vlpi_map *map = dev_event_to_vlpi_map(dev, id);
	struct its_cmd_desc desc;

	desc.its_vmovi_cmd.vpe = map->vpe;
@@ -1028,10 +1309,10 @@

	/* Emit VMOVPs */
	list_for_each_entry(its, &its_nodes, entry) {
-		if (!its->is_v4)
+		if (!is_v4(its))
			continue;

-		if (!vpe->its_vm->vlpi_count[its->list_nr])
+		if (!require_its_list_vmovp(vpe->its_vm, its))
			continue;

		desc.its_vmovp_cmd.col = &its->collections[col_id];
@@ -1049,40 +1330,79 @@
	its_send_single_vcommand(its, its_build_vinvall_cmd, &desc);
 }

+static void its_send_vinv(struct its_device *dev, u32 event_id)
+{
+	struct its_cmd_desc desc;
+
+	/*
+	 * There is no real VINV command. This is just a normal INV,
+	 * with a VSYNC instead of a SYNC.
+	 */
+	desc.its_inv_cmd.dev = dev;
+	desc.its_inv_cmd.event_id = event_id;
+
+	its_send_single_vcommand(dev->its, its_build_vinv_cmd, &desc);
+}
+
+static void its_send_vint(struct its_device *dev, u32 event_id)
+{
+	struct its_cmd_desc desc;
+
+	/*
+	 * There is no real VINT command. This is just a normal INT,
+	 * with a VSYNC instead of a SYNC.
+	 */
+	desc.its_int_cmd.dev = dev;
+	desc.its_int_cmd.event_id = event_id;
+
+	its_send_single_vcommand(dev->its, its_build_vint_cmd, &desc);
+}
+
+static void its_send_vclear(struct its_device *dev, u32 event_id)
+{
+	struct its_cmd_desc desc;
+
+	/*
+	 * There is no real VCLEAR command. This is just a normal CLEAR,
+	 * with a VSYNC instead of a SYNC.
+	 */
+	desc.its_clear_cmd.dev = dev;
+	desc.its_clear_cmd.event_id = event_id;
+
+	its_send_single_vcommand(dev->its, its_build_vclear_cmd, &desc);
+}
+
+static void its_send_invdb(struct its_node *its, struct its_vpe *vpe)
+{
+	struct its_cmd_desc desc;
+
+	desc.its_invdb_cmd.vpe = vpe;
+	its_send_single_vcommand(its, its_build_invdb_cmd, &desc);
+}
+
 /*
  * irqchip functions - assumes MSI, mostly.
  */
-
-static inline u32 its_get_event_id(struct irq_data *d)
-{
-	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
-	return d->hwirq - its_dev->event_map.lpi_base;
-}
-
 static void lpi_write_config(struct irq_data *d, u8 clr, u8 set)
 {
+	struct its_vlpi_map *map = get_vlpi_map(d);
	irq_hw_number_t hwirq;
-	struct page *prop_page;
+	void *va;
	u8 *cfg;

-	if (irqd_is_forwarded_to_vcpu(d)) {
-		struct its_device *its_dev = irq_data_get_irq_chip_data(d);
-		u32 event = its_get_event_id(d);
-		struct its_vlpi_map *map;
-
-		prop_page = its_dev->event_map.vm->vprop_page;
-		map = &its_dev->event_map.vlpi_maps[event];
+	if (map) {
+		va = page_address(map->vm->vprop_page);
		hwirq = map->vintid;

		/* Remember the updated property */
		map->properties &= ~clr;
		map->properties |= set | LPI_PROP_GROUP1;
	} else {
-		prop_page = gic_rdists->prop_page;
+		va = gic_rdists->prop_table_va;
		hwirq = d->hwirq;
	}

-	cfg = page_address(prop_page) + hwirq - 8192;
+	cfg = va + hwirq - 8192;
	*cfg &= ~clr;
	*cfg |= set | LPI_PROP_GROUP1;

@@ -1097,23 +1417,76 @@
	dsb(ishst);
 }

+static void wait_for_syncr(void __iomem *rdbase)
+{
+	while (readl_relaxed(rdbase + GICR_SYNCR) & 1)
+		cpu_relax();
+}
+
+static void direct_lpi_inv(struct irq_data *d)
+{
+	struct its_vlpi_map *map = get_vlpi_map(d);
+	void __iomem *rdbase;
+	unsigned long flags;
+	u64 val;
+	int cpu;
+
+	if (map) {
+		struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+
+		WARN_ON(!is_v4_1(its_dev->its));
+
+		val = GICR_INVLPIR_V;
+		val |= FIELD_PREP(GICR_INVLPIR_VPEID, map->vpe->vpe_id);
+		val |= FIELD_PREP(GICR_INVLPIR_INTID, map->vintid);
+	} else {
+		val = d->hwirq;
+	}
+
+	/* Target the redistributor this LPI is currently routed to */
+	cpu = irq_to_cpuid_lock(d, &flags);
+	raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
+	rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
+	gic_write_lpir(val, rdbase + GICR_INVLPIR);
+
+	wait_for_syncr(rdbase);
+	raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
+	irq_to_cpuid_unlock(d, flags);
+}
+
 static void lpi_update_config(struct irq_data *d, u8 clr, u8 set)
 {
	struct its_device *its_dev = irq_data_get_irq_chip_data(d);

	lpi_write_config(d, clr, set);
-	its_send_inv(its_dev, its_get_event_id(d));
+	if (gic_rdists->has_direct_lpi &&
+	    (is_v4_1(its_dev->its) || !irqd_is_forwarded_to_vcpu(d)))
+		direct_lpi_inv(d);
+	else if (!irqd_is_forwarded_to_vcpu(d))
+		its_send_inv(its_dev, its_get_event_id(d));
+	else
+		its_send_vinv(its_dev, its_get_event_id(d));
 }

 static void its_vlpi_set_doorbell(struct irq_data *d, bool enable)
 {
	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
	u32 event = its_get_event_id(d);
+	struct its_vlpi_map *map;

-	if (its_dev->event_map.vlpi_maps[event].db_enabled == enable)
+	/*
+	 * GICv4.1 does away with the per-LPI nonsense, nothing to do
+	 * here.
+	 */
+	if (is_v4_1(its_dev->its))
		return;

-	its_dev->event_map.vlpi_maps[event].db_enabled = enable;
+	map = dev_event_to_vlpi_map(its_dev, event);
+
+	if (map->db_enabled == enable)
+		return;
+
+	map->db_enabled = enable;

	/*
	 * More fun with the architecture:
....@@ -1144,42 +1517,159 @@
11441517 lpi_update_config(d, 0, LPI_PROP_ENABLED);
11451518 }
11461519
1520
+static __maybe_unused u32 its_read_lpi_count(struct irq_data *d, int cpu)
1521
+{
1522
+ if (irqd_affinity_is_managed(d))
1523
+ return atomic_read(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed);
1524
+
1525
+ return atomic_read(&per_cpu_ptr(&cpu_lpi_count, cpu)->unmanaged);
1526
+}
1527
+
1528
+static void its_inc_lpi_count(struct irq_data *d, int cpu)
1529
+{
1530
+ if (irqd_affinity_is_managed(d))
1531
+ atomic_inc(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed);
1532
+ else
1533
+ atomic_inc(&per_cpu_ptr(&cpu_lpi_count, cpu)->unmanaged);
1534
+}
1535
+
1536
+static void its_dec_lpi_count(struct irq_data *d, int cpu)
1537
+{
1538
+ if (irqd_affinity_is_managed(d))
1539
+ atomic_dec(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed);
1540
+ else
1541
+ atomic_dec(&per_cpu_ptr(&cpu_lpi_count, cpu)->unmanaged);
1542
+}
1543
+
1544
+static unsigned int cpumask_pick_least_loaded(struct irq_data *d,
1545
+ const struct cpumask *cpu_mask)
1546
+{
1547
+ unsigned int cpu = nr_cpu_ids, tmp;
1548
+ int count = S32_MAX;
1549
+
1550
+ for_each_cpu(tmp, cpu_mask) {
1551
+ int this_count = its_read_lpi_count(d, tmp);
1552
+ if (this_count < count) {
1553
+ cpu = tmp;
1554
+ count = this_count;
1555
+ }
1556
+ }
1557
+
1558
+ return cpu;
1559
+}
1560
+
1561
+/*
1562
+ * As suggested by Thomas Gleixner in:
1563
+ * https://lore.kernel.org/r/87h80q2aoc.fsf@nanos.tec.linutronix.de
1564
+ */
1565
+static int its_select_cpu(struct irq_data *d,
1566
+ const struct cpumask *aff_mask)
1567
+{
1568
+ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
1569
+ cpumask_var_t tmpmask;
1570
+ int cpu, node;
1571
+
1572
+ if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
1573
+ return -ENOMEM;
1574
+
1575
+ node = its_dev->its->numa_node;
1576
+
1577
+ if (!irqd_affinity_is_managed(d)) {
1578
+ /* First try the NUMA node */
1579
+ if (node != NUMA_NO_NODE) {
1580
+ /*
1581
+ * Try the intersection of the affinity mask and the
1582
+ * node mask (and the online mask, just to be safe).
1583
+ */
1584
+ cpumask_and(tmpmask, cpumask_of_node(node), aff_mask);
1585
+ cpumask_and(tmpmask, tmpmask, cpu_online_mask);
1586
+
1587
+ /*
1588
+ * Ideally, we would check if the mask is empty, and
1589
+ * try again on the full node here.
1590
+ *
1591
+ * But it turns out that the way ACPI describes the
1592
+ * affinity for ITSs only deals about memory, and
1593
+ * not target CPUs, so it cannot describe a single
1594
+ * ITS placed next to two NUMA nodes.
1595
+ *
1596
+ * Instead, just fallback on the online mask. This
1597
+ * diverges from Thomas' suggestion above.
1598
+ */
1599
+ cpu = cpumask_pick_least_loaded(d, tmpmask);
1600
+ if (cpu < nr_cpu_ids)
1601
+ goto out;
1602
+
1603
+ /* If we can't cross sockets, give up */
1604
+ if ((its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144))
1605
+ goto out;
1606
+
1607
+ /* If the above failed, expand the search */
1608
+ }
1609
+
1610
+ /* Try the intersection of the affinity and online masks */
1611
+ cpumask_and(tmpmask, aff_mask, cpu_online_mask);
1612
+
1613
+ /* If that doesn't fly, the online mask is the last resort */
1614
+ if (cpumask_empty(tmpmask))
1615
+ cpumask_copy(tmpmask, cpu_online_mask);
1616
+
1617
+ cpu = cpumask_pick_least_loaded(d, tmpmask);
1618
+ } else {
1619
+ cpumask_copy(tmpmask, aff_mask);
1620
+
1621
+ /* If we cannot cross sockets, limit the search to that node */
1622
+ if ((its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) &&
1623
+ node != NUMA_NO_NODE)
1624
+ cpumask_and(tmpmask, tmpmask, cpumask_of_node(node));
1625
+
1626
+ cpu = cpumask_pick_least_loaded(d, tmpmask);
1627
+ }
1628
+out:
1629
+ free_cpumask_var(tmpmask);
1630
+
1631
+ pr_debug("IRQ%d -> %*pbl CPU%d\n", d->irq, cpumask_pr_args(aff_mask), cpu);
1632
+ return cpu;
1633
+}
1634
+
11471635 static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
11481636 bool force)
11491637 {
1150
- unsigned int cpu;
1151
- const struct cpumask *cpu_mask = cpu_online_mask;
11521638 struct its_device *its_dev = irq_data_get_irq_chip_data(d);
11531639 struct its_collection *target_col;
11541640 u32 id = its_get_event_id(d);
1641
+ int cpu, prev_cpu;
11551642
11561643 /* A forwarded interrupt should use irq_set_vcpu_affinity */
11571644 if (irqd_is_forwarded_to_vcpu(d))
11581645 return -EINVAL;
11591646
1160
- /* lpi cannot be routed to a redistributor that is on a foreign node */
1161
- if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) {
1162
- if (its_dev->its->numa_node >= 0) {
1163
- cpu_mask = cpumask_of_node(its_dev->its->numa_node);
1164
- if (!cpumask_intersects(mask_val, cpu_mask))
1165
- return -EINVAL;
1166
- }
1167
- }
1647
+ prev_cpu = its_dev->event_map.col_map[id];
1648
+ its_dec_lpi_count(d, prev_cpu);
11681649
1169
- cpu = cpumask_any_and(mask_val, cpu_mask);
1650
+ if (!force)
1651
+ cpu = its_select_cpu(d, mask_val);
1652
+ else
1653
+ cpu = cpumask_pick_least_loaded(d, mask_val);
11701654
1171
- if (cpu >= nr_cpu_ids)
1172
- return -EINVAL;
1655
+ if (cpu < 0 || cpu >= nr_cpu_ids)
1656
+ goto err;
11731657
11741658 /* don't set the affinity when the target cpu is same as current one */
1175
- if (cpu != its_dev->event_map.col_map[id]) {
1659
+ if (cpu != prev_cpu) {
11761660 target_col = &its_dev->its->collections[cpu];
11771661 its_send_movi(its_dev, target_col, id);
11781662 its_dev->event_map.col_map[id] = cpu;
11791663 irq_data_update_effective_affinity(d, cpumask_of(cpu));
11801664 }
11811665
1666
+ its_inc_lpi_count(d, cpu);
1667
+
11821668 return IRQ_SET_MASK_OK_DONE;
1669
+
1670
+err:
1671
+ its_inc_lpi_count(d, prev_cpu);
1672
+ return -EINVAL;
11831673 }
11841674
11851675 static u64 its_irq_get_msi_base(struct its_device *its_dev)
....@@ -1202,7 +1692,7 @@
12021692 msg->address_hi = upper_32_bits(addr);
12031693 msg->data = its_get_event_id(d);
12041694
1205
- iommu_dma_map_msi_msg(d->irq, msg);
1695
+ iommu_dma_compose_msi_msg(irq_data_get_msi_desc(d), msg);
12061696 }
12071697
12081698 static int its_irq_set_irqchip_state(struct irq_data *d,
....@@ -1215,20 +1705,51 @@
12151705 if (which != IRQCHIP_STATE_PENDING)
12161706 return -EINVAL;
12171707
1218
- if (state)
1219
- its_send_int(its_dev, event);
1220
- else
1221
- its_send_clear(its_dev, event);
1708
+ if (irqd_is_forwarded_to_vcpu(d)) {
1709
+ if (state)
1710
+ its_send_vint(its_dev, event);
1711
+ else
1712
+ its_send_vclear(its_dev, event);
1713
+ } else {
1714
+ if (state)
1715
+ its_send_int(its_dev, event);
1716
+ else
1717
+ its_send_clear(its_dev, event);
1718
+ }
12221719
12231720 return 0;
1721
+}
1722
+
1723
+static int its_irq_retrigger(struct irq_data *d)
1724
+{
1725
+ return !its_irq_set_irqchip_state(d, IRQCHIP_STATE_PENDING, true);
1726
+}
1727
+
1728
+/*
1729
+ * Two favourable cases:
1730
+ *
1731
+ * (a) Either we have a GICv4.1, and all vPEs have to be mapped at all times
1732
+ * for vSGI delivery
1733
+ *
1734
+ * (b) Or the ITSs do not use a list map, meaning that VMOVP is cheap enough
1735
+ * and we're better off mapping all VPEs always
1736
+ *
1737
+ * If neither (a) nor (b) is true, then we map vPEs on demand.
1738
+ *
1739
+ */
1740
+static bool gic_requires_eager_mapping(void)
1741
+{
1742
+ if (!its_list_map || gic_rdists->has_rvpeid)
1743
+ return true;
1744
+
1745
+ return false;
12241746 }
12251747
12261748 static void its_map_vm(struct its_node *its, struct its_vm *vm)
12271749 {
12281750 unsigned long flags;
12291751
1230
- /* Not using the ITS list? Everything is always mapped. */
1231
- if (!its_list_map)
1752
+ if (gic_requires_eager_mapping())
12321753 return;
12331754
12341755 raw_spin_lock_irqsave(&vmovp_lock, flags);
....@@ -1262,7 +1783,7 @@
12621783 unsigned long flags;
12631784
12641785 /* Not using the ITS list? Everything is always mapped. */
1265
- if (!its_list_map)
1786
+ if (gic_requires_eager_mapping())
12661787 return;
12671788
12681789 raw_spin_lock_irqsave(&vmovp_lock, flags);
....@@ -1286,13 +1807,13 @@
12861807 if (!info->map)
12871808 return -EINVAL;
12881809
1289
- mutex_lock(&its_dev->event_map.vlpi_lock);
1810
+ raw_spin_lock(&its_dev->event_map.vlpi_lock);
12901811
12911812 if (!its_dev->event_map.vm) {
12921813 struct its_vlpi_map *maps;
12931814
12941815 maps = kcalloc(its_dev->event_map.nr_lpis, sizeof(*maps),
1295
- GFP_KERNEL);
1816
+ GFP_ATOMIC);
12961817 if (!maps) {
12971818 ret = -ENOMEM;
12981819 goto out;
....@@ -1335,29 +1856,30 @@
13351856 }
13361857
13371858 out:
1338
- mutex_unlock(&its_dev->event_map.vlpi_lock);
1859
+ raw_spin_unlock(&its_dev->event_map.vlpi_lock);
13391860 return ret;
13401861 }
13411862
13421863 static int its_vlpi_get(struct irq_data *d, struct its_cmd_info *info)
13431864 {
13441865 struct its_device *its_dev = irq_data_get_irq_chip_data(d);
1345
- u32 event = its_get_event_id(d);
1866
+ struct its_vlpi_map *map;
13461867 int ret = 0;
13471868
1348
- mutex_lock(&its_dev->event_map.vlpi_lock);
1869
+ raw_spin_lock(&its_dev->event_map.vlpi_lock);
13491870
1350
- if (!its_dev->event_map.vm ||
1351
- !its_dev->event_map.vlpi_maps[event].vm) {
1871
+ map = get_vlpi_map(d);
1872
+
1873
+ if (!its_dev->event_map.vm || !map) {
13521874 ret = -EINVAL;
13531875 goto out;
13541876 }
13551877
13561878 /* Copy our mapping information to the incoming request */
1357
- *info->map = its_dev->event_map.vlpi_maps[event];
1879
+ *info->map = *map;
13581880
13591881 out:
1360
- mutex_unlock(&its_dev->event_map.vlpi_lock);
1882
+ raw_spin_unlock(&its_dev->event_map.vlpi_lock);
13611883 return ret;
13621884 }
13631885
....@@ -1367,7 +1889,7 @@
13671889 u32 event = its_get_event_id(d);
13681890 int ret = 0;
13691891
1370
- mutex_lock(&its_dev->event_map.vlpi_lock);
1892
+ raw_spin_lock(&its_dev->event_map.vlpi_lock);
13711893
13721894 if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) {
13731895 ret = -EINVAL;
....@@ -1397,7 +1919,7 @@
13971919 }
13981920
13991921 out:
1400
- mutex_unlock(&its_dev->event_map.vlpi_lock);
1922
+ raw_spin_unlock(&its_dev->event_map.vlpi_lock);
14011923 return ret;
14021924 }
14031925
....@@ -1423,7 +1945,7 @@
14231945 struct its_cmd_info *info = vcpu_info;
14241946
14251947 /* Need a v4 ITS */
1426
- if (!its_dev->its->is_v4)
1948
+ if (!is_v4(its_dev->its))
14271949 return -EINVAL;
14281950
14291951 /* Unmap request? */
....@@ -1454,6 +1976,7 @@
14541976 .irq_set_affinity = its_set_affinity,
14551977 .irq_compose_msi_msg = its_irq_compose_msi_msg,
14561978 .irq_set_irqchip_state = its_irq_set_irqchip_state,
1979
+ .irq_retrigger = its_irq_retrigger,
14571980 .irq_set_vcpu_affinity = its_irq_set_vcpu_affinity,
14581981 };
14591982
....@@ -1488,39 +2011,13 @@
14882011 {
14892012 struct lpi_range *range;
14902013
1491
- range = kzalloc(sizeof(*range), GFP_KERNEL);
2014
+ range = kmalloc(sizeof(*range), GFP_KERNEL);
14922015 if (range) {
1493
- INIT_LIST_HEAD(&range->entry);
14942016 range->base_id = base;
14952017 range->span = span;
14962018 }
14972019
14982020 return range;
1499
-}
1500
-
1501
-static int lpi_range_cmp(void *priv, struct list_head *a, struct list_head *b)
1502
-{
1503
- struct lpi_range *ra, *rb;
1504
-
1505
- ra = container_of(a, struct lpi_range, entry);
1506
- rb = container_of(b, struct lpi_range, entry);
1507
-
1508
- return ra->base_id - rb->base_id;
1509
-}
1510
-
1511
-static void merge_lpi_ranges(void)
1512
-{
1513
- struct lpi_range *range, *tmp;
1514
-
1515
- list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) {
1516
- if (!list_is_last(&range->entry, &lpi_range_list) &&
1517
- (tmp->base_id == (range->base_id + range->span))) {
1518
- tmp->base_id = range->base_id;
1519
- tmp->span += range->span;
1520
- list_del(&range->entry);
1521
- kfree(range);
1522
- }
1523
- }
15242021 }
15252022
15262023 static int alloc_lpi_range(u32 nr_lpis, u32 *base)
....@@ -1552,25 +2049,49 @@
15522049 return err;
15532050 }
15542051
2052
+static void merge_lpi_ranges(struct lpi_range *a, struct lpi_range *b)
2053
+{
2054
+ if (&a->entry == &lpi_range_list || &b->entry == &lpi_range_list)
2055
+ return;
2056
+ if (a->base_id + a->span != b->base_id)
2057
+ return;
2058
+ b->base_id = a->base_id;
2059
+ b->span += a->span;
2060
+ list_del(&a->entry);
2061
+ kfree(a);
2062
+}
2063
+
15552064 static int free_lpi_range(u32 base, u32 nr_lpis)
15562065 {
1557
- struct lpi_range *new;
1558
- int err = 0;
2066
+ struct lpi_range *new, *old;
2067
+
2068
+ new = mk_lpi_range(base, nr_lpis);
2069
+ if (!new)
2070
+ return -ENOMEM;
15592071
15602072 mutex_lock(&lpi_range_lock);
15612073
1562
- new = mk_lpi_range(base, nr_lpis);
1563
- if (!new) {
1564
- err = -ENOMEM;
1565
- goto out;
2074
+ list_for_each_entry_reverse(old, &lpi_range_list, entry) {
2075
+ if (old->base_id < base)
2076
+ break;
15662077 }
2078
+ /*
2079
+ * old is the last element with ->base_id smaller than base,
2080
+ * so new goes right after it. If there are no elements with
2081
+ * ->base_id smaller than base, &old->entry ends up pointing
2082
+ * at the head of the list, and inserting new it the start of
2083
+ * the list is the right thing to do in that case as well.
2084
+ */
2085
+ list_add(&new->entry, &old->entry);
2086
+ /*
2087
+ * Now check if we can merge with the preceding and/or
2088
+ * following ranges.
2089
+ */
2090
+ merge_lpi_ranges(old, new);
2091
+ merge_lpi_ranges(new, list_next_entry(new, entry));
15672092
1568
- list_add(&new->entry, &lpi_range_list);
1569
- list_sort(NULL, &lpi_range_list, lpi_range_cmp);
1570
- merge_lpi_ranges();
1571
-out:
15722093 mutex_unlock(&lpi_range_lock);
1573
- return err;
2094
+ return 0;
15742095 }
15752096
15762097 static int __init its_lpi_init(u32 id_bits)
....@@ -1634,6 +2155,15 @@
16342155 kfree(bitmap);
16352156 }
16362157
2158
+static void gic_reset_prop_table(void *va)
2159
+{
2160
+ /* Priority 0xa0, Group-1, disabled */
2161
+ memset(va, LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1, LPI_PROPBASE_SZ);
2162
+
2163
+ /* Make sure the GIC will observe the written configuration */
2164
+ gic_flush_dcache_to_poc(va, LPI_PROPBASE_SZ);
2165
+}
2166
+
16372167 static struct page *its_allocate_prop_table(gfp_t gfp_flags)
16382168 {
16392169 struct page *prop_page;
....@@ -1644,13 +2174,7 @@
16442174 if (!prop_page)
16452175 return NULL;
16462176
1647
- /* Priority 0xa0, Group-1, disabled */
1648
- memset(page_address(prop_page),
1649
- LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1,
1650
- LPI_PROPBASE_SZ);
1651
-
1652
- /* Make sure the GIC will observe the written configuration */
1653
- gic_flush_dcache_to_poc(page_address(prop_page), LPI_PROPBASE_SZ);
2177
+ gic_reset_prop_table(page_address(prop_page));
16542178
16552179 return prop_page;
16562180 }
....@@ -1661,20 +2185,74 @@
16612185 get_order(LPI_PROPBASE_SZ));
16622186 }
16632187
1664
-static int __init its_alloc_lpi_tables(void)
2188
+static bool gic_check_reserved_range(phys_addr_t addr, unsigned long size)
16652189 {
1666
- phys_addr_t paddr;
2190
+ phys_addr_t start, end, addr_end;
2191
+ u64 i;
16672192
1668
- lpi_id_bits = min_t(u32, GICD_TYPER_ID_BITS(gic_rdists->gicd_typer),
1669
- ITS_MAX_LPI_NRBITS);
1670
- gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT);
1671
- if (!gic_rdists->prop_page) {
1672
- pr_err("Failed to allocate PROPBASE\n");
1673
- return -ENOMEM;
2193
+ /*
2194
+ * We don't bother checking for a kdump kernel as by
2195
+ * construction, the LPI tables are out of this kernel's
2196
+ * memory map.
2197
+ */
2198
+ if (is_kdump_kernel())
2199
+ return true;
2200
+
2201
+ addr_end = addr + size - 1;
2202
+
2203
+ for_each_reserved_mem_range(i, &start, &end) {
2204
+ if (addr >= start && addr_end <= end)
2205
+ return true;
16742206 }
16752207
1676
- paddr = page_to_phys(gic_rdists->prop_page);
1677
- pr_info("GIC: using LPI property table @%pa\n", &paddr);
2208
+ /* Not found, not a good sign... */
2209
+ pr_warn("GICv3: Expected reserved range [%pa:%pa], not found\n",
2210
+ &addr, &addr_end);
2211
+ add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
2212
+ return false;
2213
+}
2214
+
2215
+static int gic_reserve_range(phys_addr_t addr, unsigned long size)
2216
+{
2217
+ if (efi_enabled(EFI_CONFIG_TABLES))
2218
+ return efi_mem_reserve_persistent(addr, size);
2219
+
2220
+ return 0;
2221
+}
2222
+
2223
+static int __init its_setup_lpi_prop_table(void)
2224
+{
2225
+ if (gic_rdists->flags & RDIST_FLAGS_RD_TABLES_PREALLOCATED) {
2226
+ u64 val;
2227
+
2228
+ val = gicr_read_propbaser(gic_data_rdist_rd_base() + GICR_PROPBASER);
2229
+ lpi_id_bits = (val & GICR_PROPBASER_IDBITS_MASK) + 1;
2230
+
2231
+ gic_rdists->prop_table_pa = val & GENMASK_ULL(51, 12);
2232
+ gic_rdists->prop_table_va = memremap(gic_rdists->prop_table_pa,
2233
+ LPI_PROPBASE_SZ,
2234
+ MEMREMAP_WB);
2235
+ gic_reset_prop_table(gic_rdists->prop_table_va);
2236
+ } else {
2237
+ struct page *page;
2238
+
2239
+ lpi_id_bits = min_t(u32,
2240
+ GICD_TYPER_ID_BITS(gic_rdists->gicd_typer),
2241
+ ITS_MAX_LPI_NRBITS);
2242
+ page = its_allocate_prop_table(GFP_NOWAIT);
2243
+ if (!page) {
2244
+ pr_err("Failed to allocate PROPBASE\n");
2245
+ return -ENOMEM;
2246
+ }
2247
+
2248
+ gic_rdists->prop_table_pa = page_to_phys(page);
2249
+ gic_rdists->prop_table_va = page_address(page);
2250
+ WARN_ON(gic_reserve_range(gic_rdists->prop_table_pa,
2251
+ LPI_PROPBASE_SZ));
2252
+ }
2253
+
2254
+ pr_info("GICv3: using LPI property table @%pa\n",
2255
+ &gic_rdists->prop_table_pa);
16782256
16792257 return its_lpi_init(lpi_id_bits);
16802258 }
....@@ -1706,18 +2284,18 @@
17062284 }
17072285
17082286 static int its_setup_baser(struct its_node *its, struct its_baser *baser,
1709
- u64 cache, u64 shr, u32 psz, u32 order,
1710
- bool indirect)
2287
+ u64 cache, u64 shr, u32 order, bool indirect)
17112288 {
17122289 u64 val = its_read_baser(its, baser);
17132290 u64 esz = GITS_BASER_ENTRY_SIZE(val);
17142291 u64 type = GITS_BASER_TYPE(val);
17152292 u64 baser_phys, tmp;
1716
- u32 alloc_pages;
2293
+ u32 alloc_pages, psz;
2294
+ struct page *page;
17172295 void *base;
17182296 gfp_t gfp_flags;
17192297
1720
-retry_alloc_baser:
2298
+ psz = baser->psz;
17212299 alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz);
17222300 if (alloc_pages > GITS_BASER_PAGES_MAX) {
17232301 pr_warn("ITS@%pa: %s too large, reduce ITS pages %u->%u\n",
....@@ -1730,10 +2308,11 @@
17302308 gfp_flags = GFP_KERNEL | __GFP_ZERO;
17312309 if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
17322310 gfp_flags |= GFP_DMA32;
1733
- base = (void *)__get_free_pages(gfp_flags, order);
1734
- if (!base)
2311
+ page = alloc_pages_node(its->numa_node, gfp_flags, order);
2312
+ if (!page)
17352313 return -ENOMEM;
17362314
2315
+ base = (void *)page_address(page);
17372316 baser_phys = virt_to_phys(base);
17382317
17392318 /* Check if the physical address of the memory is above 48bits */
....@@ -1776,8 +2355,10 @@
17762355 its_write_baser(its, baser, val);
17772356 tmp = baser->val;
17782357
1779
- if (of_machine_is_compatible("rockchip,rk3568") ||
1780
- of_machine_is_compatible("rockchip,rk3566")) {
2358
+ if (IS_ENABLED(CONFIG_NO_GKI) &&
2359
+ (of_machine_is_compatible("rockchip,rk3568") ||
2360
+ of_machine_is_compatible("rockchip,rk3566") ||
2361
+ of_machine_is_compatible("rockchip,rk3588"))) {
17812362 if (tmp & GITS_BASER_SHAREABILITY_MASK)
17822363 tmp &= ~GITS_BASER_SHAREABILITY_MASK;
17832364 else
....@@ -1798,25 +2379,6 @@
17982379 gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order));
17992380 }
18002381 goto retry_baser;
1801
- }
1802
-
1803
- if ((val ^ tmp) & GITS_BASER_PAGE_SIZE_MASK) {
1804
- /*
1805
- * Page size didn't stick. Let's try a smaller
1806
- * size and retry. If we reach 4K, then
1807
- * something is horribly wrong...
1808
- */
1809
- free_pages((unsigned long)base, order);
1810
- baser->base = NULL;
1811
-
1812
- switch (psz) {
1813
- case SZ_16K:
1814
- psz = SZ_4K;
1815
- goto retry_alloc_baser;
1816
- case SZ_64K:
1817
- psz = SZ_16K;
1818
- goto retry_alloc_baser;
1819
- }
18202382 }
18212383
18222384 if (val != tmp) {
....@@ -1844,13 +2406,14 @@
18442406
18452407 static bool its_parse_indirect_baser(struct its_node *its,
18462408 struct its_baser *baser,
1847
- u32 psz, u32 *order, u32 ids)
2409
+ u32 *order, u32 ids)
18482410 {
18492411 u64 tmp = its_read_baser(its, baser);
18502412 u64 type = GITS_BASER_TYPE(tmp);
18512413 u64 esz = GITS_BASER_ENTRY_SIZE(tmp);
18522414 u64 val = GITS_BASER_InnerShareable | GITS_BASER_RaWaWb;
18532415 u32 new_order = *order;
2416
+ u32 psz = baser->psz;
18542417 bool indirect = false;
18552418
18562419 /* No need to enable Indirection if memory requirement < (psz*2)bytes */
....@@ -1886,14 +2449,73 @@
18862449 if (new_order >= MAX_ORDER) {
18872450 new_order = MAX_ORDER - 1;
18882451 ids = ilog2(PAGE_ORDER_TO_SIZE(new_order) / (int)esz);
1889
- pr_warn("ITS@%pa: %s Table too large, reduce ids %u->%u\n",
2452
+ pr_warn("ITS@%pa: %s Table too large, reduce ids %llu->%u\n",
18902453 &its->phys_base, its_base_type_string[type],
1891
- its->device_ids, ids);
2454
+ device_ids(its), ids);
18922455 }
18932456
18942457 *order = new_order;
18952458
18962459 return indirect;
2460
+}
2461
+
2462
+static u32 compute_common_aff(u64 val)
2463
+{
2464
+ u32 aff, clpiaff;
2465
+
2466
+ aff = FIELD_GET(GICR_TYPER_AFFINITY, val);
2467
+ clpiaff = FIELD_GET(GICR_TYPER_COMMON_LPI_AFF, val);
2468
+
2469
+ return aff & ~(GENMASK(31, 0) >> (clpiaff * 8));
2470
+}
2471
+
2472
+static u32 compute_its_aff(struct its_node *its)
2473
+{
2474
+ u64 val;
2475
+ u32 svpet;
2476
+
2477
+ /*
2478
+ * Reencode the ITS SVPET and MPIDR as a GICR_TYPER, and compute
2479
+ * the resulting affinity. We then use that to see if this match
2480
+ * our own affinity.
2481
+ */
2482
+ svpet = FIELD_GET(GITS_TYPER_SVPET, its->typer);
2483
+ val = FIELD_PREP(GICR_TYPER_COMMON_LPI_AFF, svpet);
2484
+ val |= FIELD_PREP(GICR_TYPER_AFFINITY, its->mpidr);
2485
+ return compute_common_aff(val);
2486
+}
2487
+
2488
+static struct its_node *find_sibling_its(struct its_node *cur_its)
2489
+{
2490
+ struct its_node *its;
2491
+ u32 aff;
2492
+
2493
+ if (!FIELD_GET(GITS_TYPER_SVPET, cur_its->typer))
2494
+ return NULL;
2495
+
2496
+ aff = compute_its_aff(cur_its);
2497
+
2498
+ list_for_each_entry(its, &its_nodes, entry) {
2499
+ u64 baser;
2500
+
2501
+ if (!is_v4_1(its) || its == cur_its)
2502
+ continue;
2503
+
2504
+ if (!FIELD_GET(GITS_TYPER_SVPET, its->typer))
2505
+ continue;
2506
+
2507
+ if (aff != compute_its_aff(its))
2508
+ continue;
2509
+
2510
+ /* GICv4.1 guarantees that the vPE table is GITS_BASER2 */
2511
+ baser = its->tables[2].val;
2512
+ if (!(baser & GITS_BASER_VALID))
2513
+ continue;
2514
+
2515
+ return its;
2516
+ }
2517
+
2518
+ return NULL;
18972519 }
18982520
18992521 static void its_free_tables(struct its_node *its)
....@@ -1909,11 +2531,58 @@
19092531 }
19102532 }
19112533
2534
+static int its_probe_baser_psz(struct its_node *its, struct its_baser *baser)
2535
+{
2536
+ u64 psz = SZ_64K;
2537
+
2538
+ while (psz) {
2539
+ u64 val, gpsz;
2540
+
2541
+ val = its_read_baser(its, baser);
2542
+ val &= ~GITS_BASER_PAGE_SIZE_MASK;
2543
+
2544
+ switch (psz) {
2545
+ case SZ_64K:
2546
+ gpsz = GITS_BASER_PAGE_SIZE_64K;
2547
+ break;
2548
+ case SZ_16K:
2549
+ gpsz = GITS_BASER_PAGE_SIZE_16K;
2550
+ break;
2551
+ case SZ_4K:
2552
+ default:
2553
+ gpsz = GITS_BASER_PAGE_SIZE_4K;
2554
+ break;
2555
+ }
2556
+
2557
+ gpsz >>= GITS_BASER_PAGE_SIZE_SHIFT;
2558
+
2559
+ val |= FIELD_PREP(GITS_BASER_PAGE_SIZE_MASK, gpsz);
2560
+ its_write_baser(its, baser, val);
2561
+
2562
+ if (FIELD_GET(GITS_BASER_PAGE_SIZE_MASK, baser->val) == gpsz)
2563
+ break;
2564
+
2565
+ switch (psz) {
2566
+ case SZ_64K:
2567
+ psz = SZ_16K;
2568
+ break;
2569
+ case SZ_16K:
2570
+ psz = SZ_4K;
2571
+ break;
2572
+ case SZ_4K:
2573
+ default:
2574
+ return -1;
2575
+ }
2576
+ }
2577
+
2578
+ baser->psz = psz;
2579
+ return 0;
2580
+}
2581
+
19122582 static int its_alloc_tables(struct its_node *its)
19132583 {
19142584 u64 shr = GITS_BASER_InnerShareable;
19152585 u64 cache = GITS_BASER_RaWaWb;
1916
- u32 psz = SZ_64K;
19172586 int err, i;
19182587
19192588 if (its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_22375)
....@@ -1924,37 +2593,337 @@
19242593 struct its_baser *baser = its->tables + i;
19252594 u64 val = its_read_baser(its, baser);
19262595 u64 type = GITS_BASER_TYPE(val);
1927
- u32 order = get_order(psz);
19282596 bool indirect = false;
2597
+ u32 order;
19292598
1930
- switch (type) {
1931
- case GITS_BASER_TYPE_NONE:
2599
+ if (type == GITS_BASER_TYPE_NONE)
19322600 continue;
19332601
2602
+ if (its_probe_baser_psz(its, baser)) {
2603
+ its_free_tables(its);
2604
+ return -ENXIO;
2605
+ }
2606
+
2607
+ order = get_order(baser->psz);
2608
+
2609
+ switch (type) {
19342610 case GITS_BASER_TYPE_DEVICE:
1935
- indirect = its_parse_indirect_baser(its, baser,
1936
- psz, &order,
1937
- its->device_ids);
2611
+ indirect = its_parse_indirect_baser(its, baser, &order,
2612
+ device_ids(its));
19382613 break;
19392614
19402615 case GITS_BASER_TYPE_VCPU:
1941
- indirect = its_parse_indirect_baser(its, baser,
1942
- psz, &order,
2616
+ if (is_v4_1(its)) {
2617
+ struct its_node *sibling;
2618
+
2619
+ WARN_ON(i != 2);
2620
+ if ((sibling = find_sibling_its(its))) {
2621
+ *baser = sibling->tables[2];
2622
+ its_write_baser(its, baser, baser->val);
2623
+ continue;
2624
+ }
2625
+ }
2626
+
2627
+ indirect = its_parse_indirect_baser(its, baser, &order,
19432628 ITS_MAX_VPEID_BITS);
19442629 break;
19452630 }
19462631
1947
- err = its_setup_baser(its, baser, cache, shr, psz, order, indirect);
2632
+ err = its_setup_baser(its, baser, cache, shr, order, indirect);
19482633 if (err < 0) {
19492634 its_free_tables(its);
19502635 return err;
19512636 }
19522637
19532638 /* Update settings which will be used for next BASERn */
1954
- psz = baser->psz;
19552639 cache = baser->val & GITS_BASER_CACHEABILITY_MASK;
19562640 shr = baser->val & GITS_BASER_SHAREABILITY_MASK;
19572641 }
2642
+
2643
+ return 0;
2644
+}
2645
+
2646
+static u64 inherit_vpe_l1_table_from_its(void)
2647
+{
2648
+ struct its_node *its;
2649
+ u64 val;
2650
+ u32 aff;
2651
+
2652
+ val = gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER);
2653
+ aff = compute_common_aff(val);
2654
+
2655
+ list_for_each_entry(its, &its_nodes, entry) {
2656
+ u64 baser, addr;
2657
+
2658
+ if (!is_v4_1(its))
2659
+ continue;
2660
+
2661
+ if (!FIELD_GET(GITS_TYPER_SVPET, its->typer))
2662
+ continue;
2663
+
2664
+ if (aff != compute_its_aff(its))
2665
+ continue;
2666
+
2667
+ /* GICv4.1 guarantees that the vPE table is GITS_BASER2 */
2668
+ baser = its->tables[2].val;
2669
+ if (!(baser & GITS_BASER_VALID))
2670
+ continue;
2671
+
2672
+ /* We have a winner! */
2673
+ gic_data_rdist()->vpe_l1_base = its->tables[2].base;
2674
+
2675
+ val = GICR_VPROPBASER_4_1_VALID;
2676
+ if (baser & GITS_BASER_INDIRECT)
2677
+ val |= GICR_VPROPBASER_4_1_INDIRECT;
2678
+ val |= FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE,
2679
+ FIELD_GET(GITS_BASER_PAGE_SIZE_MASK, baser));
2680
+ switch (FIELD_GET(GITS_BASER_PAGE_SIZE_MASK, baser)) {
2681
+ case GIC_PAGE_SIZE_64K:
2682
+ addr = GITS_BASER_ADDR_48_to_52(baser);
2683
+ break;
2684
+ default:
2685
+ addr = baser & GENMASK_ULL(47, 12);
2686
+ break;
2687
+ }
2688
+ val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, addr >> 12);
2689
+ val |= FIELD_PREP(GICR_VPROPBASER_SHAREABILITY_MASK,
2690
+ FIELD_GET(GITS_BASER_SHAREABILITY_MASK, baser));
2691
+ val |= FIELD_PREP(GICR_VPROPBASER_INNER_CACHEABILITY_MASK,
2692
+ FIELD_GET(GITS_BASER_INNER_CACHEABILITY_MASK, baser));
2693
+ val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, GITS_BASER_NR_PAGES(baser) - 1);
2694
+
2695
+ return val;
2696
+ }
2697
+
2698
+ return 0;
2699
+}
2700
+
2701
+static u64 inherit_vpe_l1_table_from_rd(cpumask_t **mask)
2702
+{
2703
+ u32 aff;
2704
+ u64 val;
2705
+ int cpu;
2706
+
2707
+ val = gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER);
2708
+ aff = compute_common_aff(val);
2709
+
2710
+ for_each_possible_cpu(cpu) {
2711
+ void __iomem *base = gic_data_rdist_cpu(cpu)->rd_base;
2712
+
2713
+ if (!base || cpu == smp_processor_id())
2714
+ continue;
2715
+
2716
+ val = gic_read_typer(base + GICR_TYPER);
2717
+ if (aff != compute_common_aff(val))
2718
+ continue;
2719
+
2720
+ /*
2721
+ * At this point, we have a victim. This particular CPU
2722
+ * has already booted, and has an affinity that matches
2723
+ * ours wrt CommonLPIAff. Let's use its own VPROPBASER.
2724
+ * Make sure we don't write the Z bit in that case.
2725
+ */
2726
+ val = gicr_read_vpropbaser(base + SZ_128K + GICR_VPROPBASER);
2727
+ val &= ~GICR_VPROPBASER_4_1_Z;
2728
+
2729
+ gic_data_rdist()->vpe_l1_base = gic_data_rdist_cpu(cpu)->vpe_l1_base;
2730
+ *mask = gic_data_rdist_cpu(cpu)->vpe_table_mask;
2731
+
2732
+ return val;
2733
+ }
2734
+
2735
+ return 0;
2736
+}
2737
+
2738
+static bool allocate_vpe_l2_table(int cpu, u32 id)
2739
+{
2740
+ void __iomem *base = gic_data_rdist_cpu(cpu)->rd_base;
2741
+ unsigned int psz, esz, idx, npg, gpsz;
2742
+ u64 val;
2743
+ struct page *page;
2744
+ __le64 *table;
2745
+
2746
+ if (!gic_rdists->has_rvpeid)
2747
+ return true;
2748
+
2749
+ /* Skip non-present CPUs */
2750
+ if (!base)
2751
+ return true;
2752
+
2753
+ val = gicr_read_vpropbaser(base + SZ_128K + GICR_VPROPBASER);
2754
+
2755
+ esz = FIELD_GET(GICR_VPROPBASER_4_1_ENTRY_SIZE, val) + 1;
2756
+ gpsz = FIELD_GET(GICR_VPROPBASER_4_1_PAGE_SIZE, val);
2757
+ npg = FIELD_GET(GICR_VPROPBASER_4_1_SIZE, val) + 1;
2758
+
2759
+ switch (gpsz) {
2760
+ default:
2761
+ WARN_ON(1);
2762
+ fallthrough;
2763
+ case GIC_PAGE_SIZE_4K:
2764
+ psz = SZ_4K;
2765
+ break;
2766
+ case GIC_PAGE_SIZE_16K:
2767
+ psz = SZ_16K;
2768
+ break;
2769
+ case GIC_PAGE_SIZE_64K:
2770
+ psz = SZ_64K;
2771
+ break;
2772
+ }
2773
+
2774
+ /* Don't allow vpe_id that exceeds single, flat table limit */
2775
+ if (!(val & GICR_VPROPBASER_4_1_INDIRECT))
2776
+ return (id < (npg * psz / (esz * SZ_8)));
2777
+
2778
+ /* Compute 1st level table index & check if that exceeds table limit */
2779
+ idx = id >> ilog2(psz / (esz * SZ_8));
2780
+ if (idx >= (npg * psz / GITS_LVL1_ENTRY_SIZE))
2781
+ return false;
2782
+
2783
+ table = gic_data_rdist_cpu(cpu)->vpe_l1_base;
2784
+
2785
+ /* Allocate memory for 2nd level table */
2786
+ if (!table[idx]) {
2787
+ page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(psz));
2788
+ if (!page)
2789
+ return false;
2790
+
2791
+ /* Flush Lvl2 table to PoC if hw doesn't support coherency */
2792
+ if (!(val & GICR_VPROPBASER_SHAREABILITY_MASK))
2793
+ gic_flush_dcache_to_poc(page_address(page), psz);
2794
+
2795
+ table[idx] = cpu_to_le64(page_to_phys(page) | GITS_BASER_VALID);
2796
+
2797
+ /* Flush Lvl1 entry to PoC if hw doesn't support coherency */
2798
+ if (!(val & GICR_VPROPBASER_SHAREABILITY_MASK))
2799
+ gic_flush_dcache_to_poc(table + idx, GITS_LVL1_ENTRY_SIZE);
2800
+
2801
+ /* Ensure updated table contents are visible to RD hardware */
2802
+ dsb(sy);
2803
+ }
2804
+
2805
+ return true;
2806
+}
2807
+
2808
+static int allocate_vpe_l1_table(void)
2809
+{
2810
+ void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
2811
+ u64 val, gpsz, npg, pa;
2812
+ unsigned int psz = SZ_64K;
2813
+ unsigned int np, epp, esz;
2814
+ struct page *page;
2815
+
2816
+ if (!gic_rdists->has_rvpeid)
2817
+ return 0;
2818
+
2819
+ /*
2820
+ * if VPENDBASER.Valid is set, disable any previously programmed
2821
+ * VPE by setting PendingLast while clearing Valid. This has the
2822
+ * effect of making sure no doorbell will be generated and we can
2823
+ * then safely clear VPROPBASER.Valid.
2824
+ */
2825
+ if (gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER) & GICR_VPENDBASER_Valid)
2826
+ gicr_write_vpendbaser(GICR_VPENDBASER_PendingLast,
2827
+ vlpi_base + GICR_VPENDBASER);
2828
+
2829
+ /*
2830
+ * If we can inherit the configuration from another RD, let's do
2831
+ * so. Otherwise, we have to go through the allocation process. We
2832
+ * assume that all RDs have the exact same requirements, as
2833
+ * nothing will work otherwise.
2834
+ */
2835
+ val = inherit_vpe_l1_table_from_rd(&gic_data_rdist()->vpe_table_mask);
2836
+ if (val & GICR_VPROPBASER_4_1_VALID)
2837
+ goto out;
2838
+
2839
+ gic_data_rdist()->vpe_table_mask = kzalloc(sizeof(cpumask_t), GFP_ATOMIC);
2840
+ if (!gic_data_rdist()->vpe_table_mask)
2841
+ return -ENOMEM;
2842
+
2843
+ val = inherit_vpe_l1_table_from_its();
2844
+ if (val & GICR_VPROPBASER_4_1_VALID)
2845
+ goto out;
2846
+
2847
+ /* First probe the page size */
2848
+ val = FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE, GIC_PAGE_SIZE_64K);
2849
+ gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
2850
+ val = gicr_read_vpropbaser(vlpi_base + GICR_VPROPBASER);
2851
+ gpsz = FIELD_GET(GICR_VPROPBASER_4_1_PAGE_SIZE, val);
2852
+ esz = FIELD_GET(GICR_VPROPBASER_4_1_ENTRY_SIZE, val);
2853
+
2854
+ switch (gpsz) {
2855
+ default:
2856
+ gpsz = GIC_PAGE_SIZE_4K;
2857
+ fallthrough;
2858
+ case GIC_PAGE_SIZE_4K:
2859
+ psz = SZ_4K;
2860
+ break;
2861
+ case GIC_PAGE_SIZE_16K:
2862
+ psz = SZ_16K;
2863
+ break;
2864
+ case GIC_PAGE_SIZE_64K:
2865
+ psz = SZ_64K;
2866
+ break;
2867
+ }
2868
+
2869
+ /*
2870
+ * Start populating the register from scratch, including RO fields
2871
+ * (which we want to print in debug cases...)
2872
+ */
2873
+ val = 0;
2874
+ val |= FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE, gpsz);
2875
+ val |= FIELD_PREP(GICR_VPROPBASER_4_1_ENTRY_SIZE, esz);
2876
+
2877
+ /* How many entries per GIC page? */
2878
+ esz++;
2879
+ epp = psz / (esz * SZ_8);
2880
+
2881
+ /*
2882
+ * If we need more than just a single L1 page, flag the table
2883
+ * as indirect and compute the number of required L1 pages.
2884
+ */
2885
+ if (epp < ITS_MAX_VPEID) {
2886
+ int nl2;
2887
+
2888
+ val |= GICR_VPROPBASER_4_1_INDIRECT;
2889
+
2890
+ /* Number of L2 pages required to cover the VPEID space */
2891
+ nl2 = DIV_ROUND_UP(ITS_MAX_VPEID, epp);
2892
+
2893
+ /* Number of L1 pages to point to the L2 pages */
2894
+ npg = DIV_ROUND_UP(nl2 * SZ_8, psz);
2895
+ } else {
2896
+ npg = 1;
2897
+ }
2898
+
2899
+ val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, npg - 1);
2900
+
2901
+ /* Right, that's the number of CPU pages we need for L1 */
2902
+ np = DIV_ROUND_UP(npg * psz, PAGE_SIZE);
2903
+
2904
+ pr_debug("np = %d, npg = %lld, psz = %d, epp = %d, esz = %d\n",
2905
+ np, npg, psz, epp, esz);
2906
+ page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, get_order(np * PAGE_SIZE));
2907
+ if (!page)
2908
+ return -ENOMEM;
2909
+
2910
+ gic_data_rdist()->vpe_l1_base = page_address(page);
2911
+ pa = virt_to_phys(page_address(page));
2912
+ WARN_ON(!IS_ALIGNED(pa, psz));
2913
+
2914
+ val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, pa >> 12);
2915
+ val |= GICR_VPROPBASER_RaWb;
2916
+ val |= GICR_VPROPBASER_InnerShareable;
2917
+ val |= GICR_VPROPBASER_4_1_Z;
2918
+ val |= GICR_VPROPBASER_4_1_VALID;
2919
+
2920
+out:
2921
+ gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
2922
+ cpumask_set_cpu(smp_processor_id(), gic_data_rdist()->vpe_table_mask);
2923
+
2924
+ pr_debug("CPU%d: VPROPBASER = %llx %*pbl\n",
2925
+ smp_processor_id(), val,
2926
+ cpumask_pr_args(gic_data_rdist()->vpe_table_mask));
19582927
19592928 return 0;
19602929 }
....@@ -1977,14 +2946,11 @@
19772946 static struct page *its_allocate_pending_table(gfp_t gfp_flags)
19782947 {
19792948 struct page *pend_page;
1980
- /*
1981
- * The pending pages have to be at least 64kB aligned,
1982
- * hence the 'max(LPI_PENDBASE_SZ, SZ_64K)' below.
1983
- */
2949
+
19842950 if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
19852951 gfp_flags |= GFP_DMA32;
19862952 pend_page = alloc_pages(gfp_flags | __GFP_ZERO,
1987
- get_order(max_t(u32, LPI_PENDBASE_SZ, SZ_64K)));
2953
+ get_order(LPI_PENDBASE_SZ));
19882954 if (!pend_page)
19892955 return NULL;
19902956
....@@ -1996,22 +2962,73 @@
19962962
19972963 static void its_free_pending_table(struct page *pt)
19982964 {
1999
- free_pages((unsigned long)page_address(pt),
2000
- get_order(max_t(u32, LPI_PENDBASE_SZ, SZ_64K)));
2965
+ free_pages((unsigned long)page_address(pt), get_order(LPI_PENDBASE_SZ));
20012966 }
20022967
2003
-static u64 its_clear_vpend_valid(void __iomem *vlpi_base)
2968
+/*
2969
+ * Booting with kdump and LPIs enabled is generally fine. Any other
2970
+ * case is wrong in the absence of firmware/EFI support.
2971
+ */
2972
+static bool enabled_lpis_allowed(void)
2973
+{
2974
+ phys_addr_t addr;
2975
+ u64 val;
2976
+
2977
+ /* Check whether the property table is in a reserved region */
2978
+ val = gicr_read_propbaser(gic_data_rdist_rd_base() + GICR_PROPBASER);
2979
+ addr = val & GENMASK_ULL(51, 12);
2980
+
2981
+ return gic_check_reserved_range(addr, LPI_PROPBASE_SZ);
2982
+}
2983
+
2984
+static int __init allocate_lpi_tables(void)
2985
+{
2986
+ u64 val;
2987
+ int err, cpu;
2988
+
2989
+ /*
2990
+ * If LPIs are enabled while we run this from the boot CPU,
2991
+ * flag the RD tables as pre-allocated if the stars do align.
2992
+ */
2993
+ val = readl_relaxed(gic_data_rdist_rd_base() + GICR_CTLR);
2994
+ if ((val & GICR_CTLR_ENABLE_LPIS) && enabled_lpis_allowed()) {
2995
+ gic_rdists->flags |= (RDIST_FLAGS_RD_TABLES_PREALLOCATED |
2996
+ RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING);
2997
+ pr_info("GICv3: Using preallocated redistributor tables\n");
2998
+ }
2999
+
3000
+ err = its_setup_lpi_prop_table();
3001
+ if (err)
3002
+ return err;
3003
+
3004
+ /*
3005
+ * We allocate all the pending tables anyway, as we may have a
3006
+ * mix of RDs that have had LPIs enabled, and some that
3007
+ * don't. We'll free the unused ones as each CPU comes online.
3008
+ */
3009
+ for_each_possible_cpu(cpu) {
3010
+ struct page *pend_page;
3011
+
3012
+ pend_page = its_allocate_pending_table(GFP_NOWAIT);
3013
+ if (!pend_page) {
3014
+ pr_err("Failed to allocate PENDBASE for CPU%d\n", cpu);
3015
+ return -ENOMEM;
3016
+ }
3017
+
3018
+ gic_data_rdist_cpu(cpu)->pend_page = pend_page;
3019
+ }
3020
+
3021
+ return 0;
3022
+}
3023
+
3024
+static u64 read_vpend_dirty_clear(void __iomem *vlpi_base)
20043025 {
20053026 u32 count = 1000000; /* 1s! */
20063027 bool clean;
20073028 u64 val;
20083029
2009
- val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
2010
- val &= ~GICR_VPENDBASER_Valid;
2011
- gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
2012
-
20133030 do {
2014
- val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
3031
+ val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
20153032 clean = !(val & GICR_VPENDBASER_Dirty);
20163033 if (!clean) {
20173034 count--;
....@@ -2020,6 +3037,27 @@
20203037 }
20213038 } while (!clean && count);
20223039
3040
+ if (unlikely(!clean))
3041
+ pr_err_ratelimited("ITS virtual pending table not cleaning\n");
3042
+
3043
+ return val;
3044
+}
3045
+
3046
+static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set)
3047
+{
3048
+ u64 val;
3049
+
3050
+ /* Make sure we wait until the RD is done with the initial scan */
3051
+ val = read_vpend_dirty_clear(vlpi_base);
3052
+ val &= ~GICR_VPENDBASER_Valid;
3053
+ val &= ~clr;
3054
+ val |= set;
3055
+ gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
3056
+
3057
+ val = read_vpend_dirty_clear(vlpi_base);
3058
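+ /* Timed out with Dirty still set: report PendingLast so callers assume interrupts may be pending */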
+ if (unlikely(val & GICR_VPENDBASER_Dirty))
3059
+ val |= GICR_VPENDBASER_PendingLast;
3060
+
20233061 return val;
20243062 }
20253063
....@@ -2027,28 +3065,40 @@
20273065 {
20283066 void __iomem *rbase = gic_data_rdist_rd_base();
20293067 struct page *pend_page;
3068
+ phys_addr_t paddr;
20303069 u64 val, tmp;
20313070
2032
- /* If we didn't allocate the pending table yet, do it now */
2033
- pend_page = gic_data_rdist()->pend_page;
2034
- if (!pend_page) {
2035
- phys_addr_t paddr;
3071
+ if (gic_data_rdist()->lpi_enabled)
3072
+ return;
20363073
2037
- pend_page = its_allocate_pending_table(GFP_NOWAIT);
2038
- if (!pend_page) {
2039
- pr_err("Failed to allocate PENDBASE for CPU%d\n",
2040
- smp_processor_id());
2041
- return;
2042
- }
3074
+ val = readl_relaxed(rbase + GICR_CTLR);
3075
+ if ((gic_rdists->flags & RDIST_FLAGS_RD_TABLES_PREALLOCATED) &&
3076
+ (val & GICR_CTLR_ENABLE_LPIS)) {
3077
+ /*
3078
+ * Check that we get the same property table on all
3079
+ * RDs. If we don't, this is hopeless.
3080
+ */
3081
+ paddr = gicr_read_propbaser(rbase + GICR_PROPBASER);
3082
+ paddr &= GENMASK_ULL(51, 12);
3083
+ if (WARN_ON(gic_rdists->prop_table_pa != paddr))
3084
+ add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
20433085
2044
- paddr = page_to_phys(pend_page);
2045
- pr_info("CPU%d: using LPI pending table @%pa\n",
2046
- smp_processor_id(), &paddr);
2047
- gic_data_rdist()->pend_page = pend_page;
3086
+ paddr = gicr_read_pendbaser(rbase + GICR_PENDBASER);
3087
+ paddr &= GENMASK_ULL(51, 16);
3088
+
3089
+ WARN_ON(!gic_check_reserved_range(paddr, LPI_PENDBASE_SZ));
3090
+ its_free_pending_table(gic_data_rdist()->pend_page);
3091
+ gic_data_rdist()->pend_page = NULL;
3092
+
3093
+ goto out;
20483094 }
20493095
3096
+ pend_page = gic_data_rdist()->pend_page;
3097
+ paddr = page_to_phys(pend_page);
3098
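+ /* Record the pending table as a reserved range so a later (e.g. kexec'd) kernel can detect and keep it */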
+ WARN_ON(gic_reserve_range(paddr, LPI_PENDBASE_SZ));
3099
+
20503100 /* set PROPBASE */
2051
- val = (page_to_phys(gic_rdists->prop_page) |
3101
+ val = (gic_rdists->prop_table_pa |
20523102 GICR_PROPBASER_InnerShareable |
20533103 GICR_PROPBASER_RaWaWb |
20543104 ((LPI_NRBITS - 1) & GICR_PROPBASER_IDBITS_MASK));
....@@ -2056,7 +3106,10 @@
20563106 gicr_write_propbaser(val, rbase + GICR_PROPBASER);
20573107 tmp = gicr_read_propbaser(rbase + GICR_PROPBASER);
20583108
2059
- if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
3109
+ if (IS_ENABLED(CONFIG_NO_GKI) &&
3110
+ (of_machine_is_compatible("rockchip,rk3568") ||
3111
+ of_machine_is_compatible("rockchip,rk3566") ||
3112
+ of_machine_is_compatible("rockchip,rk3588")))
20603113 tmp &= ~GICR_PROPBASER_SHAREABILITY_MASK;
20613114
20623115 if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) {
....@@ -2083,7 +3136,10 @@
20833136 gicr_write_pendbaser(val, rbase + GICR_PENDBASER);
20843137 tmp = gicr_read_pendbaser(rbase + GICR_PENDBASER);
20853138
2086
- if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
3139
+ if (IS_ENABLED(CONFIG_NO_GKI) &&
3140
+ (of_machine_is_compatible("rockchip,rk3568") ||
3141
+ of_machine_is_compatible("rockchip,rk3566") ||
3142
+ of_machine_is_compatible("rockchip,rk3588")))
20873143 tmp &= ~GICR_PENDBASER_SHAREABILITY_MASK;
20883144
20893145 if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) {
....@@ -2102,7 +3158,7 @@
21023158 val |= GICR_CTLR_ENABLE_LPIS;
21033159 writel_relaxed(val, rbase + GICR_CTLR);
21043160
2105
- if (gic_rdists->has_vlpis) {
3161
+ if (gic_rdists->has_vlpis && !gic_rdists->has_rvpeid) {
21063162 void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
21073163
21083164 /*
....@@ -2115,19 +3171,34 @@
21153171 val = (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK;
21163172 pr_debug("GICv4: CPU%d: Init IDbits to 0x%llx for GICR_VPROPBASER\n",
21173173 smp_processor_id(), val);
2118
- gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
3174
+ gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
21193175
21203176 /*
21213177 * Also clear Valid bit of GICR_VPENDBASER, in case some
21223178 * ancient programming gets left in and could end up
21233179 * corrupting memory.
21243180 */
2125
- val = its_clear_vpend_valid(vlpi_base);
2126
- WARN_ON(val & GICR_VPENDBASER_Dirty);
3181
+ val = its_clear_vpend_valid(vlpi_base, 0, 0);
3182
+ }
3183
+
3184
+ if (allocate_vpe_l1_table()) {
3185
+ /*
3186
+ * If the allocation has failed, we're in massive trouble.
3187
+ * Disable direct injection, and pray that no VM was
3188
+ * already running...
3189
+ */
3190
+ gic_rdists->has_rvpeid = false;
3191
+ gic_rdists->has_vlpis = false;
21273192 }
21283193
21293194 /* Make sure the GIC has seen the above */
21303195 dsb(sy);
3196
+out:
3197
+ gic_data_rdist()->lpi_enabled = true;
3198
+ pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n",
3199
+ smp_processor_id(),
3200
+ gic_data_rdist()->pend_page ? "allocated" : "reserved",
3201
+ &paddr);
21313202 }
21323203
21333204 static void its_cpu_init_collection(struct its_node *its)
....@@ -2212,7 +3283,8 @@
22123283 return NULL;
22133284 }
22143285
2215
-static bool its_alloc_table_entry(struct its_baser *baser, u32 id)
3286
+static bool its_alloc_table_entry(struct its_node *its,
3287
+ struct its_baser *baser, u32 id)
22163288 {
22173289 struct page *page;
22183290 u32 esz, idx;
....@@ -2236,7 +3308,8 @@
22363308
22373309 if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
22383310 gfp_flags |= GFP_DMA32;
2239
- page = alloc_pages(gfp_flags, get_order(baser->psz));
3311
+ page = alloc_pages_node(its->numa_node, gfp_flags,
3312
+ get_order(baser->psz));
22403313 if (!page)
22413314 return false;
22423315
....@@ -2265,14 +3338,15 @@
22653338
22663339 /* Don't allow device id that exceeds ITS hardware limit */
22673340 if (!baser)
2268
- return (ilog2(dev_id) < its->device_ids);
3341
+ return (ilog2(dev_id) < device_ids(its));
22693342
2270
- return its_alloc_table_entry(baser, dev_id);
3343
+ return its_alloc_table_entry(its, baser, dev_id);
22713344 }
22723345
22733346 static bool its_alloc_vpe_table(u32 vpe_id)
22743347 {
22753348 struct its_node *its;
3349
+ int cpu;
22763350
22773351 /*
22783352 * Make sure the L2 tables are allocated on *all* v4 ITSs. We
....@@ -2284,14 +3358,27 @@
22843358 list_for_each_entry(its, &its_nodes, entry) {
22853359 struct its_baser *baser;
22863360
2287
- if (!its->is_v4)
3361
+ if (!is_v4(its))
22883362 continue;
22893363
22903364 baser = its_get_baser(its, GITS_BASER_TYPE_VCPU);
22913365 if (!baser)
22923366 return false;
22933367
2294
- if (!its_alloc_table_entry(baser, vpe_id))
3368
+ if (!its_alloc_table_entry(its, baser, vpe_id))
3369
+ return false;
3370
+ }
3371
+
3372
+ /* Not v4.1? No need to iterate over the RDs; return early. */
3373
+ if (!gic_rdists->has_rvpeid)
3374
+ return true;
3375
+
3376
+ /*
3377
+ * Make sure the L2 tables are allocated for all copies of
3378
+ * the L1 table on *all* v4.1 RDs.
3379
+ */
3380
+ for_each_possible_cpu(cpu) {
3381
+ if (!allocate_vpe_l2_table(cpu, vpe_id))
22953382 return false;
22963383 }
22973384
....@@ -2324,12 +3411,16 @@
23243411 * sized as a power of two (and you need at least one bit...).
23253412 */
23263413 nr_ites = max(2, nvecs);
2327
- sz = nr_ites * its->ite_size;
3414
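+ /* GITS_TYPER.ITT_entry_size encodes the per-entry size in bytes, minus one */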
+ sz = nr_ites * (FIELD_GET(GITS_TYPER_ITT_ENTRY_SIZE, its->typer) + 1);
23283415 sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
23293416 gfp_flags = GFP_KERNEL;
2330
- if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
3417
+ if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566")) {
23313418 gfp_flags |= GFP_DMA32;
2332
- itt = (void *)__get_free_pages(gfp_flags, get_order(sz));
3419
+ itt = (void *)__get_free_pages(gfp_flags, get_order(sz));
3420
+ } else {
3421
+ itt = kzalloc_node(sz, gfp_flags, its->numa_node);
3422
+ }
3423
+
23333424 if (alloc_lpis) {
23343425 lpi_map = its_lpi_alloc(nvecs, &lpi_base, &nr_lpis);
23353426 if (lpi_map)
....@@ -2343,7 +3434,13 @@
23433434
23443435 if (!dev || !itt || !col_map || (!lpi_map && alloc_lpis)) {
23453436 kfree(dev);
2346
- free_pages((unsigned long)itt, get_order(sz));
3437
+
3438
+ if (of_machine_is_compatible("rockchip,rk3568") ||
3439
+ of_machine_is_compatible("rockchip,rk3566"))
3440
+ free_pages((unsigned long)itt, get_order(sz));
3441
+ else
3442
+ kfree(itt);
3443
+
23473444 kfree(lpi_map);
23483445 kfree(col_map);
23493446 return NULL;
....@@ -2359,7 +3456,7 @@
23593456 dev->event_map.col_map = col_map;
23603457 dev->event_map.lpi_base = lpi_base;
23613458 dev->event_map.nr_lpis = nr_lpis;
2362
- mutex_init(&dev->event_map.vlpi_lock);
3459
+ raw_spin_lock_init(&dev->event_map.vlpi_lock);
23633460 dev->device_id = dev_id;
23643461 INIT_LIST_HEAD(&dev->entry);
23653462
....@@ -2380,7 +3477,14 @@
23803477 raw_spin_lock_irqsave(&its_dev->its->lock, flags);
23813478 list_del(&its_dev->entry);
23823479 raw_spin_unlock_irqrestore(&its_dev->its->lock, flags);
2383
- free_pages((unsigned long)its_dev->itt, get_order(its_dev->itt_sz));
3480
+ kfree(its_dev->event_map.col_map);
3481
+
3482
+ if (of_machine_is_compatible("rockchip,rk3568") ||
3483
+ of_machine_is_compatible("rockchip,rk3566"))
3484
+ free_pages((unsigned long)its_dev->itt, get_order(its_dev->itt_sz));
3485
+ else
3486
+ kfree(its_dev->itt);
3487
+
23843488 kfree(its_dev);
23853489 }
23863490
....@@ -2388,6 +3492,7 @@
23883492 {
23893493 int idx;
23903494
3495
+ /* Find a free LPI region in lpi_map and allocate them. */
23913496 idx = bitmap_find_free_region(dev->event_map.lpi_map,
23923497 dev->event_map.nr_lpis,
23933498 get_count_order(nvecs));
....@@ -2395,7 +3500,6 @@
23953500 return -ENOSPC;
23963501
23973502 *hwirq = dev->event_map.lpi_base + idx;
2398
- set_bit(idx, dev->event_map.lpi_map);
23993503
24003504 return 0;
24013505 }
....@@ -2410,7 +3514,7 @@
24103514 int err = 0;
24113515
24123516 /*
2413
- * We ignore "dev" entierely, and rely on the dev_id that has
3517
+ * We ignore "dev" entirely, and rely on the dev_id that has
24143518 * been passed via the scratchpad. This limits this domain's
24153519 * usefulness to upper layers that definitely know that they
24163520 * are built on top of the ITS.
....@@ -2489,12 +3593,17 @@
24893593 {
24903594 msi_alloc_info_t *info = args;
24913595 struct its_device *its_dev = info->scratchpad[0].ptr;
3596
+ struct its_node *its = its_dev->its;
24923597 struct irq_data *irqd;
24933598 irq_hw_number_t hwirq;
24943599 int err;
24953600 int i;
24963601
24973602 err = its_alloc_device_irq(its_dev, nr_irqs, &hwirq);
3603
+ if (err)
3604
+ return err;
3605
+
3606
+ err = iommu_dma_prepare_msi(info->desc, its->get_msi_base(its_dev));
24983607 if (err)
24993608 return err;
25003609
....@@ -2521,22 +3630,13 @@
25213630 {
25223631 struct its_device *its_dev = irq_data_get_irq_chip_data(d);
25233632 u32 event = its_get_event_id(d);
2524
- const struct cpumask *cpu_mask = cpu_online_mask;
25253633 int cpu;
25263634
2527
- /* get the cpu_mask of local node */
2528
- if (its_dev->its->numa_node >= 0)
2529
- cpu_mask = cpumask_of_node(its_dev->its->numa_node);
3635
+ cpu = its_select_cpu(d, cpu_online_mask);
3636
+ if (cpu < 0 || cpu >= nr_cpu_ids)
3637
+ return -EINVAL;
25303638
2531
- /* Bind the LPI to the first possible CPU */
2532
- cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
2533
- if (cpu >= nr_cpu_ids) {
2534
- if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144)
2535
- return -EINVAL;
2536
-
2537
- cpu = cpumask_first(cpu_online_mask);
2538
- }
2539
-
3639
+ its_inc_lpi_count(d, cpu);
25403640 its_dev->event_map.col_map[event] = cpu;
25413641 irq_data_update_effective_affinity(d, cpumask_of(cpu));
25423642
....@@ -2551,6 +3651,7 @@
25513651 struct its_device *its_dev = irq_data_get_irq_chip_data(d);
25523652 u32 event = its_get_event_id(d);
25533653
3654
+ its_dec_lpi_count(d, its_dev->event_map.col_map[event]);
25543655 /* Stop the delivery of interrupts */
25553656 its_send_discard(its_dev, event);
25563657 }
....@@ -2586,7 +3687,6 @@
25863687 its_lpi_free(its_dev->event_map.lpi_map,
25873688 its_dev->event_map.lpi_base,
25883689 its_dev->event_map.nr_lpis);
2589
- kfree(its_dev->event_map.col_map);
25903690
25913691 /* Unmap device/itt */
25923692 its_send_mapd(its_dev, 0);
....@@ -2608,7 +3708,7 @@
26083708 /*
26093709 * This is insane.
26103710 *
2611
- * If a GICv4 doesn't implement Direct LPIs (which is extremely
3711
+ * If a GICv4.0 doesn't implement Direct LPIs (which is extremely
26123712 * likely), the only way to perform an invalidate is to use a fake
26133713 * device to issue an INV command, implying that the LPI has first
26143714 * been mapped to some event on that device. Since this is not exactly
....@@ -2616,9 +3716,20 @@
26163716 * only issue an UNMAP if we're short on available slots.
26173717 *
26183718 * Broken by design(tm).
3719
+ *
3720
+ * GICv4.1, on the other hand, mandates that we're able to invalidate
3721
+ * by writing to an MMIO register. It doesn't implement the whole of
3722
+ * DirectLPI, but that's good enough. And most of the time, we don't
3723
+ * even have to invalidate anything, as the redistributor can be told
3724
+ * whether to generate a doorbell or not (we thus leave it enabled,
3725
+ * always).
26193726 */
26203727 static void its_vpe_db_proxy_unmap_locked(struct its_vpe *vpe)
26213728 {
3729
+ /* GICv4.1 doesn't use a proxy, so nothing to do here */
3730
+ if (gic_rdists->has_rvpeid)
3731
+ return;
3732
+
26223733 /* Already unmapped? */
26233734 if (vpe->vpe_proxy_event == -1)
26243735 return;
....@@ -2641,6 +3752,10 @@
26413752
26423753 static void its_vpe_db_proxy_unmap(struct its_vpe *vpe)
26433754 {
3755
+ /* GICv4.1 doesn't use a proxy, so nothing to do here */
3756
+ if (gic_rdists->has_rvpeid)
3757
+ return;
3758
+
26443759 if (!gic_rdists->has_direct_lpi) {
26453760 unsigned long flags;
26463761
....@@ -2652,6 +3767,10 @@
26523767
26533768 static void its_vpe_db_proxy_map_locked(struct its_vpe *vpe)
26543769 {
3770
+ /* GICv4.1 doesn't use a proxy, so nothing to do here */
3771
+ if (gic_rdists->has_rvpeid)
3772
+ return;
3773
+
26553774 /* Already mapped? */
26563775 if (vpe->vpe_proxy_event != -1)
26573776 return;
....@@ -2674,13 +3793,16 @@
26743793 unsigned long flags;
26753794 struct its_collection *target_col;
26763795
3796
+ /* GICv4.1 doesn't use a proxy, so nothing to do here */
3797
+ if (gic_rdists->has_rvpeid)
3798
+ return;
3799
+
26773800 if (gic_rdists->has_direct_lpi) {
26783801 void __iomem *rdbase;
26793802
26803803 rdbase = per_cpu_ptr(gic_rdists->rdist, from)->rd_base;
26813804 gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_CLRLPIR);
2682
- while (gic_read_lpir(rdbase + GICR_SYNCR) & 1)
2683
- cpu_relax();
3805
+ wait_for_syncr(rdbase);
26843806
26853807 return;
26863808 }
....@@ -2701,25 +3823,58 @@
27013823 bool force)
27023824 {
27033825 struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
2704
- int cpu = cpumask_first(mask_val);
3826
+ int from, cpu = cpumask_first(mask_val);
3827
+ unsigned long flags;
27053828
27063829 /*
27073830 * Changing affinity is mega expensive, so let's be as lazy as
27083831 * we can and only do it if we really have to. Also, if mapped
27093832 * into the proxy device, we need to move the doorbell
27103833 * interrupt to its new location.
3834
+ *
3835
+ * Another thing is that changing the affinity of a vPE affects
3836
+ * *other interrupts* such as all the vLPIs that are routed to
3837
+ * this vPE. This means that the irq_desc lock is not enough to
3838
+ * protect us, and that we must ensure nobody samples vpe->col_idx
3839
+ * during the update, hence the lock below which must also be
3840
+ * taken on any vLPI handling path that evaluates vpe->col_idx.
27113841 */
2712
- if (vpe->col_idx != cpu) {
2713
- int from = vpe->col_idx;
3842
+ from = vpe_to_cpuid_lock(vpe, &flags);
3843
+ if (from == cpu)
3844
+ goto out;
27143845
2715
- vpe->col_idx = cpu;
2716
- its_send_vmovp(vpe);
2717
- its_vpe_db_proxy_move(vpe, from, cpu);
2718
- }
3846
+ vpe->col_idx = cpu;
27193847
3848
+ /*
3849
+ * GICv4.1 allows us to skip VMOVP if moving to a cpu whose RD
3850
+ * is sharing its VPE table with the current one.
3851
+ */
3852
+ if (gic_data_rdist_cpu(cpu)->vpe_table_mask &&
3853
+ cpumask_test_cpu(from, gic_data_rdist_cpu(cpu)->vpe_table_mask))
3854
+ goto out;
3855
+
3856
+ its_send_vmovp(vpe);
3857
+ its_vpe_db_proxy_move(vpe, from, cpu);
3858
+
3859
+out:
27203860 irq_data_update_effective_affinity(d, cpumask_of(cpu));
3861
+ vpe_to_cpuid_unlock(vpe, flags);
27213862
27223863 return IRQ_SET_MASK_OK_DONE;
3864
+}
3865
+
3866
+static void its_wait_vpt_parse_complete(void)
3867
+{
3868
+ void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
3869
+ u64 val;
3870
+
3871
+ if (!gic_rdists->has_vpend_valid_dirty)
3872
+ return;
3873
+
3874
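+ /* Poll VPENDBASER.Dirty (every 10us, for up to 500us) until the RD has parsed the virtual pending table */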
+ WARN_ON_ONCE(readq_relaxed_poll_timeout_atomic(vlpi_base + GICR_VPENDBASER,
3875
+ val,
3876
+ !(val & GICR_VPENDBASER_Dirty),
3877
+ 10, 500));
27233878 }
27243879
27253880 static void its_vpe_schedule(struct its_vpe *vpe)
....@@ -2733,12 +3888,12 @@
27333888 val |= (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK;
27343889 val |= GICR_VPROPBASER_RaWb;
27353890 val |= GICR_VPROPBASER_InnerShareable;
2736
- gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
3891
+ gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
27373892
27383893 val = virt_to_phys(page_address(vpe->vpt_page)) &
27393894 GENMASK_ULL(51, 16);
27403895 val |= GICR_VPENDBASER_RaWaWb;
2741
- val |= GICR_VPENDBASER_NonShareable;
3896
+ val |= GICR_VPENDBASER_InnerShareable;
27423897 /*
27433898 * There is no good way of finding out if the pending table is
27443899 * empty as we can race against the doorbell interrupt very
....@@ -2751,7 +3906,7 @@
27513906 val |= GICR_VPENDBASER_PendingLast;
27523907 val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0;
27533908 val |= GICR_VPENDBASER_Valid;
2754
- gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
3909
+ gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
27553910 }
27563911
27573912 static void its_vpe_deschedule(struct its_vpe *vpe)
....@@ -2759,16 +3914,10 @@
27593914 void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
27603915 u64 val;
27613916
2762
- val = its_clear_vpend_valid(vlpi_base);
3917
+ val = its_clear_vpend_valid(vlpi_base, 0, 0);
27633918
2764
- if (unlikely(val & GICR_VPENDBASER_Dirty)) {
2765
- pr_err_ratelimited("ITS virtual pending table not cleaning\n");
2766
- vpe->idai = false;
2767
- vpe->pending_last = true;
2768
- } else {
2769
- vpe->idai = !!(val & GICR_VPENDBASER_IDAI);
2770
- vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast);
2771
- }
3919
+ vpe->idai = !!(val & GICR_VPENDBASER_IDAI);
3920
+ vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast);
27723921 }
27733922
27743923 static void its_vpe_invall(struct its_vpe *vpe)
....@@ -2776,7 +3925,7 @@
27763925 struct its_node *its;
27773926
27783927 list_for_each_entry(its, &its_nodes, entry) {
2779
- if (!its->is_v4)
3928
+ if (!is_v4(its))
27803929 continue;
27813930
27823931 if (its_list_map && !vpe->its_vm->vlpi_count[its->list_nr])
....@@ -2803,6 +3952,10 @@
28033952
28043953 case DESCHEDULE_VPE:
28053954 its_vpe_deschedule(vpe);
3955
+ return 0;
3956
+
3957
+ case COMMIT_VPE:
3958
+ its_wait_vpt_parse_complete();
28063959 return 0;
28073960
28083961 case INVALL_VPE:
....@@ -2834,10 +3987,12 @@
28343987 if (gic_rdists->has_direct_lpi) {
28353988 void __iomem *rdbase;
28363989
3990
+ /* Target the redistributor this VPE is currently known on */
3991
+ raw_spin_lock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock);
28373992 rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
2838
- gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_INVLPIR);
2839
- while (gic_read_lpir(rdbase + GICR_SYNCR) & 1)
2840
- cpu_relax();
3993
+ gic_write_lpir(d->parent_data->hwirq, rdbase + GICR_INVLPIR);
3994
+ wait_for_syncr(rdbase);
3995
+ raw_spin_unlock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock);
28413996 } else {
28423997 its_vpe_send_cmd(vpe, its_send_inv);
28433998 }
....@@ -2879,8 +4034,7 @@
28794034 gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_SETLPIR);
28804035 } else {
28814036 gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_CLRLPIR);
2882
- while (gic_read_lpir(rdbase + GICR_SYNCR) & 1)
2883
- cpu_relax();
4037
+ wait_for_syncr(rdbase);
28844038 }
28854039 } else {
28864040 if (state)
....@@ -2906,6 +4060,375 @@
29064060 .irq_retrigger = its_vpe_retrigger,
29074061 .irq_set_irqchip_state = its_vpe_set_irqchip_state,
29084062 .irq_set_vcpu_affinity = its_vpe_set_vcpu_affinity,
4063
+};
4064
+
4065
+static struct its_node *find_4_1_its(void)
4066
+{
4067
+ static struct its_node *its = NULL;
4068
+
4069
+ if (!its) {
4070
+ list_for_each_entry(its, &its_nodes, entry) {
4071
+ if (is_v4_1(its))
4072
+ return its;
4073
+ }
4074
+
4075
+ /* No v4.1 ITS found: don't cache the invalid loop cursor */
4076
+ its = NULL;
4077
+ }
4078
+
4079
+ return its;
4080
+}
4081
+
4082
+static void its_vpe_4_1_send_inv(struct irq_data *d)
4083
+{
4084
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4085
+ struct its_node *its;
4086
+
4087
+ /*
4088
+ * GICv4.1 wants doorbells to be invalidated using the
4089
+ * INVDB command in order to be broadcast to all RDs. Send
4090
+ * it to the first valid ITS, and let the HW do its magic.
4091
+ */
4092
+ its = find_4_1_its();
4093
+ if (its)
4094
+ its_send_invdb(its, vpe);
4095
+}
4096
+
4097
+static void its_vpe_4_1_mask_irq(struct irq_data *d)
4098
+{
4099
+ lpi_write_config(d->parent_data, LPI_PROP_ENABLED, 0);
4100
+ its_vpe_4_1_send_inv(d);
4101
+}
4102
+
4103
+static void its_vpe_4_1_unmask_irq(struct irq_data *d)
4104
+{
4105
+ lpi_write_config(d->parent_data, 0, LPI_PROP_ENABLED);
4106
+ its_vpe_4_1_send_inv(d);
4107
+}
4108
+
4109
+static void its_vpe_4_1_schedule(struct its_vpe *vpe,
4110
+ struct its_cmd_info *info)
4111
+{
4112
+ void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
4113
+ u64 val = 0;
4114
+
4115
+ /* Schedule the VPE */
4116
+ val |= GICR_VPENDBASER_Valid;
4117
+ val |= info->g0en ? GICR_VPENDBASER_4_1_VGRP0EN : 0;
4118
+ val |= info->g1en ? GICR_VPENDBASER_4_1_VGRP1EN : 0;
4119
+ val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id);
4120
+
4121
+ gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
4122
+}
4123
+
4124
+static void its_vpe_4_1_deschedule(struct its_vpe *vpe,
4125
+ struct its_cmd_info *info)
4126
+{
4127
+ void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
4128
+ u64 val;
4129
+
4130
+ if (info->req_db) {
4131
+ unsigned long flags;
4132
+
4133
+ /*
4134
+ * vPE is going to block: make the vPE non-resident with
4135
+ * PendingLast clear and DB set. The GIC guarantees that if
4136
+ * we read-back PendingLast clear, then a doorbell will be
4137
+ * delivered when an interrupt comes.
4138
+ *
4139
+ * Note the locking to deal with the update of pending_last
4140
+ * from the doorbell interrupt handler, which can run
4141
+ * concurrently.
4142
+ */
4143
+ raw_spin_lock_irqsave(&vpe->vpe_lock, flags);
4144
+ val = its_clear_vpend_valid(vlpi_base,
4145
+ GICR_VPENDBASER_PendingLast,
4146
+ GICR_VPENDBASER_4_1_DB);
4147
+ vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast);
4148
+ raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);
4149
+ } else {
4150
+ /*
4151
+ * We're not blocking, so just make the vPE non-resident
4152
+ * with PendingLast set, indicating that we'll be back.
4153
+ */
4154
+ val = its_clear_vpend_valid(vlpi_base,
4155
+ 0,
4156
+ GICR_VPENDBASER_PendingLast);
4157
+ vpe->pending_last = true;
4158
+ }
4159
+}
4160
+
4161
+static void its_vpe_4_1_invall(struct its_vpe *vpe)
4162
+{
4163
+ void __iomem *rdbase;
4164
+ unsigned long flags;
4165
+ u64 val;
4166
+ int cpu;
4167
+
4168
+ val = GICR_INVALLR_V;
4169
+ val |= FIELD_PREP(GICR_INVALLR_VPEID, vpe->vpe_id);
4170
+
4171
+ /* Target the redistributor this vPE is currently known on */
4172
+ cpu = vpe_to_cpuid_lock(vpe, &flags);
4173
+ raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
4174
+ rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
4175
+ gic_write_lpir(val, rdbase + GICR_INVALLR);
4176
+
4177
+ wait_for_syncr(rdbase);
4178
+ raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
4179
+ vpe_to_cpuid_unlock(vpe, flags);
4180
+}
4181
+
4182
+static int its_vpe_4_1_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
4183
+{
4184
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4185
+ struct its_cmd_info *info = vcpu_info;
4186
+
4187
+ switch (info->cmd_type) {
4188
+ case SCHEDULE_VPE:
4189
+ its_vpe_4_1_schedule(vpe, info);
4190
+ return 0;
4191
+
4192
+ case DESCHEDULE_VPE:
4193
+ its_vpe_4_1_deschedule(vpe, info);
4194
+ return 0;
4195
+
4196
+ case COMMIT_VPE:
4197
+ its_wait_vpt_parse_complete();
4198
+ return 0;
4199
+
4200
+ case INVALL_VPE:
4201
+ its_vpe_4_1_invall(vpe);
4202
+ return 0;
4203
+
4204
+ default:
4205
+ return -EINVAL;
4206
+ }
4207
+}
4208
+
4209
+static struct irq_chip its_vpe_4_1_irq_chip = {
4210
+ .name = "GICv4.1-vpe",
4211
+ .irq_mask = its_vpe_4_1_mask_irq,
4212
+ .irq_unmask = its_vpe_4_1_unmask_irq,
4213
+ .irq_eoi = irq_chip_eoi_parent,
4214
+ .irq_set_affinity = its_vpe_set_affinity,
4215
+ .irq_set_vcpu_affinity = its_vpe_4_1_set_vcpu_affinity,
4216
+};
4217
+
4218
+static void its_configure_sgi(struct irq_data *d, bool clear)
4219
+{
4220
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4221
+ struct its_cmd_desc desc;
4222
+
4223
+ desc.its_vsgi_cmd.vpe = vpe;
4224
+ desc.its_vsgi_cmd.sgi = d->hwirq;
4225
+ desc.its_vsgi_cmd.priority = vpe->sgi_config[d->hwirq].priority;
4226
+ desc.its_vsgi_cmd.enable = vpe->sgi_config[d->hwirq].enabled;
4227
+ desc.its_vsgi_cmd.group = vpe->sgi_config[d->hwirq].group;
4228
+ desc.its_vsgi_cmd.clear = clear;
4229
+
4230
+ /*
4231
+ * GICv4.1 allows us to send VSGI commands to any ITS as long as the
4232
+ * destination VPE is mapped there. Since we map them eagerly at
4233
+ * activation time, we're pretty sure the first GICv4.1 ITS will do.
4234
+ */
4235
+ its_send_single_vcommand(find_4_1_its(), its_build_vsgi_cmd, &desc);
4236
+}
4237
+
4238
+static void its_sgi_mask_irq(struct irq_data *d)
4239
+{
4240
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4241
+
4242
+ vpe->sgi_config[d->hwirq].enabled = false;
4243
+ its_configure_sgi(d, false);
4244
+}
4245
+
4246
+static void its_sgi_unmask_irq(struct irq_data *d)
4247
+{
4248
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4249
+
4250
+ vpe->sgi_config[d->hwirq].enabled = true;
4251
+ its_configure_sgi(d, false);
4252
+}
4253
+
4254
+static int its_sgi_set_affinity(struct irq_data *d,
4255
+ const struct cpumask *mask_val,
4256
+ bool force)
4257
+{
4258
+ /*
4259
+ * There is no notion of affinity for virtual SGIs, at least
4260
+ * not on the host (since they can only be targeting a vPE).
4261
+ * Tell the kernel we've done whatever it asked for.
4262
+ */
4263
+ irq_data_update_effective_affinity(d, mask_val);
4264
+ return IRQ_SET_MASK_OK;
4265
+}
4266
+
4267
+static int its_sgi_set_irqchip_state(struct irq_data *d,
4268
+ enum irqchip_irq_state which,
4269
+ bool state)
4270
+{
4271
+ if (which != IRQCHIP_STATE_PENDING)
4272
+ return -EINVAL;
4273
+
4274
+ if (state) {
4275
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4276
+ struct its_node *its = find_4_1_its();
4277
+ u64 val;
4278
+
4279
+ val = FIELD_PREP(GITS_SGIR_VPEID, vpe->vpe_id);
4280
+ val |= FIELD_PREP(GITS_SGIR_VINTID, d->hwirq);
4281
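+ /* GITS_SGIR is an offset from the ITS base; sgir_base maps the frame at base + 128K, hence the adjustment */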
+ writeq_relaxed(val, its->sgir_base + GITS_SGIR - SZ_128K);
4282
+ } else {
4283
+ its_configure_sgi(d, true);
4284
+ }
4285
+
4286
+ return 0;
4287
+}
4288
+
4289
+static int its_sgi_get_irqchip_state(struct irq_data *d,
4290
+ enum irqchip_irq_state which, bool *val)
4291
+{
4292
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4293
+ void __iomem *base;
4294
+ unsigned long flags;
4295
+ u32 count = 1000000; /* 1s! */
4296
+ u32 status;
4297
+ int cpu;
4298
+
4299
+ if (which != IRQCHIP_STATE_PENDING)
4300
+ return -EINVAL;
4301
+
4302
+ /*
4303
+ * Locking galore! We can race against two different events:
4304
+ *
4305
+ * - Concurrent vPE affinity change: we must make sure it cannot
4306
+ * happen, or we'll talk to the wrong redistributor. This is
4307
+ * identical to what happens with vLPIs.
4308
+ *
4309
+ * - Concurrent VSGIPENDR access: As it involves accessing two
4310
+ * MMIO registers, this must be made atomic one way or another.
4311
+ */
4312
+ cpu = vpe_to_cpuid_lock(vpe, &flags);
4313
+ raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
4314
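+ /* The vSGI registers (GICR_VSGIR/GICR_VSGIPENDR) live in the VLPI frame, 128K above RD_base */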
+ base = gic_data_rdist_cpu(cpu)->rd_base + SZ_128K;
4315
+ writel_relaxed(vpe->vpe_id, base + GICR_VSGIR);
4316
+ do {
4317
+ status = readl_relaxed(base + GICR_VSGIPENDR);
4318
+ if (!(status & GICR_VSGIPENDR_BUSY))
4319
+ goto out;
4320
+
4321
+ count--;
4322
+ if (!count) {
4323
+ pr_err_ratelimited("Unable to get SGI status\n");
4324
+ goto out;
4325
+ }
4326
+ cpu_relax();
4327
+ udelay(1);
4328
+ } while (count);
4329
+
4330
+out:
4331
+ raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
4332
+ vpe_to_cpuid_unlock(vpe, flags);
4333
+
4334
+ if (!count)
4335
+ return -ENXIO;
4336
+
4337
+ *val = !!(status & (1 << d->hwirq));
4338
+
4339
+ return 0;
4340
+}
4341
+
4342
+static int its_sgi_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
4343
+{
4344
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4345
+ struct its_cmd_info *info = vcpu_info;
4346
+
4347
+ switch (info->cmd_type) {
4348
+ case PROP_UPDATE_VSGI:
4349
+ vpe->sgi_config[d->hwirq].priority = info->priority;
4350
+ vpe->sgi_config[d->hwirq].group = info->group;
4351
+ its_configure_sgi(d, false);
4352
+ return 0;
4353
+
4354
+ default:
4355
+ return -EINVAL;
4356
+ }
4357
+}
4358
+
4359
+static struct irq_chip its_sgi_irq_chip = {
4360
+ .name = "GICv4.1-sgi",
4361
+ .irq_mask = its_sgi_mask_irq,
4362
+ .irq_unmask = its_sgi_unmask_irq,
4363
+ .irq_set_affinity = its_sgi_set_affinity,
4364
+ .irq_set_irqchip_state = its_sgi_set_irqchip_state,
4365
+ .irq_get_irqchip_state = its_sgi_get_irqchip_state,
4366
+ .irq_set_vcpu_affinity = its_sgi_set_vcpu_affinity,
4367
+};
4368
+
4369
+static int its_sgi_irq_domain_alloc(struct irq_domain *domain,
4370
+ unsigned int virq, unsigned int nr_irqs,
4371
+ void *args)
4372
+{
4373
+ struct its_vpe *vpe = args;
4374
+ int i;
4375
+
4376
+ /* Yes, we do want 16 SGIs */
4377
+ WARN_ON(nr_irqs != 16);
4378
+
4379
+ for (i = 0; i < 16; i++) {
4380
+ vpe->sgi_config[i].priority = 0;
4381
+ vpe->sgi_config[i].enabled = false;
4382
+ vpe->sgi_config[i].group = false;
4383
+
4384
+ irq_domain_set_hwirq_and_chip(domain, virq + i, i,
4385
+ &its_sgi_irq_chip, vpe);
4386
+ irq_set_status_flags(virq + i, IRQ_DISABLE_UNLAZY);
4387
+ }
4388
+
4389
+ return 0;
4390
+}
4391
+
4392
+static void its_sgi_irq_domain_free(struct irq_domain *domain,
4393
+ unsigned int virq,
4394
+ unsigned int nr_irqs)
4395
+{
4396
+ /* Nothing to do */
4397
+}
4398
+
4399
+static int its_sgi_irq_domain_activate(struct irq_domain *domain,
4400
+ struct irq_data *d, bool reserve)
4401
+{
4402
+ /* Write out the initial SGI configuration */
4403
+ its_configure_sgi(d, false);
4404
+ return 0;
4405
+}
4406
+
4407
+static void its_sgi_irq_domain_deactivate(struct irq_domain *domain,
4408
+ struct irq_data *d)
4409
+{
4410
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4411
+
4412
+ /*
4413
+ * The VSGI command is awkward:
4414
+ *
4415
+ * - To change the configuration, CLEAR must be set to false,
4416
+ * leaving the pending bit unchanged.
4417
+ * - To clear the pending bit, CLEAR must be set to true, leaving
4418
+ * the configuration unchanged.
4419
+ *
4420
+ * You just can't do both at once, hence the two commands below.
4421
+ */
4422
+ vpe->sgi_config[d->hwirq].enabled = false;
4423
+ its_configure_sgi(d, false);
4424
+ its_configure_sgi(d, true);
4425
+}
4426
+
4427
+static const struct irq_domain_ops its_sgi_domain_ops = {
4428
+ .alloc = its_sgi_irq_domain_alloc,
4429
+ .free = its_sgi_irq_domain_free,
4430
+ .activate = its_sgi_irq_domain_activate,
4431
+ .deactivate = its_sgi_irq_domain_deactivate,
29094432 };
29104433
29114434 static int its_vpe_id_alloc(void)
....@@ -2941,9 +4464,13 @@
29414464 return -ENOMEM;
29424465 }
29434466
4467
+ raw_spin_lock_init(&vpe->vpe_lock);
29444468 vpe->vpe_id = vpe_id;
29454469 vpe->vpt_page = vpt_page;
2946
- vpe->vpe_proxy_event = -1;
4470
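+ /* GICv4.1 tracks VMAPPs per vPE; the GICv4.0 doorbell proxy isn't used */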
+ if (gic_rdists->has_rvpeid)
4471
+ atomic_set(&vpe->vmapp_count, 0);
4472
+ else
4473
+ vpe->vpe_proxy_event = -1;
29474474
29484475 return 0;
29494476 }
....@@ -2985,6 +4512,7 @@
29854512 static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
29864513 unsigned int nr_irqs, void *args)
29874514 {
4515
+ struct irq_chip *irqchip = &its_vpe_irq_chip;
29884516 struct its_vm *vm = args;
29894517 unsigned long *bitmap;
29904518 struct page *vprop_page;
....@@ -3012,6 +4540,9 @@
30124540 vm->nr_db_lpis = nr_ids;
30134541 vm->vprop_page = vprop_page;
30144542
4543
+ if (gic_rdists->has_rvpeid)
4544
+ irqchip = &its_vpe_4_1_irq_chip;
4545
+
30154546 for (i = 0; i < nr_irqs; i++) {
30164547 vm->vpes[i]->vpe_db_lpi = base + i;
30174548 err = its_vpe_init(vm->vpes[i]);
....@@ -3022,7 +4553,7 @@
30224553 if (err)
30234554 break;
30244555 irq_domain_set_hwirq_and_chip(domain, virq + i, i,
3025
- &its_vpe_irq_chip, vm->vpes[i]);
4556
+ irqchip, vm->vpes[i]);
30264557 set_bit(i, bitmap);
30274558 }
30284559
....@@ -3043,15 +4574,19 @@
30434574 struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
30444575 struct its_node *its;
30454576
3046
- /* If we use the list map, we issue VMAPP on demand... */
3047
- if (its_list_map)
4577
+ /*
4578
+ * If we use the list map, we issue VMAPP on demand... Unless
4579
+ * we're on a GICv4.1 and we eagerly map the VPE on all ITSs
4580
+ * so that VSGIs can work.
4581
+ */
4582
+ if (!gic_requires_eager_mapping())
30484583 return 0;
30494584
30504585 /* Map the VPE to the first possible CPU */
30514586 vpe->col_idx = cpumask_first(cpu_online_mask);
30524587
30534588 list_for_each_entry(its, &its_nodes, entry) {
3054
- if (!its->is_v4)
4589
+ if (!is_v4(its))
30554590 continue;
30564591
30574592 its_send_vmapp(its, vpe, true);
....@@ -3070,14 +4605,14 @@
30704605 struct its_node *its;
30714606
30724607 /*
3073
- * If we use the list map, we unmap the VPE once no VLPIs are
3074
- * associated with the VM.
4608
+ * If we use the list map on GICv4.0, we unmap the VPE once no
4609
+ * VLPIs are associated with the VM.
30754610 */
3076
- if (its_list_map)
4611
+ if (!gic_requires_eager_mapping())
30774612 return;
30784613
30794614 list_for_each_entry(its, &its_nodes, entry) {
3080
- if (!its->is_v4)
4615
+ if (!is_v4(its))
30814616 continue;
30824617
30834618 its_send_vmapp(its, vpe, false);
....@@ -3128,8 +4663,9 @@
31284663 {
31294664 struct its_node *its = data;
31304665
3131
- /* erratum 22375: only alloc 8MB table size */
3132
- its->device_ids = 0x14; /* 20 bits, 8MB */
4666
+ /* erratum 22375: only alloc 8MB table size (20 bits) */
4667
+ its->typer &= ~GITS_TYPER_DEVBITS;
4668
+ its->typer |= FIELD_PREP(GITS_TYPER_DEVBITS, 20 - 1);
31334669 its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_22375;
31344670
31354671 return true;
....@@ -3149,7 +4685,8 @@
31494685 struct its_node *its = data;
31504686
31514687 /* On QDF2400, the size of the ITE is 16Bytes */
3152
- its->ite_size = 16;
4688
+ its->typer &= ~GITS_TYPER_ITT_ENTRY_SIZE;
4689
+ its->typer |= FIELD_PREP(GITS_TYPER_ITT_ENTRY_SIZE, 16 - 1);
31534690
31544691 return true;
31554692 }
....@@ -3183,8 +4720,10 @@
31834720 its->get_msi_base = its_irq_get_msi_base_pre_its;
31844721
31854722 ids = ilog2(pre_its_window[1]) - 2;
3186
- if (its->device_ids > ids)
3187
- its->device_ids = ids;
4723
+ if (device_ids(its) > ids) {
4724
+ its->typer &= ~GITS_TYPER_DEVBITS;
4725
+ its->typer |= FIELD_PREP(GITS_TYPER_DEVBITS, ids - 1);
4726
+ }
31884727
31894728 /* the pre-ITS breaks isolation, so disable MSI remapping */
31904729 its->msi_domain_flags &= ~IRQ_DOMAIN_FLAG_MSI_REMAP;
....@@ -3411,7 +4950,7 @@
34114950 }
34124951
34134952 /* Use the last possible DevID */
3414
- devid = GENMASK(its->device_ids - 1, 0);
4953
+ devid = GENMASK(device_ids(its) - 1, 0);
34154954 vpe_proxy.dev = its_create_device(its, devid, entries, false);
34164955 if (!vpe_proxy.dev) {
34174956 kfree(vpe_proxy.vpes);
....@@ -3474,10 +5013,11 @@
34745013 void __iomem *its_base;
34755014 u32 val, ctlr;
34765015 u64 baser, tmp, typer;
5016
+ struct page *page;
34775017 int err;
34785018 gfp_t gfp_flags;
34795019
3480
- its_base = ioremap(res->start, resource_size(res));
5020
+ its_base = ioremap(res->start, SZ_64K);
34815021 if (!its_base) {
34825022 pr_warn("ITS@%pa: Unable to map ITS registers\n", &res->start);
34835023 return -ENOMEM;
....@@ -3509,12 +5049,10 @@
35095049 INIT_LIST_HEAD(&its->entry);
35105050 INIT_LIST_HEAD(&its->its_device_list);
35115051 typer = gic_read_typer(its_base + GITS_TYPER);
5052
+ its->typer = typer;
35125053 its->base = its_base;
35135054 its->phys_base = res->start;
3514
- its->ite_size = GITS_TYPER_ITT_ENTRY_SIZE(typer);
3515
- its->device_ids = GITS_TYPER_DEVBITS(typer);
3516
- its->is_v4 = !!(typer & GITS_TYPER_VLPIS);
3517
- if (its->is_v4) {
5055
+ if (is_v4(its)) {
35185056 if (!(typer & GITS_TYPER_VMOVP)) {
35195057 err = its_compute_its_list_map(res, its_base);
35205058 if (err < 0)
....@@ -3527,6 +5065,21 @@
35275065 } else {
35285066 pr_info("ITS@%pa: Single VMOVP capable\n", &res->start);
35295067 }
5068
+
5069
+ if (is_v4_1(its)) {
5070
+ u32 svpet = FIELD_GET(GITS_TYPER_SVPET, typer);
5071
+
5072
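+ /* The GITS_SGIR frame sits 128K above the ITS control frame */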
+ its->sgir_base = ioremap(res->start + SZ_128K, SZ_64K);
5073
+ if (!its->sgir_base) {
5074
+ err = -ENOMEM;
5075
+ goto out_free_its;
5076
+ }
5077
+
5078
+ its->mpidr = readl_relaxed(its_base + GITS_MPIDR);
5079
+
5080
+ pr_info("ITS@%pa: Using GICv4.1 mode %08x %08x\n",
5081
+ &res->start, its->mpidr, svpet);
5082
+ }
35305083 }
35315084
35325085 its->numa_node = numa_node;
....@@ -3534,12 +5087,13 @@
35345087 gfp_flags = GFP_KERNEL | __GFP_ZERO;
35355088 if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
35365089 gfp_flags |= GFP_DMA32;
3537
- its->cmd_base = (void *)__get_free_pages(gfp_flags,
3538
- get_order(ITS_CMD_QUEUE_SZ));
3539
- if (!its->cmd_base) {
5090
+ page = alloc_pages_node(its->numa_node, gfp_flags,
5091
+ get_order(ITS_CMD_QUEUE_SZ));
5092
+ if (!page) {
35405093 err = -ENOMEM;
3541
- goto out_free_its;
5094
+ goto out_unmap_sgir;
35425095 }
5096
+ its->cmd_base = (void *)page_address(page);
35435097 its->cmd_write = its->cmd_base;
35445098 its->fwnode_handle = handle;
35455099 its->get_msi_base = its_irq_get_msi_base;
....@@ -3564,7 +5118,10 @@
35645118 gits_write_cbaser(baser, its->base + GITS_CBASER);
35655119 tmp = gits_read_cbaser(its->base + GITS_CBASER);
35665120
3567
- if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
5121
+ if (IS_ENABLED(CONFIG_NO_GKI) &&
5122
+ (of_machine_is_compatible("rockchip,rk3568") ||
5123
+ of_machine_is_compatible("rockchip,rk3566") ||
5124
+ of_machine_is_compatible("rockchip,rk3588")))
35685125 tmp &= ~GITS_CBASER_SHAREABILITY_MASK;
35695126
35705127 if ((tmp ^ baser) & GITS_CBASER_SHAREABILITY_MASK) {
....@@ -3586,7 +5143,7 @@
35865143 gits_write_cwriter(0, its->base + GITS_CWRITER);
35875144 ctlr = readl_relaxed(its->base + GITS_CTLR);
35885145 ctlr |= GITS_CTLR_ENABLE;
3589
- if (its->is_v4)
5146
+ if (is_v4(its))
35905147 ctlr |= GITS_CTLR_ImDe;
35915148 writel_relaxed(ctlr, its->base + GITS_CTLR);
35925149
....@@ -3604,6 +5161,9 @@
36045161 its_free_tables(its);
36055162 out_free_cmd:
36065163 free_pages((unsigned long)its->cmd_base, get_order(ITS_CMD_QUEUE_SZ));
5164
+out_unmap_sgir:
5165
+ if (its->sgir_base)
5166
+ iounmap(its->sgir_base);
36075167 out_free_its:
36085168 kfree(its);
36095169 out_unmap:
....@@ -3623,16 +5183,6 @@
36235183 u64 timeout = USEC_PER_SEC;
36245184 u64 val;
36255185
3626
- /*
3627
- * If coming via a CPU hotplug event, we don't need to disable
3628
- * LPIs before trying to re-enable them. They are already
3629
- * configured and all is well in the world. Detect this case
3630
- * by checking the allocation of the pending table for the
3631
- * current CPU.
3632
- */
3633
- if (gic_data_rdist()->pend_page)
3634
- return 0;
3635
-
36365186 if (!gic_rdists_supports_plpis()) {
36375187 pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
36385188 return -ENXIO;
....@@ -3642,7 +5192,21 @@
36425192 if (!(val & GICR_CTLR_ENABLE_LPIS))
36435193 return 0;
36445194
3645
- pr_warn("CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
5195
+ /*
5196
+ * If coming via a CPU hotplug event, we don't need to disable
5197
+ * LPIs before trying to re-enable them. They are already
5198
+ * configured and all is well in the world.
5199
+ *
5200
+ * If running with preallocated tables, there is nothing to do.
5201
+ */
5202
+ if (gic_data_rdist()->lpi_enabled ||
5203
+ (gic_rdists->flags & RDIST_FLAGS_RD_TABLES_PREALLOCATED))
5204
+ return 0;
5205
+
5206
+ /*
5207
+ * From that point on, we only try to do some damage control.
5208
+ */
5209
+ pr_warn("GICv3: CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
36465210 smp_processor_id());
36475211 add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
36485212
....@@ -3753,13 +5317,13 @@
37535317 return NUMA_NO_NODE;
37545318 }
37555319
3756
-static int __init gic_acpi_match_srat_its(struct acpi_subtable_header *header,
5320
+static int __init gic_acpi_match_srat_its(union acpi_subtable_headers *header,
37575321 const unsigned long end)
37585322 {
37595323 return 0;
37605324 }
37615325
3762
-static int __init gic_acpi_parse_srat_its(struct acpi_subtable_header *header,
5326
+static int __init gic_acpi_parse_srat_its(union acpi_subtable_headers *header,
37635327 const unsigned long end)
37645328 {
37655329 int node;
....@@ -3775,7 +5339,12 @@
37755339 return -EINVAL;
37765340 }
37775341
3778
- node = acpi_map_pxm_to_node(its_affinity->proximity_domain);
5342
+ /*
5343
+ * Note that in theory a new proximity node could be created by this
5344
+ * entry as it is an SRAT resource allocation structure.
5345
+ * We do not currently support doing so.
5346
+ */
5347
+ node = pxm_to_node(its_affinity->proximity_domain);
37795348
37805349 if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
37815350 pr_err("SRAT: Invalid NUMA node %d in ITS affinity\n", node);
....@@ -3826,7 +5395,7 @@
38265395 static void __init acpi_its_srat_maps_free(void) { }
38275396 #endif
38285397
3829
-static int __init gic_acpi_parse_madt_its(struct acpi_subtable_header *header,
5398
+static int __init gic_acpi_parse_madt_its(union acpi_subtable_headers *header,
38305399 const unsigned long end)
38315400 {
38325401 struct acpi_madt_generic_translator *its_entry;
....@@ -3840,7 +5409,7 @@
38405409 res.end = its_entry->base_address + ACPI_GICV3_ITS_MEM_SIZE - 1;
38415410 res.flags = IORESOURCE_MEM;
38425411
3843
- dom_handle = irq_domain_alloc_fwnode((void *)its_entry->base_address);
5412
+ dom_handle = irq_domain_alloc_fwnode(&res.start);
38445413 if (!dom_handle) {
38455414 pr_err("ITS@%pa: Unable to allocate GICv3 ITS domain token\n",
38465415 &res.start);
....@@ -3883,7 +5452,10 @@
38835452 struct device_node *of_node;
38845453 struct its_node *its;
38855454 bool has_v4 = false;
5455
+ bool has_v4_1 = false;
38865456 int err;
5457
+
5458
+ gic_rdists = rdists;
38875459
38885460 its_parent = parent_domain;
38895461 of_node = to_of_node(handle);
....@@ -3897,17 +5469,29 @@
38975469 return -ENXIO;
38985470 }
38995471
3900
- gic_rdists = rdists;
3901
- err = its_alloc_lpi_tables();
5472
+ err = allocate_lpi_tables();
39025473 if (err)
39035474 return err;
39045475
3905
- list_for_each_entry(its, &its_nodes, entry)
3906
- has_v4 |= its->is_v4;
5476
+ list_for_each_entry(its, &its_nodes, entry) {
5477
+ has_v4 |= is_v4(its);
5478
+ has_v4_1 |= is_v4_1(its);
5479
+ }
5480
+
5481
+ /* Don't bother with inconsistent systems */
5482
+ if (WARN_ON(!has_v4_1 && rdists->has_rvpeid))
5483
+ rdists->has_rvpeid = false;
39075484
39085485 if (has_v4 & rdists->has_vlpis) {
5486
+ const struct irq_domain_ops *sgi_ops;
5487
+
5488
+ if (has_v4_1)
5489
+ sgi_ops = &its_sgi_domain_ops;
5490
+ else
5491
+ sgi_ops = NULL;
5492
+
39095493 if (its_init_vpe_domain() ||
3910
- its_init_v4(parent_domain, &its_vpe_domain_ops)) {
5494
+ its_init_v4(parent_domain, &its_vpe_domain_ops, sgi_ops)) {
39115495 rdists->has_vlpis = false;
39125496 pr_err("ITS: Disabling GICv4 support\n");
39135497 }